################################################################
## Section I: Programming Basics
################################
## 1. Introduction and Preliminaries
# Download & Install R: www.r-project.org
# Introduction Document: http://cran.r-project.org/doc/manuals/R-intro.pdf
# Download and install RStudio: http://www.rstudio.org
# RStudio Screen
## Console - simple scripts and output
## Workspace - Environment (active objects) and History of commands
## Files/Plots/Packages/Help
## RScript
# The R interface:
## 1. The R console is handy for quick calculations and for trying out a single command.
### For example, type
3 + 2
### in the Console, then press Enter (or click Run).
## 2. Real work belongs in the Script Editor: write your R code there and save the script often.
## 3. To open a new script: File -> New File -> R Script
# Working directory - the folder R uses by default when it reads and writes files
## Create a folder somewhere on your computer
## Copy the full path of that folder (e.g. C:\Users\v\Documents\My Documents full backup\ODU\Modern topics seminar\Day 01)
# In the script, pass that path to setwd(); note the quotes '' and that every \ is doubled to \\
# (replace the example path below with the path of your own folder)
setwd('C:\\Users\\v\\Documents\\My Documents full backup\\ODU\\Modern topics seminar\\Day 01')
# Run the line above,
# then confirm that the working directory really changed
getwd()
# getwd() is also useful whenever you are unsure where your files are being saved
# Save the script file
## File -> Save As -> "Name of file.R"
# A few notes
## Running a block of script from R Editor:
### 1. Select a block of script and hit 'ctrl+R' or "Run" to run selection;
### 2. If no selection is made, 'ctrl+R' or "Run" runs the line where the cursor is at.
## Commenting in a script:
### prefix '#' to add comments, contents after '#' in a line will not be executed.
## Naming conventions:
### 1. R is case SENSITIVE! 'abc' 'ABC' 'Abc' are different.
### 2. Only '.', '_' and alphanumeric characters 'a-z A-Z 0-9' are allowed.
### 3. The name of an object must begin with a letter, or with '.' followed by a letter.
### 4. Avoid reusing the names of R constants or built-in functions (e.g., "pi", "mean", "glm")
### 5. Use meaningful names.
## Accessing help
## help("functionname") opens the help page of a specific function; ?functionname is the shorthand
help("mean")
## help.search("content") searches the help documents related to content; ??content is the shorthand
help.search("lm")
## A web search also helps, e.g. Google: 'linear model R'
# Installing packages and loading them as libraries
## Install from the menu: Packages -> Install packages
## Pick AICcmodavg as an example; we will use it later
## An installed package must still be loaded before it can be used
## Load the library
library("AICcmodavg")
## Alternatively, tick the package's check box in the Packages tab
# Assignment:
## 1. The traditional assignment operator is '<-'
## 2. '=' is accepted as well
v1 <- 123
v2 = 123  # v1 and v2 now hold the same value
v1
v2
############################################
# Basic value types in R
2.13     # a numeric value
# TRUE; FALSE   # logical values (T and F are shorthand)
"Hello"  # a character value
# Ask R how it reads a value
is.numeric("Hello")
is.numeric(2.13)
# Convert a value to a character value
as.character(2.13)
# Anything written inside quotes is character, even when it looks like a number
is.numeric("2.13")
is.character("2.13")
# Without the quotes, the same digits are read as numeric
is.numeric(2.13)
## END of 1. Introduction and Preliminaries
################################
################################
## 2. Vectors
########
## 2.1. Vector assignment
# A vector in R is a one-dimensional, ordered collection of elements.
## A single value is simply a vector of length 1.
v.1 <- 2.13
v.2 <- TRUE
v.3 <- "Hello"
# 'c()' combines several values (or vectors) into one vector
v.4 <- c(1, 3, 5, 7)
v.5 <- c(v.1, v.4)
v.4
v.5
# All elements of a vector share ONE value type: numeric, logical, or character.
# When a character value is mixed in, every element is converted to character
v.6 <- c(1, 2, 3, "X")
v.6
########
## 2.2. Generating regular sequences
# 'seq()' and ':' both generate regular sequences
help("seq")  # open the help document on seq() (same as ?seq)
# Default usage:
## seq(from = 1, to = 1, by = ((to - from)/(length.out - 1)),
##     length.out = NULL, along.with = NULL, ...)
seq(from = 1, to = 10, by = .5)  # increment of .5
seq(1, 10, .5)                   # the same call with arguments matched by position
seq(1, 10, length.out = 15)      # the sequence has 15 values
# ':' is a simplified version whose increment is either +1 or -1
1:10
10:1
# 'rep()' replicates the elements of a vector
v.8 <- 1:5
v.8
rep(v.8, times = 3)  # the whole sequence repeated 3 times
rep(v.8, each = 3)   # each element repeated 3 times
########
## 2.3. Referencing elements in a vector
v.10 <- 11:20
# 'length()' returns the number of elements in a vector.
length(v.10)
# '[ ]' together with an index vector selects a subset of the elements
## 1. An index vector of positive integers selects elements by position.
v.10[3]
v.10[c(1, 3, 8)]
## 2. An index vector of negative integers excludes the elements at those positions.
v.10[-3]
v.10[-c(1, 3, 8)]
## 3. A logical index vector selects elements based on their values.
v.10 >= .5        # every element of 11:20 passes the test: all TRUE
v.10 > 20         # no element passes the test: all FALSE
v.10[v.10 >= .5]  # all TRUE, so the whole vector is returned
v.10[v.10 > 20]   # all FALSE, so the result is an empty vector
v.10[v.10 >= 13 & v.10 < 17]  # AND
v.10[v.10 <= 13 | v.10 > 17]  # OR
########
## 2.4. Commonly Used Functions
# Commonly used descriptive functions
sort(v.10)
min(v.10)
max(v.10)
mean(v.10)
sd(v.10)
summary(v.10)  # 5 number summary plus mean
# Be careful with NA values: a function may act differently from what you expect
## when NA values are present. Many built-in R functions let you exclude NA
## values from the calculation by specifying 'na.rm = TRUE'.
## (Spell out TRUE: the shorthand T is an ordinary variable that can be overwritten.)
v.10[1] <- NA  # now v.10 has an NA (Not Available) in the first position
mean(v.10)                # NA, because one element is missing
mean(v.10, na.rm = TRUE)  # mean of the remaining elements
## END of 2. Vectors
## END of 2. Vectors
################################
# Other objects in R:
# list, array, matrix, table, data frame
################################
## 3. Data.frame
########
########
## 3.2. Data.Frame
# A data frame stores a data table: it is a list of vectors that all have the same length.
# The object df.1 below, for example, is a data.frame containing 3 vectors (v.a, v.b, v.c) of length 4.
# The columns of a data frame may hold different types of data
v.a <- c(2, 3, 5, 7)
v.b <- c("aa", "bb", "cc", "dd")
v.c <- c(TRUE, FALSE, TRUE, TRUE)
df.1 <- data.frame(v.a, v.b, v.c)  # df.1 looks like a matrix, but it is not one
df.1  # also look at df.1 in the Environment pane
# The data frame is the structure R uses for data imported from external files through
# the functions 'read.table()' and 'read.csv()'
## Get the path of the current working directory and make sure your data file is stored in it
getwd()
## Now load the 'houses.txt' file (header = TRUE: the first line of the file holds the column names)
df.2 <- read.table('houses.txt', header = TRUE)
## Or use 'read.csv()' specifically for comma separated files. If you have your data
## in Excel, you can first save your data file as '.csv' file type in Excel, then import
## it into R using 'read.csv()'.
# You can also output your R data object to an external file.
# row.names = FALSE keeps R's row numbers out of the file; otherwise they are written as an
# extra first column that comes back as a spurious column "X" when the file is read again.
write.csv(df.2, 'my_output_data.csv', row.names = FALSE)
# Check that you have "my_output_data.csv" in your folder and open it
df.2B <- read.csv('my_output_data.csv')  # use this to read data from Excel (converted to .csv)
df.2B
# Values in a data.frame can be referenced in the same way as in a list
df.2[[2]]    # 2nd component / variable / column of df.2
names(df.2)
head(df.2)
df.2$Age     # Age of houses
## Indexing with [row, column] works as well.
df.2[1, ]    # 1st row
df.2[, -2]   # everything except the 2nd column
# Use 'attach()' to make the variables in a data.frame directly accessible
df.2$Price
# Before attach() the bare column name is not found. try() still shows that error, but lets
# the script keep running when it is executed as a whole instead of line by line.
try(Price)
# attach() makes it easy to reference columns, but be careful: the attached columns are
# copies, so later changes to df.2 are not seen through the bare names.
# Undo with detach(df.2) when you are done.
attach(df.2)
Price
# Now that you've seen different data structures in R: many R functions operate
## differently depending on the structure of the input object.
df.3 <- df.2[, -6]      # drop the 6th column; per the course notes this leaves 5 numeric variables
m.5 <- as.matrix(df.3)  # now m.5 is a matrix with the same values
class(df.3)
class(m.5)
mean(df.3)        # 'mean()' does not work on a whole data.frame: it returns NA with a warning
mean(df.3$Price)  # you have to specify the column
colMeans(df.3)    # 'colMeans()' returns the mean of each variable of a data.frame
mean(m.5)         # 'mean()' for a matrix returns the mean of all elements
colMeans(m.5)     # for a matrix, 'colMeans()' works too
## END of 3. Data.frame
################################
# We have created lots of objects, but we don't need them now. Let's clear all
## existing objects.
# A data frame that was attached with attach() is not removed by rm(); its columns would
# stay reachable by bare name. So take df.2 off the search path first, if it is there.
if ("df.2" %in% search()) {
  detach("df.2", character.only = TRUE)
}
ls()             # list all existing objects
rm(list = ls())  # delete all objects
# You can also use the broom in the workspace
# You can also clear the Console - Menu, Edit, Clear Console
## END of Section I: Programming Basics
################################################################
################################################################
## Section II: Graphing
# High-level plotting functions create a new plot on the graphics device,
## possibly with axes, labels, titles and so on.
# Low-level plotting functions add more information to an existing plot,
## such as extra points, lines and labels.
# This section works with 'mtcars', a data set built into R
help("mtcars")  # description of the data set (same as ?mtcars)
attach(mtcars)  # now the component names can be used directly
mtcars          # print the data set
################
## 4. Graphing Functions
# Plotting functions create a new plot on the graphics device,
## possibly with axes, labels, titles and so on.
########
## 4.1. 'plot()'
help("plot")                    # help page for plot()
plot(wt, mpg)                   # two vectors: scatterplot with the 1st on X and the 2nd on Y
plot(mpg ~ wt)                  # a formula expression works as well
plot(mpg ~ factor(cyl))         # 'y ~ factor': one boxplot for each level
plot(factor(cyl))               # a factor: bar chart of the # of obs in each level
plot(cbind(wt, mpg, cyl))       # a matrix: scatterplot of the first 2 columns
plot(mtcars)                    # a data.frame: scatterplot matrix
pairs(cbind(mpg, wt, hp, cyl))  # use pairs() to get a scatterplot matrix from a matrix
########
## 4.2. Other plotting functions
# Logical arguments are spelled out as TRUE / FALSE: the shorthands T and F are
# ordinary variables that can be overwritten.
hist(mpg, freq = FALSE)                              # histogram (freq = FALSE: density scale instead of counts)
boxplot(mpg)                                         # boxplot
boxplot(mpg ~ cyl, col = c('red', 'green', 'blue'))  # you can also use a formula
cyl_freq <- table(cyl)                               # frequency table of cyl
pie(cyl_freq)                                        # pie chart for freq. data
barplot(cyl_freq, horiz = TRUE)                      # barplot for freq. data, horiz = TRUE / FALSE
qqnorm(mpg)                                          # Normal Q-Q plot
curve(.5*(x-3)^2+log(x), from = 0, to = 4)           # draw a function curve
########
## 4.3. Misc.
# You can control graphing parameters through the arguments of a plotting function.
## Some commonly used parameters:
## 'col=' color, 'pch=' point symbol, 'cex=' magnification ratio
## 'lwd=' line width, 'lty=' line type
## ?par for more details.
plot(mpg ~ wt, col = 'red', pch = 3, cex = .8)  # color, point style, 80% of default size
# add = TRUE draws the line 37.285 - 5.344*x for x from 1.5 to 5.5 on top of the existing plot
curve(37.285-5.344*x, 1.5, 5.5, add = TRUE, lwd = 2, lty = 2, col = 'blue')
# Clean up: mtcars was attached for this section; take it off the search path again
# so its column names no longer linger as bare names.
if ("mtcars" %in% search()) {
  detach("mtcars", character.only = TRUE)
}
## END of 4.
################
#####END