R, a GNU project, is a language and environment for statistical computing and graphics, providing a wide variety of statistical analysis.
GENERAL RESOURCES
R BASICS
Basics
- R is case sensitive, object-oriented.
- A command ends with a semi-colon (;). The last semi-colon can be omitted.
- A comment begins with # regardless of its location. The single quotes ('') and double quotes ("") are used interchangeably.
- Packages contain data sets and functions, and are accessed through library().
- Objects include vectors, lists, data frames, matrices (array), and factors.
- An R list is an object consisting of an ordered collection of objects known as its components. Lists are a general form of vector in which the various elements need not be of the same type, and are often themselves vectors or lists.
- Data frames are matrix-like structures, in which the columns can be of different types. A data frame is a list with class "data.frame".
- A factor is a vector object used to specify a discrete classification (grouping) of the components of other vectors of the same length.
- Matrices or more generally arrays are multi-dimensional generalizations of vectors. An array can be considered as a multiply subscripted collection of data entries
- The "pi" is the constant 3.141592654. The "NA" indicates a missing value (default).
- The "pkg" (package); "d" (data frame); "m" (matrix); "v" (vector), url, file (file), obj (objects), fit (fitted model), n (number); s (string).
Basic Commands
- quit(); q()
- help(command); help.start()
- search(); help.search()
- dir(); methods()
- library(p); identify(); attach(); detach()
- remove(); rm()
- start:end; c(); rep(); seq()
- scan(); print(); str(); ls()
- cat(); cat("concatenate", c, "and print", "\t")
- options(prompt='.', continue="///", digits=10); getOption("width")
- source(); source.url() /* run commands in a file */
Simple examples
library() # list packages available
library(car) # load a package
list(data()) # list data sets in the current package
summary(Davis)
list(Davis)
list(Davis$weight)
stem(Davis[,2]) # equal to stem(Davis$weight)
stem(Davis$height, scale=4)
boxplot(Davis$weight)
w<-Davis$weight
h<-Davis$height
plot(w ~ h)
cor(Davis[,c(2:3)])
cor.test(w,h)
t.test(Davis[,2], mu=65)
t.test(Davis$height, Davis$weight, mu=100, paired=FALSE)
var.test(Davis$height, Davis$weight)
d<-read.csv("c:/temp/R/nes.csv", header=TRUE)
list(names(d)) # list variable names
OPERATOR/FUNCTION
Operators
- <- (left assignment), -> (right assignment)
- +, -, *, /, ^, %% (modulus)
- >, >=, <, <=, == (equal), != (not equal)
- & (and), | (or)
- %*% (matrix product); %/% (integer division)
- %o% (Outer product); %x% (Kronecker product)
- %in% (Matching operator);
Functions
- abs(); sin(); cos(); tan(); exp(); sqrt(); min(); max()
- log(); log(v,10); log10(); log2(); log(v, base=10)
- mean(); sum(); median(); range(); var(); sd()
- rank(); ave(v, group); by(group)
- c(a, b, c); c(start:end); seq(start:end); seq(10, 100, by=5)
- rep(n, time); rep(7, 3); rep(start:end, time)
- rep(1:3, c(2,2,2)); rep(1:3, each=2); rep(1:3, c(1:3))
- seq(1,4); seq(1,10, by=2); seq(0,1, length=10)
- length(), sort(), order(); rev(v) ## to reverse
- dnorm(1.96); dt(1.96, 100); df(1.96, 1, 100); dchisq(1.96, 10)
- pnorm(1.96); pt(1.96, 100); pf(1.96, 1, 100); pchisq(1.96, 10)
- rpois(n, lambda); rnorm(n); rt(n, df); rt(n, df=c(1:10)); rexp(n)
- substring(s, start, stop); substr(s, start, stop); nchar(s)
- date()
- mode() ## type of object

INPUT OUTPUT
Reading Text Files
- source(f); /* to execute commands in the file */
- read.table(f); read.table.url(url)
- download.file(url); url.show(url)
- m<-read.table("f:/temp/cigar.txt", header=TRUE)
- m<-read.table('f:/temp/cigar.txt')
- names(m)<-c("a", "b", "c")
- read.csv(f, header=TRUE, sep=",", quote="\"", dec=".")
- read.csv2(f, header=TRUE, sep=";", quote="\"", dec=",")
- read.delim(f, header=TRUE, sep="\t", quote="\"", dec=".")
- read.delim2(f, header=TRUE, sep="\t", quote="\"", dec=",")
- m<-read.csv("nes2.csv", header=TRUE)
- read.fwf(file, widths=c(3,5,3), header=FALSE, sep="", as.is=FALSE)
- as.is=TRUE; as.is=T # not to be converted into a factor
- na.strings<-c(".", "NA", "", "#") # characters for missing
- cnt=count.fields(df); which(cnt==7);
Reading Data Frames
- load(d);
- data(d); data(d, package="p")
- data.frame(v1, v2) /* to make a data frame out of vectors */
- m3<-data.frame(as.matrix(m[,2:4]))
- m2<-edit(m); m2<-edit(data.frame(m))
- data.entry(df)
Handling Data
- m2<-match(v1, v2, nomatch=0) # data merging
- m2<-match(m[,1], m[,3])
Writing Data
- cat(); print()
- cat("y x1 x2", "2 4 2", "5 2 7", file="sample.txt", sep="\n")
- write(obj, f)
- write.table(df, file='firms.csv', sep=",", row.names=NA, col.names=NA)
- save(f, obj); save.image(f)
- sink(); format()

MATRICES
Defining Matrices
- m<-c(1, 2, 3, 4); c(1, 2, 3, 4)->m; assign("m", c(1, 2, 3, 4))
- m<-data.frame(column1=c(1,2,3), column2=c(4,5,6)); ## 3 rows by 2 columns
- rep(c(1,2,3), 2); rep(c(1,2,3), each=2);
- rep(c(1,2,3), c(2,2,2)); m<-c(c1=15, c2=54, c3=50)
- seq(1,4); seq(1,10, by=2); seq(0,1, length=10);
- intm<-1:4; intm<-numeric(); intm[1]<-1; intm[2]<-2
- strm<-c("a", "b", "c"); strm<-character(); strm[1]<-"a"; strm[2]<-"b"
- blm<-c(T,F); blm<-v1>10; ## a boolean vector of TRUE and FALSE
- m<-scan()
- mm<-matrix(1:12,4); mm<-matrix(1:12, nrow=4)
- mm<-matrix(1:12, ncol=3); mm<-matrix(1:12, nrow=4)
- mm<-matrix(1:12, nrow=4, ncol=3); mm<-matrix(1:12, 4, 3)
- arrm<-array(1:10); arrm<-array(1:10, dim=c(2,5))
- cbind(); rbind(); gl(); expand.grid()
- list()
Referring Matrices
- m[,2]; v=m[2,]; m[-1, -3] ## to extract elements
- m[c(1, 5, 6)]; m2=m[-c(1, 5, 6)] ## to extract elements
- m<-c(c1=15, c2=54, c3=50); m<-c("c1", "c3")
- m2<-m$c2; m2<-m[,2]; m2<-m[,"c2"]; m2<-m[[2]]
- m[,3:5]; m3<-m[,c(3, 4, 5)]; m3<-m[,c("c3", "c4", "c5")]
- m<-c(4, 2, 4); names(m)<-c("Grape", "Pear", "Apple")
- m1$v2 /*variable 2 of the data frame 1*/
- which(); which.max(); which.min()
- attr(m, which); attributes(obj)
Matrix Functions
- t(); det(); rank(); eigen(); diag(); prod(); crossprod()
- sum(); mean(); var(); sd(); min(); max(); prod(); cumsum(); cumprod()
- is.na(m) ## to check if m contains a missing value
- rowSums(); colSums(); nrow(); ncol()
- dim(m); dimnames(m)
- merge(df1, df2)
- as.factor(); as.matrix(), as.vector(); /* conversion*/
- is.factor(); is.matrix(), is.vector();
- class(); unclass()
- na.omit(); na.fail(); unique(); table(); sample()
- as.array(); as.data.frame()
- as.numeric(); as.character(); as.logical(); as.complex()

REGRESSION
Ordinary Least Squares (OLS)
- lm(); glm()
- m.ols<-lm(v1~v2+v3, data=m) ## linear model
- lm(v1~v2+v3, data=m); summary(lm(v1~v2+v3, data=m)); summary(m.ols)
- names(m.ols); coef(m.ols); fitted(m.ols); resid(m.ols)
- predict(fit); AIC(fit); logLik(fit); deviance(fit)
- model.matrix(v1~v2+v3, data=m)
- m.ols2<-model.matrix(v1~v2+v3, data=m); summary(m.ols2)
Binary Response Regressions
- m.logit<-glm(v1~v2+v3,family=binomial(link=logit),data=m)
- summary(m.logit); coef(m.logit); fitted(m.logit); resid(m.logit)
- lsfit(v1,v2)
- nls(); m.nonlin<-lm(v1~v2+I(v2^2), data=m) ## I() is needed so that ^ means squaring inside a formula
- anova(m.ols, m.nonlin)
- m.qr<-qr(m) ## QR Decomposition of a Matrix

STATISTICS
Descriptives
- summary(m); fivenum(m)
- stem(v); boxplot(v); boxplot(v1, v2); hist(v)
- qqnorm(v); qqline(v)
- rug(); lines()
- table() /*to make a table*/
- tabulate()
Multivariate Analysis
- cor(m); cor(sqrt(m)) ## Pearson correlation
- cor.test(v1, v2)
- prcomp() /* Principal components in the stats package (formerly mva) */
- kmeans() /* Kmeans cluster analysis in the stats package (formerly mva) */
- factanal() /* Factor analysis in the stats package (formerly mva) */
- cancor() /* Canonical correlation in the stats package (formerly mva) */
Categorical Data Analysis
- chisq.test(v1,v2) ## Pearson Chi-squared Test
- fisher.test(v1,v2) ## Fisher Exact Test
- friedman.test(v1,v2) ## Friedman Test
- prop.test(); binom.test() ## sign test
- kruskal.test(v1,v2) ## Kruskal-Wallis Rank Sum Test
- wilcox.test(v1,v2) ## Wilcoxon Rank Sum (Mann-Whitney) Test
- ks.test(v1,v2) ## Two Sample Kolmogorov-Smirnov Test
- bartlett.test(v1,v2) ## Bartlett Test for Homogeneity of Variances

T-test and ANOVA (pdf)
- t.test(v1,v2); t.test(v1,v2, var.equal=FALSE)
- t.test(v1,v2, mu=0, paired=FALSE)
- t.test(v1,v2, mu=10, paired=F, var.equal=T)
- power.t.test(v1,v2); pairwise.t.test()
- var.test(v1,v2) ## F test for equal variance
- m.anova<-aov(v1~v2+v3, data=m)
- aov(); anova()
- summary(m.anova)
- power.anova.test() ## Power calculations for balanced one-way ANOVA tests

PROGRAMMING
Modules
function_name<-function(arguments) {...}
mile.to.km<-function(mile) {mile*8/5}
km<-mile.to.km(c(35, 55, 75))
Flow Control
if (condition) {...} else if (condition) {...} else {...}
while (condition ) {...} # {} may be omitted for a single line expression
for (index in start:end) {...}
for (i in 1:100) {sum <- sum + i}
repeat {...}
switch (statement, list)
Programming Functions
- expression(); parse(); deparse(); eval()
- optim() /* general-purpose optimization */
- nlm() /* Newton algorithm */
- lm() /* linear models */
- nls() /* nonlinear least squares model */

GRAPHICS
Plotting
- plot(y~x, data=m, pch=16) # plotting character (pch)
- pairs(m) # scatterplot matrix
- xyrange<-range(m) # to get range of m
- plot(y~x, data=m, xlim=xyrange, ylim=xyrange)
- abline(0,1)
- plot((0:10), sin((0:10)*pi), type="l") # type "l" (lowercase L) joins the points with lines
- barplot(); boxplot(); stem(); hist();
- matplot() /* matrix plot */
- pairs(m) /* scatterplots */
- coplot() /* conditional plot */
- stripplot() /* strip plot */
- qqplot(); qqnorm(); qqline() /* quantile-quantile plot */
Options
- points() # to add points to a plot
- lines() # to add lines
- text() # to add texts
- mtext() # to add margin texts
- axis() # to control axis
- par(cex=1.25, mex=1.25)
- par(mfrow=c(2,2), mfcol=c(1,1))
