> x = 10.5 # assign a decimal value to x; x <- 10.5 also works
> x
[1] 10.5
> class(x) # print the class name of x
[1] "numeric"
> is.numeric(x) # check whether x is a numeric variable
[1] TRUE
> is.integer(x)
[1] FALSE
> y = as.integer(3)
> y
[1] 3
> class(y) # print the class name of y
[1] "integer"
> is.numeric(y) # integer is a special numeric variable
[1] TRUE
和多数编程语言不同,"."在R语言中不是一个特殊符号,可以在变量名和函数名中使用
logical (布尔变量)
> x = 1; y = 2
> z = x > y # is x larger than y?
> z # print the logical value
[1] FALSE
> class(z) # print the class name of z
[1] "logical"
> as.integer(z)
[1] 0
character(字符变量)
> x = as.character(10.5)
> x # print the character string
[1] "10.5"
> class(x) # print the class name of x
[1] "character"
# 因子可以理解成取值有次序的离散变量,"Levels: a b c"表明了三个水平(level)的次序
> x <- factor(c("a","a","b","b","b","c"))
> x
[1] a a b b b c
Levels: a b c
# 因子可以被转换为与其水平次序相对应的整数
> as.integer(x)
[1] 1 1 2 2 2 3
# 因子的次序默认按字母表确定,也可以人为指定
> x <- factor(c("a","a","b","b","b","c"),levels=c("b","c","a"))
> x
[1] a a b b b c
Levels: b c a
#获取因子的levels
> levels(x)
[1] "b" "c" "a"
2c) Matrix
R中的matrix即为二维的数组,它和一维数组一样,存储相同类型的变量
> B = matrix(
+ c(2, 4, 3, 1, 5, 7),
+ nrow=3,
+ ncol=2)
> B # B has 3 rows and 2 columns
[,1] [,2]
[1,] 2 1
[2,] 4 5
[3,] 3 7
2d) List
R语言中的列表(list)可以用来存储一组任意类型的变量(变量类型不一定要相同)
> n = c(2, 3, 5)
> s = c("aa", "bb", "cc", "dd", "ee")
> b = c(TRUE, FALSE, TRUE, FALSE, FALSE)
> x = list(n, s, b, 3) # x contains copies of n, s, b and the number 3
> x
[[1]]
[1] 2 3 5
[[2]]
[1] "aa" "bb" "cc" "dd" "ee"
[[3]]
[1] TRUE FALSE TRUE FALSE FALSE
[[4]]
[1] 3
# 可以用双方括号[[]]获取列表的元素
> x[[1]]
[1] 2 3 5
# 可以用字符串给列表元素命名
> names(x) <- c("A","B","C","D")
> names(x)
[1] "A" "B" "C" "D"
# 可以用以下两种方式访问有命名的列表元素
> x[["A"]]
[1] 2 3 5
> x$A
[1] 2 3 5
# Two small samples of six observations each; the paired test at the end
# treats x[i] and y[i] as a matched pair.
x <- c(5.6, 7.9, 8.9, 19.5, 20.5, 39.5)
y <- c(6.5, 8.3, 9.1, 17.9, 29.4, 22.8)
# Unpaired two-tailed t test (t.test() defaults to var.equal = FALSE,
# i.e. the Welch test).
t.test(x, y, alternative = "two.sided")
# Unpaired one-tailed t test; alternative hypothesis: mean(x) < mean(y).
t.test(x, y, alternative = "less")
# Paired one-tailed t test; alternative hypothesis: mean of (x - y) < 0.
# The argument name must be spelled `alternative` exactly: a misspelled name
# is absorbed by `...` and the test silently falls back to the two-sided
# default.
t.test(x, y, alternative = "less", paired = TRUE)
One way ANOVA:
# Build an example data set: 60 draws in total, 20 from each of three normal
# distributions with means 1, 0 and -0.5 and a standard deviation of 1.
R <- rnorm(60, mean = rep(c(1, 0, -0.5), each = 20))
# Group labels stored as a factor: 20 x "A", then 20 x "B", then 20 x "C".
D <- factor(rep(c("A", "B", "C"), each = 20))
# Collect the response and the grouping factor in a single data frame.
# NOTE(review): the name `table` is also a base R function; it is kept here
# unchanged because later code may refer to it.
table <- data.frame(R = R, D = D)
# One-way ANOVA: test whether groups A, B and C differ.
summary(aov(R ~ D, data = table))
Df Sum Sq Mean Sq F value Pr(>F)
D 2 16.65 8.323 8.417 0.000627 ***
Residuals 57 56.36 0.989
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
3c) aggregate
在原生R语言中,aggregate函数可以根据数据框的特定列进行分组计算
# 在mtcars数据集中,按"vs"列分组,计算"mpg"列的均值
> aggregate(mpg ~ vs, mtcars, mean)
vs mpg
1 0 16.61667
2 1 24.55714
# this approach also works
> aggregate(mtcars$mpg,by=list(vs=mtcars$vs),mean)
vs x
1 0 16.61667
2 1 24.55714