1. prop.table(x, margin=NULL)
> m <- matrix ( 1 : 4 , 2 )
> m
[, 1 ] [, 2 ]
[ 1 ,] 1 3
[ 2 ,] 2 4
> prop.table ( m )
[, 1 ] [, 2 ]
[ 1 ,] 0.1 0.3
[ 2 ,] 0.2 0.4
> prop.table ( m , 1 )
[, 1 ] [, 2 ]
[ 1 ,] 0.2500000 0.7500000
[ 2 ,] 0.3333333 0.6666667
> prop.table ( m , 2 )
[, 1 ] [, 2 ]
[ 1 ,] 0.3333333 0.4285714
[ 2 ,] 0.6666667 0.5714286
d <- rbind ( c ( 1 , 0 , 6 , 4 ),
c ( 2 , 5 , 7 , 5 ),
c ( 3 , 6 , 8 , 6 ),
c ( 4 , 0 , 0 , 0 ))
f <- as.matrix ( d )
f [ - which ( rowSums ( f == 0 ) > 0 ),]
rowSums ( f == 0 ) : 计算值为0的列数
which返回符合条件的值的坐标 , - 表示排除这些行 ( 注意 : 若没有任何行符合条件 , - which ( ... ) 会返回0行 , 更稳妥的写法是 f [ rowSums ( f == 0 ) == 0 ,] )
data [ rowSums ( is.na ( data )) < ncol ( data ),]
data <- data [ rowSums ( data ) != 0 ,]
t ( x ) #转置后输出时,row.names 和col.names也会变化
aggregate ( x , by = list ( c ( 1 , 1 , 2 , 2 )), FUN = mean )
Splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.
5.修改行或列的名字,rownames, colnames, paste
cluster.mean.colnames <- colnames ( data2.cluster.mean )
cluster.mean.colnames [ 1 ] = paste ( '#' , cluster.mean.colnames [ 1 ], sep = '' )
colnames ( data2.cluster.mean ) <- cluster.mean.colnames
paste ( '1' , '2' , sep = '\t' )
source ( 'script.r' )
data <- read.table ( file = 'file' , sep = '\t' , header = T , row.names = 1 )
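row.names = 1 # 1 is a column index, not TRUE: the first column is used as the row names
If the first line has one fewer item than the number of columns , header is set to TRUE automatically and the first column is used as the row names.
If header = TRUE , col.names are taken from the values in that line , so there is no need to specify them again.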
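If a column name is not a syntactically valid R name ( e.g. it starts with a number ) and check.names = TRUE ( default ), it is passed through make.names and an X is prepended. Row names are not altered. Set check.names = FALSE to keep the original column names unchanged.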
if ( is.na ( result )[ 1 ]) { print ( "NA" ) } else { coef ( result )[[ 1 ]] }
10.attach, 把数据表或列表的一列放入R的搜索空间,使其可以直接调用
>>> data [ 2 : 5 ,]
Gene hmc expression
2 NM_001001144 0.1999845 0.768915
3 NM_001001152 0.0000000 -0.663424
4 NM_001001160 0.1203579 -0.636796
5 NM_001001176 0.0000000 0.249296
>>> attach ( data )
>>> hmc [ 2 : 5 ]
[ 1 ] 0.1999846 0.0000000 0.1203579 0.0000000
>>> expression [ 2 : 5 ]
[ 1 ] 0.768915 -0.663424 -0.636796 0.249296
11.lowess, 局部曲线拟合, This function performs the computations for the _LOWESS_ smoother which uses locally-weighted polynomial regression.
> attach ( mtcars )
> plot ( wt , mpg , main = "Scatter Example" , xlab = "Car Weight" , ylab = "Miles Per Gallon" , pch = 19 )
> abline ( lm ( mpg ~ wt ), col = "red" )
> lines ( lowess ( wt , mpg ), col = "blue" )
> mtcars
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
> data <- mtcars[, c(-2, -4, -7:-11)] # drop columns by negative index
> data <- mtcars[, c(1, 3, 5, 6)] # equivalent: keep columns by positive index
> data
mpg disp drat wt
Mazda RX4 21.0 160.0 3.90 2.620
Mazda RX4 Wag 21.0 160.0 3.90 2.875
Datsun 710 22.8 108.0 3.85 2.320
Hornet 4 Drive 21.4 258.0 3.08 3.215
Hornet Sportabout 18.7 360.0 3.15 3.440
vector <- as.vector ( read.table ( file = "file" , sep = "\t" , header = F ) $ V1 )
rep ( c ( 1 , 2 , 3 ), each = 3 )
rep ( data $ firstcolumn , data $ secondcolumn )
dist and cor
This function computes and returns the distance matrix computed by using the specified distance measure to compute the distances between the rows of a data matrix.
If x and y are matrices then the covariances (or correlations) between the columns of x and the columns of y are computed.
dist (and dist objects, which is what heatmap.2 assumes it is getting) computes the distance between rows , whereas cor computes the correlation between columns ; to get a correlation-based distance between rows , transpose first , e.g. as.dist ( 1 - cor ( t ( x )))
grepl ( pattern , x , ignore.case = FALSE , perl = FALSE , fixed = FALSE , useBytes = FALSE )
matrix [, grepl ( "Treat" , colnames ( matrix )) | grepl ( "Control" , colnames ( matrix ))]
17.substr, 取出 ( 或替换 ) 字符串的一部分
substr ( x , start , stop )
substring ( text , first , last = 1000000L )
substr ( x , start , stop ) <- value
substring ( text , first , last = 1000000L ) <- value
":" 的优先级高于四则运算 , 所以 1 : 3+1 的结果是2 , 3 , 4 , 而不是1 , 2 , 3 , 4
quit ()
write.table ( data , file = "filename.xls" , sep = "\t" , col.names = NA , row.names = TRUE )
21.R中对向量逐元素的and操作使用&,而不是&&; or操作使用|,而不是||
data [ data $ ok < 3 | data $ nok == 'yes' ,]
data $ ok [ data $ ok < 3 ] <- 3
23.取出满足某一列符合条件的行(注意 , 的使用)
data_fdr <- data [ data [, 2 ] >= 2 ,]
data <- data [ order ( data $ col ),]
T2 <- T2 [ order ( T2 $ gene.index ),]
data.frame <- data.frame [ ! duplicated ( data.frame ),]
data.frame <- unique ( data.frame )
> countfiles
[ 1 ] "./treated1en.counts" "./treated2en.counts" "./treated3en.counts"
[ 4 ] "./untreated1en.counts" "./untreated2en.counts" "./untreated3en.counts"
[ 7 ] "./untreated4en.counts"
> strsplit ( countfiles , 'en' )
[[ 1 ]]
[ 1 ] "./treated1" ".counts"
[[ 2 ]]
[ 1 ] "./treated2" ".counts"
[[ 3 ]]
[ 1 ] "./treated3" ".counts"
[[ 4 ]]
[ 1 ] "./untreated1" ".counts"
[[ 5 ]]
[ 1 ] "./untreated2" ".counts"
[[ 6 ]]
[ 1 ] "./untreated3" ".counts"
[[ 7 ]]
[ 1 ] "./untreated4" ".counts"
> sapply ( strsplit ( countfiles , 'en' ), "[[" , 1 )
[ 1 ] "./treated1" "./treated2" "./treated3" "./untreated1" "./untreated2"
[ 6 ] "./untreated3" "./untreated4"
> strsplit ( sapply ( strsplit ( countfiles , 'en' ), "[[" , 1 ), "\\/" )
[[ 1 ]]
[ 1 ] "." "treated1"
[[ 2 ]]
[ 1 ] "." "treated2"
[[ 3 ]]
[ 1 ] "." "treated3"
[[ 4 ]]
[ 1 ] "." "untreated1"
[[ 5 ]]
[ 1 ] "." "untreated2"
[[ 6 ]]
[ 1 ] "." "untreated3"
[[ 7 ]]
[ 1 ] "." "untreated4"
> sapply ( strsplit ( sapply ( strsplit ( countfiles , 'en' ), "[[" , 1 ), "\\/" ), "[[" , 2 )
[ 1 ] "treated1" "treated2" "treated3" "untreated1" "untreated2"
[ 6 ] "untreated3" "untreated4"
27.翻转矩阵或数据框(turn matrix or data.frame upside-down)
> a
X1 X2 X3
ac 1 4 7
cb 2 5 8
bc 3 6 9
> a [ rev ( rownames ( a )),]
X1 X2 X3
bc 3 6 9
cb 2 5 8
ac 1 4 7
> data <- matrix ( c ( 1 , 2 , 3 , 4 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 0 , 0 , 2 , 0 ), nrow = 3 )
> data
[, 1 ] [, 2 ] [, 3 ] [, 4 ] [, 5 ]
[ 1 ,] 1 4 2 5 0
[ 2 ,] 2 0 3 6 2
[ 3 ,] 3 1 4 0 0
> log2 ( data )
[, 1 ] [, 2 ] [, 3 ] [, 4 ] [, 5 ]
[ 1 ,] 0.000000 2 1.000000 2.321928 - Inf
[ 2 ,] 1.000000 - Inf 1.584963 2.584963 1
[ 3 ,] 1.584963 0 2.000000 - Inf - Inf
> data_log <- log2 ( data )
> data_log [ data_log ==- Inf ] = 0
> data_log
[, 1 ] [, 2 ] [, 3 ] [, 4 ] [, 5 ]
[ 1 ,] 0.000000 2 1.000000 2.321928 0
[ 2 ,] 1.000000 0 1.584963 2.584963 1
[ 3 ,] 1.584963 0 2.000000 0.000000 0
> data.m $ value [ data.m $ value < small ] <- 0 # small 为预先定义的阈值
x [ n ,] # 取出第n行
x [, n ] # 取出第n列
x [, c ( 1 , 3 )] #取出第1和3列
x [ -1 ,] #去除第一行
x [ 'pattern' ,] #取出名为pattern的行 ( 注意逗号 ; 不加逗号时对数据框取出的是名为pattern的列 )