# Read the input files# “header=T” means that the data has a title, and sep="\t" is used as the separatordata <-read.table("input/box_plots_mtcars.txt",header=T,sep="\t")df <- data[, c("mpg", "cyl", "wt")]df2 <-read.table("input/histogram_plots.txt",header=T,sep="\t")df3 <-read.table("input/volcano_plots.txt", header=T)df4 <-read.table("input/manhattan_plots_gwasResults.txt",header=T,sep="\t")df5 <-read.table("input/heatmaps.txt",header=T,sep="\t")# Covert data into matrix format# nrow(df5) and ncol(df5) return the number of rows and columns of matrix df5 respectively.dm <-data.matrix(df5[1:nrow(df5),2:ncol(df5)])# Get the row namesrow.names(dm) <- df5[,1]df6 <-read.table("input/ballon_plots_GO.txt", header=T, sep="\t")df7 <-read.table("input/box_plots_David_GO.txt",header=T,sep="\t")df7 <- df7[1:10,]
library(ggplot2) # R语言中最常用的基于grid的可视化工具# 另外两个比较常见的作图packagelibrary(gplots) library(plotrix)library(qqman) # 用于GWAS数据可视化library(pheatmap) #用于绘制热图,ComplexHeatmap也是另外一个常用的packagelibrary(scales) # map numeric value to colorlibrary(RColorBrewer) #提供常见的配色方案# reshape data in Rlibrary(reshape2) library(plyr)
# 指定输出pdf,路径为output/1.1.Basic_boxplot.pdf,高度宽度均为3pdf("output/1.1.Basic_boxplot.pdf", height =3, width =3)# ggplot从数据框df中读取作图所需的数据# aes(x=cyl, y=mpg)告诉ggplot2将数据框中的cyl列作为x轴,mpg列作为y轴ggplot(df, aes(x=cyl, y=mpg))+# 加号在ggplot中意思是在当前的ggplot对象上进行修改# draw the boxplot and fill it with graygeom_boxplot(fill="gray")+# Use the labs function to set the title and modify x and ylabs(title="Plot of mpg per cyl",x="Cyl", y ="Mpg")+# Set the theme styletheme_classic()# Save the plotdev.off()
# Begin to plotp <-ggplot(df, aes(x=cyl, y=mpg))+geom_boxplot(fill="gray")+labs(title="Plot of mpg per cyl",x="Cyl", y ="Mpg")+theme_classic()# Sava as pdfggsave("output/1.1.Basic_boxplot.pdf", plot=p, height =3, width =3)
ggplot(df, aes(x=cyl, y=mpg))+geom_boxplot(fill="gray")+labs(title="Plot of mpg per cyl",x="Cyl", y ="Mpg")+theme_classic()
1b) Change continuous color by groups
ggplot(df, aes(x=cyl, y=mpg, fill=cyl))+# fill=cyl: 用颜色表示cyl一列的数值geom_boxplot()+labs(title="Plot of mpg per cyl",x="Cyl", y ="Mpg")+scale_fill_brewer(palette="Blues")+# palette="Blues": 定义了一种数值到颜色的对应关系,数值越大蓝色的颜色越深theme_bw()
#Read the data tabledata=read.csv("boxplot_example.csv")####################I.Prepare the data#1.Normalize the data, etcfor (i in12:17){ data[,i]=log(data[,i]+1e-3) # log some expression values}for (i in9:17) { maxValue=max(data[,i]) #scale the data into 0-1 minValue=min(data[,i]) range=maxValue-minValue data[,i]=(data[,i]-minValue)/range}data$X8.Identity=data$X8.Identity/100#2.Make the new matrix for boxplot: cleaning the data tablelibrary("reshape")m=melt(data[,c(2,7:12,14:17)], id=1)# remove some columns not to show and reshape the matrix into 3 columns for boxplot drawing in bwplot
colnames(m)=c("Type","Feature","Normalized_Value") #define the new column names#3.Clean the names of each type and each feature#Merge sub-types of different elementsm[,1]=sub ("ncRNA_selected","RNAI", m[,1])m[,1]=sub ("ncRNA_3019","RNAII", m[,1])m[,1]=sub ("exon_CCDS","CDS", m[,1])m[,1]=sub ("five_prime_UTR","UTR", m[,1])m[,1]=sub ("three_prime_UTR","UTR", m[,1])m[,1]=sub ("ancestral_repeat","AP", m[,1])#Rename the featurem[,2]=sub('X7.GC','01.GC Content',m[,2])m[,2]=sub('X8.Identity','02.DNA Conservation',m[,2])m[,2]=sub('X9.z_score','03.RNA Struc. Free Energy',m[,2])m[,2]=sub('X10.SCI','04.RNA Struc. Cons.',m[,2])m[,2]=sub('X11.tblastx_score','05.Protein Conservation',m[,2])m[,2]=sub('X12.polyA_RNAseq_MAX','06.PolyA+ RNA-seq',m[,2])m[,2]=sub('X14.small_RNAseq_MAX','07.Small RNA-seq',m[,2])m[,2]=sub('X15.Array_totalRNA_MAX','08.Total RNA Array',m[,2])m[,2]=sub('X16.Array_polyA_MAX','09.PolyA+ RNA Array',m[,2])m[,2]=sub('X17.Array_nonpolyA_MAX','10.PolyA- RNA Array',m[,2])############################Making Boxplotlibrary("lattice")png("boxplot.png",width=1500,height=500) # pdf is recommended for most cases, or png for figure with huge amount of data points
#pdf("boxplot.pdf") attach(m)bwplot(Normalized_Value ~ Type|Feature,fill=c("green","red","yellow","blue","light blue"),layout=c(10,1))dev.off()
ggplot(df, aes(x=cyl, y=mpg))+geom_violin(trim=FALSE)+labs(title="Plot of mpg per cyl", x="Cyl", y ="Mpg")
2b) Add summary statistics on a violin plot
(2b.1) Add median and quartile
ggplot(df, aes(x=cyl, y=mpg))+geom_violin(trim=FALSE)+labs(title="Plot of mpg per cyl", x="Cyl", y ="Mpg")+stat_summary(fun.y=mean, geom="point", shape=23, size=2, color="red")
or
ggplot(df, aes(x=cyl, y=mpg))+geom_violin(trim=FALSE)+labs(title="Plot of mpg per cyl", x="Cyl", y ="Mpg")+geom_boxplot(width=0.1)
(2b.2) Add mean and standard deviation
ggplot(df, aes(x=cyl, y=mpg))+geom_violin(trim=FALSE)+labs(title="Plot of mpg per cyl", x="Cyl", y ="Mpg")+stat_summary(fun.data="mean_sdl", fun.args =list(mult =1), geom="crossbar", width=0.1)
or
ggplot(df, aes(x=cyl, y=mpg))+geom_violin(trim=FALSE)+labs(title="Plot of mpg per cyl", x="Cyl", y ="Mpg")+stat_summary(fun.data=mean_sdl, fun.args =list(mult =1), geom="pointrange", color="red")
2c) Change violin plot fill colors
ggplot(df, aes(x=cyl, y=mpg, fill=cyl))+geom_violin(trim=FALSE)+geom_boxplot(width=0.1, fill="white")+labs(title="Plot of mpg per cyl", x="Cyl", y ="Mpg")+scale_fill_brewer(palette="Blues")+theme_classic()
##Use the plyr package plyr to calculate the average weight of each group :mu <-ddply(df2, "sex", summarise, grp.mean=mean(weight))head(mu)
### sex grp.mean
### 1 F 54.70
### 2 M 65.36
draw the plot
4d) Change fill colors
ggplot(df2, aes(x=weight, fill=sex))+geom_density(alpha=0.7)+geom_vline(data=mu, aes(xintercept=grp.mean, color=sex), linetype="dashed")+labs(title="Weight density curve",x="Weight(kg)", y ="Density")+scale_color_brewer(palette="Paired")+scale_fill_brewer(palette="Blues")+theme_classic()
4e) Change line colors
ggplot(df2, aes(x=weight, color=sex))+geom_density()+geom_vline(data=mu, aes(xintercept=grp.mean, color=sex), linetype="dashed")+labs(title="Weight density curve",x="Weight(kg)", y ="Density")+scale_color_brewer(palette="Paired")+theme_classic()
4f) Combine histogram and density plots
ggplot(df2, aes(x=weight, color=sex, fill=sex))+geom_histogram(binwidth=1, aes(y=..density..), alpha=0.5, position="identity")+geom_density(alpha=.2)+labs(title="Weight density curve",x="Weight(kg)", y ="Density")+scale_color_brewer(palette="Paired")+scale_fill_brewer(palette="Blues")+theme_classic()
ggplot(df, aes(x=cyl, y=mpg, fill=cyl, shape=cyl))+geom_dotplot(binaxis='y', stackdir='center', binwidth=1, dotsize=0.8)+labs(title="Plot of mpg per cyl",x="Cyl", y ="Mpg")+#stat_summary(fun.data="mean_sdl", fun.args = list(mult=1), geom="crossbar", width=0.5) +scale_fill_brewer(palette="Blues")+#scale_color_brewer(palette="Blues") +theme_classic()
5d) Change dot colors, shapes and align types
ggplot(df, aes(x=cyl, y=mpg, color=cyl, shape=cyl))+geom_jitter(position=position_jitter(0.1), cex=2)+labs(title="Plot of mpg per cyl",x="Cyl", y ="Mpg")+scale_color_brewer(palette="Blues")+theme_classic()
##to draw high expression value in red, we use colorRampPalette instead of redblue in heatmap.2
##colorRampPalette is a function in the RColorBrewer package
cr <- colorRampPalette(c("blue","white","red"))
heatmap.2(dm,
scale="row", #scale the rows, scale each gene's expression value
key=T, keysize=1.1,
cexCol=0.9,cexRow=0.8,
col=cr(1000),
ColSideColors=c(rep(c("blue","red"),5)),
density.info="none",trace="none",
#dendrogram='none', #if you want to remove dendrogram
Colv = T,Rowv = T) #clusters by both row and col
##9.3.1.cluster by row and col
##cluster and re-order rows
rowclust = hclust(dist(dm))
reordered = dm[rowclust$order,]
##cluster and re-order columns
colclust = hclust(dist(t(dm)))
##9.3.2.scale each row value in [0,1]
dm.reordered = reordered[, colclust$order]
dm.reordered=apply(dm.reordered,1,rescale) #rescale is a function in the scales package
dm.reordered=t(dm.reordered) #transposed matrix
##9.3.3.save col and row names before changing the matrix format
col_name=colnames(dm.reordered)
row_name=rownames(dm.reordered)
##9.3.4.change data format for geom_title
colnames(dm.reordered)=1:ncol(dm.reordered)
rownames(dm.reordered)=1:nrow(dm.reordered)
dm.reordered=melt(dm.reordered) #melt is a function in the reshape2 package
head(dm.reordered)
##9.3.5.draw the heatmap
ggplot(dm.reordered, aes(Var2, Var1)) +
geom_tile(aes(fill = value), color = "white") +
scale_fill_gradient(low = "white", high = "steelblue") +
theme_grey(base_size = 10) +
labs(x = "", y = "") +
scale_x_continuous(expand = c(0, 0),labels=col_name,breaks=1:length(col_name)) +
scale_y_continuous(expand = c(0, 0),labels=row_name,breaks=1:length(row_name))
10) Ballon plots
10a) basic ballon plots
head(df6)
### Biological.process Fold.enrichment X.log10.Pvalue. col
### 1 Small molecule metabolic process 1.0 16 1
### 2 Single-organism catabolic process 1.5 12 1
### 3 Oxoacid metabolic process 2.0 23 1
### 4 Small molecule biosynthetic process 2.5 6 1
### 5 Carboxylic acid metabolic process 2.7 24 1
### 6 Organic acid metabolic process 2.7 25 1