Nunz-mik-12-12-05.R

From Organic Design wiki

Code snipits and programs written in R, S or S-PLUS

  1. Load required R libraries.

library(limma) datadir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR"

  1. Read targets (Miks file)
  2. targets<-readTargets()
  1. Define weight function to read in the recorded flags in each GPR file

wt.obs<-function(gpr){

 flagged <- (gpr[, "Flags"])
 flagged

}

  1. Read in GPR files
  2. Currently not picking up any weight information
  3. names=targets$Name,

RG<-read.maimages(files=dir(datadir),source="genepix",path=datadir,columns=list(Rf = "F635 Median", Gf = "F532 Median",Rb = "B635 Median", Gb = "B532 Median")) RG$printer <- getLayout(RG$genes)

  1. Define object containing weights for each GPR file, and set weights to "NULL" in
  2. RG variables (otherwise weights are used in normalization).

RG.weights<-RG$weights

RG$weights=NULL

save(list=c("RG","RG.weights","targets"),file="RGw.data")

  1. Set zero values in foreground and background to 0.5

load("RGw.data") library(limma)

RG$R[RG$R==0]<-0.5 RG$G[RG$G==0]<-0.5 RG$Rb[RG$Rb==0]<-0.5 RG$Gb[RG$Gb==0]<-0.5

  1. Create objects containing background corrected intensity data, and non-background
  2. (RGh) corrected intensity data (RGb).

RGb<-backgroundCorrect(RG,method="none") RGh<-backgroundCorrect(RG,method="half")

  1. Create MA objects based on RGb and RGh, prior to normalization.

MAh<-normalizeWithinArrays(RGh,method="none") MAb<-normalizeWithinArrays(RGb,method="none")

  1. Perform loess normalization on MAh and MAb.

MAh.n<-normalizeWithinArrays(RGh,method='loess') MAb.n<-normalizeWithinArrays(RGb,method='loess')

save(list=c("RGb","RGh","MAh","MAb","MAh.n","MAb.n","RG.weights"),file="normw.data")

  1. Image plots

load('RGw.data') for(i in 1:(dim(RG$R)[2]/8)){ # BAD CODING ... Should be 1:(ncol(RG$R)/9)

 fname<-paste("JPG/BGFG/BG_FG_",i,".png",sep="") ... pngs better
  1. jpeg(fname, height = 900, width=1200, quality=80, bg="white")
 png(fname, height = 900, width=1200, pointsize=15)  
 par(mfrow=c(8,4))
 for(j in 1:8){
   ar<-(i-1)*8+j
   imageplot(log2(RG$Rb[,ar]), RG$printer, low="white", high="red")
   text(1,80,colnames(RG$R),pos=4)
   imageplot(log2(RG$Gb[,ar]), RG$printer, low="white", high="green")
   text(1,80,colnames(RG$R),pos=4)
   imageplot(log2(RG$R[,ar]), RG$printer, low="white", high="red")
   imageplot(log2(RG$G[,ar]), RG$printer, low="white", high="green")
 }
 dev.off()

}

  1. Create histograms of red and green background and foreground intensities
  2. for each array. Four arrays per page.

load('RGw.data') load('normw.data')

for(i in 1:(ncol(RG$R)/4)){

 fname<-paste("JPG/Hist/BGFG_hist_",i,".jpg",sep="")
 jpeg(fname, height = 900, width=1200, quality=80, bg="white")
  1. par(mfrow=c(4,5))
 par(mfrow=c(4,4))
 for(j in 1:4){
   ar<-(i-1)*4+j
   hist(log2(RG$Rb[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(Rb)",col='red')
   hist(log2(RG$Gb[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(Gb)",col='green')
   hist(log2(RG$R[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(R)",col='red')
   hist(log2(RG$G[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(G)",col='green')
  1. plot(density(log2(RG$G[,ar])),col='green',lty=1)
  2. lines(density(log2(RG$Rb[,ar])),col='red',lty=2)
  3. lines(density(log2(RG$R[,ar])),col='red',lty=1)
  4. lines(density(log2(RG$Gb[,ar])),col='green',lty=2)
 }
 dev.off()

}

  1. Boxplots for NBC data (Pre-norm and post-norm)

lo<-grep("low", colnames(RG)) md<-grep("average", colnames(RG)) hi<-grep("high", colnames(RG))

jpeg("JPG/Boxplots/boxplot-full-nbc-pre.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAb$M~col(MAb$M),names=targets$Name,main="Pre-normalization (NBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,lo]~col(MAb$M[,lo]),names=targets$Name[lo],main="Pre-normalization (NBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,md]~col(MAb$M[,md]),names=targets$Name[md],main="Pre-normalization (NBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,hi]~col(MAb$M[,hi]),names=targets$Name[hi],main="Pre-normalization (NBC): high arrays",cex.lab=0.7) dev.off()

jpeg("JPG/Boxplots/boxplot-full-nbc-post.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAb.n$M~col(MAb.n$M),names=targets$Name,main="Post-normalization (NBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,lo]~col(MAb.n$M[,lo]),names=targets$Name[lo],main="Post-normalization (NBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,md]~col(MAb.n$M[,md]),names=targets$Name[md],main="Post-normalization (NBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,hi]~col(MAb.n$M[,hi]),names=targets$Name[hi],main="Post-normalization (NBC): high arrays",cex.axis=0.7) dev.off()

  1. Boxplots for HBC data (Pre-norm and post-norm)

jpeg("JPG/Boxplots/boxplot-full-hbc-pre.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAh$M~col(MAh$M),names=targets$Name,main="Pre-normalization (HBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,lo]~col(MAh$M[,lo]),names=targets$Name[lo],main="Pre-normalization (HBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,md]~col(MAh$M[,md]),names=targets$Name[md],main="Pre-normalization (HBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,hi]~col(MAh$M[,hi]),names=targets$Name[hi],main="Pre-normalization (HBC): high arrays",cex.lab=0.7) dev.off()

jpeg("JPG/Boxplots/boxplot-full-hbc-post.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAh.n$M~col(MAh.n$M),names=targets$Name,main="Post-normalization (HBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,lo]~col(MAh.n$M[,lo]),names=targets$Name[lo],main="Post-normalization (HBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,md]~col(MAh.n$M[,md]),names=targets$Name[md],main="Post-normalization (HBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,hi]~col(MAh.n$M[,hi]),names=targets$Name[hi],main="Post-normalization (HBC): high arrays",cex.axis=0.7) dev.off()

  1. Mb.nn.hi<-normalizeBetweenArrays(MAb.n$M[,odd])
  2. Mb.nn.med<-normalizeBetweenArrays(MAb.n$M[,even])
  3. Mh.nn.hi<-normalizeBetweenArrays(MAh.n$M[,odd])
  4. Mh.nn.med<-normalizeBetweenArrays(MAh.n$M[,even])
  5. save(list=c("Mh.nn.hi","Mh.nn.med","Mb.nn.hi","Mb.nn.med"),file="norm2w.data")
  1. Find saturated spots and create combined data sets

library(limma)

load("RGw.data") load("normw.data")

  1. load("norm2w.data")

sat.r<-RG$R==(2^16-1) # Alot more saturated in R channel sat.g<-RG$G==(2^16-1) sat<-sat.r | sat.g

  1. lo<-grep("l",targets$Name) Already done above
  2. md<-grep("a",targets$Name)
  3. hi<-grep("h",targets$Name)

tsat<-sat[,lo] & sat[,md] & sat[,hi]

sum(tsat) sum(apply(tsat,1,sum)>0)

MCb.n<-MAb.n MCb.n$M<-MAb.n$M[,hi] MCb.n$M[sat[,hi]]<-MAb.n$M[,md][sat[,hi]] MCb.n$M[sat[,md]]<-MAb.n$M[,lo][sat[,md]] MCb.n$A<-MAb.n$A[,hi] MCb.n$A[sat[,hi]]<-MAb.n$A[,md][sat[,hi]] MCb.n$A[sat[,md]]<-MAb.n$A[,lo][sat[,md]]

MCh.n<-MAh.n MCh.n$M<-MAh.n$M[,hi] MCh.n$M[sat[,hi]]<-MAh.n$M[,md][sat[,hi]] MCh.n$M[sat[,md]]<-MAh.n$M[,lo][sat[,md]] MCh.n$A<-MAh.n$A[,hi] MCh.n$A[sat[,hi]]<-MAh.n$A[,md][sat[,hi]] MCh.n$A[sat[,md]]<-MAh.n$A[,lo][sat[,md]]

  1. boxplot(MCh.n~col(MCh.n))
  2. boxplot(MCh.n~col(MCh.n))
  1. par(mfrow=c(3,4))
  2. for(i in 1:ncol(MCb.n$M)){
  3. plotMA(MCb.n,array=i)
  4. }
  5. par(mfrow=c(3,4))
  6. for(i in 1:ncol(MCh.n$M)){
  7. plotMA(MCh.n,array=i)
  8. }
  1. Linear model analysis

targ<-targets[md,] design<-matrix(c(0, 0,1, 0, 0,0,-1,0,0, 0,0,0, # C.25

                0, 0,0,-1, 0,0, 0,1,0, 0,0,0,  # M.I10.25
                1, 0,0, 0,-1,0, 0,0,0, 0,0,0,  # C.12
                0,-1,0, 0, 0,0, 0,0,0, 0,1,0,  # M.I0.12
                0, 0,0, 0, 0,0, 0,0,1,-1,0,0,  # M.I5.22                 
                ),12,5)

colnames(design)<-c("C.25","M.I10.25","C.12","M.I0.12","M.I5.22") rownames(design)<- substr(colnames(RG)[md], 20, 27) # targets$Name[md]

MM<-MCb.n[,c(1:5,7:11)] # Pool 3 and ref-ref removed design<-design[c(1:5,7:11),]

fit<-lmFit(MM,design) cont.matrix<-makeContrasts(C.25vsM.I10.25=C.25-M.I10.25,C.12vsM.I0.12=C.12-M.I0.12,

                          C.25vsC.12=C.25-C.12,M.I10.25vsM.I0.12=M.I10.25-M.I0.12,
                          levels=design)

fit2<-contrasts.fit(fit,cont.matrix) fit2<-eBayes(fit2)

par(mfrow=c(2,2)) p.n<-c("C.25vsM.I10.25","C.12vsM.I0.12","C.25vsC.12","M.I10.25vsM.I0.12") # Names on cont.matrix tt<-kk<-list() for(i in 1:length(p.n)){

 tti<-toptable(fit2,coef=i,adjust="BH",number=nrow(RG$R))
 kki<-as.numeric(rownames(tti)[1:20])

} for(i in 1:length(p.n)){

 MA<-list()
 MA$M<-fit2$coef[,i]
 MA$A<-fit2$Amean
 plotMA(MA,main=p.n[i],ylim=c(-4,4))
 points(fit2$Amean[kki],fit2$coef[kki,i],pch=16,col="red")
  1. points(fit2$Amean[kkabs(i-3)],fit2$coef[kkabs(i-3),i],pch=16,col="green")

}

  1. write.table(file="out.txt",RG$genes$Name[kk],quote=F,row.names=F,col.names=F) # this fails...

write.table(file="out.txt",RG$genes$Name[sort(unique(unlist(kk)))],quote=F,row.names=F,col.names=F)

ord1<-order(as.numeric(rownames(tt1))) ord2<-order(as.numeric(rownames(tt2))) ord3<-order(as.numeric(rownames(tt3))) ord4<-order(as.numeric(rownames(tt4))) out.mat<-cbind(MCb.n$genes$ID,

              MCb.n$genes$Name,
              MCb.n$M[,1],-MCb.n$M[,5],
  1. MCb.n$M[,3],MCb.n$M[,6],-MCb.n$M[,7], # This should be only 4 + 11
              MCb.n$M[,3],-MCb.n$M[,7], # This should be only 4 + 11
              -MCb.n$M[,2],MCb.n$M[,11],
              -MCb.n$M[,4],MCb.n$M[,8],
              MCb.n$M[,9],-MCb.n$M[,10],
              MCb.n$M[,12],
  1. tt1[as.numeric(rownames(tt1)),1:3],
  2. tt2[as.numeric(rownames(tt2)),1:3],
  3. tt3[as.numeric(rownames(tt3)),1:3],
  4. tt4[as.numeric(rownames(tt4)),1:3])
              tt1[ord1,1:3],
              tt2[ord2,1:3],
              tt3[ord3,1:3],
              tt4[ord4,1:3])

colnames(out.mat)<-c("ProbeID","ProbeName",

  1. paste("Pool",c("1.C12","10.M.I0.12","11.C25","12.M.I10.25","2.C.12","3.C.25","4.C25","5.M.I10.25","7.M.I5.22","8.M.I5.22","9.M.I0.12")[c(1,5,3,6,7,2,11,4,8,9,10)],sep=""),"Ref",
                    paste("Pool",c("1.C12","10.M.I0.12","11.C25","12.M.I10.25","2.C.12","4.C25","5.M.I10.25","7.M.I5.22","8.M.I5.22","9.M.I0.12")[c(1,5,3,7,2,11,4,8,9,10)],sep=""),"Ref",
                    paste(c("log2FC","t.stat","p.value"),".",p.n[1],sep=""),
                    paste(c("log2FC","t.stat","p.value"),".",p.n[2],sep=""),
                    paste(c("log2FC","t.stat","p.value"),".",p.n[3],sep=""),
                    paste(c("log2FC","t.stat","p.value"),".",p.n[4],sep=""))


write.table(out.mat,file='results.txt',sep='\t',row.names=F,col.names=T,quote=F)


  1. Checks
  2. design maps the dye swaps
  3. cont.matrix maps the contrasts

i <- 1 p.n[i] colnames(design) toptable(fit2,coef=i,adjust="BH",number=nrow(RG$R))[ord1,][1:10,]

ind <- 1:10

  1. Applying the cont.matrix[,1] to second group

apply(cbind(MM$M[ind,]%*%design[,1], -MM$M[ind,]%*%design[,2]), 1, mean)

  1. Checking with outputs

tt1[ord1,][1:10,]