Personal tools

FirstPrinciples.R

From OrganicDesign Wiki

Jump to: navigation, search
# 
 
 
library(limma)
options(digits=2)
packageDescription("limma", field="Version")
 
# ========================================================================
# 0) cDNA example analysing M matrix in MAList
# ========================================================================
 
simData <- function() {
#  Generating a RGList for transformation into MAList
 
  set.seed(2)
  n     <<- 4
  preps <<- 4
RG <- new("RGList")
RG$R <- matrix(rnorm(n*preps,8,2), nc=preps)
RG$G <- matrix(rnorm(n*preps,8,2), nc=preps)
# Ordering genes same ass topTable for convenience
RG <<- RG[c(4,2,1,3),]
 
MA <<- MA.RG(RG)
colnames(MA$M) <<- paste("array", 1:preps, sep="")
rownames(MA$M) <<- paste("gene", 1:n, sep="")
MA$M
invisible()
}
 
 
simData()
 
design <- rep(1, preps)
fit <- lmFit(MA, design=design, weights=NULL)
fit$coeff
fit <- eBayes(fit)
topTable(fit, adjust="none")
 
# Checking M values
cbind(rowMeans=rowMeans(MA$M), fit$coef)
 
# Ordinary t-statistics use unequal variance
ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma 
ordinary.t
# Checking t-statistics, var.equal T/F is the same
check.t <- apply(MA$M,1, function(x){t.test(x, paired=FALSE,
                                            var.equal=TRUE)$statistic})
as.matrix(check.t)
cbind(ordinary.t, check.t)
 
# Sigma component - sd(x)
fit$sigma
apply(MA$M, 1, sd)
 
# Stderr component - se(x_bar)
drop(fit$stdev.unscaled) * fit$sigma
apply(MA$M, 1, function(x){sd(x)/sqrt(length(x))})
 
 
# ========================================================================
# 2) Underlying lm.series fits a weighted linear regression 
# ========================================================================
 
x <- as.matrix(design)
Y <- t(MA$M)
 
# lm.series, no weights usage lm.fit(x,Y)
lmfit <- lm.fit(x,Y)
lmfit$coef
 
# Same as rowmeans if no weights vector provided
rowMeans(MA$M)
 
# lmFit coefficients
c(fit$coef)
 
# gene 1, lm.fit(x,y)
lmfit <- lm.fit(x,Y)
lmfit$coef
 
# first principles
solve(t(x)%*%x)%*%t(x)%*%Y
 
# ========================================================================
# 3) Adding weights to genes
# ========================================================================
 
RG$weights <- matrix(1, n,preps)
RG$weights[row(RG$weights)<col(RG$weights)] <- 0
RG$weights
 
MA <- MA.RG(RG)
 
design <- rep(1, preps)
fit <- lmFit(MA, design=design, weights=MA$weights)
fit$coeff
fit <- eBayes(fit)
topTable(fit, adjust="none")
 
fit$sigma
# Sigma component - sd(x)
tmp <- MA$M
tmp[MA$weights==0] <- NA
apply(tmp, 1, sd, na.rm=TRUE)
 
# Stderr component - se(x_bar)
drop(fit$stdev.unscaled) * fit$sigma
apply(tmp, 1, function(x,...){sd(x,...)/sqrt(length(x[!is.na(x)]))}, na.rm=TRUE)
 
# ========================================================================
# 3) First principles using lm.wfit
# ========================================================================
 
# fit on first gene only
#y <- MA$M[1,]
 
W <- MA$weights
# weighted loop over all four genes lm.wfit(x,y,w) ( remember Y is t(MA$M) )
for(i in 1:preps) {
  print(lm.wfit(x, Y[,i], W[i,])$coef)
}
fit$coef
 
# Equivalent subsetting/averaging out 0 weight information
for(i in 1:preps) {
  print(Y[,i] %*% W[i,] / sum(W[i,]))
}
 
# first principles on gene 2
i <- 2
w <- t(MA$weights[i,])
y <- MA$M[i,]
solve(t(x)%*%t(w)%*%w%*%x)%*%t(x)%*%t(w)%*%w%*%y
 
# Modifying the first weight
w[1] <- 0.5
print(lm.wfit(x, y, w)$coef)
# verification by multiplying weights by y
sum(w/sum(w) * y)
 
# ========================================================================
# Adding blocks, duplicateCorrelation calls randomizedBlockFit (statmod)
# corfit <- duplicateCorrelation(MA, ndups=1, block=c(1,1,2,2))
# fit <- lmFit(MA, block=c(1,1,2,2), correlation=corfit$consensus)
# ========================================================================
# ========================================================================
# 3) Analysis using limma
# ========================================================================
 
block <- rep(1:2, each=2)
corfit <- duplicateCorrelation(MA, ndups=1, block=block)
fit <- lmFit(MA, block=block, correlation=corfit$consensus,
             weights=RG$weights)
design <- rep(1, preps)
fit$coeff
fit <- eBayes(fit)
topTable(fit)
 
# Block structure and 0 correlation identical to no blocking
fit2 <- lmFit(MA, block=block, correlation=0)
fit2 <- eBayes(fit2)
topTable(fit2)
 
fit3 <- lmFit(MA, correlation=0, block=block)
fit3 <- eBayes(fit3)
topTable(fit3)
 
for( cor in seq(0, 0.99, by=0.1) ) {
  fit <- lmFit(MA, block=block, correlation=cor)
  fit <- eBayes(fit)
  cat(sprintf("%4.2f", fit$p.value), "\n")
}
 
 
i <- 4
ndups <- 1
narrays <- preps
A <- factor(rep(1:narrays, rep(ndups, narrays)))
block <- rep(1:2, each=2)
y <- MA$M[i,]
X <- as.matrix(design)
 
# under independence Z <- model.matrix(~0+factor(1:preps))
Z <- model.matrix(~0+block)
 
w  <- MA$weights[i,]
s  <- randomizedBlockFit(y, X, Z, only.varcomp = TRUE, 
                  maxit = 20)$varcomp
 
s2 <- randomizedBlockFit(y, X, Z, w, only.varcomp = TRUE, 
                  maxit = 20)$varcomp
 
# gls.series by first principles
ub <- unique(block)
nblocks <- length(ub)
Z <- matrix(block, narrays, nblocks) == matrix(ub, narrays, 
             nblocks, byrow = TRUE)
V <- Z %*% (corfit$cor * t(Z))
diag(V) <- 1
wrs <- 1/sqrt(drop(MA$weights[i, ]))
 
cholV <- chol(V)
y2 <- backsolve(cholV, y, transpose = TRUE)
X2 <- backsolve(cholV, X, transpose = TRUE)
        out <- lm.fit(X2, y2)
 
# Verification
fit$coef[4]
out$coef
 
# ========================================================================
# Section from lm.series which calls lm.fit, and lm.wfit
# ========================================================================
#  y <- as.vector(M[i, ])
#        obs <- is.finite(y)
#        if (sum(obs) > 0) {
#            X <- design[obs, , drop = FALSE]
#            y <- y[obs]
#            if (is.null(weights)) 
#                out <- lm.fit(X, y)
#            else {
#                w <- as.vector(weights[i, obs])
#                out <- lm.wfit(X, y, w)
#            }
# ========================================================================
 
fit <- lmFit(MA)
names(fit)
 
# ebayes adds df's, ted) (moderated) lods, F, and pvalues (usually adjusted)
fit2 <- ebayes(fit)
names(fit2)
 
#  ----------------------- Affymetrix approach for t-tests ------------------------ #
 
library(affy)
eset <- new("exprSet")
eset@exprs <- cbind(log2(RG$R), log2(RG$G))
 
design <- cbind(rep(1, preps*2), rep(0:1, each=preps))
fit <- lmFit(eset, design=design, weights=NULL)
fit$coeff
fit <- eBayes(fit)
topTable(fit, coef=2, adjust="none")
 
fit$coef # First coef is M of group 1, second coef is the mean differences between groups
for(i in 1:n) {
  print(t.test(eset@exprs[i,1:4], eset@exprs[i,5:8])$est)
}
 
ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma
ordinary.t
 
# Checking t-statistics (not paired because we are using M values)
check.t <- apply(exprs(eset),1, function(x){# var.equal T/F is the same
                                            n <- length(x)
                                            t.test(x[(n/2+1):n], x[1:(n/2)],
                                            paired=FALSE, var.equal=TRUE)$statistic})
check.t
#  ----------------------- Subsetting out sigma component example ------------------------ #
 
#MA <- new("MAList")
#MA$M <- matrix(rnorm(100),nc=4)
 
fit <- lmFit(MA)
fit <- eBayes(fit)
topTable(fit, number=1)
 
# Select first gene
rowID <- as.numeric(rownames(topTable(fit))[1])
fit$sigma[ rowID ] <- NA
 
topTable(fit, number=1)
 
# First gene now different
fit <- eBayes(fit)
topTable(fit, number=1)
 
# Check gene 1 is now gene 4
topTable(fit,number=25)
 
 
x <- rep(1:4, 2:5)

The GNU Project Debian Linux Ubuntu Linux Wikipedia online encycopedia MediaWiki