## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()

## ----data_preparation1-----------------------------------------------------
###############################################################
# Generate the EWAS data
###############################################################
set.seed(123)

#' Draw one sample from a Dirichlet distribution.
#'
#' Draws independent Gamma(shape = alpha, rate = 1) variates and normalises
#' them so that they sum to one.
#'
#' @param alpha_vec Numeric vector of Dirichlet concentration parameters.
#' @return A numeric vector of the same length as `alpha_vec` summing to 1.
rDirichlet <- function(alpha_vec) {
  num <- length(alpha_vec)
  temp <- rgamma(num, shape = alpha_vec, rate = 1)
  temp / sum(temp)
}

n <- 180   # number of samples
n1 <- 60   # number of controls
n2 <- 120  # number of cases

m <- 2000  # number of CpG sites
K <- 3     # number of underlying cell types

### Simulate methylation baseline profiles
# Assume cell type 1 and cell type 2 come from the same lineage.
# Cell type 1
methy1 <- rbeta(m, 3, 6)
# Cell type 2: a noisy copy of cell type 1, with one fifth of the sites
# redrawn independently.
methy2 <- methy1 + rnorm(m, sd = 0.01)
ind <- sample(seq_len(m), m / 5)
methy2[ind] <- rbeta(length(ind), 3, 6)

# Cell type 3
methy3 <- rbeta(m, 3, 6)
mu <- cbind(methy1, methy2, methy3)

# Number of covariates
p <- 2

### Simulate covariates / phenotypes (disease status and age)
# Row 1: disease status (0 = control, 1 = case); row 2: age in [20, 50].
X <- rbind(c(rep(0, n1), rep(1, n2)), runif(n, min = 20, max = 50))

### Simulate phenotype effects
# beta[j, k, l] is the effect of covariate l on CpG site j in cell type k.
beta <- array(0, dim = c(m, K, p))

# Control vs. case
m_common <- 10
max_signal <- 0.15
min_signal <- 0.07

# We allow different signs and magnitudes.
# Sites 1..m_common: affected in all K cell types.
signs <- sample(c(-1, 1), m_common * K, replace = TRUE)
beta[1:m_common, 1:K, 1] <- signs *
  runif(m_common * K, min = min_signal, max = max_signal)

# Next m_seperate sites: affected in cell types 1 and 2 only.
m_seperate <- 10
signs <- sample(c(-1, 1), m_seperate * 2, replace = TRUE)
beta[m_common + (1:m_seperate), 1:2, 1] <- signs *
  runif(m_seperate * 2, min = min_signal, max = max_signal)

# Next m_seperate sites: affected in cell type K only.
signs <- sample(c(-1, 1), m_seperate, replace = TRUE)
beta[m_common + m_seperate + (1:m_seperate), K, 1] <- signs *
  runif(m_seperate, min = min_signal, max = max_signal)

# Age: same layout as above, shifted by `base` rows and with smaller effects.
base <- 20
m_common <- 10
max_signal <- 0.015
min_signal <- 0.007
signs <- sample(c(-1, 1), m_common * K, replace = TRUE)
beta[base + 1:m_common, 1:K, 2] <- signs *
  runif(m_common * K, min = min_signal, max = max_signal)

m_seperate <- 10
signs <- sample(c(-1, 1), m_seperate * 2, replace = TRUE)
beta[base + m_common + (1:m_seperate), 1:2, 2] <- signs *
  runif(m_seperate * 2, min = min_signal, max = max_signal)

signs <- sample(c(-1, 1), m_seperate, replace = TRUE)
beta[base + m_common + m_seperate + (1:m_seperate), K, 2] <- signs *
  runif(m_seperate, min = min_signal, max = max_signal)

### Generate the cellular compositions
# P is a K x n matrix; column i holds the cell-type proportions of sample i.
# The third concentration parameter depends on disease status and age.
P <- vapply(seq_len(n), function(i) {
  if (X[1, i] == 0) {  # control
    rDirichlet(c(4, 4, 2 + X[2, i] / 10))
  } else {             # case
    rDirichlet(c(4, 4, 5 + X[2, i] / 10))
  }
}, numeric(K))

### Generate the observed methylation profiles
# Ometh is an m x n matrix (CpG sites in rows, samples in columns).
Ometh <- matrix(NA_real_, nrow = m, ncol = n)
for (i in seq_len(n)) {
  # K x m matrix of sample-specific, cell-type-specific methylation levels:
  # baseline plus covariate effects plus Gaussian noise.
  utmp <- vapply(seq_len(m), function(j) {
    # X[, i] is recycled down the rows of the p x K matrix t(beta[j, , ]),
    # so the column sums give the total covariate effect per cell type.
    tmp1 <- colSums(X[, i] * t(beta[j, , ]))
    rnorm(K, mean = mu[j, ] + tmp1, sd = 0.01)
  }, numeric(K))
  # Mix the cell-type-specific profiles by the sample's cell proportions.
  tmp2 <- colSums(P[, i] * utmp)
  Ometh[, i] <- tmp2 + rnorm(m, sd = 0.01)
}

# Constrain methylation values to lie between 0 and 1.
Ometh[Ometh > 1] <- 1
Ometh[Ometh < 0] <- 0

## ----data_preparation2-----------------------------------------------------
# The class of the methylation matrix
class(Ometh)

# The values in the methylation matrix
head(Ometh[, 1:6])

# The class of the covariate matrix
class(X)

# The values in the covariate matrix
X[, 1:6]

## ----model1----------------------------------------------------------------
library(HIREewas)
# NOTE(review): the tolerance is written as 10^(-5); the garbled source showed
# 10^(5), which looks like a dropped minus sign -- confirm against the
# package documentation.
ret_list <- HIRE(Ometh, X, num_celltype = K, tol = 10^(-5), num_iter = 1000,
                 alpha = 0.01)

## ----model2----------------------------------------------------------------
# The class of ret_list
class(ret_list)

# The estimated cell compositions
ret_list$P_t[, 1:6]

# The estimated cell-type-specific methylation baseline profiles
head(ret_list$mu_t)

# The estimated phenotype effects
head(ret_list$beta_t)

# The penalized BIC value
ret_list$pBIC

# The estimated p-values used to declare whether a CpG site is at risk
# for some covariate in some cell type.
# The p-value matrix for case/control
head(ret_list$pvalues[, 1:3])
# The p-value matrix for age
head(ret_list$pvalues[, 4:6])

## --------------------------------------------------------------------------
# Estimated cell compositions vs. the truth.
# Estimated rows 2 and 1 are compared with true rows 1 and 2 -- presumably
# because the estimated cell-type labels are arbitrary; verify after refitting.
par(mfrow = c(1, 3))
plot(ret_list$P_t[2, ], P[1, ], xlim = c(0, 1), ylim = c(0, 1))
abline(a = 0, b = 1, col = "red")
plot(ret_list$P_t[1, ], P[2, ], xlim = c(0, 1), ylim = c(0, 1))
abline(a = 0, b = 1, col = "red")
plot(ret_list$P_t[3, ], P[3, ], xlim = c(0, 1), ylim = c(0, 1))
abline(a = 0, b = 1, col = "red")

## --------------------------------------------------------------------------
# Columns K + c(2, 1, 3) select the age p-values, using the same cell-type
# reordering as in the composition plots above.
riskCpGpattern(ret_list$pvalues[1:100, K + c(2, 1, 3)],
               main_title = "Detected association pattern\n with age",
               hc_row_ind = FALSE)