# Survival analysis of TCGA breast cancer (BRCA) RNA-seq data with a
# network-regularised Cox model (glmSparseNet::cv.glmHub).
#
# NOTE(review): the upper part of this script (up to the matching of survival
# and assay rows) was reconstructed from a copy that had been mangled by
# machine translation and collapsed onto a few lines. Column names of the
# clinical table and the logger calls were restored from the fragments that
# were still legible -- confirm them against the actual colData() columns.

params <- list(seed = 41)

## ---- eval=FALSE--------------------------------------------------------------
# if (!require("BiocManager"))
#   install.packages("BiocManager")
# BiocManager::install("glmSparseNet")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
library(dplyr)
library(ggplot2)
library(survival)
library(futile.logger)
library(curatedTCGAData)
library(TCGAutils)
# glmSparseNet must be attached: cv.glmHub(), buildLambda(), networkOptions(),
# hallmarks() and separate2GroupsCox() are called unqualified below.
library(glmSparseNet)

# Some general options for futile.logger, the debugging package.
# Assigning to .Last.value keeps the return values from being printed.
.Last.value <- flog.layout(layout.format('[~l] ~m'))
.Last.value <- glmSparseNet:::show.message(FALSE)

# Set the ggplot2 default theme to minimal.
theme_set(ggplot2::theme_minimal())

## ----curated_data, include=FALSE----------------------------------------------
# Chunk not included in the rendered output as it produces many unnecessary
# messages.
brca <- curatedTCGAData(
  diseaseCode = "BRCA",
  assays = "RNASeq2GeneNorm",
  version = "1.1.38",
  dry.run = FALSE
)

## ----curated_data_non_eval, eval=FALSE----------------------------------------
# brca <- curatedTCGAData(diseaseCode = "BRCA", assays = "RNASeq2GeneNorm",
#                         version = "1.1.38", dry.run = FALSE)

## ----data, warning=FALSE, message=FALSE---------------------------------------
# Keep only primary solid tumour samples (sample code: 01).
brca.primary.solid.tumor <- TCGAutils::TCGAsplitAssays(brca, "01")
xdata.raw <- t(assay(brca.primary.solid.tumor[[1]]))

# Get survival information.
ydata.raw <- colData(brca.primary.solid.tumor) %>%
  as.data.frame() %>%
  # Keep only data relative to survival or samples.
  dplyr::select(
    patientID, vital_status,
    Days.to.date.of.Death, Days.to.Date.of.Last.Contact,
    days_to_death, days_to_last_followup,
    Vital.Status
  ) %>%
  # Convert days to integer.
  dplyr::mutate(Days.to.date.of.Death = as.integer(Days.to.date.of.Death)) %>%
  dplyr::mutate(
    Days.to.Last.Contact = as.integer(Days.to.Date.of.Last.Contact)
  ) %>%
  # Find the max time between all day columns (ignoring missing values).
  dplyr::rowwise() %>%
  dplyr::mutate(
    time = max(days_to_last_followup, Days.to.date.of.Death,
               Days.to.Last.Contact, days_to_death, na.rm = TRUE)
  ) %>%
  # Keep only survival variables and codes.
  dplyr::select(patientID, status = vital_status, time) %>%
  # Discard individuals with survival time less than or equal to 0
  # (this also removes rows where every day column was missing).
  dplyr::filter(!is.na(time) & time > 0) %>%
  as.data.frame()

# Set the index (row names) to the patientID.
rownames(ydata.raw) <- ydata.raw$patientID

# Get matches between survival data and assay data.
xdata.raw <- xdata.raw[
  TCGAbarcode(rownames(xdata.raw)) %in% rownames(ydata.raw),
]

# Drop genes with zero standard deviation, then centre and scale the rest.
xdata.raw <- xdata.raw %>%
  { (apply(., 2, sd) != 0) } %>%
  { xdata.raw[, .] } %>%
  scale()

# Order ydata the same as the assay.
ydata.raw <- ydata.raw[TCGAbarcode(rownames(xdata.raw)), ]

# Use only a subset of previously selected genes (plus 100 random ones) to
# keep this example short.
set.seed(params$seed)
small.subset <- c(
  'CD5', 'CSF2RB', 'IRGC', 'NEUROG2', 'NLRC4', 'PDE11A', 'PTEN', 'TP53',
  'BRAF', 'PIK3CB', 'QARS', 'RFC3', 'RPGRIP1L', 'SDC1', 'TMEM31', 'YME1L1',
  'ZBTB11',
  sample(colnames(xdata.raw), 100)
) %>%
  unique()

# Some of the hand-picked genes may have been removed by the sd filter above.
xdata <- xdata.raw[, small.subset[small.subset %in% colnames(xdata.raw)]]
ydata <- ydata.raw %>%
  dplyr::select(time, status)

## ----fit----------------------------------------------------------------------
set.seed(params$seed)
fitted <- cv.glmHub(
  xdata,
  Surv(ydata$time, ydata$status),
  family = 'cox',
  lambda = buildLambda(1),
  network = 'correlation',
  network.options = networkOptions(cutoff = .6, min.degree = .2)
)

## ----results------------------------------------------------------------------
plot(fitted)

## ----show_coefs---------------------------------------------------------------
# Non-zero coefficients at the lambda with the minimum cross-validated error.
coefs.v <- coef(fitted, s = 'lambda.min')[, 1] %>%
  { .[. != 0] }

coefs.v %>%
  {
    data.frame(
      gene.name = names(.),
      coefficient = .,
      stringsAsFactors = FALSE
    )
  } %>%
  arrange(gene.name) %>%
  knitr::kable()

## ----hallmarks----------------------------------------------------------------
names(coefs.v) %>%
  { hallmarks(.)$heatmap }

## -----------------------------------------------------------------------------
# drop = FALSE keeps xdata a matrix even when a single gene is selected.
separate2GroupsCox(
  as.vector(coefs.v),
  xdata[, names(coefs.v), drop = FALSE],
  ydata,
  plot.title = 'Full dataset',
  legend.outside = FALSE
)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()