# Survival analysis of TCGA skin melanoma (SKCM) RNA-seq data with a
# network-regularised Cox model (glmSparseNet::cv.glmHub).
#
# Steps: download the SKCM assay, build the survival table, align and scale
# the expression matrix, fit a cross-validated hub-penalised Cox model on a
# small gene subset, then inspect the selected coefficients.

params <- list(seed = 8432)

## ---- eval=FALSE--------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("glmSparseNet")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
library(dplyr)
library(ggplot2)
library(survival)
library(futile.logger)
library(curatedTCGAData)
library(TCGAutils)
# glmSparseNet must be attached: cv.glmHub(), networkOptions(), geneNames(),
# hallmarks() and separate2GroupsCox() are called unqualified below.
library(glmSparseNet)

# Some general options for futile.logger, the debugging package.
# invisible() keeps the return values out of the rendered output.
invisible(flog.layout(layout.format('[~l] ~m')))
invisible(glmSparseNet:::show.message(FALSE))
# Set the ggplot2 default theme to minimal
theme_set(ggplot2::theme_minimal())

## ----curated_data, include=FALSE----------------------------------------------
# Chunk not included, as it produces too many unnecessary messages
skcm <- curatedTCGAData(
  diseaseCode = 'SKCM',
  assays = 'RNASeq2GeneNorm',
  version = '1.1.38',
  dry.run = FALSE
)

## ----curated_data_non_eval, eval=FALSE----------------------------------------
# skcm <- curatedTCGAData(diseaseCode = 'SKCM', assays = 'RNASeq2GeneNorm',
#                         version = '1.1.38', dry.run = FALSE)

## ----data.show, warning=FALSE, error=FALSE------------------------------------
# Keep only the samples with TCGA sample code '06'
skcm.metastatic <- TCGAutils::TCGAsplitAssays(skcm, '06')
xdata.raw <- t(assay(skcm.metastatic[[1]]))

# Get survival information.
# NOTE(review): the original script built this table twice (first from
# colData(skcm.metastatic), then from colData(skcm)); the first result was
# overwritten before use, so only the effective computation is kept.
ydata.raw <- colData(skcm) %>%
  as.data.frame() %>%
  # Longest recorded time per patient, ignoring missing values. pmax() is the
  # vectorised form of a row-wise max() and yields NA (instead of -Inf plus a
  # warning) when both columns are missing; such rows are dropped below.
  dplyr::mutate(
    time = pmax(days_to_last_followup, days_to_death, na.rm = TRUE)
  ) %>%
  # Keep only survival variables and codes
  dplyr::select(patientID, status = vital_status, time) %>%
  # Discard individuals with survival time less than or equal to 0
  dplyr::filter(!is.na(time) & time > 0) %>%
  as.data.frame()

# Set index as the patientID
rownames(ydata.raw) <- ydata.raw$patientID

# Keep only samples that have survival information ...
xdata.raw <- xdata.raw[
  TCGAbarcode(rownames(xdata.raw)) %in% rownames(ydata.raw),
]
# ... and only features that have standard deviation != 0, then scale
nonconstant <- apply(xdata.raw, 2, sd) != 0
xdata.raw <- scale(xdata.raw[, nonconstant, drop = FALSE])

# Order ydata the same as assay
ydata.raw <- ydata.raw[TCGAbarcode(rownames(xdata.raw)), ]

# Fixed seed so that the random gene sample is reproducible
set.seed(params$seed)
small.subset <- c(
  'FOXL2', 'KLHL5', 'PCYT2', 'SLC6A10P', 'STRAP', 'TMEM33', 'WT1-AS',
  sample(colnames(xdata.raw), 100)
)

# Restrict to the subset genes that are actually present in the assay
xdata <- xdata.raw[, small.subset[small.subset %in% colnames(xdata.raw)]]
ydata <- ydata.raw %>%
  dplyr::select(time, status)

## ----fit----------------------------------------------------------------------
fitted <- cv.glmHub(
  xdata,
  Surv(ydata$time, ydata$status),
  family = 'cox',
  # Folds are built from the status indicator coerced to logical via `!!`
  foldid = glmSparseNet:::balanced.cv.folds(!!ydata$status)$output,
  network = 'correlation',
  network.options = networkOptions(min.degree = .2, cutoff = .6)
)

## ----results------------------------------------------------------------------
plot(fitted)

## ----show_coefs---------------------------------------------------------------
# Non-zero coefficients at the lambda selected as 'lambda.min'
coefs.v <- coef(fitted, s = 'lambda.min')[, 1] %>%
  { .[. != 0] }

coefs.v %>%
  {
    data.frame(
      ensembl.id = names(.),
      gene.name = geneNames(names(.))$external_gene_name,
      coefficient = .,
      stringsAsFactors = FALSE
    )
  } %>%
  arrange(gene.name) %>%
  knitr::kable()

## ----hallmarks----------------------------------------------------------------
geneNames(names(coefs.v)) %>%
  { hallmarks(.$external_gene_name)$heatmap }

## -----------------------------------------------------------------------------
separate2GroupsCox(
  as.vector(coefs.v),
  xdata[, names(coefs.v)],
  ydata,
  plot.title = 'Full dataset',
  legend.outside = FALSE
)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()