# Example survival analysis on TCGA prostate adenocarcinoma (PRAD) RNA-seq
# data using glmSparseNet. The script downloads the expression assay, builds
# a survival table, fits a cross-validated network-regularised Cox model and
# reports the selected genes.

# Knitr-style parameters; `seed` is used for every set.seed() call below.
params <- list(seed = 2924)

## -----------------------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("glmSparseNet")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
library(dplyr)
library(ggplot2)
library(survival)
library(futile.logger)
library(curatedTCGAData)
library(TCGAutils)
# glmSparseNet must be attached: geneNames(), cv.glmHub(), networkOptions(),
# hallmarks() and separate2GroupsCox() are called unqualified below.
library(glmSparseNet)

# Some general options for futile.logger, the debugging package.
# The results are assigned to .Last.value, presumably so that the return
# values are not auto-printed when the script is knitted.
.Last.value <- flog.layout(layout.format('[~l] ~m'))
# NOTE(review): show.message is reached with `:::`, i.e. it is not exported
# from glmSparseNet; confirm it still exists in the installed version.
.Last.value <- glmSparseNet:::show.message(FALSE)

# Set the ggplot2 default theme to minimal
theme_set(ggplot2::theme_minimal())

## ----curated_data, include=FALSE----------------------------------------------
# Chunk not included in the rendered output because it produces many
# unnecessary messages.
prad <- curatedTCGAData(
  diseaseCode = "PRAD",
  assays = "RNASeq2GeneNorm",
  version = '1.1.38',
  dry.run = FALSE
)

## ----curated_data_non_eval, eval=FALSE----------------------------------------
# prad <- curatedTCGAData(diseaseCode = "PRAD", assays = "RNASeq2GeneNorm",
#                         version = '1.1.38', dry.run = FALSE)

## ----data.show, warning=FALSE, error=FALSE------------------------------------
# Keep only solid tumour samples (code: 01)
prad.primary.solid.tumor <- TCGAutils::TCGAsplitAssays(prad, '01')

# Transpose the first assay so that rows are samples and columns are features
xdata.raw <- t(assay(prad.primary.solid.tumor[[1]]))

# Get survival information
ydata.raw <- colData(prad.primary.solid.tumor) %>%
  as.data.frame %>%
  # Find max time between all days (ignoring missings). When both values are
  # NA, max(..., na.rm = TRUE) yields -Inf, which the `time > 0` filter below
  # removes.
  dplyr::rowwise() %>%
  dplyr::mutate(
    time = max(days_to_last_followup, days_to_death, na.rm = TRUE)
  ) %>%
  # Keep only survival variables and codes
  dplyr::select(patientID, status = vital_status, time) %>%
  # Discard individuals with survival time less or equal to 0
  dplyr::filter(!is.na(time) & time > 0) %>%
  as.data.frame()

# Set index as the patientID
rownames(ydata.raw) <- ydata.raw$patientID

# Keep only samples whose barcode maps to a patient with survival data
xdata.raw <- xdata.raw[
  TCGAbarcode(rownames(xdata.raw)) %in% rownames(ydata.raw),
]

# Keep only features that have standard deviation != 0, then centre and
# scale each column with base::scale
xdata.raw <- xdata.raw %>%
  { (apply(., 2, sd) != 0) } %>%
  { xdata.raw[, .] } %>%
  scale

# Order ydata the same as assay
ydata.raw <- ydata.raw[TCGAbarcode(rownames(xdata.raw)), ]

# Build a small feature subset: a fixed list of genes (looked up by Ensembl
# id) plus 100 randomly sampled columns, de-duplicated and sorted.
set.seed(params$seed)
small.subset <- c(
  geneNames(c(
    'ENSG00000103091', 'ENSG00000064787',
    'ENSG00000119915', 'ENSG00000120158',
    'ENSG00000114491', 'ENSG00000204176',
    'ENSG00000138399'
  ))$external_gene_name,
  sample(colnames(xdata.raw), 100)
) %>%
  unique %>%
  sort

# Only names that are actually present as columns are used for subsetting
xdata <- xdata.raw[, small.subset[small.subset %in% colnames(xdata.raw)]]
ydata <- ydata.raw %>% dplyr::select(time, status)

## ----fit----------------------------------------------------------------------
# Cross-validated Cox fit with cv.glmHub; the network is derived from feature
# correlation (see the networkOptions documentation for the meaning of
# `cutoff` and `min.degree`).
set.seed(params$seed)
fitted <- cv.glmHub(
  xdata,
  Surv(ydata$time, ydata$status),
  family = 'cox',
  nlambda = 1000,
  network = 'correlation',
  network.options = networkOptions(cutoff = .6, min.degree = .2)
)

## ----results------------------------------------------------------------------
plot(fitted)

## ----show_coefs---------------------------------------------------------------
# Non-zero coefficients at the lambda.min solution
coefs.v <- coef(fitted, s = 'lambda.min')[, 1] %>%
  { .[. != 0] }

coefs.v %>%
  {
    data.frame(
      ensembl.id = names(.),
      gene.name = geneNames(names(.))$external_gene_name,
      coefficient = .,
      stringsAsFactors = FALSE
    )
  } %>%
  arrange(gene.name) %>%
  knitr::kable()

## ----hallmarks----------------------------------------------------------------
geneNames(names(coefs.v)) %>%
  { hallmarks(.$external_gene_name)$heatmap }

## -----------------------------------------------------------------------------
# drop = FALSE keeps xdata a matrix even if only one coefficient is selected
separate2GroupsCox(
  as.vector(coefs.v),
  xdata[, names(coefs.v), drop = FALSE],
  ydata,
  plot.title = 'Full dataset',
  legend.outside = FALSE
)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()