<!-- badges: start -->
生命周期:成熟中(maturing)
<!-- badges: end -->

在本文中,我们展示了 tidybulk/tidyverse 与 base R 之间编码差异的一些示例。我们注意到,赋值语句的数量减少了 10 倍以上,代码行数减少了 2 倍以上。

创建 `tidybulk` tibble。

tt = se_mini

聚合重复的转录本

Tidy transcriptomics

``` {.r .yellow}
rowData(tt)$gene_name = rownames(tt)
tt.aggr = tt %>% aggregate_duplicates(.transcript = gene_name)
```

Base R

``` r
temp = data.frame(
  symbol = dge_list$genes$symbol,
  dge_list$counts
)
dge_list.nr <- by(temp, temp$symbol,
  function(df)
    if(length(df[1,1])>0)
      matrixStats:::colSums(as.matrix(df[,-1]))
)
dge_list.nr <- do.call("rbind", dge_list.nr)
colnames(dge_list.nr) <- colnames(dge_list)
```

缩放计数(counts)

Tidy transcriptomics

``` r
tt.norm = tt.aggr %>% identify_abundant(factor_of_interest = condition) %>% scale_abundance()
```

Base R

``` r
library(edgeR)

dgList <- DGEList(count_m=x,group=group)
keep <- filterByExpr(dgList)
dgList <- dgList[keep,,keep.lib.sizes=FALSE]
[...]
dgList <- calcNormFactors(dgList, method="TMM")
norm_counts.table <- cpm(dgList)
```

筛选高变异转录本

我们可能需要识别并筛选高变异的转录本。

Tidy transcriptomics

``` r
tt.norm.variable = tt.norm %>% keep_variable()
```

Base R

``` r
library(edgeR)

x = norm_counts.table

s <- rowMeans((x-rowMeans(x))^2)
o <- order(s, decreasing=TRUE)
x <- x[o[1L:top],,drop=FALSE]

norm_counts.table = norm_counts.table[rownames(x)]

norm_counts.table$cell_type = tibble_counts[
  match(
    tibble_counts$sample,
    rownames(norm_counts.table)
  ),
  "Cell type"
]
```

降维

Tidy transcriptomics

``` r
tt.norm.MDS =
  tt.norm %>%
  reduce_dimensions(method="MDS", .dims = 2)
```

Base R

``` r
library(limma)

count_m_log = log(count_m + 1)
cmds = limma::plotMDS(ndim = .dims, plot = FALSE)

cmds = cmds %$%
  cmdscale.out %>%
  setNames(sprintf("Dim%s", 1:6))

cmds$cell_type = tibble_counts[
  match(tibble_counts$sample, rownames(cmds)),
  "Cell type"
]
```

主成分分析

Tidy transcriptomics

``` r
tt.norm.PCA =
  tt.norm %>%
  reduce_dimensions(method="PCA", .dims = 2)
```

Base R

``` r
count_m_log = log(count_m + 1)
pc = count_m_log %>% prcomp(scale = TRUE)
variance = pc$sdev^2
variance = (variance / sum(variance))[1:6]
pc$cell_type = counts[
  match(counts$sample, rownames(pc)),
  "Cell type"
]
```

tSNE

Tidy transcriptomics

``` r
tt.norm.tSNE =
  breast_tcga_mini_SE %>%
  tidybulk(sample, ens, count_scaled) %>%
  identify_abundant() %>%
  reduce_dimensions(
    method = "tSNE",
    perplexity=10,
    pca_scale =TRUE
  )
```

Base R

``` r
count_m_log = log(count_m + 1)

tsne = Rtsne::Rtsne(
  t(count_m_log),
  perplexity=10,
  pca_scale =TRUE
)$Y
tsne$cell_type = tibble_counts[
  match(tibble_counts$sample, rownames(tsne)),
  "Cell type"
]
```

旋转维度

Tidy transcriptomics

``` r
tt.norm.MDS.rotated =
  tt.norm.MDS %>%
  rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45, action="get")
```

Base R

``` r
rotation = function(m, d) {
  r = d * pi / 180
  ((bind_rows(
    c(`1` = cos(r), `2` = -sin(r)),
    c(`1` = sin(r), `2` = cos(r))
  ) %>% as_matrix) %*% m)
}
mds_r = pca %>% rotation(rotation_degrees)
mds_r$cell_type = counts[
  match(counts$sample, rownames(mds_r)),
  "Cell type"
]
```

差异丰度检验

Tidy transcriptomics

``` r
tt.de =
  tt %>%
  test_differential_abundance( ~ condition, action="get")
tt.de
```

Base R

``` r
library(edgeR)

dgList <- DGEList(counts=counts_m,group=group)
keep <- filterByExpr(dgList)
dgList <- dgList[keep,,keep.lib.sizes=FALSE]
dgList <- calcNormFactors(dgList)
design <- model.matrix(~group)
dgList <- estimateDisp(dgList,design)
fit <- glmQLFit(dgList,design)
qlf <- glmQLFTest(fit,coef=2)
topTags(qlf, n=Inf)
```

调整计数

Tidy transcriptomics

``` r
tt.norm.adj =
  tt.norm %>% adjust_abundance(~ condition + time)
```

Base R

``` r
library(sva)

count_m_log = log(count_m + 1)

design =
  model.matrix(
    object = ~ condition + time,
    data = annotation
  )

count_m_log.sva =
  ComBat(
    batch = design[,2],
    mod = design,
    ...
  )

count_m_log.sva = ceiling(exp(count_m_log.sva) -1)
count_m_log.sva$cell_type = counts[
  match(counts$sample, rownames(count_m_log.sva)),
  "Cell type"
]
```

反卷积细胞类型组成

Tidy transcriptomics

``` r
tt.cibersort =
  tt %>%
  deconvolve_cellularity(action="get", cores=1)
```

Base R

``` r
source('CIBERSORT.R')
count_m %>% write.table("mixture_file.txt")
results <- CIBERSORT(
  "sig_matrix_file.txt",
  "mixture_file.txt",
  perm=100, QN=TRUE
)
results$cell_type = tibble_counts[
  match(tibble_counts$sample, rownames(results)),
  "Cell type"
]
```

样本聚类

k-means

Tidy transcriptomics

``` r
tt.norm.cluster = tt.norm.MDS %>%
  cluster_elements(method="kmeans", centers = 2, action="get")
```

Base R

``` r
count_m_log = log(count_m + 1)

k = kmeans(count_m_log, iter.max = 1000, ...)
cluster = k$cluster

cluster$cell_type = tibble_counts[
  match(tibble_counts$sample, rownames(cluster)),
  c("Cell type", "Dim1", "Dim2")
]
```

SNN

Matrix 包(v1.3-3)会导致此方法所使用的 `Seurat::FindNeighbors` 报错。我们正在努力解决这个问题。目前该选项不可用。

Tidy transcriptomics

``` r
tt.norm.SNN =
  tt.norm.tSNE %>%
  cluster_elements(method = "SNN")
```

Base R

``` r
library(Seurat)

snn = CreateSeuratObject(count_m)
snn = ScaleData(
  snn, display.progress = TRUE,
  num.cores=4, do.par = TRUE
)
snn = FindVariableFeatures(snn, selection.method = "vst")
snn = FindVariableFeatures(snn, selection.method = "vst")
snn = RunPCA(snn, npcs = 30)
snn = FindNeighbors(snn)
snn = FindClusters(snn, method = "igraph", ...)
snn = snn[["seurat_clusters"]]

snn$cell_type = tibble_counts[
  match(tibble_counts$sample, rownames(snn)),
  c("Cell type", "Dim1", "Dim2")
]
```

去除冗余转录本

Tidy transcriptomics

``` r
tt.norm.non_redundant =
  tt.norm.MDS %>%
  remove_redundancy(method = "correlation")
```

Base R

``` r
library(widyr)

.data.correlated =
  pairwise_cor(
    counts,
    sample,
    transcript,
    rc,
    sort = TRUE,
    diag = FALSE,
    upper = FALSE
  ) %>%
  filter(correlation > correlation_threshold) %>%
  distinct(item1) %>%
  rename(!!.element := item1)

# 返回非冗余的数据框
counts %>% anti_join(.data.correlated) %>%
  spread(sample, rc, - transcript) %>%
  left_join(annotation)
```

绘制热图

tidytranscriptomics

``` r
tt.norm.MDS %>%

  # filter lowly abundant
  keep_abundant() %>%

  # extract 500 most variable genes
  keep_variable(.abundance = count_scaled, top = 500) %>%

  # create heatmap
  heatmap(sample, transcript, count_scaled, transform = log1p) %>%
  add_tile(`Cell type`)
```

Base R

``` r
# 示例取自 BioC2020 研讨会的 airway 数据集。
dgList <- SE2DGEList(airway)
group <- factor(dgList$samples$`Cell type`)
keep.exprs <- filterByExpr(dgList, group=group)
dgList <- dgList[keep.exprs,, keep.lib.sizes=FALSE]
dgList <- calcNormFactors(dgList)
logcounts <- cpm(dgList, log=TRUE)
var_genes <- apply(logcounts, 1, var)
select_var <- names(sort(var_genes, decreasing=TRUE))[1:500]
highly_variable_lcpm <- logcounts[select_var,]
colors <- c("#440154FF", "#21908CFF", "#fefada")
col.group <- c("red","grey")[group]
gplots::heatmap.2(highly_variable_lcpm, col=colors, trace="none", ColSideColors=col.group, scale="row")
```

绘制密度图

tidytranscriptomics

``` r
# 示例取自 BioC2020 研讨会的 airway 数据集。
airway %>%
  tidybulk() %>%
  identify_abundant() %>%
  scale_abundance() %>%
  pivot_longer(cols = starts_with("counts"), names_to = "source", values_to = "abundance") %>%
  filter(!lowly_abundant) %>%
  ggplot(aes(x=abundance + 1, color=sample)) +
  geom_density() +
  facet_wrap(~source) +
  scale_x_log10()
```

Base R

``` r
# 示例取自 BioC2020 研讨会的 airway 数据集。
dgList <- SE2DGEList(airway)
group <- factor(dgList$samples$dex)
keep.exprs <- filterByExpr(dgList, group=group)
dgList <- dgList[keep.exprs,, keep.lib.sizes=FALSE]
dgList <- calcNormFactors(dgList)
logcounts <- cpm(dgList, log=TRUE)
var_genes <- apply(logcounts, 1, var)
select_var <- names(sort(var_genes, decreasing=TRUE))[1:500]
highly_variable_lcpm <- logcounts[select_var,]
colors <- c("#440154FF", "#21908CFF", "#fefada")
col.group <- c("red","grey")[group]
gplots::heatmap.2(highly_variable_lcpm, col=colors, trace="none", ColSideColors=col.group, scale="row")
```

附录

sessionInfo()
## R版本4.2.0 RC (22-04-19 r82224) ##平台:x86_64-pc-linux-gnu(64位)##运行在:Ubuntu 20.04.4 LTS ## ##矩阵产品:default ## BLAS: /home/biocbuild/bbs-3.15-bio /R/lib/libRblas. ##因此## LAPACK: /home/biocbuild/bbs-3.15-bio /R/lib/libRlapack。因此## ## locale: ## [1] LC_CTYPE=en_US。UTF-8 LC_NUMERIC= c# [3] LC_TIME=en_GB LC_COLLATE= c# [5] LC_MONETARY=en_US。utf - 8 LC_MESSAGES = en_US。UTF-8 ## [7] LC_PAPER=en_US。UTF-8 LC_NAME= c# [9] LC_ADDRESS=C LC_TELEPHONE= c# [11] LC_MEASUREMENT=en_US。UTF-8 LC_IDENTIFICATION=C ## ##附加的基本包:## [1]stats4 stats graphics grDevices utils datasets methods ## [8] base ## ##其他附加的包:# [1] tidysummarizedexperiment_1 .26.0 ## [3] Biobase_2.56.0 genomics icranges_1 .48.0 ## [5] GenomeInfoDb_1.32.0 IRanges_2.30.0 ## [7] S4Vectors_0.34.0 biocgenerics_0.0.0 ## [9] MatrixGenerics_1.8.0 matrixstats_0.0.6 2.0 ## [11] tidybulk_1.8.0 ggrepel_0.9.1 ## [13] ggplot_2 .3.5 magrittr_2.0.3 ## [15] tibble_3.1.6 tidyr_1.2.0 ## [17] dplyr_1.0.8 knitr_1.38 ## ##通过命名空间加载(并没有连接):# [1] nlme_1 .1-157 bitops_1.0-7 bit64_4.0.5 ## [4] httr_1.4.2 SnowballC_0.7.0 backports_1.4.1 ## [7] tools_4.2.0 utf8_1.2.2 R6_2.5.1 ## [10] DBI_1.1.2 lazyeval_0.2.2 mgcv_1.8-40 ## [13] colorspace_2.0-3 withr_2.5.0 tidyselect_1.1.2 ## [13] bitspace_0.4 compiler_4.2.0 preprocessCore_1.58.0 ## [16] cli_3.3.0 DelayedArray_0.22.0 plotly_1 .10.0 ## [25] scales_1.2.0 readr_2.1.2 genefilter_1.78.0 ## [25] string_1 .4.0 digest_0.6.29 XVector_0.36.0 ## [28] pkgconfig_2.0.3 htmltools_0.5.2 fastmap_1.1.0 ## [13][31] limma_1 .52.0 htmlwidgets_1.5.4 rlang_1.0.2 ## [34] RSQLite_2.2.12 generics_0.1.2 jsonlite_1.8.0 ## [37] BiocParallel_1.30.0 tokenizers_0.2.1 RCurl_1.98-1.6 ## [40] GenomeInfoDbData_1.2.8 Matrix_1.4-1 Rcpp_1.0.8.3 ## [43] munsell_0.5.0 fansi_1.0.3 lifecycle_1.0.1 ## [46] stringi_1.7.6 edgeR_3.38.0 zlibbioc_1.42.0 ## [49] plyr_1.8.7 Rtsne_0.16 grid_4.2.0 ## [52] blob_1.2.3 parallel_4.2.0 crayon_1.5.1 ## [55] lattice_0.20-45 Biostrings_2.64.0 splines_4.2.0 ## [58] annotate_1.74.0 hms_1.1.1KEGGREST_1.36.0 ## 
[61] locfit_1.5-9.5 pillar_1.7.0 widyr_0.1.4 ## [64] reshape2_1.4.4 codetools_0.2-18 xml_1 .99-0.9 ## [67] glue_1.6.2 evaluate_0.15 tidytext_0.3.2 ## [70] data.table_1.14.2 vctrs_0.4.1 png_0.1-7 ## [73] tzdb_0.3.0 gctrs_0.3.0 png_0.1-7 ## [76] assertthat_0.2.1 cachem_1.0.6 xfun_0.30 ## [79] broom_0.8.0 xtable_1. 0.8 -4 janeaustenr_0.1.5 ## [82] memoise_2.0.1 sva_3.44.0 ellipsis_0.3.2