HDCytoData 1.6.6
本文档演示了 HDCytoData 包中数据集的几个示例和用例。
使用聚类数据集,我们可以生成用颜色表示真实(ground truth)细胞群体标签的降维图。这提供了这些数据集中细胞群体结构的可视化表示,这在探索性数据分析以及展示聚类或其他下游分析算法的输出时非常有用。
下面,我们以其中一个数据集(Levine_32dim)为例,比较三种降维算法(PCA、tSNE 和 UMAP)。该数据集包含14个免疫细胞群的真实细胞群标签。
# Load the packages used in the dimension-reduction example, silencing
# their startup messages.
suppressPackageStartupMessages(library(HDCytoData))
suppressPackageStartupMessages(library(SummarizedExperiment))
suppressPackageStartupMessages(library(Rtsne))
suppressPackageStartupMessages(library(umap))
suppressPackageStartupMessages(library(ggplot2))
# ---------
# Load data
# ---------

# Load the Levine_32dim dataset in SummarizedExperiment format.
d_SE <- Levine_32dim_SE()
## snapshotDate(): 2019-10-22
##参见?HDCytoData和browseVignettes('HDCytoData')的文档
##从缓存加载
# --------------
# Pre-processing
# --------------

# select 'cell type' marker columns for defining clusters
d_sub <- assay(d_SE[, colData(d_SE)$marker_class == "type"])

# extract cell population labels
population <- rowData(d_SE)$population_id

dim(d_sub)
## [1] 265627 32
# one population label per cell (row)
stopifnot(nrow(d_sub) == length(population))

# transform data using asinh with cofactor 5
cofactor <- 5
d_sub <- asinh(d_sub / cofactor)

summary(d_sub)
## CD45RA CD133 CD19 CD22 ## Min.:-0.05731 Min.:-0.05808 Min.:-0.05809 Min.:-0.05734 ##第一曲:0.20463第一曲:-0.02294第一曲:-0.01884第一曲:-0.02069 ##中位数:0.54939中位数:0.02535中位数:0.07521中位数:0.05879 ##平均值:0.68813平均值:0.14596平均值:0.50930平均值:0.39732 ##第三曲:1.03120第三曲:0.22430第三曲:0.54839第三曲:0.38648 ## Max。: 6.69120最大。: 5.52750最大。: 4.99008最大。: 5.16048 ## CD11b CD4 CD8 CD34 ## Min.:-0.058236 Min.:-0.05775 Min.:-0.05800 Min.:-0.05801 ## 1曲:-0.000294 1曲:-0.01259 1曲:-0.01732 1曲:-0.01117 ##中位数:0.257923中位数:0.13122中位数:0.07363中位数:0.11071 ##平均值:0.710319平均值:0.36760平均值:0.56522平均值:0.33989 ## 3曲:0.923517 3曲:0.57812 3曲:0.48642 3曲:0.39281 ## Max。: 5.260789 Max。: 6.58176最大值。: 4.69369最大。: 5.14800 ## Flt3 CD20 CXCR4 CD235ab ## Min.:-0.057884 Min.:-0.05813 Min.:-0.05704 Min.:-0.05761 ##第一曲:-0.007793第一曲:-0.02207第一曲:0.25290第一曲:0.23100 ##中位数:0.110317中位数:0.03382中位数:0.66539中位数:0.54043 ##平均值:0.229768平均值:0.38441平均值:0.79247平均值:0.63189 ##第三曲:0.336117第三曲:0.32551第三曲:1.20168第三曲:0.92358 ## Max。: 7.117323最大。 : 6.05141 Max. : 5.69667 Max. : 6.64670 ## CD45 CD123 CD321 CD14 ## Min. :2.040 Min. :-0.05800 Min. :-0.05355 Min. :-0.057954 ## 1st Qu.:5.116 1st Qu.:-0.01162 1st Qu.: 1.32346 1st Qu.:-0.026326 ## Median :5.645 Median : 0.09602 Median : 1.90479 Median :-0.005379 ## Mean :5.408 Mean : 0.37241 Mean : 1.93542 Mean : 0.077030 ## 3rd Qu.:5.939 3rd Qu.: 0.41310 3rd Qu.: 2.51781 3rd Qu.: 0.089789 ## Max. :7.238 Max. : 6.64063 Max. : 6.86739 Max. : 5.006121 ## CD33 CD47 CD11c CD7 ## Min. :-0.05808 Min. :-0.05509 Min. :-0.058053 Min. :-0.05816 ## 1st Qu.:-0.01813 1st Qu.: 2.08788 1st Qu.:-0.002711 1st Qu.:-0.01567 ## Median : 0.06107 Median : 2.71442 Median : 0.212063 Median : 0.13002 ## Mean : 0.30792 Mean : 2.65608 Mean : 0.703504 Mean : 0.81384 ## 3rd Qu.: 0.34147 3rd Qu.: 3.27654 3rd Qu.: 0.861448 3rd Qu.: 1.37083 ## Max. : 5.61247 Max. : 6.40249 Max. : 6.520939 Max. : 6.31922 ## CD15 CD16 CD44 CD38 ## Min. :-0.05808 Min. :-0.05778 Min. :0.02606 Min. 
:-0.05719 ## 1st Qu.:-0.01502 1st Qu.:-0.02255 1st Qu.:3.12712 1st Qu.: 0.40198 ## Median : 0.09355 Median : 0.01424 Median :3.87967 Median : 1.02032 ## Mean : 0.23136 Mean : 0.16123 Mean :3.76018 Mean : 1.47781 ## 3rd Qu.: 0.38331 3rd Qu.: 0.16077 3rd Qu.:4.47392 3rd Qu.: 2.19146 ## Max. : 1.53415 Max. : 5.33831 Max. :7.40456 Max. : 7.29308 ## CD13 CD3 CD61 CD117 ## Min. :-0.05773 Min. :-0.05824 Min. :-0.05764 Min. :-0.05767 ## 1st Qu.: 0.02110 1st Qu.: 0.08495 1st Qu.:-0.01285 1st Qu.:-0.02396 ## Median : 0.18706 Median : 0.60376 Median : 0.09569 Median :-0.00041 ## Mean : 0.36856 Mean : 2.16576 Mean : 0.34446 Mean : 0.13120 ## 3rd Qu.: 0.53550 3rd Qu.: 4.66522 3rd Qu.: 0.41579 3rd Qu.: 0.15474 ## Max. : 6.98119 Max. : 6.74836 Max. : 7.74850 Max. : 5.50213 ## CD49d HLA-DR CD64 CD41 ## Min. :-0.05806 Min. :-0.05797 Min. :-0.05820 Min. :-0.05824 ## 1st Qu.: 0.28301 1st Qu.: 0.05771 1st Qu.:-0.01058 1st Qu.:-0.02017 ## Median : 0.67721 Median : 0.61133 Median : 0.12249 Median : 0.05223 ## Mean : 0.79494 Mean : 1.52181 Mean : 0.55151 Mean : 0.26175 ## 3rd Qu.: 1.19079 3rd Qu.: 2.88824 3rd Qu.: 0.60413 3rd Qu.: 0.30559 ## Max. : 5.15344 Max. : 7.05251 Max. : 4.51784 Max. : 7.71829
# subsample cells for the vignette
n <- 2000

# fixed seed so the same cells are drawn on every run
set.seed(123)
ix <- sample(seq_len(nrow(d_sub)), n)

d_sub <- d_sub[ix, ]
population <- population[ix]

dim(d_sub)
## [1] 2000 32
stopifnot(nrow(d_sub) == length(population))

# remove duplicated rows (required by Rtsne); keep labels aligned with rows
dups <- duplicated(d_sub)
d_sub <- d_sub[!dups, , drop = FALSE]
population <- population[!dups]

dim(d_sub)
## [1] 1998 32
# rows and labels must still match after removing duplicates
stopifnot(nrow(d_sub) == length(population))
# ------------------------
# Dimension reduction: PCA
# ------------------------

# number of reduced dimensions to keep
n_dims <- 2

# run PCA
# (note: no scaling, since asinh-transformed dimensions are already comparable)
out_PCA <- prcomp(d_sub, center = TRUE, scale. = FALSE)

# keep the first n_dims principal components
dims_PCA <- out_PCA$x[, seq_len(n_dims)]
colnames(dims_PCA) <- c("PC_1", "PC_2")
head(dims_PCA)
## pc_1 pc_2 ## [1,] 1.450702 3.1573053 ## [2,] 2.453109 -0.9381139 ## [3,] -2.705226 0.7090551 ## [4,] 2.718284 -2.2801305 ## [5,] -2.714230 -0.1954170 ## [6,] -3.003650 -0.1938087
stopifnot(nrow(dims_PCA) == length(population))

# use generic axis names and attach population labels and method name
colnames(dims_PCA) <- c("dimension_x", "dimension_y")
dims_PCA <- cbind(as.data.frame(dims_PCA), population, type = "PCA")

head(dims_PCA)
##维度_x维度_y人口类型## 1 1.450702 3.1573053未分配PCA ## 2 2.453109 -0.9381139未分配PCA ## 3 -2.705226 0.7090551未分配PCA ## 4 2.718284 -2.2801305未分配PCA ## 5 -2.714230 -0.1954170未分配PCA ## 6 -3.003650 -0.1938087未分配PCA
# inspect the structure of the PCA plotting data frame
str(dims_PCA)
## 'data.frame': 1998 obs。4个变量:## $ dimension_x: num 1.45 2.45 -2.71 2.72 -2.71…## $维度_y: num 3.157 -0.938 0.709 -2.28 -0.195…## $ population:因子w/ 15水平“嗜碱性粒细胞”,“CD16-_NK_cells”,..: 15 15 15 15 15 15 15 9 10 10…## $类型:因子w/ 1级别“PCA”:1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1…
# generate plot
d_plot <- dims_PCA
str(d_plot)
## 'data.frame': 1998 obs。4个变量:## $ dimension_x: num 1.45 2.45 -2.71 2.72 -2.71…## $维度_y: num 3.157 -0.938 0.709 -2.28 -0.195…## $ population:因子w/ 15水平“嗜碱性粒细胞”,“CD16-_NK_cells”,..: 15 15 15 15 15 15 15 9 10 10…## $类型:因子w/ 1级别“PCA”:1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1…
# one color per population (14), plus gray for the 15th factor level
colors <- c(rainbow(14), "gray75")

ggplot(d_plot, aes(x = dimension_x, y = dimension_y, color = population)) +
  facet_wrap(~ type, scales = "free") +
  geom_point(size = 0.7, alpha = 0.5) +
  scale_color_manual(values = colors) +
  labs(x = "dimension x", y = "dimension y") +
  theme_bw() +
  theme(aspect.ratio = 1,
        legend.key.height = unit(4, "mm"))
# -------------------------
# Dimension reduction: tSNE
# -------------------------

# run Rtsne (seed fixed for reproducibility)
set.seed(123)
out_Rtsne <- Rtsne(as.matrix(d_sub), dims = n_dims)

# extract the embedding coordinates
dims_Rtsne <- out_Rtsne$Y
colnames(dims_Rtsne) <- c("tSNE_1", "tSNE_2")
head(dims_Rtsne)
## tSNE_1 tSNE_2 ## [1,] 21.317627 -9.102072 ## [2,] -6.457918 21.639648 ## [3,] -4.640114 -21.309045 ## [4,] -5.916015 26.265756 ## [5,] -18.297775 -15.155949 ## [6,] -5.113306 -13.962952
stopifnot(nrow(dims_Rtsne) == length(population))

# use generic axis names and attach population labels and method name
colnames(dims_Rtsne) <- c("dimension_x", "dimension_y")
dims_Rtsne <- cbind(as.data.frame(dims_Rtsne), population, type = "tSNE")

head(dims_Rtsne)
##维度_x维度_y人口类型## 1 21.317627 -9.102072未分配tSNE ## 2 -6.457918 21.639648未分配tSNE ## 3 -4.640114 -21.309045未分配tSNE ## 4 -5.916015 26.265756未分配tSNE ## 5 -18.297775 -15.155949未分配tSNE ## 6 -5.113306 -13.962952未分配tSNE
# inspect the structure of the tSNE plotting data frame
str(dims_Rtsne)
## 'data.frame': 1998 obs。4个变量:## $ dimension_x: num 21.32 -6.46 -4.64 -5.92 -18.3…# # $ dimension_y: num -9.1 21.6 -21.3 26.3 -15.2……## $ population:因子w/ 15水平“嗜碱性粒细胞”,“CD16-_NK_cells”,..: 15 15 15 15 15 15 15 9 10 10…## $类型:因子w/ 1级别“tSNE”:1 1 1 1 1 1 1 1 1 1 1 1 1 1 1…
# generate plot
d_plot <- dims_Rtsne

ggplot(d_plot, aes(x = dimension_x, y = dimension_y, color = population)) +
  facet_wrap(~ type, scales = "free") +
  geom_point(size = 0.7, alpha = 0.5) +
  scale_color_manual(values = colors) +
  labs(x = "dimension x", y = "dimension y") +
  theme_bw() +
  theme(aspect.ratio = 1,
        legend.key.height = unit(4, "mm"))
# -------------------------
# Dimension reduction: UMAP
# -------------------------

# run umap (seed fixed for reproducibility)
set.seed(123)
out_umap <- umap(d_sub)

# extract the embedding coordinates
dims_umap <- out_umap$layout
colnames(dims_umap) <- c("UMAP_1", "UMAP_2")
head(dims_umap)
## UMAP_1 UMAP_2 ## [1,] 7.4298928 3.545640 ## [2,] -6.6991260 6.304541 ## [3,] 0.9351728 -6.959218 ## [4,] -6.2004201 6.935849 ## [5,] -1.5881799 -7.895300 ## [6,] -0.3589191 -6.045946
stopifnot(nrow(dims_umap) == length(population))

# use generic axis names and attach population labels and method name
colnames(dims_umap) <- c("dimension_x", "dimension_y")
dims_umap <- cbind(as.data.frame(dims_umap), population, type = "UMAP")

head(dims_umap)
## dimension_x dimension_y population type ## 1 7.4298928 3.545640未分配UMAP ## 2 -6.6991260 6.304541未分配UMAP ## 3 0.9351728 -6.959218未分配UMAP ## 4 -6.2004201 6.935849未分配UMAP ## 5 -1.5881799 -7.895300未分配UMAP ## 6 -0.3589191 -6.045946未分配UMAP
# inspect the structure of the UMAP plotting data frame
str(dims_umap)
## 'data.frame': 1998 obs。4个变量:## $ dimension_x: num 7.43 -6.699 0.935 -6.2 -1.588…## $维度_y: num 3.55 6.3 -6.96 6.94 -7.9…## $ population:因子w/ 15水平“嗜碱性粒细胞”,“CD16-_NK_cells”,..: 15 15 15 15 15 15 15 9 10 10…## $类型:因子w/ 1级“UMAP”:1 1 1 1 1 1 1 1 1 1 1 1 1 1…
# generate plot
d_plot <- dims_umap

ggplot(d_plot, aes(x = dimension_x, y = dimension_y, color = population)) +
  facet_wrap(~ type, scales = "free") +
  geom_point(size = 0.7, alpha = 0.5) +
  scale_color_manual(values = colors) +
  labs(x = "dimension x", y = "dimension y") +
  theme_bw() +
  theme(aspect.ratio = 1,
        legend.key.height = unit(4, "mm"))
我们还可以使用聚类数据集使用我们选择的算法来计算新的聚类,然后通过与ground truth cell population标签进行比较来评估聚类的性能。评估聚类性能的常用指标包括平均F1分数和调整后的Rand指数。
下面,我们使用FlowSOM聚类算法(Van Gassen et al. 2015)为Samusik_01数据集计算一个新的聚类,然后使用ground truth标签计算聚类性能。
注意,在本文档(vignette)中,为了简单起见,我们只计算调整后的Rand指数。这个计算没有考虑每个聚类中的细胞数量,所以结果可能会被一两个较大的聚类所主导。对于基于平均F1分数的更复杂的评估,其中我们(i)对各聚类平等加权(而不是对各细胞平等加权),以及(ii)检查多重映射的细胞群(即防止多个聚类映射到同一个真实细胞群),请参阅我们之前出版物(Weber和Robinson, 2016)附带的GitHub存储库中的代码:https://github.com/lmweber/cytometry-clustering-comparison
# Load the packages used in the clustering example, silencing their
# startup messages.
suppressPackageStartupMessages(library(HDCytoData))
suppressPackageStartupMessages(library(FlowSOM))
suppressPackageStartupMessages(library(flowCore))
suppressPackageStartupMessages(library(mclust))
suppressPackageStartupMessages(library(umap))
suppressPackageStartupMessages(library(ggplot2))
# ---------
# Load data
# ---------

# Load the Samusik_01 dataset in SummarizedExperiment format.
d_SE <- Samusik_01_SE()
## snapshotDate(): 2019-10-22
##参见?HDCytoData和browseVignettes('HDCytoData')的文档
##从缓存加载
# dimensions of the loaded dataset
dim(d_SE)
## [1] 86864 51
# --------------
# Pre-processing
# --------------

# select 'cell type' marker columns for defining clusters
d_sub <- assay(d_SE[, colData(d_SE)$marker_class == "type"])

# extract cell population labels
population <- rowData(d_SE)$population_id

dim(d_sub)
## [1] 86864 39
stopifnot(nrow(d_sub) == length(population))

# transform data using asinh with cofactor 5
cofactor <- 5
d_sub <- asinh(d_sub / cofactor)

# create flowFrame object (required input format for FlowSOM)
d_FlowSOM <- flowFrame(d_sub)
# -----------
# Run FlowSOM
# -----------

# set seed for reproducibility
set.seed(123)

# run FlowSOM (initial steps prior to meta-clustering);
# data were already transformed above, so no transform/scaling here
out <- ReadInput(d_FlowSOM, transform = FALSE, scale = FALSE)
out <- BuildSOM(out)
##建筑SOM
##映射数据到SOM
# build the minimal spanning tree on the FlowSOM object
out <- BuildMST(out)
##建筑MST
# optional FlowSOM visualization
# PlotStars(out)

# extract cluster labels (pre meta-clustering) from output object
labels_pre <- out$map$mapping[, 1]

# specify final number of clusters for meta-clustering
k <- 40

# run meta-clustering
seed <- 123
out <- metaClustering_consensus(out$map$codes, k = k, seed = seed)

# extract cluster labels from output object:
# map each cell's SOM node to that node's meta-cluster
labels <- out[labels_pre]

# summary of cluster sizes and number of clusters
table(labels)
# # # #标签1 2 3 4 5 6 7 8 9 10 11 12 13 # # 1257 15597 20715 387 5248 3912 287 1499 1035 497 1984 292 322 # # 14 15 16 17 18 19 20 21日22日23日24日25日26 # # 620 469 909 303 105 369 555 542 1603 260 341 698 9767 33 # # 27 28 29 30 31 32 34 35 36 37 38 39 # # 477 5913 757 815 231 721 2876 595 293 1469 434 787 739 40 # # # # 1184
# number of clusters actually present in the labels
length(table(labels))
## [1] 40
# -------------------------------
# Evaluate clustering performance
# -------------------------------

# calculate adjusted Rand index
# note: this calculation weights all cells equally, which may not be
# appropriate for some datasets (see above)

stopifnot(nrow(d_sub) == length(labels))
stopifnot(length(population) == length(labels))

# remove "unassigned" cells from cluster evaluation (but note these were
# included for clustering)
ix_unassigned <- population == "unassigned"
d_sub_eval <- d_sub[!ix_unassigned, ]
population_eval <- population[!ix_unassigned]
labels_eval <- labels[!ix_unassigned]

stopifnot(nrow(d_sub_eval) == length(labels_eval))
stopifnot(length(population_eval) == length(labels_eval))

# calculate adjusted Rand index
adjustedRandIndex(population_eval, labels_eval)
## [1] 0.8935566
# ------------
# Plot results
# ------------

# subsample cells for the vignette
n <- 4000

# fixed seed so the same cells are drawn on every run
set.seed(1004)
ix <- sample(seq_len(nrow(d_sub)), n)

# keep data, population labels and cluster labels aligned
d_sub <- d_sub[ix, ]
population <- population[ix]
labels <- labels[ix]

dim(d_sub)
## [1] 4000 39
stopifnot(nrow(d_sub) == length(population))
# population is a vector, so compare lengths; nrow() of a vector is NULL,
# which would make the check pass vacuously
stopifnot(length(population) == length(labels))

# run umap (seed fixed for reproducibility)
set.seed(1234)
out_umap <- umap(d_sub)

# extract the embedding coordinates
dims_umap <- out_umap$layout
colnames(dims_umap) <- c("UMAP_1", "UMAP_2")

stopifnot(nrow(dims_umap) == length(population))
stopifnot(length(population) == length(labels))

d_plot <- cbind(
  as.data.frame(dims_umap),
  population,
  labels = as.factor(labels),
  type = "umap"
)

# generate plots
colors <- c(rainbow(24), "gray75")

ggplot(d_plot, aes(x = UMAP_1, y = UMAP_2, color = population)) +
  geom_point(size = 0.7, alpha = 0.5) +
  scale_color_manual(values = colors) +
  ggtitle("Ground truth population labels") +
  theme_bw() +
  theme(aspect.ratio = 1,
        legend.key.height = unit(4, "mm"))
# same embedding, colored by FlowSOM cluster labels
ggplot(d_plot, aes(x = UMAP_1, y = UMAP_2, color = labels)) +
  geom_point(size = 0.7, alpha = 0.5) +
  ggtitle("FlowSOM集群标签") +
  theme_bw() +
  theme(aspect.ratio = 1,
        legend.key.height = unit(4, "mm"))
在本节中,我们使用半模拟的差异分析数据集(Weber_AML_sim 和 Weber_BCR_XL_sim),演示如何使用 diffcyt 包(Weber et al. 2019)执行差异分析。
有关如何使用 diffcyt 软件包的详细信息,请参阅其 Bioconductor vignette(使用文档)。
有关在高维细胞计数(cytometry)数据中执行差异发现分析的完整工作流程,包括探索性分析、差异检验和可视化,请参见Nowicka等人(2017,2019)的Bioconductor工作流包。
我们执行两组差异分析:测试细胞群的差异丰度(DA)(使用Weber_AML_sim
数据集),并测试细胞群内的差异状态(DS)(使用Weber_BCR_XL_sim
数据集)。在这两种情况下,聚类都是使用“细胞类型”标记来定义的;而对于DS测试,我们还使用额外的“细胞状态”标记来测试聚类内的差异表达。更多细节请参见我们介绍 diffcyt 框架的论文(Weber et al. 2019)。有关如何使用ground truth标签(掺入的细胞,spike-in cells)计算性能的扩展评估,请参阅我们之前出版物(Weber et al. 2019)附带的GitHub存储库中的代码:https://github.com/lmweber/diffcyt-evaluations
# Load the packages used in the differential analysis examples.
suppressPackageStartupMessages(library(diffcyt))
suppressPackageStartupMessages(library(SummarizedExperiment))
# ---------
# Load data
# ---------

# Load the Weber_AML_sim dataset (main simulation, 5% threshold)
# in SummarizedExperiment format.
d_SE <- Weber_AML_sim_main_5pc_SE()
## snapshotDate(): 2019-10-22
##参见?HDCytoData和browseVignettes('HDCytoData')的文档
##从缓存加载
# ----------------
# Set up meta-data
# ----------------

# set column names
colnames(d_SE) <- colData(d_SE)$marker_name

# split input data into one matrix per sample
d_input <- split(as.data.frame(assay(d_SE)), rowData(d_SE)$sample_id)

# extract sample information
experiment_info <- metadata(d_SE)$experiment_info
experiment_info
## group_id patient_id sample_id ## 1 healthy H1 healthy_H1 ## 2 healthy H2 healthy_H2 ## 3 healthy H3 healthy_H3 ## 4 healthy H4 healthy_H4 ## 5 healthy H5 healthy_H5 ## 6 CN H1 CN_H1 ## 7 CN H2 CN_H2 ## 8 CN H3 CN_H3 ## 9 CN H4 CN_H4 ## 10 CN H5 CN_H5 ## 11 CBF H1 CBF_H1 ## 12 CBF H2 CBF_H2 ## 13 CBF H3 CBF_H3 ## 14 CBF H4 CBF_H4 ## 15 CBF H5 CBF_H5
# extract marker information
marker_info <- colData(d_SE)
marker_info
## channel_name markker_name markker_class ## <字符> <字符> <因子> ##时间时间时间时间无## Cell_length Cell_length Cell_length无## DNA1 DNA1(Ir191)Di DNA1无## DNA2 DNA2(Ir193)Di DNA2无## BC1 BC1(Pd104)Di BC1无## ... ... ... ...CD41 CD41(Lu175)Di CD41 type ##生命力生命力(Pt195)Di生命力无## file_number file_number file_number无## event_number event_number无## barcode barcode无
# -----------------------------------
# Differential abundance (DA) testing
# -----------------------------------

# create design matrix
design <- createDesignMatrix(
  experiment_info, cols_design = c("group_id", "patient_id")
)
design
# #(拦截)group_idCN group_idCBF patient_idH2 patient_idH3 patient_idH4 # # 1 1 0 0 0 0 0 # # 2 1 0 0 1 0 0 # # 3 1 0 0 0 1 0 # # 4 1 0 0 0 0 1 # # 5 1 0 0 0 0 0 # # 6 1 1 0 0 0 0 # # 7 1 1 0 1 0 0 # # 8 1 1 0 0 1 0 # # 9 1 1 0 0 0 1 # # 10 1 1 0 0 0 0 # # 11 1 0 1 0 0 0 # # 12 1 0 1 1 0 0 # # 13 1 0 1 0 1 0 # # 14 1 0 1 0 0 1 # # 15 1 0 1 0 0 0 # # patient_idH5 # # 1 # # 2 0 # # 3 0 0 # # 4 # 5 # 1 # 6 # 0 # # 7 0 # # 8 0 0 10 # # 1 # # 9 # # 11 # # 12 0 # # 13 0 14 0 # # 15 # # 1 # # attr(“转让”),# # [1]0 1 1 2 222## attr(,"contrasts") ## attr(,"contrasts")$group_id ## [1] "contr.treatment" ## ## attr(,"contrasts")$patient_id ## [1] "contr.treatment"
# create contrast matrix
# note: testing condition CN vs. healthy
contrast <- createContrast(c(0, 1, 0, 0, 0, 0, 0))
contrast
# # # # (1) [1] 0 # # [2] 1 0 # # # # (3) [4] 0 # # [5] 0 # # [6] 0 # # [7] 0
# test for differential abundance (DA) of clusters
out_DA <- diffcyt(
  d_input, experiment_info, marker_info,
  design = design, contrast = contrast,
  analysis_type = "DA", seed_clustering = 1234
)
##准备数据…
##转换数据…
##生成集群…
## FlowSOM集群在8.6秒内完成
##计算功能…
##使用方法“diffcyt-DA-edgeR”计算DA测试…
# display the table of top results from the DA test, with formatted values
topTable(out_DA, format_vals = TRUE)
## 20行3列的数据帧## cluster_id p_val p_adj ## <因子> <数字> <数字> ## 2 2 7.87e-268 7.8e-266 ## 97 97 0.000917 0.0454 ## 84 84 0.00387 0.128 ## 25 25 0.00831 0.206 ## 17 17 0.0994 0.651 ## # ... ... ... ...## 98 98 0.13 0.759 ## 7 7 0.237 0.904 ## 28 28 0.236 0.904 ## 32 32 0.205 0.904
# ---------
# Load data
# ---------

# Load the Weber_BCR_XL_sim dataset (main simulation)
# in SummarizedExperiment format.
d_SE <- Weber_BCR_XL_sim_main_SE()
## snapshotDate(): 2019-10-22
##参见?HDCytoData和browseVignettes('HDCytoData')的文档
##从缓存加载
# ----------------
# Set up meta-data
# ----------------

# set column names
colnames(d_SE) <- colData(d_SE)$marker_name

# split input data into one matrix per sample
d_input <- split(as.data.frame(assay(d_SE)), rowData(d_SE)$sample_id)

# extract sample information
experiment_info <- metadata(d_SE)$experiment_info
experiment_info
## group_id patient_id sample_id ## 1 base patient1 patient1_base ## 2 base patient2 patient2_base ## 3 base patient3 patient3_base ## 4 base patient4 patient4_base ## 5 base patient5 patient5_base ## 6 base patient6 patient6_base ## 7 base patient7 patient7_base ## 8 base patient8 patient8_base ## 9 spike patient1 patient1_spike ## 10 spike patient2 patient2_spike ## 11 spike patient3 patient3_spike ## 12 spike patient4 patient4_spike ## 13 spike patient5 patient5_spike ## 14 spike patient6 patient6_spike ## 15 spike patient7 patient7_spike ## 16 spike patient8 patient8_spike
# extract marker information
marker_info <- colData(d_SE)
marker_info
## channel_name markker_name markker_class ## <字符> <字符> <因子> ##时间时间时间时间无## Cell_length Cell_length Cell_length无## CD3 CD3(110:114)Dd CD3类型## CD45 CD45(In115)Dd CD45类型## BC1 BC1(La139)Dd BC1无## ... ... ... ...## HLA-DR HLA-DR(Yb174)Dd HLA-DR类型## BC7 BC7(Lu175)Dd BC7无## CD7 CD7(Yb176)Dd CD7类型## DNA-1 DNA-1(Ir191)Dd DNA-1无## DNA-2 DNA-2(Ir193)Dd DNA-2无
# --------------------------------
# Differential states (DS) testing
# --------------------------------

# create design matrix
design <- createDesignMatrix(
  experiment_info, cols_design = c("group_id", "patient_id")
)
design
# #(拦截)group_idspike patient_idpatient2 patient_idpatient3 # # 1 1 0 0 0 # # 2 1 0 1 0 # # 3 1 0 0 1 # # 4 1 0 0 0 # # 5 1 0 0 0 # # 6 1 0 0 0 # # 7 1 0 0 0 # # 8 1 0 0 0 # # 9 1 1 0 0 # # 10 1 1 1 0 # # 11 1 1 0 1 # # 12 1 1 0 0 # # 13 1 1 0 0 # # 14 1 1 0 0 # # 15 1 1 0 0 # # 16 1 1 0 0 # # patient_idpatient4 patient_idpatient5 patient_idpatient6 patient_idpatient7 # # 1 0 0 0 0 # # 2 0 0 0 0 # # 3 0 0 0 0 # # 4 1 1 0 0 0 # # 5 0 0 0 # # 6 0 0 1 0 7 0 0 0 # # 1 # # 8 0 0 0 0 # # 9 0 0 0 0 # # 10 0 0 0 0 # # 11 00 0 0 # # 12 1 0 0 0 # # 13 0 1 0 0 # # 14 15 0 0 0 0 0 1 0 # # 1 # # 16 0 0 0 0 0 # # patient_idpatient8 # # 1 # # 2 0 0 # # 3 # # 4 0 # # 5 0 0 # # 6 # 7 # 8 # # 1 # # 9 0 # # 10 0 # # 11 # # 12 0 # # 13 0 # # 14 0 15 0 # # 16 # # 1 # # attr(“转让”),# # [1]0 1 2 2 2 2 2 2 2 # # attr(“对比”)# # attr(“对比”)group_id # #美元[1]“contr.treatment”# # # # attr(“对比”)patient_id # #美元[1]“contr.treatment”
# create contrast matrix
# note: testing condition spike vs. base
# one entry per design column: intercept, group_idspike, and the seven
# patient_id columns (patient2..patient8), i.e. 9 entries in total
contrast <- createContrast(c(0, 1, 0, 0, 0, 0, 0, 0, 0))
contrast
# # # # (1) [1] 0 # # [2] 1 0 # # # # (3) [4] 0 # # [5] 0 # # [6] 0 # # [7] 0 # # [8] 0 # # [9] 0
# test for differential states (DS) within clusters
out_DS <- diffcyt(
  d_input, experiment_info, marker_info,
  design = design, contrast = contrast,
  analysis_type = "DS", seed_clustering = 1234
)
##准备数据…
##转换数据…
##生成集群…
## FlowSOM集群在3.4秒内完成
##计算功能…
##使用方法“diffcyt-DS-limma”计算DS测试…
##警告:28个探头的部分NA系数
# display the table of top results from the DS test, with formatted values
topTable(out_DS, format_vals = TRUE)
##数据帧与20行4列## cluster_id marker_id p_val p_adj ## <因子> <因子> <数值> <数值> ## 89 89 pS6 6.45e-15 8.85e-12 ## 90 90 pS6 5.23e-13 3.59e-10 ## 80 80 pS6 7.71e-12 3.53e-09 ## 80 80 pPlcg2 3.78e-11 1.3e-08 ## 99 99 pS6 1.06e-10 2.92e-08 ## # ... ... ... ... ...## 80 80 pAkt 1.8e-06 0.000154 ## 80 80 pNFkB 1.94e-06 0.000156 ## 99 99 pErk 2.46e-06 0.000187 ## 89 89 pErk 3.28e-06 0.000237 ## 89 89 pAkt 5.23e-06 0.000359