内容

1介绍

STexampleData包包含一组空间分辨转录组学(SRT)数据集,这些数据集已格式化为Bioconductor的SpatialExperiment类,可用于示例、演示和教程。这些数据集来自多个不同的SRT平台,取自各种公开来源。部分数据集包含图像和/或真实标注(ground truth)标签。

2安装

要从Bioconductor安装STexampleData包:

install.packages("BiocManager")
BiocManager::install("STexampleData")

或者,最新版本也可以从GitHub安装:

install.packages("remotes")
remotes::install_github("lmweber/STexampleData", build_vignettes = TRUE)

3数据集

该软件包包含以下数据集:

4加载数据

下面的示例展示了如何在R会话中将示例数据集加载为SpatialExperiment对象。

加载数据集有两种选择:使用命名访问器函数或通过查询ExperimentHub数据库。

4.1使用命名访问器加载

library(SpatialExperiment)
library(STexampleData)

4.1.1Visium_humanDLPFC

# 加载对象
spe <- Visium_humanDLPFC()
# 检查对象
spe
## class: SpatialExperiment ## dim: 33538 4992 ## metadata(0): ## assays(1): counts ## rownames(33538): ENSG00000243485 ENSG00000237613…ENSG00000277475 ## ENSG00000268674 ## rowData names(3): gene_id gene_name feature_type ## colnames(4992): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1…## TTGTTTGTGTAAATTC-1 ## colData names(7): barcode_id sample_id…ground_truth cell_count ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): pxl_col_in_fullres pxl_row_in_fullres ## imgData names(4): sample_id image_id data scaleFactor . 0
dim(spe)
## [1] 33538 4992
assayNames(spe)
## [1] "counts"
rowData(spe)
## DataFrame with 33538 rows and 3 columns
##                         gene_id   gene_name    feature_type
##                     <character> <character>     <character>
## ENSG00000243485 ENSG00000243485 MIR1302-2HG Gene Expression
## ENSG00000237613 ENSG00000237613     FAM138A Gene Expression
## ENSG00000186092 ENSG00000186092       OR4F5 Gene Expression
## ENSG00000238009 ENSG00000238009  AL627309.1 Gene Expression
## ENSG00000239945 ENSG00000239945  AL627309.3 Gene Expression
## ...                         ...         ...             ...
## ENSG00000277856 ENSG00000277856  AC233755.2 Gene Expression
## ENSG00000275063 ENSG00000275063  AC233755.1 Gene Expression
## ENSG00000271254 ENSG00000271254  AC240274.1 Gene Expression
## ENSG00000277475 ENSG00000277475  AC213203.1 Gene Expression
## ENSG00000268674 ENSG00000268674     FAM231C Gene Expression
colData (spe)
## DataFrame包含4992行和7列## barcode_id sample_id in_tissue array_row ##     ## AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1 AAACAAGTATCTCCCA-1 sample_151673 1 50 ## AAACAATCTACTAGCA-1 AAACAATCTACTAGCA-1 sample_151673 1 59 ## AAACAGAGCGACTCCT-1 AAACAGAGCGACTCCT-1 sample_151673 1 14 ## ... ... ... ... ...1 # # TTGTTTCACATCCAGG-1 TTGTTTCACATCCAGG-1 sample_151673 58 # # TTGTTTCATTAGTCTA-1 TTGTTTCATTAGTCTA-1 sample_151673 1 60 # # TTGTTTCCATACAACT-1 TTGTTTCCATACAACT-1 sample_151673 1 45 # # TTGTTTGTATTACACG-1 TTGTTTGTATTACACG-1 sample_151673 73 # # TTGTTTGTGTAAATTC-1 TTGTTTGTGTAAATTC-1 sample_151673 1 7 # # array_col ground_truth cell_count # # <整数> <人物> <整数> # # AAACAACGAATAGTTC-1 16 NA NA # # AAACAAGTATCTCCCA-1 102 Layer3 6 # # 16 # # AAACACCAATAACTGC-1 19 AAACAATCTACTAGCA-1 43 Layer1 WM5 # # AAACAGAGCGACTCCT-1 94 Layer3 2  ## ... ... ... ...## ttgtttcatccagg -1 42 WM 3 ## ttgtttcattattcta -1 30 WM 4 ## ttgtttccatacata -1 27 Layer6 3 ## ttgtttttttttaacacg -1 41 WM 16 ## ttgtttttgtgtaaattc -1 51 Layer2 5
head(spatialCoords(spe))
## aaacaagatattgc -1 4068 9505 ## AAACAGAGCGACTCCT-1 9271 4151 ## AAACAGCTTTCAGAAG-1 3393 7583
imgData(spe)
##       sample_id    image_id   data scaleFactor
##     <character> <character> <list>   <numeric>
## 1 sample_151673      lowres   ####   0.0450045
## 2 sample_151673       hires   ####   0.1500150

4.1.2Visium_mouseCoronal

# 加载对象
spe <- Visium_mouseCoronal()
# 检查对象
spe
## class: SpatialExperiment ## dim: 32285 4992 ## metadata(0): ## assays(1): counts ## rownames(32285): ENSMUSG00000051951 ENSMUSG00000089699…## ENSMUSG00000095019 ENSMUSG00000095041 ## rowData names(3): gene_id gene_name feature_type ## colnames(4992): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1…## TTGTTTGTGTAAATTC-1 ## colData names(5): barcode_id sample_id in_tissue array_row array_col ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): pxl_col_in_fullres pxl_row_in_fullres ## imgData names(4): sample_id image_id data scaleFactor . 1

4.1.3seqFISH_mouseEmbryo

# 加载对象
spe <- seqFISH_mouseEmbryo()
# 检查对象
spe
## class: SpatialExperiment ## dim: 351 11026 ## metadata(0): ## assays(2): counts molecules ## rownames(351): Abcc4 Acp5…Zfp57 Zic3 ## rowData names(1): gene_name ## colnames(11026): embryo1_Pos0_cell10_z2 embryo1_Pos0_cell100_z2…## embryo1_Pos28_cell97_z2 ## colData names(14): cell_id embryo…segmentation_vertices sample_id ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): x y ## imgData names(0):

4.1.4ST_mouseOB

# 加载对象
spe <- ST_mouseOB()
# 检查对象
spe
## class: SpatialExperiment ## dim: 15928 262 ## metadata(0): ## assays(1): counts ## rownames(15928): 0610007N19Rik 0610007P14Rik…Zzef1 Zzz3 ## rowData names(1): gene_name ## colnames(262): ACAACTATGGGTTGGCGG ACACAGATCCTGTTCTGA…## TTTCAACCCGAGGAAGTC TTTCTAACTCATAAGGAT ## colData names(3): barcode_id sample_id layer ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): x y ## imgData names(0):

4.1.5SlideSeqV2_mouseHPC

# 加载对象
spe <- SlideSeqV2_mouseHPC()
# 检查对象
spe
## class: SpatialExperiment ## dim: 23264 53208 ## metadata(0): ## assays(1): counts ## rownames(23264): 0610005C13Rik 0610007P14Rik…n-R5s40 n-R5s95 ## rowData names(1): gene_name ## colnames(53208): AACGTCATAATCGT TACTTTAGCGCAGT…GACTTTTCTTAAAG ## GTCAATAAAGGGCG ## colData names(3): barcode_id sample_id celltype ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): xcoord ycoord ## imgData names(0):

4.2通过查询ExperimentHub数据库加载

library(ExperimentHub)
# 创建ExperimentHub实例
eh <- ExperimentHub()
# 查询STexampleData数据集
myfiles <- query(eh, "STexampleData")
myfiles
## ExperimentHub with 5 records
## # snapshotDate(): 2022-10-24
## # $dataprovider: NA
## # $species: Mus musculus, Homo sapiens
## # $rdataclass: SpatialExperiment
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
## #   rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["EH7538"]]'
##
##            title
##   EH7538 | Visium_humanDLPFC
##   EH7539 | Visium_mouseCoronal
##   EH7540 | seqFISH_mouseEmbryo
##   EH7541 | ST_mouseOB
##   EH7542 | SlideSeqV2_mouseHPC
# 元数据
md <- as.data.frame(mcols(myfiles))
# 使用ExperimentHub查询结果加载Visium_humanDLPFC数据集
spe <- myfiles[[1]]
spe
## class: SpatialExperiment ## dim: 33538 4992 ## metadata(0): ## assays(1): counts ## rownames(33538): ENSG00000243485 ENSG00000237613…ENSG00000277475 ## ENSG00000268674 ## rowData names(3): gene_id gene_name feature_type ## colnames(4992): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1…## TTGTTTGTGTAAATTC-1 ## colData names(7): barcode_id sample_id…ground_truth cell_count ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): pxl_col_in_fullres pxl_row_in_fullres ## imgData names(4): sample_id image_id data scaleFactor . 0
# 使用ExperimentHub ID加载Visium_humanDLPFC数据集
spe <- myfiles[["EH7538"]]
spe
## class: SpatialExperiment ## dim: 33538 4992 ## metadata(0): ## assays(1): counts ## rownames(33538): ENSG00000243485 ENSG00000237613…ENSG00000277475 ## ENSG00000268674 ## rowData names(3): gene_id gene_name feature_type ## colnames(4992): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1…## TTGTTTGTGTAAATTC-1 ## colData names(7): barcode_id sample_id…ground_truth cell_count ## reducedDimNames(0): ## mainExpName: NULL ## altExpNames(0): ## spatialCoords names(2): pxl_col_in_fullres pxl_row_in_fullres ## imgData names(4): sample_id image_id data scaleFactor . 0

5从原始数据文件生成对象

作为参考,我们提供了从原始数据文件生成SpatialExperiment对象的代码脚本。

这些脚本保存在STexampleData包源代码的/inst/scripts目录中。这些脚本包含每个数据集原始数据来源的数据文件的引用和链接。

6会话信息

sessionInfo()
## R开发中(不稳定)(2022-10-25 r83175) ##平台:x86_64-pc-linux-gnu(64位)##运行在:Ubuntu 22.04.1 LTS ## ##矩阵产品:默认## BLAS: /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas。因此## LAPACK: /usr/lib/x86_64-linux-gnu/ LAPACK /liblapack.so.3.10.0 ## ## locale: ## [1] LC_CTYPE=en_US。UTF-8 LC_NUMERIC= c# # [3] LC_TIME=en_GB LC_COLLATE= c# # [5] LC_MONETARY=en_US。utf - 8 LC_MESSAGES = en_US。UTF-8 ## [7] LC_PAPER=en_US。UTF-8 LC_NAME=C ## [9] LC_ADDRESS=C LC_TELEPHONE=C ## [11] LC_MEASUREMENT=en_US。UTF-8 LC_IDENTIFICATION=C ## ##附加的基本包:## [1]stats4 stats graphics grDevices utils数据集方法## [8]base ## ##其他附加包:# # # # [1] BumpyMatrix_1.7.0 STexampleData_1.7.0 [3] ExperimentHub_2.7.0 AnnotationHub_3.7.0 # # [5] BiocFileCache_2.7.0 dbplyr_2.2.1 # # [7] SpatialExperiment_1.9.0 SingleCellExperiment_1.21.0 # # [9] SummarizedExperiment_1.29.0 Biobase_2.59.0 # # [11] GenomicRanges_1.51.0 GenomeInfoDb_1.35.0 # # [13] IRanges_2.33.0 S4Vectors_0.37.0 # # [15] BiocGenerics_0.45.0 MatrixGenerics_1.11.0 # # [17] matrixStats_0.62.0 BiocStyle_2.27.0 # # # #通过加载一个名称空间(而不是附加):# # # # [1] DBI_1.1.3 bitops_1.0-7 [3] rlang_1.0.6 magrittr_2.0.3 # # [5] compiler_4.3.0 RSQLite_2.2.18 # # [7] DelayedMatrixStats_1.21.0 png_0.1-7 # # [9] vctrs_0.5.0 stringr_1.4.1 # # [11] crayon_1.5.2 pkgconfig_2.0.3 # # [13] fastmap_1.1.0 magick_2.7.3 # # [15] XVector_0.39.0 ellipsis_0.3.2 # # [17] scuttle_1.9.0 utf8_1.2.2 # # [19] promises_1.2.0.1 rmarkdown_2.17 # # [21] purrr_0.3.5 bit_4.0.4 # # [23] xfun_0.34 zlibbioc_1.45.0 # # [25] cachem_1.0.6 beachmat_2.15.0 # # [27] jsonlite_1.8.3 blob_1.2.3 # #[29] later_1.3.0 rhdf5filters_1.11.0 ## [31] delayedarray_1 .25.0 Rhdf5lib_1.21.0 ## [33] BiocParallel_1.33.0 interactiveDisplayBase_1.37.0 ## [35] parallel_1 .3.0 r6_1 .5.1 ## [37] bsli_0 .4.0 string_1 .7.8 ## [39] limma_3.55.0 jquerylib_0.1.4 ## [41] Rcpp_1.0.9 assertthat_1 .2.1 knitr_1.40 ## [45] R.utils_2.12.1 httpuv_1.6.6 ## [47] Matrix_1.5-1 tidyselect_1.2.0 ## [49] curl_4.3.3 codetools_0.20 -45 ## [53] tibble_3.1.8 withr_2.5.0 ## 
[55]shiny_1.7.3 KEGGREST_1.39.0 # # [57] evaluate_0.17 Biostrings_2.67.0 # # [59] pillar_1.8.1 BiocManager_1.30.19 # # [61] filelock_1.0.2 generics_0.1.3 # # [63] rcurl_1.98 - 1.9 BiocVersion_3.17.0 # # [65] sparseMatrixStats_1.11.0 xtable_1.8-4 # # [67] glue_1.6.2 tools_4.3.0 # # [69] locfit_1.5 - 9.6 rhdf5_2.43.0 # # [71] grid_4.3.0 DropletUtils_1.19.0 # # [73] AnnotationDbi_1.61.0 edgeR_3.41.0 # # [75] GenomeInfoDbData_1.2.9 HDF5Array_1.27.0 # # [77] cli_3.4.1 rappdirs_0.3.3 # # [79] fansi_1.0.3 dplyr_1.0.10 # #[81] R.methodsS3_1.8.2 sass_1 .4.2 ## [83] digest_1 .6.30 dqrng_1 .3.0 ## [85] rjson_1 .2.21 memoise_2.0.1 ## [87] htmltools_0.5.3 R.oo_1.25.0 ## [89] lifecycle_1.0.3 httr_1.4.4 ## [91] mime_1 .12 bit64_4.0.5