Run sample size calculation for pilot data for reference dataset

runSampleCal(
  exprsMat,
  cellTypes,
  n_list = c(20, 40, 60, 80, 100, seq(200, 500, 100)),
  num_repeat = 20,
  level = NULL,
  cellType_tree = NULL,
  BPPARAM = BiocParallel::SerialParam(),
  subset_test = FALSE,
  num_test = NULL,
  ...
)

Arguments

exprsMat	An expression matrix of the pilot dataset (log-transformed or normalised).
cellTypes	A vector of cell types of the pilot dataset.
n_list	A vector of integers indicating the sample sizes to run.
num_repeat	An integer indicating the number of times the run for each sample size will be repeated.
level	An integer indicating that the accuracy rate is calculated based on the n-th level from the top of the cell type tree. If it is NULL (the default), the bottom level of the cell type tree is used. It cannot be larger than the total number of levels of the tree.
cellType_tree	A list indicating the cell type tree (optional). If it is NULL, the accuracy rate is calculated based on the provided cellTypes.
BPPARAM	A `BiocParallelParam` class object from the `BiocParallel` package is used. Default is SerialParam().
subset_test	A logical input indicating whether a subset of the data (a fixed number for each sample size) is used for testing instead of all remaining data. By default, it is FALSE.
num_test	An integer indicating the size of the test data.
...	Other parameters from scClassify.

Value

An accuracy matrix, where columns correspond to the different sample sizes and rows correspond to the repetitions.

Examples

data("scClassify_example")
xin_cellTypes <- scClassify_example$xin_cellTypes
exprsMat_xin_subset <- scClassify_example$exprsMat_xin_subset

exprsMat_xin_subset <- as(exprsMat_xin_subset, "dgCMatrix")
set.seed(2019)
accMat <- runSampleCal(exprsMat_xin_subset,
xin_cellTypes,
n_list = seq(20, 100, 20),
num_repeat = 5, BPPARAM = BiocParallel::SerialParam())
#> [1] "n= 20"
#> cellTypes_train
#> alpha  beta delta gamma 
#>     8     8     3     3 
#> There are only 4 selected genes in reference data expressed in query data
#> cellTypes_train
#> alpha  beta delta gamma 
#>     8     8     3     3 
#> There are only 4 selected genes in reference data expressed in query data
#> There are only 4 selected genes in reference data expressed in query data
#> cellTypes_train
#> alpha  beta delta gamma 
#>     8     8     3     3 
#> cellTypes_train
#> alpha  beta delta gamma 
#>     8     8     3     3 
#> cellTypes_train
#> alpha  beta delta gamma 
#>     8     8     3     3 
#> There are only 3 selected genes in reference data expressed in query data
#>                              [,1]        [,2]        [,3]       [,4]      [,5]
#> correct                0.44631902 0.000000000 0.966257669 0.79294479 0.4079755
#> correctly unassigned   0.00000000 0.000000000 0.000000000 0.00000000 0.0000000
#> intermediate           0.51993865 0.957055215 0.015337423 0.01073620 0.3987730
#> incorrectly unassigned 0.02147239 0.038343558 0.010736196 0.11349693 0.1932515
#> error assigned         0.00000000 0.000000000 0.000000000 0.00000000 0.0000000
#> misclassified          0.01226994 0.004601227 0.007668712 0.08282209 0.0000000
#> [1] "n= 40"
#> cellTypes_train
#> alpha  beta delta gamma 
#>    17    15     3     5 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    17    15     3     5 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    17    15     3     5 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    17    15     3     5 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    17    15     3     5 
#>                               [,1]        [,2]       [,3]       [,4]
#> correct                0.878548896 0.971608833 0.83596215 0.80757098
#> correctly unassigned   0.000000000 0.000000000 0.00000000 0.00000000
#> intermediate           0.031545741 0.011041009 0.00000000 0.00000000
#> incorrectly unassigned 0.088328076 0.007886435 0.15299685 0.17192429
#> error assigned         0.000000000 0.000000000 0.00000000 0.00000000
#> misclassified          0.001577287 0.009463722 0.01104101 0.02050473
#>                               [,5]
#> correct                0.766561514
#> correctly unassigned   0.000000000
#> intermediate           0.009463722
#> incorrectly unassigned 0.171924290
#> error assigned         0.000000000
#> misclassified          0.052050473
#> [1] "n= 60"
#> cellTypes_train
#> alpha  beta delta gamma 
#>    25    23     4     7 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    25    23     4     7 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    25    23     4     7 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    25    23     4     7 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    25    23     4     7 
#>                               [,1]        [,2]        [,3]        [,4]
#> correct                0.865040650 0.910569106 0.902439024 0.884552846
#> correctly unassigned   0.000000000 0.000000000 0.000000000 0.000000000
#> intermediate           0.000000000 0.000000000 0.001626016 0.001626016
#> incorrectly unassigned 0.125203252 0.081300813 0.082926829 0.108943089
#> error assigned         0.000000000 0.000000000 0.000000000 0.000000000
#> misclassified          0.009756098 0.008130081 0.013008130 0.004878049
#>                               [,5]
#> correct                0.845528455
#> correctly unassigned   0.000000000
#> intermediate           0.001626016
#> incorrectly unassigned 0.143089431
#> error assigned         0.000000000
#> misclassified          0.009756098
#> [1] "n= 80"
#> cellTypes_train
#> alpha  beta delta gamma 
#>    34    31     6     9 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    34    31     6     9 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    34    31     6     9 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    34    31     6     9 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    34    31     6     9 
#>                               [,1]        [,2]       [,3]        [,4]
#> correct                0.937710438 0.877104377 0.91919192 0.877104377
#> correctly unassigned   0.000000000 0.000000000 0.00000000 0.000000000
#> intermediate           0.003367003 0.000000000 0.00000000 0.000000000
#> incorrectly unassigned 0.052188552 0.114478114 0.07070707 0.116161616
#> error assigned         0.000000000 0.000000000 0.00000000 0.000000000
#> misclassified          0.006734007 0.008417508 0.01010101 0.006734007
#>                               [,5]
#> correct                0.873737374
#> correctly unassigned   0.000000000
#> intermediate           0.001683502
#> incorrectly unassigned 0.114478114
#> error assigned         0.000000000
#> misclassified          0.010101010
#> [1] "n= 100"
#> cellTypes_train
#> alpha  beta delta gamma 
#>    42    39     7    12 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    42    39     7    12 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    42    39     7    12 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    42    39     7    12 
#> cellTypes_train
#> alpha  beta delta gamma 
#>    42    39     7    12 
#>                              [,1]       [,2]        [,3]        [,4]       [,5]
#> correct                0.89198606 0.91463415 0.951219512 0.979094077 0.89198606
#> correctly unassigned   0.00000000 0.00000000 0.000000000 0.000000000 0.00000000
#> intermediate           0.00000000 0.00000000 0.001742160 0.003484321 0.00000000
#> incorrectly unassigned 0.09233449 0.07491289 0.040069686 0.010452962 0.09756098
#> error assigned         0.00000000 0.00000000 0.000000000 0.000000000 0.00000000
#> misclassified          0.01567944 0.01045296 0.006968641 0.006968641 0.01045296