R/sampleSizeCal.R
runSampleCal.Rd
Run sample size calculation for pilot data for reference dataset
runSampleCal( exprsMat, cellTypes, n_list = c(20, 40, 60, 80, 100, seq(200, 500, 100)), num_repeat = 20, level = NULL, cellType_tree = NULL, BPPARAM = BiocParallel::SerialParam(), subset_test = FALSE, num_test = NULL, ... )
exprsMat | An expression matrix of the pilot dataset (log-transformed or normalised) |
---|---|
cellTypes | A vector of cell types of pilot dataset |
n_list | A vector of integers indicating the sample sizes to run. |
num_repeat | An integer indicating the number of times the run for each sample size will be repeated. |
level | An integer indicating the level of the cell type tree, counted from the top, at which the accuracy rate is calculated. If it is NULL (the default), the bottom level of the cell type tree is used. It cannot be larger than the total number of levels of the tree. |
cellType_tree | A list representing the cell type tree (optional). If it is NULL, the accuracy rate is calculated based on the provided cellTypes. |
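BPPARAM | A BiocParallelParam class object from the BiocParallel package. The default is BiocParallel::SerialParam(). |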
subset_test | A logical input indicating whether a subset of the data (a fixed number for each sample size) is used for testing instead of all remaining data. By default, it is FALSE. |
num_test | An integer indicating the size of the test data. |
... | Other parameters from scClassify |
An accuracy matrix, where columns correspond to the different sample sizes and rows correspond to the repetitions.
data("scClassify_example") xin_cellTypes <- scClassify_example$xin_cellTypes exprsMat_xin_subset <- scClassify_example$exprsMat_xin_subset exprsMat_xin_subset <- as(exprsMat_xin_subset, "dgCMatrix") set.seed(2019) accMat <- runSampleCal(exprsMat_xin_subset, xin_cellTypes, n_list = seq(20, 100, 20), num_repeat = 5, BPPARAM = BiocParallel::SerialParam())#> [1] "n= 20" #> cellTypes_train #> alpha beta delta gamma #> 8 8 3 3#>#> cellTypes_train #> alpha beta delta gamma #> 8 8 3 3#>#>#> cellTypes_train #> alpha beta delta gamma #> 8 8 3 3 #> cellTypes_train #> alpha beta delta gamma #> 8 8 3 3 #> cellTypes_train #> alpha beta delta gamma #> 8 8 3 3#>#> [,1] [,2] [,3] [,4] [,5] #> correct 0.44631902 0.000000000 0.966257669 0.79294479 0.4079755 #> correctly unassigned 0.00000000 0.000000000 0.000000000 0.00000000 0.0000000 #> intermediate 0.51993865 0.957055215 0.015337423 0.01073620 0.3987730 #> incorrectly unassigned 0.02147239 0.038343558 0.010736196 0.11349693 0.1932515 #> error assigned 0.00000000 0.000000000 0.000000000 0.00000000 0.0000000 #> misclassified 0.01226994 0.004601227 0.007668712 0.08282209 0.0000000 #> [1] "n= 40" #> cellTypes_train #> alpha beta delta gamma #> 17 15 3 5 #> cellTypes_train #> alpha beta delta gamma #> 17 15 3 5 #> cellTypes_train #> alpha beta delta gamma #> 17 15 3 5 #> cellTypes_train #> alpha beta delta gamma #> 17 15 3 5 #> cellTypes_train #> alpha beta delta gamma #> 17 15 3 5 #> [,1] [,2] [,3] [,4] #> correct 0.878548896 0.971608833 0.83596215 0.80757098 #> correctly unassigned 0.000000000 0.000000000 0.00000000 0.00000000 #> intermediate 0.031545741 0.011041009 0.00000000 0.00000000 #> incorrectly unassigned 0.088328076 0.007886435 0.15299685 0.17192429 #> error assigned 0.000000000 0.000000000 0.00000000 0.00000000 #> misclassified 0.001577287 0.009463722 0.01104101 0.02050473 #> [,5] #> correct 0.766561514 #> correctly unassigned 0.000000000 #> intermediate 0.009463722 #> incorrectly unassigned 0.171924290 #> error assigned 
0.000000000 #> misclassified 0.052050473 #> [1] "n= 60" #> cellTypes_train #> alpha beta delta gamma #> 25 23 4 7 #> cellTypes_train #> alpha beta delta gamma #> 25 23 4 7 #> cellTypes_train #> alpha beta delta gamma #> 25 23 4 7 #> cellTypes_train #> alpha beta delta gamma #> 25 23 4 7 #> cellTypes_train #> alpha beta delta gamma #> 25 23 4 7 #> [,1] [,2] [,3] [,4] #> correct 0.865040650 0.910569106 0.902439024 0.884552846 #> correctly unassigned 0.000000000 0.000000000 0.000000000 0.000000000 #> intermediate 0.000000000 0.000000000 0.001626016 0.001626016 #> incorrectly unassigned 0.125203252 0.081300813 0.082926829 0.108943089 #> error assigned 0.000000000 0.000000000 0.000000000 0.000000000 #> misclassified 0.009756098 0.008130081 0.013008130 0.004878049 #> [,5] #> correct 0.845528455 #> correctly unassigned 0.000000000 #> intermediate 0.001626016 #> incorrectly unassigned 0.143089431 #> error assigned 0.000000000 #> misclassified 0.009756098 #> [1] "n= 80" #> cellTypes_train #> alpha beta delta gamma #> 34 31 6 9 #> cellTypes_train #> alpha beta delta gamma #> 34 31 6 9 #> cellTypes_train #> alpha beta delta gamma #> 34 31 6 9 #> cellTypes_train #> alpha beta delta gamma #> 34 31 6 9 #> cellTypes_train #> alpha beta delta gamma #> 34 31 6 9 #> [,1] [,2] [,3] [,4] #> correct 0.937710438 0.877104377 0.91919192 0.877104377 #> correctly unassigned 0.000000000 0.000000000 0.00000000 0.000000000 #> intermediate 0.003367003 0.000000000 0.00000000 0.000000000 #> incorrectly unassigned 0.052188552 0.114478114 0.07070707 0.116161616 #> error assigned 0.000000000 0.000000000 0.00000000 0.000000000 #> misclassified 0.006734007 0.008417508 0.01010101 0.006734007 #> [,5] #> correct 0.873737374 #> correctly unassigned 0.000000000 #> intermediate 0.001683502 #> incorrectly unassigned 0.114478114 #> error assigned 0.000000000 #> misclassified 0.010101010 #> [1] "n= 100" #> cellTypes_train #> alpha beta delta gamma #> 42 39 7 12 #> cellTypes_train #> alpha beta delta gamma #> 
42 39 7 12 #> cellTypes_train #> alpha beta delta gamma #> 42 39 7 12 #> cellTypes_train #> alpha beta delta gamma #> 42 39 7 12 #> cellTypes_train #> alpha beta delta gamma #> 42 39 7 12 #> [,1] [,2] [,3] [,4] [,5] #> correct 0.89198606 0.91463415 0.951219512 0.979094077 0.89198606 #> correctly unassigned 0.00000000 0.00000000 0.000000000 0.000000000 0.00000000 #> intermediate 0.00000000 0.00000000 0.001742160 0.003484321 0.00000000 #> incorrectly unassigned 0.09233449 0.07491289 0.040069686 0.010452962 0.09756098 #> error assigned 0.00000000 0.00000000 0.000000000 0.000000000 0.00000000 #> misclassified 0.01567944 0.01045296 0.006968641 0.006968641 0.01045296