Get a sparse summary of the calibration

Usage

getCalibrationSummary(
  prediction,
  predictionType,
  typeColumn = "evaluation",
  numberOfStrata = 10,
  truncateFraction = 0.05
)

Arguments

prediction: A prediction object as generated using the predict functions.
predictionType: The type of prediction ("binary" or "survival").
typeColumn: The name of the column in the prediction object that is used to stratify the results (for example, "evaluationType").
numberOfStrata: The number of strata (bins of predicted probabilities) to use in the summary.
truncateFraction: This fraction of probability values will be ignored when plotting, to avoid the x-axis scale being dominated by a few outliers.

Value

A data frame with the calibration summary, containing one row per stratum for each value of the stratification column.

Details

Generates a sparse summary showing the predicted probabilities and the observed fractions. Predictions are stratified into equally sized bins of predicted probabilities.

Examples

# simulate data
data("simulationProfile")
plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42)
#> Generating covariates
#> Generating cohorts
#> Generating outcomes
# create study population, split into train/test and preprocess with default settings
population <- createStudyPopulation(plpData, outcomeId = 3)
#> outcomeId: 3
#> binary: TRUE
#> includeAllOutcomes: TRUE
#> firstExposureOnly: FALSE
#> washoutPeriod: 0
#> removeSubjectsWithPriorOutcome: TRUE
#> priorOutcomeLookback: 99999
#> requireTimeAtRisk: TRUE
#> minTimeAtRisk: 364
#> restrictTarToCohortEnd: FALSE
#> riskWindowStart: 1
#> startAnchor: cohort start
#> riskWindowEnd: 365
#> endAnchor: cohort start
#> restrictTarToCohortEnd: FALSE
#> Removing subjects with prior outcomes (if any)
#> Removing non outcome subjects with insufficient time at risk (if any)
#> Outcome is 0 or 1
#> Population created with: 477 observations, 477 unique subjects and 235 outcomes
#> Population created in 0.0429 secs
data <- splitData(plpData, population, createDefaultSplitSetting())
#> test: 0.25
#> train: 0.75
#> nfold: 3
#> seed: 87414
#> Creating a 25% test and 75% train (into 3 folds) random stratified split by class
#> Data split into 118 test cases and 359 train cases (120, 120, 119)
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Data split in 0.779 secs
data$Train$covariateData <- preprocessData(data$Train$covariateData)
#> minFraction: 0.001
#> normalize: TRUE
#> removeRedundancy: TRUE
#> Removing 1 redundant covariates
#> Removing 0 infrequent covariates
#> Normalizing covariates
#> Tidying covariates took 1.21 secs
saveLoc <- file.path(tempdir(), "calibrationSummary")
# fit a lasso logistic regression model using the training data
plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42),
                   analysisId=1, analysisPath=saveLoc)
#> Running Cyclops
#> Done.
#> GLM fit status:  OK
#> Returned from fitting to LassoLogisticRegression
#> Getting variable importance
#> Creating variable importance data frame
#> Getting predictions on train set
#> predictProbabilities - predictAndromeda start
#> Prediction took 0.148 secs
#> Returned from classifier function
#> Time to fit model: 0.326 secs
calibrationSummary <- getCalibrationSummary(plpModel$prediction, 
                                            "binary", 
                                            numberOfStrata = 10,
                                            typeColumn = "evaluationType")
calibrationSummary
#>    predictionThreshold PersonCountAtRisk PersonCountWithOutcome
#> 1            0.0000000                80                     26
#> 2            0.4378269                56                     25
#> 3            0.4449642                54                     24
#> 4            0.4525005                47                     21
#> 5            0.4596815                15                      8
#> 6            0.5432848                38                     25
#> 7            0.5851795                43                     33
#> 8            0.5995251                26                     15
#> 9            0.0000000                53                     22
#> 10           0.4347187                48                     21
#> 11           0.4360719                31                     15
#> 12           0.4459475                37                     13
#> 13           0.4612994                43                     22
#> 14           0.4784741                 3                      0
#> 15           0.4880717                37                     16
#> 16           0.4904711                45                     34
#> 17           0.5418925                27                     17
#> 18           0.6244405                35                     17
#>    averagePredictedProbability StDevPredictedProbability
#> 1                    0.4199545              0.0324866091
#> 2                    0.4449642              0.0000000000
#> 3                    0.4525005              0.0000000000
#> 4                    0.4596815              0.0000000000
#> 5                    0.5128086              0.0193008989
#> 6                    0.5785022              0.0090283277
#> 7                    0.5950694              0.0036456538
#> 8                    0.6608608              0.0536727630
#> 9                    0.4249096              0.0280345463
#> 10                   0.4360439              0.0001359861
#> 11                   0.4459475              0.0000000000
#> 12                   0.4612994              0.0000000000
#> 13                   0.4780158              0.0011538080
#> 14                   0.4784741              0.0000000000
#> 15                   0.4904711              0.0000000000
#> 16                   0.5281255              0.0151116471
#> 17                   0.5780195              0.0249779640
#> 18                   0.6417163              0.0218675577
#>    MinPredictedProbability P25PredictedProbability MedianPredictedProbability
#> 1                0.3529353               0.4378269                  0.4378269
#> 2                0.4449642               0.4449642                  0.4449642
#> 3                0.4525005               0.4525005                  0.4525005
#> 4                0.4596815               0.4596815                  0.4596815
#> 5                0.4820612               0.5005591                  0.5042112
#> 6                0.5504580               0.5706220                  0.5851140
#> 7                0.5921234               0.5921234                  0.5921886
#> 8                0.6064548               0.6064548                  0.6590167
#> 9                0.3263596               0.4347187                  0.4347187
#> 10               0.4353985               0.4360719                  0.4360719
#> 11               0.4459475               0.4459475                  0.4459475
#> 12               0.4612994               0.4612994                  0.4612994
#> 13               0.4747578               0.4784741                  0.4784741
#> 14               0.4784741               0.4784741                  0.4784741
#> 15               0.4904711               0.4904711                  0.4904711
#> 16               0.5039843               0.5222591                  0.5404674
#> 17               0.5531805               0.5531805                  0.5825347
#> 18               0.6249322               0.6265852                  0.6355502
#>    P75PredictedProbability MaxPredictedProbability observedIncidence evaluation
#> 1                0.4378269               0.4378269         0.3250000      Train
#> 2                0.4449642               0.4449642         0.4464286      Train
#> 3                0.4525005               0.4525005         0.4444444      Train
#> 4                0.4596815               0.4596815         0.4468085      Train
#> 5                0.5285091               0.5432848         0.5333333      Train
#> 6                0.5851140               0.5851795         0.6578947      Train
#> 7                0.5995251               0.5995251         0.7674419      Train
#> 8                0.7171175               0.7244831         0.5769231      Train
#> 9                0.4347187               0.4347187         0.4150943         CV
#> 10               0.4360719               0.4360719         0.4375000         CV
#> 11               0.4459475               0.4459475         0.4838710         CV
#> 12               0.4612994               0.4612994         0.3513514         CV
#> 13               0.4784741               0.4784741         0.5116279         CV
#> 14               0.4784741               0.4784741         0.0000000         CV
#> 15               0.4904711               0.4904711         0.4324324         CV
#> 16               0.5418925               0.5418925         0.7555556         CV
#> 17               0.5825347               0.6244405         0.6296296         CV
#> 18               0.6423369               0.7193268         0.4857143         CV
# clean up
unlink(saveLoc, recursive = TRUE)