Get a sparse summary of the calibration

Usage

getCalibrationSummary(
  prediction,
  predictionType,
  typeColumn = "evaluation",
  numberOfStrata = 10,
  truncateFraction = 0.05
)

Arguments

prediction: A prediction object as generated using the predict functions.
predictionType: The type of prediction ("binary" or "survival").
typeColumn: The name of the column in the prediction object that is used to stratify the results.
numberOfStrata: The number of strata (bins of predicted probabilities) that the predictions are divided into.
truncateFraction: This fraction of probability values will be ignored when plotting, to avoid the x-axis scale being dominated by a few outliers.

Value

A data frame with the calibration summary.

Details

Generates a sparse summary showing the predicted probabilities and the observed fractions. Predictions are stratified into equally sized bins of predicted probabilities.

Examples

# simulate data
data("simulationProfile")
plpData <- simulatePlpData(simulationProfile, n=500)
#> Generating covariates
#> Generating cohorts
#> Generating outcomes
# create study population, split into train/test and preprocess with default settings
population <- createStudyPopulation(plpData, outcomeId = 3)
#> outcomeId: 3
#> binary: TRUE
#> includeAllOutcomes: TRUE
#> firstExposureOnly: FALSE
#> washoutPeriod: 0
#> removeSubjectsWithPriorOutcome: TRUE
#> priorOutcomeLookback: 99999
#> requireTimeAtRisk: TRUE
#> minTimeAtRisk: 364
#> restrictTarToCohortEnd: FALSE
#> riskWindowStart: 1
#> startAnchor: cohort start
#> riskWindowEnd: 365
#> endAnchor: cohort start
#> restrictTarToCohortEnd: FALSE
#> Removing subjects with prior outcomes (if any)
#> Removing non outcome subjects with insufficient time at risk (if any)
#> Outcome is 0 or 1
#> Population created with: 484 observations, 484 unique subjects and 225 outcomes
#> Population created in 0.0379 secs
data <- splitData(plpData, population, createDefaultSplitSetting())
#> test: 0.25
#> train: 0.75
#> nfold: 3
#> seed: 26262
#> Creating a 25% test and 75% train (into 3 folds) random stratified split by class
#> Data split into 120 test cases and 364 train cases (122, 121, 121)
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Data split in 0.247 secs
data$Train$covariateData <- preprocessData(data$Train$covariateData)
#> minFraction: 0.001
#> normalize: TRUE
#> removeRedundancy: TRUE
#> Removing 1 redundant covariates
#> Removing 0 infrequent covariates
#> Normalizing covariates
#> Tidying covariates took 0.859 secs
saveLoc <- file.path(tempdir(), "calibrationSummary")
# fit a lasso logistic regression model using the training data
plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42),
                   analysisId=1, analysisPath=saveLoc)
#> Running Cyclops
#> Done.
#> GLM fit status:  OK
#> Returned from fitting to LassoLogisticRegression
#> Getting variable importance
#> Creating variable importance data frame
#> Getting predictions on train set
#> predictProbabilities - predictAndromeda start
#> Prediction took 0.0947 secs
#> Returned from classifier function
#> Time to fit model: 0.214 secs
calibrationSummary <- getCalibrationSummary(plpModel$prediction, 
                                            "binary", 
                                            numberOfStrata = 10,
                                            typeColumn = "evaluationType")
calibrationSummary
#>    predictionThreshold PersonCountAtRisk PersonCountWithOutcome
#> 1            0.0000000               112                     39
#> 2            0.3808331                37                     14
#> 3            0.4643576               142                     72
#> 4            0.5009203                64                     36
#> 5            0.6025060                 9                      8
#> 6            0.0000000                39                     18
#> 7            0.3613226                44                     16
#> 8            0.4045705                37                     12
#> 9            0.4103970                35                     12
#> 10           0.4391317                38                     24
#> 11           0.4788650                62                     32
#> 12           0.4867281                72                     35
#> 13           0.5401973                37                     20
#>    averagePredictedProbability StDevPredictedProbability
#> 1                    0.3760671              1.133612e-02
#> 2                    0.4166930              3.910214e-02
#> 3                    0.4958521              6.635993e-03
#> 4                    0.5561273              4.874859e-02
#> 5                    0.6066332              0.000000e+00
#> 6                    0.3579737              1.175270e-02
#> 7                    0.4045705              3.689970e-17
#> 8                    0.4096096              2.019368e-03
#> 9                    0.4340070              8.455038e-03
#> 10                   0.4711291              1.400897e-02
#> 11                   0.4857355              0.000000e+00
#> 12                   0.5102439              3.227806e-03
#> 13                   0.5996290              6.020963e-02
#>    MinPredictedProbability P25PredictedProbability MedianPredictedProbability
#> 1                0.3485086               0.3808331                  0.3808331
#> 2                0.3849122               0.3849122                  0.3849122
#> 3                0.4686541               0.4991940                  0.4991940
#> 4                0.5035099               0.5035099                  0.5876909
#> 5                0.6066332               0.6066332                  0.6066332
#> 6                0.3177865               0.3613226                  0.3613226
#> 7                0.4045705               0.4045705                  0.4045705
#> 8                0.4045705               0.4103970                  0.4103970
#> 9                0.4188113               0.4267579                  0.4391317
#> 10               0.4434405               0.4694431                  0.4788650
#> 11               0.4857355               0.4857355                  0.4857355
#> 12               0.4956609               0.5082611                  0.5124601
#> 13               0.5520847               0.5520847                  0.5551972
#>    P75PredictedProbability MaxPredictedProbability observedIncidence evaluation
#> 1                0.3808331               0.3808331         0.3482143      Train
#> 2                0.4643576               0.4643576         0.3783784      Train
#> 3                0.4991940               0.4991940         0.5070423      Train
#> 4                0.6025060               0.6025060         0.5625000      Train
#> 5                0.6066332               0.6066332         0.8888889      Train
#> 6                0.3613226               0.3613226         0.4615385         CV
#> 7                0.4045705               0.4045705         0.3636364         CV
#> 8                0.4103970               0.4103970         0.3243243         CV
#> 9                0.4391317               0.4391317         0.3428571         CV
#> 10               0.4788650               0.4788650         0.6315789         CV
#> 11               0.4857355               0.4857355         0.5161290         CV
#> 12               0.5124601               0.5124601         0.4861111         CV
#> 13               0.6757366               0.6757366         0.5405405         CV
# clean up
unlink(saveLoc, recursive = TRUE)