Get a sparse summary of the calibration
Usage
getCalibrationSummary(
prediction,
predictionType,
typeColumn = "evaluation",
numberOfStrata = 10,
truncateFraction = 0.05
)
Arguments
- prediction
A prediction object as generated using the predict functions.
- predictionType
The type of prediction (binary or survival).
- typeColumn
A column that is used to stratify the results.
- numberOfStrata
The number of strata in the plot.
- truncateFraction
This fraction of probability values will be ignored when plotting, to avoid the x-axis scale being dominated by a few outliers.
Details
Generates a sparse summary showing the predicted probabilities and the observed fractions. Predictions are stratified into equally sized bins of predicted probabilities.
Examples
# simulate data
data("simulationProfile")
plpData <- simulatePlpData(simulationProfile, n=500)
#> Generating covariates
#> Generating cohorts
#> Generating outcomes
# create study population, split into train/test and preprocess with default settings
population <- createStudyPopulation(plpData, outcomeId = 3)
#> outcomeId: 3
#> binary: TRUE
#> includeAllOutcomes: TRUE
#> firstExposureOnly: FALSE
#> washoutPeriod: 0
#> removeSubjectsWithPriorOutcome: TRUE
#> priorOutcomeLookback: 99999
#> requireTimeAtRisk: TRUE
#> minTimeAtRisk: 364
#> restrictTarToCohortEnd: FALSE
#> riskWindowStart: 1
#> startAnchor: cohort start
#> riskWindowEnd: 365
#> endAnchor: cohort start
#> restrictTarToCohortEnd: FALSE
#> Removing subjects with prior outcomes (if any)
#> Removing non outcome subjects with insufficient time at risk (if any)
#> Outcome is 0 or 1
#> Population created with: 484 observations, 484 unique subjects and 225 outcomes
#> Population created in 0.0379 secs
data <- splitData(plpData, population, createDefaultSplitSetting())
#> test: 0.25
#> train: 0.75
#> nfold: 3
#> seed: 26262
#> Creating a 25% test and 75% train (into 3 folds) random stratified split by class
#> Data split into 120 test cases and 364 train cases (122, 121, 121)
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Data split in 0.247 secs
data$Train$covariateData <- preprocessData(data$Train$covariateData)
#> minFraction: 0.001
#> normalize: TRUE
#> removeRedundancy: TRUE
#> Removing 1 redundant covariates
#> Removing 0 infrequent covariates
#> Normalizing covariates
#> Tidying covariates took 0.859 secs
saveLoc <- file.path(tempdir(), "calibrationSummary")
# fit a lasso logistic regression model using the training data
plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42),
analysisId=1, analysisPath=saveLoc)
#> Running Cyclops
#> Done.
#> GLM fit status: OK
#> Returned from fitting to LassoLogisticRegression
#> Getting variable importance
#> Creating variable importance data frame
#> Getting predictions on train set
#> predictProbabilities - predictAndromeda start
#> Prediction took 0.0947 secs
#> Returned from classifier function
#> Time to fit model: 0.214 secs
calibrationSummary <- getCalibrationSummary(plpModel$prediction,
"binary",
numberOfStrata = 10,
typeColumn = "evaluationType")
calibrationSummary
#> predictionThreshold PersonCountAtRisk PersonCountWithOutcome
#> 1 0.0000000 112 39
#> 2 0.3808331 37 14
#> 3 0.4643576 142 72
#> 4 0.5009203 64 36
#> 5 0.6025060 9 8
#> 6 0.0000000 39 18
#> 7 0.3613226 44 16
#> 8 0.4045705 37 12
#> 9 0.4103970 35 12
#> 10 0.4391317 38 24
#> 11 0.4788650 62 32
#> 12 0.4867281 72 35
#> 13 0.5401973 37 20
#> averagePredictedProbability StDevPredictedProbability
#> 1 0.3760671 1.133612e-02
#> 2 0.4166930 3.910214e-02
#> 3 0.4958521 6.635993e-03
#> 4 0.5561273 4.874859e-02
#> 5 0.6066332 0.000000e+00
#> 6 0.3579737 1.175270e-02
#> 7 0.4045705 3.689970e-17
#> 8 0.4096096 2.019368e-03
#> 9 0.4340070 8.455038e-03
#> 10 0.4711291 1.400897e-02
#> 11 0.4857355 0.000000e+00
#> 12 0.5102439 3.227806e-03
#> 13 0.5996290 6.020963e-02
#> MinPredictedProbability P25PredictedProbability MedianPredictedProbability
#> 1 0.3485086 0.3808331 0.3808331
#> 2 0.3849122 0.3849122 0.3849122
#> 3 0.4686541 0.4991940 0.4991940
#> 4 0.5035099 0.5035099 0.5876909
#> 5 0.6066332 0.6066332 0.6066332
#> 6 0.3177865 0.3613226 0.3613226
#> 7 0.4045705 0.4045705 0.4045705
#> 8 0.4045705 0.4103970 0.4103970
#> 9 0.4188113 0.4267579 0.4391317
#> 10 0.4434405 0.4694431 0.4788650
#> 11 0.4857355 0.4857355 0.4857355
#> 12 0.4956609 0.5082611 0.5124601
#> 13 0.5520847 0.5520847 0.5551972
#> P75PredictedProbability MaxPredictedProbability observedIncidence evaluation
#> 1 0.3808331 0.3808331 0.3482143 Train
#> 2 0.4643576 0.4643576 0.3783784 Train
#> 3 0.4991940 0.4991940 0.5070423 Train
#> 4 0.6025060 0.6025060 0.5625000 Train
#> 5 0.6066332 0.6066332 0.8888889 Train
#> 6 0.3613226 0.3613226 0.4615385 CV
#> 7 0.4045705 0.4045705 0.3636364 CV
#> 8 0.4103970 0.4103970 0.3243243 CV
#> 9 0.4391317 0.4391317 0.3428571 CV
#> 10 0.4788650 0.4788650 0.6315789 CV
#> 11 0.4857355 0.4857355 0.5161290 CV
#> 12 0.5124601 0.5124601 0.4861111 CV
#> 13 0.6757366 0.6757366 0.5405405 CV
# clean up
unlink(saveLoc, recursive = TRUE)