Get a sparse summary of the calibration
Usage
getCalibrationSummary(
prediction,
predictionType,
typeColumn = "evaluation",
numberOfStrata = 10,
truncateFraction = 0.05
)
Arguments
- prediction
A prediction object as generated using the
predict
functions.
- predictionType
The type of prediction (binary or survival)
- typeColumn
A column that is used to stratify the results
- numberOfStrata
The number of strata in the plot.
- truncateFraction
This fraction of probability values will be ignored when plotting, to avoid the x-axis scale being dominated by a few outliers.
Details
Generates a sparse summary showing the predicted probabilities and the observed fractions. Predictions are stratified into equally sized bins of predicted probabilities.
Examples
# simulate data
data("simulationProfile")
plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42)
#> Generating covariates
#> Generating cohorts
#> Generating outcomes
# create study population, split into train/test and preprocess with default settings
population <- createStudyPopulation(plpData, outcomeId = 3)
#> outcomeId: 3
#> binary: TRUE
#> includeAllOutcomes: TRUE
#> firstExposureOnly: FALSE
#> washoutPeriod: 0
#> removeSubjectsWithPriorOutcome: TRUE
#> priorOutcomeLookback: 99999
#> requireTimeAtRisk: TRUE
#> minTimeAtRisk: 364
#> restrictTarToCohortEnd: FALSE
#> riskWindowStart: 1
#> startAnchor: cohort start
#> riskWindowEnd: 365
#> endAnchor: cohort start
#> restrictTarToCohortEnd: FALSE
#> Removing subjects with prior outcomes (if any)
#> Removing non outcome subjects with insufficient time at risk (if any)
#> Outcome is 0 or 1
#> Population created with: 477 observations, 477 unique subjects and 235 outcomes
#> Population created in 0.0429 secs
data <- splitData(plpData, population, createDefaultSplitSetting())
#> test: 0.25
#> train: 0.75
#> nfold: 3
#> seed: 87414
#> Creating a 25% test and 75% train (into 3 folds) random stratified split by class
#> Data split into 118 test cases and 359 train cases (120, 120, 119)
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Starting to limit covariate data to population...
#> Finished limiting covariate data to population...
#> Data split in 0.779 secs
data$Train$covariateData <- preprocessData(data$Train$covariateData)
#> minFraction: 0.001
#> normalize: TRUE
#> removeRedundancy: TRUE
#> Removing 1 redundant covariates
#> Removing 0 infrequent covariates
#> Normalizing covariates
#> Tidying covariates took 1.21 secs
saveLoc <- file.path(tempdir(), "calibrationSummary")
# fit a lasso logistic regression model using the training data
plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42),
analysisId=1, analysisPath=saveLoc)
#> Running Cyclops
#> Done.
#> GLM fit status: OK
#> Returned from fitting to LassoLogisticRegression
#> Getting variable importance
#> Creating variable importance data frame
#> Getting predictions on train set
#> predictProbabilities - predictAndromeda start
#> Prediction took 0.148 secs
#> Returned from classifier function
#> Time to fit model: 0.326 secs
calibrationSummary <- getCalibrationSummary(plpModel$prediction,
"binary",
numberOfStrata = 10,
typeColumn = "evaluationType")
calibrationSummary
#> predictionThreshold PersonCountAtRisk PersonCountWithOutcome
#> 1 0.0000000 80 26
#> 2 0.4378269 56 25
#> 3 0.4449642 54 24
#> 4 0.4525005 47 21
#> 5 0.4596815 15 8
#> 6 0.5432848 38 25
#> 7 0.5851795 43 33
#> 8 0.5995251 26 15
#> 9 0.0000000 53 22
#> 10 0.4347187 48 21
#> 11 0.4360719 31 15
#> 12 0.4459475 37 13
#> 13 0.4612994 43 22
#> 14 0.4784741 3 0
#> 15 0.4880717 37 16
#> 16 0.4904711 45 34
#> 17 0.5418925 27 17
#> 18 0.6244405 35 17
#> averagePredictedProbability StDevPredictedProbability
#> 1 0.4199545 0.0324866091
#> 2 0.4449642 0.0000000000
#> 3 0.4525005 0.0000000000
#> 4 0.4596815 0.0000000000
#> 5 0.5128086 0.0193008989
#> 6 0.5785022 0.0090283277
#> 7 0.5950694 0.0036456538
#> 8 0.6608608 0.0536727630
#> 9 0.4249096 0.0280345463
#> 10 0.4360439 0.0001359861
#> 11 0.4459475 0.0000000000
#> 12 0.4612994 0.0000000000
#> 13 0.4780158 0.0011538080
#> 14 0.4784741 0.0000000000
#> 15 0.4904711 0.0000000000
#> 16 0.5281255 0.0151116471
#> 17 0.5780195 0.0249779640
#> 18 0.6417163 0.0218675577
#> MinPredictedProbability P25PredictedProbability MedianPredictedProbability
#> 1 0.3529353 0.4378269 0.4378269
#> 2 0.4449642 0.4449642 0.4449642
#> 3 0.4525005 0.4525005 0.4525005
#> 4 0.4596815 0.4596815 0.4596815
#> 5 0.4820612 0.5005591 0.5042112
#> 6 0.5504580 0.5706220 0.5851140
#> 7 0.5921234 0.5921234 0.5921886
#> 8 0.6064548 0.6064548 0.6590167
#> 9 0.3263596 0.4347187 0.4347187
#> 10 0.4353985 0.4360719 0.4360719
#> 11 0.4459475 0.4459475 0.4459475
#> 12 0.4612994 0.4612994 0.4612994
#> 13 0.4747578 0.4784741 0.4784741
#> 14 0.4784741 0.4784741 0.4784741
#> 15 0.4904711 0.4904711 0.4904711
#> 16 0.5039843 0.5222591 0.5404674
#> 17 0.5531805 0.5531805 0.5825347
#> 18 0.6249322 0.6265852 0.6355502
#> P75PredictedProbability MaxPredictedProbability observedIncidence evaluation
#> 1 0.4378269 0.4378269 0.3250000 Train
#> 2 0.4449642 0.4449642 0.4464286 Train
#> 3 0.4525005 0.4525005 0.4444444 Train
#> 4 0.4596815 0.4596815 0.4468085 Train
#> 5 0.5285091 0.5432848 0.5333333 Train
#> 6 0.5851140 0.5851795 0.6578947 Train
#> 7 0.5995251 0.5995251 0.7674419 Train
#> 8 0.7171175 0.7244831 0.5769231 Train
#> 9 0.4347187 0.4347187 0.4150943 CV
#> 10 0.4360719 0.4360719 0.4375000 CV
#> 11 0.4459475 0.4459475 0.4838710 CV
#> 12 0.4612994 0.4612994 0.3513514 CV
#> 13 0.4784741 0.4784741 0.5116279 CV
#> 14 0.4784741 0.4784741 0.0000000 CV
#> 15 0.4904711 0.4904711 0.4324324 CV
#> 16 0.5418925 0.5418925 0.7555556 CV
#> 17 0.5825347 0.6244405 0.6296296 CV
#> 18 0.6423369 0.7193268 0.4857143 CV
# clean up
unlink(saveLoc, recursive = TRUE)