Specify settings for developing a single model
Usage
createModelDesign(
targetId = NULL,
outcomeId = NULL,
restrictPlpDataSettings = createRestrictPlpDataSettings(),
populationSettings = createStudyPopulationSettings(),
covariateSettings = FeatureExtraction::createDefaultCovariateSettings(),
featureEngineeringSettings = NULL,
sampleSettings = NULL,
preprocessSettings = NULL,
modelSettings = NULL,
splitSettings = createDefaultSplitSetting(),
runCovariateSummary = TRUE
)
Arguments
- targetId
The id of the target cohort that will be used for data extraction (e.g., the ATLAS id)
- outcomeId
The id of the outcome that will be used for data extraction (e.g., the ATLAS id)
- restrictPlpDataSettings
The settings specifying the extra restriction settings when extracting the data created using
createRestrictPlpDataSettings().
- populationSettings
The population settings specified by
createStudyPopulationSettings()
- covariateSettings
The covariate settings, this can be a list or a single
'covariateSetting'
object.
- featureEngineeringSettings
Either NULL or an object of class
featureEngineeringSettings
specifying any feature engineering used during model development
- sampleSettings
Either NULL or an object of class
sampleSettings
with the over/under sampling settings used for model development
- preprocessSettings
Either NULL or an object of class
preprocessSettings
created using createPreprocessingSettings()
- modelSettings
The model settings such as
setLassoLogisticRegression()
- splitSettings
The train/validation/test splitting used by all analyses created using
createDefaultSplitSetting()
- runCovariateSummary
Whether to run the covariateSummary
Examples
# L1 logistic regression model to predict the outcomeId 2 using the targetId 1
# with default population, restrictPlp, split, and covariate settings
createModelDesign(
targetId = 1,
outcomeId = 2,
modelSettings = setLassoLogisticRegression(seed=42),
populationSettings = createStudyPopulationSettings(),
restrictPlpDataSettings = createRestrictPlpDataSettings(),
covariateSettings = FeatureExtraction::createDefaultCovariateSettings(),
splitSettings = createDefaultSplitSetting(splitSeed = 42),
runCovariateSummary = TRUE
)
#> $targetId
#> [1] 1
#>
#> $outcomeId
#> [1] 2
#>
#> $restrictPlpDataSettings
#> $studyStartDate
#> [1] ""
#>
#> $studyEndDate
#> [1] ""
#>
#> $firstExposureOnly
#> [1] FALSE
#>
#> $washoutPeriod
#> [1] 0
#>
#> $sampleSize
#> NULL
#>
#> attr(,"class")
#> [1] "restrictPlpDataSettings"
#>
#> $covariateSettings
#> $temporal
#> [1] FALSE
#>
#> $temporalSequence
#> [1] FALSE
#>
#> $DemographicsGender
#> [1] TRUE
#>
#> $DemographicsAgeGroup
#> [1] TRUE
#>
#> $DemographicsRace
#> [1] TRUE
#>
#> $DemographicsEthnicity
#> [1] TRUE
#>
#> $DemographicsIndexYear
#> [1] TRUE
#>
#> $DemographicsIndexMonth
#> [1] TRUE
#>
#> $ConditionGroupEraLongTerm
#> [1] TRUE
#>
#> $ConditionGroupEraShortTerm
#> [1] TRUE
#>
#> $DrugGroupEraLongTerm
#> [1] TRUE
#>
#> $DrugGroupEraShortTerm
#> [1] TRUE
#>
#> $DrugGroupEraOverlapping
#> [1] TRUE
#>
#> $ProcedureOccurrenceLongTerm
#> [1] TRUE
#>
#> $ProcedureOccurrenceShortTerm
#> [1] TRUE
#>
#> $DeviceExposureLongTerm
#> [1] TRUE
#>
#> $DeviceExposureShortTerm
#> [1] TRUE
#>
#> $MeasurementLongTerm
#> [1] TRUE
#>
#> $MeasurementShortTerm
#> [1] TRUE
#>
#> $MeasurementRangeGroupLongTerm
#> [1] TRUE
#>
#> $MeasurementRangeGroupShortTerm
#> [1] TRUE
#>
#> $MeasurementValueAsConceptLongTerm
#> [1] TRUE
#>
#> $MeasurementValueAsConceptShortTerm
#> [1] TRUE
#>
#> $ObservationLongTerm
#> [1] TRUE
#>
#> $ObservationShortTerm
#> [1] TRUE
#>
#> $ObservationValueAsConceptLongTerm
#> [1] TRUE
#>
#> $ObservationValueAsConceptShortTerm
#> [1] TRUE
#>
#> $CharlsonIndex
#> [1] TRUE
#>
#> $Dcsi
#> [1] TRUE
#>
#> $Chads2
#> [1] TRUE
#>
#> $Chads2Vasc
#> [1] TRUE
#>
#> $includedCovariateConceptIds
#> logical(0)
#>
#> $includedCovariateIds
#> logical(0)
#>
#> $addDescendantsToInclude
#> [1] FALSE
#>
#> $excludedCovariateConceptIds
#> logical(0)
#>
#> $addDescendantsToExclude
#> [1] FALSE
#>
#> $shortTermStartDays
#> [1] -30
#>
#> $mediumTermStartDays
#> [1] -180
#>
#> $endDays
#> [1] 0
#>
#> $longTermStartDays
#> [1] -365
#>
#> attr(,"fun")
#> [1] "getDbDefaultCovariateData"
#> attr(,"class")
#> [1] "covariateSettings"
#>
#> $populationSettings
#> $binary
#> [1] TRUE
#>
#> $includeAllOutcomes
#> [1] TRUE
#>
#> $firstExposureOnly
#> [1] FALSE
#>
#> $washoutPeriod
#> [1] 0
#>
#> $removeSubjectsWithPriorOutcome
#> [1] TRUE
#>
#> $priorOutcomeLookback
#> [1] 99999
#>
#> $requireTimeAtRisk
#> [1] TRUE
#>
#> $minTimeAtRisk
#> [1] 364
#>
#> $riskWindowStart
#> [1] 1
#>
#> $startAnchor
#> [1] "cohort start"
#>
#> $riskWindowEnd
#> [1] 365
#>
#> $endAnchor
#> [1] "cohort start"
#>
#> $restrictTarToCohortEnd
#> [1] FALSE
#>
#> attr(,"class")
#> [1] "populationSettings"
#>
#> $sampleSettings
#> $sampleSettings[[1]]
#> $numberOutcomestoNonOutcomes
#> [1] 1
#>
#> $sampleSeed
#> [1] 1
#>
#> attr(,"fun")
#> [1] "sameData"
#> attr(,"class")
#> [1] "sampleSettings"
#>
#>
#> $featureEngineeringSettings
#> $featureEngineeringSettings[[1]]
#> list()
#> attr(,"fun")
#> [1] "sameData"
#> attr(,"class")
#> [1] "featureEngineeringSettings"
#>
#>
#> $preprocessSettings
#> $minFraction
#> [1] 0.001
#>
#> $normalize
#> [1] TRUE
#>
#> $removeRedundancy
#> [1] TRUE
#>
#> attr(,"class")
#> [1] "preprocessSettings"
#>
#> $modelSettings
#> $fitFunction
#> [1] "fitCyclopsModel"
#>
#> $param
#> $param$priorParams
#> $param$priorParams$priorType
#> [1] "laplace"
#>
#> $param$priorParams$forceIntercept
#> [1] FALSE
#>
#> $param$priorParams$variance
#> [1] 0.01
#>
#> $param$priorParams$exclude
#> [1] 0
#>
#>
#> $param$includeCovariateIds
#> NULL
#>
#> $param$upperLimit
#> [1] 20
#>
#> $param$lowerLimit
#> [1] 0.01
#>
#> $param$priorCoefs
#> NULL
#>
#> attr(,"settings")
#> attr(,"settings")$priorfunction
#> [1] "Cyclops::createPrior"
#>
#> attr(,"settings")$selectorType
#> [1] "byPid"
#>
#> attr(,"settings")$crossValidationInPrior
#> [1] TRUE
#>
#> attr(,"settings")$modelType
#> [1] "logistic"
#>
#> attr(,"settings")$addIntercept
#> [1] TRUE
#>
#> attr(,"settings")$useControl
#> [1] TRUE
#>
#> attr(,"settings")$seed
#> [1] 42
#>
#> attr(,"settings")$name
#> [1] "Lasso Logistic Regression"
#>
#> attr(,"settings")$threads
#> [1] -1
#>
#> attr(,"settings")$tolerance
#> [1] 2e-06
#>
#> attr(,"settings")$cvRepetitions
#> [1] 1
#>
#> attr(,"settings")$maxIterations
#> [1] 3000
#>
#> attr(,"modelType")
#> [1] "binary"
#> attr(,"saveType")
#> [1] "RtoJson"
#>
#> attr(,"class")
#> [1] "modelSettings"
#>
#> $splitSettings
#> $test
#> [1] 0.25
#>
#> $train
#> [1] 0.75
#>
#> $seed
#> [1] 42
#>
#> $nfold
#> [1] 3
#>
#> attr(,"fun")
#> [1] "randomSplitter"
#> attr(,"class")
#> [1] "splitSettings"
#>
#> $executeSettings
#> $runSplitData
#> [1] TRUE
#>
#> $runSampleData
#> [1] FALSE
#>
#> $runFeatureEngineering
#> [1] FALSE
#>
#> $runPreprocessData
#> [1] FALSE
#>
#> $runModelDevelopment
#> [1] TRUE
#>
#> $runCovariateSummary
#> [1] TRUE
#>
#> attr(,"class")
#> [1] "executeSettings"
#>
#> attr(,"class")
#> [1] "modelDesign"