Simulate a Cyclops dataset — simulateCyclopsData • Cyclops

simulateCyclopsData generates a large, sparse simulated data set for use by fitCyclopsSimulation.

simulateCyclopsData(
  nstrata = 200,
  nrows = 10000,
  ncovars = 20,
  effectSizeSd = 1,
  zeroEffectSizeProp = 0.9,
  eCovarsPerRow = ncovars/100,
  model = "survival"
)

Arguments

nstrata: Numeric: Number of strata
nrows: Numeric: Number of observation rows
ncovars: Numeric: Number of covariates
effectSizeSd: Numeric: Standard deviation of the non-zero simulated regression coefficients
zeroEffectSizeProp: Numeric: Expected proportion of zero effect size
eCovarsPerRow: Numeric: Effective number of non-zero covariates per data row
model: String: Simulation model. Choices are: logistic, poisson or survival

Value

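A simulated data set, returned as an object whose elements include `outcomes` and `covariates` (accessed as `sim$outcomes` and `sim$covariates` in the example below)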

Examples

#Generate some simulated data:
sim <- simulateCyclopsData(nstrata = 1, nrows = 1000, ncovars = 2, eCovarsPerRow = 0.5, 
                           model = "poisson")
#> Sparseness = 77.45 %
cyclopsData <- convertToCyclopsData(sim$outcomes, sim$covariates, modelType = "pr", 
                                    addIntercept = TRUE)
#> Sorting covariates by covariateId and rowId

#Define the prior and control objects to use cross-validation for finding the 
#optimal hyperparameter:
prior <- createPrior("laplace", exclude = 0, useCrossValidation = TRUE)
control <- createControl(cvType = "auto", noiseLevel = "quiet")

#Fit the model
fit <- fitCyclopsModel(cyclopsData,prior = prior, control = control)  
#> Using cross-validation selector type byRow
#> Performing 10-fold cross-validation [seed = 1698789255] with data partitions of sizes 100 100 100 100 100 100 100 100 100 100
#> Using 1 thread(s)
#> Starting var = 0.2255 (default)
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #1 Rep #1 pred log like = 273.307
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #2 Rep #1 pred log like = 399.873
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #3 Rep #1 pred log like = 404.29
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #4 Rep #1 pred log like = 453.587
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #5 Rep #1 pred log like = 409.265
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #6 Rep #1 pred log like = 383.626
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #7 Rep #1 pred log like = 322.027
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #8 Rep #1 pred log like = 388.538
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #9 Rep #1 pred log like = 307.366
#> Running at Laplace(2.97812) None  Grid-point #1 at 0.2255 	Fold #10 Rep #1 pred log like = 439.966
#> AvgPred = 378.185 with stdev = 55.6106
#> Completed at 0.2255
#> Next point at 2.255 with value 0 and continue = 1
#> search[ 0.2255 ] = 378.185(55.6106)
#> 
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #1 Rep #1 pred log like = 273.215
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #2 Rep #1 pred log like = 399.868
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #3 Rep #1 pred log like = 404.281
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #4 Rep #1 pred log like = 453.566
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #5 Rep #1 pred log like = 409.239
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #6 Rep #1 pred log like = 383.621
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #7 Rep #1 pred log like = 322.048
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #8 Rep #1 pred log like = 388.517
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #9 Rep #1 pred log like = 307.357
#> Running at Laplace(0.941763) None  Grid-point #2 at 2.255 	Fold #10 Rep #1 pred log like = 439.957
#> AvgPred = 378.167 with stdev = 55.6206
#> Completed at 2.255
#> Next point at 0.02255 with value 0 and continue = 1
#> search[ 0.2255 ] = 378.185(55.6106)
#> search[ 2.255 ] = 378.167(55.6206)
#> 
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #1 Rep #1 pred log like = 273.567
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #2 Rep #1 pred log like = 399.871
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #3 Rep #1 pred log like = 404.295
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #4 Rep #1 pred log like = 453.644
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #5 Rep #1 pred log like = 409.323
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #6 Rep #1 pred log like = 383.623
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #7 Rep #1 pred log like = 321.955
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #8 Rep #1 pred log like = 388.579
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #9 Rep #1 pred log like = 307.378
#> Running at Laplace(9.41763) None  Grid-point #3 at 0.02255 	Fold #10 Rep #1 pred log like = 439.958
#> AvgPred = 378.219 with stdev = 55.5785
#> Completed at 0.02255
#> Next point at 0.002255 with value 0 and continue = 1
#> search[ 0.02255 ] = 378.219(55.5785)
#> search[ 0.2255 ] = 378.185(55.6106)
#> search[ 2.255 ] = 378.167(55.6206)
#> 
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #1 Rep #1 pred log like = 274.347
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #2 Rep #1 pred log like = 399.829
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #3 Rep #1 pred log like = 404.168
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #4 Rep #1 pred log like = 453.483
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #5 Rep #1 pred log like = 409.31
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #6 Rep #1 pred log like = 383.395
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #7 Rep #1 pred log like = 321.694
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #8 Rep #1 pred log like = 388.495
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #9 Rep #1 pred log like = 307.173
#> Running at Laplace(29.7812) None  Grid-point #4 at 0.002255 	Fold #10 Rep #1 pred log like = 439.822
#> AvgPred = 378.172 with stdev = 55.4357
#> Completed at 0.002255
#> Next point at 0.0505023 with value 378.206 and continue = 0
#> search[ 0.002255 ] = 378.172(55.4357)
#> search[ 0.02255 ] = 378.219(55.5785)
#> search[ 0.2255 ] = 378.185(55.6106)
#> search[ 2.255 ] = 378.167(55.6206)
#> 
#> 
#> Maximum predicted log likelihood (378.206) estimated at:
#> 	0.0505023 (variance)
#> 	6.29302 (lambda)
#> 
#> Fitting model at optimal hyperparameter
#> Using prior: Laplace(6.29302) None 

#Find out what the optimal hyperparameter was:
getHyperParameter(fit)
#> [1] 0.0505023

#Extract the current log-likelihood, and coefficients
logLik(fit)
#> 'log Lik.' -2012.552 (df=3)
coef(fit)
#> (Intercept)           1           2 
#> -3.94394237  0.00000000  0.06899875 

#We can only retrieve the confidence interval for unregularized coefficients:
confint(fit, c(0))
#> Using 1 thread(s)
#>             covariate     2.5 %    97.5 % evaluations
#> (Intercept)         0 -3.977897 -3.909783          22