RunAndUploadDbProfile.Rmd

This document will take you through the steps of running the database profile functions and uploading the results to a location to be referenced when running database diagnostics.

Call the `createConnectionDetails` function to create the `connectionDetails` object, inputting the information for how to connect to your database. Detailed instructions on how to do this can be found in the DatabaseConnector package documentation.
library(DbDiagnostics)

# Create the connectionDetails object describing how to connect to the
# database to be profiled. See the DatabaseConnector documentation for
# dbms-specific options (port, pathToDriver, etc.).
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/synthea",
  user = "postgres",
  password = "postgres"
)
# The schema where your CDM-structured data are housed
cdmDatabaseSchema <- "cdm_54_test"

# The schema where your achilles results are or will be housed
resultsDatabaseSchema <- "cdm_54_results"

# The schema where any missing achilles analyses should be written
writeTo <- "cdm_54_results"

# The schema where your vocabulary tables are housed, typically the same
# as the cdmDatabaseSchema
vocabDatabaseSchema <- cdmDatabaseSchema

# A unique, identifiable name for your database
cdmSourceName <- "Synthea_v54"

# The folder where your results should be written (fill in before running)
outputFolder <- ""

# The version of the OMOP CDM you are currently on; v5.3 and v5.4 are supported
cdmVersion <- "5.4"

# Whether the function should append existing Achilles tables or create new ones
appendAchilles <- FALSE

# Whether to round to the 10s or 100s place. Valid inputs are 10 or 100;
# default is 10.
roundTo <- 10

# Vector of concepts to exclude from the output. Note: no patient-level data
# is pulled as part of the package or included as part of the output.
excludedConcepts <- c()

# Whether the DQD should be run as part of the profile exercise
addDQD <- FALSE
# Run the database profile with the parameters defined above. The arguments
# are collected into a named list and passed via do.call so the parameter
# set is easy to inspect or reuse.
profileArgs <- list(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = cdmDatabaseSchema,
  resultsDatabaseSchema = resultsDatabaseSchema,
  writeTo = writeTo,
  vocabDatabaseSchema = vocabDatabaseSchema,
  cdmSourceName = cdmSourceName,
  outputFolder = outputFolder,
  cdmVersion = cdmVersion,
  appendAchilles = appendAchilles,
  roundTo = roundTo,
  excludedConcepts = excludedConcepts,
  addDQD = addDQD
)
do.call(DbDiagnostics::executeDbProfile, profileArgs)
Upon completion, the summary statistics results *.csv file, the CDM_SOURCE table, metadata.csv and the data quality dashboard JSON file (if specified) will be located in a zip file in the output location you set as part of the execute function.

Be sure to unzip the dbProfile results that were generated in the step above. Then, set the location where you unzipped the results as the parameter `resultsLocation`. Any file path below denoted with `<cdmSourceName>` should have that placeholder replaced with the source name used when `executeDbProfile` was run. You can find this value in the generated file names.
# Set the location of the results that were generated from executeDbProfile
resultsLocation <- ""

# Set a parameter detailing if the DQD was run
addDQD <- FALSE

# Read in the profile results; STRATUM_1 is forced to character on read via
# colClasses so read.csv does not coerce it to numeric.
db_profile_results <- read.csv(
  file.path(resultsLocation, "db_profile_results.csv"),
  stringsAsFactors = FALSE,
  colClasses = c("STRATUM_1" = "character")
)

# Make sure all stratum columns are read in as characters to facilitate
# dbDiagnostics execution.
stratumCols <- paste0("STRATUM_", 1:5)
db_profile_results[stratumCols] <- lapply(db_profile_results[stratumCols], as.character)

# Read in the metadata. Replace <cdmSourceName> with the source name used
# when executeDbProfile was run (visible in the generated file names).
db_metadata <- read.csv(
  file.path(resultsLocation, "<cdmSourceName>_metadata.csv"),
  stringsAsFactors = FALSE
)

# Read in the cdm_source table
db_cdm_source <- read.csv(
  file.path(resultsLocation, "<cdmSourceName>_cdm_source.csv"),
  stringsAsFactors = FALSE
)
# Determine which tables should be uploaded based on whether the DQD was included
if (addDQD) {
  # NOTE(review): the original referenced `outputFolder` and `dbId`, neither of
  # which is defined in this upload script; the DQD JSON sits alongside the
  # other unzipped profile outputs, so it is read from resultsLocation here.
  # Confirm the file name matches the one produced by executeDbProfile.
  dqdJsonDf <- jsonlite::fromJSON(
    file.path(resultsLocation, "<cdmSourceName>_DbProfile.json"),
    simplifyDataFrame = TRUE
  )
  dqd_overview <- as.data.frame(dqdJsonDf$Overview)
  dqd_checkresults <- as.data.frame(dqdJsonDf$CheckResults)
  # Coerce to character so the column uploads with a consistent type
  # (presumably thresholds can be non-numeric — confirm against DQD output)
  dqd_checkresults$THRESHOLD_VALUE <- as.character(dqd_checkresults$THRESHOLD_VALUE)
  tablesToUpload <- c("db_profile_results", "db_metadata", "db_cdm_source",
                      "dqd_checkresults", "dqd_overview")
} else {
  tablesToUpload <- c("db_profile_results", "db_metadata", "db_cdm_source")
}
# create the connectionDetails for the database where the results should be uploaded. It is likely this will be different than the database where the dbProfile was run
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "postgresql",
server = "localhost/synthea",
user = "postgres",
password = "postgres"
)
conn <- DatabaseConnector::connect(connectionDetails)
# When the schema is empty, use createTable = TRUE
for (tableName in tablesToUpload) {
DatabaseConnector::insertTable(
connection = conn,
tableName = tableName,
databaseSchema = "cdm_54_results",
data = eval(parse(text=tableName)),
dropTableIfExists = FALSE,
createTable = TRUE,
tempTable = FALSE,
progressBar = TRUE)
}
DatabaseConnector::disconnect(conn)