RunAndUploadDbProfile.Rmd
This document walks through the steps to run the database profile functions and upload the results to a location where they can be referenced when running database diagnostics.
Call the createConnectionDetails function to create the connectionDetails object, supplying the information needed to connect to your database. Detailed instructions on how to do this can be found in the DatabaseConnector documentation.
# Install the package
remotes::install_github("OHDSI/DbDiagnostics")
# Call the library
library(DbDiagnostics)
# Turn off the connection pane to speed up run time
options(connectionObserver = NULL)
# Set up connectionDetails to connect to the database
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/synthea",
  user = "postgres",
  password = Sys.getenv("POSTGRES_PW"),
  pathToDriver = "/Users/clairblacketer/Documents/DatabaseConnector_Jars"
)
# The schema where your CDM-structured data are housed
cdmDatabaseSchema <- "cdm_54_test"
# The schema where your Achilles results are or will be housed
resultsDatabaseSchema <- "cdm_54_results"
# The schema where your vocabulary tables are housed, typically the same as the cdmDatabaseSchema
vocabDatabaseSchema <- cdmDatabaseSchema
# A unique, identifiable name for your database
cdmSourceName <- "Synthea_v54_OHDSI_Example"
# The folder where your results should be written
outputFolder <- "/Users/clairblacketer/Documents/Output/DbProfiles"
# The version of the OMOP CDM you are currently on; v5.3 and v5.4 are supported.
cdmVersion <- "5.4"
# Whether the function should append to existing Achilles tables or create new ones
appendAchilles <- FALSE
# The schema where any missing Achilles analyses should be written. Only set if appendAchilles = FALSE
writeTo <- "synthea_achilles"
# Whether to round to the 10s or 100s place. Valid inputs are 10 or 100, default is 10.
roundTo <- 10
# Vector of concepts to exclude from the output. Note: No patient-level data is pulled as part of the package or included as part of the output
excludedConcepts <- c()
# Whether the DQD should be run as part of the profile exercise
addDQD <- FALSE
DbDiagnostics::executeDbProfile(connectionDetails = connectionDetails,
                                cdmDatabaseSchema = cdmDatabaseSchema,
                                resultsDatabaseSchema = resultsDatabaseSchema,
                                writeTo = writeTo,
                                vocabDatabaseSchema = vocabDatabaseSchema,
                                cdmSourceName = cdmSourceName,
                                outputFolder = outputFolder,
                                cdmVersion = cdmVersion,
                                appendAchilles = appendAchilles,
                                roundTo = roundTo,
                                excludedConcepts = excludedConcepts,
                                addDQD = addDQD)
Upon completion, the summary statistics results *.csv files, the CDM_SOURCE table, the metadata.csv file, and the data quality dashboard JSON file (if specified) will be located in a zip file in the output location you set as part of the execute function.
Be sure to unzip the dbProfile results generated in the step above. Then, set the location of the unzipped results as the resultsLocation parameter.
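For example, a minimal sketch of locating and unzipping the profile results, assuming the zip was written to the outputFolder set above (the zip file pattern and extraction folder are illustrative, not part of the package):

# Hypothetical example: find the zip written by executeDbProfile and extract it
zipFile <- list.files(outputFolder, pattern = "\\.zip$", full.names = TRUE)[1]
utils::unzip(zipFile, exdir = file.path(outputFolder, "unzipped"))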
# set the location of the unzipped results that were generated from executeDbProfile
resultsLocation <- "<location of unzipped results>"
# set a parameter detailing if the DQD was run
addDQD <- FALSE
# set the schema name in your database where you want the files uploaded
databaseSchema <- "db_profile"
# Read in the results and make sure the columns are characters to facilitate dbDiagnostics execution
db_profile_results <- read.csv(paste0(resultsLocation, "/db_profile_results.csv"),
                               stringsAsFactors = FALSE,
                               colClasses = c("stratum_1" = "character",
                                              "stratum_2" = "character",
                                              "stratum_3" = "character",
                                              "stratum_4" = "character",
                                              "stratum_5" = "character"))
db_profile_results_dist <- read.csv(paste0(resultsLocation, "/db_profile_results_dist.csv"),
                                    stringsAsFactors = FALSE,
                                    colClasses = c("stratum_1" = "character",
                                                   "stratum_2" = "character",
                                                   "stratum_3" = "character",
                                                   "stratum_4" = "character",
                                                   "stratum_5" = "character"))
# read in the metadata
metadataFile <- list.files(path = resultsLocation, pattern = "metadata\\.csv$")
db_metadata <- read.csv(paste0(resultsLocation, "/", metadataFile),
                        stringsAsFactors = FALSE,
                        colClasses = c("dataDoiType" = "character",
                                       "dataShortName" = "character"))
# read in the cdm_source table
cdmSourceFile <- list.files(path = resultsLocation, pattern = "cdm_source\\.csv$")
db_cdm_source <- read.csv(paste0(resultsLocation, "/", cdmSourceFile),
                          stringsAsFactors = FALSE)
# determine which tables should be uploaded based on whether the DQD was included
if (addDQD) {
  # read in the data quality dashboard results; the DQD JSON file included in
  # the unzipped results ends in "_DbProfile.json"
  dqdJsonFile <- list.files(path = resultsLocation, pattern = "_DbProfile\\.json$")
  dqdJsonDf <- jsonlite::fromJSON(paste0(resultsLocation, "/", dqdJsonFile),
                                  simplifyDataFrame = TRUE)
  dqd_overview <- as.data.frame(dqdJsonDf$Overview)
  dqd_checkresults <- as.data.frame(dqdJsonDf$CheckResults)
  dqd_checkresults$THRESHOLD_VALUE <- as.character(dqd_checkresults$THRESHOLD_VALUE)
  tablesToUpload <- c("db_profile_results",
                      "db_profile_results_dist",
                      "db_metadata",
                      "db_cdm_source",
                      "dqd_checkresults",
                      "dqd_overview")
} else {
  tablesToUpload <- c("db_profile_results",
                      "db_profile_results_dist",
                      "db_metadata",
                      "db_cdm_source")
}
# create the connectionDetails for the database where the results should be uploaded.
# It is likely this will be different from the database where the dbProfile was run.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/synthea",
  user = "postgres",
  password = Sys.getenv("POSTGRES_PW")
)
conn <- DatabaseConnector::connect(connectionDetails)
# When the schema is empty, use createTable = TRUE
for (tableName in tablesToUpload) {
  DatabaseConnector::insertTable(
    connection = conn,
    tableName = tableName,
    databaseSchema = databaseSchema,
    data = get(tableName),
    dropTableIfExists = TRUE,
    createTable = TRUE,
    tempTable = FALSE,
    progressBar = TRUE)
}
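# Optional sanity check before disconnecting (a sketch, not part of the package):
# confirm one of the uploaded tables has rows. It assumes the uploaded table name
# "db_profile_results" and the databaseSchema set above.
DatabaseConnector::querySql(conn,
                            sprintf("SELECT COUNT(*) AS row_count FROM %s.db_profile_results",
                                    databaseSchema))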
DatabaseConnector::disconnect(conn)