Skip to contents

In previous vignettes we have seen numerous R functions that can help us to add a cohort of interest to a cdm reference. When our cdm reference is to tables in a database, as is often the code, our R code will have been translated to SQL that is run against tables in the databases (for more details on how this is all implemented see https://oxford-pharmacoepi.github.io/Tidy-R-programming-with-OMOP/).

Let’s again work with Eunomia and get the codes needed to create a set of drug cohorts.

library(omock)
library(CodelistGenerator)
library(PatientProfiles)
library(CohortConstructor)
library(dplyr)

cdm <- mockCdmFromDataset(datasetName = "GiBleed", source = "duckdb")

drug_codes <- getDrugIngredientCodes(
  cdm = cdm, 
  name = c("acetaminophen", "amoxicillin", "diclofenac", "simvastatin", "warfarin")
)

To capture all the SQL executed as we use CohortConstructor functions we can set a global option. For this, we just need to point to a directory in which we’ll save each SQL statement run behind the scenes. Note that in this example we’re using duckdb so the SQL is for this database management system. If you were running on a different type of database the SQL would be adapted accordingly.

dir_sql <- file.path(tempdir(), "sql_folder")
dir.create(dir_sql)
options("omopgenerics.log_sql_path" = dir_sql)

cdm$drugs <- conceptCohort(cdm, 
                           conceptSet = drug_codes,
                           exit = "event_end_date",
                           name = "drugs")

# print sql in order they were saved
files <- file.info(list.files(dir_sql, full.names = TRUE))
sorted_files <- rownames(files[order(files$ctime),])
for(i in seq_along(sorted_files)) {
  cat(paste0("### ", sorted_files[i], "\n\n"))
  sql_with_quotes <- paste0('"', paste(readLines(sorted_files[i]), collapse = '\n'), '"')
  cat(sql_with_quotes, "\n```\n\n")
}
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00003_on_2025_10_10_at_17_27_49.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: tmp_001_og_004_1760117269
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_uploadCohortCodelist
#> time_taken: 0.147 seconds
#> <SQL>
#> SELECT
#>   cohort_definition_id,
#>   CAST(concept_id AS INTEGER) AS concept_id,
#>   LOWER(domain_id) AS domain_id
#> FROM (
#>   SELECT test_tmp_001_og_004_1760117269.*, domain_id
#>   FROM results.test_tmp_001_og_004_1760117269
#>   LEFT JOIN concept
#>     ON (test_tmp_001_og_004_1760117269.concept_id = concept.concept_id)
#> ) q01" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00004_on_2025_10_10_at_17_27_49.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: og_005_1760117270_1
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_tempCohort_
#> time_taken: 0.124 seconds
#> <SQL>
#> SELECT
#>   person_id AS subject_id,
#>   drug_concept_id AS concept_id,
#>   drug_exposure_start_date AS cohort_start_date,
#>   drug_exposure_end_date AS cohort_end_date,
#>   cohort_definition_id
#> FROM drug_exposure
#> INNER JOIN (
#>   SELECT concept_id, cohort_definition_id
#>   FROM results.test_tmp_001_og_004_1760117269
#>   WHERE (domain_id IN ('drug'))
#> ) RHS
#>   ON (drug_exposure.drug_concept_id = RHS.concept_id)" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00005_on_2025_10_10_at_17_27_49.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_conceptCohort_reduce_
#> time_taken: 0.085 seconds
#> <SQL>
#> SELECT
#>   cohort_definition_id,
#>   subject_id,
#>   cohort_start_date,
#>   COALESCE(cohort_end_date, cohort_start_date) AS cohort_end_date
#> FROM results.test_og_005_1760117270_1" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00006_on_2025_10_10_at_17_27_50.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_fulfillCohortReqs_observationJoin_
#> time_taken: 0.109 seconds
#> <SQL>
#> SELECT
#>   test_drugs.*,
#>   observation_period_id,
#>   observation_period_start_date,
#>   observation_period_end_date
#> FROM results.test_drugs
#> LEFT JOIN observation_period
#>   ON (test_drugs.subject_id = observation_period.person_id)" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00007_on_2025_10_10_at_17_27_50.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_fulfillCohortReqs_useRecordsBeforeObservation_
#> time_taken: 0.159 seconds
#> <SQL>
#> SELECT
#>   cohort_definition_id,
#>   subject_id,
#>   cohort_start_date,
#>   CASE WHEN (observation_period_end_date >= cohort_end_date) THEN cohort_end_date WHEN NOT (observation_period_end_date >= cohort_end_date) THEN observation_period_end_date END AS cohort_end_date
#> FROM results.test_drugs
#> WHERE
#>   (cohort_start_date >= observation_period_start_date) AND
#>   (cohort_start_date <= observation_period_end_date)" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00008_on_2025_10_10_at_17_27_51.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_fulfillCohortReqs_filterStart_
#> time_taken: 0.093 seconds
#> <SQL>
#> SELECT test_drugs.*
#> FROM results.test_drugs
#> WHERE (NOT((cohort_start_date IS NULL)))" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00009_on_2025_10_10_at_17_27_52.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_fulfillCohortReqs_filterStartEnd_
#> time_taken: 0.129 seconds
#> <SQL>
#> SELECT
#>   cohort_definition_id,
#>   subject_id,
#>   cohort_start_date,
#>   CASE WHEN (cohort_start_date <= cohort_end_date) THEN cohort_end_date WHEN NOT (cohort_start_date <= cohort_end_date) THEN cohort_start_date END AS cohort_end_date
#> FROM results.test_drugs" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00010_on_2025_10_10_at_17_27_52.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_fulfillCohortReqs_sex_
#> time_taken: 0.136 seconds
#> <SQL>
#> SELECT q01.*
#> FROM (
#>   SELECT test_drugs.*, gender_concept_id, year_of_birth
#>   FROM results.test_drugs
#>   INNER JOIN person
#>     ON (test_drugs.subject_id = person.person_id)
#> ) q01
#> WHERE (NOT((gender_concept_id IS NULL)))" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00011_on_2025_10_10_at_17_27_52.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_fulfillCohortReqs_birth_year_
#> time_taken: 0.126 seconds
#> <SQL>
#> SELECT cohort_definition_id, subject_id, cohort_start_date, cohort_end_date
#> FROM results.test_drugs
#> WHERE (NOT((year_of_birth IS NULL)))" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00012_on_2025_10_10_at_17_27_53.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: og_006_1760117274
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_joinOverlap_workingTbl_
#> time_taken: 0.166 seconds
#> <SQL>
#> SELECT q01.*, -1.0 AS date_id
#> FROM (
#>   SELECT cohort_definition_id, subject_id, cohort_start_date AS date
#>   FROM results.test_drugs
#> ) q01
#> 
#> UNION ALL
#> 
#> SELECT q01.*, 1.0 AS date_id
#> FROM (
#>   SELECT cohort_definition_id, subject_id, cohort_end_date AS date
#>   FROM results.test_drugs
#> ) q01" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00013_on_2025_10_10_at_17_27_53.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: og_006_1760117274
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_joinOverlap_ids_
#> time_taken: 0.609 seconds
#> <SQL>
#> SELECT
#>   cohort_definition_id,
#>   subject_id,
#>   SUM(CAST(era_id AS NUMERIC)) OVER (PARTITION BY cohort_definition_id, subject_id ORDER BY date, date_id ROWS UNBOUNDED PRECEDING) AS era_id,
#>   "name",
#>   date
#> FROM (
#>   SELECT
#>     test_og_006_1760117274.*,
#>     SUM(date_id) OVER (PARTITION BY cohort_definition_id, subject_id ORDER BY date, date_id ROWS UNBOUNDED PRECEDING) AS cum_id,
#>     CASE WHEN (date_id = -1.0) THEN 'cohort_start_date' WHEN NOT (date_id = -1.0) THEN 'cohort_end_date' END AS "name",
#>     CASE WHEN (date_id = -1.0) THEN 1.0 WHEN NOT (date_id = -1.0) THEN 0.0 END AS era_id
#>   FROM results.test_og_006_1760117274
#> ) q01
#> WHERE (cum_id = 0.0 OR (cum_id = -1.0 AND date_id = -1.0))" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00014_on_2025_10_10_at_17_27_54.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: og_006_1760117274
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_joinOverlap_pivot_wider_
#> time_taken: 0.180 seconds
#> <SQL>
#> SELECT
#>   cohort_definition_id,
#>   subject_id,
#>   MAX(CASE WHEN ("name" = 'cohort_end_date') THEN date END) AS cohort_end_date,
#>   MAX(CASE WHEN ("name" = 'cohort_start_date') THEN date END) AS cohort_start_date
#> FROM results.test_og_006_1760117274
#> GROUP BY cohort_definition_id, subject_id, era_id" 
#> ```
#> 
#> ### /tmp/RtmpmpTotL/sql_folder/logged_query_00015_on_2025_10_10_at_17_27_54.sql
#> 
#> "type: compute
#> schema: results
#> prefix: test_
#> name: drugs
#> temporary: FALSE
#> overwrite: TRUE
#> log_prefix: CohortConstructor_joinOverlap_relocate_
#> time_taken: 0.082 seconds
#> <SQL>
#> SELECT DISTINCT
#>   cohort_definition_id,
#>   subject_id,
#>   cohort_start_date,
#>   cohort_end_date
#> FROM results.test_og_006_1760117274" 
#> ```

If we want even more detail, we also have the option to see the execution plan along with the SQL.

dir_explain <- file.path(tempdir(), "explain_folder")
dir.create(dir_explain)
options("omopgenerics.log_sql_explain_path" = dir_explain)

cdm$drugs <- cdm$drugs |> 
  requireIsFirstEntry()

files <- list.files(dir_explain, full.names = TRUE)
file_names <- list.files(dir_explain, full.names = FALSE)

for(i in seq_along(files)) {
  cat(paste0("### ", file_names[i], "\n\n"))
  sql_with_quotes <- paste0('"', paste(readLines(files[i]), collapse = '\n'), '"')
  cat(sql_with_quotes, "\n```\n\n")
}