Skip to content

concept_context_retriever

add_concept_context(concept_table, concept_id_column='matched_concept_id', domain_id_column='matched_domain_id', concept_class_id_column='matched_concept_class_id', vocabulary_id_column='matched_vocabulary_id', add_parents=True, parents_column='matched_parents', add_children=True, children_column='matched_children', add_synonyms=True, synonyms_column='matched_synonyms')

Adds concept context (domain, concept class, vocabulary, parents, children, synonyms) to the given concept table. Multiple entries per concept will be concatenated with semicolons. Children are limited to 10 random entries per concept.

Parameters:

Name Type Description Default
concept_table DataFrame

DataFrame containing concept IDs.

required
concept_id_column str

Name of the column with concept IDs.

'matched_concept_id'
domain_id_column str

Name of the column for the domain ID.

'matched_domain_id'
concept_class_id_column str

Name of the column for the concept class ID.

'matched_concept_class_id'
vocabulary_id_column str

Name of the column for the vocabulary ID.

'matched_vocabulary_id'
add_parents bool

Whether to add parent concepts.

True
parents_column str

Name of the column for parent concept names.

'matched_parents'
add_children bool

Whether to add child concepts.

True
children_column str

Name of the column for child concept names.

'matched_children'
add_synonyms bool

Whether to add concept synonyms.

True
synonyms_column str

Name of the column for concept synonyms.

'matched_synonyms'

Returns:

Type Description
DataFrame

DataFrame enriched with concept context columns.

Source code in src/ariadne/llm_mapping/concept_context_retriever.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def add_concept_context(
    concept_table: pd.DataFrame,
    concept_id_column: str = "matched_concept_id",
    domain_id_column: str = "matched_domain_id",
    concept_class_id_column: str = "matched_concept_class_id",
    vocabulary_id_column: str = "matched_vocabulary_id",
    add_parents: bool = True,
    parents_column: str = "matched_parents",
    add_children: bool = True,
    children_column: str = "matched_children",
    add_synonyms: bool = True,
    synonyms_column: str = "matched_synonyms",
) -> pd.DataFrame:
    """
    Adds concept context (domain, concept class, vocabulary, parents, children, synonyms) to the given concept table.
    Multiple entries per concept will be concatenated with semicolons. Children are limited to 10 random entries per
    concept.

    The context is fetched from the database identified by the ``VOCAB_CONNECTION_STRING`` environment variable and
    left-joined onto ``concept_table``, so every input row is kept. The input DataFrame is not modified.

    Args:
        concept_table: DataFrame containing concept IDs.
        concept_id_column: Name of the column with concept IDs.
        domain_id_column: Name of the column for the domain ID.
        concept_class_id_column: Name of the column for the concept class ID.
        vocabulary_id_column: Name of the column for the vocabulary ID.
        add_parents: Whether to add parent concepts.
        parents_column: Name of the column for parent concept names.
        add_children: Whether to add child concepts.
        children_column: Name of the column for child concept names.
        add_synonyms: Whether to add concept synonyms.
        synonyms_column: Name of the column for concept synonyms.

    Returns:
        DataFrame enriched with concept context columns.
    """

    engine = create_engine(get_environment_variable("VOCAB_CONNECTION_STRING"))
    try:
        # Query each distinct concept only once, however often it occurs in the table.
        concept_ids = concept_table[concept_id_column].unique().tolist()
        query = _create_query(
            concept_ids=concept_ids,
            concept_class_id_column=concept_class_id_column,
            domain_id_column=domain_id_column,
            vocabulary_id_column=vocabulary_id_column,
            add_parents=add_parents,
            parents_column=parents_column,
            add_children=add_children,
            children_column=children_column,
            add_synonyms=add_synonyms,
            synonyms_column=synonyms_column,
            engine=engine,
        )

        with engine.connect() as connection:
            result = connection.execute(query)
            context_df = pd.DataFrame(result.fetchall(), columns=result.keys())
    finally:
        # The engine is created per call; dispose it so its connection pool does not linger.
        engine.dispose()

    # The query result is keyed by "concept_id". Join on a temporary key name that cannot clash with any existing
    # column, so that dropping the key afterwards never removes (or fails to find) a column of the caller's table,
    # e.g. when concept_id_column is itself "concept_id" or the table carries an unrelated "concept_id" column.
    join_key = "_context_concept_id"
    taken_names = set(concept_table.columns) | set(context_df.columns)
    while join_key in taken_names:
        join_key = f"_{join_key}"
    context_df = context_df.rename(columns={"concept_id": join_key})

    merged_df = concept_table.merge(context_df, left_on=concept_id_column, right_on=join_key, how="left")
    return merged_df.drop(columns=[join_key])