Skip to content

concept_search_evaluator

Evaluate the concept search results against the gold standard.

Parameters:

Name Type Description Default
search_results DataFrame

Pandas DataFrame containing the results of the concept search.

required
output_file str | Path

Path to save the evaluation results.

required
gold_standard_file str

Path to the CSV file containing the gold standard mappings.

'data/gold_standards/exact_matching_gs.csv'
source_id_column str

Name of the column in the search results with source concept IDs.

'source_concept_id'
term_column str

Name of the column in the search results with the search terms.

'cleaned_term'
matched_concept_id_column str

Name of the column in the search results with matched concept IDs.

'matched_concept_id'
matched_concept_name_column str

Name of the column in the search results with matched concept names.

'matched_concept_name'
match_rank_column str

Name of the column in the search results with the rank of the matched concepts.

'match_rank'

Returns:

Type Description
None

None. Evaluation results are written to the specified output file.

Source code in src/ariadne/evaluation/concept_search_evaluator.py
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def evaluate_concept_search(
    search_results: pd.DataFrame,
    output_file: str | Path,
    gold_standard_file: str = "data/gold_standards/exact_matching_gs.csv",
    source_id_column: str = "source_concept_id",
    term_column: str = "cleaned_term",
    matched_concept_id_column: str = "matched_concept_id",
    matched_concept_name_column: str = "matched_concept_name",
    match_rank_column: str = "match_rank",
) -> None:
    """
    Evaluate the concept search results against the gold standard.

    Args:
        search_results: Pandas DataFrame containing the results of the concept search.
        output_file: Path to save the evaluation results.
        gold_standard_file: Path to the CSV file containing the gold standard mappings.
        source_id_column: Name of the column in the search results with source concept IDs.
        term_column: Name of the column in the search results with the search terms.
        matched_concept_id_column: Name of the column in the search results with matched concept IDs.
        matched_concept_name_column: Name of the column in the search results with matched concept names.
        match_rank_column: Name of the column in the search results with the rank of the matched concepts.

    Returns:
        None. Evaluation results are written to the specified output file.
    """
    detail_strings: list[str] = []
    gold_standard = pd.read_csv(resolve_path(gold_standard_file))
    evaluated_gs_count = 0
    # NOTE(review): the accumulated metric is sum(1/rank) / n, i.e. Mean
    # Reciprocal Rank; the output label "Mean Average Precision" is kept
    # unchanged for backward compatibility of the report format.
    mean_average_precision = 0
    recall_1 = 0
    recall_3 = 0
    recall_10 = 0
    recall_25 = 0

    for source_id, group in search_results.groupby(source_id_column):
        gs_entry = gold_standard[gold_standard[SOURCE_CONCEPT_ID] == source_id]
        if gs_entry.empty:
            # No gold standard mapping for this source concept; nothing to score.
            continue
        gs_entry = gs_entry.iloc[0]
        gs_source_term = gs_entry[SOURCE_TERM]
        gs_concept_id = gs_entry[TARGET_CONCEPT_ID]
        gs_concept_id_b = gs_entry[TARGET_CONCEPT_ID_B]
        # Broad matches are not exact targets, so exclude them from scoring.
        if gs_entry[PREDICATE] == BROAD_MATCH:
            gs_concept_id = None
        if gs_entry[PREDICATE_B] == BROAD_MATCH:
            gs_concept_id_b = None
        # Normalize missing CSV values (NaN) to None so later equality and
        # None checks behave consistently (NaN is truthy and NaN != NaN).
        if gs_concept_id is not None and pd.isna(gs_concept_id):
            gs_concept_id = None
        if gs_concept_id_b is not None and pd.isna(gs_concept_id_b):
            gs_concept_id_b = None
        if gs_concept_id is None and gs_concept_id_b is None:
            continue
        evaluated_gs_count += 1
        gs_rank = group.loc[group[matched_concept_id_column] == gs_concept_id, match_rank_column]
        if gs_concept_id_b is not None:
            # Prefer the secondary target when it was found at a better
            # (lower) rank than the primary one.
            gs_rank_b = group.loc[group[matched_concept_id_column] == gs_concept_id_b, match_rank_column]
            if not gs_rank_b.empty and (gs_rank.empty or gs_rank_b.iloc[0] < gs_rank.iloc[0]):
                gs_rank = gs_rank_b
                gs_concept_id = gs_concept_id_b
        detail_strings.append(f"Source term: {gs_source_term} ({source_id})")
        detail_strings.append(f"Searched term: {group[term_column].iloc[0]}")
        if gs_rank.empty:
            detail_strings.append("Gold standard concept not found")
            gs_concept_name = gs_entry[TARGET_CONCEPT_NAME]
            detail_strings.append(f"Correct target was: {gs_concept_name} ({gs_concept_id})")
        else:
            rank = gs_rank.iloc[0]
            detail_strings.append(f"Gold standard concept rank: {rank}")
            mean_average_precision += 1 / rank
            if rank <= 1:
                recall_1 += 1
            if rank <= 3:
                recall_3 += 1
            if rank <= 10:
                recall_10 += 1
            if rank <= 25:
                recall_25 += 1

        detail_strings.append("")

        # Per-source table of all candidates, flagging gold standard hits.
        table = group[[match_rank_column, matched_concept_id_column, matched_concept_name_column]].copy()
        correct = np.where(table[matched_concept_id_column] == gs_concept_id, "Yes", "")
        if gs_concept_id_b is not None:
            correct_b = np.where(table[matched_concept_id_column] == gs_concept_id_b, "Yes", "")
            correct = np.where(correct == "Yes", "Yes", correct_b)

        table.insert(1, "Correct", correct)
        detail_strings.append(table.to_string(index=False))
        detail_strings.append("")

    # Guard against ZeroDivisionError when no gold standard entries matched;
    # in that case all metrics are reported as 0.
    if evaluated_gs_count > 0:
        mean_average_precision /= evaluated_gs_count
        recall_1 /= evaluated_gs_count
        recall_3 /= evaluated_gs_count
        recall_10 /= evaluated_gs_count
        recall_25 /= evaluated_gs_count

    summary_strings = [
        f"Evaluated gold standard concepts: {evaluated_gs_count}",
        f"Mean Average Precision: {mean_average_precision}",
        f"Recall@1: {recall_1}",
        f"Recall@3: {recall_3}",
        f"Recall@10: {recall_10}",
        f"Recall@25: {recall_25}",
    ]

    with open(output_file, "w", encoding="UTF-8") as f:
        f.write("\n".join(summary_strings))
        f.write("\n\n")
        f.write("\n".join(detail_strings))
        f.write("\n")

    print(f"Evaluation complete. Results written to {output_file}")