Skip to content

settings

Shared, per-component settings dataclasses used by processing classes.

Top-level config classes (Config, ConfigDrugMapping) are composed of these settings so the same structures are reused across different workflows. Each processing class accepts only the settings it actually needs.

ConceptContextSettings dataclass

Controls which context columns `add_concept_context` adds.

Source code in `src/ariadne/utils/settings.py`, lines 154–164:
@dataclass
class ConceptContextSettings:
    """Controls which context columns :func:`add_concept_context` adds.

    Every flag is a plain ``bool`` that defaults to ``True``. This class only
    declares the flags; how each one is interpreted is decided by the code
    that consumes it.
    """

    # NOTE(review): the per-flag notes below are inferred from the field names
    # only -- confirm against ``add_concept_context``.
    include_target_parents: bool = True  # presumably: parents of the target concept
    include_target_children: bool = True  # presumably: children of the target concept
    include_target_synonyms: bool = True  # presumably: synonyms of the target concept
    include_target_domain: bool = True  # presumably: domain of the target concept
    include_target_class: bool = True  # presumably: concept class of the target concept
    include_target_vocabulary: bool = True  # presumably: vocabulary of the target concept
    re_insert_source_target_details: bool = True  # semantics not visible here -- TODO confirm

DrugStructuringSettings dataclass

Everything `LlmDrugStructurer` needs.

Source code in `src/ariadne/utils/settings.py`, lines 179–190:
@dataclass
class DrugStructuringSettings:
    """Settings consumed by :class:`LlmDrugStructurer`.

    The four system-prompt fields default to empty strings. Once the
    dataclass-generated ``__init__`` has run, ``llm_mapper_responses_folder``
    is replaced by whatever ``resolve_path`` returns for it.
    """

    llm_mapper_responses_folder: str = "data/llm_drug_mapper_responses"
    drug_device_system_prompt: str = ""
    ingredient_system_prompt: str = ""
    drug_system_prompt: str = ""
    device_system_prompt: str = ""

    def __post_init__(self) -> None:
        """Run the responses folder through ``resolve_path``."""
        resolved_folder = resolve_path(self.llm_mapper_responses_folder)
        self.llm_mapper_responses_folder = resolved_folder

EvaluationConfig dataclass

Output and gold-standard paths for hierarchy evaluation.

Source code in `src/ariadne/utils/settings.py`, lines 230–236:
@dataclass
class EvaluationConfig:
    """Output and gold-standard paths for hierarchy evaluation.

    All three fields are plain strings stored exactly as given; this class
    defines no ``__post_init__`` and therefore does no path resolution itself.
    """

    # Default location of the attribute gold-standard CSV.
    attribute_gold_standard_path: str = "./data/gold_standards/hierarchy_attributes_snomed_gs.csv"
    # Default location of the parent gold-standard CSV.
    parent_gold_standard_path: str = "./data/gold_standards/hierarchy_snomed_gs.csv"
    # Default output directory.
    output_dir: str = "./data/notebook_results"

HierarchySettings dataclass

Settings block loaded from the optional top-level `hierarchy` config key.

Source code in `src/ariadne/utils/settings.py`, lines 247–256:
@dataclass
class HierarchySettings:
    """Settings block loaded from the optional top-level ``hierarchy`` config key.

    Groups the hierarchy sub-configurations. Every field uses a
    ``default_factory``, so each instance gets its own default sub-objects
    rather than sharing them with other instances.
    """

    models: ModelsConfig = field(default_factory=ModelsConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    scoring: ScoringConfig = field(default_factory=ScoringConfig)
    evaluation: EvaluationConfig = field(default_factory=EvaluationConfig)
    prompts: PromptsConfig = field(default_factory=PromptsConfig)
    # ``list(...)`` copies the module-level default so that mutating one
    # instance's list cannot alter ``_DEFAULT_SNOMED_RELATIONSHIPS``.
    snomed_relationships: List[str] = field(default_factory=lambda: list(_DEFAULT_SNOMED_RELATIONSHIPS))

LlmMapperSettings dataclass

Everything `LlmMapper` needs.

Source code in `src/ariadne/utils/settings.py`, lines 167–176:
@dataclass
class LlmMapperSettings:
    """Settings consumed by :class:`LlmMapper`.

    ``context`` and ``system_prompts`` are created fresh for every instance.
    Once the dataclass-generated ``__init__`` has run,
    ``llm_mapper_responses_folder`` is replaced by whatever ``resolve_path``
    returns for it.
    """

    llm_mapper_responses_folder: str = "data/llm_mapper_responses"
    context: ConceptContextSettings = field(default_factory=ConceptContextSettings)
    system_prompts: List[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Run the responses folder through ``resolve_path``."""
        resolved_folder = resolve_path(self.llm_mapper_responses_folder)
        self.llm_mapper_responses_folder = resolved_folder

MappingPerConceptClassSettings dataclass

Per-concept-class settings used by the drug-mapping pipeline.

Source code in `src/ariadne/utils/settings.py`, lines 193–201:
@dataclass
class MappingPerConceptClassSettings:
    """Per-concept-class settings used by the drug-mapping pipeline.

    Bundles one settings object per mapping stage. Each field uses a
    ``default_factory``, so every instance owns its own sub-settings objects.
    """

    # Settings for the verbatim-mapping stage.
    verbatim_mapping: VerbatimMappingSettings = field(
        default_factory=VerbatimMappingSettings
    )
    # Settings for the vector-search stage.
    vector_search: VectorSearchSettings = field(default_factory=VectorSearchSettings)
    # Settings for the LLM-mapping stage.
    llm_mapping: LlmMapperSettings = field(default_factory=LlmMapperSettings)

ModelsConfig dataclass

LLM / embedding model identifiers for hierarchy extraction.

Source code in `src/ariadne/utils/settings.py`, lines 204–210:
@dataclass
class ModelsConfig:
    """LLM / embedding model identifiers for hierarchy extraction.

    Each field is a model identifier string; the values are stored as given
    and are not validated here.
    """

    embedding: str = "text-embedding-3-large"  # identifier of the embedding model
    extraction: str = "o3"  # identifier of the model used for extraction
    selection: str = "o3"  # identifier of the model used for selection

PromptsConfig dataclass

Prompt templates for hierarchy extraction and candidate selection.

Source code in `src/ariadne/utils/settings.py`, lines 239–244:
@dataclass
class PromptsConfig:
    """Prompt templates for hierarchy extraction and candidate selection.

    Both templates default to the empty string.
    """

    extraction: str = ""  # prompt template for the extraction step
    selection: str = ""  # prompt template for the candidate-selection step

RetrievalConfig dataclass

Retrieval-stage hyper-parameters for hierarchy extraction.

Source code in `src/ariadne/utils/settings.py`, lines 213–219:
@dataclass
class RetrievalConfig:
    """Retrieval-stage hyper-parameters for hierarchy extraction.

    All values are plain integers; no range validation happens here.
    """

    # NOTE(review): the meanings below are inferred from the field names --
    # confirm against the retrieval code that reads them.
    num_reference_examples: int = 5  # presumably: how many reference examples to retrieve
    top_k_per_category: int = 20  # presumably: candidates kept per category
    hnsw_ef_search: int = 200  # presumably: the HNSW ``ef_search`` query parameter

ScoringConfig dataclass

Similarity score overrides used by hierarchy ranking.

Source code in `src/ariadne/utils/settings.py`, lines 222–227:
@dataclass
class ScoringConfig:
    """Similarity score overrides used by hierarchy ranking.

    Both values are floats stored as given; how the ranking code applies them
    is not visible from this class.
    """

    reference_similarity: float = 0.9  # NOTE(review): presumably the score for reference matches -- confirm
    hierarchy_similarity: float = 0.85  # NOTE(review): presumably the score for hierarchy matches -- confirm

StandardConceptFilter dataclass

Controls which concepts are included in the verbatim-mapping vocabulary download.

Source code in `src/ariadne/utils/settings.py`, lines 104–113:
@dataclass
class StandardConceptFilter:
    """Controls which concepts are included in the verbatim-mapping vocabulary download.

    The three list-valued filters default to ``None``.
    NOTE(review): ``None`` presumably means "do not filter on this column" --
    confirm against the download code that consumes this class.
    """

    vocabularies: Optional[List[str]] = None  # optional list of vocabulary names/ids
    domain_ids: Optional[List[str]] = None  # optional list of domain ids
    concept_class_ids: Optional[List[str]] = None  # optional list of concept class ids
    include_classification_concepts: bool = False  # off by default
    include_synonyms: bool = True  # on by default
    standard_concept: bool = True  # True → restrict to standard_concept = 'S'

TermCleanerSettings dataclass

Everything `TermCleaner` needs.

Source code in `src/ariadne/utils/settings.py`, lines 119–123:
@dataclass
class TermCleanerSettings:
    """Everything :class:`TermCleaner` needs.

    Currently a single field: the system prompt, which defaults to the empty
    string.
    """

    system_prompt: str = ""  # system prompt text; empty by default

VectorSearchSettings dataclass

Everything concept searchers read from config.

Source code in `src/ariadne/utils/settings.py`, lines 147–151:
@dataclass
class VectorSearchSettings:
    """Everything concept searchers read from config.

    Currently a single integer field, ``max_candidates``.
    """

    max_candidates: int = 25  # NOTE(review): presumably the cap on candidates returned per search -- confirm

VerbatimMappingSettings dataclass

Everything `download_terms`, `VocabVerbatimTermMapper`, and `TermNormalizer` need.

Source code in `src/ariadne/utils/settings.py`, lines 126–144:
@dataclass
class VerbatimMappingSettings:
    """Settings shared by :func:`download_terms`, :class:`VocabVerbatimTermMapper`
    and :class:`TermNormalizer`.

    Once the dataclass-generated ``__init__`` has run, ``terms_folder``,
    ``verbatim_mapping_index_file`` and ``log_folder`` are each replaced by
    the result of ``resolve_path``. The list fields and the concept filter are
    created fresh for every instance.
    """

    terms_folder: str = "data/terms"
    verbatim_mapping_index_file: str = "data/verbatim_mapping_index.pkl"
    download_batch_size: int = 100_000
    log_folder: str = "logs"
    substrings_to_remove: List[str] = field(default_factory=list)
    preferred_vocabulary_ids: List[str] = field(default_factory=list)
    standard_concept_filter: StandardConceptFilter = field(default_factory=StandardConceptFilter)

    def __post_init__(self) -> None:
        """Run every path-valued field through ``resolve_path``."""
        # Same order as the fields are declared: folder, index file, log folder.
        for attr_name in ("terms_folder", "verbatim_mapping_index_file", "log_folder"):
            setattr(self, attr_name, resolve_path(getattr(self, attr_name)))

build_dataclass(cls, data)

Recursively build a dataclass instance from a plain dict.

Source code in `src/ariadne/utils/settings.py`, lines 53–82:
def build_dataclass(cls: Type[Any], data: Dict[str, Any] | None) -> Any:
    """Recursively build a dataclass instance from a plain dict."""
    if data is None:
        data = {}
    if not is_dataclass(cls):
        return data
    kw: dict[str, Any] = {}
    # Resolve string annotations to actual types
    try:
        resolved_hints = get_type_hints(cls)
    except Exception:
        resolved_hints = {}
    for f in fields(cls):
        if f.name not in data:
            continue
        value = data[f.name]
        # Use resolved type hint if available, else fall back to f.type
        raw_type = resolved_hints.get(f.name, f.type)
        # Unwrap Optional[X] / X | None to get the inner type
        origin = getattr(raw_type, "__origin__", None)
        if origin is not None:
            args = getattr(raw_type, "__args__", ())
            # For Optional[X], args is (X, NoneType); pick the non-None arg
            non_none = [a for a in args if a is not type(None)]
            raw_type = non_none[0] if non_none else raw_type
        if is_dataclass(raw_type) and isinstance(value, dict):
            kw[f.name] = build_dataclass(raw_type, value)
        else:
            kw[f.name] = value
    return cls(**kw)

serialize_dataclass(obj)

Recursively serialize a dataclass (or list/dict of dataclasses) to plain dicts.

Source code in `src/ariadne/utils/settings.py`, lines 85–98:
def serialize_dataclass(obj: Any) -> Any:
    """Convert a dataclass, or a list/dict containing dataclasses, to plain data.

    Args:
        obj: Any value. Dataclasses become ``{field name: serialized value}``
            dicts. Dicts are rebuilt with serialized values and their keys
            left untouched. Lists are rebuilt element by element.

    Returns:
        The plain-data equivalent of ``obj``. Values of any other type,
        tuples included, are returned as-is.
    """
    if is_dataclass(obj):
        return {
            spec.name: serialize_dataclass(getattr(obj, spec.name))
            for spec in fields(obj)
        }
    if isinstance(obj, dict):
        return {key: serialize_dataclass(entry) for key, entry in obj.items()}
    if isinstance(obj, list):
        return [serialize_dataclass(element) for element in obj]
    return obj