Skip to content

medcat.utils.legacy.convert_config

Functions:

Attributes:

CONFIG_KEEP_IDENTICAL module-attribute

CONFIG_KEEP_IDENTICAL = {'cdb_maker', 'preprocessing'}

CONFIG_MOVE module-attribute

CONFIG_MOVE = {'linking': 'components.linking', 'ner': 'components.ner', 'version.description': 'meta.description', 'version.id': 'meta.hash', 'version.ontology': 'meta.ontology', 'general.spacy_model': 'general.nlp.modelname', 'general.spacy_disabled_components': 'general.nlp.disabled_components'}

CONFIG_MOVE_OPTIONAL module-attribute

CONFIG_MOVE_OPTIONAL = {'version.description', 'version.id', 'version.ontology'}

MOVE_WITH_REMOVES module-attribute

MOVE_WITH_REMOVES = {'general': {'checkpoint', 'spacy_model', 'spacy_disabled_components', 'usage_monitor'}, 'annotation_output': {'doc_extended_info'}}

SET_IDENTIFIER module-attribute

SET_IDENTIFIER = '==SET=='

logger module-attribute

logger = getLogger(__name__)

fix_spacy_model_name

fix_spacy_model_name(cnf: Config, tokenizer: BaseTokenizer | None = None) -> None
Source code in medcat-v2/medcat/utils/legacy/convert_config.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def fix_spacy_model_name(
        cnf: Config,
        tokenizer: BaseTokenizer | None = None) -> None:
    """Swap a legacy spacy model name in the config for 'en_core_web_md'.

    The config is only touched when its model name is one of
    'spacy_model', 'en_core_sci_md' or 'en_core_sci_lg'.

    Args:
        cnf (Config): The config whose model name is fixed in place.
        tokenizer (BaseTokenizer | None): When given (and truthy), its
            cached spacy model name is updated to match the config.
            Defaults to None.
    """
    nlp_cnf = cnf.general.nlp
    legacy_names = ('spacy_model', 'en_core_sci_md', 'en_core_sci_lg')
    if nlp_cnf.modelname not in legacy_names:
        return
    logger.info("Fixing spacy model. "
                "Moving from '%s' to 'en_core_web_md'!",
                nlp_cnf.modelname)
    nlp_cnf.modelname = 'en_core_web_md'
    if not tokenizer:
        return
    # NOTE: the tokenizer keeps its own cached copy of the model name;
    #       update it too so that the name of the subsequently saved
    #       files is more descriptive than just 'spacy_model'
    from medcat.tokenizing.spacy_impl.tokenizers import SpacyTokenizer
    spacy_tokenizer = cast(SpacyTokenizer, tokenizer)
    spacy_tokenizer._spacy_model_name = nlp_cnf.modelname

get_config_from_nested_dict

get_config_from_nested_dict(old_data: dict) -> Config

Get the v2 config from v1 json data.

Parameters:

  • old_data

    (dict) –

    The json (nested dict) data.

Returns:

  • Config ( Config ) –

    The v2 config.

Source code in medcat-v2/medcat/utils/legacy/convert_config.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
def get_config_from_nested_dict(old_data: dict) -> Config:
    """Build the v2 config out of v1 json data.

    Args:
        old_data (dict): The json (nested dict) data.

    Returns:
        Config: The v2 config.
    """
    normalised_data = fix_old_style_cnf(old_data)
    new_cnf = Config()
    # The new default provider is regex, whereas v1 models always
    # used spacy, so the provider is set explicitly.
    new_cnf.general.nlp.provider = 'spacy'
    return _make_changes(new_cnf, normalised_data)

get_config_from_old

get_config_from_old(path: str) -> Config

Convert the saved v1 config into a v2 Config.

Parameters:

  • path

    (str) –

    The v1 config path.

Returns:

  • Config ( Config ) –

    The v2 config.

Source code in medcat-v2/medcat/utils/legacy/convert_config.py
192
193
194
195
196
197
198
199
200
201
202
203
def get_config_from_old(path: str) -> Config:
    """Convert the saved v1 config into a v2 Config.

    The file at `path` is parsed as JSON and the resulting nested dict is
    handed over to `get_config_from_nested_dict`.

    Args:
        path (str): The v1 config path.

    Returns:
        Config: The v2 config.
    """
    # Read explicitly as UTF-8 so that parsing does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        old_cnf_data = json.load(f)
    return get_config_from_nested_dict(old_cnf_data)

get_config_from_old_per_cls

get_config_from_old_per_cls(path: str, cls: Type[SerialisableBaseModel]) -> SerialisableBaseModel

Convert the saved v1 config into a v2 Config for a specific class.

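Parameters:

  • path

    (str) –

    The v1 config path.

  • cls

    (Type[SerialisableBaseModel]) –

    The class to convert to.

Returns:

  • SerialisableBaseModel ( SerialisableBaseModel ) –

    The converted config.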

Source code in medcat-v2/medcat/utils/legacy/convert_config.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def get_config_from_old_per_cls(
        path: str, cls: Type[SerialisableBaseModel]) -> SerialisableBaseModel:
    """Load a saved v1 config and convert it to the requested v2 class.

    The class is matched by identity against `Config`, `ConfigMetaCAT`,
    `ConfigTransformersNER` and `ConfigRelCAT`, and the corresponding
    loader is used for the conversion.

    Args:
        path (str): The v1 config path.
        cls (Type[SerialisableBaseModel]): The class to convert to.

    Raises:
        ValueError: If `cls` is none of the classes listed above.

    Returns:
        SerialisableBaseModel: The converted config.
    """
    from medcat.config.config_meta_cat import ConfigMetaCAT
    from medcat.config.config_transformers_ner import ConfigTransformersNER
    from medcat.config.config_rel_cat import ConfigRelCAT

    if cls is Config:
        return get_config_from_old(path)

    # The converter modules are only imported for the class asked for.
    if cls is ConfigMetaCAT:
        from medcat.utils.legacy.convert_meta_cat import (
            load_cnf as convert_meta_cat_cnf)
        return convert_meta_cat_cnf(path)

    if cls is ConfigTransformersNER:
        from medcat.utils.legacy.convert_deid import (
            get_cnf as convert_deid_cnf)
        return convert_deid_cnf(path)

    if cls is ConfigRelCAT:
        from medcat.utils.legacy.convert_rel_cat import (
            load_cnf as convert_rel_cat_cnf)
        return convert_rel_cat_cnf(path)

    raise ValueError(f"The config at '{path}' is not a {cls.__name__}!")

get_val_and_parent_model

get_val_and_parent_model(old_data: Optional[dict], cnf: Optional[Config], path: str) -> tuple[Optional[Any], Optional[BaseModel]]

Get the value and the model to set it for from the path specified.

The paths may be specified in a .-separated manner. This unwraps that and figures out the value in the old model and the class that should be used in the new model.

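Parameters:

  • old_data

    (Optional[dict]) –

    The raw v1 config data.

  • cnf

    (Optional[Config]) –

    The v2 config.

  • path

    (str) –

    The path to look for.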

Returns:

  • tuple[Optional[Any], Optional[BaseModel]]

    tuple[Optional[Any], Optional[BaseModel]]: The value to set, and the model to set it for.

Source code in medcat-v2/medcat/utils/legacy/convert_config.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def get_val_and_parent_model(old_data: Optional[dict],
                             cnf: Optional[Config],
                             path: str
                             ) -> tuple[Optional[Any], Optional[BaseModel]]:
    """Resolve a dotted path to an old value and the new model owning it.

    The `.`-separated path is consumed one segment at a time. Every
    segment is used as a key into the old data (when given), while every
    segment except the last is used as an attribute on the new config
    (when given), so the returned model is the parent of the final
    attribute.

    Args:
        old_data (Optional[dict]): The raw v1 config data.
        cnf (Optional[Config]): The v2 config.
        path (str): The path to look for.

    Returns:
        tuple[Optional[Any], Optional[BaseModel]]: The value to set, and the
            model to set it for.
    """
    current_val = old_data
    parent_model: Optional[BaseModel] = cnf
    remaining = path
    while remaining:
        # peel off the leading segment; `sep` is empty on the last one
        segment, sep, remaining = remaining.partition(".")
        if sep and parent_model is not None:
            # only descend for non-final segments so that we end up
            # with the parent of the final attribute
            parent_model = cast(BaseModel, getattr(parent_model, segment))
        if current_val is None:
            continue
        if path in CONFIG_MOVE_OPTIONAL and segment not in current_val:
            logger.warning(
                "Optional path '%s' not found in old config. Ignoring",
                path)
            current_val = None
            break
        current_val = current_val[segment]
    return current_val, parent_model