Skip to content

medcat.utils.legacy.convert_cdb

Classes:

Functions:

Attributes:

CUI2KEYS module-attribute

CUI2KEYS = {'cui2names', 'cui2snames', 'cui2context_vectors', 'cui2count_train', 'cui2info', 'cui2tags', 'cui2type_ids', 'cui2preferred_name', 'cui2average_confidence'}

CUI2KEYS_OPTIONAL module-attribute

CUI2KEYS_OPTIONAL = {'cui2info'}

EXPECTED_USEFUL_KEYS module-attribute

EXPECTED_USEFUL_KEYS = ['name2cuis', 'name2cuis2status', 'name2count_train', 'name_isupper', 'snames', 'cui2names', 'cui2snames', 'cui2context_vectors', 'cui2count_train', 'cui2tags', 'cui2type_ids', 'cui2preferred_name', 'cui2average_confidence', 'addl_info', 'vocab']

NAME2KEYS module-attribute

NAME2KEYS = {'name2cuis', 'name2cuis2status', 'name2count_train', 'name_isupper'}

OPTIONAL_NAME2_KEYS module-attribute

OPTIONAL_NAME2_KEYS = {'name_isupper'}

TO_RENAME module-attribute

TO_RENAME = {'vocab': 'token_counts'}

logger module-attribute

logger = getLogger(__name__)

CustomUnpickler

Bases: Unpickler

Methods:

find_class

find_class(module, name)
Source code in medcat-v2/medcat/utils/legacy/convert_cdb.py
25
26
27
28
29
30
31
32
def find_class(self, module, name):
    """Resolve a pickled class reference, tolerating missing classes.

    The lookup is delegated to the parent implementation first. If that
    fails because the module or the attribute cannot be found, a warning
    is logged and the ``LegacyClassNotFound`` placeholder is returned
    instead of propagating the error.

    Args:
        module: The module name recorded in the pickle stream.
        name: The attribute (class) name recorded in the pickle stream.

    Returns:
        The resolved object, or ``LegacyClassNotFound`` when the lookup
        raised ``AttributeError`` or ``ModuleNotFoundError``.
    """
    try:
        resolved = super().find_class(module, name)
    except (AttributeError, ModuleNotFoundError):
        # The legacy class is not importable here; substitute the
        # placeholder so that loading can continue.
        logger.warning(
            "Missing class %s.%s, replacing with LegacyClassNotFound.",
            module, name)
        return LegacyClassNotFound
    return resolved

LegacyClassNotFound

LegacyClassNotFound(*args, **kwargs)

Attributes:

Source code in medcat-v2/medcat/utils/legacy/convert_cdb.py
16
17
18
def __init__(self, *args, **kwargs):
    """Keep whatever constructor arguments were supplied.

    Args:
        *args: Positional arguments; stored as a tuple on ``self.args``.
        **kwargs: Keyword arguments; stored as a dict on ``self.kwargs``.
    """
    self.args, self.kwargs = args, kwargs

args instance-attribute

args = args

kwargs instance-attribute

kwargs = kwargs

convert_data

convert_data(all_data: dict, fix_spacy_model_name: bool = True) -> CDB

Convert the raw v1 data into a CDB.

Parameters:

  • all_data

    (dict) –

    The raw v1 data off disk.

  • fix_spacy_model_name

    (bool, default: True ) –

    Whether to fix the spacy model name. Older models may have unsupported spacy model names, so these may sometimes need to be fixed. Defaults to True.

Returns:

  • CDB ( CDB ) –

    The v2 CDB.

Source code in medcat-v2/medcat/utils/legacy/convert_cdb.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def convert_data(all_data: dict, fix_spacy_model_name: bool = True) -> CDB:
    """Build a v2 CDB out of the raw v1 data.

    The ``'cdb'`` entry of ``all_data`` is used to populate a freshly
    created CDB (CUI info, name info, and the renamed attributes). If
    ``all_data`` also has a ``'config'`` entry, the config is rebuilt
    from it and attached to the CDB.

    Args:
        all_data (dict): The raw v1 data off disk. Must contain the
            ``'cdb'`` key; the ``'config'`` key is optional.
        fix_spacy_model_name (bool): Whether to fix the spacy model name.
            Older models may have unsupported spacy model names, so these
            may sometimes need to be fixed. Only has an effect when a
            config is present in ``all_data``. Defaults to True.

    Returns:
        CDB: The v2 CDB.
    """
    raw_cdb = all_data['cdb']
    # Start from an empty CDB with a default config and fill it in stages.
    converted = _add_cui_info(CDB(Config()), raw_cdb)
    converted = _add_name_info(converted, raw_cdb)
    update_names(converted, raw_cdb)
    if 'config' not in all_data:
        # Nothing further to do when no config was stored with the data.
        return converted
    logger.info("Loading old style CDB with config included.")
    converted.config = get_config_from_nested_dict(all_data['config'])
    if fix_spacy_model_name:
        apply_spacy_model_fix(converted.config)
    return converted

get_cdb_from_old

get_cdb_from_old(old_path: str, fix_spacy_model_name: bool = True) -> CDB

Get the v2 CDB from a v1 CDB path.

Parameters:

  • old_path

    (str) –

    The v1 CDB path.

  • fix_spacy_model_name

    (bool, default: True ) –

    Whether to fix the spacy model name. Older models may have unsupported spacy model names, so these may sometimes need to be fixed. Defaults to True.

Returns:

  • CDB ( CDB ) –

    The v2 CDB.

Source code in medcat-v2/medcat/utils/legacy/convert_cdb.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
def get_cdb_from_old(old_path: str,
                     fix_spacy_model_name: bool = True) -> CDB:
    """Load a v1 CDB file and convert it into a v2 CDB.

    Args:
        old_path (str): The v1 CDB path.
        fix_spacy_model_name (bool): Whether to fix the spacy model name.
            Older models may have unsupported spacy model names, so these
            may sometimes need to be fixed. Defaults to True.

    Returns:
        CDB: The v2 CDB.
    """
    raw_v1_data = load_old_raw_data(old_path)
    return convert_data(
        all_data=raw_v1_data,
        fix_spacy_model_name=fix_spacy_model_name,
    )

load_old_raw_data

load_old_raw_data(old_path: str) -> dict

Loads the raw data from the old file.

This uses a wrapper that allows loading the data even if the classes do not exist.

Parameters:

  • old_path

    (str) –

    The path of the file to read.

Returns:

  • dict ( dict ) –

    The resulting raw data.

Source code in medcat-v2/medcat/utils/legacy/convert_cdb.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def load_old_raw_data(old_path: str) -> dict:
    """Load the raw data from an old (v1) file.

    The file is opened in binary mode and read through ``CustomUnpickler``
    so that the data can be loaded even if the classes it refers to do
    not exist.

    Args:
        old_path (str): The path of the file to read.

    Returns:
        dict: The resulting raw data.
    """
    # NOTE: a custom unpickler is needed because the original modules
    #       within medcat (v1) are not available here.
    # NOTE(review): this unpickles the file contents - presumably only
    #       trusted model files are passed in; confirm with callers.
    with open(old_path, 'rb') as legacy_file:
        return CustomUnpickler(legacy_file).load()

update_names

update_names(cdb: CDB, data: dict)
Source code in medcat-v2/medcat/utils/legacy/convert_cdb.py
209
210
211
def update_names(cdb: CDB, data: dict):
    """Copy renamed entries from the raw data onto the CDB.

    For every ``old -> new`` pair in ``TO_RENAME``, the value stored in
    ``data`` under the old key is set on ``cdb`` as the attribute with
    the new name.

    Args:
        cdb (CDB): The CDB to set the attributes on.
        data (dict): The raw data; must contain every key of
            ``TO_RENAME``.
    """
    for legacy_key in TO_RENAME:
        setattr(cdb, TO_RENAME[legacy_key], data[legacy_key])