Skip to content

medcat.components.linking.two_step_context_based_linker

Classes:

Functions:

Attributes:

TYPE_ID_PREFIX module-attribute

TYPE_ID_PREFIX: str = 'TYPE_ID:'

logger module-attribute

logger = getLogger(__name__)

PerEntityWeights

PerEntityWeights(doc: MutableDocument)

Bases: MutableMapping[MutableEntity, dict[str, float]]

Methods:

Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
308
309
310
def __init__(self, doc: MutableDocument):
    """Create an empty per-entity weights mapping tied to a document.

    Args:
        doc (MutableDocument): The document stored on the instance.
    """
    # Weights are stored under (int, int) keys; each value maps a
    # string to a float weight.
    weights: dict[tuple[int, int], dict[str, float]] = {}
    self._cui_weights = weights
    self._doc = doc

keys

Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
344
345
def keys(self) -> KeysView[MutableEntity]:
    """Return a keys view of the stored keys converted by `_from_key`.

    The view is backed by a temporary dict built for this call, so it
    keeps the insertion order of the stored keys and collapses
    duplicates produced by the conversion.

    Returns:
        KeysView[MutableEntity]: View over the converted keys.
    """
    converted: dict = {}
    for stored_key in self._cui_weights:
        converted[self._from_key(stored_key)] = None
    return converted.keys()

TwoStepLinker

TwoStepLinker(cdb: CDB, vocab: Vocab, config: Config)

Bases: AbstractCoreComponent

Link to a biomedical database.

Parameters:

  • cdb

    (CDB) –

    The Context Database.

  • vocab

    (Vocab) –

    The vocabulary.

  • config

    (Config) –

    The config.

Methods:

Attributes:

Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(self, cdb: CDB, vocab: Vocab, config: Config) -> None:
    """Set up the wrapped linker and the type-level context model.

    Args:
        cdb (CDB): The Context Database.
        vocab (Vocab): The vocabulary.
        config (Config): The config.
    """
    self.cdb, self.vocab, self.config = cdb, vocab, config
    self._init_cnf()
    # The regular linker does the concept-level work; this component
    # registers its own disambiguation preprocessor on that linker's
    # context model.
    wrapped_linker = NormalLinker(cdb, vocab, config)
    self._linker = wrapped_linker
    preprocessors = wrapped_linker.context_model._disamb_preprocessors
    preprocessors.append(self._preprocess_disamb)
    # Type IDs are added to cui2info (under a prefixed key) before the
    # type-level context model is created on top of that same mapping.
    add_tuis_to_cui_info(self.cdb.cui2info, self.cdb.type_id2info)
    database = self.cdb
    self._tui_context_model = ContextModel(
        database.cui2info,
        database.name2info,
        database.weighted_average_function,
        self.vocab,
        self.config.components.linking,
        self.config.general.separator,
    )

cdb instance-attribute

cdb = cdb

config instance-attribute

config = config

name class-attribute instance-attribute

name = 'medcat2_two_step_linker'

two_step_config property

two_step_config: TwoStepLinkerConfig

vocab instance-attribute

vocab = vocab

create_new_component classmethod

create_new_component(cnf: ComponentConfig, tokenizer: BaseTokenizer, cdb: CDB, vocab: Vocab, model_load_path: Optional[str]) -> TwoStepLinker
Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
257
258
259
260
261
262
@classmethod
def create_new_component(
        cls, cnf: ComponentConfig, tokenizer: BaseTokenizer,
        cdb: CDB, vocab: Vocab, model_load_path: Optional[str]
        ) -> 'TwoStepLinker':
    """Build a new linker from the CDB, the vocab and the CDB's config.

    Args:
        cnf (ComponentConfig): Not used by this implementation.
        tokenizer (BaseTokenizer): Not used by this implementation.
        cdb (CDB): The Context Database; its `config` is passed on.
        vocab (Vocab): The vocabulary.
        model_load_path (Optional[str]): Not used by this implementation.

    Returns:
        TwoStepLinker: The newly created component.
    """
    component = cls(cdb, vocab, cdb.config)
    return component

get_type

get_type() -> CoreComponentType
Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
76
77
def get_type(self) -> CoreComponentType:
    """Return the core component type of this component (linking)."""
    component_type = CoreComponentType.linking
    return component_type

train

train(cui: str, entity: MutableEntity, doc: MutableDocument, negative: bool = False, names: Union[list[str], dict] = []) -> None

Train the linker.

This simply trains the context model.

Parameters:

  • cui

    (str) –

    The CUI to train.

  • entity

    (BaseEntity) –

    The entity we're at.

  • doc

    (BaseDocument) –

    The document within which we're working.

  • negative

    (bool, default: False ) –

    Whether or not the example is negative. Defaults to False.

  • names

    (list[str] / dict, default: [] ) –

    Optionally used to update the status of a name-cui pair in the CDB.

Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def train(self, cui: str,
          entity: MutableEntity,
          doc: MutableDocument,
          negative: bool = False,
          names: Union[list[str], dict] = []) -> None:
    """Train the linker.

    This trains the type-level context model once for every type ID
    of the CUI and then trains the wrapped linker for the CUI itself.

    Args:
        cui (str): The CUI to train.
        entity (BaseEntity): The entity we're at.
        doc (BaseDocument): The document within which we're working.
        negative (bool): Whether or not the example is negative.
            Defaults to False.
        names (list[str]/dict):
            Optionally used to update the `status` of a name-cui
            pair in the CDB.
    """
    # NOTE: the mutable default for `names` is kept for interface
    # compatibility; it is only passed through here, never mutated.
    # A single token cache is shared by all training calls below.
    pdc = PerDocumentTokenCache()
    # One CUI may have multiple type IDs - train each of them. The loop
    # variable must not be overwritten, otherwise only the first type ID
    # would be trained (repeatedly).
    for tui in self.cdb.cui2info[cui]['type_ids']:
        self._tui_context_model.train(f"{TYPE_ID_PREFIX}{tui}",
                                      entity, doc, pdc,
                                      negative=negative, names=names)
    self._linker.train(cui, entity, doc, negative, names,
                       per_doc_valid_token_cache=pdc)

TwoStepLinkerConfig

Bases: SerialisableBaseModel

Attributes:

alpha_midpoint class-attribute instance-attribute

alpha_midpoint: float = 0.5

The midpoint for the sigmoid: alpha = sigmoid(alpha_sharpness * (similarity - alpha_midpoint)). This is used for weighting the type similarity vs the concept similarity.

alpha_sharpness class-attribute instance-attribute

alpha_sharpness: float = 5.0

The sharpness for the sigmoid: alpha = sigmoid(alpha_sharpness * (similarity - alpha_midpoint)). This is used for weighting the type similarity vs the concept similarity.

type_learning_rate_coefficient class-attribute instance-attribute

type_learning_rate_coefficient: float = 0.2

The coefficient for the type-based context model learning rate.

The idea is that since there are far fewer classes for types, we need to lower the learning rate. In the Snomed examples we have around 10 000 more CUIs than types, so a coefficient like 0.2 should be appropriate.

add_tuis_to_cui_info

add_tuis_to_cui_info(cui2info: dict[str, CUIInfo], type_ids: dict[str, TypeInfo])
Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
30
31
32
33
34
35
36
37
def add_tuis_to_cui_info(cui2info: dict[str, CUIInfo],
                         type_ids: dict[str, TypeInfo]
                         ):
    """Add an entry to `cui2info` for every type ID not yet present.

    Entries are stored under the type ID prefixed with TYPE_ID_PREFIX.
    Existing entries are left untouched; `cui2info` is modified in place.

    Args:
        cui2info (dict[str, CUIInfo]): The mapping to add entries to.
        type_ids (dict[str, TypeInfo]): The type IDs and their info.
    """
    for type_id, type_info in type_ids.items():
        key = f"{TYPE_ID_PREFIX}{type_id}"
        if key in cui2info:
            # already registered - keep the existing entry
            continue
        # the new info is created from the unprefixed type ID, with the
        # type's name as both the preferred name and the only name
        cui2info[key] = get_new_cui_info(
            type_id, preferred_name=type_info.name, names={type_info.name})

changed_learning_rate

changed_learning_rate(config: Config, two_step_cnf: TwoStepLinkerConfig)
Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
348
349
350
351
352
353
354
355
356
357
358
359
def changed_learning_rate(config: Config, two_step_cnf: 'TwoStepLinkerConfig'):
    """Scale the linking learning rates by the type-level coefficient.

    A copy of the linking `optim` settings is made, and every entry
    whose name contains 'lr' is multiplied by
    `two_step_cnf.type_learning_rate_coefficient`. The original
    settings object is not modified here.

    Args:
        config (Config): The config whose linking optim is used.
        two_step_cnf (TwoStepLinkerConfig): Provides the coefficient.

    Returns:
        The result of `temp_changed_config` for the linking config's
        'optim' attribute and the scaled copy (presumably a context
        manager that applies the change temporarily - confirm against
        its definition).
    """
    scale = two_step_cnf.type_learning_rate_coefficient
    linking_cnf = config.components.linking
    scaled_optim = linking_cnf.optim.copy()
    for option_name in scaled_optim:
        if 'lr' in option_name:
            scaled_optim[option_name] *= scale
    logger.debug("Changing learning rate from %s to %s",
                 linking_cnf.optim,
                 scaled_optim)
    return temp_changed_config(linking_cnf, 'optim', scaled_optim)

temp_attribute

temp_attribute(obj: Any, attr_name: str, attr_val: Any)
Source code in medcat-v2/medcat/components/linking/two_step_context_based_linker.py
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
@contextmanager
def temp_attribute(obj: Any, attr_name: str, attr_val: Any):
    """Temporarily set an attribute on an object.

    The attribute is set on entry and put back to its previous state on
    exit, even if the body of the `with` block raises. If the attribute
    did not exist beforehand it is deleted again; otherwise the previous
    value (including a previous value of None) is restored.

    A warning is logged if the attribute already exists on entry.

    Args:
        obj (Any): The object to set the attribute on.
        attr_name (str): The name of the attribute.
        attr_val (Any): The temporary value of the attribute.

    Yields:
        None
    """
    # Track existence separately from the value so that an attribute
    # that was explicitly None is restored rather than deleted.
    had_attr = hasattr(obj, attr_name)
    prev_val = getattr(obj, attr_name) if had_attr else None
    if had_attr:
        logger.warning(
            "Object '%s' already had an attribute '%s' - has type '%s'",
            obj, attr_name, type(prev_val))
    setattr(obj, attr_name, attr_val)
    try:
        yield
    finally:
        # and reset - also when the with-body raised
        if had_attr:
            setattr(obj, attr_name, prev_val)
        else:
            delattr(obj, attr_name)