Skip to content

medcat.components.linking.context_based_linker

Classes:

  • Linker

    Link to a biomedical database.

Attributes:

logger module-attribute

logger = getLogger(__name__)

Linker

Linker(cdb: CDB, vocab: Vocab, config: Config)

Bases: AbstractEntityProvidingComponent

Link to a biomedical database.

Parameters:

  • cdb

    (CDB) –

    The Context Database.

  • vocab

    (Vocab) –

    The vocabulary.

  • config

    (Config) –

    The config.

Methods:

Attributes:

Source code in medcat-v2/medcat/components/linking/context_based_linker.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def __init__(self, cdb: CDB, vocab: Vocab, config: Config) -> None:
    """Set up the linker and the context model it relies on.

    Args:
        cdb (CDB): The Context Database.
        vocab (Vocab): The vocabulary.
        config (Config): The config.
    """
    super().__init__()
    self.cdb, self.vocab, self.config = cdb, vocab, config
    # The context model is wired up from pieces of the CDB, the vocab
    # and the linking / general parts of the config.
    self.context_model = ContextModel(
        self.cdb.cui2info,
        self.cdb.name2info,
        self.cdb.weighted_average_function,
        self.vocab,
        self.config.components.linking,
        self.config.general.separator,
    )
    # Tracks how often each (name, cui) pair appeared and was used
    # during training.
    self.train_counter: dict = {}

cdb instance-attribute

cdb = cdb

config instance-attribute

config = config

context_model instance-attribute

context_model = ContextModel(cui2info, name2info, weighted_average_function, vocab, linking, separator)

name class-attribute instance-attribute

name = 'medcat2_linker'

train_counter instance-attribute

train_counter: dict = {}

vocab instance-attribute

vocab = vocab

create_new_component classmethod

create_new_component(cnf: ComponentConfig, tokenizer: BaseTokenizer, cdb: CDB, vocab: Vocab, model_load_path: Optional[str]) -> Linker
Source code in medcat-v2/medcat/components/linking/context_based_linker.py
257
258
259
260
261
262
@classmethod
def create_new_component(
        cls, cnf: ComponentConfig, tokenizer: BaseTokenizer,
        cdb: CDB, vocab: Vocab, model_load_path: Optional[str]
        ) -> 'Linker':
    """Create a new linker from the given CDB and vocab.

    The `cnf`, `tokenizer` and `model_load_path` arguments are
    accepted but not used by this implementation; the config passed
    to the new instance is the one attached to the CDB.

    Args:
        cnf (ComponentConfig): The component config (unused here).
        tokenizer (BaseTokenizer): The tokenizer (unused here).
        cdb (CDB): The Context Database.
        vocab (Vocab): The vocabulary.
        model_load_path (Optional[str]): The model load path
            (unused here).

    Returns:
        Linker: The newly created linker.
    """
    linker_config = cdb.config
    return cls(cdb, vocab, linker_config)

get_type

get_type() -> CoreComponentType
Source code in medcat-v2/medcat/components/linking/context_based_linker.py
50
51
def get_type(self) -> CoreComponentType:
    """Get the core component type of this component.

    Returns:
        CoreComponentType: Always `CoreComponentType.linking`.
    """
    return CoreComponentType.linking

predict_entities

predict_entities(doc: MutableDocument, ents: list[MutableEntity] | None = None) -> list[MutableEntity]
Source code in medcat-v2/medcat/components/linking/context_based_linker.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def predict_entities(self, doc: MutableDocument,
                     ents: list[MutableEntity] | None = None
                     ) -> list[MutableEntity]:
    """Link the provided NER'ed entities within the document.

    Depending on the `train` flag of the linking config, the entities
    are either run through training on the document or through
    inference. The outcome is then passed through
    `filter_linked_annotations`.

    Args:
        doc (MutableDocument): The document being processed.
        ents (list[MutableEntity] | None): The NER'ed entities.
            Must be provided, even though the default is None.

    Raises:
        ValueError: If no entities were provided (`ents` is None).

    Returns:
        list[MutableEntity]: The filtered linked entities.
    """
    linking_config = self.config.components.linking

    if ents is None:
        raise ValueError("Need to have NER'ed entities provided")

    # Only the handler that is actually needed gets looked up.
    handler = (self._train_on_doc if linking_config.train
               else self._inference)
    # Materialise right away: the handler's result may be lazy, and
    # (per the original note) the `all_ents` list gets cleared
    # afterwards otherwise.
    linked = list(handler(doc, ents))

    # NOTE: `doc.ner_ents` is left untouched here; the former
    #       clear-and-extend with the linked entities is disabled.

    # TODO - reintroduce pretty labels? and apply here?

    # TODO - reintroduce groups? and map here?

    show_nested = self.config.general.show_nested_entities
    return filter_linked_annotations(doc, linked, show_nested)

train

Train the linker.

This simply trains the context model.

Parameters:

  • cui

    (str) –

    The CUI to train.

  • entity

    (MutableEntity) –

    The entity we're at.

  • doc

    (MutableDocument) –

    The document within which we're working.

  • negative

    (bool, default: False ) –

    Whether or not the example is negative. Defaults to False.

  • names

    (list[str] / dict, default: [] ) –

    Optionally used to update the status of a name-cui pair in the CDB.

  • per_doc_valid_token_cache

    (PerDocumentTokenCache, default: None ) –

    Optionally, provide the per doc valid token cache.

Source code in medcat-v2/medcat/components/linking/context_based_linker.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def train(self, cui: str,
          entity: MutableEntity,
          doc: MutableDocument,
          negative: bool = False,
          names: Optional[Union[list[str], dict]] = None,
          per_doc_valid_token_cache: Optional[PerDocumentTokenCache] = None
          ) -> None:
    """Train the linker.

    This simply trains the context model.

    Args:
        cui (str): The CUI to train.
        entity (MutableEntity): The entity we're at.
        doc (MutableDocument): The document within which we're working.
        negative (bool): Whether or not the example is negative.
            Defaults to False.
        names (Optional[Union[list[str], dict]]):
            Optionally used to update the `status` of a name-cui
            pair in the CDB. Defaults to None, which is treated as
            an empty list.
        per_doc_valid_token_cache (Optional[PerDocumentTokenCache]):
            Optionally, provide the per doc valid token cache.
            A new one is created when not provided.
    """
    if names is None:
        # Use a fresh list per call rather than a mutable default
        # argument, which would be one object shared by every call.
        names = []
    if per_doc_valid_token_cache is None:
        per_doc_valid_token_cache = PerDocumentTokenCache()
    self.context_model.train(
        cui, entity, doc, per_doc_valid_token_cache, negative, names)