medcat.components.linking.context_based_linker

Classes:

Linker –

Link to a biomedical database.

Attributes:

logger –

logger `module-attribute`

logger = getLogger(__name__)

Linker

Linker(cdb: CDB, vocab: Vocab, config: Config)

Bases: AbstractEntityProvidingComponent

Link to a biomedical database.

Parameters:

cdb
(CDB) –

The Context Database.
vocab
(Vocab) –

The vocabulary.
config
(Config) –

The config.

Methods:

create_new_component –
get_type –
predict_entities –
train –

Train the linker.

Attributes:

cdb –
config –
context_model –
name –
train_counter (dict) –
vocab –

Source code in medcat-v2/medcat/components/linking/context_based_linker.py

def __init__(self, cdb: CDB, vocab: Vocab, config: Config) -> None:
    """Set up the linker together with its context model.

    Args:
        cdb (CDB): The Context Database.
        vocab (Vocab): The vocabulary.
        config (Config): The config.
    """
    super().__init__()
    self.cdb = cdb
    self.vocab = vocab
    self.config = config

    # The context model is built from the CDB lookups, the vocab and
    # the linking-specific part of the config.
    linking_cnf = config.components.linking
    name_separator = config.general.separator
    self.context_model = ContextModel(
        cdb.cui2info,
        cdb.name2info,
        cdb.weighted_average_function,
        vocab,
        linking_cnf,
        name_separator,
    )

    # Counts how often a (name, cui) pair appeared and was used
    # during training.
    self.train_counter: dict = {}

cdb `instance-attribute`

cdb = cdb

config `instance-attribute`

config = config

context_model `instance-attribute`

context_model = ContextModel(cui2info, name2info, weighted_average_function, vocab, linking, separator)

name `class-attribute` `instance-attribute`

name = 'medcat2_linker'

train_counter `instance-attribute`

train_counter: dict = {}

vocab `instance-attribute`

vocab = vocab

create_new_component `classmethod`

create_new_component(cnf: ComponentConfig, tokenizer: BaseTokenizer, cdb: CDB, vocab: Vocab, model_load_path: Optional[str]) -> Linker

Source code in medcat-v2/medcat/components/linking/context_based_linker.py

@classmethod
def create_new_component(
        cls,
        cnf: ComponentConfig,
        tokenizer: BaseTokenizer,
        cdb: CDB,
        vocab: Vocab,
        model_load_path: Optional[str],
        ) -> 'Linker':
    """Create a new linker from the given CDB and vocab.

    The config handed to the new instance is the one attached to the
    CDB (`cdb.config`). The `cnf`, `tokenizer` and `model_load_path`
    arguments are accepted but not used by this implementation.

    Args:
        cnf (ComponentConfig): The component config (unused here).
        tokenizer (BaseTokenizer): The tokenizer (unused here).
        cdb (CDB): The Context Database.
        vocab (Vocab): The vocabulary.
        model_load_path (Optional[str]): The model load path
            (unused here).

    Returns:
        Linker: The newly created instance of `cls`.
    """
    config = cdb.config
    return cls(cdb, vocab, config)

get_type

get_type() -> CoreComponentType

Source code in medcat-v2/medcat/components/linking/context_based_linker.py

def get_type(self) -> CoreComponentType:
    """Get the core component type of this component.

    Returns:
        CoreComponentType: Always `CoreComponentType.linking`.
    """
    component_type = CoreComponentType.linking
    return component_type

predict_entities

predict_entities(doc: MutableDocument, ents: list[MutableEntity] | None = None) -> list[MutableEntity]

Source code in medcat-v2/medcat/components/linking/context_based_linker.py

def predict_entities(self, doc: MutableDocument,
                     ents: list[MutableEntity] | None = None
                     ) -> list[MutableEntity]:
    """Link the provided NER'ed entities within the document.

    If `config.components.linking.train` is set, the entities are
    passed through the train-on-document path, otherwise through
    inference. The linked entities are then filtered according to
    `config.general.show_nested_entities`.

    Args:
        doc (MutableDocument): The document being processed.
        ents (list[MutableEntity] | None): The NER'ed entities.
            Must be provided, despite the `None` default.

    Raises:
        ValueError: If no entities were provided (`ents` is None).

    Returns:
        list[MutableEntity]: The filtered linked entities.
    """
    linking_cnf = self.config.components.linking

    if ents is None:
        raise ValueError("Need to have NER'ed entities provided")

    # Pick the handler once; both take the same (doc, ents) arguments.
    handler = self._train_on_doc if linking_cnf.train else self._inference

    # The handler's result is materialised right away. The original
    # note here says it may be a generator and that the underlying
    # entity list gets cleared afterwards otherwise.
    linked = list(handler(doc, ents))

    # NOTE: this method itself does not clear or extend `doc.ner_ents`.

    # TODO - reintroduce pretty labels? and apply here?

    # TODO - reintroduce groups? and map here?

    show_nested = self.config.general.show_nested_entities
    return filter_linked_annotations(doc, linked, show_nested)

train

train(cui: str, entity: MutableEntity, doc: MutableDocument, negative: bool = False, names: Union[list[str], dict] = [], per_doc_valid_token_cache: Optional[PerDocumentTokenCache] = None) -> None

Train the linker.

This simply trains the context model.

Parameters:

cui
(str) –

The CUI to train.
entity
(MutableEntity) –

The entity we're at.
doc
(MutableDocument) –

The document within which we're working.
negative
(bool, default: False ) –

Whether or not the example is negative. Defaults to False.
names
(list[str] / dict, default: [] ) –

Optionally used to update the status of a name-cui pair in the CDB.
per_doc_valid_token_cache
(PerDocumentTokenCache, default: None ) –

Optionally, provide the per doc valid token cache.

Source code in medcat-v2/medcat/components/linking/context_based_linker.py

def train(self, cui: str,
          entity: MutableEntity,
          doc: MutableDocument,
          negative: bool = False,
          names: Optional[Union[list[str], dict]] = None,
          per_doc_valid_token_cache: Optional[PerDocumentTokenCache] = None
          ) -> None:
    """Train the linker.

    This simply trains the context model.

    Args:
        cui (str): The CUI to train.
        entity (MutableEntity): The entity we're at.
        doc (MutableDocument): The document within which we're working.
        negative (bool): Whether or not the example is negative.
            Defaults to False.
        names (Optional[Union[list[str], dict]]):
            Optionally used to update the `status` of a name-cui
            pair in the CDB. When omitted (or None), a new empty
            list is passed on to the context model.
        per_doc_valid_token_cache (Optional[PerDocumentTokenCache]):
            Optionally, provide the per doc valid token cache. A new
            cache is created when this is None.
    """
    # Use a fresh list per call: a literal `[]` default would be a
    # single object shared by every call that omits `names`.
    if names is None:
        names = []
    if per_doc_valid_token_cache is None:
        per_doc_valid_token_cache = PerDocumentTokenCache()
    self.context_model.train(
        cui, entity, doc, per_doc_valid_token_cache, negative, names)

medcat.components.linking.context_based_linker

logger `module-attribute`

Linker

`cdb`

`vocab`

`config`

cdb `instance-attribute`

config `instance-attribute`

context_model `instance-attribute`

name `class-attribute` `instance-attribute`

train_counter `instance-attribute`

vocab `instance-attribute`

create_new_component `classmethod`

get_type

predict_entities

train

`cui`

`entity`

`doc`

`negative`

`names`

`per_doc_valid_token_cache`

medcat.components.linking.context_based_linker

logger module-attribute

Linker

cdb

vocab

config

cdb instance-attribute

config instance-attribute

context_model instance-attribute

name class-attribute instance-attribute

train_counter instance-attribute

vocab instance-attribute

create_new_component classmethod

get_type

predict_entities

train

cui

entity

doc

negative

names

per_doc_valid_token_cache

logger `module-attribute`

`cdb`

`vocab`

`config`

cdb `instance-attribute`

config `instance-attribute`

context_model `instance-attribute`

name `class-attribute` `instance-attribute`

train_counter `instance-attribute`

vocab `instance-attribute`

create_new_component `classmethod`

`cui`

`entity`

`doc`

`negative`

`names`

`per_doc_valid_token_cache`