Skip to content

medcat.components.addons.relation_extraction.bert.tokenizer

Classes:

Attributes:

logger module-attribute

logger = getLogger(__name__)

TokenizerWrapperBERT

TokenizerWrapperBERT(hf_tokenizers=None, max_seq_length: Optional[int] = None, add_special_tokens: Optional[bool] = False)

Bases: BaseTokenizerWrapper

Methods:

Attributes:

Source code in medcat-v2/medcat/components/addons/relation_extraction/bert/tokenizer.py
17
18
19
20
21
22
def __init__(self, hf_tokenizers=None,
             max_seq_length: Optional[int] = None,
             add_special_tokens: Optional[bool] = False):
    """Wrap a HuggingFace BERT tokenizer so it works with RelCAT models.

    Args:
        hf_tokenizers: The underlying HuggingFace fast tokenizer
            (presumably a `BertTokenizerFast` — see `load`).
        max_seq_length: Maximum sequence length to tokenize to, or
            None for no explicit limit.
        add_special_tokens: Whether special tokens should be added
            when tokenizing.
    """
    # The three settings are independent; just record them on the instance.
    self._add_special_tokens = add_special_tokens
    self.max_seq_length = max_seq_length
    self.hf_tokenizers = hf_tokenizers

name class-attribute instance-attribute

name = 'bert-tokenizer'

Wrapper around a huggingface BERT tokenizer so that it works with the RelCAT models.

Parameters:

  • `hf_tokenizers` (`transformers.models.bert.tokenization_bert_fast.PreTrainedTokenizerFast`)

    A HuggingFace fast BERT tokenizer.

pretrained_model_name_or_path class-attribute instance-attribute

pretrained_model_name_or_path = 'bert-base-uncased'

load classmethod

load(tokenizer_path: str, relcat_config: ConfigRelCAT, **kwargs) -> TokenizerWrapperBERT
Source code in medcat-v2/medcat/components/addons/relation_extraction/bert/tokenizer.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@classmethod
def load(cls, tokenizer_path: str, relcat_config: ConfigRelCAT, **kwargs
         ) -> "TokenizerWrapperBERT":
    """Load a wrapped BERT tokenizer.

    When ``tokenizer_path`` is non-empty, the tokenizer is loaded from
    ``<tokenizer_path>/<cls.name>`` (forwarding ``**kwargs``). Otherwise
    the default pretrained model is used and, as a side effect,
    ``relcat_config.general.model_name`` is overwritten with it.

    Args:
        tokenizer_path: Directory containing the saved tokenizer, or a
            falsy value to fall back to the default pretrained model.
        relcat_config: The RelCAT config; its ``general.model_name`` is
            updated on the fallback path.
        **kwargs: Extra arguments forwarded to ``from_pretrained`` when
            loading from ``tokenizer_path`` only.

    Returns:
        The loaded TokenizerWrapperBERT instance.
    """
    wrapper = cls()
    if tokenizer_path:
        # Saved tokenizers live in a subfolder named after the wrapper.
        wrapper.hf_tokenizers = BertTokenizerFast.from_pretrained(
            pretrained_model_name_or_path=os.path.join(
                tokenizer_path, cls.name),
            **kwargs)
        return wrapper
    # Fallback: record the default model on the config, then load it.
    relcat_config.general.model_name = cls.pretrained_model_name_or_path
    wrapper.hf_tokenizers = BertTokenizerFast.from_pretrained(
        pretrained_model_name_or_path=relcat_config.general.model_name)
    return wrapper