Skip to content

medcat.components.addons.relation_extraction.llama.tokenizer

Classes:

Attributes:

logger module-attribute

logger = getLogger(__name__)

TokenizerWrapperLlama

TokenizerWrapperLlama(hf_tokenizers=None, max_seq_length: Optional[int] = None, add_special_tokens: Optional[bool] = False)

Bases: BaseTokenizerWrapper

Wrapper around a huggingface Llama tokenizer so that it works with the RelCAT models.

Parameters:

  • hf_tokenizers

    (`transformers.LlamaTokenizerFast`, default: None ) –

    A huggingface Fast Llama tokenizer.

Methods:

Attributes:

Source code in medcat-v2/medcat/components/addons/relation_extraction/tokenizer.py
17
18
19
20
21
22
def __init__(
    self,
    hf_tokenizers=None,
    max_seq_length: Optional[int] = None,
    add_special_tokens: Optional[bool] = False,
):
    """Store the wrapped tokenizer and its tokenization settings.

    Nothing is validated or loaded here; the arguments are only kept
    on the instance.

    Args:
        hf_tokenizers: The underlying Hugging Face tokenizer object,
            or None if it will be assigned later.
        max_seq_length: Value stored as ``self.max_seq_length``.
        add_special_tokens: Value stored privately as
            ``self._add_special_tokens``.
    """
    # Tokenization settings.
    self._add_special_tokens = add_special_tokens
    self.max_seq_length = max_seq_length
    # The wrapped tokenizer itself.
    self.hf_tokenizers = hf_tokenizers

name class-attribute instance-attribute

name = 'tokenizer_wrapper_llama_rel'

pretrained_model_name_or_path class-attribute instance-attribute

pretrained_model_name_or_path = 'meta-llama/Llama-3.1-8B'

load classmethod

load(tokenizer_path: str, relcat_config: ConfigRelCAT, **kwargs) -> TokenizerWrapperLlama
Source code in medcat-v2/medcat/components/addons/relation_extraction/llama/tokenizer.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
@classmethod
def load(cls, tokenizer_path: str, relcat_config: ConfigRelCAT, **kwargs
         ) -> "TokenizerWrapperLlama":
    """Create a wrapper and load its Hugging Face Llama tokenizer.

    If ``tokenizer_path`` is truthy, the tokenizer is loaded from the
    ``cls.name`` sub-folder of that path. Otherwise
    ``relcat_config.general.model_name`` is overwritten with
    ``cls.pretrained_model_name_or_path`` and the tokenizer is loaded
    by that model name.

    Args:
        tokenizer_path: Folder that contains the saved tokenizer in a
            sub-folder named ``cls.name``. A falsy value selects the
            pretrained fallback.
        relcat_config: RelCAT config; its ``general.model_name`` is
            mutated in the fallback case.
        **kwargs: Forwarded to ``LlamaTokenizerFast.from_pretrained``
            only when loading from ``tokenizer_path``.

    Returns:
        A new wrapper whose ``hf_tokenizers`` holds the loaded tokenizer.
    """
    tokenizer = cls()

    if tokenizer_path:
        # Build the path only in the branch that uses it: joining first
        # raised TypeError for a None path before the fallback could run.
        path = os.path.join(tokenizer_path, cls.name)
        tokenizer.hf_tokenizers = LlamaTokenizerFast.from_pretrained(
            path, **kwargs)
    else:
        # No saved tokenizer given: record the default model name in the
        # config and load the tokenizer by that name.
        relcat_config.general.model_name = (
            cls.pretrained_model_name_or_path)
        tokenizer.hf_tokenizers = LlamaTokenizerFast.from_pretrained(
            pretrained_model_name_or_path=relcat_config.general.model_name)
    return tokenizer