Skip to content

medcat.deid

Classes:

DeIdModel

DeIdModel(cat: CAT)

Bases: NerModel

The DeID model.

This wraps a CAT instance and simplifies its use as a de-identification model.

It provides methods for creating one from a TransformersNER as well as loading from a model pack (along with some validation).

It also exposes some useful parts of the CAT it wraps such as the config and the concept database.

Methods:

Attributes:

Source code in medcat-v2/medcat/components/ner/trf/deid.py
68
69
def __init__(self, cat: CAT) -> None:
    """Create the model around an existing CAT instance.

    Args:
        cat (CAT): The CAT instance to wrap. It is stored as-is on the
            `cat` attribute.
    """
    self.cat = cat

cat instance-attribute

cat = cat

create classmethod

create(cdb: CDB, cnf: ConfigTransformersNER)
Source code in medcat-v2/medcat/components/ner/trf/deid.py
199
200
201
202
203
204
205
206
@classmethod
def create(cls, cdb: CDB, cnf: ConfigTransformersNER):
    """Build a model around a newly created CAT that uses the given config.

    Note that the config held by `cdb` is modified in place: the NER
    component name is set to `TransformersNER.name`, `cnf` is attached
    as the NER component's custom config, and the linking component
    name is set to `'no_action'`.

    Args:
        cdb (CDB): The concept database. Its config is updated in place
            and then used to create the CAT.
        cnf (ConfigTransformersNER): The config attached to the NER
            component as its custom config.

    Returns:
        An instance of `cls` wrapping the newly created CAT.
    """
    components = cdb.config.components
    ner_cnf = components.ner
    ner_cnf.comp_name = TransformersNER.name
    ner_cnf.custom_cnf = cnf
    # the linker is set to the no-action one
    components.linking.comp_name = 'no_action'
    return cls(CAT(cdb=cdb, vocab=None, config=cdb.config))

deid_multi_text

deid_multi_text(texts: Iterable[str], redact: bool = False, n_process: Optional[int] = None) -> list[str]
Source code in medcat-v2/medcat/components/ner/trf/deid.py
123
124
125
126
127
128
129
130
131
def deid_multi_text(self, texts: Iterable[str], redact: bool = False,
                    n_process: Optional[int] = None) -> list[str]:
    """Deprecated alias of `deid_multi_texts`.

    Emits a `DeprecationWarning` and then forwards all the arguments,
    unchanged, to `deid_multi_texts`.

    Args:
        texts (Iterable[str]): The texts to deidentify.
        redact (bool): Whether to redact the information.
        n_process (Optional[int]): Forwarded to `deid_multi_texts`.

    Returns:
        list[str]: Whatever `deid_multi_texts` returns for the same
            arguments.
    """
    deprecation_msg = (
        "deid_multi_text() is deprecated and will be removed in a "
        "future release. Use deid_multi_texts() instead."
    )
    # stacklevel=2 so the warning is attributed to the caller's line
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    return self.deid_multi_texts(texts, redact, n_process)

deid_multi_texts

deid_multi_texts(texts: Iterable[str], redact: bool = False, n_process: Optional[int] = None) -> list[str]
Source code in medcat-v2/medcat/components/ner/trf/deid.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def deid_multi_texts(self, texts: Iterable[str], redact: bool = False,
                     n_process: Optional[int] = None) -> list[str]:
    """Deidentify multiple texts and potentially redact information.

    The entities of all the texts are obtained through the wrapped CAT's
    `get_entities_multi_texts` and are then replaced within each text.

    Args:
        texts (Iterable[str]): The texts to deidentify. An item may also
            be a tuple, in which case its element at index 1 is used as
            the text. One-shot iterables (e.g. generators) are supported.
        redact (bool): Whether to redact the information.
        n_process (Optional[int]): The number of processes passed on to
            `get_entities_multi_texts`. `None` is treated as 1.

    Raises:
        ValueError: If an item is neither a `str` nor a `tuple`.

    Returns:
        list[str]: The deidentified texts, one per input item.
    """
    if n_process is None:
        n_process = 1

    # The input is needed twice: once for entity extraction and once to
    # pair each raw text with its entities. Materialise it so that a
    # one-shot iterator (e.g. a generator) is not exhausted by the first
    # pass, which would otherwise silently drop or misalign results.
    all_texts = list(texts)
    entities = self.cat.get_entities_multi_texts(
        all_texts, n_process=n_process)
    out: list[str] = []
    for raw_text, (_, _ents) in zip(all_texts, entities):
        ents = _ents['entities']
        text: str
        if isinstance(raw_text, tuple):
            # tuple items carry the actual text at index 1
            text = raw_text[1]
        elif isinstance(raw_text, str):
            text = raw_text
        else:
            raise ValueError("Unknown raw text: "
                             f"{type(raw_text)}: {raw_text}")
        new_text = replace_entities_in_text(
            text, ents, get_cui_name=self.cat.cdb.get_name, redact=redact)
        out.append(new_text)
    return out

deid_text

deid_text(text: str, redact: bool = False) -> str

Deidentify text and potentially redact information.

De-identified text. If redaction is enabled, identifiable entities will be replaced with stars (e.g. *****). Otherwise, the replacement will be the CUI or, in other words, the type of information that was hidden (e.g. [PATIENT]).

Parameters:

  • text

    (str) –

    The text to deidentify.

  • redact

    (bool, default: False ) –

    Whether to redact the information.

Returns:

  • str ( str ) –

    The deidentified text.

Source code in medcat-v2/medcat/components/ner/trf/deid.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def deid_text(self, text: str, redact: bool = False) -> str:
    """Deidentify a single text, optionally redacting it.

    With redaction enabled, identifiable entities are replaced
    with stars (e.g. `*****`).
    Without it, each entity is replaced with its CUI, i.e. the
    type of information that was hidden (e.g. [PATIENT]).

    This is `deid_text_with_entities` with the entities dropped.

    Args:
        text (str): The text to deidentify.
        redact (bool): Whether to redact the information.

    Returns:
        str: The deidentified text.
    """
    deidentified, _ = self.deid_text_with_entities(text, redact=redact)
    return deidentified

deid_text_with_entities

deid_text_with_entities(text: str, redact: bool = False) -> tuple[str, Entities]

Deidentify text and potentially redact information.

De-identified text. If redaction is enabled, identifiable entities will be replaced with stars (e.g. *****). Otherwise, the replacement will be the CUI or, in other words, the type of information that was hidden (e.g. [PATIENT]).

Parameters:

  • text

    (str) –

    The text to deidentify.

  • redact

    (bool, default: False ) –

    Whether to redact the information.

Returns:

  • tuple[str, Entities]

    Tuple[str, Entities]: A tuple containing: - The deidentified text as a string. - The entities found and linked within the text.

Source code in medcat-v2/medcat/components/ner/trf/deid.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def deid_text_with_entities(self, text: str, redact: bool = False
                                ) -> tuple[str, Entities]:
    """Deidentify a single text and also return the entities found.

    With redaction enabled, identifiable entities are replaced
    with stars (e.g. `*****`).
    Without it, each entity is replaced with its CUI, i.e. the
    type of information that was hidden (e.g. [PATIENT]).

    Args:
        text (str): The text to deidentify.
        redact (bool): Whether to redact the information.

    Returns:
        Tuple[str, Entities]: A tuple containing:
            - The deidentified text as a string.
            - The entities returned by the wrapped CAT for the text.
    """
    found = self.cat.get_entities(text)
    to_replace = found['entities']
    name_lookup = self.cat.cdb.get_name
    deidentified = replace_entities_in_text(
        text, to_replace, name_lookup, redact=redact)
    return deidentified, found

load_model_pack classmethod

load_model_pack(model_pack_path: str, config: Optional[dict] = None) -> DeIdModel

Load DeId model from model pack.

The method first loads the CAT instance.

It then makes sure that the model pack corresponds to a valid DeId model.

Parameters:

  • config

    (Optional[dict], default: None ) –

    Config for DeId model pack (primarily for stride of overlap window)

  • model_pack_path

    (str) –

    The model pack path.

Raises:

  • ValueError

    If the model pack does not correspond to a DeId model.

Returns:

  • DeIdModel ( DeIdModel ) –

    The resulting DeId model.

Source code in medcat-v2/medcat/components/ner/trf/deid.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
@classmethod
def load_model_pack(cls, model_pack_path: str,
                    config: Optional[dict] = None) -> 'DeIdModel':
    """Load a DeId model from a model pack.

    The model pack is first loaded through `NerModel.load_model_pack`.
    The CAT instance obtained that way is then checked to make sure it
    corresponds to a valid DeId model before it gets wrapped.

    Args:
        model_pack_path (str): The model pack path.
        config: Config for DeId model pack (primarily for stride of
            overlap window)

    Raises:
        ValueError: If the model pack does not correspond to a DeId model.

    Returns:
        DeIdModel: The resulting DeId model.
    """
    loaded = NerModel.load_model_pack(model_pack_path, config=config)
    wrapped_cat = loaded.cat
    if cls._is_deid_model(wrapped_cat):
        return cls(wrapped_cat)
    # not a DeId model: report why the validation failed
    reason = cls._get_reason_not_deid(wrapped_cat)
    raise ValueError(
        f"The model saved at {model_pack_path} is not a deid model "
        f"({reason})")

train

train(json_path: Union[str, list, None], *args, **kwargs) -> tuple[Any, Any, Any]
Source code in medcat-v2/medcat/components/ner/trf/deid.py
71
72
73
74
def train(self, json_path: Union[str, list, None],
          *args, **kwargs) -> tuple[Any, Any, Any]:
    """Train the model by delegating to the parent class.

    All the arguments are passed on unchanged to the parent class's
    `train` method and its result is returned as-is.

    Args:
        json_path (Union[str, list, None]): Passed through as the first
            argument of the parent's `train`.
        *args: Extra positional arguments for the parent's `train`.
        **kwargs: Extra keyword arguments for the parent's `train`.

    Returns:
        tuple[Any, Any, Any]: The value returned by the parent's `train`.
    """
    parent_train = super().train
    return parent_train(json_path, *args, **kwargs)  # type: ignore