Skip to content

medcat.data.mctexport

Classes:

Functions:

Attributes:

MedCATTrainerExportProjectInfo module-attribute

MedCATTrainerExportProjectInfo = tuple[str, Any, str, Optional[str]]

A tuple of the project name, the project ID, the CUIs string, and the (optional) TUIs string.

MedCATTrainerExport

Bases: TypedDict

Attributes:

projects instance-attribute

MedCATTrainerExportAnnotation

Bases: MedCATTrainerExportAnnotationRequired

Attributes:

id instance-attribute

id: Union[str, int]

meta_anns instance-attribute

validated instance-attribute

validated: Optional[bool]

MedCATTrainerExportAnnotationRequired

Bases: TypedDict

Attributes:

cui instance-attribute

cui: str

end instance-attribute

end: int

start instance-attribute

start: int

value instance-attribute

value: str

MedCATTrainerExportDocument

Bases: TypedDict

Attributes:

annotations instance-attribute

id instance-attribute

id: Any

last_modified instance-attribute

last_modified: str

name instance-attribute

name: str

text instance-attribute

text: str

MedCATTrainerExportProject

Bases: TypedDict

Attributes:

cuis instance-attribute

cuis: str

documents instance-attribute

id instance-attribute

id: Any

name instance-attribute

name: str

tuis instance-attribute

tuis: Optional[str]

MetaAnnotation

Bases: TypedDict

Attributes:

acc instance-attribute

acc: float

name instance-attribute

name: str

validated instance-attribute

validated: bool

value instance-attribute

value: str

count_all_annotations

count_all_annotations(export: MedCATTrainerExport) -> int

Count the number of annotations in a trainer export.

Parameters:

Returns:

  • int ( int ) –

    The total number of annotations.

Source code in medcat-v2/medcat/data/mctexport.py
106
107
108
109
110
111
112
113
114
115
def count_all_annotations(export: MedCATTrainerExport) -> int:
    """Count every annotation contained in a trainer export.

    The annotations are enumerated with `iter_anns` and tallied one by one.

    Args:
        export (MedCATTrainerExport): The trainer export.

    Returns:
        int: The total number of annotations.
    """
    return sum(1 for _ in iter_anns(export))

count_all_docs

count_all_docs(export: MedCATTrainerExport) -> int

Count the number of documents in a trainer export.

Parameters:

Returns:

  • int ( int ) –

    The total number of documents.

Source code in medcat-v2/medcat/data/mctexport.py
118
119
120
121
122
123
124
125
126
127
def count_all_docs(export: MedCATTrainerExport) -> int:
    """Count every document contained in a trainer export.

    The documents are enumerated with `iter_docs` and tallied one by one.

    Args:
        export (MedCATTrainerExport): The trainer export.

    Returns:
        int: The total number of documents.
    """
    return sum(1 for _ in iter_docs(export))

count_anns_per_concept

count_anns_per_concept(export: MedCATTrainerExport) -> dict[str, int]
Source code in medcat-v2/medcat/data/mctexport.py
142
143
144
145
146
def count_anns_per_concept(export: MedCATTrainerExport) -> dict[str, int]:
    """Count the annotations in a trainer export for each concept.

    Args:
        export (MedCATTrainerExport): The trainer export.

    Returns:
        dict[str, int]: Mapping from each annotation's 'cui' value to the
            number of annotations carrying that value.
    """
    # Counter tallies the CUIs in the order they are first encountered.
    per_cui = Counter(ann['cui'] for _, _, ann in iter_anns(export))
    # Hand back a plain dict rather than the Counter itself.
    return dict(per_cui)

get_nr_of_annotations

get_nr_of_annotations(doc: MedCATTrainerExportDocument) -> int

Get the number of annotations for a trainer export document.

Parameters:

Returns:

  • int ( int ) –

    The number of annotations within the document.

Source code in medcat-v2/medcat/data/mctexport.py
130
131
132
133
134
135
136
137
138
139
def get_nr_of_annotations(doc: MedCATTrainerExportDocument) -> int:
    """Get the number of annotations for a trainer export document.

    Args:
        doc (MedCATTrainerExportDocument): The trainer export document.

    Returns:
        int: The number of annotations within the document.
    """
    annotations = doc['annotations']
    return len(annotations)

iter_anns

Iterate over all the annotations in a trainer export.

Parameters:

Yields:

Source code in medcat-v2/medcat/data/mctexport.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def iter_anns(export: MedCATTrainerExport
              ) -> Iterator[tuple[MedCATTrainerExportProjectInfo,
                                  MedCATTrainerExportDocument,
                                  MedCATTrainerExportAnnotation]]:
    """Iterate over all the annotations in a trainer export.

    Walks every document produced by `iter_docs` and emits each entry of
    the document's 'annotations' together with its document and project
    info.

    Args:
        export (MedCATTrainerExport): The trainer export.

    Yields:
        Iterator[tuple[MedCATTrainerExportProjectInfo,
                       MedCATTrainerExportDocument,
                       MedCATTrainerExportAnnotation]]:
            The project info, the document, and the annotation.
    """
    for project_info, document in iter_docs(export):
        yield from (
            (project_info, document, annotation)
            for annotation in document['annotations']
        )

iter_docs

Iterate over all the docs in a trainer export.

Parameters:

Yields:

Source code in medcat-v2/medcat/data/mctexport.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def iter_docs(export: MedCATTrainerExport
              ) -> Iterator[tuple[MedCATTrainerExportProjectInfo,
                                  MedCATTrainerExportDocument]]:
    """Iterate over all the docs in a trainer export.

    For each project produced by `iter_projects`, the project info tuple
    (name, ID, CUIs, TUIs) is built once and paired with every entry of
    the project's 'documents'.

    Args:
        export (MedCATTrainerExport): The trainer export.

    Yields:
        Iterator[tuple[MedCATTrainerExportProjectInfo,
                       MedCATTrainerExportDocument]]:
            The project info and the document.
    """
    for proj in iter_projects(export):
        name, proj_id, cuis = proj['name'], proj['id'], proj['cuis']
        # 'tuis' may be absent from a project; None is used in that case.
        tuis = proj.get('tuis')
        proj_info: MedCATTrainerExportProjectInfo = (
            name, proj_id, cuis, tuis)
        for document in proj['documents']:
            yield proj_info, document

iter_projects

Iterate over all the projects in the trainer export.

Parameters:

Yields:

Source code in medcat-v2/medcat/data/mctexport.py
51
52
53
54
55
56
57
58
59
60
61
def iter_projects(export: MedCATTrainerExport
                  ) -> Iterator[MedCATTrainerExportProject]:
    """Iterate over all the projects in the trainer export.

    The projects are read from the export's 'projects' entry and emitted
    in the order they are stored there.

    Args:
        export (MedCATTrainerExport): The trainer export.

    Yields:
        Iterator[MedCATTrainerExportProject]: Project iterator.
    """
    for project in export['projects']:
        yield project