Skip to content

draw_io

trestle.core.draw_io ¤

Functionality for reading information from a drawio file.

Attributes¤

logger = logging.getLogger(__name__) module-attribute ¤

Classes¤

DrawIO ¤

Access and process drawio data / metadata.

Source code in trestle/core/draw_io.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
class DrawIO():
    """Access and process drawio data / metadata."""

    def __init__(self, file_path: pathlib.Path) -> None:
        """
        Load drawio object into memory.

        Args:
            file_path: Path to the drawio object.

        Raises:
            TrestleError: If the file is missing, is a directory, cannot be parsed or is not an mxfile.
        """
        self.file_path: pathlib.Path = file_path
        self._load()
        # id and label are special for drawio and are never treated as user metadata.
        self.banned_keys = ['id', 'label']

    def _load(self) -> None:
        """
        Load the file and collect one mxGraphModel element per diagram into self.diagrams.

        A diagram element without child elements is treated as compressed text, a diagram with exactly
        one child is assumed to hold its mxGraphModel directly.

        Raises:
            TrestleError: If the path is unusable, the xml cannot be parsed, the root is not an mxfile,
                or a diagram has more than one child element.
        """
        if not self.file_path.exists() or self.file_path.is_dir():
            raise TrestleError(f'Candidate drawio file {str(self.file_path)} does not exist or is a directory')
        try:
            self.raw_xml = defusedxml.ElementTree.parse(self.file_path, forbid_dtd=True)
        except Exception as e:
            # Chain the cause so the original parser traceback is not lost.
            raise TrestleError(f'Exception loading Element tree from file: {e}') from e
        self.mx_file = self.raw_xml.getroot()
        if not self.mx_file.tag == 'mxfile':
            raise TrestleError('DrawIO file is not a draw io file (mxfile)')
        self.diagrams = []
        for diagram in self.mx_file:
            # Determine if compressed or not
            # Assumption 1 mxGraphModel
            children = list(diagram)
            if not children:
                # Compressed object
                self.diagrams.append(self._uncompress(diagram.text))
            elif len(children) == 1:
                self.diagrams.append(children[0])
            else:
                raise TrestleError('Unhandled behaviour in drawio read.')

    def _uncompress(self, compressed_text: str) -> Element:
        """
        Given a compressed object from a drawio file return an xml element for the mxGraphModel.

        Args:
            compressed_text: A compressed mxGraphModel from inside an mxfile

        Returns:
            An element containing the mxGraphModel

        Raises:
            TrestleError: If the text is missing, cannot be decoded, or does not contain an mxGraphModel.
        """
        # An empty diagram element has no text at all (None); report it instead of failing in b64decode.
        if not compressed_text or not compressed_text.strip():
            raise TrestleError('Diagram in drawio file contains neither an mxGraphModel nor compressed data.')
        try:
            # Assume b64 encode
            decoded = base64.b64decode(compressed_text)
            # Negative wbits: the payload is a raw deflate stream without a zlib header.
            clean_text = unquote(zlib.decompress(decoded, -15).decode(const.FILE_ENCODING))
        except (ValueError, zlib.error) as e:
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
            raise TrestleError(f'Unable to decode compressed drawio diagram: {e}') from e
        element = defusedxml.ElementTree.fromstring(clean_text, forbid_dtd=True)
        if not element.tag == 'mxGraphModel':
            raise TrestleError('Unknown data structure within a compressed drawio file.')
        return element

    def get_metadata(self) -> List[Dict[str, str]]:
        """
        Get metadata from each tab if it exists or provide an empty dict.

        Returns:
            One dict per diagram, in diagram order, holding the attributes of the first 'object'
            element below the diagram's first child, excluding the banned keys.
        """
        # Note that id and label are special for drawio.
        md_list: List[Dict[str, str]] = []
        for diagram in self.diagrams:
            # Drawio creates data within a root and then an object element type
            root_obj = next(iter(diagram), None)
            # Compare with None explicitly: an Element without children is falsy.
            md_objects = root_obj.findall('object') if root_obj is not None else []
            if not md_objects:
                md_list.append({})
                continue
            md_list.append({key: val for key, val in md_objects[0].items() if key not in self.banned_keys})
        return md_list

    @classmethod
    def restructure_metadata(cls, input_dict: Dict[str, str]) -> Dict[str, Any]:
        """
        Restructure metadata into a hierarchical dict assuming a period separator.

        Args:
            input_dict: Flat mapping whose keys may contain '.' to express nesting.

        Returns:
            Nested dict where every key segment before a '.' becomes a level of nesting.
        """
        # Group the full keys by their first segment, keeping first-seen order.
        key_map: Dict[str, List[str]] = {}
        for full_key in input_dict:
            key_map.setdefault(full_key.split('.')[0], []).append(full_key)

        result = {}
        for stub, full_keys in key_map.items():
            if len(full_keys) == 1 and stub == full_keys[0]:
                # A lone key without separator is a leaf value.
                result[stub] = input_dict[stub]
            else:
                # Strip the first segment and recurse on the remainder.
                holding = {full_key.split('.', 1)[-1]: input_dict[full_key] for full_key in full_keys}
                result[stub] = cls.restructure_metadata(holding)
        return result

    def write_drawio_with_metadata(
        self, path: pathlib.Path, metadata: Dict, diagram_metadata_idx: int, target_path: pathlib.Path = None
    ) -> None:
        """
        Write modified metadata to drawio file.

        Writes given metadata to 'object' element attributes inside of the selected drawio diagram element.
        Currently supports writing only uncompressed elements.

        Args:
            path: path to write modified drawio file to
            metadata: dictionary of modified metadata to insert to drawio
            diagram_metadata_idx: index of diagram which metadata was modified
            target_path: if not provided the changes will be written to path

        Raises:
            TrestleError: If the index does not select a diagram or the diagram has no metadata object.
        """
        flattened_dict = self._flatten_dictionary(metadata)
        n_diagrams = len(self.diagrams)
        # Negative indices keep working as before; anything out of range is reported uniformly.
        if not -n_diagrams <= diagram_metadata_idx < n_diagrams:
            raise TrestleError(f'Drawio file {path} does not contain a diagram for index {diagram_metadata_idx}')

        diagram = self.diagrams[diagram_metadata_idx]
        root_obj = next(iter(diagram), None)
        md_objects = root_obj.findall('object') if root_obj is not None else []
        if not md_objects:
            raise TrestleError(f'Unable to write metadata, diagram in drawio file {path} does not have objects.')

        md_attrib = md_objects[0].attrib
        # Drop outdated keys; iterate over a snapshot because the mapping is mutated.
        for key in list(md_attrib):
            if key not in flattened_dict and key not in self.banned_keys:
                del md_attrib[key]
        # Insert or overwrite everything supplied, never touching the banned keys.
        for key, value in flattened_dict.items():
            if key not in self.banned_keys:
                md_attrib[key] = value

        parent_diagram = self.mx_file.findall('diagram')[diagram_metadata_idx]
        if len(parent_diagram.findall('mxGraphModel')) == 0:
            # The diagram was read from compressed text, attach the decoded model so the edit is written.
            # NOTE(review): the original compressed text of the diagram element is left in place - confirm.
            parent_diagram.insert(0, diagram)

        self.raw_xml.write(target_path if target_path else path)

    def _flatten_dictionary(self, metadata: Dict, parent_key='', separator='.') -> Dict[str, str]:
        """
        Flatten hierarchical dict back to xml attributes.

        Args:
            metadata: Possibly nested dictionary.
            parent_key: Prefix accumulated from the enclosing levels.
            separator: String placed between the key segments.

        Returns:
            Single level dict whose keys are the joined key paths.
        """
        flat = {}
        for key, value in metadata.items():
            new_key = parent_key + separator + key if parent_key else key
            if isinstance(value, dict):
                flat.update(self._flatten_dictionary(value, new_key, separator))
            else:
                flat[new_key] = value
        return flat
Attributes¤
banned_keys = ['id', 'label'] instance-attribute ¤
file_path: pathlib.Path = file_path instance-attribute ¤
Functions¤
__init__(file_path) ¤

Load drawio object into memory.

Parameters:

Name Type Description Default
file_path Path

Path to the drawio object.

required
Source code in trestle/core/draw_io.py
37
38
39
40
41
42
43
44
45
46
def __init__(self, file_path: pathlib.Path) -> None:
    """
    Load drawio object into memory.

    Args:
        file_path: Path to the drawio object.
    """
    # Remember where the drawio data comes from; _load reads from this path.
    self.file_path: pathlib.Path = file_path
    # Runs during construction, so loading problems surface while the object is being created.
    self._load()
    # Attribute names that are skipped when metadata is read or written.
    self.banned_keys = ['id', 'label']
get_metadata() ¤

Get metadata from each tab if it exists or provide an empty dict.

Source code in trestle/core/draw_io.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def get_metadata(self) -> List[Dict[str, str]]:
    """
    Get metadata from each tab if it exists or provide an empty dict.

    Returns:
        One dict per diagram, in diagram order, holding the attributes of the first 'object'
        element below the diagram's first child, excluding the banned keys. A diagram without
        children or without an 'object' element yields an empty dict.
    """
    # Note that id and label are special for drawio.
    md_list: List[Dict[str, str]] = []
    for diagram in self.diagrams:
        # Drawio creates data within a root and then an object element type
        root_obj = next(iter(diagram), None)
        # Compare with None explicitly: an Element without children is falsy.
        md_objects = root_obj.findall('object') if root_obj is not None else []
        if not md_objects:
            md_list.append({})
            continue
        md_list.append({key: val for key, val in md_objects[0].items() if key not in self.banned_keys})
    return md_list
restructure_metadata(input_dict) classmethod ¤

Restructure metadata into a hierarchical dict assuming a period separator.

Source code in trestle/core/draw_io.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
@classmethod
def restructure_metadata(cls, input_dict: Dict[str, str]) -> Dict[str, Any]:
    """
    Turn a flat, period separated mapping into a nested dictionary.

    Args:
        input_dict: Flat mapping whose keys may contain '.' to express nesting.

    Returns:
        Nested dict in which each key segment before a '.' becomes one level of nesting.
    """
    # Bucket the full keys by their leading segment, preserving first-seen order.
    grouped = {}
    for full_key in input_dict:
        grouped.setdefault(full_key.split('.')[0], []).append(full_key)

    nested = {}
    for stub, members in grouped.items():
        is_leaf = len(members) == 1 and members[0] == stub
        if is_leaf:
            nested[stub] = input_dict[stub]
            continue
        # Remove the leading segment from every member and resolve the rest recursively.
        remainder = {member.split('.', 1)[-1]: input_dict[member] for member in members}
        nested[stub] = cls.restructure_metadata(remainder)
    return nested
write_drawio_with_metadata(path, metadata, diagram_metadata_idx, target_path=None) ¤

Write modified metadata to drawio file.

Writes given metadata to 'object' element attributes inside of the selected drawio diagram element. Currently supports writing only uncompressed elements.

Parameters:

Name Type Description Default
path Path

path to write modified drawio file to

required
metadata Dict

dictionary of modified metadata to insert to drawio

required
diagram_metadata_idx int

index of diagram which metadata was modified

required
target_path Path

if not provided the changes will be written to path

None
Source code in trestle/core/draw_io.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def write_drawio_with_metadata(
    self, path: pathlib.Path, metadata: Dict, diagram_metadata_idx: int, target_path: pathlib.Path = None
) -> None:
    """
    Write modified metadata to drawio file.

    Writes given metadata to 'object' element attributes inside of the selected drawio diagram element.
    Currently supports writing only uncompressed elements.

    Args:
        path: path to write modified drawio file to
        metadata: dictionary of modified metadata to insert to drawio
        diagram_metadata_idx: index of diagram which metadata was modified
        target_path: if not provided the changes will be written to path

    Raises:
        TrestleError: If the index does not select a diagram or the diagram has no metadata object.
    """
    flattened_dict = self._flatten_dictionary(metadata)
    n_diagrams = len(self.diagrams)
    # Negative indices keep working as before; anything out of range is reported uniformly.
    if not -n_diagrams <= diagram_metadata_idx < n_diagrams:
        raise TrestleError(f'Drawio file {path} does not contain a diagram for index {diagram_metadata_idx}')

    diagram = self.diagrams[diagram_metadata_idx]
    root_obj = next(iter(diagram), None)
    # Compare with None explicitly: an Element without children is falsy.
    md_objects = root_obj.findall('object') if root_obj is not None else []
    if not md_objects:
        raise TrestleError(f'Unable to write metadata, diagram in drawio file {path} does not have objects.')

    md_attrib = md_objects[0].attrib
    # Drop outdated keys; iterate over a snapshot because the mapping is mutated.
    for key in list(md_attrib):
        if key not in flattened_dict and key not in self.banned_keys:
            del md_attrib[key]
    # Insert or overwrite everything supplied, never touching the banned keys.
    for key, value in flattened_dict.items():
        if key not in self.banned_keys:
            md_attrib[key] = value

    parent_diagram = self.mx_file.findall('diagram')[diagram_metadata_idx]
    if len(parent_diagram.findall('mxGraphModel')) == 0:
        # The diagram element has no mxGraphModel child yet, attach the in-memory model so the edit is written.
        parent_diagram.insert(0, diagram)

    self.raw_xml.write(target_path if target_path else path)

DrawIOMetadataValidator ¤

Validator to check whether drawio metadata meets validation expectations.

Source code in trestle/core/draw_io.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
class DrawIOMetadataValidator():
    """Validator to check whether drawio metadata meets validation expectations."""

    def __init__(self, template_path: pathlib.Path, must_be_first_tab: bool = True) -> None:
        """
        Initialize drawio validator.

        Args:
            template_path: Path to a templated drawio file where metadata will be looked up on the first tab only.
            must_be_first_tab: If True only the first tab of a candidate file is validated, otherwise
                every tab is tried until one matches the template.

        Raises:
            TrestleError: If the template cannot be loaded, has no tabs, or its version does not match
                its path or its 'Version' metadata.
        """
        self.template_path = template_path
        self.must_be_first_tab = must_be_first_tab
        # Load metadata from template
        template_drawio = DrawIO(self.template_path)
        template_tabs = template_drawio.get_metadata()
        if not template_tabs:
            # Report a template without tabs explicitly instead of failing with an IndexError.
            raise TrestleError(f'Template drawio file {self.template_path} does not contain any diagrams.')
        # Zero index as must be first tab
        self.template_metadata = template_tabs[0]
        self.template_version = MarkdownValidator.extract_template_version(self.template_metadata)
        if self.template_version not in str(self.template_path):
            raise TrestleError(
                f'Version of the template {self.template_version} does not match the path {self.template_path}. '
                + f'Move the template to the folder {self.template_version}'
            )
        if 'Version' in self.template_metadata.keys() and self.template_metadata['Version'] != self.template_version:
            raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')

    def validate(self, candidate: pathlib.Path) -> bool:
        """
        Run drawio validation against a candidate file.

        Args:
            candidate: The path to a candidate drawio file to be validated.

        Returns:
            Whether or not the validation passes. A candidate without any tabs never passes.

        Raises:
            err.TrestleError: If a file IO / formatting error occurs.
        """
        # Use the module logger; logging.info would log through (and may implicitly configure) the root logger.
        logger.info(f'Validating drawio file {candidate} against template file {self.template_path}')
        candidate_drawio = DrawIO(candidate)
        drawio_metadata = candidate_drawio.get_metadata()

        if not drawio_metadata:
            # Nothing to compare against, consistent with the multi tab search below.
            return False
        if self.must_be_first_tab:
            return MarkdownValidator.compare_keys(self.template_metadata, drawio_metadata[0])
        for md_tab in drawio_metadata:
            status = MarkdownValidator.compare_keys(self.template_metadata, md_tab)
            if status:
                return status
        return False
Attributes¤
must_be_first_tab = must_be_first_tab instance-attribute ¤
template_metadata = template_drawio.get_metadata()[0] instance-attribute ¤
template_path = template_path instance-attribute ¤
template_version = MarkdownValidator.extract_template_version(self.template_metadata) instance-attribute ¤
Functions¤
__init__(template_path, must_be_first_tab=True) ¤

Initialize drawio validator.

Parameters:

Name Type Description Default
template_path Path

Path to a templated drawio file where metadata will be looked up on the first tab only.

required
must_be_first_tab bool

If True, only the first tab of the candidate file is checked for metadata; if False, every tab is searched until one matches the template.

True
Source code in trestle/core/draw_io.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
def __init__(self, template_path: pathlib.Path, must_be_first_tab: bool = True) -> None:
    """
    Initialize drawio validator.

    Args:
        template_path: Path to a templated drawio file where metadata will be looked up on the first tab only.
        must_be_first_tab: If True only the first tab of a candidate file is validated, otherwise
            every tab is tried until one matches the template.

    Raises:
        TrestleError: If the template cannot be loaded, has no tabs, or its version does not match
            its path or its 'Version' metadata.
    """
    self.template_path = template_path
    self.must_be_first_tab = must_be_first_tab
    # Load metadata from template
    template_drawio = DrawIO(self.template_path)
    template_tabs = template_drawio.get_metadata()
    if not template_tabs:
        # Report a template without tabs explicitly instead of failing with an IndexError.
        raise TrestleError(f'Template drawio file {self.template_path} does not contain any diagrams.')
    # Zero index as must be first tab
    self.template_metadata = template_tabs[0]
    self.template_version = MarkdownValidator.extract_template_version(self.template_metadata)
    if self.template_version not in str(self.template_path):
        raise TrestleError(
            f'Version of the template {self.template_version} does not match the path {self.template_path}. '
            + f'Move the template to the folder {self.template_version}'
        )
    if 'Version' in self.template_metadata.keys() and self.template_metadata['Version'] != self.template_version:
        raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')
validate(candidate) ¤

Run drawio validation against a candidate file.

Parameters:

Name Type Description Default
candidate Path

The path to a candidate drawio file to be validated.

required

Returns:

Type Description
bool

Whether or not the validation passes.

Raises:

Type Description
TrestleError

If a file IO / formatting error occurs.

Source code in trestle/core/draw_io.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def validate(self, candidate: pathlib.Path) -> bool:
    """
    Run drawio validation against a candidate file.

    Args:
        candidate: The path to a candidate drawio file to be validated.

    Returns:
        Whether or not the validation passes. A candidate without any tabs never passes.

    Raises:
        err.TrestleError: If a file IO / formatting error occurs.
    """
    # Use the module logger; logging.info would log through (and may implicitly configure) the root logger.
    logger.info(f'Validating drawio file {candidate} against template file {self.template_path}')
    candidate_drawio = DrawIO(candidate)
    drawio_metadata = candidate_drawio.get_metadata()

    if not drawio_metadata:
        # Nothing to compare against, consistent with the multi tab search below.
        return False
    if self.must_be_first_tab:
        return MarkdownValidator.compare_keys(self.template_metadata, drawio_metadata[0])
    for md_tab in drawio_metadata:
        status = MarkdownValidator.compare_keys(self.template_metadata, md_tab)
        if status:
            return status
    return False

handler: python