draw_io
trestle.core.draw_io
¤
Functionality for reading information from a drawio file.
logger
¤
Classes¤
DrawIO
¤
Access and process drawio data / metadata.
Source code in trestle/core/draw_io.py
class DrawIO():
    """
    Access and process drawio data / metadata.

    A drawio file is parsed into an ``mxfile`` root whose children are treated as diagrams (tabs).
    Every diagram either nests a single element directly or carries it as compressed text; both
    forms are normalised into ``self.diagrams`` when the file is loaded.
    """

    def __init__(self, file_path: pathlib.Path) -> None:
        """
        Load drawio object into memory.

        Args:
            file_path: Path to the drawio object.
        """
        self.file_path: pathlib.Path = file_path
        # Attribute names that are never read or written as metadata (special for drawio).
        self.banned_keys = ['id', 'label']
        self._load()

    def _load(self) -> None:
        """
        Load the file.

        Raises:
            TrestleError: If the path does not exist or is a directory, the file cannot be parsed,
                the root element is not ``mxfile``, or a diagram has more than one child element.
        """
        if not self.file_path.exists() or self.file_path.is_dir():
            raise TrestleError(f'Candidate drawio file {str(self.file_path)} does not exist or is a directory')
        try:
            self.raw_xml = defusedxml.ElementTree.parse(self.file_path, forbid_dtd=True)
        except Exception as e:
            # Chain the cause so the parser traceback is not lost.
            raise TrestleError(f'Exception loading Element tree from file: {e}') from e
        self.mx_file = self.raw_xml.getroot()
        if self.mx_file.tag != 'mxfile':
            raise TrestleError('DrawIO file is not a draw io file (mxfile)')
        self.diagrams = []
        for diagram in self.mx_file:
            # Determine if compressed or not
            # Assumption 1 mxGraphModel
            children = list(diagram)
            if not children:
                # Compressed object
                self.diagrams.append(self._uncompress(diagram.text))
            elif len(children) == 1:
                self.diagrams.append(children[0])
            else:
                raise TrestleError('Unhandled behaviour in drawio read.')

    def _uncompress(self, compressed_text: str) -> Element:
        """
        Given a compressed object from a drawio file return an xml element for the mxGraphModel.

        Args:
            compressed_text: A compressed mxGraphModel from inside an mxfile

        Returns:
            An element containing the mxGraphModel

        Raises:
            TrestleError: If the text cannot be decoded / inflated or does not hold an mxGraphModel.
        """
        try:
            # Assume b64 encode
            decoded = base64.b64decode(compressed_text)
            # Negative wbits: the payload is inflated as a raw deflate stream.
            clean_text = unquote(zlib.decompress(decoded, -15).decode(const.FILE_ENCODING))
        except (TypeError, ValueError, zlib.error) as e:
            # TypeError covers a diagram without any text; ValueError covers base64 / decoding errors.
            raise TrestleError(f'Unable to decode compressed drawio diagram: {e}') from e
        element = defusedxml.ElementTree.fromstring(clean_text, forbid_dtd=True)
        if element.tag != 'mxGraphModel':
            raise TrestleError('Unknown data structure within a compressed drawio file.')
        return element

    def get_metadata(self) -> List[Dict[str, str]]:
        """
        Get metadata from each tab if it exists or provide an empty dict.

        Returns:
            One dict per diagram, holding the attributes of the first ``object`` element found under
            the diagram's first child, excluding the banned keys. A diagram without such an element
            (or without any child at all) contributes an empty dict.
        """
        # Note that id and label are special for drawio.
        md_list: List[Dict[str, str]] = []
        for diagram in self.diagrams:
            # Drawio creates data within a root and then an object element type
            root_obj = next(iter(diagram), None)
            md_objects = root_obj.findall('object') if root_obj is not None else []
            if not md_objects:
                md_list.append({})
                continue
            md_list.append({key: val for key, val in md_objects[0].items() if key not in self.banned_keys})
        return md_list

    @classmethod
    def restructure_metadata(cls, input_dict: Dict[str, str]) -> Dict[str, Any]:
        """
        Restructure metadata into a hierarchical dict assuming a period separator.

        Args:
            input_dict: Flat mapping whose keys may contain periods, e.g. ``{'a.b': '1'}``.

        Returns:
            Nested mapping, e.g. ``{'a': {'b': '1'}}``.
        """
        # Group the full keys by the part before their first period.
        key_map: Dict[str, List[str]] = {}
        for full_key in input_dict:
            key_map.setdefault(full_key.split('.')[0], []).append(full_key)
        result: Dict[str, Any] = {}
        for key, values in key_map.items():
            if values == [key]:
                # A lone key without a period is a leaf.
                result[key] = input_dict[key]
            else:
                # Strip the shared prefix and recurse on what is left of each key.
                holding = {value.split('.', 1)[-1]: input_dict[value] for value in values}
                result[key] = cls.restructure_metadata(holding)
        return result

    def write_drawio_with_metadata(
        self, path: pathlib.Path, metadata: Dict, diagram_metadata_idx: int, target_path: pathlib.Path = None
    ) -> None:
        """
        Write modified metadata to drawio file.

        Writes given metadata to 'object' element attributes inside of the selected drawio diagram element.
        Currently supports writing only uncompressed elements.

        Args:
            path: path to write modified drawio file to
            metadata: dictionary of modified metadata to insert to drawio
            diagram_metadata_idx: index of diagram which metadata was modified
            target_path: if not provided the changes will be written to path

        Raises:
            TrestleError: If there is no diagram at the given index or the diagram has no 'object' element.
        """
        flattened_dict = self._flatten_dictionary(metadata)
        if diagram_metadata_idx >= len(self.diagrams):
            raise TrestleError(f'Drawio file {path} does not contain a diagram for index {diagram_metadata_idx}')
        diagram = self.diagrams[diagram_metadata_idx]
        root_obj = next(iter(diagram), None)
        md_objects = root_obj.findall('object') if root_obj is not None else []
        if not md_objects:
            raise TrestleError(f'Unable to write metadata, diagram in drawio file {path} does not have objects.')
        attributes = md_objects[0].attrib
        # Iterate over a snapshot of the keys because entries are deleted while looping.
        for key in list(attributes):
            if key not in flattened_dict and key not in self.banned_keys:
                # outdated key delete
                del attributes[key]
        for key, value in flattened_dict.items():
            if key not in self.banned_keys:
                attributes[key] = value
        parent_diagram = self.mx_file.findall('diagram')[diagram_metadata_idx]
        if not parent_diagram.findall('mxGraphModel'):
            # A diagram read from compressed text has no nested mxGraphModel in the tree, so the
            # edited element is attached before the tree is written.
            parent_diagram.insert(0, diagram)
        self.raw_xml.write(target_path if target_path else path)

    def _flatten_dictionary(self, metadata: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, str]:
        """
        Flatten hierarchical dict back to xml attributes.

        Args:
            metadata: Possibly nested dictionary.
            parent_key: Prefix prepended (with the separator) to every key of this level.
            separator: String placed between the key of a parent and the key of its child.

        Returns:
            Single-level dict whose keys are the joined paths to each non-dict value.
        """
        flattened: Dict[str, str] = {}
        for key, value in metadata.items():
            new_key = parent_key + separator + key if parent_key else key
            if isinstance(value, dict):
                flattened.update(self._flatten_dictionary(value, new_key, separator))
            else:
                flattened[new_key] = value
        return flattened
Methods¤
__init__(self, file_path)
special
¤
Load drawio object into memory.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_path |
Path |
Path to the drawio object. |
required |
Source code in trestle/core/draw_io.py
def __init__(self, file_path: pathlib.Path) -> None:
    """
    Read the drawio file at the given location and keep its content on the instance.

    Args:
        file_path: Path to the drawio object.
    """
    # Attribute names that are skipped whenever metadata is read or written.
    self.banned_keys = ['id', 'label']
    # The path has to be stored first: loading reads it from the instance.
    self.file_path: pathlib.Path = file_path
    self._load()
get_metadata(self)
¤
Get metadata from each tab if it exists or provide an empty dict.
Source code in trestle/core/draw_io.py
def get_metadata(self) -> List[Dict[str, str]]:
    """
    Get metadata from each tab if it exists or provide an empty dict.

    Returns:
        One dict per diagram, holding the attributes of the first ``object`` element found under
        the diagram's first child, excluding the banned keys. A diagram without such an element
        (or without any child at all) contributes an empty dict.
    """
    # Note that id and label are special for drawio.
    md_list: List[Dict[str, str]] = []
    for diagram in self.diagrams:
        # Drawio creates data within a root and then an object element type.
        # A diagram without children is treated like one without metadata instead of failing.
        root_obj = next(iter(diagram), None)
        md_objects = root_obj.findall('object') if root_obj is not None else []
        if not md_objects:
            md_list.append({})
            continue
        md_list.append({key: val for key, val in md_objects[0].items() if key not in self.banned_keys})
    return md_list
restructure_metadata(input_dict)
classmethod
¤
Restructure metadata into a hierarchical dict assuming a period separator.
Source code in trestle/core/draw_io.py
@classmethod
def restructure_metadata(cls, input_dict: Dict[str, str]) -> Dict[str, Any]:
    """
    Turn a flat, period-separated mapping into nested dictionaries.

    Keys are grouped by the text before their first period; a group that consists of exactly one
    key without any period becomes a leaf, every other group is stripped of its prefix and
    restructured recursively.
    """
    # Collect the full keys under their leading segment.
    grouped = {}
    for full_key in input_dict:
        grouped.setdefault(full_key.split('.')[0], []).append(full_key)

    nested = {}
    for stub, members in grouped.items():
        if members == [stub]:
            # Nothing to split off: keep the value as it is.
            nested[stub] = input_dict[stub]
            continue
        remainder = {member.split('.', 1)[-1]: input_dict[member] for member in members}
        nested[stub] = cls.restructure_metadata(remainder)
    return nested
write_drawio_with_metadata(self, path, metadata, diagram_metadata_idx, target_path=None)
¤
Write modified metadata to drawio file.
Writes given metadata to 'object' element attributes inside of the selected drawio diagram element. Currently supports writing only uncompressed elements.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
Path |
path to write modified drawio file to |
required |
metadata |
Dict |
dictionary of modified metadata to insert to drawio |
required |
diagram_metadata_idx |
int |
index of diagram which metadata was modified |
required |
target_path |
Path |
if not provided the changes will be written to path |
None |
Source code in trestle/core/draw_io.py
def write_drawio_with_metadata(
    self, path: pathlib.Path, metadata: Dict, diagram_metadata_idx: int, target_path: pathlib.Path = None
) -> None:
    """
    Write modified metadata to drawio file.

    Writes given metadata to 'object' element attributes inside of the selected drawio diagram element.
    Currently supports writing only uncompressed elements.

    Args:
        path: path to write modified drawio file to
        metadata: dictionary of modified metadata to insert to drawio
        diagram_metadata_idx: index of diagram which metadata was modified
        target_path: if not provided the changes will be written to path

    Raises:
        TrestleError: If there is no diagram at the given index or the diagram has no 'object' element.
    """
    flattened_dict = self._flatten_dictionary(metadata)
    if diagram_metadata_idx >= len(self.diagrams):
        raise TrestleError(f'Drawio file {path} does not contain a diagram for index {diagram_metadata_idx}')
    diagram = self.diagrams[diagram_metadata_idx]
    # A diagram without any child is reported like one without objects instead of an IndexError.
    root_obj = next(iter(diagram), None)
    md_objects = root_obj.findall('object') if root_obj is not None else []
    if not md_objects:
        raise TrestleError(f'Unable to write metadata, diagram in drawio file {path} does not have objects.')
    attributes = md_objects[0].attrib
    # Iterate over a snapshot of the keys because entries are deleted while looping.
    for key in list(attributes):
        if key not in flattened_dict and key not in self.banned_keys:
            # outdated key delete
            del attributes[key]
    # Insert new values and overwrite existing ones; banned keys are never touched.
    for key, value in flattened_dict.items():
        if key not in self.banned_keys:
            attributes[key] = value
    parent_diagram = self.mx_file.findall('diagram')[diagram_metadata_idx]
    if not parent_diagram.findall('mxGraphModel'):
        # The edited element is not nested in the tree yet, so attach it before writing.
        parent_diagram.insert(0, diagram)
    self.raw_xml.write(target_path if target_path else path)
DrawIOMetadataValidator
¤
Validator to check whether drawio metadata meets validation expectations.
Source code in trestle/core/draw_io.py
class DrawIOMetadataValidator():
    """Validator to check whether drawio metadata meets validation expectations."""

    def __init__(self, template_path: pathlib.Path, must_be_first_tab: bool = True) -> None:
        """
        Initialize drawio validator.

        Args:
            template_path: Path to a templated drawio file where metadata will be looked up on the first tab only.
            must_be_first_tab: If True only the first tab of a candidate file is compared with the template;
                if False every tab is tried until one matches.

        Raises:
            TrestleError: If the template has no diagram, its version is not part of the template path,
                or its 'Version' entry differs from the extracted template version.
        """
        self.template_path = template_path
        self.must_be_first_tab = must_be_first_tab
        # Load metadata from template
        template_tabs = DrawIO(self.template_path).get_metadata()
        if not template_tabs:
            raise TrestleError(f'Template drawio file {self.template_path} does not contain any diagrams.')
        # Zero index as must be first tab
        self.template_metadata = template_tabs[0]
        self.template_version = MarkdownValidator.extract_template_version(self.template_metadata)
        # Substring test: the version string has to occur somewhere in the template path.
        if self.template_version not in str(self.template_path):
            raise TrestleError(
                f'Version of the template {self.template_version} does not match the path {self.template_path}. '
                + f'Move the template to the folder {self.template_version}'
            )
        if 'Version' in self.template_metadata and self.template_metadata['Version'] != self.template_version:
            raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')

    def validate(self, candidate: pathlib.Path) -> bool:
        """
        Run drawio validation against a candidate file.

        Args:
            candidate: The path to a candidate drawio file to be validated.

        Returns:
            Whether or not the validation passes. A candidate without any diagram does not pass.

        Raises:
            err.TrestleError: If a file IO / formatting error occurs.
        """
        logging.info(f'Validating drawio file {candidate} against template file {self.template_path}')
        drawio_metadata = DrawIO(candidate).get_metadata()
        if self.must_be_first_tab:
            if not drawio_metadata:
                # No tab at all: there is nothing that could match the template.
                return False
            return MarkdownValidator.compare_keys(self.template_metadata, drawio_metadata[0])
        # Any tab may carry the metadata; the first match wins.
        for md_tab in drawio_metadata:
            status = MarkdownValidator.compare_keys(self.template_metadata, md_tab)
            if status:
                return status
        return False
Methods¤
__init__(self, template_path, must_be_first_tab=True)
special
¤
Initialize drawio validator.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
template_path |
Path |
Path to a templated drawio file where metadata will be looked up on the first tab only. |
required |
must_be_first_tab |
bool |
Whether the metadata must be on the first tab of the candidate file; if False, every tab is searched for matching metadata. |
True |
Source code in trestle/core/draw_io.py
def __init__(self, template_path: pathlib.Path, must_be_first_tab: bool = True) -> None:
    """
    Initialize drawio validator.

    Args:
        template_path: Path to a templated drawio file where metadata will be looked up on the first tab only.
        must_be_first_tab: Stored on the instance as the flag that restricts validation to the first tab.

    Raises:
        TrestleError: If the template has no diagram, its version is not part of the template path,
            or its 'Version' entry differs from the extracted template version.
    """
    self.template_path = template_path
    self.must_be_first_tab = must_be_first_tab
    # Load metadata from template
    template_tabs = DrawIO(self.template_path).get_metadata()
    if not template_tabs:
        # Report a template without tabs as a TrestleError instead of an IndexError.
        raise TrestleError(f'Template drawio file {self.template_path} does not contain any diagrams.')
    # Zero index as must be first tab
    self.template_metadata = template_tabs[0]
    self.template_version = MarkdownValidator.extract_template_version(self.template_metadata)
    # Substring test: the version string has to occur somewhere in the template path.
    if self.template_version not in str(self.template_path):
        raise TrestleError(
            f'Version of the template {self.template_version} does not match the path {self.template_path}. '
            + f'Move the template to the folder {self.template_version}'
        )
    if 'Version' in self.template_metadata and self.template_metadata['Version'] != self.template_version:
        raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')
validate(self, candidate)
¤
Run drawio validation against a candidate file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
candidate |
Path |
The path to a candidate drawio file to be validated. |
required |
Returns:
Type | Description |
---|---|
bool |
Whether or not the validation passes. |
Exceptions:
Type | Description |
---|---|
err.TrestleError |
If a file IO / formatting error occurs. |
Source code in trestle/core/draw_io.py
def validate(self, candidate: pathlib.Path) -> bool:
    """
    Run drawio validation against a candidate file.

    Args:
        candidate: The path to a candidate drawio file to be validated.

    Returns:
        Whether or not the validation passes. A candidate without any diagram does not pass.

    Raises:
        err.TrestleError: If a file IO / formatting error occurs.
    """
    logging.info(f'Validating drawio file {candidate} against template file {self.template_path}')
    drawio_metadata = DrawIO(candidate).get_metadata()
    if self.must_be_first_tab:
        if not drawio_metadata:
            # No tab at all: there is nothing that could match the template.
            return False
        return MarkdownValidator.compare_keys(self.template_metadata, drawio_metadata[0])
    # Any tab may carry the metadata; the first match wins.
    for md_tab in drawio_metadata:
        status = MarkdownValidator.compare_keys(self.template_metadata, md_tab)
        if status:
            return status
    return False
handler: python