Skip to content

markdown_validator

trestle.core.markdown.markdown_validator ¤

Markdown Validator.

Attributes¤

logger = logging.getLogger(__name__) module-attribute ¤

Classes¤

MarkdownValidator ¤

A markdown validator. Validates markdown instance against given template.

Source code in trestle/core/markdown/markdown_validator.py
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
class MarkdownValidator:
    """A markdown validator. Validates markdown instance against given template."""

    def __init__(
        self,
        tmp_path: pathlib.Path,
        template_header: Dict,
        template_tree: DocsMarkdownNode,
        validate_yaml_header: bool,
        validate_md_body: bool,
        governed_section: Optional[str] = None
    ):
        """
        Initialize markdown validator.

        Args:
            tmp_path: path of the template; its string form must contain the template version
            template_header: YAML header of the template
            template_tree: tree structure representing the template markdown
            validate_yaml_header: whether the YAML header of an instance is validated
            validate_md_body: whether the markdown body of an instance is validated
            governed_section: optional governed section heading; surrounding spaces are stripped
        Raises:
            TrestleError: if the template version does not occur in the template path, or if a
                'Version' header exists and differs from the template version.
        """
        self._validate_yaml_header = validate_yaml_header
        self._validate_md_body = validate_md_body
        self.governed_section = governed_section.strip(' ') if governed_section is not None else None
        self.template_header = template_header
        self.template_tree = template_tree
        self.template_path = tmp_path
        self.template_version = self.extract_template_version(self.template_header)

        if self.template_version not in str(self.template_path):
            raise TrestleError(
                f'Version of the template {self.template_version} does not match the path {self.template_path}. '
                f'Move the template to the folder {self.template_version}'
            )
        if 'Version' in self.template_header and self.template_header['Version'] != self.template_version:
            raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')

        # Lower-cased header names that are skipped when comparing YAML headers.
        self._ignore_headers = []
        for key in self.template_header:
            lowered = key.lower()
            if not lowered.startswith('x-trestle-'):
                continue
            self._ignore_headers.append(lowered)
            if lowered == 'x-trestle-ignore':
                # Index with the key as actually spelled in the header: the match above is
                # case-insensitive, so the literal lower-case key may not exist.
                for ignored_key in self.template_header[key]:
                    self._ignore_headers.append(ignored_key.lower())

    def is_valid_against_template(
        self, instance: pathlib.Path, instance_header: Dict, instance_tree: DocsMarkdownNode
    ) -> bool:
        """
        Validate instance markdown against template.

        Instance is correct against a template iff:
            1. For YAML header keys:
                a. All keys from the template are present and not modified
                b. Template version in the template and instance match
            2. On the Markdown w/o YAML header:
                a. No additional headers of the level 1 were added
                b. Headers were not reordered
                c. Headers in the instance should be a superset of the template headers
                d. Headers must be in hierarchical order (i.e. # then ### then ## is not allowed)
            3. If Governed Header is given then:
                a. Governed Header is not modified
                b. All keys (i.e. key: something) inside the section are present

        Note that when YAML header validation is enabled and body validation is disabled, the
        method returns as soon as the headers match; the governed section is not checked then.

        Args:
            instance: a path to the markdown instance that should be validated
            instance_header: a YAML header extracted from the markdown
            instance_tree: a tree structure representing markdown contents
        Returns:
            Whether or not the candidate is valid against the template.
        """
        if self._validate_yaml_header:
            headers_match = self.compare_keys(self.template_header, instance_header, self._ignore_headers)

            if not headers_match:
                logger.info(f'YAML header mismatch between template {self.template_path} and instance {instance}')
                return False
            if not self._validate_md_body:
                # Header-only validation requested and the headers match.
                return True

        if self.governed_section is not None:
            instance_gov_nodes = instance_tree.get_all_nodes_for_keys([self.governed_section], False)
            template_gov_nodes = self.template_tree.get_all_nodes_for_keys([self.governed_section], False)

            if not instance_gov_nodes:
                logger.info(f'Governed section {self.governed_section} not found in instance: {instance}')
                return False

            if not template_gov_nodes:
                logger.info(f'Governed section {self.governed_section} not found in template: {self.template_path}')
                return False

            instance_gov_keys = [node.key for node in instance_gov_nodes]
            template_gov_keys = [node.key for node in template_gov_nodes]
            if instance_gov_keys != template_gov_keys:
                logger.info(
                    'Governed sections were changed, '
                    f'template expects: {template_gov_keys}, '
                    f'but found {instance_gov_keys}.'
                )
                return False

            # Compare the governed document keys of each matching pair of sections.
            for instance_gov_node, template_gov_node in zip(instance_gov_nodes, template_gov_nodes):
                instance_keys = instance_gov_node.content.governed_document
                template_keys = template_gov_node.content.governed_document

                if not self._validate_headings(instance, template_keys, instance_keys):
                    return False

        if self._validate_md_body:
            instance_keys = instance_tree.content.subnodes_keys
            template_keys = self.template_tree.content.subnodes_keys
            if len(template_keys) > len(instance_keys):
                logger.info(f'Headings in the instance: {instance} were removed.')
                return False

            instance_lvl1_keys = list(instance_tree.get_all_headers_for_level(1))
            template_lvl1_keys = list(self.template_tree.get_all_headers_for_level(1))
            if len(template_lvl1_keys) < len(instance_lvl1_keys):
                logger.info(f'New headers of level 1 were added to the markdown instance: {instance}. ')
                return False

            if not self._validate_headings(instance, template_keys, instance_keys):
                return False

        return True

    @classmethod
    def compare_keys(
        cls,
        template: Dict[str, Any],
        candidate: Dict[str, Any],
        ignore_fields: Optional[List[str]] = None
    ) -> bool:
        """
        Compare a template dictionary against a candidate as to whether key structure is maintained.

        Keys whose lower-cased name is listed in ignore_fields are left out of the comparison.
        Nested dictionaries are compared recursively. Neither input dictionary is modified.

        Args:
            template: Template dict which is used as a model of key-value pairs
            candidate: Candidate dictionary to be measured
            ignore_fields: Lower-cased key names to exclude from the comparison
        Returns:
            Whether or not the candidate matches the template keys.
        """
        ignored = [] if ignore_fields is None else ignore_fields
        # Build filtered copies instead of popping keys from the arguments, so the caller's
        # dictionaries (e.g. the validator's stored template header) stay intact.
        candidate = {key: value for key, value in candidate.items() if key.lower() not in ignored}
        template = {key: value for key, value in template.items() if key.lower() not in ignored}

        template_version = cls.extract_template_version(template)
        candidate_version = cls.extract_template_version(candidate)
        if template_version != candidate_version:
            logger.info(f'Versions of the template {template_version} and instance {candidate_version} are different')
            return False

        if len(template) != len(candidate):
            logger.info(f'Number of keys does not match in template {template} and instance {candidate}')
            return False
        for key, template_value in template.items():
            if key not in candidate:
                logger.info(f'Key {key} is not in candidate {candidate}')
                return False
            if not isinstance(template_value, dict):
                # Only the key structure is compared; non-dict values are not inspected.
                continue
            if not isinstance(candidate[key], dict):
                logger.info(f'Value under {key} must be dictionary in candidate {candidate}')
                return False
            if not cls.compare_keys(template_value, candidate[key], ignored):
                return False
        return True

    def _validate_headings(self, instance: pathlib.Path, template_keys: List[str], instance_keys: List[str]) -> bool:
        """
        Validate instance headings against template.

        Args:
            instance: path of the instance, used in log messages only
            template_keys: headings expected by the template
            instance_keys: headings found in the instance
        Returns:
            False if the instance has fewer headings than the template or if template headings
            are left unmatched, True otherwise.
        """
        if len(template_keys) > len(instance_keys):
            logger.info(
                f'Headings in the instance: {instance} were removed. '
                f'Expected {len(template_keys)} headings, but found only {len(instance_keys)}.'
            )
            return False
        template_header_pointer = 0  # number of template headings accounted for so far
        present_keys = []
        for key in instance_keys:
            if template_header_pointer >= len(template_keys):
                break
            if key in template_keys and key not in present_keys:
                present_keys.append(key)
                template_header_pointer += 1
            elif re.search(md_const.SUBSTITUTION_REGEX, template_keys[template_header_pointer]) is not None:
                # The template heading at the pointer matches the substitution pattern, so it is
                # counted as present even though the instance heading text differs.
                present_keys.append(template_keys[template_header_pointer])
                template_header_pointer += 1  # skip headers with substitutions
        diff_keys = set(template_keys) - set(present_keys)
        if template_header_pointer != len(template_keys) and len(diff_keys) > 0:
            logger.info(
                f'Headings in the instance: {instance} were removed. '
                f'Expected {len(template_keys)} headings, but found only {template_header_pointer}.'
            )
            for result in as_list(diff_keys):
                logger.info(f'Heading {result} in the instance: {instance} was removed or not present ')
            return False
        return True

    @classmethod
    def extract_template_version(cls, header: Dict[str, Any]) -> Optional[str]:
        """
        Extract the template version from the header.

        Returns the value stored under TEMPLATE_VERSION_HEADER; if that key is absent,
        START_TEMPLATE_VERSION is returned instead.
        """
        return header.get(TEMPLATE_VERSION_HEADER, START_TEMPLATE_VERSION)
Attributes¤
governed_section = governed_section.strip(' ') if governed_section is not None else None instance-attribute ¤
template_header = template_header instance-attribute ¤
template_path = tmp_path instance-attribute ¤
template_tree = template_tree instance-attribute ¤
template_version = self.extract_template_version(self.template_header) instance-attribute ¤
Functions¤
__init__(tmp_path, template_header, template_tree, validate_yaml_header, validate_md_body, governed_section=None) ¤

Initialize markdown validator.

Source code in trestle/core/markdown/markdown_validator.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def __init__(
    self,
    tmp_path: pathlib.Path,
    template_header: Dict,
    template_tree: DocsMarkdownNode,
    validate_yaml_header: bool,
    validate_md_body: bool,
    governed_section: Optional[str] = None
):
    """
    Initialize markdown validator.

    Args:
        tmp_path: path of the template; its string form must contain the template version
        template_header: YAML header of the template
        template_tree: tree structure representing the template markdown
        validate_yaml_header: whether the YAML header of an instance is validated
        validate_md_body: whether the markdown body of an instance is validated
        governed_section: optional governed section heading; surrounding spaces are stripped
    Raises:
        TrestleError: if the template version does not occur in the template path, or if a
            'Version' header exists and differs from the template version.
    """
    self._validate_yaml_header = validate_yaml_header
    self._validate_md_body = validate_md_body
    self.governed_section = governed_section.strip(' ') if governed_section is not None else None
    self.template_header = template_header
    self.template_tree = template_tree
    self.template_path = tmp_path
    self.template_version = self.extract_template_version(self.template_header)

    if self.template_version not in str(self.template_path):
        raise TrestleError(
            f'Version of the template {self.template_version} does not match the path {self.template_path}. '
            f'Move the template to the folder {self.template_version}'
        )
    if 'Version' in self.template_header and self.template_header['Version'] != self.template_version:
        raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')

    # Lower-cased header names that are skipped when comparing YAML headers.
    self._ignore_headers = []
    for key in self.template_header:
        lowered = key.lower()
        if not lowered.startswith('x-trestle-'):
            continue
        self._ignore_headers.append(lowered)
        if lowered == 'x-trestle-ignore':
            # Index with the key as actually spelled in the header: the match above is
            # case-insensitive, so the literal lower-case key may not exist.
            for ignored_key in self.template_header[key]:
                self._ignore_headers.append(ignored_key.lower())
compare_keys(template, candidate, ignore_fields=None) classmethod ¤

Compare a template dictionary against a candidate as to whether key structure is maintained.

Parameters:

Name Type Description Default
template Dict[str, Any]

Template dict which is used as a model of key-value pairs

required
candidate Dict[str, Any]

Candidate dictionary to be measured

required

Returns: Whether or not the candidate matches the template keys.

Source code in trestle/core/markdown/markdown_validator.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
@classmethod
def compare_keys(
    cls,
    template: Dict[str, Any],
    candidate: Dict[str, Any],
    ignore_fields: Optional[List[str]] = None
) -> bool:
    """
    Compare a template dictionary against a candidate as to whether key structure is maintained.

    Keys whose lower-cased name is listed in ignore_fields are left out of the comparison.
    Nested dictionaries are compared recursively. Neither input dictionary is modified.

    Args:
        template: Template dict which is used as a model of key-value pairs
        candidate: Candidate dictionary to be measured
        ignore_fields: Lower-cased key names to exclude from the comparison
    Returns:
        Whether or not the candidate matches the template keys.
    """
    ignored = [] if ignore_fields is None else ignore_fields
    # Build filtered copies instead of popping keys from the arguments, so the caller's
    # dictionaries stay intact after the comparison.
    candidate = {key: value for key, value in candidate.items() if key.lower() not in ignored}
    template = {key: value for key, value in template.items() if key.lower() not in ignored}

    template_version = cls.extract_template_version(template)
    candidate_version = cls.extract_template_version(candidate)
    if template_version != candidate_version:
        logger.info(f'Versions of the template {template_version} and instance {candidate_version} are different')
        return False

    if len(template) != len(candidate):
        logger.info(f'Number of keys does not match in template {template} and instance {candidate}')
        return False
    for key, template_value in template.items():
        if key not in candidate:
            logger.info(f'Key {key} is not in candidate {candidate}')
            return False
        if not isinstance(template_value, dict):
            # Only the key structure is compared; non-dict values are not inspected.
            continue
        if not isinstance(candidate[key], dict):
            logger.info(f'Value under {key} must be dictionary in candidate {candidate}')
            return False
        if not cls.compare_keys(template_value, candidate[key], ignored):
            return False
    return True
extract_template_version(header) classmethod ¤

Extract the template version from the header.

If no template version header is found, the starting version (0.0.1) is used by default.

Source code in trestle/core/markdown/markdown_validator.py
227
228
229
230
231
232
233
234
235
236
237
@classmethod
def extract_template_version(cls, header: Dict[str, Any]) -> Optional[str]:
    """
    Extract the template version from the header.

    Returns the value stored under TEMPLATE_VERSION_HEADER; if that key is absent,
    START_TEMPLATE_VERSION is returned instead.
    """
    return header.get(TEMPLATE_VERSION_HEADER, START_TEMPLATE_VERSION)
is_valid_against_template(instance, instance_header, instance_tree) ¤

Validate instance markdown against template.

Instance is correct against a template iff
  1. For YAML header keys: a. All keys from the template are present and not modified b. Template version in the template and instance match
  2. On the Markdown w/o YAML header: a. No additional headers of the level 1 were added b. Headers were not reordered c. Headers in the instance should be a superset of the template headers d. Headers must be in hierarchical order (i.e. # then ### then ## is not allowed)
  3. If Governed Header is given then: a. Governed Header is not modified b. All keys (i.e. key: something) inside the section are present

Parameters:

Name Type Description Default
instance Path

a path to the markdown instance that should be validated

required
instance_header Dict

a YAML header extracted from the markdown

required
instance_tree DocsMarkdownNode

a tree structure representing markdown contents

required

Returns: Whether or not the candidate is valid against the template.

Source code in trestle/core/markdown/markdown_validator.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def is_valid_against_template(
    self, instance: pathlib.Path, instance_header: Dict, instance_tree: DocsMarkdownNode
) -> bool:
    """
    Validate instance markdown against template.

    Instance is correct against a template iff:
        1. For YAML header keys:
            a. All keys from the template are present and not modified
            b. Template version in the template and instance match
        2. On the Markdown w/o YAML header:
            a. No additional headers of the level 1 were added
            b. Headers were not reordered
            c. Headers in the instance should be a superset of the template headers
            d. Headers must be in hierarchical order (i.e. # then ### then ## is not allowed)
        3. If Governed Header is given then:
            a. Governed Header is not modified
            b. All keys (i.e. key: something) inside the section are present

    Note that when YAML header validation is enabled and body validation is disabled, the
    method returns as soon as the headers match; the governed section is not checked then.

    Args:
        instance: a path to the markdown instance that should be validated
        instance_header: a YAML header extracted from the markdown
        instance_tree: a tree structure representing markdown contents
    Returns:
        Whether or not the candidate is valid against the template.
    """
    if self._validate_yaml_header:
        headers_match = self.compare_keys(self.template_header, instance_header, self._ignore_headers)

        if not headers_match:
            logger.info(f'YAML header mismatch between template {self.template_path} and instance {instance}')
            return False
        if not self._validate_md_body:
            # Header-only validation requested and the headers match.
            return True

    if self.governed_section is not None:
        instance_gov_nodes = instance_tree.get_all_nodes_for_keys([self.governed_section], False)
        template_gov_nodes = self.template_tree.get_all_nodes_for_keys([self.governed_section], False)

        if not instance_gov_nodes:
            logger.info(f'Governed section {self.governed_section} not found in instance: {instance}')
            return False

        if not template_gov_nodes:
            logger.info(f'Governed section {self.governed_section} not found in template: {self.template_path}')
            return False

        instance_gov_keys = [node.key for node in instance_gov_nodes]
        template_gov_keys = [node.key for node in template_gov_nodes]
        if instance_gov_keys != template_gov_keys:
            logger.info(
                'Governed sections were changed, '
                f'template expects: {template_gov_keys}, '
                f'but found {instance_gov_keys}.'
            )
            return False

        # Compare the governed document keys of each matching pair of sections.
        for instance_gov_node, template_gov_node in zip(instance_gov_nodes, template_gov_nodes):
            instance_keys = instance_gov_node.content.governed_document
            template_keys = template_gov_node.content.governed_document

            if not self._validate_headings(instance, template_keys, instance_keys):
                return False

    if self._validate_md_body:
        instance_keys = instance_tree.content.subnodes_keys
        template_keys = self.template_tree.content.subnodes_keys
        if len(template_keys) > len(instance_keys):
            logger.info(f'Headings in the instance: {instance} were removed.')
            return False

        instance_lvl1_keys = list(instance_tree.get_all_headers_for_level(1))
        template_lvl1_keys = list(self.template_tree.get_all_headers_for_level(1))
        if len(template_lvl1_keys) < len(instance_lvl1_keys):
            logger.info(f'New headers of level 1 were added to the markdown instance: {instance}. ')
            return False

        if not self._validate_headings(instance, template_keys, instance_keys):
            return False

    return True

Functions¤

handler: python