Skip to content

markdown_validator

trestle.core.markdown.markdown_validator ¤

Markdown Validator.

logger ¤

Classes¤

MarkdownValidator ¤

A markdown validator. Validates markdown instance against given template.

Source code in trestle/core/markdown/markdown_validator.py
class MarkdownValidator:
    """
    A markdown validator. Validates markdown instance against given template.

    The validator is created from a template (path, YAML header and heading tree) and checks
    instances against it with `is_valid_against_template`.
    """

    def __init__(
        self,
        tmp_path: pathlib.Path,
        template_header: Dict,
        template_tree: DocsMarkdownNode,
        validate_yaml_header: bool,
        validate_md_body: bool,
        governed_section: Optional[str] = None
    ):
        """
        Initialize markdown validator.

        Args:
            tmp_path: path of the template; its string form must contain the template version
            template_header: YAML header of the template
            template_tree: tree structure representing the template markdown contents
            validate_yaml_header: whether the instance YAML header is compared with the template header
            validate_md_body: whether the instance headings are compared with the template headings
            governed_section: optional heading of the governed section, surrounding spaces are stripped
        Raises:
            TrestleError: if the template version is not part of the template path or if the header
                contains a 'Version' key that differs from the template version.
        """
        self._validate_yaml_header = validate_yaml_header
        self._validate_md_body = validate_md_body
        self.governed_section = governed_section.strip(' ') if governed_section is not None else None
        self.template_header = template_header
        self.template_tree = template_tree
        self.template_path = tmp_path
        self.template_version = self.extract_template_version(self.template_header)

        # str() keeps a non-string version (e.g. a number or an empty YAML value) from raising
        # TypeError in the containment test; such a version is reported as a mismatch instead.
        if str(self.template_version) not in str(self.template_path):
            raise TrestleError(
                f'Version of the template {self.template_version} does not match the path {self.template_path}. '
                f'Move the template to the folder {self.template_version}'
            )
        if 'Version' in self.template_header and self.template_header['Version'] != self.template_version:
            raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')

        # Lowercased header keys that are skipped when YAML headers are compared.
        self._ignore_headers = []
        for key in self.template_header:
            lowered_key = key.lower()
            if not lowered_key.startswith('x-trestle-'):
                continue
            self._ignore_headers.append(lowered_key)
            if lowered_key == 'x-trestle-ignore':
                # The match above is case-insensitive, so the value has to be read under the key's
                # original spelling; an empty value contributes no additional names.
                for ignored_name in self.template_header[key] or []:
                    self._ignore_headers.append(ignored_name.lower())

    def is_valid_against_template(
        self, instance: pathlib.Path, instance_header: Dict, instance_tree: DocsMarkdownNode
    ) -> bool:
        """
        Validate instance markdown against template.

        Instance is correct against a template iff:
            1. For YAML header keys:
                a. All keys from the template are present and not modified
                b. Template version in the template and instance match
            2. On the Markdown w/o YAML header:
                a. No additional headers of the level 1 were added
                b. Headers were not reordered
                c. Headers in the instance should be a superset of the template headers
                d. Headers must be in hierarchical order (i.e. # then ### then ## is not allowed)
            3. If Governed Header is given then:
                a. Governed Header is not modified
                b. All keys (i.e. key: something) inside the section are present

        When YAML header validation is enabled and body validation is disabled, the result of the
        header comparison is returned directly and no further checks are run.

        Args:
            instance: a path to the markdown instance that should be validated
            instance_header: a YAML header extracted from the markdown
            instance_tree: a tree structure representing markdown contents
        Returns:
            Whether or not the candidate is valid against the template.
        """
        if self._validate_yaml_header:
            headers_match = self.compare_keys(self.template_header, instance_header, self._ignore_headers)

            if not headers_match:
                logger.info(f'YAML header mismatch between template {self.template_path} and instance {instance}')
                return False
            if not self._validate_md_body:
                # Header-only validation: nothing below (including the governed section) is checked.
                return True

        if self.governed_section is not None:
            instance_gov_nodes = instance_tree.get_all_nodes_for_keys([self.governed_section], False)
            template_gov_nodes = self.template_tree.get_all_nodes_for_keys([self.governed_section], False)

            if not instance_gov_nodes:
                logger.info(f'Governed section {self.governed_section} not found in instance: {instance}')
                return False

            if not template_gov_nodes:
                logger.info(f'Governed section {self.governed_section} not found in template: {self.template_path}')
                return False

            instance_gov_keys = [node.key for node in instance_gov_nodes]
            template_gov_keys = [node.key for node in template_gov_nodes]
            if instance_gov_keys != template_gov_keys:
                logger.info(
                    f'Governed sections were changed, '
                    f'template expects: {template_gov_keys}, '
                    f'but found {instance_gov_keys}.'
                )
                return False

            # Compare the governed documents of the matching nodes pairwise.
            for instance_gov_node, template_gov_node in zip(instance_gov_nodes, template_gov_nodes):
                instance_keys = instance_gov_node.content.governed_document
                template_keys = template_gov_node.content.governed_document

                if not self._validate_headings(instance, template_keys, instance_keys):
                    return False

        if self._validate_md_body:
            instance_keys = instance_tree.content.subnodes_keys
            template_keys = self.template_tree.content.subnodes_keys
            if len(template_keys) > len(instance_keys):
                logger.info(f'Headings in the instance: {instance} were removed.')
                return False

            instance_lvl1_keys = list(instance_tree.get_all_headers_for_level(1))
            template_lvl1_keys = list(self.template_tree.get_all_headers_for_level(1))
            if len(template_lvl1_keys) < len(instance_lvl1_keys):
                logger.info(f'New headers of level 1 were added to the markdown instance: {instance}. ')
                return False

            if not self._validate_headings(instance, template_keys, instance_keys):
                return False

        return True

    @classmethod
    def compare_keys(
        cls,
        template: Dict[str, Any],
        candidate: Dict[str, Any],
        ignore_fields: Optional[List[str]] = None
    ) -> bool:
        """
        Compare a template dictionary against a candidate as to whether key structure is maintained.

        Keys whose lowercase form is listed in ignore_fields are left out of the comparison on both
        sides. The passed dictionaries are not modified.

        Args:
            template: Template dict which is used as a model of key-value pairs
            candidate: Candidate dictionary to be measured
            ignore_fields: Lowercase key names that are excluded from the comparison
        Returns:
            Whether or not the candidate matches the template keys.
        """
        if ignore_fields is None:
            ignore_fields = []
        # Work on filtered copies so that validating never alters the caller's dictionaries.
        candidate = {key: value for key, value in candidate.items() if key.lower() not in ignore_fields}
        template = {key: value for key, value in template.items() if key.lower() not in ignore_fields}

        # The versions are read from the filtered dictionaries, i.e. after ignored keys were dropped.
        template_version = cls.extract_template_version(template)
        candidate_version = cls.extract_template_version(candidate)
        if template_version != candidate_version:
            logger.info(f'Versions of the template {template_version} and instance {candidate_version} are different')
            return False

        if len(template) != len(candidate):
            logger.info(f'Number of keys does not match in template {template} and instance {candidate}')
            return False
        for key, template_value in template.items():
            if key not in candidate:
                logger.info(f'Key {key} is not in candidate {candidate}')
                return False
            # isinstance also accepts dict subclasses, which an exact type comparison would reject.
            if isinstance(template_value, dict):
                if not isinstance(candidate[key], dict):
                    logger.info(f'Value under {key} must be dictionary in candidate {candidate}')
                    return False
                if not cls.compare_keys(template_value, candidate[key], ignore_fields):
                    return False
        return True

    def _validate_headings(self, instance: pathlib.Path, template_keys: List[str], instance_keys: List[str]) -> bool:
        """
        Validate instance headings against template.

        Args:
            instance: a path to the markdown instance, used in log messages only
            template_keys: headings expected by the template
            instance_keys: headings found in the instance
        Returns:
            False if the instance has fewer headings than the template or if template headings are
            left unmatched, True otherwise.
        """
        if len(template_keys) > len(instance_keys):
            logger.info(
                f'Headings in the instance: {instance} were removed. '
                f'Expected {len(template_keys)} headings, but found only {len(instance_keys)}.'
            )
            return False
        template_key_set = set(template_keys)
        template_header_pointer = 0
        present_keys = set()
        for key in instance_keys:
            if template_header_pointer >= len(template_keys):
                break
            if key in template_key_set and key not in present_keys:
                present_keys.add(key)
                template_header_pointer += 1
            elif re.search(md_const.SUBSTITUTION_REGEX, template_keys[template_header_pointer]) is not None:
                # The template heading at the pointer contains a substitution: count it as present.
                present_keys.add(template_keys[template_header_pointer])
                template_header_pointer += 1  # skip headers with substitutions
        diff_keys = template_key_set - present_keys
        if template_header_pointer != len(template_keys) and diff_keys:
            logger.info(
                f'Headings in the instance: {instance} were removed. '
                f'Expected {len(template_keys)} headings, but found only {template_header_pointer}.'
            )
            for result in as_list(diff_keys):
                logger.info(f'Heading {result} in the instance: {instance} was removed or not present ')
            return False
        return True

    @classmethod
    def extract_template_version(cls, header: Dict[str, Any]) -> Optional[str]:
        """
        Extract the template version from the header.

        If the header has no template version key, the starting version (0.0.1) is used by default.

        Args:
            header: a YAML header of a template or an instance
        Returns:
            The value stored under the template version key, or the starting version.
        """
        if TEMPLATE_VERSION_HEADER not in header:
            return START_TEMPLATE_VERSION

        return header[TEMPLATE_VERSION_HEADER]
Methods¤
__init__(self, tmp_path, template_header, template_tree, validate_yaml_header, validate_md_body, governed_section=None) special ¤

Initialize markdown validator.

Source code in trestle/core/markdown/markdown_validator.py
def __init__(
    self,
    tmp_path: pathlib.Path,
    template_header: Dict,
    template_tree: DocsMarkdownNode,
    validate_yaml_header: bool,
    validate_md_body: bool,
    governed_section: Optional[str] = None
):
    """
    Initialize markdown validator.

    Args:
        tmp_path: path of the template; its string form must contain the template version
        template_header: YAML header of the template
        template_tree: tree structure representing the template markdown contents
        validate_yaml_header: whether the instance YAML header is compared with the template header
        validate_md_body: whether the instance headings are compared with the template headings
        governed_section: optional heading of the governed section, surrounding spaces are stripped
    Raises:
        TrestleError: if the template version is not part of the template path or if the header
            contains a 'Version' key that differs from the template version.
    """
    self._validate_yaml_header = validate_yaml_header
    self._validate_md_body = validate_md_body
    self.governed_section = governed_section.strip(' ') if governed_section is not None else None
    self.template_header = template_header
    self.template_tree = template_tree
    self.template_path = tmp_path
    self.template_version = self.extract_template_version(self.template_header)

    # str() keeps a non-string version (e.g. a number or an empty YAML value) from raising
    # TypeError in the containment test; such a version is reported as a mismatch instead.
    if str(self.template_version) not in str(self.template_path):
        raise TrestleError(
            f'Version of the template {self.template_version} does not match the path {self.template_path}. '
            f'Move the template to the folder {self.template_version}'
        )
    if 'Version' in self.template_header and self.template_header['Version'] != self.template_version:
        raise TrestleError(f'Version does not match template-version in template: {self.template_path}.')

    # Lowercased header keys that are skipped when YAML headers are compared.
    self._ignore_headers = []
    for key in self.template_header:
        lowered_key = key.lower()
        if not lowered_key.startswith('x-trestle-'):
            continue
        self._ignore_headers.append(lowered_key)
        if lowered_key == 'x-trestle-ignore':
            # The match above is case-insensitive, so the value has to be read under the key's
            # original spelling; an empty value contributes no additional names.
            for ignored_name in self.template_header[key] or []:
                self._ignore_headers.append(ignored_name.lower())
compare_keys(template, candidate, ignore_fields=None) classmethod ¤

Compare a template dictionary against a candidate as to whether key structure is maintained.

Parameters:

Name Type Description Default
template Dict[str, Any]

Template dict which is used as a model of key-value pairs

required
candidate Dict[str, Any]

Candidate dictionary to be measured

required

Returns:

Type Description
bool

Whether or not the candidate matches the template keys.

Source code in trestle/core/markdown/markdown_validator.py
@classmethod
def compare_keys(
    cls,
    template: Dict[str, Any],
    candidate: Dict[str, Any],
    ignore_fields: Optional[List[str]] = None
) -> bool:
    """
    Compare a template dictionary against a candidate as to whether key structure is maintained.

    Keys whose lowercase form is listed in ignore_fields are left out of the comparison on both
    sides. The passed dictionaries are not modified.

    Args:
        template: Template dict which is used as a model of key-value pairs
        candidate: Candidate dictionary to be measured
        ignore_fields: Lowercase key names that are excluded from the comparison
    Returns:
        Whether or not the candidate matches the template keys.
    """
    if ignore_fields is None:
        ignore_fields = []
    # Work on filtered copies so that validating never alters the caller's dictionaries.
    candidate = {key: value for key, value in candidate.items() if key.lower() not in ignore_fields}
    template = {key: value for key, value in template.items() if key.lower() not in ignore_fields}

    # The versions are read from the filtered dictionaries, i.e. after ignored keys were dropped.
    template_version = cls.extract_template_version(template)
    candidate_version = cls.extract_template_version(candidate)
    if template_version != candidate_version:
        logger.info(f'Versions of the template {template_version} and instance {candidate_version} are different')
        return False

    if len(template) != len(candidate):
        logger.info(f'Number of keys does not match in template {template} and instance {candidate}')
        return False
    for key, template_value in template.items():
        if key not in candidate:
            logger.info(f'Key {key} is not in candidate {candidate}')
            return False
        # isinstance also accepts dict subclasses, which an exact type comparison would reject.
        if isinstance(template_value, dict):
            if not isinstance(candidate[key], dict):
                logger.info(f'Value under {key} must be dictionary in candidate {candidate}')
                return False
            if not cls.compare_keys(template_value, candidate[key], ignore_fields):
                return False
    return True
extract_template_version(header) classmethod ¤

Extract the template version from the header.

If the header does not contain a template version, the starting version (0.0.1) is used by default.

Source code in trestle/core/markdown/markdown_validator.py
@classmethod
def extract_template_version(cls, header: Dict[str, Any]) -> Optional[str]:
    """
    Read the template version out of a YAML header.

    Falls back to the starting version (0.0.1) when the header carries no template version key.
    """
    if TEMPLATE_VERSION_HEADER in header:
        return header[TEMPLATE_VERSION_HEADER]

    return START_TEMPLATE_VERSION
is_valid_against_template(self, instance, instance_header, instance_tree) ¤

Validate instance markdown against template.

Instance is correct against a template iff:

1. For YAML header keys:
    a. All keys from the template are present and not modified
    b. Template version in the template and instance match
2. On the Markdown w/o YAML header:
    a. No additional headers of the level 1 were added
    b. Headers were not reordered
    c. Headers in the instance should be a superset of the template headers
    d. Headers must be in hierarchical order (i.e. # then ### then ## is not allowed)
3. If Governed Header is given then:
    a. Governed Header is not modified
    b. All keys (i.e. key: something) inside the section are present

Parameters:

Name Type Description Default
instance Path

a path to the markdown instance that should be validated

required
instance_header Dict

a YAML header extracted from the markdown

required
instance_tree DocsMarkdownNode

a tree structure representing markdown contents

required

Returns:

Type Description
bool

Whether or not the candidate is valid against the template.

Source code in trestle/core/markdown/markdown_validator.py
def is_valid_against_template(
    self, instance: pathlib.Path, instance_header: Dict, instance_tree: DocsMarkdownNode
) -> bool:
    """
    Validate instance markdown against template.

    Instance is correct against a template iff:
        1. For YAML header keys:
            a. All keys from the template are present and not modified
            b. Template version in the template and instance match
        2. On the Markdown w/o YAML header:
            a. No additional headers of the level 1 were added
            b. Headers were not reordered
            c. Headers in the instance should be a superset of the template headers
            d. Headers must be in hierarchical order (i.e. # then ### then ## is not allowed)
        3. If Governed Header is given then:
            a. Governed Header is not modified
            b. All keys (i.e. key: something) inside the section are present

    When YAML header validation is enabled and body validation is disabled, the result of the
    header comparison is returned directly and no further checks are run.

    Args:
        instance: a path to the markdown instance that should be validated
        instance_header: a YAML header extracted from the markdown
        instance_tree: a tree structure representing markdown contents
    Returns:
        Whether or not the candidate is valid against the template.
    """
    if self._validate_yaml_header:
        headers_match = self.compare_keys(self.template_header, instance_header, self._ignore_headers)

        if not headers_match:
            logger.info(f'YAML header mismatch between template {self.template_path} and instance {instance}')
            return False
        if not self._validate_md_body:
            # Header-only validation: nothing below (including the governed section) is checked.
            return True

    if self.governed_section is not None:
        instance_gov_nodes = instance_tree.get_all_nodes_for_keys([self.governed_section], False)
        template_gov_nodes = self.template_tree.get_all_nodes_for_keys([self.governed_section], False)

        if not instance_gov_nodes:
            logger.info(f'Governed section {self.governed_section} not found in instance: {instance}')
            return False

        if not template_gov_nodes:
            logger.info(f'Governed section {self.governed_section} not found in template: {self.template_path}')
            return False

        instance_gov_keys = [node.key for node in instance_gov_nodes]
        template_gov_keys = [node.key for node in template_gov_nodes]
        if instance_gov_keys != template_gov_keys:
            logger.info(
                f'Governed sections were changed, '
                f'template expects: {template_gov_keys}, '
                f'but found {instance_gov_keys}.'
            )
            return False

        # Compare the governed documents of the matching nodes pairwise.
        for instance_gov_node, template_gov_node in zip(instance_gov_nodes, template_gov_nodes):
            instance_keys = instance_gov_node.content.governed_document
            template_keys = template_gov_node.content.governed_document

            if not self._validate_headings(instance, template_keys, instance_keys):
                return False

    if self._validate_md_body:
        instance_keys = instance_tree.content.subnodes_keys
        template_keys = self.template_tree.content.subnodes_keys
        if len(template_keys) > len(instance_keys):
            logger.info(f'Headings in the instance: {instance} were removed.')
            return False

        instance_lvl1_keys = list(instance_tree.get_all_headers_for_level(1))
        template_lvl1_keys = list(self.template_tree.get_all_headers_for_level(1))
        if len(template_lvl1_keys) < len(instance_lvl1_keys):
            logger.info(f'New headers of level 1 were added to the markdown instance: {instance}. ')
            return False

        if not self._validate_headings(instance, template_keys, instance_keys):
            return False

    return True

handler: python