Skip to content

markdown_processor

trestle.core.markdown.markdown_processor ¤

A markdown processor.

Attributes¤

logger = logging.getLogger(__name__) module-attribute ¤

Classes¤

MarkdownProcessor ¤

A markdown processor.

Source code in trestle/core/markdown/markdown_processor.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class MarkdownProcessor:
    """A markdown processor.

    Reads markdown files, splits the YAML header from the body, checks that the
    body renders as Github Flavored Markdown and builds node trees from the body.
    """

    def __init__(self) -> None:
        """Initialize the markdown processor with no governed header set."""
        # Starts unset (None); process_markdown passes it to
        # DocsMarkdownNode.build_tree_from_markdown.
        self.governed_header = None

    def render_gfm_to_html(self, markdown_text: str) -> str:
        """Render given Github Flavored Markdown to HTML.

        Args:
            markdown_text: markdown source to render.

        Returns:
            The HTML string produced by cmarkgfm.

        Raises:
            TrestleError: if cmarkgfm raises ValueError for the given text.
        """
        try:
            return cmarkgfm.github_flavored_markdown_to_html(markdown_text)
        except ValueError as e:
            # Chain explicitly so the original cmarkgfm error stays attached as the cause.
            raise TrestleError(f'Not a valid Github Flavored markdown: {e}.') from e

    def process_markdown(self,
                         md_path: pathlib.Path,
                         read_header: bool = True,
                         read_body: bool = True) -> Tuple[Dict, DocsMarkdownNode]:
        """Parse the markdown and build the tree to operate over it.

        Args:
            md_path: path of the markdown file to read.
            read_header: when False the returned header is an empty dict.
            read_body: when False the tree is built from an empty body.

        Returns:
            Tuple of the header dictionary and the tree built from the body lines.

        Raises:
            TrestleError: if the file cannot be read or the body cannot be rendered.
        """
        header, markdown_wo_header = self.read_markdown_wo_processing(md_path, read_header, read_body)

        # The rendered HTML is discarded; the call is made only because it raises
        # TrestleError when the body cannot be rendered.
        self.render_gfm_to_html(markdown_wo_header)

        lines = markdown_wo_header.split('\n')
        tree = DocsMarkdownNode.build_tree_from_markdown(lines, self.governed_header)
        return header, tree

    def process_control_markdown(
        self,
        md_path: pathlib.Path,
        cli_section_dict: Optional[Dict[str, str]] = None,
        part_label_to_id_map: Optional[Dict[str, str]] = None
    ) -> Tuple[Dict, ControlMarkdownNode]:
        """Parse control markdown and build tree with identified OSCAL components.

        Args:
            md_path: path of the control markdown file.
            cli_section_dict: optional dictionary merged with the sections found in the
                header under const.SECTIONS_TAG; the merged dictionary is inverted
                (values become keys) before it is stored on tree_context.
            part_label_to_id_map: optional mapping stored on tree_context while the
                tree is built.

        Returns:
            Tuple of the header dictionary and the control tree.

        Raises:
            TrestleError: logged and re-raised if reading, rendering or tree building fails.
        """
        try:
            header, markdown_wo_header = self.read_markdown_wo_processing(md_path, read_header=True, read_body=True)

            section_to_part_name_map = {}
            if cli_section_dict is not None:
                # Read x-trestle-sections to the dictionary and merge it with CLI provided dictionary
                yaml_header_sections_dict = header.get(const.SECTIONS_TAG, {})
                merged_dict = merge_dicts(yaml_header_sections_dict, cli_section_dict)
                section_to_part_name_map = {v: k for k, v in merged_dict.items()}
            # Result is discarded; the call only validates that the body renders.
            self.render_gfm_to_html(markdown_wo_header)

            lines = markdown_wo_header.split('\n')
            tree_context.reset()
            try:
                tree_context.section_to_part_name_map = section_to_part_name_map
                tree_context.part_label_to_id_map = part_label_to_id_map
                tree = ControlMarkdownNode.build_tree_from_markdown(lines)
            finally:
                # tree_context is shared state: clear it even when tree building fails
                # so the maps set above do not outlive this call.
                tree_context.reset()
            return header, tree
        except TrestleError as e:
            logger.error(f'Error while reading control markdown: {md_path}: {e}')
            # Bare raise re-raises the active exception with its traceback unchanged.
            raise

    def read_markdown_wo_processing(self,
                                    md_path: pathlib.Path,
                                    read_header: bool = True,
                                    read_body: bool = True) -> Tuple[Dict, str]:
        """Read markdown header to dictionary and body to string.

        Args:
            md_path: path of the markdown file to read.
            read_header: when False an empty dict is returned instead of the header.
            read_body: when False an empty string is returned instead of the body.

        Returns:
            Tuple of the header dictionary and the markdown body without the header.

        Raises:
            TrestleError: if the file is missing, cannot be decoded with
                const.FILE_ENCODING, or parsing the header raises a YAML ScannerError.
        """
        try:
            # Context manager guarantees the file handle is closed, also on decode errors.
            with md_path.open('r', encoding=const.FILE_ENCODING) as md_file:
                contents = frontmatter.loads(md_file.read())

            header = contents.metadata if read_header else {}
            markdown_wo_header = contents.content if read_body else ''
            return header, markdown_wo_header
        except UnicodeDecodeError as e:
            logger.debug(traceback.format_exc())
            raise TrestleError(f'Markdown cannot be decoded into {const.FILE_ENCODING}, error: {e}') from e
        except ScannerError as e:
            logger.debug(traceback.format_exc())
            raise TrestleError(f'Header is not in a valid YAML format: {e}') from e
        except FileNotFoundError as e:
            logger.debug(traceback.format_exc())
            raise TrestleError(f'Markdown with path {md_path}, not found: {e}') from e

    def fetch_value_from_header(self, md_path: pathlib.Path, key: str) -> Optional[str]:
        """Fetch value for the given key from the markdown header if it exists.

        Args:
            md_path: path of the markdown file whose header is read.
            key: header key to look up.

        Returns:
            The value stored under key, or None when the header has no such key.
        """
        header, _ = self.read_markdown_wo_processing(md_path)
        return header.get(key)
Attributes¤
governed_header = None instance-attribute ¤
Functions¤
__init__() ¤

Initialize markdown processor.

Source code in trestle/core/markdown/markdown_processor.py
40
41
42
def __init__(self) -> None:
    """Initialize the markdown processor with no governed header set."""
    # Starts unset (None); within this class it is only read by process_markdown,
    # which passes it to DocsMarkdownNode.build_tree_from_markdown.
    self.governed_header = None
fetch_value_from_header(md_path, key) ¤

Fetch the value for the given key from the markdown header, if it exists.

Source code in trestle/core/markdown/markdown_processor.py
119
120
121
122
123
124
125
126
127
def fetch_value_from_header(self, md_path: pathlib.Path, key: str) -> Optional[str]:
    """Look up a single key in the header of the given markdown file.

    Args:
        md_path: path of the markdown file whose header is read.
        key: header key to look up.

    Returns:
        The value stored under key, or None when the header has no such key.
    """
    metadata, _ = self.read_markdown_wo_processing(md_path)
    return metadata.get(key)
process_control_markdown(md_path, cli_section_dict=None, part_label_to_id_map=None) ¤

Parse control markdown and build tree with identified OSCAL components.

Source code in trestle/core/markdown/markdown_processor.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def process_control_markdown(
    self,
    md_path: pathlib.Path,
    cli_section_dict: Optional[Dict[str, str]] = None,
    part_label_to_id_map: Optional[Dict[str, str]] = None
) -> Tuple[Dict, ControlMarkdownNode]:
    """Parse control markdown and build tree with identified OSCAL components.

    Args:
        md_path: path of the control markdown file.
        cli_section_dict: optional dictionary merged with the sections found in the
            header under const.SECTIONS_TAG; the merged dictionary is inverted
            (values become keys) before it is stored on tree_context.
        part_label_to_id_map: optional mapping stored on tree_context while the
            tree is built.

    Returns:
        Tuple of the header dictionary and the control tree.

    Raises:
        TrestleError: logged and re-raised if reading, rendering or tree building fails.
    """
    try:
        header, markdown_wo_header = self.read_markdown_wo_processing(md_path, read_header=True, read_body=True)

        section_to_part_name_map = {}
        if cli_section_dict is not None:
            # Read x-trestle-sections to the dictionary and merge it with CLI provided dictionary
            yaml_header_sections_dict = header.get(const.SECTIONS_TAG, {})
            merged_dict = merge_dicts(yaml_header_sections_dict, cli_section_dict)
            section_to_part_name_map = {v: k for k, v in merged_dict.items()}
        # Result is discarded; the call only validates that the body renders.
        self.render_gfm_to_html(markdown_wo_header)

        lines = markdown_wo_header.split('\n')
        tree_context.reset()
        try:
            tree_context.section_to_part_name_map = section_to_part_name_map
            tree_context.part_label_to_id_map = part_label_to_id_map
            tree = ControlMarkdownNode.build_tree_from_markdown(lines)
        finally:
            # tree_context is shared state: clear it even when tree building fails
            # so the maps set above do not outlive this call.
            tree_context.reset()
        return header, tree
    except TrestleError as e:
        logger.error(f'Error while reading control markdown: {md_path}: {e}')
        # Bare raise re-raises the active exception with its traceback unchanged.
        raise
process_markdown(md_path, read_header=True, read_body=True) ¤

Parse the markdown and build the tree to operate over it.

Source code in trestle/core/markdown/markdown_processor.py
52
53
54
55
56
57
58
59
60
61
62
63
def process_markdown(self,
                     md_path: pathlib.Path,
                     read_header: bool = True,
                     read_body: bool = True) -> Tuple[Dict, DocsMarkdownNode]:
    """Read a markdown file and build a node tree from its body.

    Args:
        md_path: path of the markdown file to read.
        read_header: when False the returned header is an empty dict.
        read_body: when False the tree is built from an empty body.

    Returns:
        Tuple of the header dictionary and the tree built from the body lines.
    """
    yaml_header, body = self.read_markdown_wo_processing(md_path, read_header, read_body)

    # The rendered HTML is not used; rendering is done only because it raises
    # TrestleError when the body cannot be rendered.
    self.render_gfm_to_html(body)

    body_lines = body.split('\n')
    return yaml_header, DocsMarkdownNode.build_tree_from_markdown(body_lines, self.governed_header)
read_markdown_wo_processing(md_path, read_header=True, read_body=True) ¤

Read markdown header to dictionary and body to string.

Source code in trestle/core/markdown/markdown_processor.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def read_markdown_wo_processing(self,
                                md_path: pathlib.Path,
                                read_header: bool = True,
                                read_body: bool = True) -> Tuple[Dict, str]:
    """Read markdown header to dictionary and body to string.

    Args:
        md_path: path of the markdown file to read.
        read_header: when False an empty dict is returned instead of the header.
        read_body: when False an empty string is returned instead of the body.

    Returns:
        Tuple of the header dictionary and the markdown body without the header.

    Raises:
        TrestleError: if the file is missing, cannot be decoded with
            const.FILE_ENCODING, or parsing the header raises a YAML ScannerError.
    """
    try:
        # Context manager guarantees the file handle is closed, also on decode errors.
        with md_path.open('r', encoding=const.FILE_ENCODING) as md_file:
            contents = frontmatter.loads(md_file.read())

        header = contents.metadata if read_header else {}
        markdown_wo_header = contents.content if read_body else ''
        return header, markdown_wo_header
    except UnicodeDecodeError as e:
        logger.debug(traceback.format_exc())
        raise TrestleError(f'Markdown cannot be decoded into {const.FILE_ENCODING}, error: {e}') from e
    except ScannerError as e:
        logger.debug(traceback.format_exc())
        raise TrestleError(f'Header is not in a valid YAML format: {e}') from e
    except FileNotFoundError as e:
        logger.debug(traceback.format_exc())
        raise TrestleError(f'Markdown with path {md_path}, not found: {e}') from e
render_gfm_to_html(markdown_text) ¤

Render given Github Flavored Markdown to HTML.

Source code in trestle/core/markdown/markdown_processor.py
44
45
46
47
48
49
50
def render_gfm_to_html(self, markdown_text: str) -> str:
    """Render given Github Flavored Markdown to HTML.

    Args:
        markdown_text: markdown source to render.

    Returns:
        The HTML string produced by cmarkgfm.

    Raises:
        TrestleError: if cmarkgfm raises ValueError for the given text.
    """
    try:
        return cmarkgfm.github_flavored_markdown_to_html(markdown_text)
    except ValueError as e:
        # Chain explicitly so the original cmarkgfm error stays attached as the cause.
        raise TrestleError(f'Not a valid Github Flavored markdown: {e}.') from e

Functions¤

handler: python