Skip to content

trestle.core.markdown.docs_markdown_node

trestle.core.markdown.docs_markdown_node ¤

A docs markdown node.

Attributes¤

logger = logging.getLogger(__name__) module-attribute ¤

Classes¤

DocsMarkdownNode ¤

Bases: BaseMarkdownNode

Markdown will be read to the tree.

Source code in trestle/core/markdown/docs_markdown_node.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
class DocsMarkdownNode(BaseMarkdownNode):
    """Tree node holding one markdown section together with its nested subsections."""

    def __init__(self, key: str, content: DocsSectionContent, starting_line: int):
        """Create a node for the section identified by the given key."""
        super().__init__(key, content, starting_line)
        self.content: DocsSectionContent = content

    @classmethod
    def build_tree_from_markdown(cls, lines: List[str], governed_header: Optional[str] = None):
        """Parse the markdown lines and return the root node of the resulting tree."""
        # An uninitialised instance is enough to reach the parsing helpers.
        builder = cls.__new__(cls)
        top_level = builder._get_max_header_lvl(lines)
        root, _ = builder._build_tree(lines, 'root', 0, top_level, governed_header)
        return root

    def _build_tree(
        self,
        lines: List[str],
        root_key: str,
        starting_line: int,
        level: int,
        governed_header: Optional[str] = None
    ) -> Tuple[DocsMarkdownNode, int]:
        """
        Recursively turn the markdown lines into a tree of nodes.

        Every valid header becomes the key of a node, the node's content collects
        everything found under that header, and each subsection is stored as a
        child node built the same way.

        A line counts as a valid header iff it starts with # and is not located:
          1. Inside of the html blocks
          2. Inside single lined in the <> tags
          3. Inside the html comment
          4. Inside any table, code block or blockquotes

        Returns the node for this section and the index of the first line that
        was not consumed.
        """
        section = DocsSectionContent()
        children = []
        cursor = starting_line

        while cursor < len(lines):
            line = lines[cursor].strip(' ')
            header_lvl = self._get_header_level_if_valid(line)

            if header_lvl is not None:
                if header_lvl < level + 1:
                    # a header at this level or above closes the current section
                    break
                # deeper header: parse it as a child and fold its content in
                subtree, cursor = self._build_tree(lines, line, cursor + 1, level + 1, governed_header)
                children.append(subtree)
                section.union(subtree)
                continue

            # multi-line constructs: the reader hands back the index to resume from
            if self._does_start_with(line, md_const.CODEBLOCK_DEF):
                block, cursor = self._read_code_lines(lines, line, cursor + 1)
                section.code_lines.extend(block)
                continue
            if self._does_start_with(line, md_const.HTML_COMMENT_START):
                block, cursor = self._read_html_block(lines, line, cursor + 1, md_const.HTML_COMMENT_END_REGEX)
                section.html_lines.extend(block)
                continue
            if self._does_contain(line, md_const.HTML_TAG_REGEX_START):
                block, cursor = self._read_html_block(lines, line, cursor + 1, md_const.HTML_TAG_REGEX_END)
                section.html_lines.extend(block)
                continue
            if self._does_start_with(line, md_const.TABLE_SYMBOL):
                block, cursor = self._read_table_block(lines, line, cursor + 1)
                section.tables.extend(block)
                continue

            # single-line constructs: classify the line, then step forward by one
            if self._does_start_with(line, md_const.BLOCKQUOTE_CHAR):
                section.blockquotes.append(line)
            elif (
                governed_header is not None
                and self._does_contain(root_key, fr'^[#]+ {governed_header}$')
                and self._does_contain(line, md_const.GOVERNED_DOC_REGEX)
            ):
                found = re.search(md_const.GOVERNED_DOC_REGEX, line)
                section.governed_document.append(found.group(0).strip('*').strip(':'))
            else:
                section.text.append(line)
            cursor += 1

        # include the header line itself in the raw text, except for the root
        if starting_line:
            first_line_to_grab = starting_line - 1
        else:
            first_line_to_grab = 0
        section.raw_text = '\n'.join(lines[first_line_to_grab:cursor])

        node = DocsMarkdownNode(key=root_key, content=section, starting_line=first_line_to_grab)
        node.subnodes = children
        return node, cursor
Attributes¤
content = content instance-attribute ¤
Functions¤
__init__(key, content, starting_line) ¤

Initialize markdown node.

Source code in trestle/core/markdown/docs_markdown_node.py
54
55
56
57
def __init__(self, key: str, content: DocsSectionContent, starting_line: int) -> None:
    """
    Initialize markdown node.

    Args:
        key: Key of the node; the tree builder passes the header line of the
            section, or 'root' for the top node.
        content: Content gathered for this section.
        starting_line: Index of the first line that belongs to this node.
    """
    # The base class receives all three values unchanged.
    super(DocsMarkdownNode, self).__init__(key, content, starting_line)
    # Re-declared here so the attribute is typed as the docs-specific content class.
    self.content: DocsSectionContent = content
build_tree_from_markdown(lines, governed_header=None) classmethod ¤

Construct a tree out of the given markdown.

Source code in trestle/core/markdown/docs_markdown_node.py
59
60
61
62
63
64
65
@classmethod
def build_tree_from_markdown(cls, lines: List[str], governed_header: Optional[str] = None):
    """Parse the markdown lines and return the root node of the resulting tree."""
    # An uninitialised instance is enough to reach the parsing helpers.
    builder = cls.__new__(cls)
    top_level = builder._get_max_header_lvl(lines)
    root, _ = builder._build_tree(lines, 'root', 0, top_level, governed_header)
    return root

DocsSectionContent ¤

Bases: BaseSectionContent

A content of the node.

Source code in trestle/core/markdown/docs_markdown_node.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class DocsSectionContent(BaseSectionContent):
    """Categorised content collected for a single markdown section."""

    def __init__(self):
        """Start every content category as an empty list."""
        super().__init__()
        self.tables: list = []
        self.text: list = []
        self.code_lines: list = []
        self.html_lines: list = []
        self.blockquotes: list = []
        self.governed_document: list = []

    def union(self, node: DocsMarkdownNode) -> None:
        """Fold the given node's content into this content."""
        super().union(node)
        # Only these categories are folded in; text and governed_document are left untouched.
        for category in ('code_lines', 'html_lines', 'tables', 'blockquotes'):
            getattr(self, category).extend(getattr(node.content, category))
Attributes¤
blockquotes = [] instance-attribute ¤
code_lines = [] instance-attribute ¤
governed_document = [] instance-attribute ¤
html_lines = [] instance-attribute ¤
tables = [] instance-attribute ¤
text = [] instance-attribute ¤
Functions¤
__init__() ¤

Initialize section content.

Source code in trestle/core/markdown/docs_markdown_node.py
32
33
34
35
36
37
38
39
40
def __init__(self) -> None:
    """
    Initialize section content.

    Every category starts as an empty list; the tree builder appends to them
    while it walks the markdown lines.
    """
    super(DocsSectionContent, self).__init__()
    # lines returned by the table block reader
    self.tables = []
    # lines that match none of the other categories
    self.text = []
    # lines returned by the code block reader
    self.code_lines = []
    # lines returned by the html block reader (html comments and html tags)
    self.html_lines = []
    # lines that start with the blockquote character
    self.blockquotes = []
    # governed document headers, with surrounding '*' and ':' stripped
    self.governed_document = []
union(node) ¤

Unites contents together.

Source code in trestle/core/markdown/docs_markdown_node.py
42
43
44
45
46
47
48
def union(self, node: DocsMarkdownNode) -> None:
    """Fold the given node's content into this content."""
    super().union(node)
    # Only these categories are folded in; text and governed_document are left untouched.
    for category in ('code_lines', 'html_lines', 'tables', 'blockquotes'):
        getattr(self, category).extend(getattr(node.content, category))

handler: python