markdown_processor
trestle.core.markdown.markdown_processor
¤
A markdown processor.
logger
¤
Classes¤
MarkdownProcessor
¤
A markdown processor.
Source code in trestle/core/markdown/markdown_processor.py
class MarkdownProcessor:
    """Read markdown files, validate them as GFM and build node trees from them."""

    def __init__(self) -> None:
        """Initialize markdown processor."""
        # Passed through to DocsMarkdownNode.build_tree_from_markdown by process_markdown.
        self.governed_header = None

    def render_gfm_to_html(self, markdown_text: str) -> str:
        """
        Render given Github Flavored Markdown to HTML.

        Args:
            markdown_text: markdown source to render.

        Returns:
            The HTML string produced by cmarkgfm.

        Raises:
            TrestleError: if the renderer raises ValueError.
        """
        try:
            return cmarkgfm.github_flavored_markdown_to_html(markdown_text)
        except ValueError as e:
            # Chain the original error so the root cause stays in the traceback.
            raise TrestleError(f'Not a valid Github Flavored markdown: {e}.') from e

    def process_markdown(
        self,
        md_path: pathlib.Path,
        read_header: bool = True,
        read_body: bool = True
    ) -> Tuple[Dict, DocsMarkdownNode]:
        """
        Parse the markdown and build the tree to operate over it.

        Args:
            md_path: path of the markdown file.
            read_header: when False the returned header is an empty dict.
            read_body: when False the body is treated as an empty string.

        Returns:
            Tuple of the header dictionary and the DocsMarkdownNode tree.
        """
        header, markdown_wo_header = self.read_markdown_wo_processing(md_path, read_header, read_body)

        # The HTML is discarded; rendering is done so that invalid markdown raises TrestleError.
        _ = self.render_gfm_to_html(markdown_wo_header)

        lines = markdown_wo_header.split('\n')
        tree = DocsMarkdownNode.build_tree_from_markdown(lines, self.governed_header)
        return header, tree

    def process_control_markdown(
        self,
        md_path: pathlib.Path,
        cli_section_dict: Optional[Dict[str, str]] = None,
        part_label_to_id_map: Optional[Dict[str, str]] = None
    ) -> Tuple[Dict, ControlMarkdownNode]:
        """
        Parse control markdown and build tree with identified OSCAL components.

        Args:
            md_path: path of the control markdown file.
            cli_section_dict: optional dictionary that is merged with the header's
                sections entry; the merged dictionary is inverted (values become keys)
                before being handed to the tree context.
            part_label_to_id_map: optional mapping stored on the tree context while
                the tree is built.

        Returns:
            Tuple of the header dictionary and the ControlMarkdownNode tree.

        Raises:
            TrestleError: logged and re-raised when reading, validating or tree
                building fails with a TrestleError.
        """
        try:
            header, markdown_wo_header = self.read_markdown_wo_processing(md_path, read_header=True, read_body=True)

            section_to_part_name_map = {}
            if cli_section_dict is not None:
                # Read x-trestle-sections to the dictionary and merge it with CLI provided dictionary
                yaml_header_sections_dict = header.get(const.SECTIONS_TAG, {})
                merged_dict = merge_dicts(yaml_header_sections_dict, cli_section_dict)
                section_to_part_name_map = {v: k for k, v in merged_dict.items()}

            # The HTML is discarded; rendering is done so that invalid markdown raises TrestleError.
            _ = self.render_gfm_to_html(markdown_wo_header)

            lines = markdown_wo_header.split('\n')
            tree_context.reset()
            try:
                tree_context.section_to_part_name_map = section_to_part_name_map
                tree_context.part_label_to_id_map = part_label_to_id_map
                tree = ControlMarkdownNode.build_tree_from_markdown(lines)
            finally:
                # tree_context is shared state: clear it even when tree building fails.
                tree_context.reset()
            return header, tree
        except TrestleError as e:
            logger.error(f'Error while reading control markdown: {md_path}: {e}')
            raise

    def read_markdown_wo_processing(
        self,
        md_path: pathlib.Path,
        read_header: bool = True,
        read_body: bool = True
    ) -> Tuple[Dict, str]:
        """
        Read markdown header to dictionary and body to string.

        Args:
            md_path: path of the markdown file.
            read_header: when False an empty dict is returned instead of the header.
            read_body: when False an empty string is returned instead of the body.

        Returns:
            Tuple of the header dictionary and the markdown text without the header.

        Raises:
            TrestleError: if the file is missing, cannot be decoded, or the header
                fails YAML scanning.
        """
        try:
            # Context manager guarantees the file handle is closed, also on decode errors.
            with md_path.open('r', encoding=const.FILE_ENCODING) as md_file:
                contents = frontmatter.loads(md_file.read())
            header = contents.metadata if read_header else {}
            markdown_wo_header = contents.content if read_body else ''
            return header, markdown_wo_header
        except UnicodeDecodeError as e:
            logger.debug(traceback.format_exc())
            raise TrestleError(f'Markdown cannot be decoded into {const.FILE_ENCODING}, error: {e}') from e
        except ScannerError as e:
            logger.debug(traceback.format_exc())
            raise TrestleError(f'Header is not in a valid YAML format: {e}') from e
        except FileNotFoundError as e:
            logger.debug(traceback.format_exc())
            raise TrestleError(f'Markdown with path {md_path}, not found: {e}') from e

    def fetch_value_from_header(self, md_path: pathlib.Path, key: str) -> Optional[str]:
        """
        Fetch value for the given key from the markdown header if it exists.

        Args:
            md_path: path of the markdown file.
            key: header key to look up.

        Returns:
            The value stored under key, or None when the key is absent.
        """
        header, _ = self.read_markdown_wo_processing(md_path)
        return header.get(key)
Methods¤
__init__(self)
special
¤
Initialize markdown processor.
Source code in trestle/core/markdown/markdown_processor.py
def __init__(self) -> None:
    """Create a markdown processor with no governed header set."""
    # Starts unset; callers may assign a value before processing markdown.
    self.governed_header = None
fetch_value_from_header(self, md_path, key)
¤
Fetch value for the given key from the markdown header if it exists.
Source code in trestle/core/markdown/markdown_processor.py
def fetch_value_from_header(self, md_path: pathlib.Path, key: str) -> Optional[str]:
    """
    Look up a single key in the header of a markdown file.

    Args:
        md_path: path of the markdown file.
        key: header key to look up.

    Returns:
        The value stored under key, or None when the key is absent.
    """
    metadata, _body = self.read_markdown_wo_processing(md_path)
    return metadata.get(key)
process_control_markdown(self, md_path, cli_section_dict=None, part_label_to_id_map=None)
¤
Parse control markdown and build tree with identified OSCAL components.
Source code in trestle/core/markdown/markdown_processor.py
def process_control_markdown(
    self,
    md_path: pathlib.Path,
    cli_section_dict: Optional[Dict[str, str]] = None,
    part_label_to_id_map: Optional[Dict[str, str]] = None
) -> Tuple[Dict, ControlMarkdownNode]:
    """
    Parse control markdown and build tree with identified OSCAL components.

    Args:
        md_path: path of the control markdown file.
        cli_section_dict: optional dictionary that is merged with the header's
            sections entry; the merged dictionary is inverted (values become keys)
            before being handed to the tree context.
        part_label_to_id_map: optional mapping stored on the tree context while
            the tree is built.

    Returns:
        Tuple of the header dictionary and the ControlMarkdownNode tree.

    Raises:
        TrestleError: logged and re-raised when reading, validating or tree
            building fails with a TrestleError.
    """
    try:
        header, markdown_wo_header = self.read_markdown_wo_processing(md_path, read_header=True, read_body=True)

        section_to_part_name_map = {}
        if cli_section_dict is not None:
            # Read x-trestle-sections to the dictionary and merge it with CLI provided dictionary
            yaml_header_sections_dict = header.get(const.SECTIONS_TAG, {})
            merged_dict = merge_dicts(yaml_header_sections_dict, cli_section_dict)
            section_to_part_name_map = {v: k for k, v in merged_dict.items()}

        # The HTML is discarded; rendering is done so that invalid markdown raises TrestleError.
        _ = self.render_gfm_to_html(markdown_wo_header)

        lines = markdown_wo_header.split('\n')
        tree_context.reset()
        try:
            tree_context.section_to_part_name_map = section_to_part_name_map
            tree_context.part_label_to_id_map = part_label_to_id_map
            tree = ControlMarkdownNode.build_tree_from_markdown(lines)
        finally:
            # tree_context is shared state: clear it even when tree building fails.
            tree_context.reset()
        return header, tree
    except TrestleError as e:
        logger.error(f'Error while reading control markdown: {md_path}: {e}')
        # Bare raise re-raises the same exception with its original traceback.
        raise
process_markdown(self, md_path, read_header=True, read_body=True)
¤
Parse the markdown and build the tree to operate over it.
Source code in trestle/core/markdown/markdown_processor.py
def process_markdown(
    self,
    md_path: pathlib.Path,
    read_header: bool = True,
    read_body: bool = True
) -> Tuple[Dict, DocsMarkdownNode]:
    """
    Read a markdown file, validate its body and build a node tree from it.

    Args:
        md_path: path of the markdown file.
        read_header: forwarded to read_markdown_wo_processing.
        read_body: forwarded to read_markdown_wo_processing.

    Returns:
        Tuple of the header dictionary and the DocsMarkdownNode tree.
    """
    metadata, body = self.read_markdown_wo_processing(md_path, read_header, read_body)

    # Rendered output is not used; the call is made so that invalid markdown raises.
    self.render_gfm_to_html(body)

    doc_tree = DocsMarkdownNode.build_tree_from_markdown(body.split('\n'), self.governed_header)
    return metadata, doc_tree
read_markdown_wo_processing(self, md_path, read_header=True, read_body=True)
¤
Read markdown header to dictionary and body to string.
Source code in trestle/core/markdown/markdown_processor.py
def read_markdown_wo_processing(
    self,
    md_path: pathlib.Path,
    read_header: bool = True,
    read_body: bool = True
) -> Tuple[Dict, str]:
    """
    Read markdown header to dictionary and body to string.

    Args:
        md_path: path of the markdown file.
        read_header: when False an empty dict is returned instead of the header.
        read_body: when False an empty string is returned instead of the body.

    Returns:
        Tuple of the header dictionary and the markdown text without the header.

    Raises:
        TrestleError: if the file is missing, cannot be decoded, or the header
            fails YAML scanning.
    """
    try:
        # Context manager guarantees the file handle is closed, also on decode errors.
        with md_path.open('r', encoding=const.FILE_ENCODING) as md_file:
            contents = frontmatter.loads(md_file.read())
        header = contents.metadata if read_header else {}
        markdown_wo_header = contents.content if read_body else ''
        return header, markdown_wo_header
    except UnicodeDecodeError as e:
        logger.debug(traceback.format_exc())
        raise TrestleError(f'Markdown cannot be decoded into {const.FILE_ENCODING}, error: {e}') from e
    except ScannerError as e:
        logger.debug(traceback.format_exc())
        raise TrestleError(f'Header is not in a valid YAML format: {e}') from e
    except FileNotFoundError as e:
        logger.debug(traceback.format_exc())
        raise TrestleError(f'Markdown with path {md_path}, not found: {e}') from e
render_gfm_to_html(self, markdown_text)
¤
Render given Github Flavored Markdown to HTML.
Source code in trestle/core/markdown/markdown_processor.py
def render_gfm_to_html(self, markdown_text: str) -> str:
    """
    Render given Github Flavored Markdown to HTML.

    Args:
        markdown_text: markdown source to render.

    Returns:
        The HTML string produced by cmarkgfm.

    Raises:
        TrestleError: if the renderer raises ValueError.
    """
    try:
        return cmarkgfm.github_flavored_markdown_to_html(markdown_text)
    except ValueError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise TrestleError(f'Not a valid Github Flavored markdown: {e}.') from e
handler: python