diff --git a/logya/docparser.py b/logya/docparser.py deleted file mode 100644 index 8f805ab..0000000 --- a/logya/docparser.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -import markdown - -from yaml import load -try: - from yaml import CLoader as Loader -except ImportError: - from yaml import Loader - - -def parse(content, content_type=None): - """Parse document and return a dictionary of header fields and body.""" - - # Extract YAML header and body and load header into dict. - pos1 = content.index('---') - pos2 = content.index('---', pos1 + 1) - header = content[pos1:pos2].strip() - body = content[pos2 + 3:].strip() - parsed = load(header, Loader=Loader) - - # Parse body if not HTML/XML. - if body and content_type == 'markdown': - body = markdown.markdown( - body, - extensions=[ - 'markdown.extensions.attr_list', - 'markdown.extensions.def_list', - 'markdown.extensions.fenced_code' - ]) - - parsed['body'] = body - return parsed diff --git a/logya/docreader.py b/logya/docreader.py deleted file mode 100644 index 166abdc..0000000 --- a/logya/docreader.py +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding: utf-8 -*- -import io -import os - -from datetime import datetime - -from logya import allowed_exts, path -from logya.docparser import parse - - -def content_type(filename): - ctype = None - ext = os.path.splitext(filename)[1] - if ext in ['.html', '.htm']: - ctype = 'html' - elif ext in ['.md', '.markdown']: - ctype = 'markdown' - return ctype - - -def iter_docs(basedir): - """Recurse through directory to add documents to process.""" - - return ( - os.path.join(root, f) for root, dirs, files in os.walk(basedir) - for f in files if os.path.splitext(f)[1].strip('.') in allowed_exts) - - -def set_date(doc, attribute, default): - doc[attribute] = doc.get(attribute, default) - if isinstance(doc[attribute], str): - try: - doc[attribute] = datetime.fromisoformat(doc[attribute]) - except ValueError: - print('Attribute "{}" could not be converted to datetime. URL: {}'.format(attribute, doc)) - - -class DocReader: - """A class for reading content documents.""" - - def __init__(self, basedir): - self.basedir = basedir - - @property - def parsed(self): - """Generator that reads all docs from base directory and returns parsed - content.""" - - for filename in iter_docs(self.basedir): - try: - with io.open(filename, 'r', encoding='utf-8') as f: - content = f.read().strip() - except Exception as err: - print('Error reading file {}\n{}'.format(filename, err)) - continue - - try: - parsed = parse(content, content_type=content_type(filename)) - except Exception as err: - print('Error parsing file {}\n{}'.format(filename, err)) - continue - - # Use file modification time for created and updated properties, - # if not set in document itself. - modified = datetime.fromtimestamp(os.stat(filename).st_mtime) - set_date(parsed, 'created', modified) - set_date(parsed, 'updated', modified) - - # Set url from filename if not set in parsed document. - if 'url' not in parsed: - parsed['url'] = path.url_from_filename(filename, basedir=self.basedir) - - yield parsed