diff options
Diffstat (limited to 'rss_generator.py')
-rw-r--r-- | rss_generator.py | 47 |
1 files changed, 38 insertions, 9 deletions
diff --git a/rss_generator.py b/rss_generator.py index 4397a45..f248003 100644 --- a/rss_generator.py +++ b/rss_generator.py @@ -1,12 +1,24 @@ import os from time import strftime, strptime, ctime from siteconfig import siteconfig +from view_functions import is_hidden_path class RSS_Item: + """ + RSS_Item - a (very) basic implementation of an object in an RSS + feed using only essential parameters as specified in: + https://www.rssboard.org/rss-specification#hrelementsOfLtitemgt + + Item data is generated from a given file path + """ PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH'] class NotAFile(Exception): + """ + Throws an exception if an RSS_Item is made out of a + directory or invalid file + """ def __init__(self, path: str): self.path = path self.message = f"{path} not a file" @@ -18,6 +30,7 @@ class RSS_Item: self.FULL_PATH = path self.TITLE = path.rsplit('.', 1)[0].split('/')[-1] + self.FILE_TYPE = path.rsplit('.', 1)[1] self.DESCRIPTION = self.parse_file() self.LAST_UPDATE = self.file_last_modified() self.URI = self.get_uri() @@ -35,7 +48,8 @@ class RSS_Item: """ parse_file - reads the file at FULL_PATH and saves the content from when the first <p> tag is hit up to and including the - closing </p> tag. Expects an HTML style file + closing </p> tag. Other files are interpreted as text files + and, just reads the first 3 paragraphs (two new lines in a row) """ with open(self.FULL_PATH) as f: in_body = False @@ -45,13 +59,19 @@ class RSS_Item: if paragraphs >= self.PARAGRAPHS: break line = line.strip() - if line.startswith("<p>"): - in_body = True - if in_body: + if self.FILE_TYPE in ['html', 'html!']: + if line.startswith("<p>"): + in_body = True + if in_body: + description += line + if line.endswith("</p>"): + in_body = False + paragraphs += 1 + else: description += line - if line.endswith("</p>"): - in_body = False - paragraphs += 1 + # remember, we stripped the line + if line == '': + paragraphs += 1 return ''.join(description) @@ -59,16 +79,25 @@ class RSS_Item: return ctime(os.stat(self.FULL_PATH).st_ctime) def get_uri(self): + # return everything after "./templates/" return '/'.join(self.FULL_PATH.split('/')[2:]) def get_rss_channel(): + """ + get_rss_channel - list all files from the BASE_DIR, and if allowed, + add them as RSS_Items to populate feed.xml. Called by feed.xml view + """ items = [] + extensions = siteconfig.rss_channel_config['RSS_FILE_EXT'] for root, dirs, files in os.walk(siteconfig.BASE_DIR): for f in files: + # remember, path will be like "./templates/site/..." path = os.path.join(root, f) if ( - path.endswith(".html") or f.endswith(".html!") - ) and path not in siteconfig.RSS_OMIT: + path.split(".")[-1] in extensions + and path not in siteconfig.RSS_OMIT + and not is_hidden_path(path.split('.', 1)[1]) + ): items.append(RSS_Item(path)) return items |