# rss_generator.py
#
# Recovered from a git patch (cgit v1.2.3 export):
#   commit:  c583a69362f86fcc8e1b35a45a06dd8377d6308f
#   author:  mjfernez
#   date:    Thu, 14 Oct 2021 20:14:53 -0400
#   subject: Adds RSS auto-generation for files in 'site'
#
# This commit adds rss_generator.py which contains the main logic for
# indexing the site directory and generating a feed on startup. It serves
# as a sort of ad-hoc database which is accessed when /feed.xml is
# requested.
#
# Also corrects various typos, README nonsense, and expands the config
# options for RSS. Instances of './templates/site' have been replaced with
# the general BASE_DIR variable in the siteconfig.
"""Index the site directory and build the items of the RSS feed."""
import os
from time import strftime, strptime, ctime
from siteconfig import siteconfig


class RSS_Item:
    """One feed entry built from a single file on disk.

    Attributes set by ``__init__``:
        FULL_PATH:   the path the item was created from.
        TITLE:       last '/'-separated path component without its extension.
        DESCRIPTION: concatenated leading paragraphs (see ``parse_file``).
        LAST_UPDATE: modification time as a ``time.ctime`` string.
        URI:         FULL_PATH minus its first two '/'-separated components.
        LINK:        ``siteconfig.rss_channel_config['LINK']`` + URI.
    """

    # Maximum number of paragraphs copied into DESCRIPTION.
    PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH']

    class NotAFile(Exception):
        """Raised when an RSS_Item is requested for a non-file path."""

        def __init__(self, path: str):
            self.path = path
            self.message = f"{path} not a file"
            super().__init__(self.message)

    def __init__(self, path: str):
        """Build the item for ``path``.

        Raises:
            RSS_Item.NotAFile: if ``path`` is not an existing regular file.
        """
        if not os.path.isfile(path):
            raise self.NotAFile(path)

        self.FULL_PATH = path
        self.TITLE = path.rsplit('.', 1)[0].split('/')[-1]
        self.DESCRIPTION = self.parse_file()
        self.LAST_UPDATE = self.file_last_modified()
        self.URI = self.get_uri()
        self.LINK = siteconfig.rss_channel_config['LINK'] + self.URI

    def __str__(self):
        # The template must be non-empty: "".format(...) silently discards
        # every argument and always yields the empty string.
        return "<RSS_Item {} | {} | {}>".format(
            self.FULL_PATH, self.TITLE, self.short_timestamp()
        )

    def short_timestamp(self):
        """Return LAST_UPDATE reformatted as ``YYYY-MM-DD HH:MM <utc offset>``."""
        # strptime's default format matches the string produced by ctime().
        return strftime("%Y-%m-%d %H:%M %z", strptime(self.LAST_UPDATE))

    def parse_file(self):
        """
        parse_file - reads the file at FULL_PATH and saves the content
        from when the first <p> tag is hit up to and including the
        closing </p> tag, for at most PARAGRAPHS paragraphs.
        Expects an HTML style file.

        Only lines that (after stripping whitespace) literally start with
        "<p>" open a paragraph; only lines that end with "</p>" close one.
        """
        chunks = []
        with open(self.FULL_PATH, encoding="utf-8") as f:
            in_body = False
            paragraphs = 0
            for line in f:
                if paragraphs >= self.PARAGRAPHS:
                    break
                line = line.strip()
                if line.startswith("<p>"):
                    in_body = True
                if in_body:
                    chunks.append(line)
                    if line.endswith("</p>"):
                        in_body = False
                        paragraphs += 1

        return ''.join(chunks)

    def file_last_modified(self):
        """Return the file's last-modification time as a ctime() string."""
        # st_mtime is the content-modification time. st_ctime is the inode
        # change time on Unix (and creation time on Windows), so it does not
        # reflect when the page was last edited.
        return ctime(os.stat(self.FULL_PATH).st_mtime)

    def get_uri(self):
        """Return FULL_PATH with its first two '/'-separated parts removed."""
        # NOTE(review): the fixed depth of 2 presumably strips a leading
        # "./<dir>" prefix - confirm it still matches siteconfig.BASE_DIR.
        return '/'.join(self.FULL_PATH.split('/')[2:])


def get_rss_channel():
    """Walk siteconfig.BASE_DIR and return an RSS_Item for each page.

    A file is included when its name ends in ".html" or ".html!" and its
    joined path is not listed in siteconfig.RSS_OMIT.
    """
    items = []
    for root, _dirs, files in os.walk(siteconfig.BASE_DIR):
        for f in files:
            if not f.endswith((".html", ".html!")):
                continue
            path = os.path.join(root, f)
            if path not in siteconfig.RSS_OMIT:
                items.append(RSS_Item(path))
    return items