diff options
author | mjfernez <mjfernez@gmail.com> | 2021-10-14 20:14:53 -0400 |
---|---|---|
committer | mjfernez <mjfernez@gmail.com> | 2021-10-14 20:14:53 -0400 |
commit | c583a69362f86fcc8e1b35a45a06dd8377d6308f (patch) | |
tree | 30609b89aa2781c95fc5c04ff96db3ab063e16e7 /rss_generator.py | |
parent | f7668243b7a55d1f69d508b3baaf891055715f63 (diff) | |
download | ezcms-c583a69362f86fcc8e1b35a45a06dd8377d6308f.tar.gz |
Adds RSS auto-generation for files in 'site'
This commit adds rss_generator.py which contains the main logic for
indexing the site directory and generating a feed on startup. It serves
as a sort of ad-hoc database which is accessed when /feed.xml is
requested.
Also corrects various typos, README nonsense, and expands the config
options for RSS. Instances of './templates/site' have been replaced with
the general BASE_DIR variable in the siteconfig.
Diffstat (limited to 'rss_generator.py')
-rw-r--r-- | rss_generator.py | 74 |
1 files changed, 74 insertions, 0 deletions
# diff --git a/rss_generator.py b/rss_generator.py
# new file mode 100644, index 0000000..4397a45 (74 lines added)
"""Index the site directory and build the items used for the RSS feed."""
import os
from time import strftime, strptime, ctime
from siteconfig import siteconfig


class RSS_Item:
    """A single feed entry built from one file on disk.

    All attributes are computed once, in ``__init__``:

    * ``FULL_PATH``   - the path exactly as passed in.
    * ``TITLE``       - last '/'-separated component of the path with the
      text after the final '.' removed.
    * ``DESCRIPTION`` - the first ``PARAGRAPHS`` ``<p>...</p>`` blocks of the
      file, concatenated (see ``parse_file``).
    * ``LAST_UPDATE`` - modification time as a ``time.ctime`` string.
    * ``URI``         - ``FULL_PATH`` minus its first two path components.
    * ``LINK``        - the configured channel ``LINK`` followed by ``URI``.
    """

    # Maximum number of <p> blocks copied into DESCRIPTION.
    PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH']

    class NotAFile(Exception):
        """Raised when the path handed to ``RSS_Item`` is not a regular file."""

        def __init__(self, path: str):
            self.path = path
            self.message = f"{path} not a file"
            super().__init__(self.message)

    def __init__(self, path: str):
        """Build the item for ``path``.

        Raises:
            RSS_Item.NotAFile: if ``os.path.isfile(path)`` is false.
        """
        if not os.path.isfile(path):
            raise self.NotAFile(path)

        self.FULL_PATH = path
        self.TITLE = path.rsplit('.', 1)[0].split('/')[-1]
        self.DESCRIPTION = self.parse_file()
        self.LAST_UPDATE = self.file_last_modified()
        self.URI = self.get_uri()
        self.LINK = siteconfig.rss_channel_config['LINK'] + self.URI

    def __str__(self):
        return "<RSS_Item at {} - {}, {}>".format(
            self.FULL_PATH, self.TITLE, self.short_timestamp()
        )

    def short_timestamp(self):
        """Return ``LAST_UPDATE`` reformatted as ``%Y-%m-%d %H:%M %z``.

        ``strptime`` is called without a format, so it relies on
        ``LAST_UPDATE`` being in the default ``ctime`` layout.
        """
        return strftime("%Y-%m-%d %H:%M %z", strptime(self.LAST_UPDATE))

    def parse_file(self):
        """Return the leading ``<p>`` blocks of the file as one string.

        Reads the file at ``FULL_PATH`` line by line.  Collection starts on a
        (stripped) line that begins with ``<p>`` and runs up to and including
        the line that ends with ``</p>``.  At most ``PARAGRAPHS`` such blocks
        are collected.  Lines are stripped and concatenated with no
        separator.  Expects an HTML style file.
        """
        collected = []
        paragraphs = 0
        in_body = False
        # Decode as UTF-8 explicitly so the result does not depend on the
        # locale of the process that happens to start the site.
        with open(self.FULL_PATH, encoding="utf-8") as f:
            # Iterate lazily: we normally stop after a few paragraphs, so
            # there is no need to load the whole file with readlines().
            for line in f:
                if paragraphs >= self.PARAGRAPHS:
                    break
                line = line.strip()
                if line.startswith("<p>"):
                    in_body = True
                if in_body:
                    collected.append(line)
                    # Only a paragraph we actually captured counts toward
                    # the limit; a stray "</p>" outside a block is ignored.
                    if line.endswith("</p>"):
                        in_body = False
                        paragraphs += 1

        return ''.join(collected)

    def file_last_modified(self):
        """Return the file's last-modification time as a ``ctime`` string.

        Uses ``st_mtime``.  ``st_ctime`` is the inode-change time on Unix and
        the creation time on Windows, so it does not track content edits.
        """
        return ctime(os.stat(self.FULL_PATH).st_mtime)

    def get_uri(self):
        """Return ``FULL_PATH`` with its first two '/'-separated parts removed.

        NOTE(review): the hard-coded ``2`` presumably matches a BASE_DIR of
        the form ``./<dir>/...`` - confirm against siteconfig.BASE_DIR.
        """
        return '/'.join(self.FULL_PATH.split('/')[2:])


def get_rss_channel():
    """Walk ``siteconfig.BASE_DIR`` and return an ``RSS_Item`` per page.

    A file is included when its name ends with ``.html`` or ``.html!`` and
    its joined path is not listed in ``siteconfig.RSS_OMIT``.

    Returns:
        list of ``RSS_Item``, in ``os.walk`` order.

    Raises:
        RSS_Item.NotAFile: if a matching directory entry is not a regular
            file according to ``os.path.isfile``.
    """
    # NOTE(review): ".html!" is kept exactly as in the original condition;
    # confirm it is an intentional suffix and not a typo.
    suffixes = (".html", ".html!")
    items = []
    for root, _dirs, files in os.walk(siteconfig.BASE_DIR):
        for f in files:
            path = os.path.join(root, f)
            if f.endswith(suffixes) and path not in siteconfig.RSS_OMIT:
                items.append(RSS_Item(path))
    return items