Adds RSS auto-generation for files in 'site'

This commit adds rss_generator.py, which contains the main logic for indexing the site directory and generating a feed on startup. It serves as a sort of ad-hoc database which is accessed when /feed.xml is requested. The commit also corrects various typos and README nonsense, and expands the config options for RSS. Instances of './templates/site' have been replaced with the general BASE_DIR variable in the siteconfig.
author: mjfernez <mjfernez@gmail.com> 2021-10-14 20:14:53 -0400
committer: mjfernez <mjfernez@gmail.com> 2021-10-14 20:14:53 -0400
commit: c583a69362f86fcc8e1b35a45a06dd8377d6308f (patch)
tree: 30609b89aa2781c95fc5c04ff96db3ab063e16e7 /rss_generator.py
parent: f7668243b7a55d1f69d508b3baaf891055715f63 (diff)
download: ezcms-c583a69362f86fcc8e1b35a45a06dd8377d6308f.tar.gz
1 files changed, 74 insertions, 0 deletions
diff --git a/rss_generator.py b/rss_generator.py
new file mode 100644
index 0000000..4397a45
--- /dev/null
+++ b/rss_generator.py
@@ -0,0 +1,74 @@
+import os
+from time import strftime, strptime, ctime
+from siteconfig import siteconfig
+
+
+class RSS_Item:
+    PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH']
+
+    class NotAFile(Exception):
+        def __init__(self, path: str):
+            self.path = path
+            self.message = f"{path} not a file"
+            super().__init__(self.message)
+
+    def __init__(self, path: str):
+        if not os.path.isfile(path):
+            raise self.NotAFile(path)
+
+        self.FULL_PATH = path
+        self.TITLE = path.rsplit('.', 1)[0].split('/')[-1]
+        self.DESCRIPTION = self.parse_file()
+        self.LAST_UPDATE = self.file_last_modified()
+        self.URI = self.get_uri()
+        self.LINK = siteconfig.rss_channel_config['LINK'] + self.URI
+
+    def __str__(self):
+        return "<RSS_Item at {} - {}, {}>".format(
+            self.FULL_PATH, self.TITLE, self.short_timestamp()
+        )
+
+    def short_timestamp(self):
+        return strftime("%Y-%m-%d %H:%M %z", strptime(self.LAST_UPDATE))
+
+    def parse_file(self):
+        """
+        parse_file - reads the file at FULL_PATH and saves the content
+        from when the first <p> tag is hit up to and including the
+        closing </p> tag. Expects an HTML style file
+        """
+        with open(self.FULL_PATH) as f:
+            in_body = False
+            paragraphs = 0
+            description = ""
+            for line in f.readlines():
+                if paragraphs >= self.PARAGRAPHS:
+                    break
+                line = line.strip()
+                if line.startswith("<p>"):
+                    in_body = True
+                if in_body:
+                    description += line
+                if line.endswith("</p>"):
+                    in_body = False
+                    paragraphs += 1
+
+        return ''.join(description)
+
+    def file_last_modified(self):
+        return ctime(os.stat(self.FULL_PATH).st_ctime)
+
+    def get_uri(self):
+        return '/'.join(self.FULL_PATH.split('/')[2:])
+
+
+def get_rss_channel():
+    items = []
+    for root, dirs, files in os.walk(siteconfig.BASE_DIR):
+        for f in files:
+            path = os.path.join(root, f)
+            if (
+                path.endswith(".html") or f.endswith(".html!")
+            ) and path not in siteconfig.RSS_OMIT:
+                items.append(RSS_Item(path))
+    return items
author	mjfernez <mjfernez@gmail.com>	2021-10-14 20:14:53 -0400
committer	mjfernez <mjfernez@gmail.com>	2021-10-14 20:14:53 -0400
commit	c583a69362f86fcc8e1b35a45a06dd8377d6308f (patch)
tree	30609b89aa2781c95fc5c04ff96db3ab063e16e7 /rss_generator.py
parent	f7668243b7a55d1f69d508b3baaf891055715f63 (diff)
download	ezcms-c583a69362f86fcc8e1b35a45a06dd8377d6308f.tar.gz