import os
from time import strftime, strptime, ctime

from siteconfig import siteconfig
from view_functions import is_hidden_path


class RSS_Item:
    """
    RSS_Item - a (very) basic implementation of an object in an RSS feed
    using only essential parameters as specified in:
    https://www.rssboard.org/rss-specification#hrelementsOfLtitemgt

    Item data is generated from a given file path.

    Attributes set by the constructor:
        FULL_PATH   - the path the item was built from
        TITLE       - file name without directory and extension
        FILE_TYPE   - file extension without the leading dot ('' if none)
        DESCRIPTION - leading content of the file (see parse_file)
        LAST_UPDATE - modification time as a ctime() string
        URI         - FULL_PATH with its first two components removed
        LINK        - channel LINK from siteconfig followed by URI
    """

    # Maximum number of paragraphs copied into an item's description.
    PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH']

    # File types whose description is taken from <p>...</p> elements;
    # every other type is read as plain text.
    _HTML_FILE_TYPES = ('html', 'html!')

    class NotAFile(Exception):
        """
        Raised when an RSS_Item is made out of a directory or invalid file
        """

        def __init__(self, path: str):
            self.path = path
            self.message = f"{path} not a file"
            super().__init__(self.message)

    def __init__(self, path: str):
        """
        Build an item from the file at `path`.

        Raises RSS_Item.NotAFile if `path` is not an existing regular file.
        """
        if not os.path.isfile(path):
            raise self.NotAFile(path)

        self.FULL_PATH = path
        # splitext on the base name copes with extension-less files and
        # with dots in directory names, which a plain split('.') does not.
        stem, extension = os.path.splitext(os.path.basename(path))
        self.TITLE = stem
        self.FILE_TYPE = extension[1:]  # drop the leading '.'
        self.DESCRIPTION = self.parse_file()
        self.LAST_UPDATE = self.file_last_modified()
        self.URI = self.get_uri()
        self.LINK = siteconfig.rss_channel_config['LINK'] + self.URI

    def __str__(self):
        """Return a one-line summary: path, title and short timestamp."""
        return "<RSS_Item path={!r} title={!r} updated={}>".format(
            self.FULL_PATH,
            self.TITLE,
            self.short_timestamp()
        )

    def short_timestamp(self):
        """
        Return LAST_UPDATE reformatted as "YYYY-MM-DD HH:MM <utc offset>".

        LAST_UPDATE is a ctime() string, which is exactly the default
        format strptime() parses when no format argument is given.
        """
        return strftime("%Y-%m-%d %H:%M %z", strptime(self.LAST_UPDATE))

    def parse_file(self):
        """
        parse_file - reads the file at FULL_PATH and returns the start of
        its content as the item description.

        HTML files ('html', 'html!'): content is kept from a line starting
        with <p> up to and including the line ending with the closing </p>
        tag. Other files are interpreted as text files: every line is kept
        and an empty line marks the end of a paragraph.

        Reading stops once PARAGRAPHS paragraphs have been collected.
        Lines are stripped and concatenated without a separator.
        """
        is_html = self.FILE_TYPE in self._HTML_FILE_TYPES
        chunks = []
        in_body = False
        paragraphs = 0

        with open(self.FULL_PATH) as f:
            # Iterate lazily; we normally stop long before the end of file.
            for raw_line in f:
                if paragraphs >= self.PARAGRAPHS:
                    break
                line = raw_line.strip()

                if is_html:
                    if line.startswith("<p>"):
                        in_body = True
                    if in_body:
                        chunks.append(line)
                        # Only paragraphs that were actually captured
                        # count towards the limit.
                        if line.endswith("</p>"):
                            in_body = False
                            paragraphs += 1
                else:
                    chunks.append(line)
                    # remember, we stripped the line
                    if line == '':
                        paragraphs += 1

        return ''.join(chunks)

    def file_last_modified(self):
        """
        Return the file's last modification time as a ctime() string.

        Uses st_mtime (content modification) rather than st_ctime, which is
        the metadata-change time on Unix and the creation time on Windows.
        """
        return ctime(os.stat(self.FULL_PATH).st_mtime)

    def get_uri(self):
        """
        Return FULL_PATH without its first two '/'-separated components,
        i.e. everything after "./templates/".
        """
        return '/'.join(self.FULL_PATH.split('/')[2:])


def get_rss_channel():
    """
    get_rss_channel - list all files from the BASE_DIR, and if allowed, add
    them as RSS_Items to populate feed.xml. Called by feed.xml view

    A file is allowed when its extension is listed in the RSS_FILE_EXT
    channel setting, its path is not in siteconfig.RSS_OMIT and
    is_hidden_path() does not flag it.

    Returns a list of RSS_Item objects in os.walk() order.
    """
    items = []
    extensions = siteconfig.rss_channel_config['RSS_FILE_EXT']

    for root, _dirs, files in os.walk(siteconfig.BASE_DIR):
        for name in files:
            # remember, path will be like "./templates/site/..."
            path = os.path.join(root, name)

            # Same extension rule RSS_Item uses for FILE_TYPE.
            if os.path.splitext(name)[1][1:] not in extensions:
                continue
            if path in siteconfig.RSS_OMIT:
                continue
            # Drop the leading "." of the relative path before the check.
            if is_hidden_path(path.split('.', 1)[1]):
                continue

            items.append(RSS_Item(path))

    return items