1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
import os
from time import strftime, strptime, ctime
from siteconfig import siteconfig
from view_functions import is_hidden_path
class RSS_Item:
    """
    RSS_Item - a (very) basic implementation of an object in an RSS
    feed using only essential parameters as specified in:
    https://www.rssboard.org/rss-specification#hrelementsOfLtitemgt

    Item data is generated from a given file path.

    Attributes set by the constructor:
        FULL_PATH   - the path the item was built from
        TITLE       - file name without directories or extension
        FILE_TYPE   - text after the last '.' of the file name ('' if none)
        DESCRIPTION - leading paragraphs of the file (see parse_file)
        LAST_UPDATE - modification time as a time.ctime() string
        URI         - FULL_PATH minus its first two '/'-separated parts
        LINK        - siteconfig.rss_channel_config['LINK'] + URI
    """

    # Maximum number of paragraphs copied into DESCRIPTION.
    PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH']

    class NotAFile(Exception):
        """
        Raised when an RSS_Item is built from a directory or from a
        path that is not an existing regular file.
        """

        def __init__(self, path: str):
            self.path = path
            self.message = f"{path} not a file"
            super().__init__(self.message)

    def __init__(self, path: str):
        """
        Build the item from the file at `path`.

        Raises RSS_Item.NotAFile if `path` is not an existing file.
        """
        if not os.path.isfile(path):
            raise self.NotAFile(path)
        self.FULL_PATH = path

        # Split the extension off the file name only, never off the whole
        # path: a dot in a directory name (or no dot at all) must not leak
        # into TITLE / FILE_TYPE or raise IndexError.
        filename = path.split('/')[-1]
        stem, dot, extension = filename.rpartition('.')
        self.TITLE = stem if dot else filename
        self.FILE_TYPE = extension

        # parse_file() reads FULL_PATH and FILE_TYPE, so it must run
        # after both are assigned.
        self.DESCRIPTION = self.parse_file()
        self.LAST_UPDATE = self.file_last_modified()
        self.URI = self.get_uri()
        self.LINK = siteconfig.rss_channel_config['LINK'] + self.URI

    def __str__(self):
        return (
            f"<RSS_Item at {self.FULL_PATH} - "
            f"{self.TITLE}, {self.short_timestamp()}>"
        )

    def short_timestamp(self):
        """
        Return LAST_UPDATE reformatted as "YYYY-MM-DD HH:MM <utc offset>".
        """
        # LAST_UPDATE is a ctime() string, which is exactly what
        # strptime() parses when no format is given.
        return strftime("%Y-%m-%d %H:%M %z", strptime(self.LAST_UPDATE))

    def parse_file(self):
        """
        parse_file - reads the file at FULL_PATH and returns at most
        PARAGRAPHS paragraphs of it as a single string.

        For 'html' / 'html!' files a paragraph runs from a line starting
        with <p> up to and including the line ending with </p>; lines
        outside such a span are ignored. Any other file is treated as
        plain text, where paragraphs are separated by blank lines.

        Lines are stripped and joined with a single space so that words
        on neighbouring lines do not run together.
        """
        is_html = self.FILE_TYPE in ('html', 'html!')
        pieces = []
        paragraphs = 0
        in_body = False   # html: currently between <p> and </p>
        has_text = False  # text: the current paragraph has content
        # NOTE(review): the file is opened with the platform default
        # encoding, as before - confirm whether utf-8 should be forced.
        with open(self.FULL_PATH) as f:
            # Iterate lazily; we normally stop long before end of file.
            for raw_line in f:
                if paragraphs >= self.PARAGRAPHS:
                    break
                line = raw_line.strip()
                if is_html:
                    if line.startswith("<p>"):
                        in_body = True
                    if in_body and line:
                        pieces.append(line)
                        if line.endswith("</p>"):
                            in_body = False
                            paragraphs += 1
                elif line:
                    pieces.append(line)
                    has_text = True
                elif has_text:
                    # A blank line closes a paragraph only if there was
                    # text before it, so leading or repeated blank lines
                    # do not use up the paragraph budget.
                    paragraphs += 1
                    has_text = False
        return ' '.join(pieces)

    def file_last_modified(self):
        """
        Return the file's last content modification time as a
        time.ctime() string.
        """
        # st_mtime is the content modification time; st_ctime is the
        # metadata change time on Unix and the creation time on Windows.
        return ctime(os.stat(self.FULL_PATH).st_mtime)

    def get_uri(self):
        """
        Return FULL_PATH without its first two '/'-separated components.
        """
        # return everything after "./templates/"
        return '/'.join(self.FULL_PATH.split('/')[2:])
def get_rss_channel():
    """
    get_rss_channel - walk siteconfig.BASE_DIR and build an RSS_Item for
    every file that may appear in the feed. Called by feed.xml view.

    A file is included when all of the following hold:
      * the text after its last '.' is listed in
        siteconfig.rss_channel_config['RSS_FILE_EXT']
      * its path is not in siteconfig.RSS_OMIT
      * is_hidden_path() is false for the part of the path after the
        first '.'

    Returns the list of RSS_Item objects in os.walk() order.
    """
    allowed = siteconfig.rss_channel_config['RSS_FILE_EXT']
    feed = []
    for folder, _subdirs, names in os.walk(siteconfig.BASE_DIR):
        for name in names:
            # remember, candidate will be like "./templates/site/..."
            candidate = os.path.join(folder, name)
            if candidate.rsplit(".", 1)[-1] not in allowed:
                continue
            if candidate in siteconfig.RSS_OMIT:
                continue
            # Drop the leading "." of the relative path before asking
            # whether the remainder is hidden.
            if is_hidden_path(candidate.split('.', 1)[1]):
                continue
            feed.append(RSS_Item(candidate))
    return feed
|