aboutsummaryrefslogtreecommitdiffstats
path: root/rss_generator.py
blob: 4397a454af4e06c54ca56e70a28dfcd95b09b664 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
from time import strftime, strptime, ctime
from siteconfig import siteconfig


class RSS_Item:
    """
    RSS_Item - one feed entry built from a single HTML-style file.

    Attributes set by __init__:
      FULL_PATH   - the path the item was constructed from
      TITLE       - the path with everything after its last '.' removed,
                    reduced to its final '/'-separated component
      DESCRIPTION - leading paragraphs of the file (see parse_file)
      LAST_UPDATE - ctime()-formatted modification time of the file
      URI         - see get_uri
      LINK        - siteconfig.rss_channel_config['LINK'] + URI
    """
    # Maximum number of paragraphs parse_file copies into DESCRIPTION.
    PARAGRAPHS = siteconfig.rss_channel_config['DESCRIPTION_LENGTH']

    class NotAFile(Exception):
        """
        NotAFile - raised by RSS_Item when the given path is not an
        existing regular file. The offending path is kept in `path`
        and the rendered text in `message`.
        """
        def __init__(self, path: str):
            self.path = path
            self.message = f"{path} not a file"
            super().__init__(self.message)

    def __init__(self, path: str):
        """
        __init__ - builds the item from the file at `path`.
        Raises RSS_Item.NotAFile if os.path.isfile(path) is false.
        """
        if not os.path.isfile(path):
            raise self.NotAFile(path)

        self.FULL_PATH = path
        self.TITLE = path.rsplit('.', 1)[0].split('/')[-1]
        self.DESCRIPTION = self.parse_file()
        self.LAST_UPDATE = self.file_last_modified()
        self.URI = self.get_uri()
        self.LINK = siteconfig.rss_channel_config['LINK'] + self.URI

    def __str__(self):
        return "<RSS_Item at {} - {}, {}>".format(
            self.FULL_PATH, self.TITLE, self.short_timestamp()
        )

    def short_timestamp(self):
        """
        short_timestamp - returns LAST_UPDATE reformatted as
        "YYYY-MM-DD HH:MM <utc offset>"
        """
        # LAST_UPDATE is a ctime() string; strptime's default format is
        # that same layout, so no explicit format is passed.
        # NOTE(review): the parsed struct_time carries no UTC offset of
        # its own, so what %z renders may be platform-dependent - confirm.
        return strftime("%Y-%m-%d %H:%M %z", strptime(self.LAST_UPDATE))

    def parse_file(self):
        """
        parse_file - reads the file at FULL_PATH and returns, as one
        string, the text of up to PARAGRAPHS paragraphs. A paragraph
        starts on a line whose stripped text begins with "<p>" and runs
        up to and including the line whose stripped text ends with
        "</p>". Expects an HTML style file.

        Lines are stripped; the lines of one paragraph are joined with a
        single space so words and tags split across lines do not run
        together. Paragraphs themselves are concatenated directly. If
        the file ends inside a paragraph, the partial text is kept.
        """
        finished = []   # completed paragraphs, in file order
        current = []    # stripped, non-empty lines of the open paragraph
        in_body = False

        # NOTE(review): the file is opened with the platform default
        # encoding - confirm that matches how the pages are saved.
        with open(self.FULL_PATH) as f:
            for line in f:
                if len(finished) >= self.PARAGRAPHS:
                    break
                line = line.strip()
                if line.startswith("<p>"):
                    in_body = True
                if not in_body:
                    # A stray "</p>" outside a captured paragraph (e.g.
                    # one opened as <p class=...>) must not count
                    # toward the paragraph limit.
                    continue
                if line:
                    current.append(line)
                if line.endswith("</p>"):
                    finished.append(' '.join(current))
                    current = []
                    in_body = False

        if current:
            finished.append(' '.join(current))
        return ''.join(finished)

    def file_last_modified(self):
        """
        file_last_modified - returns the file's last modification time
        as a ctime() string
        """
        # st_mtime is the content modification time; st_ctime would be
        # the metadata-change time on Unix and creation time on Windows.
        return ctime(os.stat(self.FULL_PATH).st_mtime)

    def get_uri(self):
        """
        get_uri - returns FULL_PATH with its first two '/'-separated
        components dropped
        """
        # NOTE(review): presumably the two dropped components are the
        # site root prefix - confirm against siteconfig.BASE_DIR.
        return '/'.join(self.FULL_PATH.split('/')[2:])


def get_rss_channel():
    """
    get_rss_channel - walks siteconfig.BASE_DIR and returns a list with
    one RSS_Item for every file whose path ends in ".html" or ".html!",
    skipping any path listed in siteconfig.RSS_OMIT
    """
    suffixes = (".html", ".html!")
    channel = []
    for folder, _subdirs, names in os.walk(siteconfig.BASE_DIR):
        candidates = (os.path.join(folder, name) for name in names)
        channel.extend(
            RSS_Item(candidate)
            for candidate in candidates
            if candidate.endswith(suffixes)
            and candidate not in siteconfig.RSS_OMIT
        )
    return channel