# genrss/genrss/__init__.py

import mimetypes
from lxml.etree import Element, CDATA, tostring
from typing import Optional, List, TypeVar, Dict, Any
from datetime import datetime
from collections import namedtuple

import pytz

# Names exported by a star-import of this module.
__all__ = (
    'GenRSS',
    'Enclosure',
)

# Type variable used in this module's annotations for xml element objects.
ElementT = TypeVar('ElementT')

# Lightweight record with the fields ``url``, ``size`` and ``type``.
# Every field is optional and defaults to ``None``.
Enclosure = namedtuple('Enclosure', ('url', 'size', 'type'))
Enclosure.__new__.__defaults__ = (None, None, None)
Enclosure.__doc__ = 'Creates information for enclosure tag.'

# Default generator text; a plain literal, since there is nothing to
# interpolate.
RSS_DEFAULT_GENERATOR = 'Generated by genrss for python'


def create_element(name: str, text: Any = None,
                   children: Optional[List[ElementT]] = None,
                   **kwargs) -> ElementT:
    """Creates xml node with text or children elements.

    A truthy ``text`` takes precedence: when it is given, ``children`` is
    ignored. A ``datetime`` passed as ``text`` is rendered in the
    ``Wed, 01 Jan 2020 12:00:00 GMT`` form: naive values are taken to be
    UTC already, aware values are converted to UTC first.

    :param name: Tag name of node with namespace
    :param text: Text of node
    :param children: Appends elements as child nodes
    :param kwargs: Passed through to ``Element`` (attributes, ``attrib``,
        ``nsmap``, ...)
    :return: The newly created element
    """
    el = Element(name, **kwargs)
    if text:
        if isinstance(text, datetime):
            # Imported here so the block does not depend on new
            # module-level imports.
            from datetime import timezone
            from email.utils import format_datetime

            if text.utcoffset() is None:
                # Naive datetime: by convention it is already in UTC.
                text = text.replace(tzinfo=timezone.utc)
            else:
                # Aware datetime: convert the instant to UTC instead of
                # just relabelling the wall-clock time as GMT.
                text = text.astimezone(timezone.utc)
            # format_datetime always emits English day/month names,
            # unlike strftime('%a ... %b') which follows the locale.
            text = format_datetime(text, usegmt=True)
        el.text = text
    elif isinstance(children, (list, tuple)):
        for child in children:
            el.append(child)
    return el


class GenRSS:
    """Generates RSS feed of channel.

    Keyword arguments other than the ones listed below are ignored.

    :param title: Title of your site or feed
    :param site_url: Absolute url to the site that the feed is for
    :param feed_url: Absolute url to the rss feed
    :param description: A short description of feed. Defaults to ``title``
    :param image_url: Image absolute url for channel
    :param author: Author of channel
    :param pub_date: Datetime in utc when last item was published
    :param copyright: Copyright information for this feed
    :param language: The language of the content of this feed.
    :param editor: Who manages content in this feed
    :param webmaster: Who manages feed availability and technical support
    :param docs_url: Url written to the channel ``docs`` element
    :param categories: Each entry is added as a channel ``category`` element
    :param generator: Feed generator
    """

    def __init__(self, title: str, site_url: str, feed_url: str, **kwargs):
        self.title: str = title
        self.site_url: str = site_url
        self.feed_url: str = feed_url
        self.description: str = kwargs.pop('description', self.title)
        self.image_url: Optional[str] = kwargs.pop('image_url', None)
        self.author: Optional[str] = kwargs.pop('author', None)
        self.pub_date: Optional[datetime] = kwargs.pop('pub_date', None)
        self.copyright: Optional[str] = kwargs.pop('copyright', None)
        self.language: Optional[str] = kwargs.pop('language', None)
        self.editor: Optional[str] = kwargs.pop('editor', None)
        self.webmaster: Optional[str] = kwargs.pop('webmaster', None)
        self.docs_url: Optional[str] = kwargs.pop('docs_url', None)
        self.categories: List[str] = kwargs.pop('categories', [])

        # Item elements collected by ``item()`` and emitted by ``xml()``.
        self.items: List[Element] = []
        self.generator: str = kwargs.pop('generator', RSS_DEFAULT_GENERATOR)
        self.root_version: str = '2.0'
        # Namespaces declared on the root ``rss`` element; ``item()`` adds
        # the ``dc`` namespace on demand.
        self.root_nsmap: Dict[str, str] = {
            'atom': 'http://www.w3.org/2005/Atom'
        }

    def item(self, title: str, **kwargs):
        """Adds item to the feed.

        An item can be used for recipes, blog entries, project update, log
        entry, etc. Your RSS feed can have any number of items.

        :param title: Title of this particular item
        :param description: Content for the item. Can contain html but
            link and image urls must be absolute path including hostname
        :param url: Url to the item. This could be a blog entry
        :param guid: A unique string feed readers use to know if an item
            is new or has already been seen. If you use a guid never change
            it. If you don't provide a guid then your item urls must be unique
        :param author: If included it is the name of the item's creator.
            If not provided the item author will be the same as the feed
            author. This is typical except on multi-author blogs
        :param categories: If provided, each array item will be added as a
            category element
        :param enclosure: An enclosure object
        :param pub_date: The date and time of when the item was created.
            Feed readers use this to determine the sort order. Some readers
            will also use it to determine if the content should be presented
            as unread
        """
        description: str = kwargs.pop('description', '')
        url: Optional[str] = kwargs.pop('url', None)
        guid: Optional[str] = kwargs.pop('guid', None)
        author: Optional[str] = kwargs.pop('author', None)
        categories: List[str] = kwargs.pop('categories', [])
        enclosure: Optional[Enclosure] = kwargs.pop('enclosure', None)
        pub_date: Optional[datetime] = kwargs.pop('pub_date', None)

        item = create_element('item', children=[
            create_element('title', CDATA(title)),
            create_element('description', CDATA(description)),
        ])

        if url:
            item.append(create_element('link', url))

        # The guid falls back to the url, then to the title. It is flagged
        # as a permalink only when the url is what ends up being used.
        item.append(create_element(
            'guid',
            attrib={'isPermaLink': str(bool(not guid and url)).lower()},
            text=(guid or url or CDATA(title))
        ))

        if author or self.author:
            # Declare the ``dc`` prefix on the root element so the creator
            # tag below is serialised with it.
            if 'dc' not in self.root_nsmap:
                self.root_nsmap['dc'] = 'http://purl.org/dc/elements/1.1/'

            item.append(create_element(
                '{http://purl.org/dc/elements/1.1/}creator',
                CDATA(author or self.author)
            ))

        for category in categories:
            item.append(create_element('category', CDATA(category)))

        if enclosure:
            # guess_type() returns None for unknown extensions and an
            # attribute value must be a string, so fall back to the
            # generic binary mime type.
            mime_type = (
                enclosure.type
                or mimetypes.guess_type(enclosure.url)[0]
                or 'application/octet-stream'
            )
            item.append(create_element(
                'enclosure',
                url=enclosure.url,
                length=str(enclosure.size or 0),
                type=mime_type
            ))

        if pub_date:
            item.append(create_element('pubDate', pub_date))

        self.items.append(item)

    def xml(self, pretty: bool = False) -> str:
        """Returns the XML as a string.

        The document starts with a UTF-8 xml declaration; when ``pretty``
        is set, a blank line follows the declaration and the tree is
        indented.

        :param pretty: Pretty print xml
        """
        root = Element('rss', nsmap=self.root_nsmap, version=self.root_version)
        channel = create_element('channel', children=[
            create_element('title', CDATA(self.title)),
            create_element('description', CDATA(self.description)),
            create_element('link', self.site_url),
            create_element('{http://www.w3.org/2005/Atom}link',
                           href=self.feed_url, rel='self',
                           type='application/rss+xml'),
            create_element('generator', self.generator),
            # Aware "now" in UTC; datetime.utcnow() is deprecated.
            create_element('lastBuildDate', datetime.now(pytz.utc))
        ])

        if self.image_url:
            channel.append(create_element('image', children=[
                create_element('url', self.image_url),
                create_element('title', CDATA(self.title)),
                create_element('link', self.site_url)
            ]))
        for category in self.categories:
            channel.append(create_element('category', CDATA(category)))
        if self.pub_date:
            channel.append(create_element('pubDate', self.pub_date))
        if self.copyright:
            channel.append(create_element('copyright', CDATA(self.copyright)))
        if self.language:
            channel.append(create_element('language', CDATA(self.language)))
        if self.editor:
            channel.append(create_element('managingEditor', CDATA(self.editor)))
        if self.webmaster:
            channel.append(create_element('webMaster', CDATA(self.webmaster)))
        if self.docs_url:
            channel.append(create_element('docs', self.docs_url))

        channel.extend(self.items)

        root.append(channel)

        # Serialise straight to ``str``: lxml's default ASCII output
        # replaces non-ASCII characters with numeric character references,
        # which would be read literally inside the CDATA sections used
        # here and does not match the UTF-8 declaration below.
        body = tostring(root, pretty_print=pretty, encoding='unicode')

        return '<?xml version="1.0" encoding="UTF-8"?>\n' \
               + ('\n' if pretty else '') \
               + body