genrss/genrss/__init__.py

196 lines
7.8 KiB
Python

import mimetypes
from lxml.etree import Element, CDATA, tostring
from typing import Optional, List, TypeVar, Dict, Any
from datetime import datetime
from collections import namedtuple
import pytz
# Public names exported by a star-import of this module.
__all__ = ('GenRSS', 'Enclosure',)
# Type variable used to annotate the element arguments and return values
# of the helpers below.
ElementT = TypeVar('ElementT')

# Describes the attributes of an item's <enclosure> tag. All three fields
# default to None so callers may supply only the ones they know.
Enclosure = namedtuple('Enclosure', ('url', 'size', 'type'))
Enclosure.__new__.__defaults__ = (None, None, None)
Enclosure.__doc__ = 'Creates information for enclosure tag.'

# Text of the channel's <generator> element when the caller gives none.
# Plain literal: nothing is interpolated, so no f-string prefix is needed.
RSS_DEFAULT_GENERATOR = 'Generated by genrss for python'
def create_element(name: str, text: Any = None,
                   children: Optional[List[ElementT]] = None,
                   **kwargs) -> ElementT:
    """Creates xml node with text or children elements.

    ``text`` takes precedence over ``children``: child nodes are appended
    only when ``text`` is falsy (``None``, empty string, ...).

    :param name: Tag name of node with namespace
    :param text: Text of node. A ``datetime`` is rendered as an RFC 822
        date in GMT (e.g. ``Wed, 01 Jan 2020 00:00:00 GMT``); a naive value
        is taken to already be in UTC, an aware value is converted to UTC
    :param children: Appends elements as child nodes (list or tuple)
    :param kwargs: Forwarded unchanged to ``Element``
    :return: The newly created element
    """
    el = Element(name, **kwargs)
    if text:
        if isinstance(text, datetime):
            # Imported here so the block does not depend on new file-level
            # imports.
            from datetime import timezone
            from email.utils import format_datetime

            if text.tzinfo is None or text.utcoffset() is None:
                # Naive datetimes are documented as UTC: just label them.
                text = text.replace(tzinfo=timezone.utc)
            else:
                # Aware datetimes must be *converted*; replacing tzinfo
                # would keep the wall-clock time and mislabel it as GMT.
                text = text.astimezone(timezone.utc)
            # format_datetime always uses English day/month names, unlike
            # strftime('%a'/'%b') which follows the current locale.
            text = format_datetime(text, usegmt=True)
        el.text = text
    elif isinstance(children, (list, tuple)):
        el.extend(children)
    return el
class GenRSS:
    """Generates RSS feed of channel.

    Keyword arguments other than the ones listed below are ignored.

    :param title: Title of your site or feed
    :param site_url: Absolute url to the site that the feed is for
    :param feed_url: Absolute url to the rss feed
    :param description: A short description of feed (defaults to ``title``)
    :param image_url: Image absolute url for channel
    :param author: Author of channel
    :param pub_date: Datetime in utc when last item was published
    :param copyright: Copyright information for this feed
    :param language: The language of the content of this feed.
    :param editor: Who manages content in this feed
    :param webmaster: Who manages feed availability and technical support
    :param docs_url: Url written to the channel's ``docs`` element
    :param categories: Each entry is added as a channel ``category`` element
    :param generator: Feed generator
    """

    def __init__(self, title: str, site_url: str, feed_url: str, **kwargs):
        self.title: str = title
        self.site_url: str = site_url
        self.feed_url: str = feed_url
        self.description: str = kwargs.pop('description', self.title)
        self.image_url: Optional[str] = kwargs.pop('image_url', None)
        self.author: Optional[str] = kwargs.pop('author', None)
        self.pub_date: Optional[datetime] = kwargs.pop('pub_date', None)
        self.copyright: Optional[str] = kwargs.pop('copyright', None)
        self.language: Optional[str] = kwargs.pop('language', None)
        self.editor: Optional[str] = kwargs.pop('editor', None)
        self.webmaster: Optional[str] = kwargs.pop('webmaster', None)
        self.docs_url: Optional[str] = kwargs.pop('docs_url', None)
        self.categories: List[str] = kwargs.pop('categories', [])
        # Item elements in insertion order; attached to the channel by xml().
        self.items: List[Element] = []
        self.generator: str = kwargs.pop('generator', RSS_DEFAULT_GENERATOR)
        self.root_version: str = '2.0'
        # Namespace map of the <rss> root; item() adds 'dc' on demand.
        self.root_nsmap: Dict[str, str] = {
            'atom': 'http://www.w3.org/2005/Atom'
        }

    def item(self, title: str, **kwargs):
        """Adds item to the feed.

        An item can be used for recipes, blog entries, project update, log
        entry, etc. Your RSS feed can have any number of items.

        :param title: Title of this particular item
        :param description: Content for the item. Can contain html but
            link and image urls must be absolute path including hostname
        :param url: Url to the item. This could be a blog entry
        :param guid: A unique string feed readers use to know if an item
            is new or has already been seen. If you use a guid never change
            it. If you don't provide a guid then your item urls must be unique
        :param author: If included it is the name of the item's creator.
            If not provided the item author will be the same as the feed
            author. This is typical except on multi-author blogs
        :param categories: If provided, each array item will be added as a
            category element
        :param enclosure: An enclosure object. When its ``type`` is not set
            the mime type is guessed from the url, falling back to
            ``application/octet-stream`` if it cannot be guessed
        :param pub_date: The date and time of when the item was created.
            Feed readers use this to determine the sort order. Some readers
            will also use it to determine if the content should be presented
            as unread
        """
        description: str = kwargs.pop('description', '')
        url: Optional[str] = kwargs.pop('url', None)
        guid: Optional[str] = kwargs.pop('guid', None)
        author: Optional[str] = kwargs.pop('author', None)
        categories: List[str] = kwargs.pop('categories', [])
        enclosure: Optional[Enclosure] = kwargs.pop('enclosure', None)
        pub_date: Optional[datetime] = kwargs.pop('pub_date', None)

        node = create_element('item', children=[
            create_element('title', CDATA(title)),
            create_element('description', CDATA(description)),
        ])

        if url:
            node.append(create_element('link', url))

        # The guid falls back to the url and then to the title; it is only
        # flagged as a permalink when it was derived from the url.
        node.append(create_element(
            'guid',
            attrib={'isPermaLink': str(bool(not guid and url)).lower()},
            text=(guid or url or CDATA(title))
        ))

        creator = author or self.author
        if creator:
            # dc:creator needs the Dublin Core namespace on the root element.
            self.root_nsmap.setdefault(
                'dc', 'http://purl.org/dc/elements/1.1/')
            node.append(create_element(
                '{http://purl.org/dc/elements/1.1/}creator',
                CDATA(creator)
            ))

        for category in categories:
            node.append(create_element('category', CDATA(category)))

        if enclosure:
            # guess_type() returns None for unknown extensions, and lxml
            # rejects None attribute values, so fall back to a generic type.
            mime_type = (enclosure.type
                         or mimetypes.guess_type(enclosure.url)[0]
                         or 'application/octet-stream')
            node.append(create_element(
                'enclosure',
                url=enclosure.url,
                length=str(enclosure.size or 0),
                type=mime_type
            ))

        if pub_date:
            node.append(create_element('pubDate', pub_date))

        self.items.append(node)

    def xml(self, pretty: bool = False) -> str:
        """Returns the XML as a string.

        The result starts with an XML declaration; with ``pretty`` an extra
        blank line follows the declaration and the tree is indented.

        :param pretty: Pretty print xml
        """
        root = Element('rss', nsmap=self.root_nsmap, version=self.root_version)
        channel = create_element('channel', children=[
            create_element('title', CDATA(self.title)),
            create_element('description', CDATA(self.description)),
            create_element('link', self.site_url),
            create_element('{http://www.w3.org/2005/Atom}link',
                           href=self.feed_url, rel='self',
                           type='application/rss+xml'),
            create_element('generator', self.generator),
            # Aware "now" in UTC; datetime.utcnow() is deprecated.
            create_element('lastBuildDate', datetime.now(pytz.utc))
        ])

        if self.image_url:
            channel.append(create_element('image', children=[
                create_element('url', self.image_url),
                create_element('title', CDATA(self.title)),
                create_element('link', self.site_url)
            ]))
        for category in self.categories:
            channel.append(create_element('category', CDATA(category)))
        if self.pub_date:
            channel.append(create_element('pubDate', self.pub_date))
        if self.copyright:
            channel.append(create_element('copyright', CDATA(self.copyright)))
        if self.language:
            channel.append(create_element('language', CDATA(self.language)))
        if self.editor:
            channel.append(create_element('managingEditor', CDATA(self.editor)))
        if self.webmaster:
            channel.append(create_element('webMaster', CDATA(self.webmaster)))
        if self.docs_url:
            channel.append(create_element('docs', self.docs_url))

        for item in self.items:
            channel.append(item)
        root.append(channel)

        return '<?xml version="1.0" encoding="UTF-8"?>\n' \
            + ('\n' if pretty else '') \
            + tostring(root, pretty_print=pretty).decode('utf-8')