genrss/genrss/__init__.py

140 lines
5.4 KiB
Python

import mimetypes
from collections import namedtuple
from datetime import datetime, timezone
from email.utils import format_datetime
from typing import Optional, List, NoReturn

import pytz
from lxml.etree import Element, CDATA, tostring
# Media attachment of a feed item: ``url`` of the file, its ``size`` (rendered
# as the enclosure ``length`` attribute) and its MIME ``type``.
Enclosure = namedtuple('Enclosure', ('url', 'size', 'type'))
# Make every field optional; each one defaults to None.
Enclosure.__new__.__defaults__ = (None, None, None)
# Default text of the channel <generator> element (plain literal: the string
# has no placeholders, so no f-string is needed).
RSS_DEFAULT_GENERATOR = 'Generated by genrss for python'
def create_element(name: str, text=None, children=None, **kwargs) -> Element:
    """Create an XML element holding either text or child elements.

    :param name: tag name of the new element
    :param text: text content; a ``datetime`` is rendered as an RFC 822 date
        in GMT (naive values are taken to already be in UTC, aware values
        are converted to UTC first)
    :param children: list or tuple of elements to append; only used when
        ``text`` is falsy
    :param kwargs: forwarded unchanged to ``Element`` (attributes, ``attrib``,
        ``nsmap``, ...)
    :return: the newly created element
    """
    el = Element(name, **kwargs)
    if text:
        if isinstance(text, datetime):
            if text.utcoffset() is None:
                # Naive datetime: keep the wall-clock value and label it UTC.
                moment = text.replace(tzinfo=timezone.utc)
            else:
                # Aware datetime: convert the instant to UTC rather than just
                # relabelling it, which would shift the published time.
                moment = text.astimezone(timezone.utc)
            # format_datetime always emits English day/month names, unlike
            # strftime('%a'/'%b'), which follows the process locale.
            text = format_datetime(moment, usegmt=True)
        el.text = text
    elif isinstance(children, (list, tuple)):
        el.extend(children)
    return el
class GenRSS:
    """Builder for an RSS 2.0 feed.

    Channel metadata is given to the constructor, entries are added with
    :meth:`item`, and :meth:`xml` serializes the whole feed to a string.
    """

    def __init__(self, title: str, site_url: str, feed_url: str, **kwargs):
        """Store the channel metadata.

        :param title: channel title
        :param site_url: written to the channel ``<link>`` element
        :param feed_url: written to the ``atom:link rel="self"`` element
        :param kwargs: optional settings: ``description`` (defaults to
            ``title``), ``image_url``, ``author``, ``pub_date``,
            ``copyright``, ``language``, ``editor``, ``webmaster``,
            ``docs_url``, ``categories`` and ``generator``
        """
        self.title: str = title
        self.site_url: str = site_url
        self.feed_url: str = feed_url
        self.description: str = kwargs.pop('description', self.title)
        self.image_url: Optional[str] = kwargs.pop('image_url', None)
        self.author: Optional[str] = kwargs.pop('author', None)
        self.pub_date: Optional[datetime] = kwargs.pop('pub_date', None)
        self.copyright: Optional[str] = kwargs.pop('copyright', None)
        self.language: Optional[str] = kwargs.pop('language', None)
        self.editor: Optional[str] = kwargs.pop('editor', None)
        self.webmaster: Optional[str] = kwargs.pop('webmaster', None)
        self.docs_url: Optional[str] = kwargs.pop('docs_url', None)
        self.categories: List[str] = kwargs.pop('categories', [])
        # Already-built <item> elements, in insertion order.
        self.items: List[Element] = []
        self.generator = kwargs.pop('generator', RSS_DEFAULT_GENERATOR)
        self.root_version = '2.0'
        # Namespace prefixes declared on the root <rss> element; item() adds
        # the ``dc`` prefix the first time an author is emitted.
        self.root_nsmap = {
            'atom': 'http://www.w3.org/2005/Atom'
        }

    def item(self, title: str, **kwargs) -> None:
        """Build an ``<item>`` element and add it to the feed.

        :param title: item title; also used as the guid text when neither
            ``guid`` nor ``url`` is given
        :param kwargs: optional settings: ``description``, ``url``, ``guid``,
            ``author`` (falls back to the channel author), ``categories``,
            ``enclosure`` (an ``Enclosure``) and ``pub_date``
        """
        description: str = kwargs.pop('description', '')
        url: Optional[str] = kwargs.pop('url', None)
        guid: Optional[str] = kwargs.pop('guid', None)
        author: Optional[str] = kwargs.pop('author', None)
        categories: List[str] = kwargs.pop('categories', [])
        enclosure: Optional[Enclosure] = kwargs.pop('enclosure', None)
        pub_date: Optional[datetime] = kwargs.pop('pub_date', None)

        node = create_element('item', children=[
            create_element('title', CDATA(title)),
            create_element('description', CDATA(description)),
        ])

        if url:
            node.append(create_element('link', url))

        # isPermaLink is "true" only when the guid text is the item URL itself
        # (no explicit guid was given and a URL exists).
        node.append(create_element(
            'guid',
            attrib={'isPermaLink': str(bool(not guid and url)).lower()},
            text=(guid or url or CDATA(title))
        ))

        creator = author or self.author
        if creator:
            # The Dublin Core prefix must be declared on the root element.
            self.root_nsmap.setdefault(
                'dc', 'http://purl.org/dc/elements/1.1/')
            node.append(create_element(
                '{http://purl.org/dc/elements/1.1/}creator',
                CDATA(creator)
            ))

        for category in categories:
            node.append(create_element('category', CDATA(category)))

        if enclosure:
            # guess_type() yields None for unrecognised URLs, and None is not
            # a valid attribute value, so fall back to a generic MIME type.
            mime_type = (
                enclosure.type
                or mimetypes.guess_type(enclosure.url)[0]
                or 'application/octet-stream'
            )
            node.append(create_element(
                'enclosure',
                url=enclosure.url,
                length=str(enclosure.size or 0),
                type=mime_type
            ))

        if pub_date:
            node.append(create_element('pubDate', pub_date))

        self.items.append(node)

    def xml(self, pretty: bool = False) -> str:
        """Serialize the feed.

        :param pretty: passed to the serializer as ``pretty_print``
        :return: the XML document as a string, preceded by an XML declaration
        """
        root = Element('rss', nsmap=self.root_nsmap, version=self.root_version)
        channel = create_element('channel', children=[
            create_element('title', CDATA(self.title)),
            create_element('description', CDATA(self.description)),
            create_element('link', self.site_url),
            create_element('{http://www.w3.org/2005/Atom}link',
                           href=self.feed_url, rel='self',
                           type='application/rss+xml'),
            create_element('generator', self.generator),
            # datetime.utcnow() is deprecated; an aware UTC timestamp renders
            # to the same wall-clock value.
            create_element('lastBuildDate', datetime.now(timezone.utc))
        ])

        if self.image_url:
            channel.append(create_element('image', children=[
                create_element('url', self.image_url),
                create_element('title', CDATA(self.title)),
                create_element('link', self.site_url)
            ]))

        for category in self.categories:
            channel.append(create_element('category', CDATA(category)))

        if self.pub_date:
            channel.append(create_element('pubDate', self.pub_date))
        if self.copyright:
            channel.append(create_element('copyright', CDATA(self.copyright)))
        if self.language:
            channel.append(create_element('language', CDATA(self.language)))
        if self.editor:
            channel.append(
                create_element('managingEditor', CDATA(self.editor)))
        if self.webmaster:
            channel.append(create_element('webMaster', CDATA(self.webmaster)))
        if self.docs_url:
            channel.append(create_element('docs', self.docs_url))

        for item in self.items:
            channel.append(item)

        root.append(channel)
        return '<?xml version="1.0" encoding="UTF-8"?>\n' \
            + tostring(root, pretty_print=pretty).decode('utf-8')