#!/usr/bin/env python
#
# Copyright (c) 2010 Andrew Barr <andrew.james.barr@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3 of the License, or at your
# option, any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""CGI script that republishes the Gawker VIP feed filtered to one author.

The upstream feed is fetched with a conditional GET (``If-None-Match``) and
cached on disk; every element whose ``dc:creator`` child is not the kept
author is removed, and the result is written to stdout as an RSS response.
"""

import os
import string
import sys
import xml.dom.minidom

try:
    import urllib2
except ImportError:
    # Python 3 exposes the same names (Request, BaseHandler, build_opener,
    # addinfourl) from urllib.request.
    import urllib.request as urllib2

FEED_URL = 'http://feeds.gawker.com/gawker/vip'
USER_AGENT = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1) '
              'Gecko/20090423 Firefox/3.5.5')
ETAG_PATH = '/var/lib/rssfilt/etag/gawker.txt'
CACHE_PATH = '/var/lib/rssfilt/cache/gawker.xml'

# Serialized form of the only <dc:creator> element whose item is kept.
KEPT_CREATOR_XML = '<dc:creator><![CDATA[Jim Newell]]></dc:creator>'


class NotModifiedHandler(urllib2.BaseHandler):
    """Turn an HTTP 304 into a normal response object instead of an error."""

    def http_error_304(self, req, fp, code, message, headers):
        """Return a response wrapping *fp* with ``code`` set to the status."""
        addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url())
        addinfourl.code = code
        return addinfourl


def _write_private(path, data, mode):
    """Write *data* to *path*, creating the file with 0600 permissions.

    *mode* is the ``os.fdopen`` mode (``'w'`` for text, ``'wb'`` for bytes).
    The permission bits only apply when the file is newly created.
    """
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, mode) as handle:
        handle.write(data)


def get_feed(url=FEED_URL, etag_path=ETAG_PATH, cache_path=CACHE_PATH):
    """Return an open binary file object positioned at the start of the feed.

    The saved ETag (if any) is sent as ``If-None-Match``.  On a 304 the
    cached copy is returned; otherwise the response body and its ETag are
    saved to disk and the freshly written cache file is returned.  The
    caller is responsible for closing the returned file.

    Raises whatever ``urllib2`` raises on network/HTTP failure, and
    ``IOError``/``OSError`` if the cache cannot be read or written.
    """
    request = urllib2.Request(url, None, {'User-agent': USER_AGENT})

    try:
        with open(etag_path, 'r') as etag_file:
            saved_etag = etag_file.read()
    except IOError:
        saved_etag = None

    # Only make the request conditional when there is a cached body to fall
    # back on; otherwise a 304 would leave nothing to serve.
    if saved_etag and os.path.isfile(cache_path):
        request.add_header('If-None-Match', saved_etag)

    opener = urllib2.build_opener(NotModifiedHandler())
    response = opener.open(request)
    try:
        if getattr(response, 'code', None) == 304:
            return open(cache_path, 'rb')
        etag = response.headers.get('ETag')
        body = response.read()
    finally:
        response.close()

    # Write the body before the validator so a failed cache write can never
    # leave an ETag on disk that describes content we do not have.  A missing
    # ETag header is stored as an empty string, which is ignored on read.
    _write_private(cache_path, body, 'wb')
    _write_private(etag_path, etag or '', 'w')
    return open(cache_path, 'rb')


def filter_items(dom, wanted=KEPT_CREATOR_XML):
    """Remove the parent of every ``dc:creator`` that does not match *wanted*.

    *dom* is modified in place and also returned.  An element is compared by
    its serialized XML, so *wanted* must be the exact ``toxml()`` output of
    the creator element to keep.
    """
    for author in dom.getElementsByTagName('dc:creator'):
        if author.toxml() == wanted:
            continue
        item = author.parentNode
        container = item.parentNode if item is not None else None
        # The item may already be detached if an earlier creator in the same
        # item triggered its removal.
        if container is not None:
            container.removeChild(item)
    return dom


def main():
    """Fetch, filter and print the feed as a CGI response on stdout."""
    feed = get_feed()
    try:
        dom = xml.dom.minidom.parse(feed)
    finally:
        feed.close()
    filter_items(dom)
    body = dom.toxml("utf-8")

    # Headers are emitted only once the body is ready, so a fetch or parse
    # failure does not get reported to the client as "200 OK".  toxml() with
    # an encoding yields bytes, hence the byte-oriented stream.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(b"Content-Type: application/rss+xml\n")
    out.write(b"Status: 200 OK\n")
    out.write(b"\n")
    out.write(body)
    out.write(b"\n")
    out.flush()


if __name__ == "__main__":
    main()