Mine

public python v1 · immutable
#1876351
·published 2010-06-03 01:14 UTC
#!/usr/bin/env python## Copyright (c) 2010 Andrew Barr <andrew.james.barr@gmail.com>##    This program is free software; you can redistribute it and/or modify#    it under the terms of the GNU General Public License as published by#    the Free Software Foundation; version 3 of the License, or at your #    option, any later version.##    This program is distributed in the hope that it will be useful,#    but WITHOUT ANY WARRANTY; without even the implied warranty of#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the#    GNU General Public License for more details.import xml.dom.minidom, urllib2, string, sysprint "Content-Type: application/rss+xml"print "Status: 200 OK"print ""class NotModifiedHandler(urllib2.BaseHandler):      def http_error_304(self, req, fp, code, message, headers):        addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url())        addinfourl.code = code        return addinfourl        def get_feed():		theurl = 'http://feeds.gawker.com/gawker/vip'	txdata = None	txheaders = {'User-agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1) Gecko/20090423 Firefox/3.5.5'}		feed = urllib2.Request(theurl, txdata, txheaders)	try:		saved_etag = open('/var/lib/rssfilt/etag/gawker.txt', 'r').read()	except IOError:		saved_etag = None		if(saved_etag is not None):		# put it into if-none-modified header		feed.add_header('If-None-Match', saved_etag)					opener = urllib2.build_opener(NotModifiedHandler())	feed_stream = opener.open(feed)	headers = feed_stream.info()		if hasattr(feed_stream, 'code') and feed_stream.code == 304:		return open('/var/lib/rssfilt/cache/gawker.xml', 'r')	else:		# save etag and feed to disk		etag = feed_stream.headers.get('ETag')		open('/var/lib/rssfilt/etag/gawker.txt', 'w+', 0600).write(etag)		open('/var/lib/rssfilt/cache/gawker.xml', 'w+', 0600).write(feed_stream.read())						return open('/var/lib/rssfilt/cache/gawker.xml', 'rU')feed = get_feed()dom = xml.dom.minidom.parse(feed)# only print one authorfor author in dom.getElementsByTagName('dc:creator'):	newell = '<dc:creator><![CDATA[Jim Newell]]></dc:creator>'	if(author.toxml() != newell):		item = author.parentNode		item.parentNode.removeChild(item)print dom.toxml("utf-8")