All pastes #1951930 Raw Edit

Stuff

public text v1 · immutable
#1951930 ·published 2010-09-30 16:45 UTC
rendered paste body
# -*- coding: utf-8  -*-

import hashlib
import sys
import time
import xmlreader
import socket

# Walk every revision yielded by xmlreader.XmlDump(...).parse() and write one
# tab-separated line per revision (title, id, username, timestamp,
# revisionid), UTF-8 encoded, to "<name>.parsed", where <name> is the input
# path with everything from the first '.xml.7z' onward dropped.
#
# Usage: <this script> <dump file>
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.exit('Usage: %s <dump file>' % sys.argv[0])

filename = sys.argv[1]
xml = xmlreader.XmlDump(filename, allrevisions=True)

limit = 10000     # report throughput every `limit` revisions
c = 0             # revisions seen so far, including ones that were skipped
t1 = time.time()  # start of the current throughput window
tt = time.time()  # start of the whole run

# `with` guarantees the output file is flushed and closed even when parsing
# or writing raises part-way through the dump.
with open('%s.parsed' % filename.split('.xml.7z')[0], 'w') as f:
    for x in xml.parse():
        rev = [x.title, x.id, x.username, x.timestamp, x.revisionid]

        # Only the formatting step is best-effort: a non-string field
        # (TypeError) or text that cannot be converted (UnicodeError) gets the
        # record reported and skipped. Write errors and Ctrl-C are no longer
        # swallowed, so a full disk stops the run instead of printing every
        # remaining record.
        try:
            output = ('\t'.join(rev) + '\n').encode('utf-8')
        except (TypeError, UnicodeError):
            print(rev)
        else:
            f.write(output)

        c += 1
        if c % limit == 0:
            elapsed = time.time() - t1
            # A coarse clock can report zero elapsed time; skip the rate
            # rather than divide by zero.
            if elapsed > 0:
                # Single-argument print(...) with %s yields the same text as
                # the Python 2 statement `print rate, 'ed/s'`.
                # NOTE(review): 'ed/s' presumably means edits per second.
                print('%s ed/s' % (limit / elapsed))
            t1 = time.time()

total = time.time() - tt
print('Total time %s seg %s min' % (total, total / 60.0))