# -*- coding: utf-8 -*-
import hashlib
import sys
import time
import xmlreader
import socket
# Number of revisions processed between two throughput reports.
limit = 10000


def format_revision(fields):
    """Return *fields* as one UTF-8 encoded, tab-separated line.

    The fields are joined with tabs, terminated with a newline and encoded
    to UTF-8.

    Raises TypeError when a field is not a string (for example None) and
    UnicodeError when the joined text cannot be converted to UTF-8.
    """
    return ('\t'.join(fields) + '\n').encode('utf-8')


def main(filename):
    """Write one line per revision of the dump *filename* to a .parsed file.

    The output path is everything in *filename* before the first
    '.xml.7z', with '.parsed' appended. Each line holds the title, id,
    username, timestamp and revisionid attributes of one parsed entry.
    Throughput is printed every `limit` revisions and the total run time
    is printed at the end.
    """
    xml = xmlreader.XmlDump(filename, allrevisions=True)
    out_path = '%s.parsed' % filename.split('.xml.7z')[0]
    c = 0
    tt = t1 = time.time()
    # Binary mode: the lines are already UTF-8 encoded bytes, and it keeps
    # Windows from rewriting '\n' as '\r\n'. The `with` block closes the
    # file even if parsing fails part-way through.
    with open(out_path, 'wb') as f:
        for x in xml.parse():
            rev = [x.title, x.id, x.username, x.timestamp, x.revisionid]
            try:
                output = format_revision(rev)
            except (TypeError, UnicodeError):
                # Best effort: a row that cannot be formatted is printed
                # and skipped instead of aborting the run. Write errors
                # are deliberately not caught here.
                # NOTE(review): presumably triggered by a missing (None)
                # username - confirm against xmlreader.
                print(rev)
            else:
                f.write(output)
            # Rows that were skipped still count as processed.
            c += 1
            if c % limit == 0:
                print('%s ed/s' % (limit / (time.time() - t1)))
                t1 = time.time()
    elapsed = time.time() - tt
    print('Total time %s seg %s min' % (elapsed, elapsed / 60.0))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('Usage: %s DUMP.xml.7z' % sys.argv[0])
    main(sys.argv[1])