#!/usr/bin/env python
"""Convert a slow query log into a CSV file.

The input is expected to be made of entries shaped like::

    # Time: 100101 12:00:00
    # User@Host: user[user] @ host [ip]
    # Query_time: 3  Lock_time: 0  Rows_sent: 1  Rows_examined: 100
    <one or more lines of query text>

The ``# Time:`` line may be omitted for an entry, in which case the entry
inherits the timestamp of the previous one.

Usage: <script> SLOW_LOG OUTPUT_CSV
"""
try:
    import cPickle
except ImportError:  # Python 3 renamed the module; keep the old name bound.
    import pickle as cPickle
import csv
import collections
import sys
import re
import pdb  # not used by the parser itself; handy for interactive debugging

# Parser states.  STATE_TIME is kept for backward compatibility; the parser
# below consumes the "# Time:" line as soon as it sees it and therefore never
# rests in that state.
STATE_START, STATE_TIME, STATE_USER, STATE_QUERY_TIME, STATE_QUERY = range(5)

# "# Time: <date digits> <h:m:s>"
re_time = re.compile(r'''^# Time: (\d+)\s+(\d+:\d+:\d+)$''')
# "# User@Host: user[user] @ host [ip]".  User and host are any run of
# characters without brackets or whitespace, and the host may be empty
# ("@  [10.0.0.1]").
re_user = re.compile(
    r'''^# User@Host: ([^\[\]\s]*)\[([^\[\]\s]*)\] @ \s*([^\[\]\s]*) \[(.*)\]$''')
# Query_time and Lock_time may be integers or decimals.
re_query_time = re.compile(
    r'''^# Query_time:\s*(\d+(?:\.\d+)?)\s+Lock_time:\s*(\d+(?:\.\d+)?)'''
    r'''\s+Rows_sent:\s*(\d+)\s+Rows_examined:\s*(\d+)$''')

# Column titles, in the same order as the keys of the records that
# parse_log() yields.
CSV_HEADER = ('date', 'time', 'user1', 'user2', 'host', 'ip', 'query_time',
              'lock_time', 'rows_sent', 'rows_examined', 'query')


def _parse_fields(pattern, line, lineno, what):
    """Return the groups of *pattern* matched against *line*.

    Raises ValueError naming the 1-based *lineno* when the line does not
    have the expected layout.
    """
    match = pattern.match(line)
    if match is None:
        raise ValueError(
            'line %d: malformed %s line: %r' % (lineno, what, line))
    return match.groups()


def _finalize(record, query_lines):
    """Attach the query text to *record* and return it.

    Returns None when there is no record yet, or when the record never
    reached its "# Query_time:" line (such a record would produce a
    misaligned CSV row).
    """
    if record is None or 'rows_examined' not in record:
        return None
    # Every query line is followed by a single space, including the last one.
    record['query'] = ''.join(part + ' ' for part in query_lines)
    return record


def parse_log(f):
    """Yield one OrderedDict per log entry read from *f*.

    *f* is any iterable of text lines (an open file, a list of strings).
    Lines before the first "# Time:" / "# User@Host:" line are ignored.

    Each record has the keys, in this order: time1, time2, user1, user2,
    host1, host2, query_time, lock_time, rows_sent, rows_examined, query.
    All values are strings.  ``query`` is the entry's query lines, each
    right-stripped and followed by one space.  Entries without their own
    "# Time:" line reuse the previous entry's time1/time2; if no timestamp
    has been seen at all, both are empty strings.

    Every yielded record is a distinct object, so the results may safely be
    collected into a list.  Entries cut short before their "# Query_time:"
    line are not yielded.

    Raises ValueError if a "# Time:", "# User@Host:" or "# Query_time:" line
    is missing where expected or does not match the expected layout.
    """
    state = STATE_START
    record = None
    query_lines = []

    for lineno, raw_line in enumerate(f, 1):
        line = raw_line.rstrip('\r\n')

        if line.startswith('# Time:'):
            finished = _finalize(record, query_lines)
            if finished is not None:
                yield finished
            record = collections.OrderedDict()
            query_lines = []
            record['time1'], record['time2'] = _parse_fields(
                re_time, line, lineno, '# Time')
            state = STATE_USER

        elif line.startswith('# User@Host:'):
            if record is None:
                # The log starts without a timestamp line; keep the columns
                # aligned with empty date/time values.
                date, clock = '', ''
            else:
                # Read the timestamp before handing the record to the
                # consumer, then start a fresh dict so the yielded one is
                # never modified afterwards.
                date, clock = record['time1'], record['time2']
                finished = _finalize(record, query_lines)
                if finished is not None:
                    yield finished
            record = collections.OrderedDict(
                [('time1', date), ('time2', clock)])
            query_lines = []
            (record['user1'], record['user2'],
             record['host1'], record['host2']) = _parse_fields(
                re_user, line, lineno, '# User@Host')
            state = STATE_QUERY_TIME

        elif state == STATE_START:
            # Preamble before the first entry.
            continue

        elif state == STATE_USER:
            raise ValueError(
                'line %d: expected a "# User@Host:" line, got %r'
                % (lineno, line))

        elif state == STATE_QUERY_TIME:
            (record['query_time'], record['lock_time'],
             record['rows_sent'], record['rows_examined']) = _parse_fields(
                re_query_time, line, lineno, '# Query_time')
            state = STATE_QUERY

        else:  # STATE_QUERY: everything up to the next header is query text
            query_lines.append(line.rstrip())

    finished = _finalize(record, query_lines)
    if finished is not None:
        yield finished


def _open_csv_for_writing(path):
    """Open *path* in the mode the csv module requires on this Python."""
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


def main(argv=None):
    """Read the log named by argv[1] and write it as CSV to argv[2].

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = argv[0] if argv else 'slowlog2csv'
        sys.stderr.write('usage: %s SLOW_LOG OUTPUT_CSV\n' % program)
        return 2

    # Open the input first so a missing log does not leave behind an output
    # file containing only the header.
    with open(argv[1]) as fin:
        with _open_csv_for_writing(argv[2]) as fout:
            csv_writer = csv.writer(fout)
            csv_writer.writerow(CSV_HEADER)
            for record in parse_log(fin):
                csv_writer.writerow(list(record.values()))
    return 0


if __name__ == '__main__':
    sys.exit(main())