All pastes #2120178 Raw Edit

Something

public python v1 · immutable
#2120178 ·published 2012-02-20 18:57 UTC
rendered paste body
#!/usr/bin/env python
"""Convert a MySQL slow query log into a CSV file.

Usage: <script> SLOW_LOG OUTPUT_CSV

Every slow-log entry becomes one CSV row whose columns are listed in
``CSV_HEADER``.  The code runs unchanged on Python 2.7 and Python 3.
"""

try:
    import cPickle
except ImportError:
    # Python 3 has no ``cPickle`` module; ``pickle`` is its replacement.
    import pickle as cPickle
import csv
import collections
import sys
import re
import pdb

# Parser states.  STATE_TIME is no longer entered by parse_log() (the
# "# Time:" header is consumed in a single step) but stays defined so the
# set of module-level names is unchanged.
STATE_START, STATE_TIME, STATE_USER, STATE_QUERY_TIME, STATE_QUERY = range(5)

# "# Time: 120220 18:57:00" -> (date, time)
re_time = re.compile(r'^# Time: (\d+)\s+(\d+:\d+:\d+)$')

# "# User@Host: user[priv_user] @ host [ip]" -> (user, priv_user, host, ip)
# User names may contain underscores and the host field may be empty.
re_user = re.compile(
    r'^# User@Host: ([a-zA-Z0-9_-]*)\[([a-zA-Z0-9_-]*)\] @ '
    r'([a-zA-Z0-9._-]*) \[(.*)\]$'
)

# "# Query_time: 2  Lock_time: 0  Rows_sent: 1  Rows_examined: 0"
# The two timings are accepted both as integers and as decimals.
re_query_time = re.compile(
    r'^# Query_time:\s*(\d+(?:\.\d+)?)\s+Lock_time:\s*(\d+(?:\.\d+)?)\s+'
    r'Rows_sent:\s*(\d+)\s+Rows_examined:\s*(\d+)$'
)

# Column titles written as the first CSV row; same order as the record keys.
CSV_HEADER = (
    'date', 'time', 'user1', 'user2', 'host', 'ip',
    'query_time', 'lock_time', 'rows_sent', 'rows_examined', 'query',
)


def _new_record(stamp):
    """Return a fresh record pre-filled with the ``(date, time)`` pair."""
    record = collections.OrderedDict()
    record['time1'], record['time2'] = stamp
    return record


def _is_complete(record):
    """Return True when *record* got as far as its ``# Query_time:`` line."""
    return record is not None and 'query' in record


def _match(pattern, line, lineno):
    """Match *line* against *pattern* or raise ValueError naming the line."""
    found = pattern.match(line)
    if found is None:
        raise ValueError(
            'line %d: malformed slow-log header: %r' % (lineno, line)
        )
    return found


def parse_log(f):
    """Yield one ``OrderedDict`` per slow-log entry read from *f*.

    *f* is any iterable of text lines (typically an open file).  Each
    yielded record has, in this order, the keys ``time1``, ``time2``,
    ``user1``, ``user2``, ``host1``, ``host2``, ``query_time``,
    ``lock_time``, ``rows_sent``, ``rows_examined`` and ``query``.  All
    values are strings; ``query`` is the entry's statement lines, each
    right-stripped and followed by a single space.

    Entries that follow one another without their own ``# Time:`` header
    inherit the most recent timestamp; entries seen before any such header
    get empty ``time1``/``time2``.  Every entry is a distinct dict, so the
    results may be collected into a list.  Lines before the first header
    are ignored and entries cut short before their ``# Query_time:`` line
    are dropped.

    Raises:
        ValueError: a header line does not have the expected layout, or a
            non-header line appears where a header is required.
    """
    state = STATE_START
    record = None
    stamp = ('', '')  # (date, time) taken from the latest "# Time:" header

    for lineno, raw in enumerate(f, 1):
        line = raw.rstrip()

        if line.startswith('# Time:'):
            if _is_complete(record):
                yield record
            stamp = _match(re_time, line, lineno).groups()
            record = _new_record(stamp)
            state = STATE_USER
            continue

        if line.startswith('# User@Host:'):
            # A record that already has a user belongs to the previous
            # entry: emit it and start a new one under the same timestamp.
            if record is None or 'user1' in record:
                if _is_complete(record):
                    yield record
                record = _new_record(stamp)
            (record['user1'], record['user2'],
             record['host1'], record['host2']) = _match(
                re_user, line, lineno).groups()
            state = STATE_QUERY_TIME
            continue

        if state == STATE_START:
            # Server banner and similar preamble before the first entry.
            continue
        if state == STATE_USER:
            raise ValueError(
                'line %d: expected a "# User@Host:" header, got %r'
                % (lineno, line)
            )
        if state == STATE_QUERY_TIME:
            (record['query_time'], record['lock_time'],
             record['rows_sent'], record['rows_examined']) = _match(
                re_query_time, line, lineno).groups()
            record['query'] = ''
            state = STATE_QUERY
        else:
            # STATE_QUERY: fold the multi-line statement onto one line.
            record['query'] += line + ' '

    if _is_complete(record):
        yield record


def _open_csv_for_write(path):
    """Open *path* the way the ``csv`` module needs on this Python version."""
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


def main(argv):
    """Convert the log named by ``argv[1]`` into the CSV named by ``argv[2]``.

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) < 3:
        sys.stderr.write('usage: %s SLOW_LOG OUTPUT_CSV\n' % argv[0])
        return 2
    # Open the input first so a missing log does not leave behind an
    # output file that holds nothing but the header row.
    with open(argv[1]) as fin:
        with _open_csv_for_write(argv[2]) as fout:
            csv_writer = csv.writer(fout)
            csv_writer.writerow(CSV_HEADER)
            for record in parse_log(fin):
                csv_writer.writerow(list(record.values()))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))