All pastes #1481049 Raw Edit

Splunk bucket .data file case-re

public python v1 · immutable
#1481049 ·published 2009-07-01 18:34 UTC
rendered paste body
#!/usr/bin/python
"""splunk_datafile_restore_case.py:  Script to fix the case of entries in your
Hosts.data, Sources.data, and SourceTypes.data files based on the known entries
in the top-level files of the same names.

This script should be run from within a Splunk database bucket (a path named
like "$SPLUNK_HOME/var/lib/splunk/<index>/db/db_<start>_<end>_<seq>").  Each
file will be read and written out to a new file (ending in ".new") and you
can then choose to replace the originals if you like the results.

This is most useful after a crash or something, or anytime you have to run
Splunk's "recover-metadata" tool.
"""

__author__ = "Lowell Alleman"


class CaseRestoreTool:
    """Collect properly-cased names and re-apply them to .data files.

    Attributes:
        samples: maps a lower-cased name to the first cased spelling that
            was loaded for it.
        rename_map: maps a lower-cased name to the spelling that
            updateDataFile() writes out; filled in by buildRenameMap().
    """

    def __init__(self):
        self.samples = {}
        self.rename_map = {}

    def loadSample(self, name):
        """Remember `name` as the proper-case spelling of its lower-cased form.

        The first spelling seen for a given lower-cased key is kept.  A later
        spelling that differs is only reported on stdout; it does not replace
        the stored one.
        """
        lname = name.lower()
        known = self.samples.setdefault(lname, name)
        if known != name:
            # Single parenthesised argument: valid as both the Python 2
            # print statement and the Python 3 print function.
            print("Found different entries for '%s': '%s' vs '%s'" % (lname, known, name))

    def loadSamplesFromDataFile(self, path):
        """Load every name found in the tab-separated data file at `path`.

        The first line (index 0, which has no name) is skipped.  The name is
        taken from the second tab-separated field of each remaining line;
        lines with fewer than two fields are ignored.  An empty file loads
        nothing.

        Raises:
            IOError/OSError: if `path` cannot be opened.
        """
        with open(path) as data:
            next(data, None)  # skip index 0 (has no name); tolerate an empty file
            for line in data:
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 2:
                    # Blank or damaged line: there is no name column to read.
                    continue
                self.loadSample(fields[1])

    def buildRenameMap(self):
        """Build the internal rename map based on the collected samples."""
        # The plan was to search for the most popular case variation, but
        # none of that logic has been written yet, so the first spelling
        # loaded for each name wins.
        self.rename_map = dict(self.samples)

    def updateDataFile(self, fn):
        """Write a case-corrected copy of the data file `fn` to `fn` + ".new".

        Each line is split into index, name and stats on its first two tabs.
        When the lower-cased name is a key of rename_map, the name is replaced
        by the mapped spelling; every other line, including any line with
        fewer than three fields, is copied through unchanged.  The original
        file is never modified.

        Raises:
            IOError/OSError: if `fn` cannot be read or `fn` + ".new" cannot
                be written.
        """
        # Nested `with` blocks (rather than one combined statement) keep the
        # syntax valid on Python 2.6 as well.
        with open(fn) as source:
            with open(fn + ".new", "w") as stream:
                for line in source:
                    fields = line.split("\t", 2)
                    if len(fields) == 3:
                        index, name, stats = fields
                        proper = self.rename_map.get(name.lower())
                        if proper is not None:
                            line = "\t".join([index, proper, stats])
                    stream.write(line)
        # Could rename here, if we wanted to:  os.unlink(fn); os.rename(fn + ".new", fn)


def main():
    """Load proper-case samples from the top-level files, then fix this bucket's files."""
    crt = CaseRestoreTool()
    # You can pull proper-case examples from other places if you would like.  Just add them here:
    for x in ["../../db/Sources.data", "../../db/SourceTypes.data", "../../db/Hosts.data"]:
        crt.loadSamplesFromDataFile(x)
    crt.buildRenameMap()
    crt.updateDataFile("Sources.data")
    crt.updateDataFile("SourceTypes.data")
    crt.updateDataFile("Hosts.data")


if __name__ == "__main__":
    main()