#!/usr/bin/python
"""
splunk_datafile_restore_case.py:  Script to fix the case of entries in your
Hosts.data, Sources.data, and SourceTypes.data files based on the known entries
in the top-level files of the same names.

This script should be run from within a Splunk database bucket (a path named
like "$SPLUNK_HOME/var/lib/splunk/<index>/db/db_<start>_<end>_<seq>").  Each
file will be read and written out to a new file (ending in ".new") and you
can then choose to replace the originals if you like the results.

This is most useful after a crash or something, or anytime you have to run
Splunk's "recover-metadata" tool.
"""

__author__ = "Lowell Alleman"


class CaseRestoreTool:
    """Collect correctly-cased names and re-apply them to other data files.

    Attributes:
        samples: maps the lower-cased form of a name to the first cased
            spelling that was loaded for it.
        rename_map: maps a lower-cased name to the spelling that
            updateDataFile() writes out; filled in by buildRenameMap().
    """

    def __init__(self):
        self.samples = {}
        self.rename_map = {}

    def loadSample(self, name):
        """Record `name` as the known spelling for its lower-cased form.

        The first spelling seen for a given lower-cased name is kept.  A later
        spelling that differs only in case is reported on stdout and ignored.
        """
        lname = name.lower()
        if lname in self.samples:
            if name != self.samples[lname]:
                # Single parenthesised argument: prints the same text under
                # both Python 2 and Python 3.
                print("Found different entries for '%s': '%s' vs '%s'"
                      % (lname, self.samples[lname], name))
        else:
            self.samples[lname] = name

    def loadSamplesFromDataFile(self, path):
        """Load the name (second tab-separated field) of each line of `path`.

        The first line is skipped because index 0 has no name.  Lines that
        have no second field are ignored.  An empty file loads nothing.
        """
        with open(path) as data:
            # skip index 0 (has no name); the default keeps an empty file
            # from raising StopIteration
            next(data, None)
            for line in data:
                # Strip the line ending first so that a two-field line does
                # not leave a trailing newline inside the name.
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 2:
                    continue
                self.loadSample(fields[1])

    def buildRenameMap(self):
        """ build the internal rename map based on the collected samples. """
        # I was going to search for the most popular case variation, but
        # didn't code any of that logic yet....
        self.rename_map = dict(self.samples)

    def updateDataFile(self, fn):
        """Write a case-corrected copy of `fn` to `fn + ".new"`.

        Each line is split into index, name, and the remaining stats.  When
        the lower-cased name is in the rename map, the mapped spelling is
        written in its place.  Every other line, including any line with
        fewer than three tab-separated fields, is copied through unchanged.
        The original file is not modified.
        """
        # Open the input first so that a missing input file does not leave an
        # empty ".new" file behind.
        with open(fn) as source:
            with open(fn + ".new", "w") as stream:
                for line in source:
                    parts = line.split("\t", 2)
                    if len(parts) == 3:
                        index, name, stats = parts
                        fixed = self.rename_map.get(name.lower())
                        if fixed is not None:
                            line = "\t".join([index, fixed, stats])
                    stream.write(line)
        # Could rename here, if we wanted to:
        #   os.unlink(fn); os.rename(fn + ".new", fn)


def main():
    """Load samples from the top-level data files and fix this bucket's."""
    crt = CaseRestoreTool()
    # You can pull proper-case examples from other places if you would like.
    # Just add them here:
    for x in ["../../db/Sources.data",
              "../../db/SourceTypes.data",
              "../../db/Hosts.data"]:
        crt.loadSamplesFromDataFile(x)
    crt.buildRenameMap()
    crt.updateDataFile("Sources.data")
    crt.updateDataFile("SourceTypes.data")
    crt.updateDataFile("Hosts.data")


if __name__ == "__main__":
    main()