All pastes #2103043 Raw Edit

Untitled

public text v1 · immutable
#2103043 ·published 2012-01-14 17:34 UTC
rendered paste body
import sys
import subprocess
import collections

# Stream every `window`-character substring of the genome text into an
# external `sort` process, one fragment per line.  The sorted output is read
# back from `pipe.stdout` further down, where identical fragments end up on
# adjacent lines.
pipe = subprocess.Popen(['sort'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)

with open('/Users/John/Desktop/Genome/19/Chr19.John') as f:
    # Strip a trailing line terminator so it can never become part of a
    # fragment (each fragment is written to `sort` as its own line).
    # NOTE(review): this assumes the file has no *interior* newlines; a
    # fragment spanning one would reach `sort` as two short lines -- confirm
    # the input format.
    text = f.read().rstrip('\r\n')

window = 20

# `+ 1` so the last window (the one ending on the final character) is
# emitted too.  When the text is shorter than `window` the range is empty.
for x in xrange(len(text) - window + 1):
    fragment = text[x:x + window]
    pipe.stdin.write(fragment + '\n')
    if x % 100000 == 0:
        # Progress indicator: current start offset.
        print(x)

# Closing stdin signals end-of-input to `sort` so it can produce its output.
pipe.stdin.close()

def _runs(lines):
    """Yield ``(fragment, count)`` for each run of identical adjacent lines.

    The line terminator is stripped from every line before comparison, so
    the yielded fragments never contain a trailing newline.  Yields nothing
    for empty input.
    """
    current = None
    count = 0
    for line in lines:
        fragment = line.rstrip('\n')
        if count and fragment == current:
            count += 1
        else:
            if count:
                yield current, count
            current = fragment
            count = 1
    if count:
        yield current, count


# Split the sorted fragments into those seen exactly once ('UniqueFile') and
# those seen more than once ('RepeatedFile').  Because the input is sorted,
# duplicates are adjacent, so one pass over run lengths is enough.  Each file
# receives a comma-separated list with no leading or trailing comma; a
# repeated fragment is written once, not once per occurrence.
with open('UniqueFile', "w") as unique:
    with open('RepeatedFile', "w") as repeat:
        # Separator to write before the next entry of each file: empty for
        # the first entry, ',' afterwards.
        unique_sep = ''
        repeat_sep = ''
        for fragment, count in _runs(pipe.stdout):
            if count > 1:
                repeat.write(repeat_sep + fragment)
                repeat_sep = ','
            else:
                unique.write(unique_sep + fragment)
                unique_sep = ','

# Reap the child process and surface a failed sort instead of silently
# leaving truncated output files behind.
pipe.stdout.close()
returncode = pipe.wait()
if returncode != 0:
    raise subprocess.CalledProcessError(returncode, 'sort')