All pastes #960311 Raw Edit

learning pygr

public python v1 · immutable
#960311 ·published 2008-03-28 00:25 UTC
rendered paste body
import collectionsfrom pygr import seqdb, cnestedlist# gene name, chr, strand, [start0, stop0, start1, stop1 ...]feats ="""\gene_a,chr1,1,10,20,22,32,34,40,42,48gene_b,chr1,1,46,50,61,100gene_c,chr1,1,110,122,124,130"""# fake sequence.print >>open('/tmp/k.fa', 'w'), '>chr1' + '\n'  + 40 * 'a' + 40 * 'c' + 40 * 'g' + 40 * 't'sequence = seqdb.BlastDB('/tmp/k.fa')slice_dict=dict(name=0, id=1, orientation=2, start=3, stop=4)feat_dict = collections.defaultdict(dict)feat_dict['sequence'] = sequencefor feat in [f.split(",") for f in feats.split("\n")]:    feat[2:] = map(int, feat[2:])    for i, exon in enumerate(range(3, len(feat), 2)):        exon = feat[exon:exon + 2]        # exon.0, exon.1 ...        exon_name =  "exon.%i" % (i,)                                       #  name, chr, strand + start, stop        feat_dict[feat[0]][exon_name] = tuple(feat[:3] + exon)    feat_dict[feat[0]] = seqdb.AnnotationDB(feat_dict[feat[0]], sequence, sliceAttrDict=slice_dict)anno = cnestedlist.NLMSA('/tmp/k', 'w', seqdb.PrefixUnionDict(feat_dict), pairwiseMode=True)for k in feat_dict.keys():    if k == 'sequence': continue    for v in feat_dict[k].values():        anno.addAnnotation(v)anno.build(verbose=False)q = anno.seqDict.prefixDict['sequence']['chr1'][44:55]assert len(anno[q].items()) == 2gae3 = anno[q].items()[0]a3 = feat_dict['gene_a']['exon.3']b0 = feat_dict['gene_b']['exon.0']assert  42 == a3.sequence.start and  48 == a3.sequence.stopassert  46 == b0.sequence.start and  50 == b0.sequence.stopprint repr(a3.path), repr(b0.path)# prints annotgene_a.exon3[0:6] annotgene_b.exon0[0:4]# this fails without change to pygr code.assert a3.overlaps(b0)assert a3.sequence.overlaps(b0.sequence)