# Source code for mrparse.mr_region
"""
Created on 18 Oct 2018
@author: jmht
"""
from operator import attrgetter
from mrparse.mr_hit import sort_hits_by_size
class RegionData:
    """Plain data holder describing one region.

    Attributes (all public, set directly by the code that builds the region):

    * ``target_name`` - name string, empty by default.
    * ``index`` - zero-based position of the region in its list.
    * ``midpoint`` - centre of the region.
    * ``extent`` - overall length of the region.
    * ``matches`` - list of the objects that have been assigned to the region.
    """

    def __init__(self):
        self.target_name = ""
        self.index = 0
        self.midpoint = 0
        self.extent = 0
        self.matches = []

    @property
    def start_stop(self):
        """Return the ``(start, stop)`` pair for the region.

        Both ends lie ``int(extent / 2)`` either side of ``midpoint``.

        Raises:
            AssertionError: if ``midpoint`` or ``extent`` is not positive,
                e.g. because the region was never populated.
        """
        # Both values must be set; the defaults of 0 are rejected, matching the
        # wording of the message.
        assert self.midpoint > 0 and self.extent > 0, "Need non-zero midpoint and extent!"
        half_len = int(self.extent / 2)
        return self.midpoint - half_len, self.midpoint + half_len

    @property
    def id(self):
        """One-based identifier derived from the zero-based ``index``."""
        return self.index + 1

    def __str__(self):
        """Return a multi-line listing of the public instance attributes, sorted by name."""
        INDENT = " "
        names = sorted(k for k in self.__dict__ if not k.startswith('_'))
        parts = ["Class: {}\nData:\n".format(self.__class__)]
        parts.extend(INDENT + "{} : {}\n".format(name, self.__dict__[name]) for name in names)
        return "".join(parts)
class RegionFinder(object):
    """Group hits into regions that share a similar extent and midpoint."""

    def __init__(self):
        pass

    def find_regions_from_hits(self, hits, sort=True):
        """Figure out the regions for the target that have been matched.

        Args:
            hits: mapping whose ``values()`` are hit objects exposing
                ``query_extent`` and ``query_midpoint``.
            sort: when true, the hits are first passed through
                ``sort_hits_by_size(hits, ascending=True)`` and the resulting
                regions are ordered with :meth:`sort_regions`.

        Returns:
            The list of regions; empty when ``hits`` is empty or ``None``.
        """
        if not hits:
            return []
        # Hits need to be sorted from smallest to largest or the domain finding won't work
        if sort:
            hits = sort_hits_by_size(hits, ascending=True)
        target_regions = []
        for hit in hits.values():
            self.create_or_update_region(hit, target_regions)
        if sort:
            target_regions = self.sort_regions(target_regions)
        return target_regions

    def create_or_update_region(self, hit, target_regions):
        """Attach ``hit`` to the first region it falls within, else start a new region.

        ``target_regions`` is modified in place when a new region is needed.
        Always returns ``None``.
        """
        for region in target_regions:
            if self.hit_within_region(hit, region):
                return self.update_region(hit, region)
        self.add_new_region(hit, target_regions)
        return

    def hit_within_region(self, hit, region, extentTolerance=50, midpointTolerance=20):
        """Return True when the hit's extent and midpoint both match the region.

        A value matches when it lies within the given tolerance (inclusive) of
        the region's corresponding value.
        """
        return (
            region.extent - extentTolerance <= hit.query_extent <= region.extent + extentTolerance
            and region.midpoint - midpointTolerance <= hit.query_midpoint <= region.midpoint + midpointTolerance
        )

    def update_region(self, hit, region):
        """Record ``hit`` as a match of ``region`` and link the hit back to it."""
        # Should we update the midpoint and extent of the region?
        region.matches.append(hit)
        hit.region = region
        return

    def add_new_region(self, hit, target_regions):
        """Create a region seeded from ``hit`` and append it to ``target_regions``.

        The new region takes its midpoint and extent from the hit and its index
        from the current length of the list.

        Returns:
            The same ``target_regions`` list, now including the new region.
        """
        region = RegionData()
        region.index = len(target_regions)
        region.midpoint = hit.query_midpoint
        region.extent = hit.query_extent
        region.matches.append(hit)
        target_regions.append(region)
        hit.region = region
        return target_regions

    def sort_regions(self, regions, ascending=False):
        """Return ``regions`` ordered by extent and renumber their ``index``.

        Args:
            regions: iterable of region objects.
            ascending: smallest extent first when true; largest first (the
                default) otherwise.

        Each region's ``matches`` list is reversed in place on every call,
        regardless of ``ascending`` - so calling this twice restores the
        original match order.
        """
        reverse = not ascending
        # Need to think about better ways of sorting - probably store reference to hit in region?
        regions = sorted(regions, key=attrgetter('extent'), reverse=reverse)
        # The matches and ranges also need to be sorted
        for i, r in enumerate(regions):
            r.index = i
            r.matches.reverse()
        return regions