Source code for mrparse.mr_annotation

"""
Created on 23 Nov 2018

@author: jmht
"""
import copy


class AnnotationSymbol(object):
    """A named annotation type and the symbol used to display it.

    Instances are plain attribute holders:

    * ``name``   -- descriptive name of the annotation
    * ``symbol`` -- symbol written into an annotation string
    * ``stype``  -- annotation type; the ONLY attribute used for equality
    * ``score``  -- score attached to the annotation (``None`` until set)
    * ``source`` -- origin of the annotation (``None`` until set)

    Note: ``__eq__`` is defined without ``__hash__``, so on Python 3 instances
    are unhashable.
    """

    def __init__(self, name=None, symbol=None, stype=None):
        # A ``__slots__`` assignment used to live here as a local variable.
        # Inside a method it has no effect, and real class-level slots would
        # break ``__str__`` (which reads ``__dict__``), so it was removed.
        self.name = name
        self.symbol = symbol
        self.stype = stype
        self.score = None
        self.source = None

    def __eq__(self, other):
        """Return True when ``other`` is an instance of this class with the same ``stype``."""
        if not isinstance(other, self.__class__):
            return False
        # Deliberately compares only the type, not name/symbol/score/source.
        return other.stype == self.stype

    def __ne__(self, other):
        """Return the negation of ``__eq__`` (needed explicitly on Python 2)."""
        return not self.__eq__(other)

    def __str__(self):
        """Return the class followed by one line per public attribute, sorted by name."""
        indent = " "
        public = sorted(key for key in self.__dict__ if not key.startswith('_'))
        rows = [indent + "{} : {}\n".format(key, self.__dict__[key]) for key in public]
        return "Class: {}\nData:\n".format(self.__class__) + "".join(rows)
# Module-wide sentinel standing for "no annotation at this position".
# Its stype is 'null', which is what AnnotationSymbol equality compares against.
NULL_ANNOTATION = AnnotationSymbol(name='null', symbol='-', stype='null')
NULL_ANNOTATION.score = 0.0
NULL_ANNOTATION.source = 'null'
class SequenceAnnotation(object):
    """An annotation string with a parallel list of scores.

    ``annotation`` is a string that grows by one symbol per
    ``add_annotation`` call and ``scores`` grows by one entry alongside it.
    ``annotation_library`` maps each known symbol to its ``AnnotationSymbol``.
    Indexing an instance returns an ``AnnotationSymbol`` copy for that position.
    """

    def __init__(self, null_symbol=NULL_ANNOTATION.symbol):
        """Create an empty annotation.

        :param null_symbol: symbol reported by ``__getitem__`` for positions
            whose stored symbol is not in the library.
        """
        # A ``__slots__`` assignment used to live here as a local variable;
        # it had no effect inside a method and was removed.
        self.source = None
        self.scores = []
        self.annotation = ''  # symbols are appended to this string
        self.annotation_library = dict()  # symbol -> AnnotationSymbol
        self.null_symbol = null_symbol

    def add_annotation(self, annotation):
        """Append ``annotation``'s symbol and score.

        Anything that does not compare equal to ``NULL_ANNOTATION`` must
        already be in the library.

        :raises AssertionError: if a non-null annotation is not in the library.
        """
        if annotation != NULL_ANNOTATION:
            assert self.annotation_is_significant(annotation), "Cannot find: %s" % annotation
        self.annotation += annotation.symbol
        self.scores.append(annotation.score)

    def annotation_is_significant(self, annotation):
        """Return True if ``annotation`` is not the null sentinel and is in the library."""
        # __getitem__ hands out copies, so for indexed items the identity test
        # is always true; the membership test (equality-based) does the work.
        return annotation is not NULL_ANNOTATION and annotation in self.annotation_library.values()

    def has_annotation_symbol(self, symbol):
        """Return True if ``symbol`` is a key of the annotation library."""
        return symbol in self.annotation_library

    def library_add_annotation(self, annotation):
        """Register ``annotation`` in the library under its symbol.

        The annotation's ``source`` is overwritten with this object's
        ``source``. An existing entry with the same symbol is replaced.

        :raises AssertionError: if ``annotation`` is not an ``AnnotationSymbol``.
        """
        assert isinstance(annotation, AnnotationSymbol)
        annotation.source = self.source
        self.annotation_library[annotation.symbol] = annotation

    def __getitem__(self, idx):
        """Return a copy of the annotation at position ``idx``.

        For a library symbol the copy carries the score stored for that
        position. Otherwise a copy of ``NULL_ANNOTATION`` is returned with its
        symbol replaced by ``self.null_symbol``.
        """
        symbol = self.annotation[idx]
        if self.has_annotation_symbol(symbol):
            item = copy.copy(self.annotation_library[symbol])
            item.score = self.scores[idx]
        else:
            item = copy.copy(NULL_ANNOTATION)
            item.symbol = self.null_symbol
        return item

    def __add__(self, other):
        """Return the consensus of two equal-length annotations.

        The result is a new instance of this class, built with default
        constructor arguments, whose library is the union of both libraries
        (entries from ``other`` win on a symbol clash). Per position:

        * significant in exactly one operand -> that operand's annotation
        * significant in neither             -> ``NULL_ANNOTATION``
        * significant in both                -> ``NULL_ANNOTATION``

        NOTE(review): the "both significant" case yields null -- presumably
        treated as a conflict; confirm this is intended.

        :raises TypeError: if ``other`` is not an instance of this class.
        :raises AssertionError: if the two annotations differ in length.
        """
        if not isinstance(other, self.__class__):
            raise TypeError(other)
        assert len(self) == len(other)
        consensus = self.__class__()
        # Merge with copy + update rather than dict(a, **b): keyword expansion
        # raises TypeError for any non-string key (e.g. a symbol left as None).
        merged = dict(self.annotation_library)
        merged.update(other.annotation_library)
        consensus.annotation_library = merged
        # For now only the predicted symbol is combined; scores are carried
        # over unchanged from whichever operand supplies the annotation.
        for idx in range(len(self)):
            mine = self[idx]
            theirs = other[idx]
            mine_significant = self.annotation_is_significant(mine)
            theirs_significant = other.annotation_is_significant(theirs)
            if mine_significant and theirs_significant:
                consensus.add_annotation(NULL_ANNOTATION)
            elif mine_significant:
                consensus.add_annotation(mine)
            elif theirs_significant:
                consensus.add_annotation(theirs)
            else:
                consensus.add_annotation(NULL_ANNOTATION)
        return consensus

    def __len__(self):
        """Return the number of symbols in the annotation string."""
        return len(self.annotation)

    def __str__(self):
        """Return the class followed by one line per public attribute, sorted by name."""
        indent = " "
        public = sorted(key for key in self.__dict__ if not key.startswith('_'))
        rows = [indent + "{} : {}\n".format(key, self.__dict__[key]) for key in public]
        return "Class: {}\nData:\n".format(self.__class__) + "".join(rows)
class AnnotationChunk(object):
    """A run of one annotation, recorded as ``start``, ``end`` and ``annotation``."""

    def __init__(self, start=None, end=None, annotation=None):
        self.start = start
        self.end = end
        self.annotation = annotation

    def __str__(self):
        """Return the class followed by one line per public attribute, sorted by name."""
        indent = " "
        header = "Class: {}\nData:\n".format(self.__class__)
        rows = []
        for key in sorted(self.__dict__):
            if key.startswith('_'):
                continue
            rows.append(indent + "{} : {}\n".format(key, self.__dict__[key]))
        return header + "".join(rows)
def get_annotation_chunks(annotation):
    """Split an annotation into runs of consecutive equal, non-null items.

    :param annotation: iterable of items comparable with ``NULL_ANNOTATION``,
        or ``None``.
    :returns: ``None`` if ``annotation`` is ``None``; otherwise a list of
        ``AnnotationChunk`` objects in order of appearance.

    A chunk's ``start`` is the index of its first item. Its ``end`` is the
    index at which it was closed: the index of the first following item that
    is null or different, or the last index of the input if the chunk runs to
    the end.

    NOTE(review): ``end`` is therefore exclusive for chunks closed inside the
    loop but equal to the last index for a trailing chunk -- confirm intended.
    """
    if annotation is None:
        return None

    chunks = []
    current = None
    for position, item in enumerate(annotation):
        if item != NULL_ANNOTATION:
            if current and not (current.annotation != item):
                # Same annotation as the open chunk: keep extending it.
                continue
            if current:
                # A different annotation starts here; close the open chunk.
                current.end = position
                chunks.append(current)
            current = AnnotationChunk(start=position, annotation=item)
        elif current:
            # A null item terminates the open chunk.
            current.end = position
            chunks.append(current)
            current = None

    if current:
        # Input ended while a chunk was still open.
        current.end = position
        chunks.append(current)
    return chunks