# Source code for mrparse.mr_hkl

"""
Created on 18 Oct 2018

@author: jmht
"""
import logging
import os

from ample.util.ample_util import filename_append
from cctbx.crystal import symmetry
from mmtbx.scaling.matthews import matthews_rupp
from mrbump.ccp4.MRBUMP_ctruncate import Ctruncate

from iotbx import reflection_file_reader
from iotbx.reflection_file_utils import looks_like_r_free_flags_info


# Module-level logger used for the diagnostics emitted by GetLabels.
LOG = logging.getLogger(__name__)


class GetLabels(object):
    """Class to get the column labels for input mtz file

    The file is read on construction (``__init__`` calls :meth:`run`). Every
    attribute stays ``None`` unless a matching column group is found.

    Attributes
    ----------
    f : str
        f column label
    sigf : str
        sigf column label
    i : str
        i column label
    sigi : str
        sigi column label
    fplus : str
        f(+) column label
    sigfplus : str
        sigf(+) column label
    fminus : str
        f(-) column label
    sigfminus : str
        sigf(-) column label
    iplus : str
        i(+) column label
    sigiplus : str
        sigi(+) column label
    iminus : str
        i(-) column label
    sigiminus : str
        sigi(-) column label
    dano : str
        dano column label
    sigdano : str
        sigdano column label
    free : str
        free column label
    """

    def __init__(self, mtz_file):
        self.f = None
        self.sigf = None
        self.i = None
        self.sigi = None
        self.fplus = None
        self.sigfplus = None
        self.fminus = None
        self.sigfminus = None
        self.iplus = None
        self.sigiplus = None
        self.iminus = None
        self.sigiminus = None
        self.dano = None
        self.sigdano = None
        self.free = None

        self.run(mtz_file)

    def run(self, mtz_file):
        """Read ``mtz_file`` and populate the label attributes.

        Parameters
        ----------
        mtz_file : str
            The path to the mtz file

        Raises
        ------
        RuntimeError
            If the reflection file reader does not report the file type
            as ``ccp4_mtz``.
        """
        reflection_file = reflection_file_reader.any_reflection_file(file_name=mtz_file)
        if reflection_file.file_type() != "ccp4_mtz":
            msg = "File is not of type ccp4_mtz: {0}".format(mtz_file)
            LOG.critical(msg)
            raise RuntimeError(msg)

        for m_a in reflection_file.as_miller_arrays():
            info = m_a.info()
            # Only the first free-flag, intensity and amplitude arrays are kept;
            # later arrays of the same kind are ignored.
            if looks_like_r_free_flags_info(info) and not self.free:
                self.free = info.labels[0]
            elif self.check_anomalous(m_a):
                self._assign_anomalous_labels(m_a)
            elif m_a.is_xray_intensity_array() and len(info.labels) == 2 and not self.i:
                self.i, self.sigi = info.labels
            elif m_a.is_xray_amplitude_array() and len(info.labels) == 2 and not self.f:
                self.f, self.sigf = info.labels

    def _assign_anomalous_labels(self, miller_array):
        """Store the labels of an array already identified as anomalous.

        Arrays with an unexpected label count or an unrecognised type are
        reported at debug level and otherwise ignored.
        """
        labels = miller_array.info().labels
        n_labels = len(labels)

        if self.check_for_dano_labels(miller_array):
            if n_labels in (4, 5):
                # With five labels the trailing one is not stored.
                self.f, self.sigf, self.dano, self.sigdano = labels[:4]
            elif n_labels == 2:
                self.dano, self.sigdano = labels
            else:
                LOG.debug("Unexpected number of columns found in anomalous miller array")
            return

        if not self.check_for_plus_minus_labels(miller_array):
            LOG.debug("Type of anomalous miller array unknown")
            return

        if miller_array.is_xray_amplitude_array():
            targets = ("fplus", "sigfplus", "fminus", "sigfminus")
        elif miller_array.is_xray_intensity_array():
            targets = ("iplus", "sigiplus", "iminus", "sigiminus")
        else:
            LOG.debug("Type of anomalous miller array unknown")
            return

        # Guard the four-way assignment so an odd column group is skipped
        # rather than aborting the whole scan with a ValueError.
        if n_labels != len(targets):
            LOG.debug("Unexpected number of columns found in anomalous miller array")
            return
        for attr, label in zip(targets, labels):
            setattr(self, attr, label)

    def check_anomalous(self, miller_array):
        """Return True if ``miller_array`` appears to hold anomalous data.

        An array counts as anomalous when its anomalous flag is set, when the
        file's type hint is ``anomalous_difference``, or when its labels look
        like DANO or (+)/(-) columns.
        """
        if miller_array.anomalous_flag():
            return True
        if miller_array.info().type_hints_from_file == "anomalous_difference":
            return True
        # Check for anomalous miller arrays which aren't properly labeled
        if self.check_for_dano_labels(miller_array):
            return True
        return self.check_for_plus_minus_labels(miller_array)

    @staticmethod
    def check_for_dano_labels(miller_array):
        """Return True if any label contains ``DANO`` or equals ``DP`` (case-insensitive)."""
        return any(
            "DANO" in label.upper() or label.upper() == "DP"
            for label in miller_array.info().labels
        )

    @staticmethod
    def check_for_plus_minus_labels(miller_array):
        """Return True if any label contains the substring ``(+)``."""
        return any("(+)" in label for label in miller_array.info().labels)


def crystal_data(mtz_file):
    """Read the basic crystallographic parameters from an mtz file

    Parameters
    ----------
    mtz_file : str
       The path to the mtz file

    Returns
    -------
    space_group : str
       The space group name with all spaces removed
    resolution
       The second element of the file content's ``max_min_resolution()``
       (presumably the high-resolution limit as a number -- it is formatted
       with ``5.3F`` by ``HklInfo.as_html``)
    cell_parameters
       The unit cell parameters of the first crystal listed in the file
    """
    content = reflection_file_reader.any_reflection_file(file_name=mtz_file).file_content()
    return (
        content.space_group_name().replace(" ", ""),
        content.max_min_resolution()[1],
        content.crystals()[0].unit_cell_parameters(),
    )


class HklInfo(object):
    """Summary information about a reflection (mtz) file.

    On construction the column labels, space group, resolution and cell
    parameters are read from ``hklin``. When ``seq_info`` is supplied, the
    Matthews solvent content and copy-number predictions are also calculated.
    The NCS / twinning / anisotropy flags stay ``False`` until
    :meth:`check_pathologies` (or ``__call__``) has been run.

    Parameters
    ----------
    hklin : str
        The path to the mtz file
    seq_info : object, optional
        Sequence information; must provide ``molecular_weight`` and
        ``nresidues`` attributes when given.

    Raises
    ------
    RuntimeError
        If ``hklin`` is not an existing file.
    """

    def __init__(self, hklin, seq_info=None):
        self.hklin = hklin
        self.seq_info = seq_info
        if not os.path.isfile(hklin):
            raise RuntimeError("Cannot find hklin file: %s" % hklin)
        self.name = os.path.splitext(os.path.basename(hklin))[0]
        self.labels = GetLabels(hklin)
        self.space_group, self.resolution, self.cell_parameters = crystal_data(self.hklin)
        self.predicted_solvent_content = None
        self.predicted_ncopies = None
        self.molecular_weight = None
        self.has_ncs = False
        self.has_twinning = False
        self.has_anisotropy = False
        if self.seq_info:
            self.molecular_weight = self.seq_info.molecular_weight
            self.calculate_matthews_probabilties()

    def __call__(self):
        """Required so that we can use multiprocessing pool. We need to be able to pickle the object passed
        to the pool and instance methods don't work, so we add the object to the pool and define __call__
        https://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-multiprocessing-pool-map/6975654#6975654
        """
        self.check_pathologies()
        return self

    def calculate_matthews_probabilties(self):
        """Set ``predicted_solvent_content`` and ``predicted_ncopies``.

        Runs ``matthews_rupp`` on the crystal symmetry built from the cell
        parameters and space group, using ``seq_info.nresidues`` as the
        residue count. Requires ``seq_info`` to be set.
        """
        crystal_symmetry = symmetry(unit_cell=self.cell_parameters, space_group_symbol=self.space_group)
        result = matthews_rupp(crystal_symmetry, n_residues=self.seq_info.nresidues)
        self.predicted_solvent_content = result.solvent_content
        self.predicted_ncopies = result.n_copies

    @staticmethod
    def _select_ctruncate_columns(labels):
        """Pick the data columns to hand to ctruncate.

        Preference order: I, F, I(+)/I(-), F(+)/F(-).

        Returns
        -------
        tuple
            ``(colin, colin_sig, plus_minus, use_intensities)``. For the
            (+)/(-) cases ``colin`` and ``colin_sig`` are two-element lists.
            When no usable columns exist the result is
            ``(None, None, None, False)``.
        """
        if labels.f or labels.i:
            if labels.i:
                return labels.i, labels.sigi, False, True
            return labels.f, labels.sigf, False, False
        if labels.iplus:
            # The intensity (+)/(-) labels are used here; previously this
            # branch passed the F(+)/F(-) labels by mistake.
            return (
                [labels.iplus, labels.iminus],
                [labels.sigiplus, labels.sigiminus],
                True,
                True,
            )
        if labels.fplus:
            return (
                [labels.fplus, labels.fminus],
                [labels.sigfplus, labels.sigfminus],
                True,
                False,
            )
        return None, None, None, False

    def check_pathologies(self):
        """Run ctruncate on ``hklin`` and record the pathology flags it reports.

        A temporary output file is written to the current working directory
        (``hklin``'s name with ``fixcols`` appended via ``filename_append``)
        and a log file is written next to it with a ``.log`` extension. After
        the run ``has_ncs``, ``has_twinning`` and ``has_anisotropy`` are
        copied from the ``Ctruncate`` object's ``NCS``, ``TWIN`` and ``ANISO``
        attributes, and the temporary output file is deleted. The log file is
        left in place.
        """
        hklin = self.hklin
        hklout = filename_append(filename=hklin, directory=os.getcwd(), astr='fixcols')
        labels = self.labels

        ctr = Ctruncate()
        ctr.debug = False
        log_file = hklout.rsplit(".", 1)[0] + '.log'
        ctr.setlogfile(log_file)

        colin, colin_sig, plus_minus, use_intensities = self._select_ctruncate_columns(labels)

        # NOTE(review): USEINTEN is now also True when I(+)/I(-) columns are
        # selected (previously only for a plain I column) -- confirm against
        # the Ctruncate wrapper.
        kwargs = {"colout": "from_SIMBAD", "USEINTEN": use_intensities, "PLUSMINUS": plus_minus}
        if labels.i:
            # INPUTF is only passed alongside a plain intensity column.
            kwargs["INPUTF"] = bool(labels.f)
        if labels.free:
            kwargs["colinFREE"] = labels.free

        try:
            ctr.ctruncate(hklin, hklout, colin, colin_sig, **kwargs)
        except Exception:
            # Don't leave a partial output file behind when the run fails.
            if os.path.isfile(hklout):
                os.unlink(hklout)
            raise

        self.has_ncs = ctr.NCS
        self.has_twinning = ctr.TWIN
        self.has_anisotropy = ctr.ANISO
        os.unlink(hklout)
        return

    def as_dict(self):
        """Return the file path, name, crystal data and pathology flags as a dict."""
        attrs = ('hklin', 'name', 'space_group', 'resolution', 'cell_parameters',
                 'has_ncs', 'has_twinning', 'has_anisotropy')
        return dict((k, v) for k, v in self.__dict__.items() if k in attrs)

    def as_html(self):
        """Return a one-row HTML table summarising this file.

        ``resolution`` must be a number as it is formatted with ``5.3F``.
        """
        return """
<table border="1">
  <thead>
    <tr style="text-align: right;">
      <th>name</th>
      <th>Resolution</th>
      <th>Space Group</th>
      <th>Has NCS?</th>
      <th>Has Twinning?</th>
      <th>Has Anisotropy?</th>
      <th>File Path</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>{name}</td>
      <td>{resolution:5.3F}</td>
      <td>{space_group}</td>
      <td>{has_ncs}</td>
      <td>{has_twinning}</td>
      <td>{has_anisotropy}</td>
      <td>{hklin}</td>
    </tr>
  </tbody>
</table>
""".format(**self.__dict__)

    def __str__(self):
        lines = [
            "HKL Info for file %s" % self.hklin,
            "Space Group: %s" % self.space_group,
            "Resolution: %s" % self.resolution,
            "Has NCS?: %s" % self.has_ncs,
            "Has Twinning?: %s" % self.has_twinning,
            "Has Anisotropy?: %s" % self.has_anisotropy,
        ]
        return "\n".join(lines) + "\n"