Source code for polymerist.polymers.building.mbconvert

'''
Enhanced conversions to and from mbuild Compound objects which
preserve more molecular information than the utilities provided by 
'''

__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'

from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage
if not modules_installed('openbabel'):
    raise MissingPrerequisitePackage(
        importing_package_name=__spec__.name,
        use_case='Translation between chemical representations of polymers',
        install_link='https://libraries.io/conda/openbabel',
        dependency_name='openbabel',
        dependency_name_formal='the OpenBabel chemical toolbox',
    )

from typing import Optional
from pathlib import Path

import warnings
with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings
    warnings.filterwarnings('ignore',  category=DeprecationWarning)
    from mbuild import Compound
    from mbuild.conversion import from_rdkit, to_rdkit
    
from rdkit import Chem

from ...genutils.fileutils.pathutils import allow_string_paths, allow_pathlib_paths
from ...molfiles.pdb import SerialAtomLabeller
from ...rdutils.bonding.portlib import get_linker_ids
from ...rdutils.bonding.substitution import hydrogenate_rdmol_ports
from ...mdtools.openmmtools.serialization.topology import serialize_openmm_pdb


# Conversion from other formats to Compound
[docs] def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None, kekulize : bool=True) -> tuple[Compound, list[int]]: ''' Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports If "resname" is provided, will assign that name to the mBuild Compound returned ''' linker_ids = [i for i in get_linker_ids(rdmol)] # record indices of ports - MUST unpack generator for mbuild compatibility # create port-free version of molecule which RDKit can embed without errors prot_mol = hydrogenate_rdmol_ports(rdmol, in_place=False) sanitize_ops = Chem.SANITIZE_ALL if kekulize: sanitize_ops &= ~Chem.SANITIZE_SETAROMATICITY # disable aromaticity cleaning to enforce kekulization Chem.SanitizeMol(prot_mol, sanitizeOps=sanitize_ops) # sanitize to ensure Mol is valid (namely, avoids implicitValence issues) # convert cleaned RDKit Mol into mbuild Compound ## native from_rdkit() method actually appears to preserve atom ordering, since itering over GetAtoms() - https://github.com/mosdef-hub/mbuild/blob/main/mbuild/conversion.py#L849 mb_compound = from_rdkit(prot_mol) if resname is not None: mb_compound.name = resname ## copy formal charges (mbuild.conversion doesn't do this by default for some reason) for rdatom, mbatom in zip(prot_mol.GetAtoms(), mb_compound.particles()): mbatom.charge = rdatom.GetFormalCharge() return mb_compound, linker_ids
# Conversion from Compound to other formats _DEFAULT_RESNAME_MAP : dict[str, str] = { # module-wide config for default PDB residue name replacements for polymers 'RES' : 'Pol', }
[docs] def mbmol_to_rdmol( # TODO: deduplify PDB atom name and residue numbering code against serialize_openmm_pdb() mbmol : Compound, atom_labeller : Optional[SerialAtomLabeller]=None, resname_map : Optional[dict[str, str]]=None ) -> Chem.Mol: '''Convert an mBuild Compound into an RDKit Mol, with correct atom coordinates and PDB residue info''' if atom_labeller is None: atom_labeller = SerialAtomLabeller() if resname_map is None: resname_map = _DEFAULT_RESNAME_MAP rdmol = to_rdkit(mbmol) conformer = Chem.Conformer() conformer.Set3D(True) atom_id : int = 0 for resnum, mb_monomer in enumerate(mbmol.children, start=1): resname = resname_map.get(mb_monomer.name, mb_monomer.name[:3]) # if no remapping is found, just take first 3 chars # NOTE: the order of monomers and atoms within those monomers were added in the same order as iterated over here... #... so the atom indices **SHOULD** be in the correct order (hate that this even might be uncertain) for mbatom in mb_monomer.particles(): conformer.SetAtomPosition(atom_id, 10*mbatom.pos.astype(float)) # convert from nm to angstrom # set PDB residue info if monomer hierarchy is present if mbatom != mb_monomer: # for Compounds with a flat hierarchy, the children and particles of children will coincide rdatom = rdmol.GetAtomWithIdx(atom_id) rdatom.SetFormalCharge(mbatom.charge) # as when going from_rdkit, formal charge is not carrier over by default mbuild converter pdb_info = Chem.AtomPDBResidueInfo( atomName=4*' ' if not atom_labeller else atom_labeller.get_atom_label(mbatom.element.symbol), residueName=resname, residueNumber=resnum, chainId='1', isHeteroAtom=True, ) rdatom.SetPDBResidueInfo(pdb_info) atom_id += 1 # TODO: this is an awful waay of keeping track of atom indices, see if there's a more secure way to do this conf_id = rdmol.AddConformer(conformer) # NOTE: recording this to self-document return values (this is intentionally not used) return rdmol
# Serialization of Compounds to files
[docs] @allow_pathlib_paths def mbmol_to_rdkit_pdb( pdb_path : str, mbmol : Compound, atom_labeller : Optional[SerialAtomLabeller]=None, resname_map : Optional[dict[str, str]]=None, ) -> None: # DEV: "missing" docstring here is deliberate; this is needed to dynamically set the resname_map default as it displays Chem.MolToPDBFile( mbmol_to_rdmol( mbmol, atom_labeller=atom_labeller, resname_map=resname_map, ), pdb_path, )
mbmol_to_rdkit_pdb.__doc__ = f''' Save an mBuild Compound into an RDKit-formatted PDB file Parameters ---------- pdb_path : str The PDB file path to write the structure to mbmol : Compound The mBuild Compound to convert atom_labeller : Optional[SerialAtomLabeller], default SerialAtomLabeller() An optional SerialAtomLabeller instance which defines the desired behavior for sequentially naming PDB atom lines resname_map : Optional[dict[str, str]], default {_DEFAULT_RESNAME_MAP} An optional remapping dict of 3-letter residue names Any residue name found in the keys of this map will be replaced with its corresponding value in the PDB output For example, a dict with pair "FOO" : "BAR" would rename all residues named "FOO" to "BAR" in the PDB output '''
[docs] @allow_string_paths def mbmol_to_openmm_pdb( pdb_path : Path, mbmol : Compound, atom_labeller : Optional[SerialAtomLabeller]=None, resname_map : Optional[dict[str, str]]=None, ) -> None: # DEV: "missing" docstring here is deliberate; this is needed to dynamically set the resname_map default as it displays if resname_map is None: # avoid mutable default resname_map = _DEFAULT_RESNAME_MAP # NOTE: converting through MDTraj first before going to OpenMM preserves much # of the necessary chemical info that is discarded when converting through other formats traj = mbmol.to_trajectory(residues=[residue.name for residue in mbmol.children]) # extract names of repeat units omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory # for whatever reason, the mbuild -> mdtraj conversion preserves formal charges, while the mdtraj -> OpenMM conversion doesn't :P for mdt_atom, omm_atom in zip(traj.topology.atoms, omm_top.atoms()): # atom order appear to be preserved if mdt_atom.charge != 0: omm_atom.formalCharge = mdt_atom.charge serialize_openmm_pdb( pdb_path, topology=omm_top, positions=omm_pos, atom_labeller=atom_labeller, resname_map=resname_map, )
mbmol_to_rdkit_pdb.__doc__ = f''' Save an mBuild Compound into an OpenMM-formatted PDB file Parameters ---------- pdb_path : str The PDB file path to write the structure to mbmol : Compound The mBuild Compound to convert atom_labeller : Optional[SerialAtomLabeller], default SerialAtomLabeller() An optional SerialAtomLabeller instance which defines the desired behavior for sequentially naming PDB atom lines resname_map : Optional[dict[str, str]], default {_DEFAULT_RESNAME_MAP} An optional remapping dict of 3-letter residue names Any residue name found in the keys of this map will be replaced with its corresponding value in the PDB output For example, a dict with pair "FOO" : "BAR" would rename all residues named "FOO" to "BAR" in the PDB output '''