'''Utilities for serializing, converting, and extracting info from OpenFF topologies'''
__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'
import logging
LOGGER = logging.getLogger(__name__)
from pathlib import Path
from ast import literal_eval
import numpy as np
from rdkit import Chem
from rdkit.Chem.rdchem import (
Atom as RDAtom,
Mol,
)
from openff.toolkit import ToolkitRegistry
from openff.toolkit.topology import (
Atom as OFFAtom,
Molecule,
Topology,
)
from . import GTR
from ...genutils.iteration import asiterable
from ...genutils.fileutils.pathutils import dotless, allow_string_paths
from ...genutils.decorators.functional import optional_in_place
from ...rdutils.rdprops import RDPROP_SETTERS, copy_rdobj_props
from ...rdutils.rdcoords.tiling import tile_lattice_with_rdmol
# TOPOLOGY INFO
[docs]
def get_largest_offmol(offtop : Topology) -> Molecule:
'''Return the largest molecule in a Topology'''
return max(offtop.molecules, key=lambda offmol : offmol.n_atoms)
# RDKIT METADATA-PRESERVING INTERCONVERSION
# MOLECULE METADATA SERIALIZATION / DESERIALIZATION
# FILE I/O FUNCTIONS
[docs]
@allow_string_paths
def save_molecule(path : Path, offmol : Molecule, toolkit_registry : ToolkitRegistry=GTR) -> None: # TODO : generalize to work for bytes w/ opened file
'''Syntactic sugar for annoying suffix re-specification when saving OpenFF Molecules'''
offmol.to_file(str(path), file_format=dotless(path), toolkit_registry=toolkit_registry)
[docs]
@allow_string_paths
def topology_to_sdf(path : Path, offtop : Topology, toolkit_registry : ToolkitRegistry=GTR) -> None:
'''
Save an OpenFF Topology to an SDF file, preserving all atom metadata
Parameters
----------
path : Path
The path of the SDF file to save to
offtop : Topology
An OpenFF Topology
toolkit_registry : ToolkitRegistry, default=GLOBAL_TOOLKIT_REGISTRY
The toolkit registry to use for serializing each Molecule object within the Topology
Defaults to the Global Toolkit Registry supplied by the OpenFF toolkit
'''
assert(path.suffix == '.sdf')
with path.open('w') as sdf_file:# TODO : maybe change to append mode instead?
for mol in offtop.molecules:
serial_mol = package_atom_metadata(mol, in_place=False) # packageage metadata for serialization WITHOUT disturbing the original molecule
serial_mol.to_file(sdf_file, file_format='SDF', toolkit_registry=toolkit_registry) # NOTE : not using save_molecule() here, as that function does not currently support bytes-like input (needed for multiple mols in same file)
LOGGER.debug(f'Successfully serialized SDF Topology to {path}')
[docs]
@allow_string_paths
def topology_from_sdf(path : Path, *args, **kwargs) -> Topology:
'''
Load an OpenFF Topology from an SDF file, assigning metadata and partial charges if stored
Parameters
----------
path : Path
The path of the SDF file to load
*args, **kwargs : Any
Additional positional arguments to pass to Molecule.from_file()
Returns
-------
Topology
An OpenFF Topology object containing the molecule(s) found in the SDF,
each with its associated metadata and partial charges from the SDF bound
'''
assert(path.suffix == '.sdf')
LOGGER.debug(f'Loading serialized SDF Topology from {path}')
return Topology.from_molecules(
unpackage_atom_metadata(mol, in_place=False) # reassign metadata if serialized
for mol in asiterable(Molecule.from_file(path, *args, **kwargs)) # asiterable() gracefully handles the singleton case
)
# TOPOLOGY BUILD FUNCTIONS
[docs]
def topology_from_molecule_onto_lattice(
offmol : Molecule,
lattice_points : np.ndarray,
rotate_randomly : bool=True,
unique_mol_ids : bool=True
) -> Topology:
'''
Copy and tile an OpenFF Molecule onto a set of specified lattice sites
For each lattice site provided, a copy of the Molecule will be placed with its center of mass coincident to that site
Parameters
----------
offmol : Molecule
The OpenFF Molecule to tile onto the lattice
lattice_points : np.ndarray
An Nx3 array where each row is the xyz coordinate of a lattice site
rotate_randomly : bool, default=True
Whether to apply a random SO(3) rotation to each copy of the Molecule
unique_mol_ids : bool, default=True
Whether to assign unique IDs to each copy of the Molecule
Returns
-------
Topology
A Topology containing all tiled copies of the original molecule
'''
tiled_rdmol = tile_lattice_with_rdmol(offmol.to_rdkit(), lattice_points, rotate_randomly=rotate_randomly)
tiled_offmols = []
for mol_id, tiled_mol_copy in enumerate(Chem.GetMolFrags(tiled_rdmol, asMols=True, sanitizeFrags=False)):
copy_rdobj_props(tiled_rdmol, tiled_mol_copy) # ensure each individual fragment preserves the information of the parent molecule
tiled_offmol = Molecule.from_rdkit(
rdmol=tiled_mol_copy,
allow_undefined_stereo=True,
hydrogens_are_explicit=True
)
if unique_mol_ids:
for atom in tiled_offmol.atoms:
atom.metadata['residue_number'] = mol_id # for now, this appears to be the mechanism by which Interchange assigns labels to unique molecules
tiled_offmols.append(tiled_offmol)
return Topology.from_molecules(tiled_offmols)