'''For automating RDKit molecule sanitization and chemical property cleanup'''
__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'
from typing import Callable, Concatenate, Generator, Iterable, Optional, ParamSpec, TypeAlias, TypeVar, Union
P = ParamSpec('P')
from functools import wraps
from rdkit.Chem import Mol, MolFromSmiles
from rdkit.Chem.rdmolops import SanitizeMol, SanitizeFlags, SANITIZE_ALL, SANITIZE_NONE
from rdkit.Chem.rdmolops import AromaticityModel, SetAromaticity, Kekulize, AROMATICITY_RDKIT, AROMATICITY_MDL
from ..genutils.decorators.functional import optional_in_place
from ..genutils.decorators.meta import extend_to_methods
from ..smileslib.cleanup import Smiles, expanded_SMILES
[docs]
@optional_in_place
def sanitize_mol(
mol : Mol,
sanitize_ops : Union[None, int, SanitizeFlags]=SANITIZE_NONE,
aromaticity_model : Optional[AromaticityModel]=None,
) -> None:
'''Apply chemical sanitization operations, including bond aromaticity
By default, performs NO sanitization; all sanitization operations must be explicitly specified!
'''
# enforce correct typing from deliberate (or accidental) NoneType pass
if sanitize_ops is None:
sanitize_ops = SANITIZE_NONE
# aromaticity determination
if aromaticity_model is not None:
sanitize_ops = sanitize_ops & ~SanitizeFlags.SANITIZE_KEKULIZE & ~SanitizeFlags.SANITIZE_SETAROMATICITY # prevents final SanitizeMol call from undoing aromatcity model
Kekulize(mol, clearAromaticFlags=True)
SetAromaticity(mol, model=aromaticity_model)
# hydrogen handling?
...
# miscellaneous sanitization operations
SanitizeMol(mol, sanitizeOps=sanitize_ops) # regardless of settings, sanitization should be done last to give greatest likelihodd of molecule validity
[docs]
@extend_to_methods
def sanitizable_mol_outputs(mol_func : Callable[P, Union[Mol, Iterable[Mol]]]) -> Callable[Concatenate[Union[None, int, SanitizeFlags], Optional[AromaticityModel], P], Union[Mol, tuple[Mol, ...]]]:
'''
Decorator which injects molecule sanitization capability into a function which returns RDKit Mols
By default, performs NO sanitization; all sanitization operations must be explicitly specified!
Acts on functions which are assumed to return a single mol object, or an iterable containing Mols
Decorated function will return a single mol in the former case, or a tuple of mols in the latter
'''
@wraps(mol_func)
def wrapped_func(
*args : P.args,
sanitize_ops : Union[None, int, SanitizeFlags]=SANITIZE_NONE,
aromaticity_model : Optional[AromaticityModel]=None,
**kwargs : P.kwargs,
) -> Union[Mol, tuple[Mol, ...]]:
# determine if return is singular, iterable, or invalid
outputs = mol_func(*args, **kwargs)
if isinstance(outputs, Mol):
is_singular = True
outputs = (outputs,)
elif isinstance(outputs, Iterable):
is_singular = False
outputs = tuple(outputs) # cast to tuple in advance; all changes from here are made in-place
else:
raise TypeError(f'Expected wrapped function to return etiher a Chem.Mol object or an iterable of Chem.Mol, not {type(outputs)}')
# perform sanitization
for mol in outputs:
if isinstance(mol, Mol): # for now, tolerates other types of objects in output stream by skipping over them
sanitize_mol(mol, sanitize_ops=sanitize_ops, aromaticity_model=aromaticity_model, in_place=True)
continue
return outputs[0] if is_singular else outputs
return wrapped_func
[docs]
def explicit_mol_from_SMILES(
smiles : Smiles,
assign_map_nums : bool=False,
sanitize_ops : Union[None, int, SanitizeFlags]=SANITIZE_ALL,
aromaticity_model : Optional[AromaticityModel]=AROMATICITY_MDL,
) -> Mol:
'''Convenience method for creating a chemically-explicit AND chemically-validated RDKit Mol from a SMILES string'''
mol = MolFromSmiles(
expanded_SMILES( # NOTE: expanded_SMILES already validated passed SMILES; no need to redo it in local scope
smiles,
assign_map_nums=assign_map_nums,
kekulize=False, # leave this up to the sanitization/aromatization step
),
sanitize=False,
)
sanitize_mol(
mol,
sanitize_ops=sanitize_ops,
aromaticity_model=aromaticity_model,
in_place=True,
)
return mol