'''Utilities for validating, cleaning, and adding information to SMILES and SMARTS strings'''
__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'
from typing import Callable, TypeAlias, TypeVar
T = TypeVar('T')
from functools import wraps
from rdkit import Chem, RDLogger
from rdkit.Chem.rdmolops import SanitizeFlags, SanitizeMol, SANITIZE_ALL, SANITIZE_SETAROMATICITY
[docs]
def suppress_rdkit_errors(func : Callable[..., T]) -> Callable[..., T]:
    '''
    Decorator to suppress RDKit error messages during function execution

    The 'rdApp.error' RDKit log channel is disabled just before the wrapped function
    is called and enabled again once the call finishes, whether it returns or raises

    NOTE: the channel is re-enabled unconditionally on exit, so if it was already
    disabled by the caller before the wrapped function ran, it will be switched back on

    Parameters
    ----------
    func : Callable[..., T]
        The function whose execution should not emit RDKit error messages

    Returns
    -------
    decorator : Callable[..., T]
        Wrapper which forwards all arguments to func and returns its result unchanged
    '''
    @wraps(func)
    def decorator(*args, **kwargs):
        RDLogger.DisableLog('rdApp.error')
        try:
            return func(*args, **kwargs)
        finally:
            # runs on both normal return and exception, so that a failure inside
            # func cannot leave RDKit error logging silently switched off
            RDLogger.EnableLog('rdApp.error')
    return decorator
# TYPING AND VALIDATION
# Both names below are plain aliases of `str`: they exist purely to make function
# signatures self-documenting, and give no extra information to static type-checkers
Smiles : TypeAlias = str # marks a string which is meant to hold SMILES line notation
Smarts : TypeAlias = str # marks a string which is meant to hold SMARTS line notation
[docs]
@suppress_rdkit_errors
def is_valid_SMARTS(smarts : Smarts) -> bool:
    '''
    Report whether RDKit is able to parse a string as a SMARTS pattern

    Returns True when Chem.MolFromSmarts produces an object for the string,
    and False when it produces None
    '''
    parsed_query = Chem.MolFromSmarts(smarts)
    return parsed_query is not None
[docs]
@suppress_rdkit_errors
def is_valid_SMILES(smiles : Smiles) -> bool:
    '''
    Check if SMILES string is valid (according to RDKit)

    Returns True when Chem.MolFromSmiles produces an object for the string,
    and False when it produces None
    '''
    parsed_mol = Chem.MolFromSmiles(smiles)
    return parsed_mol is not None
# CUSTOM EXCEPTIONS
[docs]
class InvalidChemicalLineNotation(ValueError):
    '''
    Raised when a chemical line notation string passed to some routine is malformed

    Derives from ValueError, so handlers written for ValueError will also catch it
    '''
## DEVNOTE: there are certainly more line notations out there; I'm just covering those actually used in the codebase here
[docs]
class InvalidSMILES(InvalidChemicalLineNotation):
    '''
    Raised when a SMILES string passed to some routine is malformed

    Specialization of InvalidChemicalLineNotation for the SMILES notation
    '''
[docs]
class InvalidSMARTS(InvalidChemicalLineNotation):
    '''
    Raised when a SMARTS string passed to some routine is malformed

    Specialization of InvalidChemicalLineNotation for the SMARTS notation
    '''
[docs]
class InvalidInChI(InvalidChemicalLineNotation):
    '''
    Raised when an InChI string passed to some routine is malformed

    Specialization of InvalidChemicalLineNotation for the InChI notation
    '''
# CANONICALIZATION AND STRUCTURE EXPANSION
[docs]
def canonical_SMILES_from_mol(mol : Chem.Mol) -> str:
    '''
    Convert an RDKit Mol into a "canonical" SMILES string

    The intent is that Mols with identical chemical structure yield identical strings
    '''
    # first write the Mol out as SMILES with RDKit's canonical atom ordering requested...
    smiles_from_mol = Chem.MolToSmiles(mol, canonical=True)
    # ...then pass that string through RDKit's SMILES canonicalizer before returning it
    return Chem.CanonSmiles(smiles_from_mol)
[docs]
def expanded_SMILES(
        smiles : str,
        assign_map_nums : bool=True,
        start_from : int=1,
        kekulize : bool=True,
        canonicalize : bool=True, # DEV: set to match legacy behavior
    ) -> str:
    '''
    Rewrite a SMILES string with its chemical information spelled out explicitly

    The output always has explicit hydrogens and explicit bond orders; atom map numbers
    and kekulized aromatic bonds are additionally included when the matching flags are set

    Parameters
    ----------
    smiles : str
        The SMILES string to expand
    assign_map_nums : bool, default True
        Whether to give every atom (added hydrogens included) a consecutive atom map number
    start_from : int, default 1
        Map number assigned to the first atom; only consulted when assign_map_nums is set
    kekulize : bool, default True
        Whether to kekulize the molecule and request kekule SMILES on output
    canonicalize : bool, default True
        Forwarded to RDKit as the "canonical" option when writing the output string

    Returns
    -------
    expanded_smiles : str
        The SMILES string written by RDKit with all bonds and all hydrogens explicit

    Raises
    ------
    InvalidSMILES
        If is_valid_SMILES() rejects the passed string
    '''
    # guard clause: refuse anything RDKit cannot parse as SMILES
    if not is_valid_SMILES(smiles):
        raise InvalidSMILES(f'Passed string "{smiles}" cannot be interpreted as a valid SMILES pattern')

    # parse with sanitization switched off, then refresh the property cache
    # to inject valence and ring info without mangling from sanitization
    mol = Chem.MolFromSmiles(smiles, sanitize=False)
    mol.UpdatePropertyCache()
    mol = Chem.AddHs(mol, addCoords=False)

    if assign_map_nums:
        # NOTE: deliberately avoids rdutils.chemlabel here, to prevent coupling
        # NOTE: a start_from below 1 will cause an atom somewhere to be mapped to 0 (i.e. not mapped)
        for map_number, atom in enumerate(mol.GetAtoms(), start=start_from):
            atom.SetAtomMapNum(map_number)

    if kekulize:
        Chem.Kekulize(mol, clearAromaticFlags=True)
    Chem.SanitizeMol(mol)

    return Chem.MolToSmiles(
        mol,
        kekuleSmiles=kekulize,
        allBondsExplicit=True,
        allHsExplicit=True,
        canonical=canonicalize,
    )