Source code for polymerist.rdutils.bonding.portlib

'''Specification for representing, defining, and characterizing selective intermolecular bond placeholders ("ports")'''

__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'

from typing import ClassVar, Generator, Optional
from dataclasses import dataclass, field

from rdkit import Chem
from rdkit.Chem.rdchem import Atom, Bond, Mol

from ...genutils.iteration import iter_len
from ...genutils.containers import UnorderedRegistry


# PORT DEFINITIONS
PORT_SCHEMA = '[#0D1]~[!#0]' # an atom of null type bonded (with exact one of any type of bond) to any non-null atom
PORT_QUERY  = Chem.MolFromSmarts(PORT_SCHEMA) # prototype molecule for a port

[docs] class MolPortError(Exception): '''Raised when port-related errors as encountered''' pass
[docs] def is_linker(rdatom : Atom) -> bool: '''Indicate whether an atom is a linker (intermonomer "*" type atom)''' return rdatom.GetAtomicNum() == 0
[docs] def get_num_linkers(rdmol : Mol) -> int: '''Count how many wild-type inter-molecule linker atoms are in a Mol''' return sum( is_linker(atom) for atom in rdmol.GetAtoms() )
[docs] @dataclass(frozen=True) class Port: '''Class for encapsulating the components of a "port" bonding site (linker-bond-bridgehead)''' linker : Atom bond : Bond bridgehead : Atom bondable_flavors : ClassVar[UnorderedRegistry] = field(default=UnorderedRegistry((0, 0))) # by default, only two (0, 0)-flavor ("unlabelled") ports are bondable @property def flavor(self) -> int: '''Return the flavor of the port''' return self.linker.GetIsotope() def __eq__(self, other : 'Port') -> bool: if not isinstance(other, Port): raise TypeError(f'Cannot compare {self.__class__.__name__} to object of type {other.__class__.__name__}') return ( self.linker.Match(other.linker) # must use explicit "Match" call to correctly compare QueryAtoms and QueryBonds (may be distinct but equivalent objects) and self.bond.Match(other.bond) and self.bridgehead.Match(other.bridgehead) )
[docs] @staticmethod def are_bondable(port_1 : 'Port', port_2 : 'Port') -> bool: '''Determine if two port atoms can be combined into a new bond''' return ( ((port_1.flavor, port_2.flavor) in Port.bondable_flavors) # 1) port flavors are one of the registered pairs and port_1.bond.GetBondType() == port_2.bond.GetBondType() # 2) the ports agree on the new bond order and port_1.bridgehead.GetIdx() != port_2.bridgehead.GetIdx() # 3) the two ports aren't connected to the same atom # and not port_1.bridgehead.Match(port_2.bridgehead) # TODO : workshop this comparison for query atoms TOSELF : this match is too general (matches by query, not identity) )
def __hash__(self) -> int: # necessary to variation in seemingly identical RDKit atoms/bonds '''Distills down unique info about a Port. Simplifies equality check and allows Ports to participate in sets as expected''' return hash((self.flavor, self.linker.GetIdx(), self.bridgehead.GetIdx(), self.bond.GetBondTypeAsDouble()))
[docs] def matches_flavor(self, target_flavor : Optional[int]) -> bool: '''Returns whether a port has a particular flavor''' if target_flavor is None: return True # always returning True if None is passed (simplifies non-specific defaults) return self.flavor == target_flavor
# PORT ENUMERATION
[docs] def get_port_ids(rdmol : Mol) -> Generator[tuple[int, int], None, None]: '''Get the linker and bridgehead indices of all ports found in an RDKit Mol''' for (linker_id, bh_id) in rdmol.GetSubstructMatches(PORT_QUERY): yield linker_id, bh_id # unpacked purely for self-documentation
[docs] def get_linker_ids(rdmol : Mol) -> Generator[int, None, None]: '''Get indices of all atoms which are ports''' for (linker_id, bh_id) in get_port_ids(rdmol): yield linker_id
[docs] def get_ports(rdmol : Mol, target_atom_id : Optional[int]=None, target_flavor : Optional[int]=None) -> Generator[Port, None, None]: '''Find and generate all ports in a molecule. Can optioanlly narrow scope to port whose bridegehead is a particular atom and/or whose linker has a particular flavor''' target_atom = None if (target_atom_id is None) else rdmol.GetAtomWithIdx(target_atom_id) for (linker_id, bh_id) in get_port_ids(rdmol): port = Port( linker=rdmol.GetAtomWithIdx(linker_id), bond=rdmol.GetBondBetweenAtoms(linker_id, bh_id), bridgehead=rdmol.GetAtomWithIdx(bh_id) ) if port.matches_flavor(target_flavor): # if (target_atom is None) or port.bridgehead.Match(target_atom):# will match if no atom is given OR if an atom is given and the bridgehead matches that Atom if (target_atom_id is None) or (port.bridgehead.GetIdx() == target_atom_id):# will match if no atom is given OR if an atom is given and the bridgehead matches that Atom yield port
[docs] def get_num_ports(rdmol : Mol, target_atom_id : Optional[int]=None, target_flavor : Optional[int]=None) -> int: '''Counts the number of port atoms present in a Mol''' return iter_len(get_ports(rdmol, target_atom_id=target_atom_id, target_flavor=target_flavor))
[docs] def get_single_port(rdmol : Mol) -> Port: '''Get the singular port of a Mol which contains only 1 port''' num_ports = get_num_ports(rdmol) if num_ports == 0: raise MolPortError("Can't return port for molecule which has no ports") if num_ports > 1: raise MolPortError("Molecule has multiple ports; choice of single port is ambiguous") return next(get_ports(rdmol)) # return if port count checks pass