Source code for polymerist.polymers.monographs

'''Tools for generating and manipulating monomer connectivity graphs'''

__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'

from typing import Any, ClassVar, Generator, Optional, Sequence, Union

import networkx as nx
from itertools import product as cartesian_product

from ..genutils.iteration import asiterable
from ..genutils.sequences.discernment import DISCERNMENTSolver
from ..genutils.textual.delimiters import validate_braces
from ..genutils.fileutils.jsonio.serialize import TypeSerializer


class MonomerGraph(nx.Graph):
    '''A graph representation of the connectivity of monomer fragments in a polymer topology

    Nodes stand for monomer units and edges for the bonds between them; the class-level
    "*_ATTR" constants hold the names of the node/edge attributes under which annotations are stored'''
    MONOMER_NAME_ATTR : ClassVar[str] = 'monomer_name'     # node attribute name assigned to monomer names
    FLAVOR_DICT_ATTR  : ClassVar[str] = 'neighbor_flavors' # node attribute name assigned to outgoing flavors for bonds to neighbor ports
    BONDTYPE_ATTR     : ClassVar[str] = 'bondtype'         # edge attribute name assigned to bond type annotations

    # node and edge attributes
    def get_monomer_name_at_node_index(self, node_idx : int) -> Optional[str]:
        '''Recover the assigned monomer name for the node at the given index
        Returns None if the node has no monomer name attribute set'''
        return self.nodes[node_idx].get(self.MONOMER_NAME_ATTR)
    monomer_name = get_monomer_name = get_monomer_name_at_node_idx = get_monomer_name_at_node_index

    def get_flavor_dict_at_node_index(self, node_idx : int) -> Optional[dict[int, int]]:
        '''Recover the assigned dictionary of neighbor flavors for the node at the given index
        Returns None if the node has no neighbor flavor attribute set'''
        return self.nodes[node_idx].get(self.FLAVOR_DICT_ATTR)
    flavor_dict = get_flavor_dict = get_flavor_dict_at_node_idx = get_flavor_dict_at_node_index

    # connectivity properties
    @property
    def num_monomers(self) -> int:
        '''Number of monomer units represented in the current polymer'''
        return self.number_of_nodes()

    @property
    def is_unbranched(self) -> bool:
        '''Whether the monomer graph represents straight chain(s) without branching
        (i.e. no monomer is bonded to more than 2 neighbors)'''
        return all(node_deg <= 2 for _, node_deg in self.degree)
    is_linear = is_unbranched

    @property
    def is_branched(self) -> bool:
        '''Whether the monomer graph contains at least one branch point (the negation of is_unbranched)'''
        # previously this property was also named "is_unbranched", which shadowed
        # the definition above and made every access recurse without end
        return not self.is_unbranched

    @property
    def terminal_monomers(self) -> Generator[int, None, None]:
        '''Generates the indices of all nodes corresponding to terminal monomers (i.e. those with only one outgoing bond)'''
        for node_idx, degree in self.degree:
            if degree == 1:
                yield node_idx
    termini = leaves = terminal_monomers

    # topological and multi-chain properties
    @property
    def num_chains(self) -> int:
        '''The number of disconnected chains represented by the MonoGraph'''
        return nx.number_connected_components(self)

    @property
    def chains(self) -> Generator['MonomerGraph', None, None]:
        '''Generates all disconnected polymers chains in the graph sequentially
        Each chain is yielded as the subgraph induced on one connected component'''
        for cc_nodes in nx.connected_components(self):
            yield self.subgraph(cc_nodes)

    @property
    def unique_monomer_names(self) -> set[str]:
        '''The collection of unique monomer names embedded in the graph nodes'''
        return set(nx.get_node_attributes(self, self.MONOMER_NAME_ATTR).values())

    # visualization
    def draw(self, label_monomers : bool=True, label_bonds : bool=True, **kwargs) -> None: # TODO: expand arg passing (positions, matplotlib axes, etc)
        '''Visualize graph structure with NetworkX

        Nodes are labelled with their monomer names when "label_monomers" is set, and
        edges with their bond type annotations when "label_bonds" is set
        If no "pos" keyword is supplied, a spring layout is computed and used for all drawing calls'''
        if 'pos' not in kwargs:
            kwargs['pos'] = nx.spring_layout(self) # TODO: try other layouts to see which looks best

        nx.draw(
            self,
            with_labels=label_monomers,
            labels=nx.get_node_attributes(self, self.MONOMER_NAME_ATTR),
            **kwargs,
        )

        if label_bonds: # TODO: add flavor labels to drawing
            # NOTE(review): every keyword (not just "pos") is forwarded here too - confirm that
            # draw_networkx_edge_labels accepts all of the keywords callers are expected to pass
            nx.draw_networkx_edge_labels(
                self,
                edge_labels=nx.get_edge_attributes(self, self.BONDTYPE_ATTR),
                **kwargs,
            )
    visualize = draw

    # chemical information checks
    def insert_chemical_info(self, chemical_info : dict[str, dict]) -> None:
        '''Insert SMILES, SMARTS, and atom/linker count info into nodes from minimal set of monomer info templates
        Not yet implemented; always raises NotImplementedError'''
        raise NotImplementedError # NotImplemented is a comparison sentinel rather than an exception and cannot be raised

    def _validate(self) -> bool:
        '''Check whether the chemical information inserted into the monomer graph is valid
        Not yet implemented; always raises NotImplementedError'''
        raise NotImplementedError # NotImplemented is a comparison sentinel rather than an exception and cannot be raised

    # SMILES-like in-line encodings
    ## Reading string
    @classmethod
    def from_smidge_string(cls, smidge_string : str, start_node_idx : int=0) -> 'MonomerGraph':
        '''Parse a SMIDGE ("SMILES-like Monomer Interconnectivity & Degree Graph Encoding") string and read it into a networkX Graph'''
        from .smidgelib.smidgeread import SMIDGEReader # imported at call time rather than at module load

        reader = SMIDGEReader()
        return reader.read_smidge(smidge_string, start_node_idx=start_node_idx)
    from_SMIDGE = from_smidge = from_smidge_string

    ## Writing string
    def _validate_start_node_idxs(self, start_node_idxs : Optional[Union[int, Sequence[int]]]=None) -> dict[int, int]:
        '''Check if a collection of DFS traversal start indices are valid for the current graph topology

        Returns a dict mapping a chain index to the node index at which traversal of that chain should begin
        Raises ValueError if the number of start indices differs from the number of chains,
        or if the start indices cannot be matched 1:1 onto distinct chains'''
        # 0) if explicitly NO ids are passed, no validation is needed
        if start_node_idxs is None:
            return {
                i : min(chain) # assign the smallest node in each component as the starting indices
                    for i, chain in enumerate(self.chains)
            }

        # 1) check that there are enough start nodes for the present number of chains
        n_chains = self.num_chains
        start_node_idxs = asiterable(start_node_idxs) # convert to iterable to handle singleton values in a unified way
        n_nodes = len(start_node_idxs)
        if n_nodes != n_chains:
            quantifier = 'few' if (n_nodes < n_chains) else 'many'
            raise ValueError(f'Provided too {quantifier} chain start indices traversal of the given graph ({n_nodes} provided for {n_chains}-chain graph)')

        # 2) check that there exists a 1:1 mapping between the provided node collection and DISTINCT connected components
        cc_order_planner = DISCERNMENTSolver(nx.connected_components(self))
        if not cc_order_planner.solution_exists(start_node_idxs, unique_bins=True):
            raise ValueError('Starting node indices provided do not uniquely correspond to distinct chains')

        # take the first valid assignment found; presumably each entry is the index of
        # the connected component containing the matching start node - TODO confirm
        cc_order = next(cc_order_planner.enumerate_choices(start_node_idxs, unique_bins=True))
        return {
            chain_idx : start_node_idx
                for (chain_idx, start_node_idx) in zip(cc_order, start_node_idxs) # the parity of this is guaranteed by the prior length match check
        }

    def to_smidge_string(self, start_node_idxs : Optional[Union[int, Sequence[int]]]=None) -> str:
        '''Convert a monomer graph into a SMIDGE ("SMILES-like Monomer Interconnectivity & Degree Graph Encoding") string'''
        from .smidgelib.smidgewrite import SMIDGEWriter # imported at call time rather than at module load

        writer = SMIDGEWriter()
        return writer.write_smidge(self, start_node_idxs=start_node_idxs)
    to_smidge = to_SMIDGE = to_smidge_string

    ## Testing string translation
    def _passes_string_conversion_tests(self) -> tuple[bool, Optional[tuple[int, ...]]]:
        '''Developer function, tests if conversion to and from graph strings preserves the graph topology invariant to the starting node
        Returns a bool of whether test passes for all possible traversal starting positions, and tuple of positions of first failure (or None if passing)'''
        # try every combination consisting of exactly one starting node from each chain
        for start_idxs in cartesian_product(*[chain.nodes for chain in self.chains]):
            isostr = self.to_smidge_string(start_node_idxs=start_idxs)
            isograph = self.from_smidge_string(isostr)
            if not nx.is_isomorphic(self, isograph):
                return False, start_idxs # report the first combination which fails to round-trip

        return True, None # only reached once every combination has round-tripped successfully
MonoGraph = MonomerGraph # shorthand alias for convenience; both names refer to the very same class object
class MonomerGraphSerializer(TypeSerializer, python_type=MonomerGraph):
    '''JSON serializer which stores MonomerGraphs in the form of their SMIDGE string encodings'''
    @staticmethod
    def encode(python_obj : MonomerGraph) -> str:
        '''Convert a MonomerGraph into the SMIDGE string which will be written to JSON'''
        smidge_string = python_obj.to_smidge_string()
        return smidge_string

    @staticmethod
    def decode(json_obj : str) -> MonomerGraph:
        '''Rebuild a MonomerGraph from a SMIDGE string which was read out of JSON'''
        monograph = MonomerGraph.from_smidge(json_obj)
        return monograph