'''Automata for reading SMIDGE strings into their graph representations'''
__author__ = 'Timotej Bernat'
__email__ = 'timotej.bernat@colorado.edu'
import logging
LOGGER = logging.getLogger(__name__)
from typing import Optional
from dataclasses import dataclass, field
from abc import ABC, abstractmethod
import re
from . import MonomerGraphBondInfo, BOND_TOKEN_RE
from ..monographs import MonomerGraph
from ...genutils.textual.delimiters import validate_braces
# READER REGISTER
@dataclass
class SMIDGEReaderRegister:
    '''Mutable scratch space shared by the machine states while a SMIDGE string is being read

    Every field has a default, so a register may be created with no arguments'''
    monograph : Optional[MonomerGraph] = field(default_factory=MonomerGraph) # graph being assembled; a fresh one is created for each register
    bond_info : MonomerGraphBondInfo = field(default_factory=MonomerGraphBondInfo) # bond description used for the next edge that gets added
    curr_token : str = '' # the character currently being processed
    node_idx : int = 0 # index of the most recently started monomer node
    str_buffer : str = '' # characters collected since the buffer was last cleared
    node_stack : list[int] = field(default_factory=list) # indices of nodes which later nodes may attach to (top of stack is last)
# READER STATE BASE
class SMIDGEReadState(ABC):
    '''Abstract base for the states used when reading MID graphs from SMIDGE strings'''
    @abstractmethod
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Apply this state's effect to the shared register; implemented by each concrete state'''
        pass

    def transition(self, register : SMIDGEReaderRegister) -> 'SMIDGEReadState': # same transition rule for all states, based on the current character
        '''Define which state should follow the current one based on input

        The choice depends only on register.curr_token: each special character
        selects its dedicated state, and any other character selects Accumulate
        A new instance of the selected state is returned on every call'''
        # Map tokens to state classes rather than instances, so that only the state which is
        # actually needed gets constructed (instead of building every state for each character).
        # The table lives inside the method because the concrete states are defined after this class
        read_state_types : dict[str, type[SMIDGEReadState]] = {
            '.' : ChainNew,
            '[' : MonomerStart,
            ']' : MonomerEnd,
            '(' : BranchStart,
            ')' : BranchEnd,
            '<' : BondStart,
            '>' : BondEnd,
        }
        next_state_type = read_state_types.get(register.curr_token, Accumulate) # treat Accumulate as default state for non-special characters

        return next_state_type()
# CONCRETE STATES
class Accumulate(SMIDGEReadState):
    '''State which gathers ordinary characters into the register's string buffer'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Append the current token to the end of the string buffer'''
        register.str_buffer = register.str_buffer + register.curr_token
[docs]
class ChainNew(SMIDGEReadState):
    '''State entered at the beginning of a new chain'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Empty the node stack (in-place) so that the new chain starts with no attachment points'''
        del register.node_stack[:]
[docs]
class MonomerStart(SMIDGEReadState):
    '''State entered when a new monomer begins'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Advance to the next node index and empty the buffer which will receive the monomer name'''
        register.node_idx = register.node_idx + 1 # the monomer about to be read gets the next index
        register.str_buffer = '' # discard anything buffered before the monomer name
[docs]
class MonomerEnd(SMIDGEReadState):
    '''State entered when a monomer has been read completely and can be added to the graph'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Add a node for the buffered monomer name and, if the stack is non-empty, bond it to the node on top of the stack

        The new node always ends up on top of the node stack, replacing the node it was bonded to (if any)'''
        graph = register.monograph
        new_node_id = register.node_idx

        # register the monomer just read under the current index, with an initially-empty flavor dict
        node_attrs = {
            MonomerGraph.MONOMER_NAME_ATTR : register.str_buffer,
            MonomerGraph.FLAVOR_DICT_ATTR : {},
        }
        graph.add_node(new_node_id, **node_attrs)

        if register.node_stack: # only nodes with a predecessor on the stack get bonded
            parent_node_id = register.node_stack.pop() # the predecessor is consumed from the stack...
            bond = register.bond_info

            # each end of the bond records a flavor, keyed by the node at the other end
            graph.nodes[parent_node_id][MonomerGraph.FLAVOR_DICT_ATTR][new_node_id] = bond.incoming_flavor
            graph.nodes[new_node_id][MonomerGraph.FLAVOR_DICT_ATTR][parent_node_id] = bond.outgoing_flavor

            edge_attrs = {MonomerGraph.BONDTYPE_ATTR : bond.bondtype}
            graph.add_edge(parent_node_id, new_node_id, **edge_attrs)

        register.node_stack.append(new_node_id) # ...and the new node becomes the attachment point for what follows
[docs]
class BondStart(SMIDGEReadState):
    '''State entered when a bond token begins'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Empty the string buffer so that it collects only the contents of the bond token'''
        register.str_buffer = ''
[docs]
class BondEnd(SMIDGEReadState):
    '''State entered when a bond token has been read completely'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Match the buffered text against BOND_TOKEN_RE and store the resulting bond info in the register'''
        # NOTE(review): re.match gives None when the buffer does not match the pattern;
        # how MonomerGraphBondInfo.from_match treats None is not visible here - confirm
        bond_match = re.match(BOND_TOKEN_RE, register.str_buffer)
        register.bond_info = MonomerGraphBondInfo.from_match(bond_match)
[docs]
class BranchStart(SMIDGEReadState):
    '''State entered when a branch opens; remembers the branch point for later backtracking'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Push a second copy of the node on top of the stack, leaving the stack one entry deeper'''
        branch_point = register.node_stack[-1] # the node the branch grows from
        register.node_stack.append(branch_point)
[docs]
class BranchEnd(SMIDGEReadState):
    '''State entered when a branch closes; backtracks to the previous branch point'''
    def state_action(self, register : SMIDGEReaderRegister) -> None:
        '''Discard the top of the node stack, exposing the entry beneath it'''
        register.node_stack.pop() # the popped index itself is not needed
# READER
@dataclass
class SMIDGEReader:
    '''Pushdown automaton for translating SMIDGE strings into monomer graphs'''
    state : SMIDGEReadState = field(default_factory=ChainNew, init=False) # initial state is always the new chain state

    def read_smidge(self, smidge_string : str, start_node_idx : int=0) -> MonomerGraph:
        '''Parse a SMIDGE ("SMILES-like Monomer Interconnectivity & Degree Graph Encoding") string and read it into a networkX Graph

        Parameters
        ----------
        smidge_string : str
            The SMIDGE string to parse; it is passed to validate_braces before any parsing occurs
        start_node_idx : int, default 0
            Index assigned to the first monomer node; subsequent monomers are numbered consecutively

        Returns
        -------
        monograph : MonomerGraph
            The graph held by the register after every character has been processed
        '''
        validate_braces(smidge_string) # check that all braces are in order before proceeding
        self.state = ChainNew() # begin every read in the initial state, even if this reader was already used for another string
        register = SMIDGEReaderRegister(
            node_idx=start_node_idx - 1, # the index is incremented before each node is added, so begin one below the first index
        )

        for char in smidge_string:
            register.curr_token = char
            # pass values as logging arguments so that the register repr is only built when DEBUG output is actually emitted
            LOGGER.debug('Current %s: %r', register.__class__.__name__, register)
            LOGGER.debug('Current %s state: %s', self.__class__.__name__, self.state.__class__.__name__)

            self.state = self.state.transition(register)
            self.state.state_action(register) # TODO: figure out how to make this work with the order of actions and transitions reversed

        return register.monograph