"""
OPLS-AA Atom Typing and Charge Assignment
Assign OPLS-AA atom types and partial charges to atoms.
"""
from typing import Dict, Tuple, List, Optional
from dataclasses import dataclass
from rdkit import Chem
from .constants import OPLS_ATOM_TYPES, OPLS_BOND_PARAMS, OPLS_ANGLE_PARAMS
@dataclass
class AtomProperty:
"""Atom type and charge information."""
atom_idx: int
symbol: str
opls_type: str
charge: float
mass: float
aromatic: bool
[docs]
class AtomTyper:
"""Assign OPLS-AA atom types based on chemical environment."""
def __init__(self):
self.opls_types = OPLS_ATOM_TYPES
self.bond_params = OPLS_BOND_PARAMS
self.angle_params = OPLS_ANGLE_PARAMS
[docs]
def assign_atom_types(self, mol: Chem.Mol) -> Dict[int, str]:
"""
Assign OPLS-AA atom types to all atoms.
Args:
mol: RDKit molecule
Returns:
Dictionary of {atom_idx: opls_type}
"""
atom_types = {}
for atom in mol.GetAtoms():
idx = atom.GetIdx()
atom_type = self._determine_atom_type(mol, atom)
atom_types[idx] = atom_type
return atom_types
def _determine_atom_type(self, mol: Chem.Mol, atom: Chem.Atom) -> str:
"""
Determine OPLS atom type for a single atom.
Logic:
- Aromatic C -> CA, aromatic H -> HA
- Aliphatic sp3 C -> CT, H on sp3 C -> HC
- Carbonyl O -> OC, hydroxyl O -> OH, ether O -> OS
"""
atomic_num = atom.GetAtomicNum()
is_aromatic = atom.GetIsAromatic()
# Carbon
if atomic_num == 6:
# For large fused-ring systems RDKit Kekulization can fail, leaving
# is_aromatic=False on all atoms. Fall back to ring membership +
# degree-3 connectivity (2 ring C neighbours + 1 H or O, or 3 ring C
# neighbours for interior junction atoms) as a reliable proxy.
ring_info = mol.GetRingInfo()
in_ring = ring_info.NumAtomRings(atom.GetIdx()) > 0
if is_aromatic or (in_ring and atom.GetDegree() == 3):
return "CA"
else:
# Check if connected to C=O
for neighbor in atom.GetNeighbors():
if neighbor.GetAtomicNum() == 8:
bond = mol.GetBondBetweenAtoms(atom.GetIdx(), neighbor.GetIdx())
if bond and bond.GetBondType() == Chem.BondType.DOUBLE:
return "C" # Carboxylic acid carbon
return "CT" # Aliphatic carbon
# Hydrogen
elif atomic_num == 1:
# Check what it's bonded to
neighbors = atom.GetNeighbors()
if neighbors:
neighbor = neighbors[0]
neighbor_type = self._determine_atom_type(mol, neighbor)
if neighbor_type == "CA":
return "HA"
elif neighbor_type == "OH":
return "HO"
elif neighbor_type == "OH2":
return "HO2"
elif neighbor_type == "NA":
return "HNA"
elif neighbor_type == "SH_":
return "HSH"
elif neighbor_type == "NPR":
return "HNPR"
else:
return "HC"
return "HC"
# Oxygen
elif atomic_num == 8:
# Check bonding environment
num_bonds = len(atom.GetBonds())
neighbors = list(atom.GetNeighbors())
if num_bonds == 1:
# Terminal oxygen (likely carbonyl)
neighbor = neighbors[0]
if neighbor.GetAtomicNum() == 6:
# Check if double bond
bond = mol.GetBondBetweenAtoms(atom.GetIdx(), neighbor.GetIdx())
if bond and bond.GetBondType() == Chem.BondType.DOUBLE:
return "OC" # Carbonyl
else:
return "O" # Carboxylic acid O
elif num_bonds == 2:
# Connected to two atoms
if any(n.GetAtomicNum() == 1 for n in neighbors):
return "OH" # Hydroxyl
else:
return "OS" # Ether
else:
# Special case for carboxylic acid OH
return "OH2"
# Nitrogen
elif atomic_num == 7:
neighbors = list(atom.GetNeighbors())
h_count = sum(1 for n in neighbors if n.GetAtomicNum() == 1)
heavy_neighbors = [n for n in neighbors if n.GetAtomicNum() != 1]
has_aromatic_c = any(
n.GetAtomicNum() == 6 and n.GetIsAromatic() for n in neighbors
)
# Ring-substituting nitrogen (pyridinic / pyrrolic / graphitic).
# A ring N replaced INTO the skeleton is bonded only to ring carbons
# (plus, for pyrrolic, one H). Aniline N (pendant Ar-NH2) is not in
# a ring, so it is excluded here.
ring_info = mol.GetRingInfo()
if ring_info.NumAtomRings(atom.GetIdx()) > 0:
ring_sizes = [
len(r) for r in ring_info.AtomRings() if atom.GetIdx() in r
]
if h_count >= 1 and 5 in ring_sizes:
return "NPR" # Pyrrolic N (5-ring, N-H)
if h_count == 0 and 6 in ring_sizes:
if len(heavy_neighbors) >= 3:
return "NGR" # Graphitic / quaternary N (interior)
return "NPY" # Pyridinic N (edge 6-ring, no H)
if has_aromatic_c and h_count >= 1:
return "NA" # Aniline-type aromatic primary amine (Ar-NH2)
elif len(atom.GetBonds()) == 3:
return "N"
else:
return "NT"
# Sulfur
elif atomic_num == 16:
if any(n.GetAtomicNum() == 1 for n in atom.GetNeighbors()):
return "SH_" # Thiol sulfur (Ar-SH)
else:
return "SS" # Thioether sulfur (Ar-S-Ar)
# Default
return f"X{atomic_num}"
[docs]
def get_bond_parameters(
self, atom_type_1: str, atom_type_2: str
) -> Optional[Tuple[float, float]]:
"""
Get bond parameters for atom type pair.
Returns:
(force_constant_kcal_mol_A2, equilibrium_length_A)
"""
key = tuple(sorted([atom_type_1, atom_type_2]))
return self.bond_params.get(key)
[docs]
def get_angle_parameters(
self, atom_type_1: str, atom_type_2: str, atom_type_3: str
) -> Optional[Tuple[float, float]]:
"""
Get angle parameters for atom type triple.
Returns:
(force_constant_kcal_mol_rad2, equilibrium_angle_deg)
"""
return self.angle_params.get((atom_type_1, atom_type_2, atom_type_3))
[docs]
class ChargeAssigner:
"""
Assign partial charges to atoms using OPLS-AA parameters.
Strategy: Use predefined OPLS charges, with adjustments for unusual groups.
"""
def __init__(self):
self.opls_types = OPLS_ATOM_TYPES
[docs]
def assign_charges(
self, mol: Chem.Mol, atom_types: Dict[int, str]
) -> Dict[int, float]:
"""
Assign partial charges to all atoms.
Args:
mol: RDKit molecule
atom_types: Dictionary of {atom_idx: opls_type}
Returns:
Dictionary of {atom_idx: charge}
"""
charges = {}
for idx, opls_type in atom_types.items():
if opls_type in self.opls_types:
# Use predefined charge
charges[idx] = self.opls_types[opls_type][2]
else:
# Estimate charge based on atom type
charges[idx] = self._estimate_charge(mol, idx, opls_type)
# Apply charge equilibration to ensure overall neutrality
charges = self._equilibrate_charges(mol, charges)
return charges
def _estimate_charge(self, mol: Chem.Mol, atom_idx: int, opls_type: str) -> float:
"""
Estimate charge for atom if not in standard OPLS table.
Args:
mol: RDKit molecule
atom_idx: Atom index
opls_type: OPLS atom type string
Returns:
Estimated partial charge
"""
atom = mol.GetAtomWithIdx(atom_idx)
atomic_num = atom.GetAtomicNum()
# Default charge based on element and hybridization
if atomic_num == 6: # Carbon
return -0.15 # Typical carbon charge
elif atomic_num == 8: # Oxygen
return -0.50 # Typical oxygen charge
elif atomic_num == 1: # Hydrogen
return 0.10 # Typical hydrogen charge
elif atomic_num == 7: # Nitrogen
return -0.30
elif atomic_num == 16: # Sulfur
return -0.20
else:
return 0.0
def _equilibrate_charges(
self, mol: Chem.Mol, charges: Dict[int, float]
) -> Dict[int, float]:
"""
Adjust charges to ensure overall neutrality.
Scales all charges proportionally to sum to zero.
Args:
mol: RDKit molecule
charges: Dictionary of {atom_idx: charge}
Returns:
Equilibrated charges
"""
total_charge = sum(charges.values())
if abs(total_charge) < 1e-6:
return charges
# Find polar atoms to adjust
# Prefer to adjust oxygen and nitrogen
adjustable_atoms = [
idx
for idx, charge in charges.items()
if mol.GetAtomWithIdx(idx).GetAtomicNum() in [8, 7]
]
if not adjustable_atoms:
adjustable_atoms = list(charges.keys())
# Scale adjustment
scale = total_charge / len(adjustable_atoms)
adjusted_charges = charges.copy()
for idx in adjustable_atoms:
adjusted_charges[idx] -= scale
return adjusted_charges
class OPLSPropertyTable:
"""
Generate OPLS property table for GROMACS topology.
Contains atom type, mass, charge information for all atoms.
"""
def __init__(self, mol: Chem.Mol, atom_types: Dict[int, str], charges: Dict[int, float]):
self.mol = mol
self.atom_types = atom_types
self.charges = charges
self.properties: List[AtomProperty] = []
self._build_properties()
def _build_properties(self):
"""Build atom property table."""
for atom in self.mol.GetAtoms():
idx = atom.GetIdx()
opls_type = self.atom_types.get(idx, "CT")
if opls_type in OPLS_ATOM_TYPES:
desc, mass, default_charge = OPLS_ATOM_TYPES[opls_type]
else:
mass = atom.GetMass()
default_charge = 0.0
charge = self.charges.get(idx, default_charge)
prop = AtomProperty(
atom_idx=idx,
symbol=atom.GetSymbol(),
opls_type=opls_type,
charge=charge,
mass=mass,
aromatic=atom.GetIsAromatic(),
)
self.properties.append(prop)
def get_properties(self) -> List[AtomProperty]:
"""Get atom properties."""
return self.properties
def get_total_charge(self) -> float:
"""Calculate total molecular charge."""
return sum(p.charge for p in self.properties)
def get_mass(self) -> float:
"""Calculate total molecular mass."""
return sum(p.mass for p in self.properties)
def validate(self) -> Tuple[bool, List[str]]:
"""
Validate OPLS properties.
Returns:
(is_valid, error_messages)
"""
errors = []
# Check for unassigned atom types
for prop in self.properties:
if prop.opls_type.startswith("X"):
errors.append(f"Atom {prop.atom_idx} has unrecognized type")
# Check for extreme charges
for prop in self.properties:
if abs(prop.charge) > 2.0:
errors.append(
f"Atom {prop.atom_idx} has extreme charge: {prop.charge:.2f}"
)
# Check total charge is reasonable
total_charge = self.get_total_charge()
if abs(total_charge) > 1.0:
# Warning, not error
pass
return len(errors) == 0, errors