"""
GROMACS File Export
Write biochar structures to GROMACS format files (.gro, .top, .itp).
"""
from typing import Dict, Optional, Tuple
from pathlib import Path
from datetime import datetime
import numpy as np
from rdkit import Chem
from .opls_typing import OPLSPropertyTable
from .constants import GROMACS_OPLS_TYPE_MAP, OPLS_ATOM_TYPES, OPLS_LJ_PARAMS
[docs]
class GROFileWriter:
"""Write GROMACS structure (.gro) files."""
[docs]
@staticmethod
def write(
filepath: str,
mol: Chem.Mol,
coords: np.ndarray,
molecule_name: str = "BC",
box_vectors: Optional[np.ndarray] = None,
title: Optional[str] = None,
):
"""
Write .gro structure file.
Args:
filepath: Output file path
mol: RDKit molecule
coords: 3D coordinates (in Ångströms, will be converted to nanometers)
molecule_name: Name of molecule in GRO file (max 5 chars for .gro format)
box_vectors: Box vectors (3x3 matrix) for periodic systems (in nm)
title: Optional title line
Note:
Residue names in GROMACS .gro format are limited to 5 characters.
Suggested naming conventions for mixed biochar simulations:
- Temperature-based: BC400, BC600, BC800
- Composition: BCH05 (H/C=0.5), BCO10 (O/C=0.10)
- Sequential: BC001, BC002, BC003
Coordinate Units:
- Input: RDKit coordinates (Ångströms, Å)
- Output: GROMACS coordinates (nanometers, nm)
- Conversion: 1 Å = 0.1 nm
"""
if title is None:
title = f"Biochar structure generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
num_atoms = mol.GetNumAtoms()
# Ensure residue name is max 5 characters (GROMACS requirement)
residue_name = molecule_name[:5]
if len(molecule_name) > 5:
print(f"Warning: Residue name '{molecule_name}' exceeds 5 character limit, truncating to '{residue_name}'")
# Convert coordinates from Ångströms (RDKit) to nanometers (GROMACS)
coords_nm = coords * 0.1
# Compute a valid bounding box and shift so the molecule sits inside it.
# A zero-size box (0.0 0.0 0.0) is invalid for VMD/PyMOL/GROMACS — they can't
# draw the periodic cell or run minimization. When no box is supplied, build
# an orthogonal box around the molecule with 1 nm padding and shift the
# coordinates so all atoms are at positive (in-box) positions.
if box_vectors is None:
mins = coords_nm.min(axis=0)
maxs = coords_nm.max(axis=0)
extent = maxs - mins
padding = 1.0 # nm on each side
box = np.maximum(extent + 2.0 * padding, 1.0)
shift = padding - mins # places mins at +padding inside the box
coords_nm = coords_nm + shift
box_line = f"{box[0]:.5f} {box[1]:.5f} {box[2]:.5f}\n"
else:
if box_vectors.shape == (3,):
box_line = (
f"{box_vectors[0]:.5f} {box_vectors[1]:.5f} "
f"{box_vectors[2]:.5f}\n"
)
else:
box_line = (
f"{box_vectors[0,0]:.5f} {box_vectors[1,1]:.5f} "
f"{box_vectors[2,2]:.5f} {box_vectors[0,1]:.5f} "
f"{box_vectors[0,2]:.5f} {box_vectors[1,0]:.5f} "
f"{box_vectors[1,2]:.5f} {box_vectors[2,0]:.5f} "
f"{box_vectors[2,1]:.5f}\n"
)
with open(filepath, "w") as f:
# Title line
f.write(f"{title}\n")
# Number of atoms
f.write(f"{num_atoms:5d}\n")
# Atom lines
for atom_idx, atom in enumerate(mol.GetAtoms()):
residue_num = 1
atom_name = f"{atom.GetSymbol()}{atom_idx:d}"[:5] # Truncate to 5 chars max
atom_num = atom_idx + 1
x, y, z = coords_nm[atom_idx]
# Format: residue number, residue name, atom name, atom number, x, y, z
# Columns: 1-5 (resnum), 6-10 (resname), 11-15 (atomname), 16-20 (atomnum)
# 21-28 (x), 29-36 (y), 37-44 (z)
# Note: GROMACS format requires residue_name LEFT-aligned, atom_name RIGHT-aligned
# GROMACS expects coordinates in nanometers (nm) with 3 decimal places
f.write(
f"{residue_num:5d}{residue_name:<5s}{atom_name:>5s}{atom_num:5d}"
f"{x:8.3f}{y:8.3f}{z:8.3f}\n"
)
# Box vectors line (nm). Always emit a valid non-zero box.
f.write(box_line)
[docs]
class TOPFileWriter:
"""Write GROMACS topology (.top) files."""
@staticmethod
def _forcefield_has_atom_types(forcefield_path: str) -> bool:
"""
Check if the forcefield file already defines atom types.
Args:
forcefield_path: Path to forcefield file (relative or absolute)
Returns:
True if [ atomtypes ] section found, False otherwise
"""
# Try different possible locations
possible_paths = [
Path(forcefield_path),
Path.cwd() / forcefield_path,
Path.cwd().parent / forcefield_path,
]
for fpath in possible_paths:
if fpath.exists():
try:
with open(fpath, "r") as f:
content = f.read()
if "[ atomtypes ]" in content or "[atomtypes]" in content:
return True
except Exception:
pass
# If we can't find/read the file, assume atom types might be there
# and don't include them (safer assumption)
return True
[docs]
@staticmethod
def write(
filepath: str,
mol: Chem.Mol,
atom_types: Dict[int, str],
charges: Dict[int, float],
molecule_name: str = "BIOCHAR",
forcefield_path: str = "oplsaa.ff/forcefield.itp",
include_dihedrals: bool = True,
):
"""
Write .top topology file.
Args:
filepath: Output file path
mol: RDKit molecule
atom_types: Dictionary of {atom_idx: opls_type}
charges: Dictionary of {atom_idx: charge}
molecule_name: Name of molecule/residue
forcefield_path: Path to forcefield file (relative or absolute)
include_dihedrals: If True, include dihedral section
"""
with open(filepath, "w") as f:
# Header
f.write("; Generated by Biochar Generator\n")
f.write(f"; Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
# Include forcefield (for bond/angle/dihedral parameters and atom types)
f.write(f"#include \"{forcefield_path}\"\n\n")
# Only define atom types if not already in forcefield
if not TOPFileWriter._forcefield_has_atom_types(forcefield_path):
f.write("[ atomtypes ]\n")
f.write("; name mass charge particle_type sigma epsilon\n")
written: set[str] = set()
for generic in sorted(OPLS_ATOM_TYPES.keys()):
gromacs_name = GROMACS_OPLS_TYPE_MAP.get(generic, generic)
if gromacs_name in written:
continue
written.add(gromacs_name)
desc, mass, default_charge = OPLS_ATOM_TYPES[generic]
sigma, epsilon = OPLS_LJ_PARAMS.get(generic, (0.0, 0.0))
f.write(
f"{gromacs_name:10s} {mass:7.3f} {default_charge:8.4f} A "
f"{sigma:8.5f} {epsilon:10.5f}\n"
)
f.write("\n")
# Moleculetype section
f.write("[ moleculetype ]\n")
f.write(f"{molecule_name:20s} 3\n\n")
# Atoms section
f.write("[ atoms ]\n")
f.write("; nr type resnr residue atom cgnr charge mass\n")
prop_table = OPLSPropertyTable(mol, atom_types, charges)
for prop in prop_table.get_properties():
atom_num = prop.atom_idx + 1
opls_type = GROMACS_OPLS_TYPE_MAP.get(prop.opls_type, prop.opls_type)
residue_num = 1
residue_name = molecule_name
atom_name = f"{prop.symbol}{prop.atom_idx}"
cgnr = 1
charge = prop.charge
mass = prop.mass
f.write(
f"{atom_num:5d} {opls_type:5s} {residue_num:5d} {residue_name:5s} "
f"{atom_name:5s} {cgnr:5d} {charge:10.6f} {mass:10.4f}\n"
)
# Bonds section
f.write("\n[ bonds ]\n")
f.write("; ai aj funct c0 c1\n")
for bond in mol.GetBonds():
i = bond.GetBeginAtomIdx() + 1
j = bond.GetEndAtomIdx() + 1
# GROMACS harmonic bond: funct=1
f.write(f"{i:5d} {j:5d} 1\n")
# Angles section
f.write("\n[ angles ]\n")
f.write("; ai aj ak funct c0 c1\n")
ring_info = mol.GetRingInfo()
ring_info.NumRings() # Force computation
# Extract angles from rings and bonds
angles_written = set()
for atom in mol.GetAtoms():
neighbors = [n.GetIdx() for n in atom.GetNeighbors()]
if len(neighbors) >= 2:
for i, n1 in enumerate(neighbors):
for n2 in neighbors[i + 1 :]:
angle_tuple = tuple(sorted([n1, atom.GetIdx(), n2]))
if angle_tuple not in angles_written:
f.write(f"{n1 + 1:5d} {atom.GetIdx() + 1:5d} {n2 + 1:5d} 1\n")
angles_written.add(angle_tuple)
# Dihedrals section (if requested)
if include_dihedrals:
f.write("\n[ dihedrals ]\n")
f.write("; ai aj ak al funct\n")
# Simple approach: write dihedrals for all 4-atom chains
dihedrals_written = set()
for atom in mol.GetAtoms():
for neighbor in atom.GetNeighbors():
for third in neighbor.GetNeighbors():
if third.GetIdx() == atom.GetIdx():
continue
for fourth in third.GetNeighbors():
if fourth.GetIdx() == neighbor.GetIdx():
continue
dihedral = (
atom.GetIdx(),
neighbor.GetIdx(),
third.GetIdx(),
fourth.GetIdx(),
)
# Only write unique dihedrals
dih_sorted = tuple(sorted(dihedral))
if dih_sorted not in dihedrals_written:
f.write(
f"{dihedral[0] + 1:5d} {dihedral[1] + 1:5d} "
f"{dihedral[2] + 1:5d} {dihedral[3] + 1:5d} 3\n"
)
dihedrals_written.add(dih_sorted)
# System section
f.write("\n[ system ]\n")
f.write(f"{molecule_name}\n\n")
# Molecules section
f.write("[ molecules ]\n")
f.write(f"{molecule_name} 1\n")
[docs]
class ITPFileWriter:
"""Write GROMACS include topology (.itp) files.
Note: .itp files are typically included in a .top file that defines atom types.
We don't include [ atomtypes ] here to avoid duplication.
"""
[docs]
@staticmethod
def write(
filepath: str,
mol: Chem.Mol,
atom_types: Dict[int, str],
charges: Dict[int, float],
molecule_name: str = "BIOCHAR",
include_dihedrals: bool = True,
):
"""
Write .itp molecule definition file.
Args:
filepath: Output file path
mol: RDKit molecule
atom_types: Dictionary of {atom_idx: opls_type}
charges: Dictionary of {atom_idx: charge}
molecule_name: Name of molecule
include_dihedrals: If True, include dihedral section
Note:
The .itp file includes atom type definitions for standalone use.
When included in a .top file, the atom types will be shared.
"""
with open(filepath, "w") as f:
# Header
f.write("; Molecular definition for biochar\n")
f.write(f"; Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write("; Include this file in a .top file that defines atom types\n\n")
# Note: We don't include [ atomtypes ] here. The .itp file is meant to be
# included in a .top file that already defines all atom types via #include.
# Moleculetype section
f.write("[ moleculetype ]\n")
f.write(f"{molecule_name:20s} 3\n\n")
# Atoms section
f.write("[ atoms ]\n")
f.write("; nr type resnr residue atom cgnr charge mass\n")
prop_table = OPLSPropertyTable(mol, atom_types, charges)
for prop in prop_table.get_properties():
atom_num = prop.atom_idx + 1
opls_type = GROMACS_OPLS_TYPE_MAP.get(prop.opls_type, prop.opls_type)
residue_num = 1
residue_name = molecule_name
atom_name = f"{prop.symbol}{prop.atom_idx}"
cgnr = 1
charge = prop.charge
mass = prop.mass
f.write(
f"{atom_num:5d} {opls_type:5s} {residue_num:5d} {residue_name:5s} "
f"{atom_name:5s} {cgnr:5d} {charge:10.6f} {mass:10.4f}\n"
)
# Bonds section
f.write("\n[ bonds ]\n")
f.write("; ai aj funct c0 c1\n")
for bond in mol.GetBonds():
i = bond.GetBeginAtomIdx() + 1
j = bond.GetEndAtomIdx() + 1
f.write(f"{i:5d} {j:5d} 1\n")
# Angles section
f.write("\n[ angles ]\n")
f.write("; ai aj ak funct c0 c1\n")
angles_written = set()
for atom in mol.GetAtoms():
neighbors = [n.GetIdx() for n in atom.GetNeighbors()]
if len(neighbors) >= 2:
for i, n1 in enumerate(neighbors):
for n2 in neighbors[i + 1 :]:
angle_tuple = tuple(sorted([n1, atom.GetIdx(), n2]))
if angle_tuple not in angles_written:
f.write(f"{n1 + 1:5d} {atom.GetIdx() + 1:5d} {n2 + 1:5d} 1\n")
angles_written.add(angle_tuple)
if include_dihedrals:
f.write("\n[ dihedrals ]\n")
f.write("; ai aj ak al funct\n")
dihedrals_written = set()
for atom in mol.GetAtoms():
for neighbor in atom.GetNeighbors():
for third in neighbor.GetNeighbors():
if third.GetIdx() == atom.GetIdx():
continue
for fourth in third.GetNeighbors():
if fourth.GetIdx() == neighbor.GetIdx():
continue
dihedral = (
atom.GetIdx(),
neighbor.GetIdx(),
third.GetIdx(),
fourth.GetIdx(),
)
dih_sorted = tuple(sorted(dihedral))
if dih_sorted not in dihedrals_written:
f.write(
f"{dihedral[0] + 1:5d} {dihedral[1] + 1:5d} "
f"{dihedral[2] + 1:5d} {dihedral[3] + 1:5d} 3\n"
)
dihedrals_written.add(dih_sorted)
[docs]
class GromacsExporter:
"""High-level interface for exporting to GROMACS files."""
def __init__(self, output_directory: str = "."):
self.output_dir = Path(output_directory)
self.output_dir.mkdir(parents=True, exist_ok=True)
[docs]
def export(
self,
mol: Chem.Mol,
coords: np.ndarray,
atom_types: Dict[int, str],
charges: Dict[int, float],
molecule_name: str = "BC",
basename: str = "structure",
include_periodic_box: bool = False,
box_size: Optional[np.ndarray] = None,
) -> Tuple[Path, Path, Path]:
"""
Export molecule to GROMACS files.
Args:
mol: RDKit molecule
coords: 3D coordinates
atom_types: Atom type mapping
charges: Charge mapping
molecule_name: Molecule name
basename: Base filename (without extension)
include_periodic_box: If True, create periodic box
box_size: Box dimensions [Lx, Ly, Lz]
Returns:
(gro_path, top_path, itp_path)
"""
# Determine box vectors
box_vectors = None
if include_periodic_box:
if box_size is None:
# Auto-determine box size from coordinates
box_size = self._calculate_box_size(coords)
box_vectors = np.diag(box_size)
# Write files
gro_path = self.output_dir / f"{basename}.gro"
GROFileWriter.write(
str(gro_path),
mol,
coords,
molecule_name=molecule_name,
box_vectors=box_vectors,
)
top_path = self.output_dir / f"{basename}.top"
TOPFileWriter.write(
str(top_path),
mol,
atom_types,
charges,
molecule_name=molecule_name,
)
itp_path = self.output_dir / f"{basename}.itp"
ITPFileWriter.write(
str(itp_path),
mol,
atom_types,
charges,
molecule_name=molecule_name,
)
return gro_path, top_path, itp_path
@staticmethod
def _calculate_box_size(coords: np.ndarray, padding: float = 1.0) -> np.ndarray:
"""
Calculate appropriate box size based on coordinates.
Args:
coords: 3D coordinates (expected in Ångströms from RDKit)
padding: Padding around molecule (nm)
Returns:
Box size [Lx, Ly, Lz] in nanometers (nm)
"""
# Coordinates are in Ångströms, convert to nanometers
coords_nm = coords * 0.1
min_coords = coords_nm.min(axis=0)
max_coords = coords_nm.max(axis=0)
extent = max_coords - min_coords
box_size = extent + 2 * padding
return box_size
[docs]
class MultiSheetGROWriter:
"""Write a multi-residue .gro file for surface systems.
Each sheet occupies a separate residue with an incrementing residue
number. Atom numbering is global and contiguous across all sheets.
"""
[docs]
@staticmethod
def write(
filepath: str,
sheets: list, # List[SheetResult] — avoid circular import
box_vectors: np.ndarray, # 1-D (3,) in nm, or 3×3 in nm
title: Optional[str] = None,
):
"""
Write a single .gro containing atoms from multiple sheets.
Args:
filepath: Output path.
sheets: List of SheetResult objects. Each must expose
``.mol`` (RDKit Mol), ``.coords`` (Å, shape N×3),
and ``.molecule_name`` (str ≤ 5 chars).
box_vectors: Periodic box vectors in nm (orthogonal 1-D or
full 3×3 triclinic).
title: Optional title line.
"""
if title is None:
title = (
f"Multi-sheet surface — "
f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
)
total_atoms = sum(s.mol.GetNumAtoms() for s in sheets)
with open(filepath, "w") as f:
f.write(f"{title}\n")
f.write(f"{total_atoms:5d}\n")
global_atom_num = 0
for residue_num, sheet in enumerate(sheets, start=1):
residue_name = sheet.molecule_name[:5]
for atom_idx, atom in enumerate(sheet.mol.GetAtoms()):
global_atom_num += 1
atom_name = f"{atom.GetSymbol()}{atom_idx}"[:5]
x_nm, y_nm, z_nm = sheet.coords[atom_idx] * 0.1 # Å → nm
# Format: residue_name LEFT-aligned, atom_name RIGHT-aligned
f.write(
f"{residue_num % 100000:5d}"
f"{residue_name:<5s}"
f"{atom_name:>5s}"
f"{global_atom_num % 100000:5d}"
f"{x_nm:8.3f}{y_nm:8.3f}{z_nm:8.3f}\n"
)
# Box vectors line
bv = np.asarray(box_vectors)
if bv.ndim == 1 and bv.shape[0] == 3:
f.write(
f"{bv[0]:.5f} {bv[1]:.5f} {bv[2]:.5f}\n"
)
else:
# Full 3×3 triclinic in GROMACS 9-value order:
# v1x v2y v3z v1y v1z v2x v2z v3x v3y
f.write(
f"{bv[0,0]:.5f} {bv[1,1]:.5f} {bv[2,2]:.5f} "
f"{bv[0,1]:.5f} {bv[0,2]:.5f} {bv[1,0]:.5f} "
f"{bv[1,2]:.5f} {bv[2,0]:.5f} {bv[2,1]:.5f}\n"
)
[docs]
class SurfaceTopologyWriter:
"""Write .top files for multi-sheet surface systems."""
[docs]
@staticmethod
def write(
filepath: str,
sheets: list, # List[SheetResult]
itp_paths: list, # List[Path]
sheets_identical: bool,
system_name: str = "Slit Pore Surface",
forcefield_path: str = "oplsaa.ff/forcefield.itp",
):
"""
Write a combined .top for a surface system.
When *sheets_identical* is ``True``:
- One ``#include`` for the single .itp.
- ``[ molecules ]`` lists the molecule name once with count = N.
When *sheets_identical* is ``False``:
- One ``#include`` per distinct .itp.
- ``[ molecules ]`` lists each molecule name once with count = 1.
"""
with open(filepath, "w") as f:
f.write("; Surface topology — GROMACS simulation\n")
f.write("; Auto-generated by Biochar Surface Builder\n\n")
f.write(f'#include "{forcefield_path}"\n\n')
if sheets_identical:
f.write(f'#include "{Path(itp_paths[0]).name}"\n\n')
f.write("[ system ]\n")
f.write(f"{system_name}\n\n")
f.write("[ molecules ]\n")
f.write("; Name #molecules\n")
f.write(
f"{sheets[0].molecule_name:<20s} {len(sheets)}\n"
)
else:
for itp_path in itp_paths:
f.write(f'#include "{Path(itp_path).name}"\n')
f.write("\n[ system ]\n")
f.write(f"{system_name}\n\n")
f.write("[ molecules ]\n")
f.write("; Name #molecules\n")
for sheet in sheets:
f.write(f"{sheet.molecule_name:<20s} 1\n")