Source code for wetting_angle_kit.parser.parser_xyz

from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence, Tuple

import numpy as np

from .base_parser import BaseParser


[docs] class XYZParser(BaseParser): """XYZ trajectory parser. Parameters ---------- filepath : str Path to extended XYZ trajectory containing per-frame atom count, comment line with lattice vectors, then atom symbol + coordinates. """ def __init__(self, filepath: str): self.filepath = filepath self.frames = self.load_xyz_file()
[docs] def load_xyz_file(self) -> List[Dict[str, Any]]: """Load all frames from the XYZ file into memory. Returns ------- list[dict] Each entry has keys: ``symbols``, ``positions``, ``lattice_matrix``. """ frames = [] with open(self.filepath, "r") as file: lines = file.readlines() frame_start = 0 while frame_start < len(lines): num_atoms = int(lines[frame_start].strip()) frame_start += 1 comment_line = lines[frame_start].strip() lattice_info = comment_line.split('Lattice="')[1].split('"')[0] lattice_vectors = np.array(lattice_info.split(), dtype=float) lattice_matrix = lattice_vectors.reshape(3, 3) frame_start += 1 symbols = [] positions = [] for i in range(num_atoms): parts = lines[frame_start + i].strip().split() symbols.append(parts[0]) positions.append([float(coord) for coord in parts[1:4]]) frames.append( { "symbols": np.array(symbols), "positions": np.array(positions), "lattice_matrix": lattice_matrix, } ) frame_start += num_atoms return frames
[docs] def parse(self, frame_index: int, indices: np.ndarray | None = None) -> np.ndarray: """Return Cartesian coordinates for selected atoms in a frame. Parameters ---------- frame_index : int Frame index. indices : sequence[int], optional Atom indices to select; if None all atoms are returned. Returns ------- ndarray, shape (M, 3) Coordinates of requested atoms. """ frame = self.frames[frame_index] if indices is not None and len(indices) > 0: indices = np.array(indices, dtype=int) return frame["positions"][indices] return frame["positions"]
[docs] def parse_liquid_particles( self, liquid_particle_types: List[str], frame_index: int ) -> np.ndarray: """Return positions of liquid particles (filter by symbols). Parameters ---------- liquid_particle_types : sequence[str] Atom symbols considered liquid (e.g. water molecule constituents). frame_index : int Frame index. Returns ------- ndarray, shape (L, 3) Cartesian coordinates of liquid atoms. """ frame = self.frames[frame_index] mask = np.isin(frame["symbols"], liquid_particle_types) return frame["positions"][mask]
[docs] def get_profile_coordinates( self, frame_indices: Sequence[int], droplet_geometry: str = "cylinder_y", atom_indices: Optional[Sequence[int]] = None, ) -> Tuple[np.ndarray, np.ndarray, int]: """ Compute 2D projection coordinates (r, z) for contact angle analysis. Projects 3D atomic positions onto a 2D plane based on the assumed droplet geometry and simulation box boundaries. Parameters ---------- frame_indices : Sequence[int] List of frames to process. droplet_geometry : str, default 'cylinder_y' The physical shape of the water droplet in the simulation box: * 'cylinder_y': A hemi-cylindrical droplet aligned along the Y-axis. (Returns x as the radial coordinate). * 'cylinder_x': A hemi-cylindrical droplet aligned along the X-axis. (Returns y as the radial coordinate). * 'spherical': A spherical cap droplet. (Returns sqrt(x^2 + y^2) as the radial coordinate). atom_indices : Sequence[int], optional Subset of atom indices to include (e.g., only liquid atoms). Returns ------- r_values : np.ndarray The lateral/radial distances from the droplet center/axis. z_values : np.ndarray The vertical coordinates (height) of the atoms. n_frames : int Number of frames processed. """ r_values = np.array([]) z_values = np.array([]) for frame_idx in frame_indices: frame = self.frames[frame_idx] x_par = frame["positions"] if atom_indices is not None: atom_indices_arr = np.array(atom_indices) x_par = x_par[atom_indices_arr] x_cm = np.mean(x_par, axis=0) x_0 = x_par - x_cm x_0[:, 2] = x_par[:, 2] if droplet_geometry == "cylinder_y": r_frame = np.abs(x_0[:, 0] + 0.01) elif droplet_geometry == "cylinder_x": r_frame = np.abs(x_0[:, 1] + 0.01) else: # spherical r_frame = np.sqrt(x_0[:, 0] ** 2 + x_0[:, 1] ** 2) z_frame = x_0[:, 2] r_values = np.concatenate((r_values, r_frame)) z_values = np.concatenate((z_values, z_frame)) if frame_idx % 10 == 0: print(f"Frame: {frame_idx}\nCenter of Mass: {x_cm}") print(f"\nr range:\t({np.min(r_values)},{np.max(r_values)})") print(f"z range:\t({np.min(z_values)},{np.max(z_values)})") return r_values, z_values, len(frame_indices)
[docs] def box_length_max(self, frame_index: int) -> float: """Return the maximum lattice vector length for a frame. Parameters ---------- frame_index : int Frame index. Returns ------- float Max ``|a_i|`` over lattice vectors. """ lattice_matrix = self.frames[frame_index]["lattice_matrix"] return float(np.max(np.linalg.norm(lattice_matrix, axis=1)))
[docs] def box_size_x(self, frame_index: int) -> float: """Return the box length along x (a1x component). Parameters ---------- frame_index : int Frame index. Returns ------- float Box x-length. """ lattice_matrix = self.frames[frame_index]["lattice_matrix"] return float(lattice_matrix[0, 0])
[docs] def box_size_y(self, frame_index: int) -> float: """Return the box length along y (a2y component). Parameters ---------- frame_index : int Frame index. Returns ------- float Box y-length. """ lattice_matrix = self.frames[frame_index]["lattice_matrix"] return float(lattice_matrix[1, 1])
[docs] def frame_count(self): """Return total number of frames loaded.""" return len(self.frames)
[docs] class XYZWaterMoleculeFinder(BaseParser): """Parser specialized for identifying water oxygen atoms in XYZ trajectories. Parameters ---------- filepath : str Path to XYZ file. particle_type_wall : sequence[str] Symbols that represent wall (excluded) particles. oxygen_type : str, default "O" Oxygen atom symbol. hydrogen_type : str, default "H" Hydrogen atom symbol. oh_cutoff : float, default 1.2 Distance cutoff (Å) for O-H bonding to identify water molecules. """ def __init__( self, filepath, particle_type_wall, oxygen_type="O", hydrogen_type="H", oh_cutoff: float = 1.2, ): self.filepath = filepath self.particle_type_wall = particle_type_wall self.oxygen_type = oxygen_type self.hydrogen_type = hydrogen_type self.oh_cutoff = oh_cutoff self.frames = self.load_xyz_file()
[docs] def load_xyz_file(self): """Load frames (without lattice) for water oxygen analysis.""" frames = [] with open(self.filepath, "r") as file: lines = file.readlines() frame_start = 0 while frame_start < len(lines): num_atoms = int(lines[frame_start].strip()) frame_start += 1 frame_start += 1 # skip comment symbols = [] positions = [] for i in range(num_atoms): parts = lines[frame_start + i].strip().split() symbols.append(parts[0]) positions.append([float(coord) for coord in parts[1:4]]) frames.append( {"symbols": np.array(symbols), "positions": np.array(positions)} ) frame_start += num_atoms return frames
[docs] def parse(self, liquid_particle_types: List[str], frame_index: int) -> np.ndarray: """Return liquid particle coordinates filtering wall types. Parameters ---------- liquid_particle_types : sequence[str] Symbols for liquid particles. frame_index : int Frame index. Returns ------- ndarray, shape (L, 3) Liquid atom positions. """ frame = self.frames[frame_index] mask = np.isin(frame["symbols"], liquid_particle_types) return frame["positions"][mask]
[docs] def get_profile_coordinates( self, frame_indices: Sequence[int], droplet_geometry: str = "cylinder_y", atom_indices: Optional[Sequence[int]] = None, ) -> Tuple[np.ndarray, np.ndarray, int]: """ Compute 2D projection coordinates (r, z) for contact angle analysis. Projects 3D atomic positions onto a 2D plane based on the assumed droplet geometry and simulation box boundaries. Parameters ---------- frame_indices : Sequence[int] List of frames to process. droplet_geometry : str, default 'cylinder_y' The physical shape of the water droplet in the simulation box: * 'cylinder_y': A hemi-cylindrical droplet aligned along the Y-axis. (Returns x as the radial coordinate). * 'cylinder_x': A hemi-cylindrical droplet aligned along the X-axis. (Returns y as the radial coordinate). * 'spherical': A spherical cap droplet. (Returns sqrt(x^2 + y^2) as the radial coordinate). atom_indices : Sequence[int], optional Subset of atom indices to include (e.g., only liquid atoms). Returns ------- r_values : np.ndarray The lateral/radial distances from the droplet center/axis. z_values : np.ndarray The vertical coordinates (height) of the atoms. n_frames : int Number of frames processed. """ r_values = np.array([]) z_values = np.array([]) for frame_idx in frame_indices: frame = self.frames[frame_idx] x_par = frame["positions"] if atom_indices is not None: atom_indices_arr = np.array(atom_indices) x_par = x_par[atom_indices_arr] x_cm = np.mean(x_par, axis=0) x_0 = x_par - x_cm x_0[:, 2] = x_par[:, 2] if droplet_geometry == "cylinder_y": r_frame = np.abs(x_0[:, 0] + 0.01) elif droplet_geometry == "cylinder_x": r_frame = np.abs(x_0[:, 1] + 0.01) else: r_frame = np.sqrt(x_0[:, 0] ** 2 + x_0[:, 1] ** 2) z_frame = x_0[:, 2] r_values = np.concatenate((r_values, r_frame)) z_values = np.concatenate((z_values, z_frame)) if frame_idx % 10 == 0: print(f"Frame: {frame_idx}\nCenter of Mass: {x_cm}") print(f"\nr range:\t({np.min(r_values)},{np.max(r_values)})") print(f"z range:\t({np.min(z_values)},{np.max(z_values)})") return r_values, z_values, len(frame_indices)
[docs] def box_length_max(self, frame_index: int) -> float: """Return the maximum lattice vector length for a frame. Parameters ---------- frame_index : int Frame index. Returns ------- float Max ``|a_i|`` over lattice vectors. """ lattice_matrix = self.frames[frame_index]["lattice_matrix"] return float(np.max(np.linalg.norm(lattice_matrix, axis=1)))
[docs] def get_water_oxygen_indices(self, frame_index): """Return indices of oxygen atoms belonging to water molecules. Parameters ---------- frame_index : int Frame index. Returns ------- ndarray Indices of oxygen atoms with exactly two hydrogens within cutoff. """ data = self.frames[frame_index] positions = data["positions"] symbols = data["symbols"] oxygen_indices = np.where(symbols == self.oxygen_type)[0] hydrogen_indices = np.where(symbols == self.hydrogen_type)[0] return self._manual_water_identification( positions, oxygen_indices, hydrogen_indices )
[docs] def get_water_oxygen_positions(self, frame_index): """Return coordinates of water oxygen atoms for a frame. Parameters ---------- frame_index : int Frame index. Returns ------- ndarray, shape (N, 3) Oxygen atom positions; empty array if none detected. """ positions = self.frames[frame_index]["positions"] indices = self.get_water_oxygen_indices(frame_index) if len(indices) == 0: return np.empty((0, 3)) return positions[indices]
def _manual_water_identification(self, positions, oxygen_indices, hydrogen_indices): """Identify water oxygens by counting hydrogens within cutoff distance. Parameters ---------- positions : ndarray, shape (N, 3) Atomic positions. oxygen_indices : ndarray Candidate oxygen indices. hydrogen_indices : ndarray Hydrogen indices to check. Returns ------- ndarray Oxygen indices with exactly two nearby hydrogens. """ water_oxygens = [] for o_idx in oxygen_indices: o_pos = positions[o_idx] h_count = 0 for h_idx in hydrogen_indices: h_pos = positions[h_idx] if np.linalg.norm(o_pos - h_pos) <= self.oh_cutoff: h_count += 1 if h_count == 2: water_oxygens.append(o_idx) return np.array(water_oxygens)