Source code for wetting_angle_kit.parser.parser_xyz

from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence, Tuple

import numpy as np

from .base_parser import BaseParser



[docs]
class XYZParser(BaseParser):
    """XYZ trajectory parser.

    Parameters
    ----------
    filepath : str
        Path to extended XYZ trajectory containing per-frame atom count, comment line
        with lattice vectors, then atom symbol + coordinates.
    """

    def __init__(self, filepath: str):
        self.filepath = filepath
        self.frames = self.load_xyz_file()


[docs]
    def load_xyz_file(self) -> List[Dict[str, Any]]:
        """Load all frames from the XYZ file into memory.

        Returns
        -------
        list[dict]
            Each entry has keys: ``symbols``, ``positions``, ``lattice_matrix``.
        """
        frames = []
        with open(self.filepath, "r") as file:
            lines = file.readlines()
        frame_start = 0
        while frame_start < len(lines):
            num_atoms = int(lines[frame_start].strip())
            frame_start += 1
            comment_line = lines[frame_start].strip()
            lattice_info = comment_line.split('Lattice="')[1].split('"')[0]
            lattice_vectors = np.array(lattice_info.split(), dtype=float)
            lattice_matrix = lattice_vectors.reshape(3, 3)
            frame_start += 1
            symbols = []
            positions = []
            for i in range(num_atoms):
                parts = lines[frame_start + i].strip().split()
                symbols.append(parts[0])
                positions.append([float(coord) for coord in parts[1:4]])
            frames.append(
                {
                    "symbols": np.array(symbols),
                    "positions": np.array(positions),
                    "lattice_matrix": lattice_matrix,
                }
            )
            frame_start += num_atoms
        return frames



[docs]
    def parse(self, frame_index: int, indices: np.ndarray | None = None) -> np.ndarray:
        """Return Cartesian coordinates for selected atoms in a frame.

        Parameters
        ----------
        frame_index : int
            Frame index.
        indices : sequence[int], optional
            Atom indices to select; if None all atoms are returned.

        Returns
        -------
        ndarray, shape (M, 3)
            Coordinates of requested atoms.
        """
        frame = self.frames[frame_index]
        if indices is not None and len(indices) > 0:
            indices = np.array(indices, dtype=int)
            return frame["positions"][indices]
        return frame["positions"]



[docs]
    def parse_liquid_particles(
        self, liquid_particle_types: List[str], frame_index: int
    ) -> np.ndarray:
        """Return positions of liquid particles (filter by symbols).

        Parameters
        ----------
        liquid_particle_types : sequence[str]
            Atom symbols considered liquid (e.g. water molecule constituents).
        frame_index : int
            Frame index.

        Returns
        -------
        ndarray, shape (L, 3)
            Cartesian coordinates of liquid atoms.
        """
        frame = self.frames[frame_index]
        mask = np.isin(frame["symbols"], liquid_particle_types)
        return frame["positions"][mask]



[docs]
    def get_profile_coordinates(
        self,
        frame_indices: Sequence[int],
        droplet_geometry: str = "cylinder_y",
        atom_indices: Optional[Sequence[int]] = None,
    ) -> Tuple[np.ndarray, np.ndarray, int]:
        """
        Compute 2D projection coordinates (r, z) for contact angle analysis.

        Projects 3D atomic positions onto a 2D plane based on the assumed
        droplet geometry and simulation box boundaries.

        Parameters
        ----------
        frame_indices : Sequence[int]
            List of frames to process.
        droplet_geometry : str, default 'cylinder_y'
            The physical shape of the water droplet in the simulation box:
            * 'cylinder_y': A hemi-cylindrical droplet aligned along the Y-axis.
               (Returns x as the radial coordinate).
            * 'cylinder_x': A hemi-cylindrical droplet aligned along the X-axis.
               (Returns y as the radial coordinate).
            * 'spherical': A spherical cap droplet.
               (Returns sqrt(x^2 + y^2) as the radial coordinate).
        atom_indices : Sequence[int], optional
            Subset of atom indices to include (e.g., only liquid atoms).

        Returns
        -------
        r_values : np.ndarray
            The lateral/radial distances from the droplet center/axis.
        z_values : np.ndarray
            The vertical coordinates (height) of the atoms.
        n_frames : int
            Number of frames processed.
        """
        r_values = np.array([])
        z_values = np.array([])
        for frame_idx in frame_indices:
            frame = self.frames[frame_idx]
            x_par = frame["positions"]
            if atom_indices is not None:
                atom_indices_arr = np.array(atom_indices)
                x_par = x_par[atom_indices_arr]
            x_cm = np.mean(x_par, axis=0)
            x_0 = x_par - x_cm
            x_0[:, 2] = x_par[:, 2]
            if droplet_geometry == "cylinder_y":
                r_frame = np.abs(x_0[:, 0] + 0.01)
            elif droplet_geometry == "cylinder_x":
                r_frame = np.abs(x_0[:, 1] + 0.01)
            else:  # spherical
                r_frame = np.sqrt(x_0[:, 0] ** 2 + x_0[:, 1] ** 2)
            z_frame = x_0[:, 2]
            r_values = np.concatenate((r_values, r_frame))
            z_values = np.concatenate((z_values, z_frame))
            if frame_idx % 10 == 0:
                print(f"Frame: {frame_idx}\nCenter of Mass: {x_cm}")
        print(f"\nr range:\t({np.min(r_values)},{np.max(r_values)})")
        print(f"z range:\t({np.min(z_values)},{np.max(z_values)})")
        return r_values, z_values, len(frame_indices)



[docs]
    def box_length_max(self, frame_index: int) -> float:
        """Return the maximum lattice vector length for a frame.

        Parameters
        ----------
        frame_index : int
            Frame index.

        Returns
        -------
        float
            Max ``|a_i|`` over lattice vectors.
        """
        lattice_matrix = self.frames[frame_index]["lattice_matrix"]
        return float(np.max(np.linalg.norm(lattice_matrix, axis=1)))



[docs]
    def box_size_x(self, frame_index: int) -> float:
        """Return the box length along x (a1x component).

        Parameters
        ----------
        frame_index : int
            Frame index.

        Returns
        -------
        float
            Box x-length.
        """
        lattice_matrix = self.frames[frame_index]["lattice_matrix"]
        return float(lattice_matrix[0, 0])



[docs]
    def box_size_y(self, frame_index: int) -> float:
        """Return the box length along y (a2y component).

        Parameters
        ----------
        frame_index : int
            Frame index.

        Returns
        -------
        float
            Box y-length.
        """
        lattice_matrix = self.frames[frame_index]["lattice_matrix"]
        return float(lattice_matrix[1, 1])



[docs]
    def frame_count(self):
        """Return total number of frames loaded."""
        return len(self.frames)





[docs]
class XYZWaterMoleculeFinder(BaseParser):
    """Parser specialized for identifying water oxygen atoms in XYZ trajectories.

    Parameters
    ----------
    filepath : str
        Path to XYZ file.
    particle_type_wall : sequence[str]
        Symbols that represent wall (excluded) particles.
    oxygen_type : str, default "O"
        Oxygen atom symbol.
    hydrogen_type : str, default "H"
        Hydrogen atom symbol.
    oh_cutoff : float, default 1.2
        Distance cutoff (Å) for O-H bonding to identify water molecules.
    """

    def __init__(
        self,
        filepath,
        particle_type_wall,
        oxygen_type="O",
        hydrogen_type="H",
        oh_cutoff: float = 1.2,
    ):
        self.filepath = filepath
        self.particle_type_wall = particle_type_wall
        self.oxygen_type = oxygen_type
        self.hydrogen_type = hydrogen_type
        self.oh_cutoff = oh_cutoff
        self.frames = self.load_xyz_file()


[docs]
    def load_xyz_file(self):
        """Load frames (without lattice) for water oxygen analysis."""
        frames = []
        with open(self.filepath, "r") as file:
            lines = file.readlines()
        frame_start = 0
        while frame_start < len(lines):
            num_atoms = int(lines[frame_start].strip())
            frame_start += 1
            frame_start += 1  # skip comment
            symbols = []
            positions = []
            for i in range(num_atoms):
                parts = lines[frame_start + i].strip().split()
                symbols.append(parts[0])
                positions.append([float(coord) for coord in parts[1:4]])
            frames.append(
                {"symbols": np.array(symbols), "positions": np.array(positions)}
            )
            frame_start += num_atoms
        return frames



[docs]
    def parse(self, liquid_particle_types: List[str], frame_index: int) -> np.ndarray:
        """Return liquid particle coordinates filtering wall types.

        Parameters
        ----------
        liquid_particle_types : sequence[str]
            Symbols for liquid particles.
        frame_index : int
            Frame index.

        Returns
        -------
        ndarray, shape (L, 3)
            Liquid atom positions.
        """
        frame = self.frames[frame_index]
        mask = np.isin(frame["symbols"], liquid_particle_types)
        return frame["positions"][mask]



[docs]
    def get_profile_coordinates(
        self,
        frame_indices: Sequence[int],
        droplet_geometry: str = "cylinder_y",
        atom_indices: Optional[Sequence[int]] = None,
    ) -> Tuple[np.ndarray, np.ndarray, int]:
        """
        Compute 2D projection coordinates (r, z) for contact angle analysis.

        Projects 3D atomic positions onto a 2D plane based on the assumed
        droplet geometry and simulation box boundaries.

        Parameters
        ----------
        frame_indices : Sequence[int]
            List of frames to process.
        droplet_geometry : str, default 'cylinder_y'
            The physical shape of the water droplet in the simulation box:
            * 'cylinder_y': A hemi-cylindrical droplet aligned along the Y-axis.
               (Returns x as the radial coordinate).
            * 'cylinder_x': A hemi-cylindrical droplet aligned along the X-axis.
               (Returns y as the radial coordinate).
            * 'spherical': A spherical cap droplet.
               (Returns sqrt(x^2 + y^2) as the radial coordinate).
        atom_indices : Sequence[int], optional
            Subset of atom indices to include (e.g., only liquid atoms).

        Returns
        -------
        r_values : np.ndarray
            The lateral/radial distances from the droplet center/axis.
        z_values : np.ndarray
            The vertical coordinates (height) of the atoms.
        n_frames : int
            Number of frames processed.
        """
        r_values = np.array([])
        z_values = np.array([])
        for frame_idx in frame_indices:
            frame = self.frames[frame_idx]
            x_par = frame["positions"]
            if atom_indices is not None:
                atom_indices_arr = np.array(atom_indices)
                x_par = x_par[atom_indices_arr]
            x_cm = np.mean(x_par, axis=0)
            x_0 = x_par - x_cm
            x_0[:, 2] = x_par[:, 2]
            if droplet_geometry == "cylinder_y":
                r_frame = np.abs(x_0[:, 0] + 0.01)
            elif droplet_geometry == "cylinder_x":
                r_frame = np.abs(x_0[:, 1] + 0.01)
            else:
                r_frame = np.sqrt(x_0[:, 0] ** 2 + x_0[:, 1] ** 2)
            z_frame = x_0[:, 2]
            r_values = np.concatenate((r_values, r_frame))
            z_values = np.concatenate((z_values, z_frame))
            if frame_idx % 10 == 0:
                print(f"Frame: {frame_idx}\nCenter of Mass: {x_cm}")
        print(f"\nr range:\t({np.min(r_values)},{np.max(r_values)})")
        print(f"z range:\t({np.min(z_values)},{np.max(z_values)})")
        return r_values, z_values, len(frame_indices)



[docs]
    def box_length_max(self, frame_index: int) -> float:
        """Return the maximum lattice vector length for a frame.

        Parameters
        ----------
        frame_index : int
            Frame index.

        Returns
        -------
        float
            Max ``|a_i|`` over lattice vectors.
        """
        lattice_matrix = self.frames[frame_index]["lattice_matrix"]
        return float(np.max(np.linalg.norm(lattice_matrix, axis=1)))



[docs]
    def get_water_oxygen_indices(self, frame_index):
        """Return indices of oxygen atoms belonging to water molecules.

        Parameters
        ----------
        frame_index : int
            Frame index.

        Returns
        -------
        ndarray
            Indices of oxygen atoms with exactly two hydrogens within cutoff.
        """
        data = self.frames[frame_index]
        positions = data["positions"]
        symbols = data["symbols"]
        oxygen_indices = np.where(symbols == self.oxygen_type)[0]
        hydrogen_indices = np.where(symbols == self.hydrogen_type)[0]
        return self._manual_water_identification(
            positions, oxygen_indices, hydrogen_indices
        )



[docs]
    def get_water_oxygen_positions(self, frame_index):
        """Return coordinates of water oxygen atoms for a frame.

        Parameters
        ----------
        frame_index : int
            Frame index.

        Returns
        -------
        ndarray, shape (N, 3)
            Oxygen atom positions; empty array if none detected.
        """
        positions = self.frames[frame_index]["positions"]
        indices = self.get_water_oxygen_indices(frame_index)
        if len(indices) == 0:
            return np.empty((0, 3))
        return positions[indices]


    def _manual_water_identification(self, positions, oxygen_indices, hydrogen_indices):
        """Identify water oxygens by counting hydrogens within cutoff distance.

        Parameters
        ----------
        positions : ndarray, shape (N, 3)
            Atomic positions.
        oxygen_indices : ndarray
            Candidate oxygen indices.
        hydrogen_indices : ndarray
            Hydrogen indices to check.

        Returns
        -------
        ndarray
            Oxygen indices with exactly two nearby hydrogens.
        """
        water_oxygens = []
        for o_idx in oxygen_indices:
            o_pos = positions[o_idx]
            h_count = 0
            for h_idx in hydrogen_indices:
                h_pos = positions[h_idx]
                if np.linalg.norm(o_pos - h_pos) <= self.oh_cutoff:
                    h_count += 1
            if h_count == 2:
                water_oxygens.append(o_idx)
        return np.array(water_oxygens)