Source code for turbomoleio.output.states

# -*- coding: utf-8 -*-
# The turbomoleio package, a python interface to Turbomole
# for preparing inputs, parsing outputs and other related tools.
#
# Copyright (C) 2018-2022 BASF SE, Matgenix SRL.
#
# This file is part of turbomoleio.
#
# Turbomoleio is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Turbomoleio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with turbomoleio (see ~turbomoleio/COPYING). If not,
# see <https://www.gnu.org/licenses/>.

"""Module for extracting and modifying electronic states."""

import re
import subprocess
from collections import defaultdict
from collections.abc import MutableSequence
from copy import copy
from fractions import Fraction

import numpy as np
from monty.json import MSONable
from monty.os import cd

from turbomoleio.core.control import Control
from turbomoleio.core.symmetry import irrep_size


[docs] def get_mos_energies(dg_string): """Get the molecular orbital energies. Given the string of a mos datagroup or the content of the mos file this extracts the list of energies associated with each state. Each state is identified by its irreducible representation and the index relative that that irrep. Args: dg_string (str): the string with the datagroup Returns: A list of energies, each element of the list contains the irrep, the index relative to the irred and the energy. """ m = re.findall( r"^\s+(\d+)\s+([\w\'\"]+)\s+eigenvalue=([+-]?[0-9]*[.]?[0-9]+D[+-]\d{2})\s+", dg_string, re.MULTILINE, ) if not m: raise RuntimeError("No mos energies could be extracted") energies = [] for e in m: energies.append([e[1], int(e[0]), float(e[2].replace("D", "E"))]) return energies
[docs] class State(MSONable): """ Object a single state of a calculation. Each state is identified by its irreducible representation, the index in the irrep, its spin, its occupation and the calculated eigenvalue. Uses Fraction for occupation for compatibility with the Shells object. """ def __init__(self, eigenvalue, irrep, irrep_index, occupation, spin=None): """Construct State object. Args: eigenvalue (float): the eigenvalue of the state irrep (str): the irreducible representation irrep_index (int): the index in the irreducible representation occupation (Fraction): occupation of the state spin (str): "a" or "b" if a uhf calculation, representing the spin, None otherwise. """ self.eigenvalue = eigenvalue self.irrep = irrep self.irrep_index = irrep_index self.occupation = occupation self.spin = spin def __eq__(self, other): """Compare with another State object. Two State objects are equal if all the attributes are the same. Args: other: another object Returns: (bool): True if the objects are equals """ if not isinstance(other, self.__class__): return False return ( self.eigenvalue == other.eigenvalue and self.irrep == other.irrep and self.irrep_index == other.irrep_index and self.occupation == other.occupation and self.spin == other.spin ) def __str__(self): """Get a string representation of the State object.""" return "Eigenvalue: {}, irrep: {}, index: {}, spin: {}, occupation: {}".format( self.eigenvalue, self.irrep, self.irrep_index, self.spin, self.occupation ) @property def max_occupation(self): """ Get the maximum possible occupation for this state. This depends on the irrep and spin of the system. Returns: int: the maximum occupation. """ max_occ = irrep_size[self.irrep[0].upper()] if not self.spin: max_occ *= 2 return max_occ @property def has_fractional_occ(self): """Check if the state is only fractionally occupied.""" return 0 < self.occupation < self.max_occupation
[docs] def as_dict(self): """Get a JSON serializable dict representation of State.""" occupation = (self.occupation.numerator, self.occupation.denominator) d = { "eigenvalue": self.eigenvalue, "irrep": self.irrep, "irrep_index": self.irrep_index, "occupation": occupation, "spin": self.spin, "@module": self.__class__.__module__, "@class": self.__class__.__name__, } return d
[docs] @classmethod def from_dict(cls, d): """Generate object from JSON representation.""" occupation = Fraction( numerator=d["occupation"][0], denominator=d["occupation"][1] ) return cls( eigenvalue=d["eigenvalue"], irrep=d["irrep"], irrep_index=d["irrep_index"], occupation=occupation, spin=d["spin"], )
[docs] class States(MSONable, MutableSequence): """ A sequence of State, sorted in increasing order of energy. Describes the eigenstates of a molecule. """ def __init__(self, states): """ Construct the States object. The states are sorted here. Args: states (list): a list of State. """ self._states = sorted( states, key=lambda s: (s.eigenvalue, s.irrep, s.irrep_index, s.spin) ) def __getitem__(self, i): """Get i-th state.""" return self._states[i] def __setitem__(self, i, state): """Set i-th state.""" self._states[i] = state def __delitem__(self, i): """Delete i-th state.""" self._states.__delitem__(i) def __len__(self): """Get the number of states.""" return len(self._states)
[docs] def insert(self, index, state): """Insert state at the specified index.""" self._states.insert(index, state)
@staticmethod def _generate_states_lists(eigen_data, shells, spin=None): """ Generate a list of states for a specific type of shells. Args: eigen_data (list): a list with the eigenvalues data. irrep, index in the irrep and eigenvalues extracted from the mos files. shells (Shell): the shell describing the occupation of the states. spin (str): "a" or "b" for alpha and beta spins if calculation is uhf, None otherwise. Returns: (list) a list of State """ states = [] for d in eigen_data: irrep = d[0] irrep_ind = d[1] eig = d[2] occ = 0 if irrep in shells.irreps: # pragma: no branch for i, (irrep_shell, index_shell) in enumerate(shells.states): if irrep_shell == irrep and index_shell == irrep_ind: degeneracy = irrep_size[irrep[0].upper()] occ = shells.occupations[i] * degeneracy states.append(State(eig, irrep, irrep_ind, occ, spin)) return states
[docs] @classmethod def from_file(cls, filename="control"): """ Generate the instance from a file (usually control). Args: filename (str): name of the file Returns: An instance of States """ c = Control.from_file(filename) states = [] # files are read directly here for performance issues. The mos files can # be large and the DataGroups object performs useless operations # in this case. We assume that the mo will always be in an external file. if c.is_uhf: alpha_shells = c.get_shells("alpha") beta_shells = c.get_shells("beta") with open(c.show_subfile_fname("uhfmo_alpha")) as f: alpha_eigen = get_mos_energies(f.read()) with open(c.show_subfile_fname("uhfmo_beta")) as f: beta_eigen = get_mos_energies(f.read()) states.extend( cls._generate_states_lists(alpha_eigen, alpha_shells, spin="a") ) states.extend(cls._generate_states_lists(beta_eigen, beta_shells, spin="b")) else: shells = c.get_shells("closed") with open(c.show_subfile_fname("scfmo")) as f: eigen = get_mos_energies(f.read()) states = cls._generate_states_lists(eigen, shells, spin=None) return cls(states)
@property def total_electrons(self): """Get the total numer of electrons. Can be a float if occupations are fractional. """ return np.sum(self.occupations) @property def occupations(self): """Get the list of all the occupations.""" return [s.occupation for s in self._states] @property def irreps(self): """Get the list of all the irreducible representations.""" return [s.irrep for s in self._states] @property def irrep_indices(self): """Get the list of all the indices of the irreducible representations.""" return [s.irrep_index for s in self._states] @property def eigenvalues(self): """Get the list of all the eigenvalues.""" return [s.eigenvalue for s in self._states] @property def spins(self): """Get the list of all the spins.""" return [s.spin for s in self._states] @property def is_uhf(self): """Determine whether the calculation is a UHF one.""" return self._states[0].spin is not None def __str__(self): """Get a string representation of the States object.""" lines = [""] for s in self._states: spin = s.spin if s.spin is not None else " " lines.append( " {}\t{} {}\t{}\t\t{}".format( s.irrep, s.irrep_index, spin, s.occupation, s.eigenvalue ) ) return "\n".join(lines) @property def homo_index(self): """Get the index of the HOMO state.""" occupied_indices = np.where(np.array(self.occupations) > 0)[0] if len(occupied_indices) == 0: raise RuntimeError("No occupied states present") return occupied_indices[-1] @property def lumo_index(self): """Get the index of the LUMO state. None if no empty state is available. """ unoccupied_indices = np.where(np.array(self.occupations) == 0)[0] if len(unoccupied_indices) == 0: return None return unoccupied_indices[0] @property def homo(self): """Get the HOMO state.""" return self[self.homo_index] @property def lumo(self): """Get the LUMO state. None if no empty state is available. """ lumo_index = self.lumo_index if lumo_index is None: return None return self[self.lumo_index] @property def gap(self): """Get the gap between the HOMO and the LUMO. Can be negative if a hole is present. None if no empty state is available. """ lumo = self.lumo if lumo is None: return None return lumo.eigenvalue - self.homo.eigenvalue @property def n_states(self): """Get the number of states.""" return len(self) @property def has_hole(self): """Check if the system has a hole.""" empty = False for occ in self.occupations: if occ == 0: empty = True elif empty: return True return False @property def has_fractional_occ(self): """Check if any of the states has a fractional occupation.""" return any(s.has_fractional_occ for s in self._states)
[docs] def generate_lowest_filled_states( self, allow_fractional=None, only_occupied=False, reorder_irrep_index=False ): """ Create a new States object with lowest states filled. Using the current number of electrons, this fills the states starting from the lowest ones in energy. This procedure can lead to fractional occupations even if initially not present because of states degeneracies. If not allowed an exception will be raised in this case. Args: allow_fractional (bool): whether to allow fractional occupations in the states. If None it will be set equal to self.has_fractional_occ. only_occupied (bool): if True only occupied states will be considered. reorder_irrep_index (bool): if True the irrep_index of the states will changed to be in ascending order. Returns: States: a list of the lowest lying states filled with electrons. Raises: RuntimeError: if the states will contain fractional occupations and allow_fractional is False. """ if allow_fractional is None: allow_fractional = self.has_fractional_occ remaining_electrons = self.total_electrons states = [] irrep_ind = defaultdict(lambda: defaultdict(int)) for s in self._states: new_s = copy(s) if reorder_irrep_index: irrep_ind[s.spin][s.irrep] += 1 new_s.irrep_index = irrep_ind[s.spin][s.irrep] max_occ = s.max_occupation if remaining_electrons <= 0: new_s.occupation = Fraction(0) elif max_occ > remaining_electrons: if not allow_fractional: raise RuntimeError( "Fractional occupancies of states are not allowed." ) new_s.occupation = Fraction(remaining_electrons) else: new_s.occupation = Fraction(max_occ) states.append(new_s) remaining_electrons -= max_occ if only_occupied and remaining_electrons <= 0: break return States(states)
[docs] def filter_states(self, spin=None, irrep=None): """ Generate a States instance with a subset of the State objects. The State objects are filtered by spin and irrep. Notice that the same instances of State as in the current object will be used. Args: spin (str): string describing the spin. Values allowed: "alpha", "beta" and the shortcuts "a" and "b". irrep (str): the symbol of the irreducible representations. Returns: States: the filtered list of states. Raises: ValueError: if spin is requested for a non uhf calculation. """ if spin and not self.is_uhf: raise ValueError("The states do not include spin.") states = [] for s in self._states: if spin is None or spin[0] == s.spin: if irrep is None or irrep == s.irrep: states.append(s) return States(states)
[docs] def get_shells(self, spin=None): """ Generate a Shells object with the occupied shells in the list of States. Args: spin (str): string describing the spin. Values allowed: "alpha", "beta" and the shortcuts "a" and "b". Returns: Shells: the occupied shells for the states """ if (spin is not None) != self.is_uhf: raise ValueError("incompatible configuration and spin request") shell_states = [] occupations = [] for s in self._states: if s.occupation == 0: continue if spin is None or spin[0] == s.spin: shell_states.append((s.irrep, s.irrep_index)) occupations.append(s.occupation / irrep_size[s.irrep[0].upper()]) from turbomoleio.core.control import Shells return Shells(shell_states, occupations)
[docs] def as_dict(self): """Get a JSON serializable dict representation of States.""" d = { "states": [s.as_dict() for s in self._states], "@module": self.__class__.__module__, "@class": self.__class__.__name__, } return d
[docs] class EigerOutput(MSONable): """Class to read and store the output from the eiger command.""" def __init__( self, eigenvalues, irreps, irrep_indices, occupations, spin, gap, nelec ): """Construct the EigerOutput object. The lists should match among them and be sorted in ascending values with respect to the eigenvalues. Args: eigenvalues (list): the eigenvalues. irreps (list): the irreducible representations. irrep_indices (list): the inddices for each irreducible representation. occupations (list): the occupations. spin (list): the spins. gap (float): the value of the gap. nelec (float): the total number of electrons. """ self.eigenvalues = eigenvalues self.irreps = irreps self.irrep_indices = irrep_indices self.occupations = occupations self.spin = spin self.gap = gap self.nelec = nelec
[docs] @classmethod def from_string(cls, string): """ Generate the object from the string of the output from eiger. Args: string (str): the string of the output of eiger. Returns: An instance of EigerOutput """ eigenvalues = [] irreps = [] irrep_indices = [] occupations = [] spin = [] gap = None nelec = 0 parse_states = False r = re.compile( r"^\s*\d+.\s+([ab]*)\s+(\d+)\s+([^\s]*)\s+([\d.+\-]*)\s+([\d.+\-]+)\s+H" ) for line in string.splitlines(): # pragma: no branch if "Gap" in line: gap = float(line.split()[2]) if "Electrons=" in line: nelec = float(line.split()[5].replace(",", "")) if parse_states: if not line.strip(): break m = r.search(line) if not m: raise RuntimeError( "Could not match regex for line: {}".format(line) ) s = m.group(1) if m.group(1) else None spin.append(s) irrep_indices.append(int(m.group(2))) irreps.append(m.group(3)) occ = float(m.group(4)) if m.group(4) else 0 occupations.append(occ) eigenvalues.append(float(m.group(5))) if "Nr. " in line: parse_states = True return cls(eigenvalues, irreps, irrep_indices, occupations, spin, gap, nelec)
[docs] @classmethod def from_file(cls, filename): """ Generate the object from a file with the output from eiger. Args: filename (str): the name of the file containing the output of eiger. Returns: An instance of EigerOutput """ with open(filename) as f: return cls.from_string(f.read())
[docs] def compare_states(self, states, tol=1e-6): """ Compare the current values with anoter States object. This checks that the two are equivalent. Numbers are checked within the specified tolerance. Args: states (States): States object to be compared. tol (float): tolerance on the floating numbers. Returns: None if the data are equivalent, otherwise a string describing the difference between this instance and the States object. """ if len(self.eigenvalues) != len(states): return "number of states" if abs(self.gap - states.gap) > tol: return "gap" if abs(self.nelec - states.total_electrons) > 0.01: return "number of electrons" for sp, irrep, ind, occ, eig in zip( self.spin, self.irreps, self.irrep_indices, self.occupations, self.eigenvalues, ): for s in states: if s.irrep == irrep and s.irrep_index == ind and s.spin == sp: if abs(s.occupation - occ) > tol: return "occupation for state {}".format(s) if abs(s.eigenvalue - eig) > tol: return "eigenvalue for state: {}".format(s) break else: return "no match for values: {} {} {} {} {}".format( sp, irrep, ind, occ, eig ) return None
[docs] class EigerRunner: """Class that runs the eiger executable.""" def __init__(self, executable="eiger", data_path="."): """Construct EigerRunner object. Args: executable (str): the eiger executable. data_path (str): path to where the TM data are stored. """ self.executable = executable self.data_path = data_path self.out = None
[docs] def run(self): """ Run eiger in the data_path directory. Returns: None """ with cd(self.data_path): result = subprocess.run([self.executable, "-a"], stdout=subprocess.PIPE) self.out = result.stdout.decode("utf-8")
[docs] def get_eiger_output(self): """ Generate an EigerOutput instance from the output of eiger. Returns: EigerOutput Raises: ValueError: if eiger has not run """ if not self.out: raise ValueError("No output") return EigerOutput.from_string(self.out)
[docs] def to_file(self, filepath): """ Write the output of eiger to a file. Args: filepath (str): path to the file. Returns: None Raises: ValueError: if eiger has not run """ if not self.out: raise ValueError("No output") with open(filepath, "w") as f: f.write(self.out)