Source code for turbomoleio.output.parser

# -*- coding: utf-8 -*-
# The turbomoleio package, a python interface to Turbomole
# for preparing inputs, parsing outputs and other related tools.
#
# Copyright (C) 2018-2022 BASF SE, Matgenix SRL.
#
# This file is part of turbomoleio.
#
# Turbomoleio is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Turbomoleio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with turbomoleio (see ~turbomoleio/COPYING). If not,
# see <https://www.gnu.org/licenses/>.

"""Module with the main parsing utilities for the stdout of Turbomole executables."""

import datetime
import re
from collections import namedtuple

import numpy as np
from monty.functools import lazy_property

# common pattern for regex
date_format = "%Y-%m-%d %H:%M:%S.%f"
float_number_re = r"[+-]?[0-9]*[.]?[0-9]+"
float_number_d_re = r"[+-]?[0-9]*[.]?[0-9]+D[+-]\d{2}"
float_number_e_re = r"[+-]?[0-9]*[.]?[0-9]+E[+-]\d{2}"
float_number_all_re = r"[+-]?[0-9]*[.]?[0-9]+(?:[ED][+-]\d{2})?"
irrep_re_group = r"\w'\""


# pre compiled regex
asterisks_re = re.compile(r"\*+")


# strings present in the output for all the types of standard xc functionals.
# NB This should be updated if other standard xc functionals are added to TM.
functional_strings = {
    "s-vwn": [r"Slater.+?Dirac.+?exchange.+?with.+?VWN.+?corr\..+?functional"],
    "s-vwn_Gaussian": [
        r"Slater.+?Dirac.+?exchange.+?with.+?VWN.+?"
        r"corr\.functional.+?\(fit.+?III.+?as.+?in.+?Gaussian\)"
    ],
    "pwlda": [r"Slater.+?Dirac.+?exchange.+?with.+?PW.+?LDA.+?corr\..+?functional"],
    "b-lyp": [
        r"B\-LYP.+?functional.+?exchange\:.+?LDA.+?\+.+?Becke.+?\(B88\).+?"
        r"correlation\:.+?Lee\-Yang\-Parr.+?\(LYP\)"
    ],
    "b-vwn": [
        r"B\-VWN.+?functional.+?exchange\:.+?LDA.+?\+.+?Becke.+?\(B88\).+?"
        r"correlation\:.+?LDA.+?\(VWN\)"
    ],
    "b-p": [
        r"B\-P86.+?functional.+?exchange\:.+?LDA.+?\+.+?Becke.+?\(B88\).+?"
        r"correlation\:.+?LDA.+?\(VWN\).+?\+.+?Perdew.+?\(P86\)"
    ],
    "pbe": [
        r"PBE.+?functional.+?exchange\:.+?LDA.+?\+.+?PBE.+?correlation\:.+?"
        r"LDA.+?\(PW\).+?\+.+?PBE"
    ],
    "tpss": [
        r"TPSS.+?meta\-GGA.+?functional.+?exchange\:.+?LDA.+?\+.+?TPSS.+?"
        r"correlation\:.+?LDA.+?\(PW\).+?\+.+?TPSS"
    ],
    "bh-lyp": [
        r"Becke\-Half\-and\-Half\-LYP.+?hybrid.+?functional\:.+?BH\-LYP.+?"
        r"exchange\:.+?1\/2.+?\(LDA.+?\+.+?Becke.+?\(B88\)\).+?\+.+?1\/2.+?HF.+?"
        r"correlation\:.+?Lee\-Yang\-Parr.+?\(LYP\)"
    ],
    "b3-lyp": [
        r"Becke\-3\-Parameter.+?hybrid.+?functional\:.+?B3\-LYP.+?"
        r"exchange\:.+?0\.8\*LDA.+?\+.+?0\.72\*B88.+?\+.+?0\.2\*HF.+?"
        r"correlation\:.+?0\.19\*LDA\(VWN\).+?\+.+?0\.81\*LYP"
    ],
    "b3-lyp_Gaussian": [
        r"Becke\-3\-Parameter.+?hybrid.+?functional\:.+?B3\-LYP.+?"
        r"exchange\:.+?0\.8\*LDA.+?\+.+?0\.72\*B88.+?\+.+?0\.2\*"
        r"HF.+?correlation\:.+?0\.19\*LDA\(VWNIII\).+?\+.+?0\.81\*"
        r"LYP.+?\(VWNIII.+?fit.+?as.+?in.+?Gaussian\)"
    ],
    "pbe0": [
        r"PBE0.+?hybrid.+?functional.+?"
        r"exchange\:.+?3\/4.+?\(LDA\+PBE\).+?\+.+?1\/4.+?HF.+?"
        r"correlation\:.+?LDA.+?\(PW\).+?\+.+?PBE"
    ],
    "tpssh": [
        r"TPSS.+?global.+?hybrid.+?functional.+?"
        r"exchange\:.+?9\/10.+?\(LDA\+TPSS\).+?\+.+?1\/10.+?HF.+?"
        r"correlation\:.+?LDA.+?\(PW\).+?\+.+?TPSS"
    ],
    "pw6b95": [
        r"PW6B95.+?global.+?meta.+?hybrid.+?functional.+?code.+?by.+?"
        r"Stefan.+?Grimme\,.+?University.+?of.+?Muenster.+?"
        r"Zhao.+?and.+?Truhlar\,.+?"
        r"J.+?Phys.+?Chem.+?A\,.+?109\,.+?25\,.+?2005\,.+?5656\."
    ],
    "m06": [
        r"M06.+?meta\-GGA.+?functional.+?Truhlar.+?functional.+?with.+?27\%.+?HF.+?"
        r"exchange.+?USING.+?XCfun.+?library\,.+?see.+?documentation.+?"
        r"XCFun.+?library.+?is.+?being.+?used\,.+?version\:.+?1\.99000000000000.+?"
        r"XCFun.+?DFT.+?library.+?Copyright.+?2009\-2011.+?"
        r"Ulf.+?Ekstrom.+?and.+?contributors\..+?See.+?"
        r"http\:\/\/admol\.org\/xcfun.+?for.+?more.+?information\..+?"
        r"This.+?is.+?free.+?software\;.+?see.+?the.+?source.+?code.+?"
        r"for.+?copying.+?conditions\..+?There.+?is.+?ABSOLUTELY.+?NO.+?"
        r"WARRANTY\;.+?not.+?even.+?for.+?MERCHANTABILITY.+?or.+?FITNESS.+?"
        r"FOR.+?A.+?PARTICULAR.+?PURPOSE\..+?For.+?details.+?"
        r"see.+?the.+?documentation\..+?Scientific.+?users.+?of.+?this.+?"
        r"library.+?should.+?cite.+?U\..+?Ekstrom\,.+?L\..+?Visscher\,.+?R\..+?"
        r"Bast\,.+?A\..+?J\..+?Thorvaldsen.+?and.+?K\..+?Ruud\;.+?"
        r"J\.Chem\.Theor\.Comp\..+?2010\,.+?DOI\:.+?10\.1021\/ct100117s.+?"
        r"XCFun.+?uses.+?functional\:.+?m06",
        r"Name of XC-Functional: Minnesota M06 hybrid exchange functional",
    ],
    "m06-l": [
        r"M06\-L.+?meta\-GGA.+?functional.+?Truhlar.+?functional.+?without.+?"
        r"HF.+?exchange.+?USING.+?XCfun.+?library\,.+?see.+?documentation.+?"
        r"XCFun.+?library.+?is.+?being.+?used\,.+?"
        r"version\:.+?1\.99000000000000.+?XCFun.+?DFT.+?library.+?"
        r"Copyright.+?2009\-2011.+?Ulf.+?Ekstrom.+?and.+?contributors\..+?"
        r"See.+?http\:\/\/admol\.org\/xcfun.+?for.+?more.+?information\..+?"
        r"This.+?is.+?free.+?software\;.+?see.+?the.+?source.+?code.+?"
        r"for.+?copying.+?conditions\..+?There.+?is.+?ABSOLUTELY.+?NO.+?"
        r"WARRANTY\;.+?not.+?even.+?for.+?MERCHANTABILITY.+?or.+?FITNESS.+?"
        r"FOR.+?A.+?PARTICULAR.+?PURPOSE\..+?For.+?details.+?see.+?the.+?"
        r"documentation\..+?Scientific.+?users.+?of.+?this.+?library.+?should.+?"
        r"cite.+?U\..+?Ekstrom\,.+?L\..+?Visscher\,.+?R\..+?Bast\,.+?A\..+?J\..+?"
        r"Thorvaldsen.+?and.+?K\..+?Ruud\;.+?J\.Chem\.Theor\.Comp\..+?2010\,.+?"
        r"DOI\:.+?10\.1021\/ct100117s.+?XCFun.+?uses.+?functional\:.+?m06l"
    ],
    "m06-2x": [
        r"M06\-2X.+?meta\-GGA.+?functional.+?Truhlar.+?functional.+?"
        r"with.+?54\%.+?HF.+?exchange.+?USING.+?XCfun.+?library\,.+?see.+?"
        r"documentation.+?XCFun.+?library.+?is.+?being.+?used\,.+?"
        r"version\:.+?1\.99000000000000.+?XCFun.+?DFT.+?library.+?"
        r"Copyright.+?2009\-2011.+?Ulf.+?Ekstrom.+?and.+?contributors\..+?"
        r"See.+?http\:\/\/admol\.org\/xcfun.+?for.+?more.+?information\..+?"
        r"This.+?is.+?free.+?software\;.+?see.+?the.+?source.+?code.+?"
        r"for.+?copying.+?conditions\..+?There.+?is.+?ABSOLUTELY.+?"
        r"NO.+?WARRANTY\;.+?not.+?even.+?for.+?MERCHANTABILITY.+?or.+?"
        r"FITNESS.+?FOR.+?A.+?PARTICULAR.+?PURPOSE\..+?For.+?details.+?"
        r"see.+?the.+?documentation\..+?Scientific.+?users.+?of.+?this.+?"
        r"library.+?should.+?cite.+?U\..+?Ekstrom\,.+?L\..+?Visscher\,.+?R\..+?"
        r"Bast\,.+?A\..+?J\..+?Thorvaldsen.+?and.+?K\..+?Ruud\;.+?"
        r"J\.Chem\.Theor\.Comp\..+?2010\,.+?DOI\:.+?10\.1021\/ct100117s.+?"
        r"XCFun.+?uses.+?functional\:.+?m062x"
    ],
    "lhf": [
        r"Localized.+?Hartree\-Fock.+?Methods\:.+?F\..+?"
        r"Della.+?Sala.+?and.+?A\..+?Goerling\,.+?"
        r"J\..+?Chem\..+?Phys\..+?115\,.+?5718.+?\(2001\).+?"
        r"F\..+?Della.+?Sala.+?and.+?A\..+?Goerling\,.+?"
        r"J\..+?Chem\..+?Phys\..+?116\,.+?5374.+?\(2002\)"
    ],
    "oep": [
        r"Exact\-Exchange.+?Optimized.+?Effective.+?Potential.+?Method\:.+?"
        r"Hesselmann\,.+?A\.\,.+?Goetz\,.+?A\.\,.+?Della.+?Sala\,.+?"
        r"F\.\,.+?Goerling\,.+?A\.\,.+?"
        r"J\..+?Chem\..+?Phys\.\,.+?127.+?\(2007\)\,.+?054102"
    ],
    "b97-d": [
        r"exchange\:.+?B97GGA.+?vdW.+?refit.+?"
        r"correlation\:.+?\".+?\".+?\".+?S\..+?Grimme\,.+?"
        r"J\.Comput\..+?Chem\..+?27\,.+?\(2006\)\,.+?1787\-1799"
    ],
    "pbeh-3c": [
        r"PBE0.+?modified.+?by.+?S\..+?Grimme.+?for.+?D3.+?and.+?gCP.+?"
        r"exchange\:.+?PBE.+?\(kappa\=1\.0245\,.+?mu\=0\.12345679\).+?"
        r"correlation\:.+?LDA.+?\(PW\).+?\+.+?PBE"
    ],
    "b97-3c": [
        r"exchange\:.+?B97GGA.+?vdW.+?refit.+?"
        r"correlation\:.+?\".+?\".+?\".+?S\..+?Grimme\,.+?"
        r"modifications.+?B97\-3c.+?\(2016\)"
    ],
    "lh07t-svwn": [
        r"Lh07t\-SVWN.+?local.+?hybrid.+?functional.+?Local.+?hybrid.+?"
        r"semi\-numerical.+?integral.+?thresholds\:.+?"
        r"S\-junctions.+?\:.+?0\.10E\-05.+?P\-junctions.+?\:.+?0\.10E\-05"
    ],
    "lh07s-svwn": [
        r"Lh07s\-SVWN.+?local.+?hybrid.+?functional.+?"
        r"Local.+?hybrid.+?semi\-numerical.+?integral.+?thresholds\:.+?"
        r"S\-junctions.+?\:.+?0\.10E\-05.+?P\-junctions.+?\:.+?0\.10E\-05"
    ],
    "lh12ct-ssirpw92": [
        r"Lh12ct\-SsirPW92.+?local.+?hybrid.+?functional.+?"
        r"Local.+?hybrid.+?semi\-numerical.+?integral.+?thresholds\:.+?"
        r"S\-junctions.+?\:.+?0\.10E\-05.+?"
        r"P\-junctions.+?\:.+?0\.10E\-05"
    ],
    "lh12ct-ssifpw92": [
        r"Lh12ct\-SsifPW92.+?local.+?hybrid.+?functional.+?"
        r"Local.+?hybrid.+?semi\-numerical.+?integral.+?thresholds\:.+?"
        r"S\-junctions.+?\:.+?0\.10E\-05.+?"
        r"P\-junctions.+?\:.+?0\.10E\-05"
    ],
    "lh14t-calpbe": [
        r"Lh14t\-calPBE.+?local.+?hybrid.+?functional.+?"
        r"Local.+?hybrid.+?semi\-numerical.+?integral.+?thresholds\:.+?"
        r"S\-junctions.+?\:.+?0\.10E\-05.+?P\-junctions.+?\:.+?0\.10E\-05"
    ],
    "b2-plyp": [
        r"Hybrid.+?part.+?of.+?B2\-PLYP.+?double.+?hybrid.+?functional.+?"
        r"exchange\:.+?0\.47\(LDA.+?\+.+?Becke.+?\(B88\)\).+?\+.+?0\.53.+?"
        r"HF.+?correlation\:.+?0\.73.+?LYP.+?\+.+?0\.27.+?PT2.+?"
        r"\(MP2.+?program\).+?S\..+?Grimme\,.+?"
        r"JCP.+?124\,.+?\(2006\)\,.+?034108\-16"
    ],
}


# type of xc functionals
# NB This should be updated if other standard xc functionals are added to TM.
functional_types = {
    "s-vwn": "LDA",
    "s-vwn_Gaussian": "LDA",
    "pwlda": "LDA",
    "b-lyp": "GGA",
    "b-vwn": "GGA",
    "b-p": "GGA",
    "pbe": "GGA",
    "tpss": "MGGA",
    "bh-lyp": "HYB",
    "b3-lyp": "HYB",
    "b3-lyp_Gaussian": "HYB",
    "pbe0": "HYB",
    "tpssh": "MHYB",
    "pw6b95": "MHYB",
    "m06": "MHYB",
    "m06-l": "MGGA",
    "m06-2x": "MHYB",
    "lhf": "ODFT",
    "oep": "ODFT",
    "b97-d": "GGA",
    "pbeh-3c": "HYB",
    "b97-3c": "GGA",
    "lh07t-svwn": "LHYB",
    "lh07s-svwn": "LHYB",
    "lh12ct-ssirpw92": "LHYB",
    "lh12ct-ssifpw92": "LHYB",
    "lh14t-calpbe": "LHYB",
    "b2-plyp": "DHYB",
}



[docs]
def convert_float(f):
    """
    Convert a float from the output.

    Handles simple float and exponential notation with D and E
    (e.g 11.23 1.123E+01 1.123D+01).
    Returns None if the string is composed of only asterisks,
    as it might be in case of formatting problems in fortran.

    Args:
        f (str): the string to be converted to a float.

    Returns:
        float.
    """
    if asterisks_re.match(f.strip()):
        return None

    return float(f.replace("D", "E"))




[docs]
def convert_int(i):
    """
    Convert an int from the output.

    Returns None if the string is composed of only asterisks,
    as it might be in case of formatting problems in fortran.

    Args:
        i (str): the string to be converted to an int.

    Returns:
        int.
    """
    if asterisks_re.match(i.strip()):
        return None

    return int(i)




[docs]
def convert_time_string(line):
    """
    Convert the cpu and wall time string given in the Turbomole outputs to seconds.

    Example: 2 days 1 hours 12 minutes and 55 seconds.

    Args:
        line (str): the line with the timings.

    Returns:
        float: the value of the time in seconds.
    """
    split = line.split()

    time = 0
    for i, s in enumerate(split):
        if s == "days":
            time += float(split[i - 1]) * 86400
        if s == "hours":
            time += float(split[i - 1]) * 3600
        if s == "minutes":
            time += float(split[i - 1]) * 60
        if s == "seconds":
            time += float(split[i - 1])

    return time




[docs]
class Parser:
    """
    Main Parser object for Turbomole output files.

    This object allows to parse the different outputs (the stdout) produced by
    the Turbomole executables. Each method parses a specific portion of the output
    aiming at being as reusable as possible across the different types of outputs.
    The quantities are accessible as lazy properties, to avoid parsing the same
    sections multiple times.
    The parser expects the output of a completed calculation. Parsing of crashed
    calculations may lead to wrong output.
    The methods rely heavily on regex to select portions of the output and extract
    the quantities.
    """

    def __init__(self, string):
        """Construct Parser object.

        Args:
            string (str): the string of the output from Turbomole.
        """
        self._string = string

    @property
    def string(self):
        """Get the string given as an input."""
        return self._string


[docs]
    @classmethod
    def from_file(cls, filepath, check_all_done=True):
        """
        Generate an instance from a file path.

        Args:
            filepath (str): path to the output file to read.
            check_all_done (bool): if True it will be checked that the job
                was "all done". If not an exception is raised.

        Returns:
            an instance of Parser.
        """
        try:
            with open(filepath, "r") as f:
                string = f.read()
        except UnicodeDecodeError:  # pragma: no cover (unlikely + trivial anyway)
            with open(filepath, "r", errors="ignore") as f:
                string = f.read()

        if check_all_done and string.rfind("all done") < 0:
            raise ValueError(
                "The string does not contain data for a completed calculation"
            )

        return cls(string=string)



[docs]
    @lazy_property
    def all_done(self):
        """Check if "all done" is present in the string."""
        return self.string.rfind("all done") >= 0



[docs]
    @lazy_property
    def header(self):
        """
        Extract the information contained in the header of the output.

        The header contains information such as the TM version and timings.
        Valid for all the TM executables.

        Returns:
            dict with "executable" name, "host" of execution, "tm_version",
                "tm_build", "start_time".
        """
        # Example string parsed here:
        #  escf (node001) : TURBOMOLE V7.3 ( 22118 ) 1 Jul 2018 at 20:38:15
        #  Copyright (C) 2018 TURBOMOLE GmbH, Karlsruhe
        #    2018-11-13 09:30:15.283
        r = (
            r"^.*?([\w]+)\s+\((.*?)\).*?"
            r"TURBOMOLE(.*?)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3})"
        )
        match = re.search(r, self.string, re.DOTALL)
        if not match:
            return None

        # the section of the version can miss some of the required information.
        # In this case turbomole version and build will be set to None.
        # For example it may be:
        # escf (node001) : TURBOMOLE rev. compiled 1 Jul 2018 at 20:38:15
        #
        # Note: TURBOMOLE 7.6 uses the notation 7-6. Catch it and convert to 7.6
        r_version = r"V([\d\.-]+\d)\s+.*"
        r_build = r"V[\d\.]+\d\s+\((.*)\)"
        match_version = re.search(r_version, match.group(3).strip())
        match_build = re.search(r_build, match.group(3).strip())
        if match_version:
            tm_version = match_version.group(1).strip()
            tm_version = tm_version.replace("-", ".")
        else:
            tm_version = None
        if match_build:
            tm_build = (
                match_build.group(1).strip() or None
            )  # When the build info inside the parenthesis is empty
        else:
            tm_build = None

        d = dict(
            executable=match.group(1),
            host=match.group(2).strip(),
            tm_version=tm_version,
            tm_build=tm_build,
            start_time=datetime.datetime.strptime(match.group(4), date_format),
        )

        return d



[docs]
    @lazy_property
    def centers(self):
        """
        Extract the center of mass and center of charge values.

        Valid for most of the TM executables (including the scf, escf, grad).

        Returns:
            dict with "center_of_mass" and "center_of_charge".
        """
        d = dict(center_of_mass=None, center_of_charge=None)

        r = r"center of nuclear mass[\s:]*" + (r"\s*(" + float_number_re + r")") * 3

        match = re.search(r, self.string)

        if match:
            d["center_of_mass"] = [convert_float(match.group(i)) for i in range(1, 4)]

        r = r"center of nuclear charge[\s:]*" + (r"\s*(" + float_number_re + r")") * 3

        match = re.search(r, self.string)

        if match:
            d["center_of_charge"] = [convert_float(match.group(i)) for i in range(1, 4)]

        return d



[docs]
    @lazy_property
    def coordinates(self):
        """
        Extract the coordinates, species and charges of the atoms.

        Valid for most of the TM executables (including the scf, escf, grad).

        Returns:
            dict with "coord", "species", "charges", and "isotopes" as lists.
        """
        # Example string parsed here:
        #               +--------------------------------------------------+
        #               | Atomic coordinate, charge and isotop information |
        #               +--------------------------------------------------+
        #
        #                     atomic coordinates            atom    charge  isotop
        #           0.00000000    0.00000000    1.00000000    h      1.000     0
        #           0.00000000    0.00000000   -1.00000000    h      1.000     0
        r = (
            r"atomic coordinates\s+atom\s+charge\s+isotop\s+(.+?)\s+"
            r"center of nuclear mass"
        )

        match = re.search(r, self.string, re.DOTALL)
        if not match:
            return None

        coords = []
        species = []
        charges = []
        isotopes = []

        for line in match.group(1).splitlines():
            line = line.strip()
            if not line:  # pragma: no cover (trivial)
                continue

            split = line.split()
            coords.append([convert_float(f) for f in split[:3]])
            species.append(split[3])
            charges.append(convert_float(split[4]))
            isotopes.append(convert_int(split[5]))

        return dict(coords=coords, species=species, charges=charges, isotopes=isotopes)



[docs]
    @lazy_property
    def basis(self):
        """
        Extract the data of the basis used for the calculation.

        It also contains the auxiliary basis in case of RI.
        Valid for most of the TM executables (including the scf, escf, grad).

        Returns:
            dict with "basis_per_specie" a dict with species as keys and name of the
            basis as values, "aux_basis_per_specie" the same for auxiliary basis,
            "number_scf_basis_func" and "number_scf_aux_basis_func".
        """
        # See a test file as an example. Information parsed from the section
        # starting with:
        #      +--------------------------------------------------+
        #      |               basis set information              |
        #      +--------------------------------------------------+
        r = r"basis set information.*?symmetry group of the molecule"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        basis_string = match.group()

        # takes the section:
        #    type   atoms  prim   cont   basis
        #    ---------------------------------------------------------------------------
        #     h        2     10      5   def-SV(P)   [2s1p|4s2p]
        #    ---------------------------------------------------------------------------
        r_atoms = r"type   atoms  prim   cont   basis\s*-{5,}(.*?)-{5,}"

        match_atoms = re.search(r_atoms, basis_string, re.DOTALL)

        basis_per_specie = {}
        if match_atoms:  # pragma: no branch
            for line in match_atoms.group(1).splitlines():
                line = line.strip()
                if not line:
                    continue

                l_split = line.split()
                basis_per_specie[l_split[0]] = l_split[4]

        # For ridft can be present the section starting
        # with "RI-J AUXILIARY BASIS SET information".
        r_atoms_aux = r"RI-J[K]? AUXILIARY BASIS SET.*?" + r_atoms

        match_atoms_aux = re.search(r_atoms_aux, basis_string, re.DOTALL)

        aux_basis_per_specie = {}
        if match_atoms_aux:
            for line in match_atoms_aux.group(1).splitlines():
                line = line.strip()
                if not line:
                    continue
                l_split = line.split()
                aux_basis_per_specie[l_split[0]] = l_split[4]

        # search the total number of basis function. This should be present at
        # maximum twice: one for the standard basis and one for the auxiliary.
        list_n_basis_func = re.findall(
            r"total number of SCF-basis functions[\s:]+?(\d+)", basis_string
        )

        len_n_basis = len(list_n_basis_func)
        if len_n_basis > 2:  # pragma: no cover (trivial)
            raise RuntimeError(
                "Found {} instances of 'total number of SCF-basis functions', while "
                "expecting at most 2.".format(len_n_basis)
            )

        if len_n_basis > 0:  # pragma: no branch (trivial)
            number_scf_basis_func = convert_int(list_n_basis_func[0])
        else:  # pragma: no cover (should not occur)
            number_scf_basis_func = None

        if len_n_basis == 2:
            number_scf_aux_basis_func = convert_int(list_n_basis_func[1])
        else:
            number_scf_aux_basis_func = None

        d = dict(
            basis_per_specie=basis_per_specie,
            aux_basis_per_specie=aux_basis_per_specie,
            number_scf_basis_func=number_scf_basis_func,
            number_scf_aux_basis_func=number_scf_aux_basis_func,
        )
        return d



[docs]
    @lazy_property
    def symmetry(self):
        """
        Extract the symmetry of the molecule and the irreducible representations.

        Valid for all the TM executables (in some case only a part of the
        information might be available).

        Returns:
            dict with symmetry "symbol", "n_reps" and list of representation
            symbols "reps".
        """
        match_symm = re.search(
            r"symmetry group of the molecule[\s:]+?([\w]+)", self.string
        )
        if match_symm:
            mol_sym_group = match_symm.group(1)
        else:
            mol_sym_group = None

        # Example: there are 4 real representations :   a1   a2   b1   b2
        match_reps = re.search(r"there are\s+(\d+)\s+real representations", self.string)
        if match_reps:
            n_reps = convert_int(match_reps.group(1))
            match_reps = re.search(
                r"there are\s+\d+\s+real representations\s*:(\s+[\w\'\"]+){"
                + str(n_reps)
                + "}",
                self.string,
                re.MULTILINE,
            )
            if not match_reps:  # pragma: no cover
                raise RuntimeError(
                    "Could not parse correctly the list of representations"
                )
            reps = match_reps.group().split()[-n_reps:]
        else:
            n_reps = None
            reps = None

        if all(x is None for x in (mol_sym_group, n_reps, reps)):
            return None

        d = dict(symbol=mol_sym_group, n_reps=n_reps, reps=reps)

        return d



[docs]
    @lazy_property
    def cosmo_header(self):
        """
        Extract the information from the header of the cosmo section (area and volume).

        Should return None if cosmo is not activated.
        Valid for scf and escf/egrad.

        Returns:
            dict with "area" and "volume".
        """
        # See a test file as an example. Selects the section of the cosmo header
        # starting with:
        # ==============================================================================
        #                       COSMO switched on
        # ==============================================================================
        r = r"COSMO switched on.*?(---)+"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        d = dict(area=None, volume=None)

        for line in match.group().splitlines():
            if "area" in line:
                d["area"] = convert_float(line.split()[-1])
            elif "volume" in line:
                d["volume"] = convert_float(line.split()[-1])

        return d



[docs]
    @lazy_property
    def density_functional_data(self):
        """
        Extract the information relative to DFT calculations (xf functional and grids).

        Valid for scf, gradient and relax executables.

        Returns:
            dict with the full string describing the functional "functional_msg",
            "functional_name", "functional_type", the version of "xcfun",
            "spherical_gridsize, number of "gridpoints" for spherical integration.
        """
        # See a test file as an example. Selects the section of dft information
        # starting with:
        #           ------------------
        #           density functional
        #           ------------------
        r = r"density functional[\s]*?-{5,}.*?(biggest|-{5,})"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        dft_string = match.group()

        d = dict(
            functional_msg=None,
            functional_name=None,
            functional_type=None,
            xcfun=None,
            spherical_gridsize=None,
            gridpoints=None,
        )

        # capture the part with the message describing the xc functional.
        # "iterations will be done with small grid" is present only for
        # modified grids (e.g. m3)
        r_func = (
            r"--\s+(.*?)\s*(iterations will be done with "
            r"small grid|spherical integration)"
        )

        if "USE AT YOUR OWN RISK" not in dft_string:  # pragma: no branch
            func_match = re.search(r_func, dft_string, re.DOTALL)

            # Matches the type of functional based on the message produced by TM.
            # The messages have been extracted for all the standard types available
            # in define. Notice that if further xc functional need be parsed, the
            # functional_strings dictionary should be updated accordingly.
            # Also arbitrary mix will be ignored (when "USE AT YOUR OWN RISK"
            # is present).
            if func_match:  # pragma: no branch
                func_msg = func_match.group(1).strip()
                d["functional_msg"] = func_msg

                for (  # pragma: no branch
                    func_name,
                    desc_list,
                ) in functional_strings.items():

                    found = False
                    for desc in desc_list:
                        if re.search(desc, func_msg, re.DOTALL):
                            d["functional_name"] = func_name
                            d["functional_type"] = functional_types[func_name]
                            found = True
                            break
                    if found:
                        break

        for line in dft_string.splitlines():
            if "XCFun" in line and "version" in line:
                d["xcfun"] = line.split()[-1]

            if "spherical gridsize" in line:
                d["spherical_gridsize"] = convert_int(line.split()[-1])

            if "i.e. gridpoints" in line:
                d["gridpoints"] = convert_int(line.split()[-1])

        return d



[docs]
    @lazy_property
    def rij_info(self):
        """
        Extract information about RI.

        Valid for ridft and escf/egrad.

        Returns:
            dict with "marij" to True if marij calculation, "rij_memory",
            "rik" to True if $rik and "ricore" memory in MB.
        """
        # Parses the section that can start with RI, RI-J or RI-JK
        #            ------------------------
        #                RI-JK - INFORMATION
        #            ------------------------
        r = r"RI[\-JK]* - INFORMATION.*?Memory allocated for.*?$"

        match = re.search(r, self.string, re.DOTALL | re.MULTILINE)

        if match is None:
            return None

        match_str = match.group()

        d = dict(marij=False, rij_memory=None, rik=False, ricore=None)

        for line in match_str.splitlines():
            if "Multipole Accelerated RI-J" in line:
                d["marij"] = True

            # stopping at "RI" should match the line for both ridft and escf
            if "Memory allocated for RI" in line:
                d["rij_memory"] = convert_int(line.split()[-2])

        match_rik_1 = (
            re.search("BLOCKING OF .*?MOS FOR RI-K", self.string, re.DOTALL) is not None
        )  # ridft
        match_rik_2 = "FOUND RI-K FLAG" in self.string  # escf
        d["rik"] = match_rik_1 or match_rik_2

        # ricore in ridft (is in another section)
        r_ricore = r"Allocatable memory for RI due to \$ricore \(MB\):\s+(\d+)"
        match = re.search(r_ricore, self.string)
        if match:
            d["ricore"] = convert_int(match.group(1))
        else:
            # ricore in escf
            r_ricore = r"Core memory available \(ricore\)\s+(\d+)"
            match = re.search(r_ricore, match_str)
            if match:  # pragma: no branch
                d["ricore"] = convert_int(match.group(1))

        return d



[docs]
    @lazy_property
    def dftd(self):
        """
        Extract information about dispersion correction in dft.

        Valid for scf executables.

        Returns:
            dict with the type of "correction" (string) and the value of
            correction "en_corr".
        """
        # assume that the DFT-D part is between these two sections
        r = r"nuclear repulsion energy(.*?)-S,T\+V- integrals"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        match_str = match.group(1)

        if "DFT-D" not in match_str:
            return None

        d = dict(correction=None, en_corr=None)

        # The output changes depending on the type of correction.
        # Is not always mentioned.
        # For D1 and D2 should be deduced from the value of the damping.
        if "DFT-D V3(BJ)" in match_str:
            d["correction"] = "D3-BJ"
        elif "DFT-D V3" in match_str:
            d["correction"] = "D3"
        else:
            damp_l = re.search(
                r"exponent of damping function \(d\)\s+(" + float_number_re + r")",
                match_str,
            )
            if damp_l:  # pragma: no branch
                damp_val = convert_float(damp_l.group(1))
                if damp_val == 20:
                    d["correction"] = "D2"
                elif damp_val == 23:  # pragma: no branch
                    d["correction"] = "D1"

        # Output for DFT-D3 has changed in Turbomole 7.5:
        # - versions < 7.5: "empirical dispersive energy correction"
        # - versions >= 7.5: "DFT-D3 Energy contribution"
        corr_l = re.search(
            r"(?:empirical dispersive energy correction|"
            r"DFT-D3 Energy contribution)[\s=]+(" + float_number_re + r")",
            match_str,
        )

        if corr_l:  # pragma: no branch
            d["en_corr"] = convert_float(corr_l.group(1))

        return d



[docs]
    @lazy_property
    def pre_scf_run(self):
        """
        Extract "pre-scf loop" information extracted about the scf calculation.

        The information extracted is printed before running the scf loop.

        Valid only for scf executables.

        Returns:
            dict with "diis" True if activated, diis_error_vect", "conv_tot_en",
            "conv_one_e_en", "virtual_orbital_shift_on",
            "virtual_orbital_shift_limit", "orbital_characterization",
            "restart_file", "n_occupied_orbitals".
        """
        # large portion to include several sections
        r = r"basis set information(.*?)ITERATION  ENERGY"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        match_str = match.group(1)

        d = dict(
            diis=False,
            diis_error_vect=None,
            conv_tot_en=None,
            conv_one_e_en=None,
            virtual_orbital_shift_on=False,
            virtual_orbital_shift_limit=None,
            orbital_characterization=None,
            restart_file=None,
            n_occupied_orbitals=None,
        )

        # analyze the string line by line to find those corresponding to the
        # values to parse.
        for line in match_str.splitlines():
            if "DIIS switched on" in line:
                d["diis"] = True
                d["diis_error_vect"] = line.split()[-1]

            if "increment of total energy" in line:
                d["conv_tot_en"] = convert_float(line.split()[-1])

            if "increment of one-electron energy" in line:
                d["conv_one_e_en"] = convert_float(line.split()[-1])

            if "automatic virtual orbital shift switched on" in line:
                d["virtual_orbital_shift_on"] = True

            if "shift if e(lumo)-e(homo)" in line:
                d["virtual_orbital_shift_limit"] = convert_float(line.split()[-1])

            if "orbital characterization" in line:
                d["orbital_characterization"] = line.split()[-1]

            if "DSCF restart information will be dumped onto file" in line:
                d["restart_file"] = line.split()[-1]

            if "number of occupied orbitals" in line:
                d["n_occupied_orbitals"] = convert_int(line.split()[-1])

        return d



[docs]
    @lazy_property
    def scf_iterations(self):
        """
        Extract the data for each iteration of the scf loop.

        Valid only for scf executables.

        Returns:
            dict with the list of values extracted for each scf step
            "energies", "dampings". The value of the "first_index" and "n_steps".
            "converged" True if the calculation is converged.
        """
        # parsing an iteration of a block like:
        #                                               current damping :  0.700
        #  ITERATION  ENERGY         1e-ENERGY       2e-ENERGY    RMS[dD(SAO)]   TOL
        #    1  -76.143808666911   -124.43866883    38.987705388   0.000D+00 0.175D-08

        std_float = r"\s+(" + float_number_re + ")"
        d_float = r"\s+(" + float_number_d_re + ")"
        r = r"^\s*(\d+)" + std_float * 3 + d_float * 2 + r"\s*$"

        match_l = re.findall(r, self.string, re.MULTILINE)

        if not match_l:
            return None

        iter_indices = []
        energies = []
        for line in match_l:
            iter_indices.append(convert_int(line[0]))
            energies.append(convert_float(line[1]))

        # dscf has a ":" while ridft as a "="
        dampings = re.findall(r"current damping[\s:=]+" + std_float, self.string)
        if dampings:  # pragma: no branch
            dampings = [convert_float(d) for d in dampings]

        converged = "convergence criteria satisfied after " in self.string

        d = dict(
            energies=energies,
            first_index=iter_indices[0],
            n_steps=len(iter_indices),
            dampings=dampings,
            converged=converged,
        )

        return d



[docs]
    @lazy_property
    def scf_energies(self):
        """
        Extract the final energies for scf calculation.

        Valid only for scf executables.

        Returns:
            dict with the energy and its contributions: "total_energy",
            "kinetic_energy", "potential_energy", "virial_theorem",
            "wavefunction_norm".
        """
        # Parsing the section:
        #              ------------------------------------------
        #              |  total energy      =    -76.34276215784  |
        #              ------------------------------------------
        #              :  kinetic energy    =     75.98641545802  :
        #              :  potential energy  =   -152.32917761586  :
        #              :  virial theorem    =      1.99533227919  :
        #              :  wavefunction norm =      1.00000000000  :
        #              ..........................................
        r = r"\|\s+total energy(.*?)\.{3,}"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        d = dict(
            total_energy=None,
            kinetic_energy=None,
            potential_energy=None,
            virial_theorem=None,
            wavefunction_norm=None,
        )

        for line in match.group().splitlines():
            for k in d.keys():
                if " ".join(k.split("_")) in line:
                    d[k] = convert_float(line.split()[-2])
                    break

        return d



[docs]
    @lazy_property
    def riper_scf_energies(self):
        """
        Extract the final energies for scf calculation with riper executable.

        Returns:
            dict with the energy and its contributions: "total_energy",
            "kinetic_energy", "coulomb_energy", "xc_energy", "ts_energy",
            "free_energy" and "sigma0_energy".
        """
        # Parsing the section:
        #               +--------------------------------------------------+
        #               |                FINAL ENERGIES                    |
        #               +--------------------------------------------------+
        #               | KINETIC ENERGY       =          75.3219906303    |
        #               | COULOMB ENERGY       =        -140.6650084669    |
        #               | EXCH. & CORR. ENERGY =         -10.8758949666    |
        #               |==================================================|
        #               | TOTAL ENERGY         =         -76.2189128032    |
        #               | T*S                  =          -0.0000000000    |
        #               | FREE  ENERGY         =         -76.2189128032    |
        #               | ENERGY (sigma->0)    =         -76.2189128032    |
        #               +--------------------------------------------------+
        # Note that "FINAL ENERGIES" is needed here because the rest of the table is
        # also provided for each scf iteration.
        r = r"FINAL ENERGIES\s+\|\s+\+-{50,}\+\s+\|\s+KINETIC ENERGY(.*?)\+-{50,}\+"

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        d = dict(
            total_energy=None,
            kinetic_energy=None,
            coulomb_energy=None,
            xc_energy=None,
            ts_energy=None,
            free_energy=None,
            sigma0_energy=None,
        )

        for line in match.group().splitlines():
            if "KINETIC ENERGY" in line:
                d["kinetic_energy"] = convert_float(line.split("=")[1].split()[-2])
            elif "COULOMB ENERGY" in line:
                d["coulomb_energy"] = convert_float(line.split("=")[1].split()[-2])
            elif "EXCH. & CORR. ENERGY" in line:
                d["xc_energy"] = convert_float(line.split("=")[1].split()[-2])
            elif "TOTAL ENERGY" in line:
                d["total_energy"] = convert_float(line.split("=")[1].split()[-2])
            elif "T*S" in line:
                d["ts_energy"] = convert_float(line.split("=")[1].split()[-2])
            elif "FREE  ENERGY" in line:
                d["free_energy"] = convert_float(line.split("=")[1].split()[-2])
            elif "ENERGY (sigma->0)" in line:
                d["sigma0_energy"] = convert_float(line.split("=")[1].split()[-2])

        return d



[docs]
    @lazy_property
    def cosmo_results(self):
        """
        Extract the results of cosmo.

        Valid only for scf executables.

        Returns:
            dict with several subdictionaries: "parameters" ("nppa", "nspa", "nsph",
            "npspher", "disex", "disex2", "rsolv", "routf", "phsran", "ampran",
            "cavity", "epsilon", "refind", "fepsi"), "screening_charge"
            ("cosmo", "correction", "total"), "energies"
            ("total_energy", "total_energy_oc_corr", "dielectric_energy",
            "dielectric_energy_oc_corr"), "element_radius" ("radius, "sites").
        """
        # See test file for an example. Parses the section starting with:
        #  ==============================================================================
        #                                   COSMO RESULTS
        #  ==============================================================================
        # and splits in block for the section "CAVITY VOLUME/AREA", "SCREENING CHARGE",
        # "ENERGIES", "ELEMENT RADIUS"
        r = (
            r"COSMO RESULTS\s*(===)+.*?PARAMETER:(.*?)CAVITY VOLUME/AREA(.*?)"
            r"SCREENING CHARGE:(.*?)ENERGIES(.*?)"
            r"ELEMENT RADIUS \[A\]: ATOM LIST(.*?)(===)+"
        )

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        # In this section a list of keyword_name: value are parsed, one for each line.
        # this dictionary allows to loop over all the lines and cast the value to the
        # correct type.
        key_types_parameters = dict(
            nppa=int,
            nspa=int,
            nsph=int,
            nps=int,
            npspher=int,
            disex=float,
            disex2=float,
            rsolv=float,
            routf=float,
            phsran=float,
            ampran=float,
            cavity=str,
            solvent=str,
            source=str,
            epsilon=float,
            refind=float,
            fepsi=float,
        )

        d_parameters = {k: None for k in key_types_parameters.keys()}

        for line in match.group(2).splitlines():
            for k, t in key_types_parameters.items():
                # NB here the regex is tailored to explicitly capture the string that
                # need to be parsed. For example:
                # disex2: 3538.50
                # rsolv [A]: 1.3000
                # if more are added the regex may need an update.
                if re.search(k + r"[\s\[A\]]*:", line):
                    if k in ("epsilon", "refind"):
                        # In Turbomole 7.8, the epsilon and refind parameters lines
                        # have been modified from
                        #   epsilon: <VALUE>
                        #   refind: <VALUE>
                        # to
                        #   epsilon: <VALUE> ( <TEMP> C)
                        #   refind: <VALUE> ( <TEMP> C)
                        # This deals with both pre-7.8 and post-7.8 Turbomole versions.
                        d_parameters[k] = t(line.split()[1])
                    elif k in ("solvent", "source"):
                        d_parameters[k] = t(line.split(f"{k}:")[1].strip())
                    else:
                        d_parameters[k] = t(line.split()[-1])
                    break

        d_screening = dict(cosmo=None, correction=None, total=None)

        for line in match.group(4).splitlines():
            if "cosmo" in line:
                d_screening["cosmo"] = convert_float(line.split()[-1])
            if "correction" in line:
                d_screening["correction"] = convert_float(line.split()[-1])
            if "total" in line:
                d_screening["total"] = convert_float(line.split()[-1])

        d_energies = dict(
            total_energy=None,
            total_energy_oc_corr=None,
            dielectric_energy=None,
            dielectric_energy_oc_corr=None,
        )

        for line in match.group(5).splitlines():
            if re.search(r"Total energy\s*=", line):
                d_energies["total_energy"] = convert_float(line.split()[-1])
            if re.search(r"Total energy \+ OC corr\.\s*=", line):
                d_energies["total_energy_oc_corr"] = convert_float(line.split()[-1])
            if re.search(r"Dielectric energy\s*=", line):
                d_energies["dielectric_energy"] = convert_float(line.split()[-1])
            if re.search(r"Diel\. energy \+ OC corr\.\s+=", line):
                d_energies["dielectric_energy_oc_corr"] = convert_float(
                    line.split()[-1]
                )

        d_element_radius = {}

        for line in match.group(6).splitlines():
            if not line.strip():
                continue

            split = line.split()
            # list of sites are separated by "," and can be defined as
            # intervals with "-". Example: 1,3-5,7
            sites = []
            for s in split[2].split(","):
                if "-" in s:
                    sites_split = s.split("-")
                    sites.extend(
                        range(
                            convert_int(sites_split[0]), convert_int(sites_split[1]) + 1
                        )
                    )
                else:
                    sites.append(convert_int(s))
            d_element_radius[split[0]] = {
                "radius": convert_float(split[1].replace(":", "")),
                "sites": sites,
            }

        d = dict(
            parameters=d_parameters,
            screening_charge=d_screening,
            energies=d_energies,
            element_radius=d_element_radius,
        )

        return d



[docs]
    @lazy_property
    def electrostatic_moments(self):
        """
        Extract the information about the electrostatic moment (dipole and quadrupole).

        Valid only for scf executables.

        Returns:
            dict with single "dipole" and "quadrupole" moments.
        """
        # See test files for an example. Parses the section starting with:
        #  ==============================================================================
        #                            electrostatic moments
        #  ==============================================================================
        r = (
            r"electrostatic moments\s*(==)+.*?charge\s*-+(.*?)-+\s*dipole moment(.*?)"
            r"quadrupole moment(.*?anisotropy=\s*" + float_number_all_re + r")"
        )
        # does not stop at "============" since there might be the
        # PABOON section before that.

        match = re.search(r, self.string, re.DOTALL)

        if match is None:
            return None

        d = {"unrestricted_electrons": None}

        for line in match.group(2).splitlines():
            if not line.strip():
                continue
            elif "a-b" in line:
                d["unrestricted_electrons"] = convert_float(line.split()[-1])
            else:
                d["charge"] = convert_float(line.split()[-1])

        dipole = [0, 0, 0]
        d_dipole = {}
        for line in match.group(3).splitlines():
            if " x " in line:
                dipole[0] = convert_float(line.split()[-1])
            elif " y " in line:
                dipole[1] = convert_float(line.split()[-1])
            elif " z " in line:
                dipole[2] = convert_float(line.split()[-1])
            elif "| dipole moment |" in line:
                d_dipole["norm"] = convert_float(line.split()[5])

        d_dipole["moment"] = dipole

        quadrupole = np.zeros((3, 3))
        d_quadrupole = {}
        for line in match.group(4).splitlines():
            if " xx " in line:
                quadrupole[0, 0] = convert_float(line.split()[-1])
            elif " yy " in line:
                quadrupole[1, 1] = convert_float(line.split()[-1])
            elif " zz " in line:
                quadrupole[2, 2] = convert_float(line.split()[-1])
            elif " xy " in line:
                quadrupole[0, 1] = quadrupole[1, 0] = convert_float(line.split()[-1])
            elif " xz " in line:
                quadrupole[0, 2] = quadrupole[2, 0] = convert_float(line.split()[-1])
            elif " yz " in line:
                quadrupole[1, 2] = quadrupole[2, 1] = convert_float(line.split()[-1])
            elif "trace=" in line:
                d_quadrupole["trace"] = 3 * convert_float(line.split()[-1])
            elif "anisotropy" in line:
                d_quadrupole["anisotropy"] = convert_float(line.split()[-1])

        d_quadrupole["moment"] = quadrupole.tolist()

        d["dipole"] = d_dipole
        d["quadrupole"] = d_quadrupole

        return d



[docs]
    @lazy_property
    def timings(self):
        """
        Extract the cpu and wall time for the calculation.

        Valid for all the Turbomole executables.

        Returns:
            dict with "cpu_time" and "wall_time" in seconds.
        """
        #          total  cpu-time :   0.74 seconds
        #          total wall-time :   0.79 seconds
        r = r"(total\s+cpu-time.*?)(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3})"

        match = re.search(r, self.string, re.DOTALL)
        if not match:
            return None

        d = dict(
            cpu_time=None,
            wall_time=None,
            end_time=datetime.datetime.strptime(match.group(2), date_format),
        )

        for line in match.group(1).splitlines():
            if "cpu-time" in line:
                d["cpu_time"] = convert_time_string(line.split(":")[1])
            if "wall-time" in line:
                d["wall_time"] = convert_time_string(line.split(":")[1])

        return d



[docs]
    @lazy_property
    def s2(self):
        """
        Extract the value S^2 of the spin.

        Valid only for scf executables.

        Returns:
            dict containing "s2".
        """
        # Parsing:  <S*S>     0.75205422
        r = r"<S\*S>\s+?(" + float_number_re + r")"

        match = re.search(r, self.string)

        if not match:
            return None

        return {"s2": convert_float(match.group(1))}



[docs]
    @lazy_property
    def is_uhf(self):
        """Determine if the calculation is a UHF one.

        Valid for scf, gradient and escf executables.

        Returns:
            bool.
        """
        # "mode" appears in dscf and escf, while "modus" in ridft and grad.
        # "UHF mode switched on" has also disappeared in TM >= 7.7. Checking from
        # "RPA UHF-EXCITATION-CALCULATION" instead.
        is_uhf = (
            re.search("UHF (mode|modus) switched on", self.string) is not None
            or re.search("RPA UHF-EXCITATION-CALCULATION", self.string) is not None
        )
        return is_uhf



[docs]
    @lazy_property
    def fermi(self):
        """
        Extract the information about smearing of occupations.

        Valid only for scf executables.

        Returns:
            dict with "initial_elec_temp", "final_elec_temp",
            "annealing_homo_lumo_gap_limit", "smearing_de_limit".
        """
        # parses the section:
        #     Fermi smearing switched on
        #       Initial electron temperature:   500.00
        #         Final electron temperature:    50.00
        #                   Annealing factor:    0.900
        #        Annealing if HOMO-LUMO gap <  0.2E+00
        #       Smearing switched off if DE <  0.1E-02

        # this shows up before the start of the scf iterations. Since the beginning
        # of that may change (at least depending if it's a new calculation or a
        # restart) the end of the regex is on the last line and the value.
        r = (
            r"Fermi smearing switched on.*?Smearing switched off if DE\s+<\s+"
            + float_number_e_re
        )

        match = re.search(r, self.string, re.DOTALL)

        if not match:
            return None

        d = dict(
            initial_elec_temp=None,
            final_elec_temp=None,
            annealing_homo_lumo_gap_limit=None,
            smearing_de_limit=None,
        )

        for line in match.group().splitlines():
            if "Initial electron temperature" in line:
                d["initial_elec_temp"] = convert_float(line.split()[-1])
            if "Final electron temperature" in line:
                d["final_elec_temp"] = convert_float(line.split()[-1])
            if "Annealing factor" in line:
                d["annealing_factor"] = convert_float(line.split()[-1])
            if "Annealing if HOMO-LUMO gap" in line:
                d["annealing_homo_lumo_gap_limit"] = convert_float(line.split()[-1])
            if "Smearing switched off if DE" in line:
                d["smearing_de_limit"] = convert_float(line.split()[-1])

        return d



[docs]
    @lazy_property
    def integral(self):
        """
        Extract the thresholds for integrals.

        Valid for scf, gradient and escf executables.

        Returns:
            dict with "integral_neglect_threshold", "thize", "thime".
        """
        # parses the section
        #  integral neglect threshold       :  0.18E-08
        #  integral storage threshold THIZE :  0.10E-04
        #  integral storage threshold THIME :         5

        d = dict(integral_neglect_threshold=None, thize=None, thime=None)

        match = re.search(r"integral neglect threshold.*?$", self.string, re.MULTILINE)
        if match is not None:
            d["integral_neglect_threshold"] = convert_float(match.group().split()[-1])

        match = re.search(
            r"integral storage threshold THIZE.*?$", self.string, re.MULTILINE
        )
        if match is not None:
            d["thize"] = convert_float(match.group().split()[-1])

        match = re.search(
            r"integral storage threshold THIME.*?$", self.string, re.MULTILINE
        )
        if match is not None:
            d["thime"] = convert_int(match.group().split()[-1])

        if all(v is None for v in d.values()):
            return None

        return d



[docs]
    @lazy_property
    def pre_escf_run(self):
        """
        Extract the "pre-escf iterations" information about the escf calculation.

        This information is printed before running the escf iterations.
        Valid for escf and egrad executables.

        Returns:
            dict with "calc_type", "residuum_convergence_criterium",
            "n_occupied_orbitals", "orbital_characterization", "max_davidson_iter",
            "machine_precision", "max_core_mem",
            "max_cao_basis_vectors", "max_treated_vectors", "irrep_data".
        """
        # See test files for examples. The values spread over a relatively
        # large number of lines and the lines are matched individually.

        # large portion of the code considered to accommodate the various
        # pieces of information.
        r = r"basis set information.+Iteration\s+IRREP\s+Converged"

        match = re.search(r, self.string, re.DOTALL)

        if not match:
            return None

        match_str = match.group()

        d = dict(
            calc_type=None,
            residuum_convergence_criterium=None,
            n_occupied_orbitals=None,
            orbital_characterization=None,
            max_davidson_iter=None,
            machine_precision=None,
            max_core_mem=None,
            max_cao_basis_vectors=None,
            max_treated_vectors=None,
            irrep_data=None,
        )

        match = re.search(r"^(.*)[\-\s]CALCULATION\s+", match_str, re.MULTILINE)
        if match is not None:
            d["calc_type"] = match.group(1).strip()

        match = re.search(
            r"residuum convergence criterium.*?$", match_str, re.MULTILINE
        )
        if match is not None:
            d["residuum_convergence_criterium"] = convert_float(
                match.group().split()[-1]
            )

        match = re.search(r"number of occupied orbitals.*?$", match_str, re.MULTILINE)
        if match is not None:  # pragma: no branch
            d["n_occupied_orbitals"] = convert_int(match.group().split()[-1])

        match = re.search(r"orbital characterization.*?$", match_str, re.MULTILINE)
        if match is not None:  # pragma: no branch
            d["orbital_characterization"] = match.group().split()[-1]

        match = re.search(
            r"maximum number of Davidson iterations set to.*?$", match_str, re.MULTILINE
        )
        if match is not None:
            d["max_davidson_iter"] = convert_int(match.group().split()[-1])

        match = re.search(r"machine precision.*?$", match_str, re.MULTILINE)
        if match is not None:  # pragma: no branch
            d["machine_precision"] = convert_float(match.group().split()[-1])

        match = re.search(r"maximum core memory set to.*?$", match_str, re.MULTILINE)
        if match is not None:  # pragma: no branch
            d["max_core_mem"] = convert_float(match.group().split()[-2])

        match = re.search(r"corresponding to\s+(\d+)\s+vectors in CAO basis", match_str)
        if match is not None:  # pragma: no branch
            d["max_cao_basis_vectors"] = convert_int(match.group(1))

        match = re.search(
            r"maximum number of simultaneously treated vectors.*?$",
            match_str,
            re.MULTILINE,
        )
        if match is not None:  # pragma: no branch
            d["max_treated_vectors"] = convert_int(match.group().split()[-1])

        # In TM versions < 7.7, IRREP's block is like:
        #  dimension of super-tensorspace: 1
        #
        #          IRREP   tensor space dimension   number of roots
        #
        #           a1               24                  12
        #           a2                9                   9
        #           b1               19                  12
        #           b2               13                  12
        #
        # maximum number of Davidson iterations set to   35
        #
        #
        # machine precision: 2.22D-16
        #
        # While in TM versions >= 7.7, the "maximum number of Davidson" disappeared
        # (it actually went somewhere else in the output file), so the blocks is like:
        #  dimension of super-tensorspace: 1
        #
        #          IRREP   tensor space dimension   number of roots
        #
        #           a1                 24                     12
        #           a2                  9                      9
        #           b1                 19                     12
        #           b2                 13                     12
        #
        # machine precision: 2.22D-16
        #
        # => Matching regex using the line before the IRREPs ("dimension of super...").

        r = (
            r"dimension\s+of\s+super-tensorspace:\s+\d\s+"
            r"IRREP\s+tensor\s+space\s+dimension\s+number\s+of\s+roots\s*\n\s*"
            r"(\S+\s+\d+\s+\d+\s*\n\s*)+\n"
        )

        match = re.search(r, match_str, re.DOTALL)
        if match is not None:
            irrep_data = {}

            line_pattern = re.compile(r"\s*(\S+)\s+(\d+)\s+(\d+)\s*")
            for line_match in line_pattern.finditer(match.group(0)):
                irrep_name, tensor_space_dim, n_roots = line_match.groups()
                irrep_data[irrep_name] = {
                    "tensor_space_dim": convert_int(tensor_space_dim),
                    "n_roots": convert_int(n_roots),
                }
            d["irrep_data"] = irrep_data

        return d



[docs]
    @lazy_property
    def escf_iterations(self):
        """
        Extract the data for each iteration of the escf loop.

        Valid for escf and egrad executables.

        Returns:
            dict "converged" to True if the calculation is converged and a list
                of "steps". One element of the list for each escf step.
                One element of the sublist for each irrep. The inner list contains
                the convergence information:
                [name of the irrep, number of converged roots, euclidean residual norm].
        """
        # splits the different iterations, parsing the section starting with
        #  Iteration IRREP Converged      Max. Euclidean
        #                  roots          residual norm
        #
        #     1       a1       0        3.367862225107080D-01
        #             a2       9        6.722471267583007D-14

        r = (
            r"Iteration\s+IRREP\s+Converged\s+Max\.\s+Euclidean\s+roots\s+"
            r"residual\s+norm\s+(.*?)(converged|Warning)"
        )
        match = re.search(r, self.string, re.DOTALL)

        if not match:
            return None

        iter_str = match.group(1)

        def convert_line(ls):
            ls[1] = convert_int(ls[1])
            ls[2] = convert_float(ls[2])
            return ls

        iter_steps_list = []
        iter_step = None
        for line in iter_str.splitlines():
            line = line.strip()
            if not line:
                continue

            split = line.split()
            if len(split) == 4:
                if iter_step is not None:
                    iter_steps_list.append(iter_step)
                iter_step = [convert_line(split[1:])]
            elif len(split) == 3:  # pragma: no branch
                iter_step.append(convert_line(split))
            else:  # pragma: no cover
                raise RuntimeError("wrong line {} in escf iterations".format(line))
        if iter_step:  # pragma: no branch
            iter_steps_list.append(iter_step)

        converged = "converged" in match.group(2)

        d = dict(steps=iter_steps_list, converged=converged)

        return d



[docs]
    @lazy_property
    def escf_gs_total_en(self):
        """
        Extract the value of the final total energy of escf.

        Valid for escf and egrad executables.

        Returns:
            float.
        """
        # Parses:
        #                             Ground state
        #  Total energy:                           -76.34301407423000
        r = r"Ground\s+state\s+Total\s+energy:\s+(" + float_number_re + r")"

        match = re.search(r, self.string, re.DOTALL)

        if not match:
            return None
        else:
            return convert_float(match.group(1))



[docs]
    @lazy_property
    def escf_excitations(self):
        """
        Extract all the values for each calculated excitation.

        Valid for escf and egrad executables.

        Returns:
            dict with name of irreps as keywords and list of excitations as values.
            For each excitation, a dict with: "tot_en", "osc_stre", "rot_stre",
            "dominant_contributions" (list of contributions), "moments_columns"
            (list of dicts value of electric and magnetic moment for each column).
        """
        # See test files for example. Splits the sections for each irrep delimited by
        #  ==============================================================================
        #
        #                               I R R E P   a1
        #
        #  ==============================================================================
        r = (
            r"={3,}\s+I R R E P\s+["
            + irrep_re_group
            + r"]+\s+={3,}.*?(?=={3,}|SUMMARY|-{5,})"
        )

        match_irreps = re.findall(r, self.string, re.DOTALL)

        if not match_irreps:
            return None

        # Compile the regex beforehand since they will be executed several times.

        # Splits the excitations for each irrep. For example a group starting with:
        #                          1 singlet a1 excitation
        exc_head_group = (
            r"^\s+\d+\s+(?:singlet\s+|triplet\s+|)["
            + irrep_re_group
            + r"]+?\s+excitation"
        )
        r_excited = exc_head_group + r".*?(?=" + exc_head_group + r"|\Z)"
        regex_excited = re.compile(r_excited, re.DOTALL | re.MULTILINE)

        # Finds the value of the total energy
        r_tot_en = r"Total energy:\s+(" + float_number_all_re + r")"
        regex_tot_en = re.compile(r_tot_en)

        # Parses:
        #  Oscillator strength:
        #     velocity representation:             0.1689055727340166
        #     length representation:               0.7845154315911747E-01
        #     mixed representation:                0.1151125659046751
        r_osc = (
            r"Oscillator strength.*?length representation:\s+("
            + float_number_all_re
            + r")"
        )
        regex_osc = re.compile(r_osc, re.DOTALL)

        # Parses:
        #  Rotatory strength:
        #     velocity representation:              0.000000000000000
        #     velocity rep. / 10^(-40)erg*cm^3:     0.000000000000000
        #     length representation:                0.000000000000000
        #     length rep. / 10^(-40)erg*cm^3:       0.000000000000000
        r_rot = (
            r"Rotatory strength.*?length representation:\s+("
            + float_number_all_re
            + r")"
        )
        regex_rot = re.compile(r_rot, re.DOTALL)

        # Parses the dominant contributions. Example:
        #  Dominant contributions:
        #     occ. orbital  energy / eV  virt. orbital    energy / eV   |coeff.|^2*100
        #       3 a1            -8.44          4 a1             0.92       99.2
        #
        # Different points to stop depending on singlet, triplet or uhf.
        r_dom_contrib = (
            r"Dominant contributions:\s+.*?coeff\.\|\^2\*100\s+(.*?)(?="
            r"Change of electron number|<|S\^2|1st)"
        )
        regex_dom_contrib = re.compile(r_dom_contrib, re.DOTALL | re.MULTILINE)

        # The following parse the section with the electric and magnetic moments
        r_elec_dip = (
            r"Electric transition dipole moment "
            r"\(length rep\.\):(.*?)Magnetic transition"
        )
        regec_elec_dip = re.compile(r_elec_dip, re.DOTALL)

        r_mag_dip = r"Magnetic transition dipole moment / i:(.*?)Electric quadrupole"
        regec_mag_dip = re.compile(r_mag_dip, re.DOTALL)

        r_elec_quad = r"Electric quadrupole transition moment:(.*)"
        regec_elec_quad = re.compile(r_elec_quad, re.DOTALL)

        excited_data = {}

        for irrep_group in match_irreps:
            r = r"I R R E P\s+([" + irrep_re_group + r"]+)"
            irrep = re.search(r, irrep_group).group(1)

            match_excitations = regex_excited.findall(irrep_group)
            if not match_excitations:  # pragma: no cover
                raise RuntimeError(
                    "Could not extract the single excitation contributions"
                )

            single_excitations_list = []
            # loop over the excitations
            for exc_group in match_excitations:
                exc_data = dict(
                    tot_en=None,
                    osc_stre=None,
                    rot_stre=None,
                    dominant_contributions=None,
                    moments_columns=None,
                )

                match_tot_en = regex_tot_en.search(exc_group)
                if match_tot_en:  # pragma: no branch
                    exc_data["tot_en"] = convert_float(match_tot_en.group(1))

                match_osc_stre = regex_osc.search(exc_group)
                if match_osc_stre:  # pragma: no branch
                    exc_data["osc_stre"] = convert_float(match_osc_stre.group(1))

                match_rot_stre = regex_rot.search(exc_group)
                if match_rot_stre:  # pragma: no branch
                    exc_data["rot_stre"] = convert_float(match_rot_stre.group(1))

                match_dominant = regex_dom_contrib.search(exc_group)
                if match_dominant:  # pragma: no branch
                    dominant_contributions = []
                    # loop over dominant contributions to the excitation
                    for line in match_dominant.group(1).splitlines():
                        line = line.strip()
                        if not line:
                            continue
                        s = line.split()
                        # there could be 7 or 9 columns depending if it is a rhf
                        # or uhf, respectively.
                        if len(s) == 7:
                            occ_orb = dict(
                                index=convert_int(s[0]),
                                irrep=s[1],
                                energy=convert_float(s[2]),
                                spin=None,
                            )
                            virt_orb = dict(
                                index=convert_int(s[3]),
                                irrep=s[4],
                                energy=convert_float(s[5]),
                                spin=None,
                            )
                            dominant_contributions.append(
                                dict(
                                    occ_orb=occ_orb,
                                    virt_orb=virt_orb,
                                    coeff=convert_float(s[6]),
                                )
                            )
                        elif len(s) == 9:  # pragma: no branch
                            occ_orb = dict(
                                index=convert_int(s[0]),
                                irrep=s[1],
                                energy=convert_float(s[3]),
                                spin=s[2],
                            )
                            virt_orb = dict(
                                index=convert_int(s[4]),
                                irrep=s[5],
                                energy=convert_float(s[7]),
                                spin=s[6],
                            )
                            dominant_contributions.append(
                                dict(
                                    occ_orb=occ_orb,
                                    virt_orb=virt_orb,
                                    coeff=convert_float(s[8]),
                                )
                            )
                        else:  # pragma: no cover
                            raise RuntimeError(
                                "error while parsing line of escf dominant "
                                "contribution: {}".format(line)
                            )

                    exc_data["dominant_contributions"] = dominant_contributions

                # Depending on the irrep there might be more "columns" with the dipole
                # and quadrupole values. Generate a list of dictionaries with
                # these data.
                moments_columns = []
                if " column" in exc_group:
                    columns_to_parse = exc_group.split(" column")[1:]
                else:
                    columns_to_parse = [exc_group]

                # loop over the columns for the electric and magnetic moments.
                for str_c in columns_to_parse:
                    moments_data = {}
                    match_elec_dip = regec_elec_dip.search(str_c)
                    if match_elec_dip:  # pragma: no branch
                        el_dip = []
                        for line in match_elec_dip.group(1).splitlines():
                            line = line.strip()
                            if line and line[0] in ("x", "y", "z"):
                                el_dip.append(convert_float(line.split()[1]))

                        moments_data["electric_dipole"] = el_dip

                    match_mag_dip = regec_mag_dip.search(str_c)
                    if match_mag_dip:  # pragma: no branch
                        mag_dip = []
                        for line in match_mag_dip.group(1).splitlines():
                            line = line.strip()
                            if line and line[0] in ("x", "y", "z"):
                                mag_dip.append(convert_float(line.split()[1]))

                        moments_data["magnetic_dipole"] = mag_dip

                    match_elec_quad = regec_elec_quad.search(str_c)
                    if match_elec_quad:  # pragma: no branch
                        quadrupole = np.zeros((3, 3))
                        d_quadrupole = {}
                        for line in match_elec_quad.group(1).splitlines():
                            s = line.split()
                            if " xx " in line:
                                quadrupole[0, 0] = convert_float(s[1])
                            elif " yy " in line:
                                quadrupole[1, 1] = convert_float(s[1])
                                d_quadrupole["trace"] = 3 * convert_float(s[-1])
                            elif " zz " in line:
                                quadrupole[2, 2] = convert_float(s[1])
                            elif " xy " in line:
                                quadrupole[0, 1] = quadrupole[1, 0] = convert_float(
                                    s[1]
                                )
                            elif " xz " in line:
                                quadrupole[0, 2] = quadrupole[2, 0] = convert_float(
                                    s[1]
                                )
                                d_quadrupole["anisotropy"] = convert_float(s[-1])
                            elif " yz " in line:
                                quadrupole[1, 2] = quadrupole[2, 1] = convert_float(
                                    s[1]
                                )

                        d_quadrupole["moment"] = quadrupole.tolist()
                        moments_data["electric_quadrupole"] = d_quadrupole

                    if moments_data:  # pragma: no branch
                        moments_columns.append(moments_data)

                if moments_columns:  # pragma: no branch
                    exc_data["moments_columns"] = moments_columns

                single_excitations_list.append(exc_data)

            excited_data[irrep] = single_excitations_list

        return excited_data



[docs]
    @lazy_property
    def rdgrad_memory(self):
        """
        Extract the memory allocated for rdgrad.

        Valid for rdgrad.

        Returns:
            int.
        """
        # Parses: Memory allocated for RDGRAD: 1 MiB
        r = r"Memory allocated for RDGRAD.*$"

        match = re.search(r, self.string, re.MULTILINE)

        if not match:
            return None
        else:
            return convert_int(match.group().split()[-2])



[docs]
    @lazy_property
    def gradient(self):
        """
        Extract the values of the gradients and dielectric data.

        Valid for gradient executables.

        Returns:
            dict with "gradients", a matrix of shape (natoms, 3) containing the
            values of the cartesian gradients, and "dielectric".
        """
        # Parse the section starting with:
        #           ------------------------------------------------
        #            cartesian gradient of the energy (hartree/bohr)
        #           ------------------------------------------------
        r = "cartesian gradient of the energy.*cartesian gradients written onto"
        match = re.search(r, self.string, re.DOTALL)

        if not match:
            return None

        grad_str = match.group()

        grad_data = {}

        # Parse lines of the form:
        # dE/dx  0.0000000D+00 -0.2705802D-01  0.2705802D-01
        # dE/dy  0.0000000D+00  0.0000000D+00  0.0000000D+00
        # dE/dz  0.4144103D-01 -0.2070390D-01 -0.2070390D-01
        #
        # Notice that they can span multiple sets of lines.

        gradients = [[], [], []]
        for i, d in enumerate(["x", "y", "z"]):
            match = re.findall(r"dE/d" + d + r"(.*?)$", grad_str, re.MULTILINE)
            for g_vals in match:
                g = [convert_float(f) for f in g_vals.split()]
                gradients[i].extend(g)

        grad_data["gradients"] = np.transpose(gradients).tolist()

        # Parses the lines:
        #  exx =      -0.076690 eyy =       0.000000 ezz =      -0.045091
        #  eyz =       0.000000 exz =      -0.000000 exy =       0.000000

        dielectric = np.zeros((3, 3))

        for line in grad_str.splitlines():
            if "exx" in line:
                s = line.split()
                dielectric[0, 0] = convert_float(s[2])
                dielectric[1, 1] = convert_float(s[5])
                dielectric[2, 2] = convert_float(s[8])
            elif "eyz" in line:
                s = line.split()
                dielectric[1, 2] = dielectric[2, 1] = convert_float(s[2])
                dielectric[0, 2] = dielectric[2, 0] = convert_float(s[5])
                dielectric[0, 1] = dielectric[1, 0] = convert_float(s[8])

        grad_data["dielectric"] = dielectric.tolist()

        return grad_data



[docs]
    @lazy_property
    def egrad_excited_state(self):
        """
        Extract the information about the excited state chosen for optimization.

        Valid only for egrad.

        Returns:
            dict with "index" indicating the (1-based) index of the state used for
            optimization and "default" to True if the default values used for $exopt.
        """
        # Parses:  Excited state no.    3 chosen for optimization
        r = r"Excited state no.\s+(\d+)\s+chosen for optimization"
        match = re.search(r, self.string)

        if not match:
            return None

        data = dict(index=convert_int(match.group(1)))
        data["default"] = "Data group $exopt missing or empty" in self.string

        return data



[docs]
    @lazy_property
    def statpt_info(self):
        """
        Extract the initial information provided in statpt.

        Valid only for statpt.

        Returns:
            dict with "max_trust_radius", "min_trust_radius", "init_trust_radius",
            "min_grad_norm_for_gdiis", "prev_steps_for_gdiis", "hessian_update_method",
            "thr_energy_change", "thr_max_displ", "thr_max_grad",
            "thr_rms_displ", "thr_rms_grad", "use_defaults", "final_step_radius".
        """
        # See test files for examples. Parses the block starting with:
        # ***************  Stationary point options ******************
        r = r"Stationary point options[\s\*]{60,}(.+?)\*{60,}"
        match = re.search(r, self.string, re.DOTALL)

        data = dict(
            max_trust_radius=None,
            min_trust_radius=None,
            init_trust_radius=None,
            min_grad_norm_for_gdiis=None,
            prev_steps_for_gdiis=None,
            hessian_update_method=None,
            thr_energy_change=None,
            thr_max_displ=None,
            thr_max_grad=None,
            thr_rms_displ=None,
            thr_rms_grad=None,
            use_defaults=False,
            final_step_radius=None,
        )

        if match:
            for line in match.group(1).splitlines():
                if "Maximum allowed trust radius" in line:
                    data["max_trust_radius"] = convert_float(line.split()[-1])
                if "Minimum allowed trust radius" in line:
                    data["min_trust_radius"] = convert_float(line.split()[-1])
                if "Initial trust radius" in line:
                    data["init_trust_radius"] = convert_float(line.split()[-1])
                if "GDIIS used if gradient norm" in line:
                    data["min_grad_norm_for_gdiis"] = convert_float(line.split()[-1])
                if "Number of previous steps for GDIIS" in line:
                    data["prev_steps_for_gdiis"] = convert_int(line.split()[-1])
                if "Hessian update method" in line:
                    data["hessian_update_method"] = line.split()[-1]
                if "Threshold for energy change" in line:
                    data["thr_energy_change"] = convert_float(line.split()[-1])
                if "Threshold for max displacement element" in line:
                    data["thr_max_displ"] = convert_float(line.split()[-1])
                if "Threshold for max gradient element" in line:
                    data["thr_max_grad"] = convert_float(line.split()[-1])
                if "Threshold for RMS of displacement" in line:
                    data["thr_rms_displ"] = convert_float(line.split()[-1])
                if "Threshold for RMS of gradient" in line:
                    data["thr_rms_grad"] = convert_float(line.split()[-1])

        data["use_defaults"] = (
            "Keyword $statpt not found - using default options" in self.string
        )

        r = r"$\s+Final\s+step\s+radius\s*:\s+(" + float_number_e_re + r")"
        match = re.search(r, self.string, re.MULTILINE)
        if match:
            data["final_step_radius"] = convert_float(match.group(1))

        if all(not v for v in data.values()):
            return None

        return data



[docs]
    @lazy_property
    def relax_info(self):
        """
        Extract the initial information provided in relax.

        Valid only for relax.

        Returns:
            dict with "optimizations" list of optimized quantities and "thr_int_coord".
        """
        data = dict(optimizations=None, thr_int_coord=None)

        # Parses for example:
        #  optimization will be performed with respect to
        #  - INTERNAL coordinates
        r = r"optimization will be performed with respect to\s+(.*?)-{10,}"
        match = re.search(r, self.string, re.DOTALL)
        if match:
            opt = []
            for line in match.group(1).splitlines():
                line = line.strip()
                if line.startswith("-"):
                    opt.append(line.replace("-", "", 1).strip())
            data["optimizations"] = opt

        r = (
            r"$\s+convergence\s+criterion\s+for\s+internal\s+coordinates\s*:\s+("
            + float_number_e_re
            + r")"
        )
        match = re.search(r, self.string, re.MULTILINE)
        if match:
            data["thr_int_coord"] = convert_float(match.group(1))

        if all(v is None for v in data.values()):
            return None

        return data



[docs]
    @lazy_property
    def relax_gradient_values(self):
        """
        Extract the gradient values from the relax/stapt output.

        This takes frozen coordinates into account.
        Valid for relax and statpt.

        Returns:
            dict with "norm_cartesian", "norm_internal", "max_internal"
        """
        # Parses:
        #   norm of actual CARTESIAN gradient : .54369
        #   norm of actual  INTERNAL gradient : .55998
        #   maximum norm of actual  INTERNAL gradient : .46843

        data = dict(norm_cartesian=None, norm_internal=None, max_internal=None)

        r = (
            r"$\s+norm\s+of\s+actual\s+CARTESIAN\s+gradient\s*:\s+("
            + float_number_e_re
            + r")"
        )
        match = re.search(r, self.string, re.MULTILINE)
        if match:
            data["norm_cartesian"] = convert_float(match.group(1))

        r = (
            r"$\s+norm\s+of\s+actual\s+INTERNAL\s+gradient\s*:\s+("
            + float_number_e_re
            + r")"
        )
        match = re.search(r, self.string, re.MULTILINE)
        if match:
            data["norm_internal"] = convert_float(match.group(1))

        r = (
            r"maximum\s+norm\s+of\s+actual\s+INTERNAL\s+gradient\s*:\s+("
            + float_number_e_re
            + r")"
        )
        match = re.search(r, self.string)
        if match:
            data["max_internal"] = convert_float(match.group(1))

        if all(v is None for v in data.values()):
            return None

        return data



[docs]
    @lazy_property
    def relax_conv_info(self):
        """
        Extract the final information about convergence.

        This is performed according to what is defined in the control file.
        Valid for relax and statpt.

        Returns:
            dict with "energy_change", "rms_displ", "rms_grad", "max_displ", "max_grad".
            For each one a dict with "value", "thr" and "conv".
        """
        # Parses:
        #       ******************************************************************
        #                           CONVERGENCE INFORMATION
        #
        #                                Converged?     Value      Criterion
        #              Energy change         no      76.3430059   0.0000010
        #              RMS of displacement   no       0.0517230   0.0005000
        #              RMS of gradient       no       0.0340771   0.0005000
        #              MAX displacement      no       0.0731468   0.0010000
        #              MAX gradient          no       0.0481922   0.0010000
        #       ******************************************************************
        r = r"CONVERGENCE INFORMATION.*?Criterion\s*(.+?)\*{60,}"
        match = re.search(r, self.string, re.DOTALL)

        if not match:
            return None

        def get_single_data(line):
            s = line.split()
            converged = "yes" == s[-3]
            return {
                "value": convert_float(s[-2]),
                "thr": convert_float(s[-1]),
                "conv": converged,
            }

        data = dict(
            energy_change=None,
            rms_displ=None,
            rms_grad=None,
            max_displ=None,
            max_grad=None,
        )
        for line in match.group(1).splitlines():
            if "Energy change" in line:
                data["energy_change"] = get_single_data(line)
            if "RMS of displacement" in line:
                data["rms_displ"] = get_single_data(line)
            if "RMS of gradient" in line:
                data["rms_grad"] = get_single_data(line)
            if "MAX displacement" in line:
                data["max_displ"] = get_single_data(line)
            if "MAX geom. grad." in line or "MAX gradient" in line:
                data["max_grad"] = get_single_data(line)

        return data



[docs]
    @lazy_property
    def aoforce_numerical_integration(self):
        """
        Extract the information about the numerical integration in aoforce.

        Will be present only if a proper aoforce is run.
        Absent if running after numforce.

        Returns:
            dict with "memory" (a dict with "core_memory_dft", "memory_per_atom",
            "atoms_per_loop") and "construction_timings" (a list of lists with
            construction timings. For each element:
            [timing_description, cpu_time, wall_time]
            with timing_description being the description of the timing, cpu_time
            being the CPU time in seconds and wall_time, the wall time in seconds.
        """
        # This section may not be present if aoforce runs after a numforce but also
        #  if the calculation is not dft.
        r = r"PREPARING NUMERICAL INTEGRATION.*atoms per loop"
        match = re.search(r, self.string, re.DOTALL)

        if match:
            memory = {}
            for line in match.group().splitlines():
                if "Remaining core memory for DFT" in line:
                    memory["core_memory_dft"] = convert_int(line.split()[-2])
                if "Memory needed per atom" in line:
                    memory["memory_per_atom"] = convert_int(line.split()[-2])
                if "atoms per loop" in line:
                    memory["atoms_per_loop"] = convert_int(line.split()[-4])
        else:
            memory = None

        timings = []
        # this matches a series a of "CONSTRUCTING xxx to get their timings. Example:
        #  CONSTRUCTING first deriv. of <mu|x,y,z|nu> -> Dip. deriv.
        #   ...
        #       ...terminated. cpu:       0.00       wall:       0.00
        # it skips the CONSTRUCTING integral bounds, that does not contain any timing.
        r = (
            r"CONSTRUCTING((?! integral bounds).*?wall:\s+\d\.\d+)\s+"
            r"(?=CONSTRUCTING|SOLVING|-------)"
        )
        for m in re.findall(r, self.string, re.DOTALL):
            # consider a different case for the second deriv. of 2e energy
            # since if it a dft calculations there are two timings there.
            if "second deriv. of 2e energy" in m:
                gl = m.splitlines()
                treating_line = ""
                for i in range(1, len(gl)):
                    if "treating" in gl[i]:
                        treating_line = gl[i]
                    if "...terminated" in gl[i]:
                        gls = gl[i].split()
                        timings.append(
                            (
                                gl[0] + "\n" + treating_line,
                                convert_float(gls[-3]),
                                convert_float(gls[-1]),
                            )
                        )

            else:
                lines = []
                cpu = None
                wall = None
                for line in m.splitlines():
                    if "...terminated" in line:
                        s = line.split()
                        cpu = convert_float(s[-3])
                        wall = convert_float(s[-1])
                    elif "->" in line or ("<" and ">" in line) or ("[" and "]" in line):
                        lines.append(line)
                timings.append(("\n".join(lines), cpu, wall))

        if not memory and not timings:
            return None

        data = dict(memory=memory, construction_timings=timings)

        return data



[docs]
    @lazy_property
    def aoforce_analysis(self):
        """
        Output of rotational and vibrational analysis in aoforce.

        Returns:
            dict with "rotational" (a dict with "dipole_moment", "b",
            "intensities", "m") and "vibrational". Vibrational, contains dict
            with keys "frequencies", "symmetries", "ir", "dDIP_dQ", "intensities",
            "intensities_perc", "raman", "eigenvectors", "reduced_masses".
            One value for each frequency.
        """
        # See test files for examples. Parses the section starting with:
        #              -----------------------------------
        #              rotational and vibrational analysis
        #              -----------------------------------
        split1 = self.string.split("dipole moment in principle axis system")
        if len(split1) != 2:
            return None

        split2 = split1[1].split("NORMAL MODES and VIBRATIONAL FREQUENCIES")
        if len(split2) != 2:
            return None

        # Rotational section
        # Parses:
        #  dipole moment in principle axis system (a.u.) :
        #     -0.0000000000     0.0000000000     0.8436248828
        #  norm :  0.843624882772472
        #
        #  rotational constants b for rotations around axis of inertia
        #  and optical intensities for (1 <-- 0) transition
        #
        #    b   :    25.5620992978     9.0755697626    14.0715251781   (cm**(-1))
        #    b   :      766332.4580      272078.7367      421853.7121     (MHz)
        #   int. :     0.0000000000     0.0000000000     0.7117029428     (a.u.)
        #
        #    x   :     1.0000000000     0.0000000000     0.0000000000
        #    y   :     0.0000000000     1.0000000000     0.0000000000
        #    z   :     0.0000000000     0.0000000000     1.0000000000

        rot = {}
        rot_lines = split2[0].splitlines()

        line1 = rot_lines[1].strip()
        if line1.startswith("norm"):
            # Example output in TM versions >=7.7:
            #  dipole moment in principle axis system (a.u.) :     0.0000000000     0.0000000000     0.8330302255  # noqa: E501
            #  norm :  0.8330302255
            rot["dipole_moment"] = [
                convert_float(f) for f in rot_lines[0].split(":")[1].split()
            ]
        else:
            # Example output in TM versions < 7.7:
            #  dipole moment in principle axis system (a.u.) :
            #     -0.0000000000     0.0000000000     0.8436248828
            #  norm :  0.843624882772472
            rot["dipole_moment"] = [convert_float(f) for f in rot_lines[1].split()]
        b = None
        rot_intensities = None
        m = None
        for line in rot_lines:
            line = line.strip()
            if line.startswith("b ") and "cm" in line:
                s = line.split()
                b = [convert_float(s[i]) for i in (2, 3, 4)]
            if line.startswith("int.") or (line.startswith("b") and "(a.u.)" in line):
                # Weird new output for the intensities from TM >= 7.7:
                #  b   :    28.3859581012     9.7579343128    14.8694417457   (cm**(-1))
                #
                #  b   :  ***************  ***************  ***************     (MHz)
                #
                #  b   :     0.0000000000     0.0000000000     0.6939393566     (a.u.)
                # While this was before TM 7.7:
                #  b   :    28.3859581012     9.7579343128    14.8694417457   (cm**(-1))
                #  b   :      850989.6152      292535.5113      445774.6490     (MHz)
                # int. :     0.0000000000     0.0000000000     0.6939393566     (a.u.)
                s = line.split()
                rot_intensities = [convert_float(s[i]) for i in (2, 3, 4)]
            if line.startswith("x "):
                # the x :   y :   z : section may be missing.
                # Initialize m here only if it is actually filled
                m = [[0.0, 0.0, 0.0]] * 3
                s = line.split()
                m[0] = [convert_float(s[i]) for i in (2, 3, 4)]
            if line.startswith("y "):
                s = line.split()
                m[1] = [convert_float(s[i]) for i in (2, 3, 4)]
            if line.startswith("z "):
                s = line.split()
                m[2] = [convert_float(s[i]) for i in (2, 3, 4)]

        # TODO verify the meaning of int and m
        rot["b"] = b
        rot["intensities"] = rot_intensities
        rot["m"] = m

        # Vibrational section
        # Parses groups of the form:
        #        mode               7        8        9
        #
        #      frequency        1603.56  3527.41  3641.97
        #
        #      symmetry            a1       a1       b1
        #
        #         IR               YES      YES      YES
        # |dDIP/dQ|   (a.u.)     0.0059   0.0002   0.0031
        # intensity (km/mol)      62.07     0.11    16.54
        # intensity (  %   )     100.00     0.17    26.64
        #
        #        RAMAN             YES      YES      YES
        #
        #   1   o           x  -0.00000  0.00000  0.06983
        #                   y   0.00000  0.00000  0.00000
        #                   z   0.07060  0.04999 -0.00000
        #   2   h           x   0.42845 -0.58424 -0.55423
        #                   y   0.00000  0.00000  0.00000
        #                   z  -0.56031 -0.39676 -0.43634
        #   3   h           x  -0.42845  0.58424 -0.55423
        #                   y  -0.00000  0.00000  0.00000
        #                   z  -0.56031 -0.39676  0.43634
        #
        # reduced mass(g/mol)     1.083    1.045    1.081

        frequencies = []
        symmetries = []
        ir = []
        dDIP_dQ = []
        vib_intensities = []
        vib_intensities_perc = []
        raman = []
        eigenvectors = []
        reduced_masses = []

        regex_eigenvectors = re.compile(
            r"(.*?)(^[ \t]+\d+\s+[a-z]{1,2}.*?)(reduced mass.*)",
            re.DOTALL | re.MULTILINE,
        )

        def get_active_value(v):
            """Convert the value to a bool. If "-" is set to None."""
            if v == "YES":
                return True
            elif v == "NO":
                return False
            elif v == "-":  # pragma: no branch
                return None
            else:  # pragma: no cover
                raise RuntimeError("cannot correctly parse active value {}".format(v))

        # splits the block of lines with one set of frequencies and loops over them
        r = r"(mode\s{5}.*?)(?=mode |\*{10,})"
        for block in re.findall(r, split2[1], re.DOTALL):
            match = regex_eigenvectors.search(block)
            if not match:  # pragma: no cover
                continue
            block_freqs = match.group(1) + match.group(3)
            block_eigenvectors = match.group(2).strip()
            freqs_lines = block_freqs.splitlines()
            n_freqs = len(freqs_lines[0].split()) - 1

            # loop over the lines of the block, extract all the properties
            for line in freqs_lines:
                s = line.split()
                if "frequency" in line:
                    # put imaginary frequencies to negative values.
                    frequencies.extend(
                        convert_float(f.replace("i", "-")) for f in s[1:]
                    )
                elif "symmetry" in line:
                    # put the symmetries that are present
                    symmetries.extend(sym for sym in s[1:])
                    # add None values for those that are not present
                    symmetries.extend(None for i in range(n_freqs - len(s) + 1))
                elif "IR " in line:
                    ir.extend(get_active_value(v) for v in s[1:])
                elif "|dDIP/dQ|" in line:
                    dDIP_dQ.extend(convert_float(f) for f in s[2:])
                elif "intensity (km/mol)" in line:
                    vib_intensities.extend(convert_float(f) for f in s[2:])
                elif "intensity" in line and "%" in line:
                    vib_intensities_perc.extend(convert_float(f) for f in s[4:])
                elif "RAMAN " in line:
                    raman.extend(get_active_value(v) for v in s[1:])
                elif "reduced mass" in line:
                    reduced_masses.extend(convert_float(f) for f in s[2:])

            eig_lines = block_eigenvectors.splitlines()
            if len(eig_lines) % 3 != 0:  # pragma: no cover
                raise RuntimeError(
                    "number of line to be parsed for eigenvectors "
                    "seems wrong: {}".format(len(eig_lines))
                )

            n_atoms = len(eig_lines) // 3
            eigv_block = np.zeros((n_freqs, n_atoms, 3))
            for i in range(n_atoms):
                for j in range(3):
                    s = eig_lines[3 * i + j].split()
                    eigv_block[:, i, j] = [convert_float(f) for f in s[-n_freqs:]]

            eigenvectors.extend(eigv_block.tolist())

        energies = None
        re_zpve = r"zero point VIBRATIONAL energy.*?\*{10,}"
        match = re.search(re_zpve, self.string, re.DOTALL)
        if match:  # pragma: no branch
            energies = {"zpve": None, "scf": None, "total": None}
            for line in match.group().splitlines():
                if "zero point VIBRATIONAL energy" in line:
                    energies["zpve"] = convert_float(line.split()[-3])
                if "SCF-energy" in line:
                    energies["scf"] = convert_float(line.split()[-2])
                if "SCF + E(vib0)" in line:
                    energies["total"] = convert_float(line.split()[-2])

        vib = dict(
            frequencies=frequencies,
            symmetries=symmetries,
            ir=ir,
            dDIP_dQ=dDIP_dQ,
            intensities=vib_intensities,
            intensities_perc=vib_intensities_perc,
            raman=raman,
            eigenvectors=eigenvectors,
            reduced_masses=reduced_masses,
            energies=energies,
        )

        tot_n_freqs = len(frequencies)
        if any(
            k != "energies" and len(v) != tot_n_freqs for k, v in vib.items()
        ):  # pragma: no cover
            raise RuntimeError(
                "Error parsing the vibrational frequencies, "
                "for some quantity the list of "
                "values does not contain the correct number of items"
            )

        data = dict(rotational=rot, vibrational=vib)

        return data



[docs]
    @lazy_property
    def mp2_data(self):
        """
        Extract information of MP2 calculation.

        Returns:
            dict with "energy_only".
        """
        # TODO: implement for mpgrad and rimp2/ricc2
        return None



[docs]
    @lazy_property
    def mp2_results(self):
        """
        Extract MP2 results.

        Returns:
            dict with "energy".
        """
        energy = None
        # Try to parse from rimp2/ricc2 programs
        r = r"\*{62,62}\s+\*\s+\*\s+\*<{10,10}\s+"
        r += r"GROUND\s+STATE\s+FIRST-ORDER\s+PROPERTIES"
        r += r"\s+>{11,11}\*\s+\*\s+\*\s+\*{62,62}\s+"
        r += r"-{48,48}\s+Method\s+:\s+MP2\s+Total\s+Energy\s+:\s+("
        r += float_number_all_re
        r += r")\s+"
        m = re.findall(r, string=self.string)
        if len(m) == 1:
            energy = convert_float(m[0])
        elif len(m) > 1:  # pragma: no cover
            raise RuntimeError(
                "Error parsing the MP2 results. Multiple occurrences of "
                "MP2 results found."
            )

        # Try to parse from mpgrad program
        r = r"\*{53,53}\s+\*\s+\*\s+\*\s+"
        r += r"SCF-energy\s+:\s+(" + float_number_all_re + r")\s+\*\s+\*\s+"
        r += r"MP2-energy\s+:\s+(" + float_number_all_re + r")\s+\*\s+\*\s+"
        r += r"total\s+:\s+(" + float_number_all_re + r")\s+\*\s+\*\s+"
        r += (
            r"\*\s+\*\s+\(MP2-energy\s+evaluated\s+from\s+T2\s+"
            r"amplitudes\)\s+\*\s+\*\s+\*\s+\*{53,53}"
        )
        m = re.findall(r, string=self.string)
        if len(m) == 1:
            if energy is not None:  # pragma: no cover
                raise RuntimeError(
                    "Error parsing the MP2 results. "
                    "Found MP2 results from both mpgrad- and "
                    "rimp2/ricc2-like calculations."
                )
            energy = convert_float(m[0][2])
        elif len(m) > 1:  # pragma: no cover
            raise RuntimeError(
                "Error parsing the MP2 results. Multiple occurrences of "
                "MP2 results found."
            )

        return dict(energy=energy)



[docs]
    @lazy_property
    def periodicity_data(self):
        """
        Information about periodicity.

        Returns:
            dict with "periodicity", "tm_lattice_params",
            "shortest_interatomic_distance", "direct_space_vectors",
            "reciprocal_space_vectors".
        """
        r = r"Periodic system found\: PBC structure information.*"
        r += r"Reciprocal space cell vectors \(au\)\:\s+"
        r += r"(?:[a|b|c]\s+"
        r += float_number_all_re
        r += r"\s+"
        r += float_number_all_re
        r += r"\s+"
        r += float_number_all_re
        r += r"\s+){1,3}"
        m = re.findall(r, string=self.string, flags=re.DOTALL)
        if len(m) == 0:
            return None
        elif len(m) > 1:  # pragma: no cover
            raise RuntimeError("More than one section on periodicity.")
        periodicity_string = m[0]
        periodicity = int(
            re.findall(r"Periodicity in (\d) dimensions", periodicity_string)[0]
        )
        if periodicity == 1:
            rlatparams = rf"\+-{{73}}\s+({float_number_all_re})\s+\+-{{73}}"
            tm_lattice_params = [
                convert_float(lp) for lp in re.findall(rlatparams, periodicity_string)
            ]
        elif periodicity == 2:
            rlatparams = r"\+-{73}"
            rlatparams += rf"\s+({float_number_all_re})" * 3
            rlatparams += r"\s+\+-{73}"
            tm_lattice_params = [
                convert_float(lp)
                for lp in re.findall(rlatparams, periodicity_string)[0]
            ]
        elif periodicity == 3:  # pragma: no branch
            rlatparams = r"\+-{73}"
            rlatparams += rf"\s+({float_number_all_re})" * 6
            rlatparams += r"\s+\+-{73}"
            tm_lattice_params = [
                convert_float(lp)
                for lp in re.findall(rlatparams, periodicity_string)[0]
            ]
        else:  # pragma: no cover
            raise RuntimeError("Wrong periodicity.")

        shortest_interatomic_distance = convert_float(
            re.search(
                rf"Shortest interatomic distance \(bohr\)\:\s+({float_number_all_re})",
                periodicity_string,
            ).group(1)
        )

        rdirect = r"Direct space cell vectors \(au\)\:"
        rdirect += (
            rf"\s+[a|b|c]\s+({float_number_all_re})"
            rf"\s+({float_number_all_re})\s+({float_number_all_re})" * periodicity
        )
        direct_space_vectors = np.reshape(
            [
                convert_float(val)
                for val in re.search(rdirect, periodicity_string).groups()
            ],
            (periodicity, 3),
        ).tolist()

        r_recip = r"Reciprocal space cell vectors \(au\)\:"
        r_recip += (
            rf"\s+[a|b|c]\s+({float_number_all_re})"
            rf"\s+({float_number_all_re})\s+({float_number_all_re})" * periodicity
        )
        reciprocal_space_vectors = np.reshape(
            [
                convert_float(val)
                for val in re.search(r_recip, periodicity_string).groups()
            ],
            (periodicity, 3),
        ).tolist()

        return dict(
            periodicity=periodicity,
            tm_lattice_params=tm_lattice_params,
            shortest_interatomic_distance=shortest_interatomic_distance,
            direct_space_vectors=direct_space_vectors,
            reciprocal_space_vectors=reciprocal_space_vectors,
        )



[docs]
    def get_split_jobex_parsers(self):
        """
        Get parser for each step of a jobex calculation.

        Given the string of the "job.last" output file of jobex, this generates the
        parsers for each of the step: energy, gradient and relax.

        Returns:
            namedtuple:
                exec_en: executable of the energy step.
                parser_en: parser with the output of the energy step.
                exec_grad: executable of the gradient step.
                parser_grad: parser with the output of the gradient step.
                exec_relax: executable of the energy step.
                parser_relax: parser with the output of the relax step.
        """
        tot_split = self.string.split("next step =")
        p_en = p_grad = p_relax = None
        exec_en = exec_grad = exec_relax = None

        for s in tot_split:
            p = Parser(s)
            header = p.header
            if not header:
                continue

            executable = header["executable"]

            if executable in ("dscf", "ridft", "riper"):
                p_en = p
                exec_en = executable
            elif executable in ("grad", "rdgrad", "egrad", "mpgrad", "ricc2"):
                p_grad = p
                exec_grad = executable
            elif executable in ("relax", "statpt", "frog"):  # pragma: no branch
                p_relax = p
                exec_relax = executable

        JobexParsers = namedtuple(
            "JobexParsers",
            [
                "exec_en",
                "parser_en",
                "exec_grad",
                "parser_grad",
                "exec_relax",
                "parser_relax",
            ],
        )

        return JobexParsers(exec_en, p_en, exec_grad, p_grad, exec_relax, p_relax)



[docs]
    def grep_line(self, text, nlines=0):
        """
        Search the text in the string similarly to the grep command.

        The specified text is searched in a single line and the full line plus
        the "nlines" following the matching one is returned.
        Returns the first match.

        Args:
            text (str): the text to be searched.
            nlines (int): the number of lines

        Returns:
            str: the text found
        """
        r = r"^.*" + text + r".*$(\n.*){0," + str(nlines) + r"}"

        m = re.search(r, self.string, re.MULTILINE)

        if not m:
            return None

        return m.group(0)



[docs]
    def get_value(self, text, chunk, occurrence=0, converter=None):
        """
        Get value from the string.

        This finds the i-th occurrence of a line containing the specified text,
        splits the line and takes the specified chunk obtained from the split.
        Optionally converts it to a specific format.

        Args:
            text (str): text to search
            chunk (int): the chunk to take after the split of the line.
                Can be negative as it identifies the index in the list.
            occurrence (int): the 0-based index of the occurrence of the "text"
                in the string. Can be negative (e.g. -1 is the last occurrence).
            converter (callable): called to convert the selected chunk to some
                other type. It can be simply int, but for a float it would be better
                to use the convert_float function instead.

        Returns:
            the converted selected chunk.
        """
        r = r"^.*" + text + r".*$"

        m = re.findall(r, self.string, re.MULTILINE)

        if not m:
            return None

        line = m[occurrence]

        v = line.split()[chunk]

        if converter:
            v = converter(v)

        return v