Source code for sisl.io.siesta.stdout

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
from __future__ import annotations

import os
from functools import lru_cache
from typing import Optional

import numpy as np

import sisl._array as _a
from sisl import Atom, Atoms, Geometry, Lattice
from sisl._common import Opt
from sisl._help import voigt_matrix
from sisl._internal import set_module
from sisl.messages import deprecation, warn
from sisl.physics import Spin
from sisl.unit.siesta import unit_convert
from sisl.utils import PropertyDict
from sisl.utils.cmd import *

from .._multiple import SileBinder, postprocess_tuple
from ..sile import SileError, add_sile, sile_fh_open
from .sile import SileSiesta

__all__ = ["stdoutSileSiesta", "outSileSiesta"]


Bohr2Ang = unit_convert("Bohr", "Ang")
_A = SileSiesta.InfoAttr


def _ensure_atoms(atoms):
    """Ensures that the atoms list is a list with entries (converts `None` to a list)."""
    if atoms is None or len(atoms) == 0:
        return [Atom(i) for i in range(150)]
    return atoms


def _parse_spin(attr, match):
    """Parse 'redata: Spin configuration *= <value>'"""
    opt = match.string.split("=")[-1].strip()

    if opt.startswith("spin-orbit"):
        return Spin("spin-orbit")
    if opt.startswith("collinear") or opt.startswith("colinear"):
        return Spin("polarized")
    if opt.startswith("non-col"):
        return Spin("non-colinear")
    return Spin()
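

# For reference, a sketch (assumed wording; the exact text after "=" differs between
# Siesta versions) of the output lines that `_parse_spin` is matched against:
#
#   redata: Spin configuration                          = none
#   redata: Spin configuration                          = collinear
#   redata: Spin configuration                          = non-collinear
#   redata: Spin configuration                          = spin-orbit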


def _read_scf_empty(scf):
    if isinstance(scf, tuple):
        return len(scf[0]) == 0
    return len(scf) == 0


def _read_scf_md_process(scfs):

    if len(scfs) == 0:
        return None

    if not isinstance(scfs, list):
        # single MD request either as:
        #  - np.ndarray
        #  - np.ndarray, tuple
        #  - pd.DataFrame
        return scfs

    has_props = isinstance(scfs[0], tuple)
    if has_props:
        my_len = lambda scf: len(scf[0])
    else:
        my_len = len

    scf_len1 = np.all(_a.fromiterd(map(my_len, scfs)) == 1)
    if isinstance(scfs[0], (np.ndarray, tuple)):

        if has_props:
            props = scfs[0][1]
            scfs = [scf[0] for scf in scfs]

        if scf_len1:
            scfs = np.array(scfs)
        if has_props:
            return scfs, props
        return scfs

    # We are dealing with a dataframe
    import pandas as pd

    df = pd.concat(
        scfs,
        keys=_a.arangei(1, len(scfs) + 1),
        names=["imd"],
    )
    if scf_len1:
        df.reset_index("iscf", inplace=True)
    return df


@set_module("sisl.io.siesta")
class stdoutSileSiesta(SileSiesta):
    """Output file from Siesta

    This enables reading the output quantities from the Siesta output.
    """

    _info_attributes_ = [
        _A(
            "na",
            r"^initatomlists: Number of atoms",
            lambda attr, match: int(match.string.split()[-3]),
            not_found="warn",
        ),
        _A(
            "no",
            r"^initatomlists: Number of atoms",
            lambda attr, match: int(match.string.split()[-2]),
            not_found="warn",
        ),
        _A(
            "completed",
            r".*Job completed",
            lambda attr, match: lambda: True,
            default=lambda: False,
            not_found="warn",
        ),
        _A(
            "spin",
            r"^redata: Spin configuration",
            _parse_spin,
        ),
        _A(
            "_final_analysis",
            r"^siesta: Final energy",
            lambda attr, match: lambda: True,
            default=lambda: False,
        ),
    ]
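
    # The attributes above are parsed while reading the file and are exposed through
    # ``self.info`` (see the deprecated `completed` method below); e.g. (hypothetical
    # usage) ``self.info.spin`` holds the parsed `Spin` object and ``self.info.na``
    # the number of atoms.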

    @deprecation(
        "stdoutSileSiesta.completed is deprecated in favor of stdoutSileSiesta.info.completed",
        "0.15",
        "0.16",
    )
    def completed(self):
        """True if the full file has been read and "Job completed" was found."""
        return self.info.completed()

    @lru_cache(1)
    @sile_fh_open(True)
    def read_basis(self) -> Atoms:
        """Reads the basis as found in the output file

        This parses 3 things:

        1. At the start of the file there is some initatom output
           specifying which species are in the calculation.
        2. Reading the <basis_specs> entries for the masses
        3. Reading the PAO.Basis block output for orbital information
        """
        found, line = self.step_to("Species number:")
        if not found:
            return []

        atoms = {}
        order = []
        while "Species number:" in line:
            ls = line.split()
            if ls[3] == "Atomic":
                atoms[ls[7]] = {"Z": int(ls[5]), "tag": ls[7]}
                order.append(ls[7])
            else:
                atoms[ls[4]] = {"Z": int(ls[7]), "tag": ls[4]}
                order.append(ls[4])
            line = self.readline()

        # Now go down to basis_specs
        found, line = self.step_to("<basis_specs>")
        while found:
            # =====
            self.readline()
            # actual line
            line = self.readline().split("=")
            tag = line[0].split()[0]
            atoms[tag]["mass"] = float(line[2].split()[0])
            found, line = self.step_to("<basis_specs>", allow_reread=False)

        block = []
        found, line = self.step_to("%block PAO.Basis")
        line = self.readline()
        while not line.startswith("%endblock PAO.Basis"):
            block.append(line)
            line = self.readline()

        from .fdf import fdfSileSiesta

        atom_orbs = fdfSileSiesta._parse_pao_basis(block)
        for atom, orbs in atom_orbs.items():
            atoms[atom]["orbitals"] = orbs

        return Atoms([Atom(**atoms[tag]) for tag in order])

    def _r_lattice_outcell(self):
        """Wrapper for reading the unit-cell from the outcell block"""
        # Read until outcell is found
        found, line = self.step_to("outcell: Unit cell vectors")
        if not found:
            raise ValueError(
                f"{self.__class__.__name__}._r_lattice_outcell did not find outcell key"
            )

        Ang = "Ang" in line

        # We read the unit-cell vectors (in Ang)
        cell = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            cell.append([float(x) for x in line[:3]])
            line = self.readline()

        cell = _a.arrayd(cell)
        if not Ang:
            cell *= Bohr2Ang

        return Lattice(cell)

    def _r_geometry_outcoor(self, line, atoms=None):
        """Wrapper for reading the geometry as in the outcoor output"""
        atoms_order = _ensure_atoms(atoms)

        is_final = "Relaxed" in line or "Final (unrelaxed)" in line

        # Now we have outcoor
        scaled = "scaled" in line
        fractional = "fractional" in line
        Ang = "Ang" in line

        # Read in data
        xyz = []
        atoms = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            if len(line) != 6:
                break
            xyz.append(line[:3])
            atoms.append(atoms_order[int(line[3]) - 1])
            line = self.readline()

        # in outcoor we know the cell is always written just after,
        # but not if the cell is not variable.
        # Retrieve the unit-cell (but do not skip file-descriptor position)
        # This is because the current unit-cell is not always written.
        pos = self.fh.tell()
        cell = self._r_lattice_outcell()
        if is_final and self.fh.tell() < pos:
            # we have wrapped around the file
            self.fh.seek(pos, os.SEEK_SET)

        xyz = _a.arrayd(xyz)

        # Now create the geometry
        if scaled:
            # The output file for siesta does not
            # contain the lattice constant.
            # So... :(
            raise ValueError(
                "Could not read the lattice-constant for the scaled geometry"
            )
        elif fractional:
            xyz = xyz.dot(cell.cell)
        elif not Ang:
            xyz *= Bohr2Ang

        return Geometry(xyz, atoms, lattice=cell)

    def _r_geometry_atomic(self, line, atoms=None):
        """Wrapper for reading the geometry as in the 'Atomic coordinates' output"""
        atoms_order = _ensure_atoms(atoms)

        # Now we have the atomic coordinates block
        Ang = "Ang" in line

        # Read in data
        xyz = []
        atoms = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            xyz.append([float(x) for x in line[1:4]])
            atoms.append(atoms_order[int(line[4]) - 1])
            line = self.readline()

        # Retrieve the unit-cell (but do not skip file-descriptor position)
        # This is because the current unit-cell is not always written.
        pos = self.fh.tell()
        cell = self._r_lattice_outcell()
        self.fh.seek(pos, os.SEEK_SET)

        # Convert xyz
        xyz = _a.arrayd(xyz)
        if not Ang:
            xyz *= Bohr2Ang

        return Geometry(xyz, atoms, lattice=cell)

    @SileBinder()
    @sile_fh_open()
    def read_geometry(self, skip_input: bool = True) -> Geometry:
        """Reads the geometry from the Siesta output file

        Parameters
        ----------
        skip_input :
            the input geometry may be contained as a print-out.
            This is not part of an MD calculation, and hence is per default not returned.

        Returns
        -------
        Geometry or None
            the geometry found in the output (``None`` if not found). When reading
            multiple entries (through the `SileBinder` slicing) a list of geometries
            corresponding to the MD steps is returned.
        """
        atoms = self.read_basis()

        def func(*args, **kwargs):
            """Wrapper to return None"""
            return None

        line = " "
        while line != "":
            line = self.readline()
            if "outcoor" in line and "coordinates" in line:
                func = self._r_geometry_outcoor
                break
            elif "siesta: Atomic coordinates" in line and not skip_input:
                func = self._r_geometry_atomic
                break

        return func(line, atoms)

    @SileBinder(postprocess=postprocess_tuple(_a.arrayd))
    @sile_fh_open()
    def read_force(self, total: bool = False, max: bool = False, key: str = "siesta"):
        """Reads the forces from the Siesta output file

        Parameters
        ----------
        total: bool, optional
            return the total forces instead of the atomic forces.
        max: bool, optional
            whether only the maximum atomic force should be returned for each step.

            Setting it to `True` is equivalent to `max(outSile.read_force())` in case
            atomic forces are written in the output file (`WriteForces .true.` in the
            fdf file).

            Note that this is not the same as doing `max(outSile.read_force(total=True))`
            since the forces returned in that case are averages on each axis.
        key: {"siesta", "ts"}
            Specifies the indicator string for the forces that are to be read.
            The function will look for a line containing ``f'{key}: Atomic forces'``
            to start reading forces.

        Returns
        -------
        numpy.ndarray or None
            returns ``None`` if the forces are not found in the output, otherwise
            forces will be returned.

            The shape of the array will be different depending on the type of forces
            requested:

            - atomic (default): (nMDsteps, nAtoms, 3)
            - total: (nMDsteps, 3)
            - max: (nMDsteps, )

            If `total` and `max` are both `True`, they are returned separately as a
            tuple: ``(total, max)``
        """
        # Read until forces are found
        found, line = self.step_to(f"{key}: Atomic forces", allow_reread=False)
        if not found:
            return None

        # Now read data
        line = self.readline()
        if "siesta:" in line:
            # This is the final summary, we don't need to read it as it does not
            # contain new information and also it may break things since max forces
            # are not written there.
            return None

        F = []
        # First, we encounter the atomic forces
        while "---" not in line:
            line = line.split()
            if not (total or max):
                F.append([float(x) for x in line[-3:]])
            line = self.readline()
            if line == "":
                break
        if not F:
            F = None

        line = self.readline().split()
        # Parse total forces if requested
        if total and line:
            F = _a.arrayd([float(x) for x in line[-3:]])

        # And after that we can read the max force
        line = self.readline()
        if max and line:
            line = self.readline().split()
            maxF = _a.arrayd(float(line[1]))

            # In case total is also requested, we are going to
            # store it all in the same variable.
            # It will be separated later
            if total:
                # create tuple
                F = (F, maxF)
            else:
                F = maxF

        return F

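    # Usage sketch (hypothetical file name, doctest-style):
    #
    #   >>> out = stdoutSileSiesta("RUN.out")
    #   >>> F = out.read_force()  # atomic forces of one step
    #   >>> F_all = out.read_force[:]()  # all MD steps through the `SileBinder` slice
    #   >>> Ftot, Fmax = out.read_force(total=True, max=True)
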
    @SileBinder(postprocess=_a.arrayd)
    @sile_fh_open()
    def read_stress(self, key: str = "static", skip_final: bool = True) -> np.ndarray:
        """Reads the stresses from the Siesta output file

        Parameters
        ----------
        key : {static, total, Voigt}
            which stress to read from the output.
        skip_final:
            the static stress tensor is duplicated in the final analysis of the output
            when running MD simulations; when ``True`` this duplicated final entry is
            skipped. Set it to ``False`` in case one wishes to also read the final one.

        Returns
        -------
        numpy.ndarray or None
            returns ``None`` if the stresses are not found in the output, otherwise
            stresses will be returned
        """
        is_voigt = key.lower() == "voigt"
        if is_voigt:
            key = "Voigt"
            search = "Stress tensor Voigt"
        else:
            search = "siesta: Stress tensor"

        found = False
        line = " "
        while not found and line != "":
            found, line = self.step_to(search, allow_reread=False)
            found = found and key in line

        if not found:
            return None

        # Now read data
        if is_voigt:
            Svoigt = _a.arrayd([float(x) for x in line.split()[-6:]])
            S = voigt_matrix(Svoigt, False)
        else:
            S = []
            for _ in range(3):
                line = self.readline().split()
                S.append([float(x) for x in line[-3:]])
            if skip_final:
                if line[0].startswith("siesta:"):
                    return None
            S = _a.arrayd(S)

        return S

    @SileBinder(postprocess=_a.arrayd)
    @sile_fh_open()
    def read_moment(self, orbitals=False, quantity="S") -> np.ndarray:
        """Reads the moments from the Siesta output file

        These will only be present in case of spin-orbit coupling.

        Parameters
        ----------
        orbitals: bool, optional
            return a table with orbitally resolved moments.
        quantity: {'S', 'L'}, optional
            return the spin-moments or the L moments
        """
        # Read until the moments are found
        if not self.step_to("moments: Atomic", allow_reread=False)[0]:
            return None

        # The moments are printed in SPECIES list
        itt = iter(self)
        next(itt)  # empty
        next(itt)  # empty

        na = 0
        # Loop the species
        tbl = []
        # Read the species label
        while True:
            next(itt)  # ""
            next(itt)  # Atom    Orb ...
            # Loop atoms in this species list
            while True:
                line = next(itt)
                if line.startswith("Species") or line.startswith("--"):
                    break
                line = " "
                atom = []
                ia = 0
                while not line.startswith("--"):
                    line = next(itt).split()
                    if ia == 0:
                        ia = int(line[0])
                    elif ia != int(line[0]):
                        raise ValueError("Error in moments formatting.")
                    # Track maximum number of atoms
                    na = max(ia, na)
                    if quantity == "S":
                        atom.append([float(x) for x in line[4:7]])
                    elif quantity == "L":
                        atom.append([float(x) for x in line[7:10]])
                line = next(itt).split()  # Total ...
                if not orbitals:
                    ia = int(line[0])
                    if quantity == "S":
                        atom.append([float(x) for x in line[4:7]])
                    elif quantity == "L":
                        atom.append([float(x) for x in line[8:11]])
                tbl.append((ia, atom))
            if line.startswith("--"):
                break

        # Sort according to the atomic index
        moments = [None] * na
        # Insert at the correct atomic index
        for ia, atom in tbl:
            moments[ia - 1] = atom

        return _a.arrayd(moments)

    @sile_fh_open(True)
    def read_energy(self) -> PropertyDict:
        """Reads the final energy distribution

        Currently the energies translated are:

        ``band``
             band structure energy
        ``kinetic``
             electronic kinetic energy
        ``hartree``
             electronic electrostatic Hartree energy
        ``dftu``
             DFT+U energy
        ``spin_orbit``
             spin-orbit energy
        ``extE``
             external field energy
        ``xc``
             exchange-correlation energy
        ``exchange``
             exchange energy
        ``correlation``
             correlation energy
        ``bulkV``
             bulk-bias correction energy
        ``total``
             total energy
        ``negf``
             NEGF energy
        ``fermi``
             Fermi energy
        ``ion.electron``
             ion-electron interaction energy
        ``ion.ion``
             ion-ion interaction energy
        ``ion.kinetic``
             kinetic ion energy
        ``basis.enthalpy``
             enthalpy of basis sets, Free + p_basis*V_orbitals

        Any unrecognized key gets added *as is*.

        Examples
        --------
        >>> energies = sisl.get_sile("RUN.out").read_energy()
        >>> ion_energies = energies.ion
        >>> ion_energies.ion  # ion-ion interaction energy
        >>> ion_energies.kinetic  # ion kinetic energy
        >>> energies.fermi  # fermi energy

        Returns
        -------
        PropertyDict : dictionary like lookup table
            ionic energies are stored in a nested `PropertyDict` at the key ``ion``
            (all energies in eV)
        """
        found = self.step_to("siesta: Final energy", allow_reread=False)[0]
        out = PropertyDict()
        if not found:
            return out

        itt = iter(self)
        # Read data
        line = next(itt)
        name_conv = {
            "Band Struct.": "band",
            "Kinetic": "kinetic",
            "Hartree": "hartree",
            "Edftu": "dftu",
            "Eldau": "dftu",
            "Eso": "spin_orbit",
            "Ext. field": "extE",
            "Exch.-corr.": "xc",
            "Exch.": "exchange",
            "Corr.": "correlation",
            "Ekinion": "ion.kinetic",
            "Ion-electron": "ion.electron",
            "Ion-ion": "ion.ion",
            "Bulk bias": "bulkV",
            "Total": "total",
            "Fermi": "fermi",
            "Enegf": "negf",
            "(Free)E+ p_basis*V_orbitals": "basis.enthalpy",
            "(Free)E + p_basis*V_orbitals": "basis.enthalpy",  # we may correct the missing space
        }

        def assign(out, key, val):
            key = name_conv.get(key, key)
            try:
                val = float(val)
            except ValueError:
                warn(
                    f"Could not convert energy '{key}' ({val}) to a float, assigning nan."
                )
                val = np.nan
            if "." in key:
                loc, key = key.split(".")
                if not hasattr(out, loc):
                    out[loc] = PropertyDict()
                loc = out[loc]
            else:
                loc = out
            loc[key] = val

        while len(line.strip()) > 0:
            key, val = line.split("=")
            key = key.split(":")[1].strip()
            assign(out, key, val)
            line = next(itt)

        # now skip to the pressure
        found, line = self.step_to(
            ["(Free)E + p_basis*V_orbitals", "(Free)E+ p_basis*V_orbitals"],
            allow_reread=False,
        )
        if found:
            key, val = line.split("=")
            assign(out, key.strip(), val)

        return out

    def read_data(self, *args, **kwargs):
        """Read specific content in the Siesta out file

        The currently implemented things are denoted in the parameters list.
        Note that the returned quantities are in the order of keywords, so:

        >>> read_data(geometry=True, force=True)
        <geometry>, <force>
        >>> read_data(force=True, geometry=True)
        <force>, <geometry>

        Parameters
        ----------
        geometry: bool, optional
            read geometry, args are passed to `read_geometry`
        force: bool, optional
            read force, args are passed to `read_force`
        stress: bool, optional
            read stress, args are passed to `read_stress`
        moment: bool, optional
            read moment, args are passed to `read_moment` (only for spin-orbit calculations)
        energy: bool, optional
            read final energies, args are passed to `read_energy`
        """
        run = []
        # This loop ensures that we preserve the order of arguments
        # From Py3.6 and onwards **kwargs is an ordered dictionary
        for kw in kwargs.keys():
            if kw in ("geometry", "force", "moment", "stress", "energy"):
                if kwargs[kw]:
                    run.append(kw)

        # Clean running names
        for name in run:
            kwargs.pop(name)

        slice = kwargs.pop("slice", None)

        val = []
        for name in run:
            if slice is None:
                val.append(getattr(self, f"read_{name.lower()}")(*args, **kwargs))
            else:
                val.append(
                    getattr(self, f"read_{name.lower()}")[slice](*args, **kwargs)
                )

        if len(val) == 0:
            return None
        elif len(val) == 1:
            val = val[0]
        return val

    @SileBinder(
        default_slice=-1, check_empty=_read_scf_empty, postprocess=_read_scf_md_process
    )
    @sile_fh_open()
    def read_scf(
        self,
        key: str = "scf",
        iscf: Optional[int] = -1,
        as_dataframe: bool = False,
        ret_header: bool = False,
    ):
        r"""Parse SCF information and return a table of SCF information depending on what is requested

        Parameters
        ----------
        key : {'scf', 'ts-scf'}
            parse SCF information from Siesta SCF or TranSiesta SCF
        iscf :
            which SCF cycle should be stored. If ``-1`` only the final SCF step is stored,
            for None *all* SCF cycles are returned. When `iscf` values queried are not
            found they will be truncated to the nearest SCF step.
        as_dataframe:
            whether the information should be returned as a `pandas.DataFrame`. The
            advantage of this format is that everything is indexed and therefore you
            know what each value means. You can also perform operations very easily on
            a dataframe.
        ret_header:
            whether to also return the headers that define each value in the returned
            array, will have no effect if `as_dataframe` is true.
        """
        # These are the properties that are written in SIESTA scf
        props = ["iscf", "Eharris", "E_KS", "FreeEng", "dDmax", "Ef", "dHmax"]

        if not iscf is None:
            if iscf == 0:
                raise ValueError(
                    f"{self.__class__.__name__}.read_scf requires iscf argument to *not* be 0!"
                )

        def reset_d(d, line):
            if line.startswith("SCF cycle converged") or line.startswith(
                "SCF_NOT_CONV"
            ):
                if len(d["data"]) > 0:
                    d["_final_iscf"] = 1
            elif line.startswith("SCF cycle continued"):
                d["_final_iscf"] = 0

        def common_parse(line, d):
            nonlocal props
            if line.startswith("ts-Vha:"):
                d["ts-Vha"] = [float(line.split()[1])]
                if "ts-Vha" not in props:
                    d["order"].append("ts-Vha")
                    props.append("ts-Vha")
            elif line.startswith("spin moment: S"):
                # 4.1 and earlier
                d["S"] = list(map(float, line.split("=")[1].split()[1:]))
                if "Sx" not in props:
                    d["order"].append("S")
                    props.extend(["Sx", "Sy", "Sz"])
            elif line.startswith("spin moment: {S}"):
                # 4.2 and later
                d["S"] = list(map(float, line.split("= {")[1].split()[:3]))
                if "Sx" not in props:
                    d["order"].append("S")
                    props.extend(["Sx", "Sy", "Sz"])
            elif line.startswith("bulk-bias: |v"):
                # TODO old version should be removed once released
                d["bb-v"] = list(map(float, line.split()[-3:]))
                if "BB-vx" not in props:
                    d["order"].append("bb-v")
                    props.extend(["BB-vx", "BB-vy", "BB-vz"])
            elif line.startswith("bulk-bias: {v}"):
                idx = line.index("{v}")
                if line[idx + 3] == "_":
                    # we are in a subset
                    lbl = f"BB-{line[idx + 4:idx + 6]}"
                else:
                    lbl = "BB"
                v = line.split("] {")[1].split()
                v = list(map(float, v[:3]))
                d[lbl] = v
                if f"{lbl}-vx" not in props:
                    d["order"].append(lbl)
                    props.extend([f"{lbl}-vx", f"{lbl}-vy", f"{lbl}-vz"])
            elif line.startswith("bulk-bias: dq"):
                d["BB-q"] = list(map(float, line.split()[-2:]))
                if "BB-dq" not in props:
                    d["order"].append("BB-q")
                    props.extend(["BB-dq", "BB-q0"])
            else:
                return False
            return True

        if key.lower() == "scf":

            def parse_next(line, d):
                line = line.strip().replace("*", "0")
                reset_d(d, line)
                if common_parse(line, d):
                    pass
                elif line.startswith("scf:"):
                    d["_found_iscf"] = True
                    if len(line) == 97:
                        # this should be for Efup/dwn
                        # but I think this will fail for as_dataframe (TODO)
                        data = [
                            int(line[5:9]),
                            float(line[9:25]),
                            float(line[25:41]),
                            float(line[41:57]),
                            float(line[57:67]),
                            float(line[67:77]),
                            float(line[77:87]),
                            float(line[87:97]),
                        ]
                    elif len(line) == 87:
                        data = [
                            int(line[5:9]),
                            float(line[9:25]),
                            float(line[25:41]),
                            float(line[41:57]),
                            float(line[57:67]),
                            float(line[67:77]),
                            float(line[77:87]),
                        ]
                    else:
                        # Populate DATA by splitting
                        data = line.split()
                        data = [int(data[1])] + list(map(float, data[2:]))
                    construct_data(d, data)

        elif key.lower() == "ts-scf":

            def parse_next(line, d):
                line = line.strip().replace("*", "0")
                reset_d(d, line)
                if common_parse(line, d):
                    pass
                elif line.startswith("ts-q:"):
                    data = line.split()[1:]
                    try:
                        d["ts-q"] = list(map(float, data))
                    except Exception:
                        # We are probably reading a device list
                        # ensure that props are appended
                        if data[-1] not in props:
                            d["order"].append("ts-q")
                            props.extend(data)
                elif line.startswith("ts-scf:"):
                    d["_found_iscf"] = True
                    if len(line) == 100:
                        data = [
                            int(line[8:12]),
                            float(line[12:28]),
                            float(line[28:44]),
                            float(line[44:60]),
                            float(line[60:70]),
                            float(line[70:80]),
                            float(line[80:90]),
                            float(line[90:100]),
                        ]
                    elif len(line) == 90:
                        data = [
                            int(line[8:12]),
                            float(line[12:28]),
                            float(line[28:44]),
                            float(line[44:60]),
                            float(line[60:70]),
                            float(line[70:80]),
                            float(line[80:90]),
                        ]
                    else:
                        # Populate DATA by splitting
                        data = line.split()
                        data = [int(data[1])] + list(map(float, data[2:]))
                    construct_data(d, data)

        # A temporary dictionary to hold information while reading the output file
        d = {
            "_found_iscf": False,
            "_final_iscf": 0,
            "data": [],
            "order": [],
        }

        def construct_data(d, data):
            for key in d["order"]:
                data.extend(d[key])
            d["data"] = data

        scf = []
        for line in self:
            parse_next(line, d)
            if d["_found_iscf"]:
                d["_found_iscf"] = False
                data = d["data"]
                if len(data) == 0:
                    continue

                if iscf is None or iscf < 0:
                    scf.append(data)
                elif data[0] <= iscf:
                    # this ensures we will retain the latest iscf in
                    # case the requested iscf is too big
                    scf = data

            if d["_final_iscf"] == 1:
                d["_final_iscf"] = 2
            elif d["_final_iscf"] == 2:
                d["_final_iscf"] = 0
                data = d["data"]
                if len(data) == 0:
                    # this traps the case where we read ts-scf
                    # but find the final scf iteration.
                    # In that case we don't have any data.
                    scf = []
                    continue

                if len(scf) == 0:
                    # this traps cases where final_iscf has
                    # been triggered but we haven't collected anything.
                    # I.e. if key == scf but ts-scf also exists.
                    continue

                # First figure out which iscf we should store
                if iscf is None:  # or iscf > 0
                    # scf is correct
                    pass
                elif iscf < 0:
                    # truncate to 0
                    scf = scf[max(len(scf) + iscf, 0)]

                # found a full MD
                break

        # Define the function that is going to convert the information of a MD step to a DataFrame
        if as_dataframe:
            import pandas as pd

            if len(scf) == 0:
                return pd.DataFrame(index=pd.Index([], name="iscf"), columns=props[1:])
            scf = np.atleast_2d(scf)
            return pd.DataFrame(
                scf[..., 1:],
                index=pd.Index(scf[..., 0].ravel().astype(np.int32), name="iscf"),
                columns=props[1:],
            )

        # Convert to numpy array
        scf = np.array(scf)
        if ret_header:
            return scf, props
        return scf

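    # Usage sketch (hypothetical file name, doctest-style):
    #
    #   >>> out = stdoutSileSiesta("RUN.out")
    #   >>> df = out.read_scf(as_dataframe=True)  # last MD step, indexed by iscf
    #   >>> data, header = out.read_scf(iscf=None, ret_header=True)  # all SCF cycles
    #   >>> df_md = out.read_scf[:](as_dataframe=True)  # all MD steps (multi-indexed)
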
    @sile_fh_open(True)
    def read_charge(
        self, name, iscf=Opt.ANY, imd=Opt.ANY, key_scf="scf", as_dataframe=False
    ):
        r"""Read charges calculated in SCF loop or MD loop (or both)

        Siesta enables many different modes of writing out charges.

        NOTE: currently Mulliken charges are not implemented.

        The below table shows a list of different cases that may be encountered,
        the letters are referred to in the return section to indicate what is
        returned.

        +-----------+-----+-----+--------+-------+------------------+
        | Case      | *A* | *B* | *C*    | *D*   | *E*              |
        +-----------+-----+-----+--------+-------+------------------+
        | Charge    | MD  | SCF | MD+SCF | Final | Orbital resolved |
        +-----------+-----+-----+--------+-------+------------------+
        | Voronoi   | +   | +   | +      | +     | -                |
        +-----------+-----+-----+--------+-------+------------------+
        | Hirshfeld | +   | +   | +      | +     | -                |
        +-----------+-----+-----+--------+-------+------------------+
        | Mulliken  | +   | +   | +      | +     | +                |
        +-----------+-----+-----+--------+-------+------------------+

        Notes
        -----
        Errors will be raised if one requests information not present. I.e. passing
        an integer or `Opt.ALL` for `iscf` will raise an error if the SCF charges are
        not present. For `Opt.ANY` it will return the most information, effectively
        SCF will be returned if present.

        Currently Mulliken is not implemented, any help in reading this would be
        very welcome.

        Parameters
        ----------
        name: {"voronoi", "hirshfeld"}
            the name of the charges that you want to read
        iscf: int or Opt, optional
            index (0-based) of the scf iteration you want the charges for.
            If the enum specifier `Opt.ANY` or `Opt.ALL` are used, then the returned
            quantities depend on what is present.
            If ``None/Opt.NONE`` it will not return any SCF charges.
            If both `imd` and `iscf` are ``None`` then only the final charges will be
            returned.
        imd: int or Opt, optional
            index (0-based) of the md step you want the charges for.
            If the enum specifier `Opt.ANY` or `Opt.ALL` are used, then the returned
            quantities depend on what is present.
            If ``None/Opt.NONE`` it will not return any MD charges.
            If both `imd` and `iscf` are ``None`` then only the final charges will be
            returned.
        key_scf : str, optional
            the key lookup for the scf iterations (a ":" will automatically be appended)
        as_dataframe: boolean, optional
            whether charges should be returned as a pandas dataframe.

        Returns
        -------
        numpy.ndarray
            if a specific MD+SCF index is requested (or special cases where output is
            not complete)
        list of numpy.ndarray
            if one or both of `iscf` or `imd` are different from ``None/Opt.NONE``.
        pandas.DataFrame
            if `as_dataframe` is requested. The dataframe will have multi-indices if
            multiple SCF or MD steps are requested.
        """
        namel = name.lower()
        if as_dataframe:
            import pandas as pd

            def _empty_charge():
                # build a fake dataframe with no indices
                return pd.DataFrame(
                    index=pd.Index([], name="atom", dtype=np.int32), dtype=np.float32
                )

        else:
            pd = None

            def _empty_charge():
                # return for single value with nan values
                return _a.arrayf([[None]])

        # define helper function for reading voronoi+hirshfeld charges
        def _voronoi_hirshfeld_charges():
            """Read output from Voronoi/Hirshfeld charges"""
            nonlocal pd

            # Expecting something like this (NC/SOC)
            # Voronoi Atomic Populations:
            # Atom #   dQatom  Atom pop         S        Sx        Sy        Sz  Species
            #      1 -0.02936  4.02936    0.00000  -0.00000   0.00000   0.00000  C
            # or (polarized)
            # Voronoi Atomic Populations:
            # Atom #   dQatom  Atom pop        Sz  Species
            #      1 -0.02936  4.02936    0.00000  C

            # first line is the header
            header = (
                self.readline()
                .replace("dQatom", "dq")  # dQatom in master
                .replace(" Qatom", " dq")  # Qatom in 4.1
                .replace("Atom pop", "e")  # not found in 4.1
                .split()
            )[2:-1]

            # Define the function that parses the charges
            def _parse_charge(line):
                atom_idx, *vals, symbol = line.split()
                # assert that this is a proper line
                # this should catch cases where the following line of charge output
                # is still parseable
                # atom_idx = int(atom_idx)
                return list(map(float, vals))

            # We have found the header, prepare a list to read the charges
            atom_charges = []
            line = " "
            while line != "":
                try:
                    line = self.readline()
                    charge_vals = _parse_charge(line)
                    atom_charges.append(charge_vals)
                except Exception:
                    # We already have the charge values and we reached a line that
                    # can't be parsed, this means we have reached the end.
                    break

            if pd is None:
                # not as_dataframe
                return _a.arrayf(atom_charges)

            # determine how many columns we have
            # this will remove atom indices and species, so only inside
            ncols = len(atom_charges[0])
            assert ncols == len(header)

            # the precision is limited, so no need for double precision
            return pd.DataFrame(
                atom_charges,
                columns=header,
                dtype=np.float32,
                index=pd.RangeIndex(stop=len(atom_charges), name="atom"),
            )

        # define helper function for reading Mulliken charges
        def _mulliken_charges():
            """Read output from Mulliken charges"""
            raise NotImplementedError("Mulliken charges are not implemented currently")

        # Check that a known charge has been requested
        if namel == "voronoi":
            _r_charge = _voronoi_hirshfeld_charges
            charge_keys = [
                "Voronoi Atomic Populations",
                "Voronoi Net Atomic Populations",
            ]
        elif namel == "hirshfeld":
            _r_charge = _voronoi_hirshfeld_charges
            charge_keys = [
                "Hirshfeld Atomic Populations",
                "Hirshfeld Net Atomic Populations",
            ]
        elif namel == "mulliken":
            _r_charge = _mulliken_charges
            charge_keys = ["mulliken: Atomic and Orbital Populations"]
        else:
            raise ValueError(
                f"{self.__class__.__name__}.read_charge name argument should be one of [voronoi, hirshfeld, mulliken], got {name}?"
            )

        # Ensure the key_scf matches exactly (prepend a space)
        key_scf = f" {key_scf.strip()}:"

        # Reading charges may be quite time consuming for large MD simulations.
        # to see if we finished a MD read, we check for these keys
        search_keys = [
            # two keys can signal ending SCF
            "SCF Convergence",
            "SCF_NOT_CONV",
            "siesta: Final energy",
            key_scf,
            *charge_keys,
        ]

        # adjust the below while loop to take into account any additional
        # segments of search_keys
        IDX_SCF_END = [0, 1]
        IDX_FINAL = [2]
        IDX_SCF = [3]
        # the rest are charge keys
        IDX_CHARGE = list(range(len(search_keys) - len(charge_keys), len(search_keys)))

        # state to figure out where we are
        state = PropertyDict()
        state.INITIAL = 0
        state.MD = 1
        state.SCF = 2
        state.CHARGE = 3
        state.FINAL = 4

        # a list of scf_charge
        md_charge = []
        md_scf_charge = []
        scf_charge = []
        final_charge = None

        # signal that any first reads are INITIAL charges
        current_state = state.INITIAL
        charge = _empty_charge()

        FOUND_SCF = False
        FOUND_MD = False
        FOUND_FINAL = False

        # TODO walrus
        ret = self.step_to(search_keys, case=True, ret_index=True, allow_reread=False)
        while ret[0]:
            if ret[2] in IDX_SCF_END:
                # we finished all SCF iterations
                current_state = state.MD
                md_scf_charge.append(scf_charge)
                scf_charge = []

            elif ret[2] in IDX_SCF:
                current_state = state.SCF
                # collect scf-charges (possibly none)
                scf_charge.append(charge)

            elif ret[2] in IDX_FINAL:
                current_state = state.FINAL
                # don't do anything, this is the final charge construct
                # regardless of where it comes from.

            elif ret[2] in IDX_CHARGE:
                FOUND_CHARGE = True
                # also read charge
                charge = _r_charge()

                if state.INITIAL == current_state or state.CHARGE == current_state:
                    # this signals scf charges
                    FOUND_SCF = True
                    # There *could* be 2 steps if we are mixing H,
                    # this is because it first does
                    #   compute H -> compute DM -> compute H
                    # in the first iteration, subsequently we only do
                    #   compute DM -> compute H
                    # once we hit ret[2] in IDX_SCF we will append
                    scf_charge = []

                elif state.MD == current_state:
                    FOUND_MD = True
                    # we just finished an SCF cycle.
                    # So any output between SCF ending and
                    # a new one beginning *must* be that geometry's
                    # charge

                    # Here `charge` may be NONE signalling
                    # we don't have charge in MD steps.
                    md_charge.append(charge)

                    # reset charge
                    charge = _empty_charge()

                elif state.SCF == current_state:
                    FOUND_SCF = True

                elif state.FINAL == current_state:
                    FOUND_FINAL = True
                    # a special state writing out the charges after everything
                    final_charge = charge

                    charge = _empty_charge()
                    scf_charge = []
                    # we should be done and no other charge reads should be found!
                    # should we just break?

                current_state = state.CHARGE

            # step to next entry
            ret = self.step_to(
                search_keys, case=True, ret_index=True, allow_reread=False
            )

        if not any((FOUND_SCF, FOUND_MD, FOUND_FINAL)):
            raise SileError(f"{self!s} does not contain any charges ({name})")

        # if the scf-charges are not stored, it means that the MD step finalization
        # has not been read. So correct
        if len(scf_charge) > 0:
            assert False, "this test shouldn't reach here"
            # we must not have read through the entire MD step
            # so this has to be a running simulation
            if charge is not None:
                scf_charge.append(charge)
                charge = _empty_charge()
            md_scf_charge.append(scf_charge)

        # otherwise there is some *parsing* error, so for now we use assert
        assert len(scf_charge) == 0

        if as_dataframe:
            # convert data to proper data structures
            # regardless of user requests. This is an overhead... But probably not
            # that big of a problem.
            if FOUND_SCF:
                md_scf_charge = pd.concat(
                    [
                        pd.concat(
                            iscf, keys=pd.RangeIndex(1, len(iscf) + 1, name="iscf")
                        )
                        for iscf in md_scf_charge
                    ],
                    keys=pd.RangeIndex(1, len(md_scf_charge) + 1, name="imd"),
                )
            if FOUND_MD:
                md_charge = pd.concat(
                    md_charge, keys=pd.RangeIndex(1, len(md_charge) + 1, name="imd")
                )
        else:
            if FOUND_SCF:
                nan_array = _a.emptyf(md_scf_charge[0][0].shape)
                nan_array.fill(np.nan)

                def get_md_scf_charge(scf_charge, iscf):
                    try:
                        return scf_charge[iscf]
                    except Exception:
                        return nan_array

            if FOUND_MD:
                md_charge = np.stack(md_charge)

        # option parsing is a bit *difficult* with flag enums
        # So first figure out what is there, and handle this based
        # on arguments
        def _p(flag, found):
            """Helper routine to do the following:

            Returns
            -------
            is_opt : bool
                whether the flag is an `Opt`
            flag :
                corrected flag
            """
            if isinstance(flag, Opt):
                # correct flag depending on what `found` is
                # If the values have been found we
                # change flag to None only if flag == NONE
                # If the case has not been found, we
                # change flag to None if ANY or NONE is in flags

                if found:
                    # flag is only NONE, then pass none
                    if not (Opt.NONE ^ flag):
                        flag = None
                else:  # not found
                    # we convert flag to none
                    # if ANY or NONE in flag
                    if (Opt.NONE | Opt.ANY) & flag:
                        flag = None

            return isinstance(flag, Opt), flag

        opt_imd, imd = _p(imd, FOUND_MD)
        opt_iscf, iscf = _p(iscf, FOUND_SCF)

        if not (FOUND_SCF or FOUND_MD):
            # none of these are found
            # we require that the user does not request any of them
            if (opt_iscf or (not iscf is None)) or (opt_imd or (not imd is None)):
                raise SileError(f"{self!s} does not contain MD/SCF charges")

        elif not FOUND_SCF:
            if opt_iscf or (not iscf is None):
                raise SileError(f"{self!s} does not contain SCF charges")

        elif not FOUND_MD:
            if opt_imd or (not imd is None):
                raise SileError(f"{self!s} does not contain MD charges")

        # if either are options they may hold
        if opt_imd and opt_iscf:
            if FOUND_SCF:
                return md_scf_charge
            elif FOUND_MD:
                return md_charge
            elif FOUND_FINAL:
                # I think this will never be reached
                # If neither are found they will be converted to
                # None
                return final_charge
            raise SileError(f"{str(self)} unknown argument for 'imd' and 'iscf'")

        elif opt_imd:
            # flag requested imd
            if not (imd & (Opt.ANY | Opt.ALL)):
                # wrong flag
                raise SileError(f"{str(self)} unknown argument for 'imd'")

            if FOUND_SCF and iscf is not None:
                # this should be handled, i.e. the scf should be taken out
                if as_dataframe:
                    return md_scf_charge.groupby(level=[0, 2]).nth(iscf)
                return np.stack(
                    tuple(get_md_scf_charge(x, iscf) for x in md_scf_charge)
                )
            elif FOUND_MD and iscf is None:
                return md_charge
            raise SileError(
                f"{str(self)} unknown argument for 'imd' and 'iscf', could not find SCF charges"
            )

        elif opt_iscf:
            # flag requested iscf
            if not (iscf & (Opt.ANY | Opt.ALL)):
                # wrong flag
                raise SileError(f"{str(self)} unknown argument for 'iscf'")
            if imd is None:
                # correct imd
                imd = -1
            if as_dataframe:
                md_scf_charge = md_scf_charge.groupby(level=0)
                group = list(md_scf_charge.groups.keys())[imd]
                return md_scf_charge.get_group(group).droplevel(0)
            return np.stack(md_scf_charge[imd])

        elif imd is None and iscf is None:
            if FOUND_FINAL:
                return final_charge
            raise SileError(f"{str(self)} does not contain final charges")

        elif imd is None:
            # iscf is not None, so pass through as though explicitly passed
            imd = -1

        elif iscf is None:
            # iscf is None, so return the MD charges for the requested MD step
            if as_dataframe:
                return md_charge.groupby(level=1).nth(imd)
            return md_charge[imd]

        if as_dataframe:
            # first select imd
            md_scf_charge = md_scf_charge.groupby(level=0)
            group = list(md_scf_charge.groups.keys())[imd]
            md_scf_charge = md_scf_charge.get_group(group).droplevel(0)
            return md_scf_charge.groupby(level=1).nth(iscf)
        return md_scf_charge[imd][iscf]

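    # Usage sketch (hypothetical file name, doctest-style):
    #
    #   >>> out = stdoutSileSiesta("RUN.out")
    #   >>> q_final = out.read_charge("voronoi", iscf=None, imd=None)  # final charges only
    #   >>> q = out.read_charge("hirshfeld", as_dataframe=True)  # whatever is present
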
outSileSiesta = deprecation(
    "outSileSiesta has been deprecated in favor of stdoutSileSiesta.", "0.15", "0.16"
)(stdoutSileSiesta)

add_sile("siesta.out", stdoutSileSiesta, case=False, gzip=True)
add_sile("out", stdoutSileSiesta, case=False, gzip=True)
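

# A minimal usage sketch (not part of the sisl API). It illustrates how the readers
# above are typically combined; the file name "RUN.out" and the returned dict are
# illustrative assumptions only.
def _example_usage(path="RUN.out"):  # pragma: no cover
    """Hypothetical helper showing typical use of `stdoutSileSiesta`."""
    out = stdoutSileSiesta(path)
    return {
        "geometry": out.read_geometry(),  # one geometry of the run
        "force": out.read_force(),  # atomic forces (see `read_force` for shapes)
        "scf": out.read_scf(as_dataframe=True),  # SCF convergence of the last MD step
        "energy": out.read_energy(),  # final energy decomposition (eV)
    }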