Source code for sisl.io.siesta.out

import os
import numpy as np

from .sile import SileSiesta
from ..sile import add_sile, sile_fh_open

from sisl._internal import set_module
import sisl._array as _a
from sisl import Geometry, Atom, SuperCell
from sisl.utils import PropertyDict
from sisl.utils.cmd import *
from sisl.unit.siesta import unit_convert

__all__ = ['outSileSiesta']


Bohr2Ang = unit_convert('Bohr', 'Ang')


def _ensure_species(species):
    """ Ensures that the species list is a list with entries (converts `None` to a list). """
    if species is None:
        return [Atom(i) for i in range(150)]
    return species


@set_module("sisl.io.siesta")
class outSileSiesta(SileSiesta):
    """ Output file from Siesta

    This enables reading the output quantities from the Siesta output.
    """
    _completed = None

    def readline(self):
        line = super().readline()
        if 'Job completed' in line:
            self._completed = True
        return line

    readline.__doc__ = SileSiesta.readline.__doc__

[docs]    @sile_fh_open()
    def completed(self):
        """ True if the full file has been read and "Job completed" was found. """
        if self._completed is None:
            completed = self.step_to("Job completed")[0]
        else:
            completed = self._completed
        if completed:
            self._completed = True
        return completed

[docs]    @sile_fh_open()
    def read_species(self):
        """ Reads the species from the top of the output file.

        If wanting the species this HAS to be the first routine called.

        It returns an array of `Atom` objects which may easily be indexed.
        """

        line = self.readline()
        while not 'Species number:' in line:
            line = self.readline()
            if line == '':
                # We fake the species by direct atomic number
                return None

        atom = []
        while 'Species number:' in line:
            ls = line.split()
            if ls[3] == 'Atomic':
                atom.append(Atom(int(ls[5]), tag=ls[7]))
            else:
                atom.append(Atom(int(ls[7]), tag=ls[4]))
            line = self.readline()

        return atom

[docs]    @sile_fh_open(True)
    def read_basis_block(self):
        """ Reads the PAO.Basis block that Siesta writes """
        found, line = self.step_to("%block PAO.Basis")
        if not found:
            raise ValueError(f"{self.__class__.__name__}.read_basis_block could not find PAO.Basis in output")

        basis = []
        while not line.startswith("%endblock PAO.Basis"):
            line = self.readline()
            basis.append(line)

        return basis

    def _read_supercell_outcell(self):
        """ Wrapper for reading the unit-cell from the outcoor block """

        # Read until outcell is found
        found, line = self.step_to("outcell: Unit cell vectors")
        if not found:
            raise ValueError(f"{self.__class__.__name__}._r_supercell_outcell did not find outcell key")

        Ang = 'Ang' in line

        # We read the unit-cell vectors (in Ang)
        cell = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            cell.append([float(x) for x in line[:3]])
            line = self.readline()

        cell = _a.arrayd(cell)

        if not Ang:
            cell *= Bohr2Ang

        return SuperCell(cell)

    def _read_geometry_outcoor(self, line, species=None):
        """ Wrapper for reading the geometry as in the outcoor output """
        species = _ensure_species(species)

        # Now we have outcoor
        scaled = 'scaled' in line
        fractional = 'fractional' in line
        Ang = 'Ang' in line

        # Read in data
        xyz = []
        spec = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            xyz.append([float(x) for x in line[:3]])
            spec.append(int(line[3]))
            line = self.readline()

        # in outcoor we know it is always just after
        cell = self._read_supercell_outcell()
        xyz = _a.arrayd(xyz)

        # Now create the geometry
        if scaled:
            # The output file for siesta does not
            # contain the lattice constant.
            # So... :(
            raise ValueError("Could not read the lattice-constant for the scaled geometry")
        elif fractional:
            xyz = xyz.dot(cell.cell)
        elif not Ang:
            xyz *= Bohr2Ang

        # Assign the correct species
        geom = Geometry(xyz, [species[ia - 1] for ia in spec], sc=cell)

        return geom

    def _read_geometry_atomic(self, line, species=None):
        """ Wrapper for reading the geometry as in the outcoor output """
        species = _ensure_species(species)

        # Now we have outcoor
        Ang = 'Ang' in line

        # Read in data
        xyz = []
        atom = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            xyz.append([float(x) for x in line[1:4]])
            atom.append(species[int(line[4])-1])
            line = self.readline()

        # Retrieve the unit-cell (but do not skip file-descriptor position)
        # This is because the current unit-cell is not always written.
        pos = self.fh.tell()
        cell = self._read_supercell_outcell()
        self.fh.seek(pos, os.SEEK_SET)

        # Convert xyz
        xyz = _a.arrayd(xyz)
        if not Ang:
            xyz *= Bohr2Ang

        return Geometry(xyz, atom, sc=cell)

[docs]    @sile_fh_open()
    def read_geometry(self, last=True, all=False):
        """ Reads the geometry from the Siesta output file

        Parameters
        ----------
        last: bool, optional
           only read the last geometry
        all: bool, optional
           return a list of all geometries (like an MD)
           If `True` `last` is ignored

        Returns
        -------
        geometries: list or Geometry or None
             if all is False only one geometry will be returned (or None). Otherwise
             a list of geometries corresponding to the MD-runs.
        """

        # The first thing we do is reading the species.
        # Sadly, if this routine is called AFTER some other
        # reading process, it may fail...
        # Perhaps we should rewind to ensure this...
        # But...
        species = self.read_species()
        if all:
            # force last to be false
            last = False

        def type_coord(line):
            if 'outcoor' in line:
                return 1
            elif 'siesta: Atomic coordinates' in line:
                return 2
            # Signal not found
            return 0

        def next_geom():
            coord = 0
            while coord == 0:
                line = self.readline()
                if line == '':
                    return 0, None
                coord = type_coord(line)

            if coord == 1:
                return 1, self._read_geometry_outcoor(line, species)
            elif coord == 2:
                return 2, self._read_geometry_atomic(line, species)

        # Read until a coordinate block is found
        geom0 = None
        mds = []

        if all or last:
            # we need to read through all things!
            while True:
                coord, geom = next_geom()
                if coord == 0:
                    break
                if coord == 2:
                    geom0 = geom
                else:
                    mds.append(geom)

            # Since the user requests only the MD geometries
            # we only return those
            if last:
                if len(mds) > 0:
                    return mds[-1]
                return geom0
            return mds

        # just read the next geometry we hit
        return next_geom()[1]

[docs]    @sile_fh_open()
    def read_force(self, last=True, all=False, total=False, max=False):
        """ Reads the forces from the Siesta output file

        Parameters
        ----------
        last: bool, optional
           only read the last force
        all: bool, optional
           return a list of all forces (like an MD)
           If `True` `last` is ignored
        total: bool, optional
            return the total forces instead of the atomic forces.
        max: bool, optional
            whether only the maximum atomic force should be returned for each step.

            Setting it to `True` is equivalent to `max(outSile.read_force())` in case atomic forces
            are written in the output file (`WriteForces .true.` in the fdf file)

            Note that this is not the same as doing `max(outSile.read_force(total=True))` since
            the forces returned in that case are averages on each axis.

        Returns
        -------
        numpy.ndarray or None
            returns ``None`` if the forces are not found in the
            output, otherwise forces will be returned

            The shape of the array will be different depending on the type of forces requested:
                - atomic (default): (nMDsteps, nAtoms, 3)
                - total: (nMDsteps, 3)
                - max: (nMDsteps, )

            If `all` is `False`, the first dimension does not exist. In the case of max, the returned value
            will therefore be just a float, not an array.

            If `total` and `max` are both `True`, they are returned separately as a tuple: ``(total, max)``
        """
        if all:
            last = False

        # Read until forces are found
        def next_force():

            line = self.readline()
            while not 'siesta: Atomic forces' in line:
                line = self.readline()
                if line == '':
                    return None

            # Now read data
            F = []
            line = self.readline()
            if 'siesta:' in line:
                # This is the final summary, we don't need to read it as it does not contain new information
                # and also it make break things since max forces are not written there
                return None

            # First, we encounter the atomic forces
            while '---' not in line:
                line = line.split()
                if not (total or max):
                    F.append([float(x) for x in line[-3:]])
                line = self.readline()
                if line == '':
                    break

            line = self.readline()
            # Then, the total forces
            if total:
                F = [float(x) for x in line.split()[-3:]]

            line = self.readline()
            #And after that we can read the max force
            if max and len(line.split()) != 0:
                line = self.readline()
                maxF = float(line.split()[1])

                # In case total is also requested, we are going to store it all in the same variable
                # It will be separated later
                if total:
                    F = (*F, maxF)
                else:
                    F = maxF

            return _a.arrayd(F)

        def return_forces(Fs):
            # Handle cases where we can't now if they are found
            if Fs is None: return None
            Fs = _a.arrayd(Fs)
            if max and total:
                return (Fs[..., :-1], Fs[..., -1])
            elif max and not all:
                return Fs.ravel()[0]
            return Fs

        if all or last:
            # list of all forces
            Fs = []
            while True:
                F = next_force()
                if F is None:
                    break
                Fs.append(F)

            if last:
                return return_forces(Fs[-1])

            return return_forces(Fs)

        return return_forces(next_force())

[docs]    @sile_fh_open()
    def read_stress(self, key='static', last=True, all=False):
        """ Reads the stresses from the Siesta output file

        Parameters
        ----------
        key : {'static', 'total'}
           which stress to read from the output.
        last: bool, optional
           only read the last stress
        all: bool, optional
           return a list of all stresses (like an MD)
           If `True` `last` is ignored

        Returns
        -------
        numpy.ndarray or None
            returns ``None`` if the stresses are not found in the
            output, otherwise stresses will be returned
        """
        if all:
            last = False

        # Read until stress are found
        def next_stress():
            line = self.readline()
            while not ('siesta: Stress tensor' in line and key in line):
                line = self.readline()
                if line == '':
                    return None

            # Now read data
            S = []
            for _ in range(3):
                line = self.readline().split()
                S.append([float(x) for x in line[-3:]])

            return _a.arrayd(S)

        # list of all stresses
        Ss = []

        if all or last:
            while True:
                S = next_stress()
                if S is None:
                    break
                Ss.append(S)

            if last:
                return Ss[-1]
            if self.completed() and key == 'static':
                return Ss[:-1]
            return Ss

        return next_stress()

[docs]    @sile_fh_open()
    def read_moment(self, orbitals=False, quantity='S', last=True, all=False):
        """ Reads the moments from the Siesta output file

        These will only be present in case of spin-orbit coupling.

        Parameters
        ----------
        orbitals: bool, optional
           return a table with orbitally resolved
           moments.
        quantity: {'S', 'L'}, optional
           return the spin-moments or the L moments
        last: bool, optional
           only read the last force
        all: bool, optional
           return a list of all forces (like an MD)
           If `True` `last` is ignored
        """

        # Read until outcoor is found
        itt = iter(self)
        for line in itt:
            if 'moments: Atomic' in line:
                break
        if not 'moments: Atomic' in line:
            return None

        # The moments are printed in SPECIES list
        next(itt) # empty
        next(itt) # empty

        na = 0
        # Loop the species
        tbl = []
        # Read the species label
        while True:
            next(itt) # ""
            next(itt) # Atom    Orb ...
            # Loop atoms in this species list
            while True:
                line = next(itt)
                if line.startswith('Species') or \
                   line.startswith('--'):
                    break
                line = ' '
                atom = []
                ia = 0
                while not line.startswith('--'):
                    line = next(itt).split()
                    if ia == 0:
                        ia = int(line[0])
                    elif ia != int(line[0]):
                        raise ValueError("Error in moments formatting.")
                    # Track maximum number of atoms
                    na = max(ia, na)
                    if quantity == 'S':
                        atom.append([float(x) for x in line[4:7]])
                    elif quantity == 'L':
                        atom.append([float(x) for x in line[7:10]])
                line = next(itt).split() # Total ...
                if not orbitals:
                    ia = int(line[0])
                    if quantity == 'S':
                        atom.append([float(x) for x in line[4:7]])
                    elif quantity == 'L':
                        atom.append([float(x) for x in line[8:11]])
                tbl.append((ia, atom))
            if line.startswith('--'):
                break

        # Sort according to the atomic index
        moments = [] * na

        # Insert in the correct atomic
        for ia, atom in tbl:
            moments[ia-1] = atom

        if not all:
            return _a.arrayd(moments)
        return moments

[docs]    @sile_fh_open()
    def read_energy(self):
        """ Reads the final energy distribution

        Currently the energies translated are:

        ``band``
             band structure energy
        ``kinetic``
             electronic kinetic energy
        ``hartree``
             electronic electrostatic Hartree energy
        ``dftu``
             DFT+U energy
        ``spin_orbit``
             spin-orbit energy
        ``extE``
             external field energy
        ``xc``
             exchange-correlation energy
        ``bulkV``
             bulk-bias correction energy
        ``total``
             total energy
        ``negf``
             NEGF energy
        ``fermi``
             Fermi energy
        ``ion.electron``
             ion-electron interaction energy
        ``ion.ion``
             ion-ion interaction energy
        ``ion.kinetic``
             kinetic ion energy


        Any unrecognized key gets added *as is*.

        Examples
        --------
        >>> energies = sisl.get_sile("RUN.out").read_energy()
        >>> ion_energies = energies.ion
        >>> ion_energies.ion # ion-ion interaction energy
        >>> ion_energies.kinetic # ion kinetic energy
        >>> energies.fermi # fermi energy

        Returns
        -------
        PropertyDict : dictionary like lookup table ionic energies are stored in a nested `PropertyDict` at the key ``ion`` (all energies in eV)
        """
        found = self.step_to("siesta: Final energy", reread=False)[0]
        out = PropertyDict()
        out.ion = PropertyDict()
        if not found:
            return out
        itt = iter(self)

        # Read data
        line = next(itt)
        name_conv = {
            "Band Struct.": "band",
            "Kinetic": "kinetic",
            "Hartree": "hartree",
            "Edftu": "dftu",
            "Eldau": "dftu",
            "Eso": "spin_orbit",
            "Ext. field": "extE",
            "Exch.-corr.": "xc",
            "Ekinion": "ion.kinetic",
            "Ion-electron": "ion.electron",
            "Ion-ion": "ion.ion",
            "Bulk bias": "bulkV",
            "Total": "total",
            "Fermi": "fermi",
            "Enegf": "negf",
        }
        while len(line.strip()) > 0:
            key, val = line.split("=")
            key = key.split(":")[1].strip()
            key = name_conv.get(key, key)
            if key.startswith("ion."):
                # sub-nest
                out.ion[key[4:]] = float(val)
            else:
                out[key] = float(val)
            line = next(itt)

        return out

[docs]    def read_data(self, *args, **kwargs):
        """ Read specific content in the Siesta out file

        The currently implemented things are denoted in
        the parameters list.
        Note that the returned quantities are in the order
        of keywords, so:

        >>> read_data(geometry=True, force=True)
        <geometry>, <force>
        >>> read_data(force=True, geometry=True)
        <force>, <geometry>

        Parameters
        ----------
        geometry: bool, optional
           read geometry, args are passed to `read_geometry`
        force: bool, optional
           read force, args are passed to `read_force`
        stress: bool, optional
           read stress, args are passed to `read_stress`
        moment: bool, optional
           read moment, args are passed to `read_moment` (only for spin-orbit calculations)
        energy: bool, optional
           read final energies, args are passed to `read_energy`
        """
        run = []
        # This loops ensures that we preserve the order of arguments
        # From Py3.6 and onwards the **kwargs is an OrderedDictionary
        for kw in kwargs.keys():
            if kw in ['geometry', 'force', 'moment', 'stress', 'energy']:
                if kwargs[kw]:
                    run.append(kw)

        # Clean running names
        for name in run:
            kwargs.pop(name)

        val = []
        for name in run:
            val.append(getattr(self, 'read_{}'.format(name.lower()))(*args, **kwargs))

        if len(val) == 0:
            return None
        elif len(val) == 1:
            val = val[0]
        return val

[docs]    @sile_fh_open()
    def read_scf(self, key="scf", iscf=-1, imd=None, as_dataframe=False):
        r""" Parse SCF information and return a table of SCF information depending on what is requested

        Parameters
        ----------
        key : {'scf', 'ts-scf'}
            parse SCF information from Siesta SCF or TranSiesta SCF
        iscf : int, optional
            which SCF cycle should be stored. If ``-1`` only the final SCF step is stored,
            for None *all* SCF cycles are returned. When `iscf` values queried are not found they
            will be truncated to the nearest SCF step.
        imd: int or None, optional
            whether only a particular MD step is queried, if None, all MD steps are
            parsed and returned. A negative number wraps for the last MD steps.
        as_dataframe: boolean, optional
            whether the information should be returned as a `pandas.DataFrame`. The advantage of this
            format is that everything is indexed and therefore you know what each value means.You can also
            perform operations very easily on a dataframe. 
        """

        #These are the properties that are written in SIESTA scf
        props = ["iscf", "Eharris", "E_KS", "FreeEng", "dDmax", "Ef", "dHmax"]

        if not iscf is None:
            if iscf == 0:
                raise ValueError(f"{self.__class__.__name__}.read_scf requires iscf argument to *not* be 0!")
        if not imd is None:
            if imd == 0:
                raise ValueError(f"{self.__class__.__name__}.read_scf requires imd argument to *not* be 0!")
        def reset_d(d, line):
            if line.startswith('SCF cycle converged'):
                if len(d['data']) > 0:
                    d['_final_iscf'] = 1
            elif line.startswith('SCF cycle continued'):
                d['_final_iscf'] = 0

        def common_parse(line, d):
            if line.startswith('ts-Vha:'):
                d['ts-Vha'] = float(line.split()[1])
            elif line.startswith('bulk-bias: |v'):
                d['bb-v'] = list(map(float, line.split()[-3:]))
                if 'bb-vx' not in props:
                    props.extend(['BB-vx', 'BB-vy', 'BB-vz'])
            elif line.startswith('bulk-bias: {q'):
                d['bb-q'] = list(map(float, line.split()[-3:]))
                if 'bb-q+' not in props:
                    props.extend(['BB-q+', 'BB-q-', 'BB-q0'])
            else:
                return False
            return True

        if key.lower() == 'scf':
            def parse_next(line, d):
                line = line.strip().replace('*', '0')
                reset_d(d, line)
                if common_parse(line, d):
                    pass
                elif line.startswith('scf:'):
                    d['_found_iscf'] = True
                    if len(line) == 97:
                        # this should be for Efup/dwn
                        # but I think this will fail for as_dataframe (TODO)
                        data = [int(line[5:9]), float(line[9:25]), float(line[25:41]),
                                float(line[41:57]), float(line[57:67]), float(line[67:77]),
                                float(line[77:87]), float(line[87:97])]
                    elif len(line) == 87:
                        data = [int(line[5:9]), float(line[9:25]), float(line[25:41]),
                                float(line[41:57]), float(line[57:67]), float(line[67:77]),
                                float(line[77:87])]
                    else:
                        # Populate DATA by splitting
                        data = line.split()
                        data =  [int(data[1])] + list(map(float, data[2:]))
                    d['data'] = data

        elif key.lower() == 'ts-scf':
            props.append("ts-Vha")
            def parse_next(line, d):
                line = line.strip().replace('*', '0')
                reset_d(d, line)
                if common_parse(line, d):
                    pass
                elif line.startswith('ts-q:'):
                    data = line.split()[1:]
                    try:
                        d['ts-q'] = list(map(float, data))
                    except:
                        # We are probably reading a device list
                        # ensure that props are appended
                        if data[-1] not in props:
                            props.extend(data)
                        pass
                elif line.startswith('ts-scf:'):
                    d['_found_iscf'] = True
                    if len(line) == 100:
                        data = [int(line[8:12]), float(line[12:28]), float(line[28:44]),
                                float(line[44:60]), float(line[60:70]), float(line[70:80]),
                                float(line[80:90]), float(line[90:100]), d['ts-Vha']] + d['ts-q']
                    elif len(line) == 90:
                        data = [int(line[8:12]), float(line[12:28]), float(line[28:44]),
                                float(line[44:60]), float(line[60:70]), float(line[70:80]),
                                float(line[80:90]), d['ts-Vha']] + d['ts-q']
                    else:
                        # Populate DATA by splitting
                        data = line.split()
                        data = [int(data[1])] + list(map(float, data[2:])) + [d['ts-Vha']] + d['ts-q']
                    d['data'] = data

        # A temporary dictionary to hold information while reading the output file
        d = {
            '_found_iscf': False,
            '_final_iscf': 0,
            'data': [],
        }
        md = []
        scf = []
        for line in self:
            parse_next(line, d)
            if d['_found_iscf']:
                d['_found_iscf'] = False
                data = d['data']
                if len(data) == 0:
                    continue

                if iscf is None or iscf < 0:
                    scf.append(data)
                elif data[0] <= iscf:
                    # this ensures we will retain the latest iscf in
                    # case the requested iscf is too big
                    scf = data

            if d['_final_iscf'] == 1:
                d['_final_iscf'] = 2
            elif d['_final_iscf'] == 2:
                d['_final_iscf'] = 0
                data = d['data']
                if len(data) == 0:
                    # this traps the case where we read ts-scf
                    # but find the final scf iteration.
                    # In that case we don't have any data.
                    scf = []
                    continue

                if len(scf) == 0:
                    # this traps cases where final_iscf has
                    # been trickered but we haven't collected anything.
                    # I.e. if key == scf but ts-scf also exists.
                    continue

                # First figure out which iscf we should store
                if iscf is None: # or iscf > 0
                    # scf is correct
                    pass
                elif iscf < 0:
                    # truncate to 0
                    scf = scf[max(len(scf) + iscf, 0)]

                # Populate md
                md.append(np.array(scf))
                # Reset SCF data
                scf = []

                # In case we wanted a given MD step and it's this one, just stop reading
                # We are going to return the last MD (see below)
                if imd == len(md):
                    break

        # Define the function that is going to convert the information of a MDstep to a Dataset
        if as_dataframe:
            import pandas as pd

            def MDstep_dataframe(scf):
                scf = np.atleast_2d(scf)
                return pd.DataFrame(
                    scf[..., 1:],
                    index=pd.Index(scf[..., 0].ravel().astype(np.int32),
                                   name="iscf"),
                    columns=props[1:]
                )

        # Now we know how many MD steps there are

        # We will return stuff based on what the user requested
        # For pandas DataFrame this will be dependent
        #  1. all MD steps requested => imd == index, iscf == column (regardless of iscf==none|int)
        #  2. 1 MD step requested => iscf == index

        if imd is None:
            if as_dataframe:
                if len(md) == 0:
                    # return an empty dataframe (with imd as index)
                    return pd.DataFrame(index=pd.Index([], name="imd"),
                                        columns=props)
                # Regardless of what the user requests we will always have imd == index
                # and iscf a column, a user may easily change this.
                df = pd.concat(map(MDstep_dataframe, md),
                               keys=_a.arangei(1, len(md) + 1), names=["imd"])
                if iscf is not None:
                    df.reset_index("iscf", inplace=True)
                return df

            if iscf is None:
                # since each MD step may be a different number of SCF steps
                # we cannot convert to a dense array
                return md
            return np.array(md)

        # correct imd to ensure we check against the final size
        imd = min(len(md) - 1, max(len(md) + imd, 0))
        if len(md) == 0:
            # no data collected
            if as_dataframe:
                return pd.DataFrame(index=pd.Index([], name="iscf"),
                                    columns=props[1:])
            return np.array(md[imd])

        if imd > len(md):
            raise ValueError(f"{self.__class__.__name__}.read_scf could not find requested MD step ({imd}).")

        # If a certain imd was requested, get it
        # Remember that if imd is positive, we stopped reading at the moment we reached it
        scf = np.array(md[imd])
        if as_dataframe:
            return MDstep_dataframe(scf)
        return scf


add_sile('out', outSileSiesta, case=False, gzip=True)