Source code for sisl.utils.ranges

import re
from itertools import groupby

from numpy import zeros, ones, cumsum, take, int32, int64
from numpy import asarray

# Public names exported on a star-import of this module.
__all__ = ["strmap", "strseq", "lstranges", "erange", "list2str", "fileindex"]
__all__ += ["array_arange"]


# Function to change a string to a range of integers
def strmap(func, s, start=None, end=None, sep="b"):
    """ Parse a slice-like string and convert every entry using ``func``.

    The string is split at commas (commas enclosed in brackets stay with
    their bracket group). Each piece then becomes either a single parsed
    value/range, or a ``(selection, sub-selection)`` tuple when the piece
    carries a bracketed part.

    Parameters
    ----------
    func : function
       function used to parse every matched element
    s : str
       the string that should be parsed (all spaces are removed first)
    start : optional
       the replacement in case the LHS of the delimiter is not present
    end : optional
       the replacement in case the RHS of the delimiter is not present
    sep : {"b", "c"}
       bracket style, ``"b"`` is square brackets, ``"c"`` is curly braces

    Returns
    -------
    list
       the parsed entries, ``[None]`` when the string is empty

    Raises
    ------
    ValueError
       if `sep` is unknown, or the brackets in `s` are unbalanced

    Examples
    --------
    >>> strmap(int, "1")
    [1]
    >>> strmap(int, "1-2")
    [(1, 2)]
    >>> strmap(int, "1-")
    [(1, None)]
    >>> strmap(int, "1-", end=4)
    [(1, 4)]
    >>> strmap(int, "1-10[2-3]")
    [((1, 10), [(2, 3)])]
    """
    if sep == "b":
        opening, closing = "[", "]"
        pattern = re.compile(r"\[(.+)\]\[(.+)\]|(.+)\[(.+)\]|(.+)")
    elif sep == "c":
        opening, closing = "{", "}"
        pattern = re.compile(r"\{(.+)\}\{(.+)\}|(.+)\{(.+)\}|(.+)")
    else:
        raise ValueError("strmap: unknown separator for the sequence")

    def balanced(text):
        # A piece is complete once it has as many opening as closing brackets
        return text.count(opening) == text.count(closing)

    s = s.replace(" ", "")
    if not s:
        return [None]
    if s in ("-", ":"):
        # A lone delimiter selects the full (start, end) range
        return [(start, end)]

    # Split at commas and glue back together those pieces that were
    # split in the middle of a bracket group, e.g. "1[2,3]"
    pieces = s.split(",")
    pos = 0
    while pos < len(pieces) - 1:
        if balanced(pieces[pos]):
            pos += 1
        else:
            # more opening than closing brackets, absorb the next piece
            pieces[pos] = pieces[pos] + "," + pieces.pop(pos + 1)

    # The loop never inspects the final piece, so check it here
    if not balanced(pieces[-1]):
        raise ValueError(f"Unbalanced string: not enough {opening} and {closing}")

    parsed = []
    for piece in pieces:
        # Exactly one of the three alternatives matches; the groups of the
        # alternatives that did not match are empty strings
        outer, outer_sub, head, head_sub, plain = pattern.findall(piece)[0]

        if outer:
            # this is: [..][..]
            sub = strmap(func, outer_sub, start, end, sep)
            parsed.extend((el, sub) for el in strmap(func, outer, start, end, sep))
        elif head:
            # this is: ..[..]
            parsed.append((strseq(func, head, start, end),
                           strmap(func, head_sub, start, end, sep)))
        elif plain:
            # this is a plain value or range
            parsed.append(strseq(func, plain, start, end))

    return parsed
def strseq(cast, s, start=None, end=None):
    """ Convert a string into a casted value, or a tuple of casted values for ranges.

    The string is split at ``":"`` if it contains one, otherwise at ``"-"``
    if it contains one. An empty first/last field is replaced by
    `start`/`end`; fields that end up being ``None`` are not casted.

    Parameters
    ----------
    cast : function
       parser of the individual elements
    s : str
       string with content
    start : optional
       replacement for an empty first field
    end : optional
       replacement for an empty last field

    Returns
    -------
    object or tuple
       ``cast(s)`` when no delimiter is present, otherwise a tuple with one
       entry per delimited field

    Examples
    --------
    >>> strseq(int, "3")
    3
    >>> strseq(int, "3-6")
    (3, 6)
    >>> strseq(int, "3-")
    (3, None)
    >>> strseq(int, "3:2:7")
    (3, 2, 7)
    >>> strseq(int, "3:2:", end=8)
    (3, 2, 8)
    >>> strseq(int, ":2:", start=2)
    (2, 2, None)
    >>> strseq(float, "3.2:6.3")
    (3.2, 6.3)
    """
    # ":" takes precedence over "-" when both are present
    delimiter = next((d for d in (":", "-") if d in s), None)
    fields = s if delimiter is None else [f.strip() for f in s.split(delimiter)]

    if not isinstance(fields, list):
        # No delimiter, a single value
        return cast(fields)

    # Fill in the defaults for open-ended ranges
    if len(fields[0]) == 0:
        fields[0] = start
    if len(fields[-1]) == 0:
        fields[-1] = end
    return tuple(None if f is None else cast(f) for f in fields)
def erange(start, step, end=None):
    """ Return a range where both ends are included.

    Parameters
    ----------
    start : int
       first value of the range
    step : int
       step between consecutive values. When `end` is not given this
       argument is instead the (inclusive) last value and the step is 1.
    end : int, optional
       last value of the range (inclusive)

    Returns
    -------
    range
       the range running from `start` up to and including the end value

    Examples
    --------
    >>> list(erange(1, 3))
    [1, 2, 3]
    >>> list(erange(1, 2, 7))
    [1, 3, 5, 7]
    >>> list(erange(5, -1, 1))
    [5, 4, 3, 2, 1]
    """
    if end is None:
        # Two-argument form: ``step`` is the inclusive end
        return range(start, step + 1)
    if step < 0:
        # A descending range stops *before* its bound, so the bound must be
        # moved one below ``end`` for ``end`` itself to be included
        return range(start, end - 1, step)
    return range(start, end + 1, step)
def lstranges(lst, cast=erange, end=None):
    """ Convert a `strmap` list into expanded ranges

    Parameters
    ----------
    lst : list or tuple or object
       the (possibly nested) output of `strmap`
    cast : function
       function called to expand a range-tuple into its elements
    end : optional
       when given, a ``None`` entry is expanded to ``cast(0, end)``

    Returns
    -------
    list or object
       a list for list/tuple input, otherwise the entry itself (or
       ``cast(0, end)`` for a ``None`` entry when `end` is given)
    """
    # A tuple is either
    #   a range 0-1 == tuple(0, 1), or
    #   a sub-range
    #     0[0-1], 0-1[0-1]
    if isinstance(lst, tuple):
        if len(lst) == 3:
            # a 3-tuple is passed straight on to ``cast``
            return list(cast(*lst))

        head = lstranges(lst[0], cast, end)
        bot = lstranges(lst[1], cast, end)
        if isinstance(head, list):
            # every element of the selection gets the same sub-selection
            return [[el, bot] for el in head]
        if isinstance(bot, list):
            # a single element with a sub-selection
            return [[head, bot]]
        # two plain values, this is a simple range
        return list(cast(head, bot))

    if isinstance(lst, list):
        # Flatten the expansion of every entry into a single list
        expanded = []
        for item in lst:
            sub = lstranges(item, cast, end)
            if isinstance(sub, list):
                expanded.extend(sub)
            else:
                expanded.append(sub)
        return expanded

    # A plain entry is returned unchanged, except ``None`` which is
    # expanded when an ``end`` is known
    if lst is None and end is not None:
        return cast(0, end)
    return lst
def list2str(lst):
    """ Convert a sequence of elements into a string of ranges

    Consecutive elements are collapsed to ``first-last``, the individual
    ranges are separated by ``", "``. The input is sorted into a new list
    before processing, so it is never modified and may be any iterable
    (list, tuple, array, ...).

    Parameters
    ----------
    lst : iterable of int
       the elements to convert

    Returns
    -------
    str
       the range representation, an empty string for empty input

    Examples
    --------
    >>> list2str([2, 4, 5, 6])
    "2, 4-6"
    >>> list2str([2, 4, 5, 6, 8, 9])
    "2, 4-6, 8-9"
    """
    # ``sorted`` always returns a new list, so the caller's data is left
    # untouched (slicing an array would only create a view of it)
    values = sorted(lst)

    parts = []
    first = 0
    # Within a run of consecutive values the difference between the value
    # and its index is constant, hence grouping by it yields the runs
    for _, run in groupby(v - i for i, v in enumerate(values)):
        count = sum(1 for _ in run)
        head = values[first]
        first += count
        if count == 1:
            parts.append(str(head))
        else:
            parts.append("{}-{}".format(head, head + count - 1))
    return ", ".join(parts)
# Function to retrieve an optional index from the filename:
#   file[0]   returns: file, 0
#   file      returns: file, None
#   file[0-1] returns: file, [0, 1]
def fileindex(f, cast=int):
    """ Parses a filename string into the filename and the indices.

    This range can be formatted like this:
      file[1,2,3-6]
    in which case it will return:
      file, [1,2,3,4,5,6]

    Everything after the first ``"["`` is regarded the index specification,
    a trailing ``"]"`` is optional.

    Parameters
    ----------
    f : str
       filename to parse
    cast : function
       the function to cast the bracketed value

    Returns
    -------
    fname : str
       the filename without the bracketed part
    index : object
       ``None`` when no ``"["`` is present, the single entry when the
       expanded index holds exactly one entry, otherwise the list of
       expanded entries

    Examples
    --------
    >>> fileindex("Hello[0]")
    ("Hello", 0)
    >>> fileindex("Hello[0-2]")
    ("Hello", [0, 1, 2])
    """
    if "[" not in f:
        return f, None

    # Split at the first "[", later "[" remain part of the index string
    fname, _, index = f.partition("[")
    # ``endswith`` is safe for an empty index string (e.g. "file["), where
    # indexing the last character would raise an IndexError
    if index.endswith("]"):
        index = index[:-1]

    rng = lstranges(strmap(cast, index))
    if len(rng) == 1:
        return fname, rng[0]
    return fname, rng
def array_arange(start, end=None, n=None, dtype=int64):
    """ Creates a single array from a sequence of `numpy.arange`

    The result equals the concatenation of ``arange(start[i], start[i] + n[i])``
    for all ``i``, but is built without any Python loops.

    Parameters
    ----------
    start : array_like
       a list of start elements for `numpy.arange`
    end : array_like
       a list of end elements (exclusive) for `numpy.arange`.
       This argument is not used if `n` is passed.
    n : array_like
       a list of counts of elements for `numpy.arange`.
       This is equivalent to ``end=start + n``.
    dtype : numpy.dtype
       the returned lists data-type

    Returns
    -------
    numpy.ndarray
       the concatenated ranges, an empty array if all counts are zero

    Examples
    --------
    >>> array_arange([1, 5], [3, 6])
    array([1, 2, 5], dtype=int64)
    >>> array_arange([1, 6], [4, 9])
    array([1, 2, 3, 6, 7, 8], dtype=int64)
    >>> array_arange([1, 6], n=[2, 2])
    array([1, 2, 6, 7], dtype=int64)
    """
    # The algorithm works on the number of elements of each range
    counts = asarray(n) if n is not None else asarray(end) - asarray(start)

    # Empty ranges contribute nothing and must be removed, the
    # bookkeeping below only works for non-zero counts
    keep = counts.nonzero()[0]
    if len(keep) == 0:
        return zeros(0, dtype=dtype)
    first = take(start, keep)
    counts = take(counts, keep)

    # The final array is the cumulative sum of its increments; within a
    # range every increment is 1
    steps = ones(counts.sum(), dtype=dtype)
    steps[0] = first[0]

    # At the first element of each subsequent range the increment is the
    # jump from the last element of the previous range to the new start
    boundaries = cumsum(counts[:-1])
    steps[boundaries] = first[1:] - first[:-1] - counts[:-1] + 1

    return cumsum(steps, dtype=dtype)