import re
from itertools import groupby
from numpy import zeros, ones, cumsum, take, int32, int64
from numpy import asarray
# Names exported by ``from <module> import *``.
__all__ = [
    "strmap",
    "strseq",
    "lstranges",
    "erange",
    "list2str",
    "fileindex",
    "array_arange",
]
# Function to change a string to a range of integers
def strmap(func, s, start=None, end=None, sep="b"):
    """Parse a string as though it was a slice and map all entries using ``func``.

    The string is split on commas (commas inside the bracket pair selected by
    `sep` do not split), and every resulting segment is parsed on its own.

    Parameters
    ----------
    func : function
       function to parse every match with
    s : str
       the string that should be parsed, all spaces are removed before parsing
    start : optional
       the replacement in case the LHS of the delimiter is not present
    end : optional
       the replacement in case the RHS of the delimiter is not present
    sep : {"b", "c"}
       separator used, ``"b"`` is square brackets, ``"c"``, curly braces

    Returns
    -------
    list
       one entry per segment (the ``[..][..]`` form contributes one entry per
       element of its first bracket). A plain segment yields the result of
       `strseq`; a segment with a trailing bracket yields a tuple
       ``(parsed head, parsed bracket content)``.
       An empty string yields ``[None]`` and a lone ``"-"`` or ``":"``
       yields ``[(start, end)]``.

    Raises
    ------
    ValueError
       if `sep` is unknown, if the brackets are unbalanced, or if a segment
       is empty/unparsable (e.g. a leading, trailing or doubled comma).

    Examples
    --------
    >>> strmap(int, "1")
    [1]
    >>> strmap(int, "1-2")
    [(1, 2)]
    >>> strmap(int, "1-")
    [(1, None)]
    >>> strmap(int, "1-", end=4)
    [(1, 4)]
    >>> strmap(int, "1-10[2-3]")
    [((1, 10), [(2, 3)])]
    """
    if sep == "b":
        segment = re.compile(r"\[(.+)\]\[(.+)\]|(.+)\[(.+)\]|(.+)")
        sep1, sep2 = "[", "]"
    elif sep == "c":
        segment = re.compile(r"\{(.+)\}\{(.+)\}|(.+)\{(.+)\}|(.+)")
        sep1, sep2 = "{", "}"
    else:
        raise ValueError("strmap: unknown separator for the sequence")

    s = s.replace(" ", "")
    if not s:
        return [None]
    if s in ("-", ":"):
        return [(start, end)]

    # Split on commas, but glue pieces back together for as long as the
    # number of opening and closing separators differ, such that commas
    # inside [..,..] stay within a single segment.
    segments = []
    pending = None
    for piece in s.split(","):
        pending = piece if pending is None else pending + "," + piece
        if pending.count(sep1) == pending.count(sep2):
            segments.append(pending)
            pending = None
    if pending is not None:
        # the trailing segment never became balanced
        raise ValueError(f"Unbalanced string: not enough {sep1} and {sep2}")

    parsed = []
    for seg in segments:
        matches = segment.findall(seg)
        if not matches:
            # The pattern requires at least one character, hence an empty
            # segment (stray comma) would otherwise fail with an IndexError.
            raise ValueError(
                f"strmap: could not parse segment {seg!r} in {s!r}"
            )
        m = matches[0]
        if len(m[0]) > 0:
            # this is: [..][..]
            rhs = strmap(func, m[1], start, end, sep)
            for el in strmap(func, m[0], start, end, sep):
                parsed.append((el, rhs))
        elif len(m[2]) > 0:
            # this is: ..[..]
            parsed.append((strseq(func, m[2], start, end),
                           strmap(func, m[3], start, end, sep)))
        elif len(m[4]) > 0:
            # this is a plain element or range
            parsed.append(strseq(func, m[4], start, end))
    return parsed
def strseq(cast, s, start=None, end=None):
    """Convert a string holding a single element or a range into casted values.

    A string containing ``:`` is split on ``:``; otherwise a string containing
    ``-`` is split on ``-``. An empty first (last) field is replaced by `start`
    (`end`), and every field which is not ``None`` is passed through `cast`.
    A string without any delimiter is simply casted.

    Parameters
    ----------
    cast : function
       parser of the individual elements
    s : str
       string with content
    start : optional
       replacement for an empty first field
    end : optional
       replacement for an empty last field

    Returns
    -------
    tuple or object
       a tuple with one casted entry per field when a delimiter was found,
       otherwise ``cast(s)``

    Examples
    --------
    >>> strseq(int, "3")
    3
    >>> strseq(int, "3-6")
    (3, 6)
    >>> strseq(int, "3-")
    (3, None)
    >>> strseq(int, "3:2:7")
    (3, 2, 7)
    >>> strseq(int, "3:2:", end=8)
    (3, 2, 8)
    >>> strseq(int, ":2:", start=2)
    (2, 2, None)
    >>> strseq(float, "3.2:6.3")
    (3.2, 6.3)
    """
    fields = s
    # ":" takes precedence over "-" as the field delimiter
    if ":" in s:
        fields = [field.strip() for field in s.split(":")]
    elif "-" in s:
        fields = [field.strip() for field in s.split("-")]

    if not isinstance(fields, list):
        # no delimiter found, a single element
        return cast(fields)

    # Fill in the open ends of the range
    if len(fields[0]) == 0:
        fields[0] = start
    if len(fields[-1]) == 0:
        fields[-1] = end

    return tuple(None if field is None else cast(field) for field in fields)
def erange(start, step, end=None):
    """Return a `range` in which both end points are included.

    Parameters
    ----------
    start : int
       first element of the range
    step : int
       the step between elements when `end` is given; when `end` is not
       given this is instead the (inclusive) last element
    end : int, optional
       the (inclusive) last element

    Examples
    --------
    >>> list(erange(1, 3))
    [1, 2, 3]
    >>> list(erange(1, 2, 7))
    [1, 3, 5, 7]
    """
    if end is not None:
        return range(start, end + 1, step)
    # Two-argument form: the second argument is the inclusive end
    return range(start, step + 1)
def lstranges(lst, cast=erange, end=None):
    """Convert a `strmap` list into expanded ranges.

    Parameters
    ----------
    lst : list, tuple or object
       the (possibly nested) structure to expand:

       - a list is expanded element-wise and flattened one level
       - a 3-tuple is expanded as ``cast(*lst)``
       - any other tuple is treated as ``(head, bot)``; both are expanded
         recursively and, if neither expands to a list, joined through
         ``cast(head, bot)``, otherwise paired as ``[head, bot]`` entries
       - anything else is returned as is, except ``None`` which becomes
         ``cast(0, end)`` when `end` is given
    cast : function
       function creating the range from the tuple elements
    end : optional
       the last element used when an entry is ``None``

    Returns
    -------
    list or object
       a list for list/tuple input, otherwise the value described above
    """
    if isinstance(lst, list):
        expanded = []
        for item in lst:
            sub = lstranges(item, cast, end)
            # flatten expanded entries, keep scalars as single elements
            if isinstance(sub, list):
                expanded.extend(sub)
            else:
                expanded.append(sub)
        return expanded

    if not isinstance(lst, tuple):
        # a plain element
        if lst is None and end is not None:
            return cast(0, end)
        return lst

    # A tuple is either a range 0-1 == tuple(0, 1), or a sub-range
    #   0[0-1], 0-1[0-1]
    if len(lst) == 3:
        return list(cast(*lst))

    head = lstranges(lst[0], cast, end)
    bot = lstranges(lst[1], cast, end)
    if isinstance(head, list):
        # every element of the head gets the same sub-range
        return [[el, bot] for el in head]
    if isinstance(bot, list):
        return [[head, bot]]
    return list(cast(head, bot))
def list2str(lst):
    """Convert a collection of integers into a string of ranges.

    The elements are sorted, and every run of consecutive values is written
    as ``first-last``; isolated values are written as is. The input is never
    modified.

    Parameters
    ----------
    lst : iterable of int
       the elements to convert, any iterable is accepted (list, tuple, set,
       array, ...)

    Returns
    -------
    str
       the ``", "`` separated ranges, an empty string for empty input

    Examples
    --------
    >>> list2str([2, 4, 5, 6])
    '2, 4-6'
    >>> list2str([2, 4, 5, 6, 8, 9])
    '2, 4-6, 8-9'
    """
    # sorted() always creates a new list, so neither lists nor array views
    # of the caller are sorted in place.
    values = sorted(lst)

    # Consecutive values share the same (value - position) offset
    offsets = [value - i for i, value in enumerate(values)]

    pieces = []
    first = 0
    for _, run in groupby(offsets):
        count = sum(1 for _ in run)
        lo = values[first]
        first += count
        if count == 1:
            pieces.append(str(lo))
        else:
            pieces.append(f"{lo}-{lo + count - 1}")
    return ", ".join(pieces)
# Function to retrieve an optional index from the
# filename
# file[0] returns:
# file, 0
# file returns:
# file, None
# file[0-1] returns
# file, [0,1]
def fileindex(f, cast=int):
    """Parse a filename string into the filename and the indices.

    This range can be formatted like this:
      file[1,2,3-6]
    in which case it will return:
      file, [1,2,3,4,5,6]

    Everything before the first ``[`` is the filename, the remainder (with a
    single trailing ``]`` removed) is parsed with `strmap` and expanded
    with `lstranges`.

    Parameters
    ----------
    f : str
       filename to parse
    cast : function
       the function to cast the bracketed value

    Returns
    -------
    fname : str
       the filename without the bracketed part
    index : object
       ``None`` if `f` has no ``[`` (or nothing follows it), the single
       element if only one index results, otherwise the list of indices

    Examples
    --------
    >>> fileindex("Hello[0]")
    ('Hello', 0)
    >>> fileindex("Hello[0-2]")
    ('Hello', [0, 1, 2])
    """
    if "[" not in f:
        return f, None

    # Split at the first "[" only, later brackets belong to the index
    fname, _, index = f.partition("[")

    # Remove the closing "]"; endswith is safe for an empty index ("file[")
    if index.endswith("]"):
        index = index[:-1]

    rng = lstranges(strmap(cast, index))
    if len(rng) == 1:
        return fname, rng[0]
    return fname, rng
def array_arange(start, end=None, n=None, dtype=int64):
    """Create a single array from a sequence of `numpy.arange`.

    The result equals the concatenation of ``arange(start[i], end[i])`` for
    all ``i``. Segments with a non-positive count (``end[i] <= start[i]`` or
    ``n[i] <= 0``) are empty, just as for `numpy.arange`.

    Parameters
    ----------
    start : array_like
       a list of start elements for `numpy.arange`
    end : array_like
       a list of end elements (exclusive) for `numpy.arange`.
       This argument is not used if `n` is passed.
    n : array_like
       a list of counts of elements for `numpy.arange`.
       This is equivalent to ``end=start + n``.
    dtype : numpy.dtype
       the returned lists data-type

    Returns
    -------
    numpy.ndarray
       1D array of the concatenated ranges with data-type `dtype`

    Examples
    --------
    >>> array_arange([1, 5], [3, 6])
    array([1, 2, 5], dtype=int64)
    >>> array_arange([1, 6], [4, 9])
    array([1, 2, 3, 6, 7, 8], dtype=int64)
    >>> array_arange([1, 6], n=[2, 2])
    array([1, 2, 6, 7], dtype=int64)
    >>> array_arange([5, 0], [3, 4])
    array([0, 1, 2, 3], dtype=int64)
    """
    # The algorithm is loop-free: build an array of increments and
    # accumulate it with a single cumulative sum.
    if n is None:
        # We need the counts to set up the increments
        n = asarray(end) - asarray(start)
    else:
        n = asarray(n)

    # The algorithm only works for strictly positive counts.
    # Negative counts would corrupt the pointers below, hence they are
    # dropped together with the zero counts (an empty range).
    idx = (n > 0).nonzero()[0]

    # Grab corner case, nothing to create
    if len(idx) == 0:
        return zeros(0, dtype=dtype)

    # Reduce to the contributing segments
    start = take(start, idx)
    n = take(n, idx)

    # Increments of 1 within a segment
    a = ones(n.sum(), dtype=dtype)

    # Positions of the first element of segment 1, 2, ...
    ptr = cumsum(n[:-1])
    a[0] = start[0]
    # At a segment boundary the increment must jump from the last element
    # of the previous segment (start + n - 1) to the new start.
    a[ptr] = start[1:] - start[:-1] - n[:-1] + 1
    return cumsum(a, dtype=dtype)