#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2014-2019European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl
"""
The high-level functionality, the filtering and recursive :term:`lassoing`.
Prefer accessing the public members from the parent module.
.. currentmodule:: pandalone.xleash
"""
from collections import namedtuple, OrderedDict
import logging
import asteval
from toolz import dicttoolz as dtz
import numpy as np
from . import Lasso, _parse
from ..utils import LoggerWriter
from ..utils import as_list
log = logging.getLogger(__name__)
[docs]def pipe_filter(ranger, lasso, *filters, **kwds):
"""
A :term:`bulk-filter` that applies all call-specifiers one after another on the :term:`capture-rect` values.
:param list filters:
the json-parsed :term:`call-spec`
"""
for filt in filters:
call_spec = _parse.parse_call_spec(filt)
lasso = ranger.make_call(lasso, *call_spec)
return lasso
[docs]def _classify_rect_shape(st, nd):
"""
Identifies rect from its edge-coordinates (row, col, 2d-table)..
:param Coords st:
the top-left edge of capture-rect, inclusive
:param Coords or None nd:
the bottom-right edge of capture-rect, inclusive
:return:
in int based on the input like that:
- 0: only `st` given
- 1: `st` and `nd` point the same cell
- 2: row
- 3: col
- 4: 2d-table
Examples::
>>> _classify_rect_shape((1,1), None)
0
>>> _classify_rect_shape((2,2), (2,2))
1
>>> _classify_rect_shape((2,2), (2,20))
2
>>> _classify_rect_shape((2,2), (20,2))
3
>>> _classify_rect_shape((2,2), (20,20))
4
"""
if nd is None:
return 0
rows = nd[0] - st[0]
cols = nd[1] - st[1]
return 1 + bool(cols) + 2 * bool(rows)
def _decide_ndim_by_rect_shape(shape_idx, ndims_list):
return ndims_list[shape_idx]
[docs]def _updim(values, new_ndim):
"""
Append trivial dimensions to the left.
:param values: The scalar ot 2D-results of :meth:`Sheet.read_rect()`
:param int new_dim: The new dimension the result should have
"""
new_shape = (1,) * (new_ndim - values.ndim) + values.shape
return values.reshape(new_shape)
[docs]def _downdim(values, new_ndim):
"""
Squeeze it, and then flatten it, before inflating it.
:param values: The scalar ot 2D-results of :meth:`Sheet.read_rect()`
:param int new_dim: The new dimension the result should have
"""
trivial_indxs = [i for i, d in enumerate(values.shape) if d == 1]
offset = values.ndim - new_ndim
trivial_ndims = len(trivial_indxs)
if offset > trivial_ndims:
values = values.flatten()
elif offset == trivial_ndims:
values = values.squeeze()
else:
for _, indx in zip(range(offset), trivial_indxs):
values = values.squeeze(indx)
return values
[docs]def _redim(values, new_ndim):
"""
Reshapes the :term:`capture-rect` values of :func:`read_capture_rect()`.
:param values: The scalar ot 2D-results of :meth:`Sheet.read_rect()`
:type values: (nested) list, *
:param new_ndim:
:type int, (int, bool) or None new_ndim:
:return: reshaped values
:rtype: list of lists, list, *
Examples::
>>> _redim([1, 2], 2)
[[1, 2]]
>>> _redim([[1, 2]], 1)
[1, 2]
>>> _redim([], 2)
[[]]
>>> _redim([[3.14]], 0)
3.14
>>> _redim([[11, 22]], 0)
[11, 22]
>>> arr = [[[11], [22]]]
>>> arr == _redim(arr, None)
True
>>> _redim([[11, 22]], 0)
[11, 22]
"""
if new_ndim is None:
return values
values = np.asarray(values)
try:
new_ndim, transpose = new_ndim
if transpose:
values = values.T
except:
pass
if new_ndim is not None:
if values.ndim < new_ndim:
values = _updim(values, new_ndim)
elif values.ndim > new_ndim:
values = _downdim(values, new_ndim)
return values.tolist()
[docs]def xlwings_dims_call_spec():
"""A list :term:`call-spec` for :meth:`_redim_filter` :term:`filter` that imitates results of *xlwings* library."""
return '["redim", [0, 1, 1, 1, 2]]'
[docs]def redim_filter(ranger, lasso, scalar=None, cell=None, row=None, col=None, table=None):
"""
A :term:`bulk-filter` that reshapes sand/or transpose captured values, depending on rect's shape.
Each dimension might be a single int or None, or a pair [dim, transpose].
"""
ndims_list = (scalar, cell, row, col, table)
shape_idx = _classify_rect_shape(lasso.st, lasso.nd)
new_ndim = _decide_ndim_by_rect_shape(shape_idx, ndims_list)
values = lasso.values
if new_ndim is not None:
lasso = lasso._replace(values=_redim(values, new_ndim))
return lasso
XLocation = namedtuple("XLocation", ("sheet", "st", "nd", "base_coords"))
"""
Fields denoting the position of a sheet/cell while running a :term:`element-wise-filter`.
Practically func:`run_filter_elementwise() preserves these fields if the
processed ones were `None`.
"""
[docs]def run_filter_elementwise(
ranger,
lasso,
element_func,
filters,
include=None,
exclude=None,
depth=-1,
*args,
**kwds
):
"""
Runner of all :term:`element-wise` :term:`filters`.
It applies the `element_func` on elements extracted from ``lasso.values``
by treating the later first as "indexed" objects
(Mappings, Series and Dataframes.), and if that fails, as nested lists.
- The `include`/`exclude` filter args work only for "indexed" objects
with ``items()`` and indexing methods.
- If no filter arg specified, expands for all keys.
- If only `include` specified, rejects all keys not explicitly
contained in this filter arg.
- If only `exclude` specified, expands all keys not explicitly
contained in this filter arg.
- When both `include`/`exclude` exist, only those explicitly
included are accepted, unless also excluded.
- Lower the :mod:`logging` level to see other than syntax-errors on
recursion reported on :data:`log`.
- Only those in :class:`XLocation` are passed recursively.
:param list element_func:
A function implementing the element-wise :term:`filter`
and returning a 2-tuple ``(is_proccessed, new_val_or_lasso)``,
like that::
def element_func(ranger, lasso, context, elval)
proced = False
try:
elval = int(elval)
proced = True
except ValueError:
pass
return proced, elval
Its `kwds` may contain the `include`, `exclude` and `depth` args.
Any exception raised from `element_func` will cancel the diving.
:param list filters:
Any :term:`filters` to apply after invoking the `element_func`.
:param list or str include:
Items to include when diving into "indexed" values.
See description above.
:param list or str exclude:
Items to exclude when diving into "indexed" values.
See description above.
:param int or None depth:
How deep to dive into nested structures, "indexed" or lists.
If `< 0`, no limit. If 0, stops completely.
:params args:
To be relayed to 'element_func'.
:params kwds:
To be relayed to 'element_func'.
"""
include = include and as_list(include)
exclude = exclude and as_list(exclude)
def is_included(elval, key, cdepth):
ok = True
if cdepth == 0 or isinstance(elval, dict):
ok &= not include or key in include
ok &= not exclude or key not in exclude
return ok
def upd_base_coords(elval, cdepth, base_coords, i):
if base_coords and not isinstance(elval, dict):
row, col = base_coords
try:
import pandas as pd
except ImportError:
if cdepth == 0:
row += i
elif cdepth == 1:
col += +i
else:
if isinstance(elval, pd.DataFrame):
col += i
elif isinstance(elval, pd.Series):
row += i
return row, col
def call_element_func(elval, cdepth, base_coords):
context_kwds = dtz.keyfilter(lambda k: k in XLocation._fields, lasso._asdict())
context_kwds["base_coords"] = base_coords
context = XLocation(**context_kwds)
try:
proced, res_lasso = element_func(
ranger, lasso, context, elval, *args, **kwds
)
except Exception as ex:
msg_args = (elval, context, ex)
raise ValueError("Value(%r) at %s: \n %s" % msg_args)
if proced:
if not isinstance(res_lasso, Lasso):
res_lasso = lasso._replace(values=res_lasso)
for call_spec in sub_call_specs:
res_lasso = ranger.make_call(res_lasso, *call_spec)
elval = res_lasso and res_lasso.values
return proced, elval
def dive_list(elval, cdepth, base_coords):
proced, elval = call_element_func(elval, cdepth, base_coords)
if not proced and isinstance(elval, list):
for i, v in enumerate(elval):
nbc = upd_base_coords(elval, cdepth, base_coords, i)
elval[i] = dive_indexed(v, cdepth + 1, nbc)
return elval
def dive_indexed(elval, cdepth, base_coords):
if cdepth != depth:
dived = False
try:
items = elval.items()
except:
pass # Just to avoid chained ex.
else:
for i, (k, v) in enumerate(items):
# Dict is not ordered, so cannot locate `base_coords`!
if is_included(elval, k, cdepth):
nbc = upd_base_coords(elval, cdepth, base_coords, i)
elval[k] = dive_indexed(v, cdepth + 1, nbc)
dived = True
if not dived:
elval = dive_list(elval, cdepth, base_coords)
return elval
sub_call_specs = [_parse.parse_call_spec(f) for f in filters]
values = dive_indexed(lasso.values, 0, lasso.st)
return lasso._replace(values=values)
def _recurse_element_func(ranger, lasso, context, elval):
proced = False
try:
if isinstance(elval, str):
lasso = ranger.do_lasso(elval, **context._asdict())
proced = True
except SyntaxError as ex:
msg = "Skipped non `xl-ref` value(%r) \n ++at %s \n ++while lassoing %r \n ++due to: %s"
msg_args = (elval, context, lasso.xl_ref, ex)
log.debug(msg, *msg_args)
except Exception as ex:
msg = "Lassoing `xl-ref` failed due to: %s"
raise ValueError(msg % ex)
return proced, lasso
[docs]def recursive_filter(ranger, lasso, filters=(), include=None, exclude=None, depth=-1):
"""
A :term:`element-wise-filter` that expand recursively any :term:`xl-ref` strings elements in :term:`capture-rect` values.
:param list filters:
Any :term:`filters` to apply after invoking the `element_func`.
:param list or str include:
Items to include when diving into "indexed" values.
See :func:`run_filter_elementwise()`.
:param list or str exclude:
Items to exclude when diving into "indexed" values.
See :func:`run_filter_elementwise()`.
:param int or None depth:
How deep to dive into nested structures, "indexed" or lists.
If `< 0`, no limit. If 0, stops completely.
See :func:`run_filter_elementwise()`.
"""
return run_filter_elementwise(
ranger, lasso, _recurse_element_func, filters, include, exclude, depth
)
ast_log_writer = LoggerWriter(logging.getLogger("%s.pyeval" % __name__), logging.INFO)
def _asteval_interpreter(symtable, *args, **kwds):
try:
interp = asteval.Interpreter(usersyms=symtable, **kwds)
except TypeError:
# Arg `usersym ` added in asteval-0.9.10 (Oct 2017)
interp = asteval.Interpreter(symtable, **kwds)
interp.symtable.update(symtable)
return interp
def _pyeval_element_func(ranger, lasso, context, elval, eval_all):
proced = False
if isinstance(elval, str):
expr = str(elval)
symtable = locals()
from .. import xleash
symtable.update({"xleash": xleash})
aeval = _asteval_interpreter(symtable, writer=ast_log_writer)
res = aeval.eval(expr)
if aeval.error:
error = aeval.error[0].get_error()
if eval_all:
msg = "%i errors while py-evaluating %r: %s: %s"
msg_args = (len(aeval.error), expr) + error
raise ValueError(msg % msg_args)
else:
msg = "Skipped py-evaluating value(%r) \n ++at %s \n ++while lassoing %r \n ++due to %i errors: %s: %s"
msg_args = (elval, context, lasso.xl_ref, len(aeval.error)) + error
log.warning(msg, *msg_args)
else:
if isinstance(res, Lasso):
lasso = res._replace(opts=lasso.opts) if res.opts is None else res
else:
lasso = lasso._replace(values=res)
proced = True
return proced, lasso
[docs]def pyeval_filter(
ranger, lasso, filters=(), eval_all=False, include=None, exclude=None, depth=-1
):
"""
A :term:`element-wise-filter` that uses :mod:`asteval` to evaluate string values as python expressions.
The `expr` fecthed from `term:`capturing` may access read-write
all :func:`locals()` of this method (ie: `ranger`, `lasso`),
the :mod:`numpy` funcs, and the :mod:`pandalone.xleash` module under
the `xleash` variable.
The `expr` may return either:
- the processed values, or
- an instance of the :class:`Lasso`, in which case only its `opt`
field is checked and replaced with original if missing.
So better use :func:`namedtuple._replace()` on the current `lasso`
which exists in the expr's namespace.
:param bool eval_all:
If `True` raise on 1st error and stop diving cells.
Defaults to `False`.
:param list filters:
Any :term:`filters` to apply after invoking the `element_func`.
:param list or str include:
Items to include when diving into "indexed" values.
See :func:`run_filter_elementwise()`.
:param list or str exclude:
Items to exclude when diving into "indexed" values.
See :func:`run_filter_elementwise()`.
:param int or None depth:
How deep to dive into nested structures, "indexed" or lists.
If `< 0`, no limit. If 0, stops completely.
See :func:`run_filter_elementwise()`.
Example::
>>> from pandalone import xleash
>>> expr = '''
... res = array([[0.5, 0.3, 0.1, 0.1]])
... res * res.T
... '''
>>> lasso = Lasso(values=expr, opts={})
>>> with xleash.SheetsFactory() as sf:
... ranger = xleash.Ranger(sf)
... pyeval_filter(ranger, lasso).values
array([[0.25, 0.15, 0.05, 0.05],
[0.15, 0.09, 0.03, 0.03],
[0.05, 0.03, 0.01, 0.01],
[0.05, 0.03, 0.01, 0.01]])
"""
return run_filter_elementwise(
ranger,
lasso,
_pyeval_element_func,
filters,
include,
exclude,
depth,
eval_all=eval_all,
)
[docs]def py_filter(ranger, lasso, expr):
"""
A :term:`bulk-filter` that passes values through a python-expression using :mod:`asteval` library.
The `expr` may access read-write all :func:`locals()` of this method
(`ranger`, `lasso`), the :mod:`numpy` funcs, and the :mod:`pandalone.xleash`
module under the `xleash` variable.
The `expr` may return either:
- the processed values, or
- an instance of the :class:`Lasso`, in which case only its `opt`
field is checked and replaced with original if missing.
So better use :func:`namedtuple._replace()` on the current `lasso`
which exists in the expr's namespace.
:param str expr:
The python-expression, which may comprise of multiple statements.
"""
symtable = locals()
from .. import xleash
symtable.update({"xleash": xleash})
aeval = _asteval_interpreter(symtable, writer=ast_log_writer)
res = aeval.eval(expr)
if aeval.error:
error = aeval.error[0].get_error()
msg = "%i errors while py-evaluating %r: %s: %s"
msg_args = (len(aeval.error), expr) + error
raise ValueError(msg % msg_args)
else:
if isinstance(res, Lasso):
lasso = res._replace(opts=lasso.opts) if res.opts is None else res
else:
lasso = lasso._replace(values=res)
return lasso
[docs]def install_default_filters(filters_dict):
"""
Updates the default available :term:`filters` used by :func:`lasso()` when constructing its internal :class:`Ranger`.
:param dict filters_dict:
The dictionary to update with the default filters.
"""
filters_dict.update(
{
"pipe": {"func": pipe_filter},
"pyeval": {"func": pyeval_filter},
"py": {"func": py_filter},
"recurse": {"func": recursive_filter},
"redim": {"func": redim_filter},
"numpy": {
"func": lambda ranger, lasso, *args, **kwds: lasso._replace(
values=np.array(lasso.values, *args, **kwds)
),
"desc": np.array.__doc__,
},
"dict": {
"func": lambda ranger, lasso, *args, **kwds: lasso._replace(
values=dict(lasso.values, *args, **kwds)
),
"desc": dict.__doc__,
},
"odict": {
"func": lambda ranger, lasso, *args, **kwds: lasso._replace(
values=OrderedDict(lasso.values, *args, **kwds)
),
"desc": OrderedDict.__doc__,
},
"sorted": {
"func": lambda ranger, lasso, *args, **kwds: lasso._replace(
values=sorted(lasso.values, *args, **kwds)
),
"desc": sorted.__doc__,
},
}
)