#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2014-2019European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl
"""
The algorithmic part of :term:`capturing`.
Prefer accessing the public members from the parent module.
"""
import logging
from string import ascii_uppercase
import numpy as np
from . import Coords, _parse
from ._parse import Cell
log = logging.getLogger(__name__)
try:
from xlrd import colname as xl_colname
# TODO: Try different backends providing `colname` function.
except ImportError:
log.warning("One of `xlrd`, `...` libraries is needed, will crash later!")
CHECK_CELLTYPE = False
"""When `True`, most coord-functions accept any 2-tuples."""
[docs]class EmptyCaptureException(Exception):
"""
Thrown when :term:`targeting` fails.
"""
_special_coord_symbols = {"^", "_", "."}
_primitive_dir_vectors = {
"L": Coords(0, -1),
"U": Coords(-1, 0),
"R": Coords(0, 1),
"D": Coords(1, 0),
}
[docs]def coords2Cell(row, col):
"""Make *A1* :class:`Cell` from *resolved* coords, with rudimentary error-checking.
Examples::
>>> coords2Cell(row=0, col=0)
Cell(row='1', col='A')
>>> coords2Cell(row=0, col=26)
Cell(row='1', col='AA')
>>> coords2Cell(row=10, col='.')
Cell(row='11', col='.')
>>> coords2Cell(row=-3, col=-2)
Traceback (most recent call last):
AssertionError: negative row!
"""
if row not in _special_coord_symbols:
assert row >= 0, "negative row!"
row = str(row + 1)
if col not in _special_coord_symbols:
assert col >= 0, "negative col!"
col = xl_colname(col)
return Cell(row=row, col=col)
[docs]def _row2num(coord):
"""
Resolves special coords or converts Excel 1-based rows to zero-based, reporting invalids.
:param str, int coord: excel-row coordinate or one of ``^_.``
:return: excel row number, >= 0
:rtype: int
Examples::
>>> row = _row2num('1')
>>> row
0
>>> row == _row2num(1)
True
Negatives (from bottom) are preserved::
>>> _row2num('-1')
-1
Fails ugly::
>>> _row2num('.')
Traceback (most recent call last):
ValueError: invalid literal for int() with base 10: '.'
"""
rcoord = int(coord)
if rcoord == 0:
msg = "Uncooked-coord cannot be zero!"
raise ValueError(msg.format(coord))
if rcoord > 0:
rcoord -= 1
return rcoord
[docs]def _col2num(coord):
"""
Resolves special coords or converts Excel A1 columns to a zero-based, reporting invalids.
:param str coord: excel-column coordinate or one of ``^_.``
:return: excel column number, >= 0
:rtype: int
Examples::
>>> col = _col2num('D')
>>> col
3
>>> _col2num('d') == col
True
>>> _col2num('AaZ')
727
>>> _col2num('10')
9
>>> _col2num(9)
8
Negatives (from left-end) are preserved::
>>> _col2num('AaZ')
727
Fails ugly::
>>> _col2num('%$')
Traceback (most recent call last):
ValueError: substring not found
>>> _col2num([])
Traceback (most recent call last):
TypeError: int() argument must be a string, a bytes-like object or
a number, not 'list'
"""
try:
rcoord = int(coord)
except ValueError:
rcoord = 0
for c in coord:
rcoord = rcoord * 26 + ascii_uppercase.rindex(c.upper()) + 1
rcoord -= 1
else:
if rcoord == 0:
msg = "Uncooked-coord cannot be zero!"
raise ValueError(msg.format(coord))
elif rcoord > 0:
rcoord -= 1
return rcoord
[docs]def _resolve_coord(cname, cfunc, coord, up_coord, dn_coord, base_coords=None):
"""
Translates special coords or converts Excel string 1-based rows/cols to zero-based, reporting invalids.
:param str cname:
the coord-name, one of 'row', 'column'
:param function cfunc:
the function to convert coord ``str --> int``
:param int, str coord:
the "A1" coord to translate
:param int up_coord:
the resolved *top* or *left* margin zero-based coordinate
:param int dn_coord:
the resolved *bottom* or *right* margin zero-based coordinate
:param int, None base_coords:
the resolved basis for dependent coord, if any
:return: the resolved coord or `None` if it were not a special coord.
Row examples::
>>> cname = 'row'
>>> r0 = _resolve_coord(cname, _row2num, '1', 1, 10)
>>> r0
0
>>> r0 == _resolve_coord(cname, _row2num, 1, 1, 10)
True
>>> _resolve_coord(cname, _row2num, '^', 1, 10)
1
>>> _resolve_coord(cname, _row2num, '_', 1, 10)
10
>>> _resolve_coord(cname, _row2num, '.', 1, 10, 13)
13
>>> _resolve_coord(cname, _row2num, '-3', 0, 10)
8
But notice when base-cell missing::
>>> _resolve_coord(cname, _row2num, '.', 0, 10, base_coords=None)
Traceback (most recent call last):
ValueError: Cannot resolve `relative-row` without `base-coord`!
Other ROW error-checks::
>>> _resolve_coord(cname, _row2num, '0', 0, 10)
Traceback (most recent call last):
ValueError: invalid row('0') due to: Uncooked-coord cannot be zero!
>>> _resolve_coord(cname, _row2num, 'a', 0, 10)
Traceback (most recent call last):
ValueError: invalid row('a') due to: invalid literal for int() with base 10: 'a'
>>> _resolve_coord(cname, _row2num, None, 0, 10)
Traceback (most recent call last):
ValueError: invalid row(None) due to:
int() argument must be a string,
a bytes-like object or a number, not 'NoneType'
Column examples::
>>> cname = 'column'
>>> _resolve_coord(cname, _col2num, 'A', 1, 10)
0
>>> _resolve_coord(cname, _col2num, 'DADA', 1, 10)
71084
>>> _resolve_coord(cname, _col2num, '.', 1, 10, 13)
13
>>> _resolve_coord(cname, _col2num, '-4', 0, 10)
7
And COLUMN error-checks::
>>> _resolve_coord(cname, _col2num, None, 0, 10)
Traceback (most recent call last):
ValueError: invalid column(None) due to: int() argument must be a string,
a bytes-like object or a number, not 'NoneType'
>>> _resolve_coord(cname, _col2num, 0, 0, 10)
Traceback (most recent call last):
ValueError: invalid column(0) due to: Uncooked-coord cannot be zero!
"""
try:
if coord in _special_coord_symbols:
special_dict = {"^": up_coord, "_": dn_coord}
if base_coords is not None:
special_dict["."] = base_coords
rcoord = special_dict[coord]
else:
rcoord = cfunc(coord)
# Resolve negatives as from the end.
if rcoord < 0:
rcoord = dn_coord + rcoord + 1
return rcoord
except Exception as ex:
if isinstance(ex, KeyError) and ex.args == (".",):
msg = "Cannot resolve `relative-{}` without `base-coord`!"
raise ValueError(msg.format(cname))
msg = "invalid {}({!r}) due to: {}"
raise ValueError(msg.format(cname, coord, ex)) from ex
[docs]def _resolve_cell(cell, up_coords, dn_coords, base_coords=None):
"""
Translates any special coords to absolute ones.
To get the margin_coords, use one of:
* :meth:`ABCSheet.get_margin_coords()`
* :func:`.io.backend.margin_coords_from_states_matrix()`
:param Cell cell:
The "A1" cell to translate its coords.
:param Coords up_coords:
the top-left resolved coords with full-cells
:param Coords dn_coords:
the bottom-right resolved coords with full-cells
:param Coords base_coords:
A resolved cell to base dependent coords (``.``).
:return: the resolved cell-coords
:rtype: Coords
Examples::
>>> up = Coords(1, 2)
>>> dn = Coords(10, 6)
>>> base = Coords(40, 50)
>>> _resolve_cell(Cell(col='B', row='5'), up, dn)
Coords(row=4, col=1)
>>> _resolve_cell(Cell('^', '^'), up, dn)
Coords(row=1, col=2)
>>> _resolve_cell(Cell('_', '_'), up, dn)
Coords(row=10, col=6)
>>> base == _resolve_cell(Cell('.', '.'), up, dn, base)
True
>>> _resolve_cell(Cell('-1', '-2'), up, dn)
Coords(row=10, col=5)
>>> _resolve_cell(Cell('A', 'B'), up, dn)
Traceback (most recent call last):
ValueError: invalid cell(Cell(row='A', col='B')) due to:
invalid row('A') due to: invalid literal for int() with base 10: 'A'
But notice when base-cell missing::
>>> _resolve_cell(Cell('1', '.'), up, dn)
Traceback (most recent call last):
ValueError: invalid cell(Cell(row='1', col='.')) due to:
Cannot resolve `relative-col` without `base-coord`!
"""
assert not CHECK_CELLTYPE or isinstance(cell, Cell), cell
assert not CHECK_CELLTYPE or isinstance(up_coords, Coords), up_coords
assert not CHECK_CELLTYPE or isinstance(dn_coords, Coords), dn_coords
try:
if base_coords is None:
base_row = base_col = None
else:
base_row, base_col = base_coords
row = _resolve_coord(
"row", _row2num, cell.row, up_coords[0], dn_coords[0], base_row
)
col = _resolve_coord(
"col", _col2num, cell.col, up_coords[1], dn_coords[1], base_col
)
return Coords(row, col)
except Exception as ex:
msg = "invalid cell(%r) due to: %s\n margins(%r)\n base_coords(%r)"
log.debug(msg, cell, ex, (up_coords, dn_coords), base_coords)
msg = "invalid cell(%r) due to: %s"
raise ValueError(msg % (cell, ex)) from ex
_mov_vector_slices = {
# VECTO_SLICE REVERSE COORD_INDEX
"L": (1, -1, lambda r, c: (r, slice(None, c + 1))),
"U": (0, -1, lambda r, c: (slice(None, r + 1), c)),
"R": (1, 1, lambda r, c: (r, slice(c, None))),
"D": (0, 1, lambda r, c: (slice(r, None), c)),
}
[docs]def _target_opposite(states_matrix, dn_coords, land, moves, edge_name=""):
"""
Follow moves from `land` and stop on the 1st full-cell.
:param np.ndarray states_matrix:
A 2D-array with `False` wherever cell are blank or empty.
Use :meth:`ABCSheet.get_states_matrix()` to derrive it.
:param Coords dn_coords:
the bottom-right for the top-left of full-cells
:param Coords land:
the landing-cell
:param str moves: MUST not be empty
:return: the identified target-cell's coordinates
:rtype: Coords
Examples::
>>> states_matrix = np.array([
... [0, 0, 0, 0, 0, 0],
... [0, 0, 0, 0, 0, 0],
... [0, 0, 0, 1, 1, 1],
... [0, 0, 1, 0, 0, 1],
... [0, 0, 1, 1, 1, 1]
... ])
>>> args = (states_matrix, Coords(4, 5))
>>> _target_opposite(*(args + (Coords(0, 0), 'DR')))
Coords(row=3, col=2)
>>> _target_opposite(*(args + (Coords(0, 0), 'RD')))
Coords(row=2, col=3)
It fails if a non-empty target-cell cannot be found, or
it ends-up beyond bounds::
>>> _target_opposite(*(args + (Coords(0, 0), 'D')))
Traceback (most recent call last):
pandalone.xleash._capture.EmptyCaptureException: No opposite-target found
while moving(D) from landing-Coords(row=0, col=0)!
>>> _target_opposite(*(args + (Coords(0, 0), 'UR')))
Traceback (most recent call last):
pandalone.xleash._capture.EmptyCaptureException: No opposite-target found
while moving(UR) from landing-Coords(row=0, col=0)!
But notice that the landing-cell maybe outside of bounds::
>>> _target_opposite(*(args + (Coords(3, 10), 'L')))
Coords(row=3, col=5)
"""
assert not CHECK_CELLTYPE or isinstance(dn_coords, Coords), dn_coords
assert not CHECK_CELLTYPE or isinstance(land, Coords), land
up_coords = np.array([0, 0])
target = np.array(land)
if land[0] > dn_coords[0] and "U" in moves:
target[0] = dn_coords[0]
if land[1] > dn_coords[1] and "L" in moves:
target[1] = dn_coords[1]
# if states_matrix[target].all():
# return Coords(*target)
imoves = iter(moves)
mov1 = next(imoves)
mov2 = next(imoves, None)
dv2 = mov2 and _primitive_dir_vectors[mov2]
# Limit negative coords, since they are valid indices.
while (up_coords <= target).all():
try:
states_vect, coord_indx, is_reverse = _extract_states_vector(
states_matrix, dn_coords, target, mov1
)
except IndexError:
break
else:
if states_vect.any():
indices = states_vect.nonzero()[0]
target[coord_indx] += is_reverse * indices.min()
return Coords(*target)
if not dv2:
break
target += dv2
msg = "No opposite-target found while moving({}) from {}landing-{}!"
raise EmptyCaptureException(msg.format(moves, edge_name, land))
[docs]def _target_same_vector(states_matrix, dn_coords, land, mov):
"""
:param np.ndarray states_matrix:
A 2D-array with `False` wherever cell are blank or empty.
Use :meth:`ABCSheet.get_states_matrix()` to derrive it.
:param Coords dn_coords:
the bottom-right for the top-left of full-cells
:param Coords land:
The landing-cell, which MUST be full!
"""
states_vect, coord_indx, is_reverse = _extract_states_vector(
states_matrix, dn_coords, land, mov
)
if states_vect.all():
same_len = len(states_vect) - 1
else:
indices = np.diff(states_vect.astype(int)).nonzero()[0]
same_len = indices.min()
target_coord = land[coord_indx] + is_reverse * same_len
return target_coord, coord_indx
[docs]def _target_same(states_matrix, dn_coords, land, moves, edge_name=""):
"""
Scan term:`exterior` row and column on specified `moves` and stop on the last full-cell.
:param np.ndarray states_matrix:
A 2D-array with `False` wherever cell are blank or empty.
Use :meth:`ABCSheet.get_states_matrix()` to derrive it.
:param Coords dn_coords:
the bottom-right for the top-left of full-cells
:param Coords land:
the landing-cell which MUST be within bounds
:param moves: which MUST not be empty
:return: the identified target-cell's coordinates
:rtype: Coords
Examples::
>>> states_matrix = np.array([
... [0, 0, 0, 0, 0, 0],
... [0, 0, 0, 0, 0, 0],
... [0, 0, 0, 1, 1, 1],
... [0, 0, 1, 0, 0, 1],
... [0, 0, 1, 1, 1, 1]
... ])
>>> args = (states_matrix, Coords(4, 5))
>>> _target_same(*(args + (Coords(4, 5), 'U')))
Coords(row=2, col=5)
>>> _target_same(*(args + (Coords(4, 5), 'L')))
Coords(row=4, col=2)
>>> _target_same(*(args + (Coords(4, 5), 'UL', )))
Coords(row=2, col=2)
It fails if landing is empty or beyond bounds::
>>> _target_same(*(args + (Coords(2, 2), 'DR')))
Traceback (most recent call last):
pandalone.xleash._capture.EmptyCaptureException: No same-target found
while moving(DR) from landing-Coords(row=2, col=2)!
>>> _target_same(*(args + (Coords(10, 3), 'U')))
Traceback (most recent call last):
pandalone.xleash._capture.EmptyCaptureException: No same-target found
while moving(U) from landing-Coords(row=10, col=3)!
"""
assert not CHECK_CELLTYPE or isinstance(dn_coords, Coords), dn_coords
assert not CHECK_CELLTYPE or isinstance(land, Coords), land
target = np.asarray(land)
if (target <= dn_coords).all() and states_matrix[land]:
for mov in moves:
coord, indx = _target_same_vector(
states_matrix, dn_coords, np.asarray(land), mov
)
target[indx] = coord
return Coords(*target)
msg = "No same-target found while moving({}) from {}landing-{}!"
raise EmptyCaptureException(msg.format(moves, edge_name, land))
[docs]def _sort_rect(r1, r2):
"""
Sorts rect-vertices in a 2D-array (with vertices in rows).
Example::
>>> _sort_rect((5, 3), (4, 6))
array([[4, 3],
[5, 6]])
"""
rect = np.array([r1, r2], dtype=int)
rect.sort(0)
return rect
[docs]def _expand_rect(states_matrix, r1, r2, exp_moves):
"""
Applies the :term:`expansion-moves` based on the `states_matrix`.
:param state:
:param Coords r1:
any vertice of the rect to expand
:param Coords r2:
any vertice of the rect to expand
:param np.ndarray states_matrix:
A 2D-array with `False` wherever cell are blank or empty.
Use :meth:`ABCSheet.get_states_matrix()` to derrive it.
:param exp_moves:
Just the parsed string, and not `None`.
:return: a sorted rect top-left/bottom-right
Examples::
>>> states_matrix = np.array([
... #0 1 2 3 4 5
... [0, 0, 0, 0, 0, 0], #0
... [0, 0, 1, 1, 1, 0], #1
... [0, 1, 0, 0, 1, 0], #2
... [0, 1, 1, 1, 1, 0], #3
... [0, 0, 0, 0, 0, 1], #4
... ], dtype=bool)
>>> r1, r2 = (Coords(2, 1), Coords(2, 1))
>>> _expand_rect(states_matrix, r1, r2, 'U')
(Coords(row=2, col=1), Coords(row=2, col=1))
>>> r1, r2 = (Coords(3, 1), Coords(2, 1))
>>> _expand_rect(states_matrix, r1, r2, 'R')
(Coords(row=2, col=1), Coords(row=3, col=4))
>>> r1, r2 = (Coords(2, 1), Coords(6, 1))
>>> _expand_rect(states_matrix, r1, r2, 'r')
(Coords(row=2, col=1), Coords(row=6, col=5))
>>> r1, r2 = (Coords(2, 3), Coords(2, 3))
>>> _expand_rect(states_matrix, r1, r2, 'LURD')
(Coords(row=1, col=1), Coords(row=3, col=4))
"""
assert not CHECK_CELLTYPE or isinstance(r1, Coords), r1
assert not CHECK_CELLTYPE or isinstance(r2, Coords), r2
exp_moves = _parse.parse_expansion_moves(exp_moves)
nd_offsets = np.array([0, 1, 0, 1])
coord_offsets = {
"L": np.array([0, 0, -1, 0]),
"R": np.array([0, 0, 0, 1]),
"U": np.array([-1, 0, 0, 0]),
"D": np.array([0, 1, 0, 0]),
}
coord_indices = {
"L": [0, 1, 2, 2],
"R": [0, 1, 3, 3],
"U": [0, 0, 2, 3],
"D": [1, 1, 2, 3],
}
# Sort rect's vertices top-left/bottom-right.
#
rect = _sort_rect(r1, r2)
# ``[r1, r2, c1, c2]`` to use slices, below
rect = rect.T.flatten()
for dirs_repeated in exp_moves:
for dirs in dirs_repeated:
orig_rect = rect
for d in dirs:
exp_rect = rect + coord_offsets[d]
exp_vect_i = exp_rect[coord_indices[d]] + nd_offsets
exp_vect_v = states_matrix[
slice(*exp_vect_i[:2]), slice(*exp_vect_i[2:])
]
if exp_vect_v.any():
rect = exp_rect
if (rect == orig_rect).all():
break
return Coords(*rect[[0, 2]]), Coords(*rect[[1, 3]])
[docs]def resolve_capture_rect(
states_matrix,
up_dn_margins,
st_edge,
nd_edge=None,
exp_moves=None,
base_coords=None,
):
"""
Performs :term:`targeting`, :term:`capturing` and :term:`expansions` based on the :term:`states-matrix`.
To get the margin_coords, use one of:
* :meth:`ABCSheet.get_margin_coords()`
* :func:`.io.backend.margin_coords_from_states_matrix()`
Its results can be fed into :func:`read_capture_values()`.
:param np.ndarray states_matrix:
A 2D-array with `False` wherever cell are blank or empty.
Use :meth:`ABCSheet.get_states_matrix()` to derrive it.
:param (Coords, Coords) up_dn_margins:
the top-left/bottom-right coords with full-cells
:param Edge st_edge: "uncooked" as matched by regex
:param Edge nd_edge: "uncooked" as matched by regex
:param list or none exp_moves:
Just the parsed string, and not `None`.
:param Coords base_coords:
The base for a :term:`dependent` :term:`1st` edge.
:return: a ``(Coords, Coords)`` with the 1st and 2nd :term:`capture-cell`
ordered from top-left --> bottom-right.
:rtype: tuple
:raises EmptyCaptureException:
When :term:`targeting` failed, and no :term:`target` cell identified.
Examples::
>>> from pandalone.xleash import Edge, margin_coords_from_states_matrix
>>> states_matrix = np.array([
... [0, 0, 0, 0, 0, 0],
... [0, 0, 0, 0, 0, 0],
... [0, 0, 0, 1, 1, 1],
... [0, 0, 1, 0, 0, 1],
... [0, 0, 1, 1, 1, 1]
... ], dtype=bool)
>>> up, dn = margin_coords_from_states_matrix(states_matrix)
>>> st_edge = Edge(Cell('1', 'A'), 'DR')
>>> nd_edge = Edge(Cell('.', '.'), 'DR')
>>> resolve_capture_rect(states_matrix, (up, dn), st_edge, nd_edge)
(Coords(row=3, col=2), Coords(row=4, col=2))
Using dependenent coordinates for the 2nd edge::
>>> st_edge = Edge(Cell('_', '_'), None)
>>> nd_edge = Edge(Cell('.', '.'), 'UL')
>>> rect = resolve_capture_rect(states_matrix, (up, dn), st_edge, nd_edge)
>>> rect
(Coords(row=2, col=2), Coords(row=4, col=5))
Using sheet's margins::
>>> st_edge = Edge(Cell('^', '_'), None)
>>> nd_edge = Edge(Cell('_', '^'), None)
>>> rect == resolve_capture_rect(states_matrix, (up, dn), st_edge, nd_edge)
True
Walking backwards::
>>> st_edge = Edge(Cell('^', '_'), 'L') # Landing is full, so 'L' ignored.
>>> nd_edge = Edge(Cell('_', '_'), 'L', '+') # '+' or would also stop.
>>> rect == resolve_capture_rect(states_matrix, (up, dn), st_edge, nd_edge)
True
"""
up_margin, dn_margin = up_dn_margins
assert not CHECK_CELLTYPE or isinstance(up_margin, Coords), up_margin
assert not CHECK_CELLTYPE or isinstance(dn_margin, Coords), dn_margin
st = _resolve_cell(st_edge.land, up_margin, dn_margin, base_coords)
try:
st_state = states_matrix[st]
except IndexError:
st_state = False
if st_edge.mov is not None:
if st_state:
if st_edge.mod == "+":
st = _target_same(states_matrix, dn_margin, st, st_edge.mov, "1st-")
else:
st = _target_opposite(states_matrix, dn_margin, st, st_edge.mov, "1st-")
if nd_edge is None:
nd = None
else:
nd = _resolve_cell(nd_edge.land, up_margin, dn_margin, st)
if nd_edge.mov is not None:
try:
nd_state = states_matrix[nd]
except IndexError:
nd_state = False
mov = nd_edge.mov
if nd_state:
if (
nd_edge.mod == "+"
or nd_edge.land == Cell(".", ".")
and nd_edge.mod != "?"
):
nd = _target_same(states_matrix, dn_margin, nd, mov, "2nd-")
else:
nd = _target_opposite(states_matrix, dn_margin, nd, mov, "2nd-")
if exp_moves:
st, nd = _expand_rect(states_matrix, st, nd or st, exp_moves)
else:
if nd is not None:
rect = _sort_rect(st, nd)
st, nd = tuple(Coords(*c) for c in rect)
return st, nd