Source code for varray

#
##
##  This file is part of pyFormex 2.0  (Mon Sep 14 12:29:05 CEST 2020)
##  pyFormex is a tool for generating, manipulating and transforming 3D
##  geometrical models by sequences of mathematical operations.
##  Home page: http://pyformex.org
##  Project page:  http://savannah.nongnu.org/projects/pyformex/
##  Copyright 2004-2020 (C) Benedict Verhegghe (benedict.verhegghe@ugent.be)
##  Distributed under the GNU General Public License version 3 or later.
##
##  This program is free software: you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation, either version 3 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program.  If not, see http://www.gnu.org/licenses/.
##

"""Working with variable width tables.

Mesh type geometries use tables of integer data to store the connectivity
between different geometric entities. The basic connectivity table in a
Mesh with elements of the same type is a table of constant width: the
number of nodes connected to each element is constant.
However, the inverse table (the elements connected to each node) does not
have a constant width.

Tables of constant width can conveniently be stored as a 2D array, allowing
fast indexing by row and/or column number. A variable width table can be
stored (using arrays) in two ways:

- as a 2D array, with a width equal to the maximal row length.
  Unused positions in the row are then filled with an invalid value (-1).
- as a 1D array, storing a simple concatenation of the rows.
  An additional array then stores the position in that array of the first
  element of each row.

In pyFormex, variable width tables were initially stored as 2D arrays:
a remnant of the author's past FORTRAN experience. With a growing
professional use of pyFormex involving ever larger models, it became clear
that there was a large memory and speed penalty related to the use of
2D arrays with lots of unused entries.
This is illustrated in the following table, obtained on the
inversion of a connectivity table of 10000 rows and 25 columns.
The table shows the memory size of the inverse table, the time needed to
compute it, and the time to compute both tables. The latter involves an
extra conversion of the stored array to the other data type.

====================  ===============  ==============  ===============
Stored as:             2D (ndarray)     1D (Varray)     1D (Varray)
Rows are sorted:       yes              yes             no
====================  ===============  ==============  ===============
Memory size                450000         250000        250000
Time to create table       128 ms         49 ms         25ms
Time to create both        169 ms         82 ms         57ms
====================  ===============  ==============  ===============

The memory and speed gains of using the Varray are important.
The 2D array can even be generated faster by first creating the
1D array, and then converting that to 2D.
Not sorting the entries in the Varray provides a further gain.
The Varray class defined below therefore does not sort the rows
by default, but provides methods to sort them when needed.
"""

import numpy as np

from pyformex import arraytools as at


[docs]class Varray(object):

    """A variable width 2D integer array

    This class provides an efficient way to store tables of
    nonnegative integers when the rows of the table may have
    different length.

    For large tables this may allow an important memory saving
    compared to a rectangular array where the non-existent entries
    are filled by some special value.
    Data in the Varray are stored as a single 1D array,
    containing the concatenation of all rows.
    An index is kept with the start position of each row in the 1D array.

    Parameters
    ----------
    data:
        Data to initialize a new Varray object. This can be either of:

        - another Varray instance: a shallow copy of the Varray is created.

        - a list of lists of integers. Each item in the list contains
          one row of the table.

        - a 2D ndarray of integer type. The nonnegative numbers on each row
          constitute the data for that row.

        - a 1D array or list of integers, containing the concatenation of
          the rows. The second argument `ind` specifies the indices of the
          first element of each row.

        - a 1D array or list of integers, containing the concatenation of
          the rows obtained by prepending each row with the row length.
          The caller should make sure these 1D data are consistent.

    ind: 1-dim int :term:`array_like`, optional
        This is only used when `data` is a pure concatenation of all rows.
        It holds the position in `data` of the first element of each row.
        Its length is equal to the number of rows (`nrows`) or `nrows+1`.
        It is a non-decreasing series of integer values, starting with 0.
        If it has ``nrows+1`` entries, the last value is equal to the total
        number of elements in `data`. This last value may be omitted,
        and will then be added automatically.
        Note that two subsequent elements may be equal, corresponding with
        an empty row.


    **Attributes**

    Attributes
    ----------
    nrows: int
        The number of rows in the table
    width: int
        The length of the longest row in the table
    size: int
        The total number of entries in the table
    shape: tuple of two ints
        The combined (``nrows``,``width``) values.

    Examples
    --------
    Create a Varray. Its repr shows the rows as a nested list:

    >>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
    >>> Va
    Varray([[0], [1, 2], [0, 2, 4], [0, 2]])

    The Varray prints in a user-friendly format:

    >>> print(Va)
    Varray (4,3)
      [0]
      [1 2]
      [0 2 4]
      [0 2]
    <BLANKLINE>

    Other initialization methods resulting in the same Varray:

    >>> Vb = Varray(Va)
    >>> print(str(Vb) == str(Va))
    True
    >>> Vb = Varray(np.array([[-1,-1,0],[-1,1,2],[0,2,4],[-1,0,2]]))
    >>> print(str(Vb) == str(Va))
    True
    >>> Vc = Varray([0,1,2,0,2,4,0,2], at.cumsum0([1,2,3,2]))
    >>> print(str(Vc) == str(Va))
    True
    >>> Vd = Varray([1,0, 2,1,2, 3,0,2,4, 2,0,2])
    >>> print(str(Vd) == str(Va))
    True

    Show info about the Varray

    >>> print(Va.nrows, Va.width, Va.shape)
    4 3 (4, 3)
    >>> print(Va.size, Va.lengths)
    8 [1 2 3 2]

    Indexing: The data for any row can be obtained by simple indexing:

    >>> print(Va[1])
    [1 2]

    This is equivalent with

    >>> print(Va.row(1))
    [1 2]

    >>> print(Va.row(-1))
    [0 2]

    Change elements:

    >>> Va[1][0] = 3
    >>> print(Va[1])
    [3 2]

    Full row can be changed with matching length:

    >>> Va[1] = [1, 2]
    >>> print(Va[1])
    [1 2]

    Negative row indices are allowed, as shown with ``Va.row(-1)`` above.

    Extracted columns are filled with -1 values where needed

    >>> print(Va.col(1))
    [-1  2  2  2]

    Select takes multiple rows using indices or bool:

    >>> print(Va.select([1,3]))
    Varray (2,2)
      [1 2]
      [0 2]
    <BLANKLINE>
    >>> print(Va.select(Va.lengths==2))
    Varray (2,2)
      [1 2]
      [0 2]
    <BLANKLINE>

    Iterator: A Varray provides its own iterator:

    >>> for row in Va:
    ...     print(row)
    [0]
    [1 2]
    [0 2 4]
    [0 2]

    >>> print(Varray())
    Varray (0,0)
    <BLANKLINE>

    >>> L,R = Va.sameLength()
    >>> print(L)
    [1 2 3]
    >>> print(R)
    [array([0]), array([1, 3]), array([2])]
    >>> for a in Va.split():
    ...     print(a)
    [[0]]
    [[1 2]
     [0 2]]
    [[0 2 4]]

    """

    def __init__(self, data=(), ind=None):
        """Initialize the Varray. See the class docstring.

        Parameters
        ----------
        data:
            Another Varray, a list of lists of ints, a 2D int array, or
            1D int data. 1D data are either the plain concatenation of the
            rows (with `ind` given) or the rows each preceded by their
            length (with `ind` left None).
        ind: 1-dim int :term:`array_like`, optional
            Position in the 1D `data` of the first element of each row.

        Raises
        ------
        ValueError
            If the input can not be turned into a consistent data/index
            pair. This includes inline row lengths that are negative.
        """

        # If data is a Varray, just use its data
        if isinstance(data, Varray):
            self._replace_data(data)
            return

        # Allow for empty Varray
        if len(data) <= 0:
            data = np.array([], dtype=at.Int)

        # The input format is detected by trial: each attempt below is
        # allowed to fail, leaving `data` for the next attempt.

        # If data is an array, convert to list of lists
        try:
            data = at.checkArray(data, kind='i', ndim=2)
            # only the nonnegative entries of a row are real data
            data = [row[row >= 0] for row in data]
        except Exception:
            pass

        # If data is a list of lists, concatenate and create index
        try:
            # construct row length array
            rowlen = [len(row) for row in data]
            ind = at.cumsum0(rowlen)
            data = np.concatenate(data).astype(at.Int)
        except Exception:
            pass

        # data should now be 1D array
        # ind is also 1D array, unless initialized from inlined length data
        try:
            data = at.checkArray(data, kind='i', ndim=1)
            if ind is None:
                # extract row lengths from data
                i = 0
                size = len(data)
                rowlen = []
                while i < size:
                    n = data[i]
                    if n < 0:
                        # A negative length would move the cursor backwards
                        # (or not at all), so the scan would never end.
                        raise ValueError("Negative row length in data")
                    rowlen.append(n)
                    i += n + 1
                # create indices and remove row lengths from data
                ind = at.cumsum0(rowlen)
                data = np.delete(data, ind[:-1] + np.arange(len(rowlen)))

            ind = at.checkArray(ind, kind='i', ndim=1)
            # np.sort returns a new array, so an index array supplied by
            # the caller is never reordered in place.
            ind = np.sort(ind)
            if ind[0] != 0 or ind[-1] > len(data):
                raise ValueError
            if ind[-1] != len(data):
                # the closing entry was omitted: add it
                ind = np.concatenate([ind, [len(data)]])
        except Exception as err:
            raise ValueError("Invalid input data for Varray") from err

        # Store the data
        self.data = data
        self.ind = ind
        # We also store the width because it is often needed and
        # may be expensive to compute
        lengths = self.lengths
        self.width = lengths.max() if len(lengths) > 0 else 0
        # And the current row, for use in iterators
        self._row = 0


    def _replace_data(self, var):
        """Replace the current data with data from another Varray"""
        if not isinstance(var, Varray):
            raise ValueError("Expected a Varray as argument")
        self.data = var.data
        self.ind = var.ind
        self.width = var.width
        self._row = 0


[docs]    def copy(self):
        """Return a deep copy of the Varray"""
        return Varray(self.data.copy(), self.ind.copy())


    # Attributes computed ad hoc, because cheap(er)


    @property
    def lengths(self):
        """Return the length of all rows of the Varray"""
        return self.ind[1:] - self.ind[:-1]


    @property
    def nrows(self):
        """Return the number of rows in the Varray"""
        return len(self.ind) - 1


    @property
    def size(self):
        """Return the total number of elements in the Varray"""
        return self.ind[-1]


    @property
    def shape(self):
        """Return a tuple with the number of rows and maximum row length"""
        return (self.nrows, self.width)


[docs]    def length(self, i):
        """Return the length of row i"""
        return self.ind[i + 1] - self.ind[i]


    def __getitem__(self, i):
        """Return the data for the row i.

        Parameters
        ----------
        i: int
            The index of the row to return. A negative value counts from
            the last row.

        Returns
        -------
        1-dim int array
            An array with the values of row i. This is a slice of the
            stored flat data array.

        Raises
        ------
        ValueError
            If i is not a single int.
        IndexError
            If i is outside the range ``-nrows <= i < nrows``.

        Examples
        --------
        >>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
        >>> print(Va[1])
        [1 2]

        """
        if not at.isInt(i):
            raise ValueError("Varray index should be an single int")
        nrows = self.nrows
        if i < 0:
            i += nrows
        if not 0 <= i < nrows:
            # An index still negative after the shift would wrap around
            # in self.ind and silently return a wrong slice.
            raise IndexError("Varray row index out of range")
        return self.data[self.ind[i]:self.ind[i + 1]]


    def __setitem__(self, i, data):
        """Set the data for the row i.

        Parameters
        ----------
        i: int
            The index of the row to change. A negative value counts from
            the last row.
        data: int or int :term:`array_like`
            Data to replace the row i.
            If a single int, all items in the row are set to this value.
            If an array, it should match the row length.

        Raises
        ------
        ValueError
            If i is not a single int.
        IndexError
            If i is outside the range ``-nrows <= i < nrows``.

        Examples
        --------
        >>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
        >>> Va[1] = 0
        >>> Va[2] = [1,3,5]
        >>> Va[3][1] = 1
        >>> print(Va)
        Varray (4,3)
          [0]
          [0 0]
          [1 3 5]
          [0 1]
        <BLANKLINE>

        """
        if not at.isInt(i):
            raise ValueError("Varray index should be an single int")
        nrows = self.nrows
        if i < 0:
            i += nrows
        if not 0 <= i < nrows:
            # An index still negative after the shift would wrap around
            # in self.ind and silently write to a wrong slice.
            raise IndexError("Varray row index out of range")
        self.data[self.ind[i]:self.ind[i + 1]] = data


[docs]    def row(self, i):
        """Return the data for row i

        This returns self[i].
        """
        return self[i]


[docs]    def setRow(self, i, data):
        """Replace the data of row i

        This is equivalent to self[i] = data.
        """
        self[i] = data


[docs]    def col(self, i):
        """Return the data for column i

        This always returns a list of length nrows.
        For rows where the column index i is missing, a value -1 is returned.
        """
        return np.array([r[i] if i in range(-len(r), len(r)) else -1
                         for r in self])


[docs]    def select(self, sel):
        """Select some rows from the Varray.

        Parameters
        ----------
        sel: iterable of ints or bools
            Specifies the row(s) to be selected.
            If type is int, the values are the row numbers.
            If type is bool, the length of the iterable should be
            exactly ``self.nrows``; the positions where the value is True are
            the rows to be returned.

        Returns
        -------
        Varray object
            A Varray with only the selected rows.

        Examples
        --------
        >>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
        >>> Va.select((1,3))
        Varray([[1, 2], [0, 2]])
        >>> Va.select((False,True,False,True))
        Varray([[1, 2], [0, 2]])

        """
        sel = np.asarray(sel)   # this is important, because Python bool isInt
        if len(sel) > 0 and not at.isInt(sel[0]):
            sel = np.where(sel)[0]
        return Varray([self[j] for j in sel])


    def __iter__(self):
        """Return an iterator for the Varray"""
        self._row = 0
        return self


    def __next__(self):
        """Return the next row of the Varray"""
        if self._row >= self.nrows:
            raise StopIteration
        row = self[self._row]
        self._row += 1
        return row


[docs]    def index(self, sel):
        """Convert a selector to an index.

        Parameters
        ----------
        sel: iterable of ints or bools
            Specifies the elements of the Varray to be selected.
            If type is int, the values are the index numbers in the
            flat array. If type is bool, the length of the iterable
            should be exactly ``self.size``; the positions where the
            value is True will be returned.

        Returns
        -------
        int array
            The selected element numbers.

        Examples
        --------
        >>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
        >>> Va.index((1,3,5,7))
        array([1, 3, 5, 7])
        >>> Va.index((False,True,False,True,False,True,False,True))
        array([1, 3, 5, 7])

        """
        try:
            sel = at.checkArray(sel, shape=(self.size,), kind='b')
            sel = np.where(sel)[0]
        except ValueError:
            sel = at.checkArray(sel, kind='i')
        return sel


[docs]    def rowindex(self, sel):
        """Return the rowindex for the elements flagged by selector sel.

        sel is either a list of element numbers or a bool array with
        length self.size
        """
        sel = self.index(sel)
        return self.ind.searchsorted(sel, side='right') - 1


[docs]    def colindex(self, sel):
        """Return the column index for the elements flagged by selector sel.

        sel is either a list of element numbers or a bool array with
        length self.size
        """
        sel = self.index(sel)
        ri = self.rowindex(sel)
        return sel - self.ind[ri]


[docs]    def where(self, sel):
        """Return row and column index of the selected elements

        sel is either a list of element numbers or a bool array with
        length self.size

        Returns a 2D array where the first column is the row index
        and the second column the corresponding column index of an
        element selected by sel
        """
        return np.column_stack([self.rowindex(sel), self.colindex(sel)])


[docs]    def index1d(self, i, j):
        """Return the sequential index for the element with 2D index i,j"""
        if j >= 0 and j < self.length(i):
            return self.ind[i] + j
        else:
            raise IndexError("Index out of bounds")


[docs]    def sorted(self):
        """Returns a sorted Varray.

        Returns a Varray with the same entries but where each
        row is sorted.

        This returns a copy of the data, and leaves the original
        unchanged.

        See also :meth:`sort` for sorting the rows inplace.
        """
        return Varray([sorted(row) for row in self])


[docs]    def removeFlat(self, ind):
        """Remove the nelement with flat index i

        Parameters
        ----------
        ind: int or int :term:`array_like`
            Index in the flat data of the element(s) to remove.

        Returns
        -------
        Varray
            A Varray with the element(s) ind removed.

        Examples
        --------
        >>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
        >>> Va.removeFlat(3)
        Varray([[0], [1, 2], [2, 4], [0, 2]])
        >>> Va.removeFlat([0,2,7])
        Varray([[], [1], [0, 2, 4], [0]])
        """
        srt = np.unique(ind)
        data = self.data[at.complement(srt, len(self.data))]
        ind = self.ind.copy()
        for i in srt[::-1]:
            ind[ind>i] -= 1
        return Varray(data, ind)


[docs]    def sort(self):
        """Sort the Varray inplace.

        Sorting a Varray sorts the elements in each row.
        The sorting is done inplace.

        See also :meth:`sorted` for sorting the rows without
        changing the original.

        Examples
        --------
        >>> va = Varray([[0],[2,1],[4,0,2],[0,2]])
        >>> va.sort()
        >>> print(va)
        Varray (4,3)
          [0]
          [1 2]
          [0 2 4]
          [0 2]
        <BLANKLINE>
        """
        for row in self:
            row.sort()


[docs]    def toArray(self):
        """Convert the Varray to a 2D array.

        Returns a 2D array with shape (self.nrows,self.width), containing
        the row data of the Varray.
        Rows which are shorter than width are padded at the start with
        values -1.
        """
        a = -np.ones((self.nrows, self.width), dtype=at.Int)
        for i, r in enumerate(self):
            if len(r) > 0:
                a[i, -len(r):] = r
        return a


[docs]    def sameLength(self):
        """Groups the rows according to their length.

        Returns a tuple of two lists (lengths,rows):

        - lengths: the sorted unique row lengths,
        - rows: the indices of the rows having the corresponding length.
        """
        lens = self.lengths
        ulens = np.unique(lens)
        return ulens, [np.where(lens == l)[0] for l in ulens]


[docs]    def split(self):
        """Split the Varray into 2D arrays.

        Returns a list of 2D arrays with the same number
        of columns and the indices in the original Varray.
        """
        return [self.select(ind).toArray() for ind in self.sameLength()[1]]


[docs]    def toList(self):
        """Convert the Varray to a nested list.

        Returns a list of lists of integers.
        """
        return [r.tolist() for r in self]


[docs]    def inverse(self, sort=True, expand=False):
        """Return the inverse of a Varray.

        The inverse of a Varray is again a Varray. Values k on a row i will
        become values i on row k. The number of data in both Varrays is thus
        the same.

        The inverse of the inverse is equal to the original. Two Varrays are
        equal if they have the same number of rows and all rows contain the
        same numbers, independent of their order.

        Parameters
        ----------
        sort: bool
            If True (default), the values on each row of the returned index
            are sorted.
            The default (False) will leave the values in the order obtained
            by the algorithm, which depends on Python/numpy sorting, and
            usually turns out to be sorted as well.

        Returns
        -------
        :class:`Varray`
        The inverse index, as a Varray (default).
        Each row ``i`` of the inverse contains the numbers of the rows of the
        input in which a value ``i`` appeared. The rows are sorted by default.

        Examples
        --------
        >>> a = Varray([[0,1],[2,0],[1,2],[4]])
        >>> b = a.inverse()
        >>> c = b.inverse()
        >>> print(a,b,c)
        Varray (4,2)
          [0 1]
          [2 0]
          [1 2]
          [4]
         Varray (5,2)
          [0 1]
          [0 2]
          [1 2]
          []
          [3]
         Varray (4,2)
          [0 1]
          [0 2]
          [1 2]
          [4]
        <BLANKLINE>
        >>> a = Varray([[-1,0,1],[0,2,-1],[2,1,1],[3,-2,0]])
        >>> print(a.inverse())
        Varray (4,3)
          [0 1 3]
          [0 2 2]
          [1 2]
          [3]
        <BLANKLINE>
        """
        nrows, ncols = self.shape
        if nrows <= 0:
            # allow inverse of empty Varray
            va = Varray()
        else:
            # Create a row index for each value of the data
            row = np.arange(nrows).repeat(self.lengths)
            s = self.data.argsort()
            t = self.data[s]
            u = row[s]
            # Now search for the start of every row number
            v = t.searchsorted(np.arange(t.max() + 1))
            if v[0] > 0:
                # There were negative numbers: remove them
                u = u[v[0]:]
                v -= v[0]
            va = Varray(u, v)
            if sort:
                # TODO: this could be avoided by using a stable
                # sort algorithm above
                va.sort()
        if expand:
            va = va.toArray()
        return va


    def __repr__(self):
        """Return the class name followed by the nested list of rows"""
        name = self.__class__.__name__
        rows = self.toList()
        return "%s(%s)" % (name, rows)


    def __str__(self):
        """Return a multiline string with a header and one line per row

        The header shows the class name and the (nrows,width) shape.
        Each row is printed on its own line, indented by two spaces.
        """
        header = "%s (%s,%s)\n" % (self.__class__.__name__, self.nrows, self.width)
        lines = ['  ' + row.__str__() + '\n' for row in self]
        return header + ''.join(lines)


# End