#
##
## This file is part of pyFormex 2.0 (Mon Sep 14 12:29:05 CEST 2020)
## pyFormex is a tool for generating, manipulating and transforming 3D
## geometrical models by sequences of mathematical operations.
## Home page: http://pyformex.org
## Project page: http://savannah.nongnu.org/projects/pyformex/
## Copyright 2004-2020 (C) Benedict Verhegghe (benedict.verhegghe@ugent.be)
## Distributed under the GNU General Public License version 3 or later.
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see http://www.gnu.org/licenses/.
##
"""Working with variable width tables.
Mesh type geometries use tables of integer data to store the connectivity
between different geometric entities. The basic connectivity table in a
Mesh with elements of the same type is a table of constant width: the
number of nodes connected to each element is constant.
However, the inverse table (the elements connected to each node) does not
have a constant width.
Tables of constant width can conveniently be stored as a 2D array, allowing
fast indexing by row and/or column number. A variable width table can be
stored (using arrays) in two ways:
- as a 2D array, with a width equal to the maximal row length.
Unused positions in the row are then filled with an invalid value (-1).
- as a 1D array, storing a simple concatenation of the rows.
An additional array then stores the position in that array of the first
element of each row.
In pyFormex, variable width tables were initially stored as 2D arrays:
a remnant of the author's past FORTRAN experience. With a growing
professional use of pyFormex involving ever larger models, it became clear
that there was a large memory and speed penalty related to the use of
2D arrays with lots of unused entries.
This is illustrated in the following table, obtained on the
inversion of a connectivity table of 10000 rows and 25 columns.
The table shows the memory size of the inverse table, the time needed to
compute it, and the time to compute both tables. The latter involves an
extra conversion of the stored array to the other data type.
==================== =============== ============== ===============
Stored as: 2D (ndarray) 1D (Varray) 1D (Varray)
Rows are sorted: yes yes no
==================== =============== ============== ===============
Memory size 450000 250000 250000
Time to create table 128 ms 49 ms 25ms
Time to create both 169 ms 82 ms 57ms
==================== =============== ============== ===============
The memory and speed gains of using the Varray are important.
The 2D array can even be faster generated by first creating the
1D array, and then converting that to 2D.
Not sorting the entries in the Varray provides a further gain.
The Varray class defined below therefore does not sort the rows
by default, but provides methods to sort them when needed.
"""
import numpy as np
from pyformex import arraytools as at
[docs]class Varray(object):
"""A variable width 2D integer array
This class provides an efficient way to store tables of
nonnegative integers when the rows of the table may have
different length.
For large tables this may allow an important memory saving
compared to a rectangular array where the non-existent entries
are filled by some special value.
Data in the Varray are stored as a single 1D array,
containing the concatenation of all rows.
An index is kept with the start position of each row in the 1D array.
Parameters
----------
data:
Data to initialize to a new Varray object. This can either of:
- another Varray instance: a shallow copy of the Varray is created.
- a list of lists of integers. Each item in the list contains
one row of the table.
- a 2D ndarray of integer type. The nonnegative numbers on each row
constitute the data for that row.
- a 1D array or list of integers, containing the concatenation of
the rows. The second argument `ind` specifies the indices of the
first element of each row.
- a 1D array or list of integers, containing the concatenation of
the rows obtained by prepending each row with the row length.
The caller should make sure these 1D data are consistent.
ind: 1-dim int :term:`array_like`, optional
This is only used when `data` is a pure concatenation of all rows.
It holds the position in `data` of the first element of each row.
Its length is equal to the number of rows (`nrows`) or `nrows+1`.
It is a non-decreasing series of integer values, starting with 0.
If it has ``nrows+1`` entries, the last value is equal to the total
number of elements in `data`. This last value may be omitted,
and will then be added automatically.
Note that two subsequent elements may be equal, corresponding with
an empty row.
**Attributes**
Attributes
----------
nrows: int
The number of rows in the table
width: int
The length of the longest row in the table
size: int
The total number of entries in the table
shape: tuple of two ints
The combined (``nrows``,``width``) values.
Examples
--------
Create a Varray is by default printed in user-friendly format:
>>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
>>> Va
Varray([[0], [1, 2], [0, 2, 4], [0, 2]])
The Varray prints in a user-friendly format:
>>> print(Va)
Varray (4,3)
[0]
[1 2]
[0 2 4]
[0 2]
<BLANKLINE>
Other initialization methods resulting in the same Varray:
>>> Vb = Varray(Va)
>>> print(str(Vb) == str(Va))
True
>>> Vb = Varray(np.array([[-1,-1,0],[-1,1,2],[0,2,4],[-1,0,2]]))
>>> print(str(Vb) == str(Va))
True
>>> Vc = Varray([0,1,2,0,2,4,0,2], at.cumsum0([1,2,3,2]))
>>> print(str(Vc) == str(Va))
True
>>> Vd = Varray([1,0, 2,1,2, 3,0,2,4, 2,0,2])
>>> print(str(Vd) == str(Va))
True
Show info about the Varray
>>> print(Va.nrows, Va.width, Va.shape)
4 3 (4, 3)
>>> print(Va.size, Va.lengths)
8 [1 2 3 2]
Indexing: The data for any row can be obtained by simple indexing:
>>> print(Va[1])
[1 2]
This is equivalent with
>>> print(Va.row(1))
[1 2]
>>> print(Va.row(-1))
[0 2]
Change elements:
>>> Va[1][0] = 3
>>> print(Va[1])
[3 2]
Full row can be changed with matching length:
>>> Va[1] = [1, 2]
>>> print(Va[1])
[1 2]
Negative indices are allowed:
Extracted columns are filled with -1 values where needed
>>> print(Va.col(1))
[-1 2 2 2]
Select takes multiple rows using indices or bool:
>>> print(Va.select([1,3]))
Varray (2,2)
[1 2]
[0 2]
<BLANKLINE>
>>> print(Va.select(Va.lengths==2))
Varray (2,2)
[1 2]
[0 2]
<BLANKLINE>
Iterator: A Varray provides its own iterator:
>>> for row in Va:
... print(row)
[0]
[1 2]
[0 2 4]
[0 2]
>>> print(Varray())
Varray (0,0)
<BLANKLINE>
>>> L,R = Va.sameLength()
>>> print(L)
[1 2 3]
>>> print(R)
[array([0]), array([1, 3]), array([2])]
>>> for a in Va.split():
... print(a)
[[0]]
[[1 2]
[0 2]]
[[0 2 4]]
"""
def __init__(self, data=[], ind=None):
"""Initialize the Varray. See the class docstring."""
# If data is a Varray, just use its data
if isinstance(data, Varray):
self._replace_data(data)
return
# Allow for empty Varray
if len(data) <= 0:
data = np.array([], dtype=at.Int)
# If data is an array, convert to list of lists
try:
data = at.checkArray(data, kind='i', ndim=2)
data = [row[row >= 0] for row in data]
except Exception:
pass
# If data is a list of lists, concatenate and create index
try:
# construct row length array
rowlen = [len(row) for row in data]
ind = at.cumsum0(rowlen)
data = np.concatenate(data).astype(at.Int)
except Exception:
pass
# data should now be 1D array
# ind is also 1D array, unless initialized from inlined length data
try:
data = at.checkArray(data, kind='i', ndim=1)
if ind is None:
# extract row lengths from data
i = 0
size = len(data)
rowlen = []
while i < size:
rowlen.append(data[i])
i += data[i] + 1
# create indices and remove row lengths from data
ind = at.cumsum0(rowlen)
data = np.delete(data, ind[:-1] + np.arange(len(rowlen)))
ind = at.checkArray(ind, kind='i', ndim=1)
ind.sort()
if ind[0] != 0 or ind[-1] > len(data):
raise ValueError
if ind[-1] != len(data):
ind = np.concatenate([ind, [len(data)]])
except Exception:
raise ValueError("Invalid input data for Varray")
# Store the data
self.data = data
self.ind = ind
# We also store the width because it is often needed and
# may be expensive to compute
self.width = max(self.lengths) if len(self.lengths) > 0 else 0
# And the current row, for use in iterators
self._row = 0
def _replace_data(self, var):
"""Replace the current data with data from another Varray"""
if not isinstance(var, Varray):
raise ValueError("Expected a Varray as argument")
self.data = var.data
self.ind = var.ind
self.width = var.width
self._row = 0
[docs] def copy(self):
"""Return a deep copy of the Varray"""
return Varray(self.data.copy(), self.ind.copy())
# Attributes computed ad hoc, because cheap(er)
@property
def lengths(self):
"""Return the length of all rows of the Varray"""
return self.ind[1:] - self.ind[:-1]
@property
def nrows(self):
"""Return the number of rows in the Varray"""
return len(self.ind) - 1
@property
def size(self):
"""Return the total number of elements in the Varray"""
return self.ind[-1]
@property
def shape(self):
"""Return a tuple with the number of rows and maximum row length"""
return (self.nrows, self.width)
[docs] def length(self, i):
"""Return the length of row i"""
return self.ind[i + 1] - self.ind[i]
def __getitem__(self, i):
"""Return the data for the row i.
Parameters
----------
i: int
The index of the row to return.
Returns
-------
1-dim int array
An array with the values of row i
Examples
--------
>>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
>>> print(Va[1])
[1 2]
"""
if not at.isInt(i):
raise ValueError("Varray index should be an single int")
if i < 0:
i += self.nrows
return self.data[self.ind[i]:self.ind[i + 1]]
def __setitem__(self, i, data):
"""Set the data for the row i.
Parameters
----------
i: int
The index of the row to change.
data: int or int :term:`array_like`
Data to replace the row i.
If a single int, all items in the row are set to this value.
If an array, it should match the row length.
Examples
--------
>>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
>>> Va[1] = 0
>>> Va[2] = [1,3,5]
>>> Va[3][1] = 1
>>> print(Va)
Varray (4,3)
[0]
[0 0]
[1 3 5]
[0 1]
<BLANKLINE>
"""
if not at.isInt(i):
raise ValueError("Varray index should be an single int")
if i < 0:
i += self.nrows
self.data[self.ind[i]:self.ind[i + 1]] = data
[docs] def row(self, i):
"""Return the data for row i
This returns self[i].
"""
return self[i]
[docs] def setRow(self, i, data):
"""Replace the data of row i
This is equivalent to self[i] = data.
"""
self[i] = data
[docs] def col(self, i):
"""Return the data for column i
This always returns a list of length nrows.
For rows where the column index i is missing, a value -1 is returned.
"""
return np.array([r[i] if i in range(-len(r), len(r)) else -1
for r in self])
[docs] def select(self, sel):
"""Select some rows from the Varray.
Parameters
----------
sel: iterable of ints or bools
Specifies the row(s) to be selected.
If type is int, the values are the row numbers.
If type is bool, the length of the iterable should be
exactly ``self.nrows``; the positions where the value is True are
the rows to be returned.
Returns
-------
Varray object
A Varray with only the selected rows.
Examples
--------
>>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
>>> Va.select((1,3))
Varray([[1, 2], [0, 2]])
>>> Va.select((False,True,False,True))
Varray([[1, 2], [0, 2]])
"""
sel = np.asarray(sel) # this is important, because Python bool isInt
if len(sel) > 0 and not at.isInt(sel[0]):
sel = np.where(sel)[0]
return Varray([self[j] for j in sel])
def __iter__(self):
"""Return an iterator for the Varray"""
self._row = 0
return self
def __next__(self):
"""Return the next row of the Varray"""
if self._row >= self.nrows:
raise StopIteration
row = self[self._row]
self._row += 1
return row
[docs] def index(self, sel):
"""Convert a selector to an index.
Parameters
----------
sel: iterable of ints or bools
Specifies the elements of the Varray to be selected.
If type is int, the values are the index numbers in the
flat array. If type is bool, the length of the iterable
should be exactly ``self.size``; the positions where the
value is True will be returned.
Returns
-------
int array
The selected element numbers.
Examples
--------
>>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
>>> Va.index((1,3,5,7))
array([1, 3, 5, 7])
>>> Va.index((False,True,False,True,False,True,False,True))
array([1, 3, 5, 7])
"""
try:
sel = at.checkArray(sel, shape=(self.size,), kind='b')
sel = np.where(sel)[0]
except ValueError:
sel = at.checkArray(sel, kind='i')
return sel
[docs] def rowindex(self, sel):
"""Return the rowindex for the elements flagged by selector sel.
sel is either a list of element numbers or a bool array with
length self.size
"""
sel = self.index(sel)
return self.ind.searchsorted(sel, side='right') - 1
[docs] def colindex(self, sel):
"""Return the column index for the elements flagged by selector sel.
sel is either a list of element numbers or a bool array with
length self.size
"""
sel = self.index(sel)
ri = self.rowindex(sel)
return sel - self.ind[ri]
[docs] def where(self, sel):
"""Return row and column index of the selected elements
sel is either a list of element numbers or a bool array with
length self.size
Returns a 2D array where the first column is the row index
and the second column the corresponding column index of an
element selected by sel
"""
return np.column_stack([self.rowindex(sel), self.colindex(sel)])
[docs] def index1d(self, i, j):
"""Return the sequential index for the element with 2D index i,j"""
if j >= 0 and j < self.length(i):
return self.ind[i] + j
else:
raise IndexError("Index out of bounds")
[docs] def sorted(self):
"""Returns a sorted Varray.
Returns a Varray with the same entries but where each
row is sorted.
This returns a copy of the data, and leaves the original
unchanged.
See also :meth:`sort` for sorting the rows inplace.
"""
return Varray([sorted(row) for row in self])
[docs] def removeFlat(self, ind):
"""Remove the nelement with flat index i
Parameters
----------
ind: int or int :term:`array_like`
Index in the flat data of the element(s) to remove.
Returns
-------
Varray
A Varray with the element(s) ind removed.
Examples
--------
>>> Va = Varray([[0],[1,2],[0,2,4],[0,2]])
>>> Va.removeFlat(3)
Varray([[0], [1, 2], [2, 4], [0, 2]])
>>> Va.removeFlat([0,2,7])
Varray([[], [1], [0, 2, 4], [0]])
"""
srt = np.unique(ind)
data = self.data[at.complement(srt, len(self.data))]
ind = self.ind.copy()
for i in srt[::-1]:
ind[ind>i] -= 1
return Varray(data, ind)
[docs] def sort(self):
"""Sort the Varray inplace.
Sorting a Varray sorts the elements in each row.
The sorting is done inplace.
See also :meth:`sorted` for sorting the rows without
changing the original.
Examples
--------
>>> va = Varray([[0],[2,1],[4,0,2],[0,2]])
>>> va.sort()
>>> print(va)
Varray (4,3)
[0]
[1 2]
[0 2 4]
[0 2]
<BLANKLINE>
"""
for row in self:
row.sort()
[docs] def toArray(self):
"""Convert the Varray to a 2D array.
Returns a 2D array with shape (self.nrows,self.width), containing
the row data of the Varray.
Rows which are shorter than width are padded at the start with
values -1.
"""
a = -np.ones((self.nrows, self.width), dtype=at.Int)
for i, r in enumerate(self):
if len(r) > 0:
a[i, -len(r):] = r
return a
[docs] def sameLength(self):
"""Groups the rows according to their length.
Returns a tuple of two lists (lengths,rows):
- lengths: the sorted unique row lengths,
- rows: the indices of the rows having the corresponding length.
"""
lens = self.lengths
ulens = np.unique(lens)
return ulens, [np.where(lens == l)[0] for l in ulens]
[docs] def split(self):
"""Split the Varray into 2D arrays.
Returns a list of 2D arrays with the same number
of columns and the indices in the original Varray.
"""
return [self.select(ind).toArray() for ind in self.sameLength()[1]]
[docs] def toList(self):
"""Convert the Varray to a nested list.
Returns a list of lists of integers.
"""
return [r.tolist() for r in self]
[docs] def inverse(self, sort=True, expand=False):
"""Return the inverse of a Varray.
The inverse of a Varray is again a Varray. Values k on a row i will
become values i on row k. The number of data in both Varrays is thus
the same.
The inverse of the inverse is equal to the original. Two Varrays are
equal if they have the same number of rows and all rows contain the
same numbers, independent of their order.
Parameters
----------
sort: bool
If True (default), the values on each row of the returned index
are sorted.
The default (False) will leave the values in the order obtained
by the algorithm, which depends on Python/numpy sorting, and
usually turns out to be sorted as well.
Returns
-------
:class:`Varray`
The inverse index, as a Varray (default).
Each row ``i`` of the inverse contains the numbers of the rows of the
input in which a value ``i`` appeared. The rows are sorted by default.
Examples
--------
>>> a = Varray([[0,1],[2,0],[1,2],[4]])
>>> b = a.inverse()
>>> c = b.inverse()
>>> print(a,b,c)
Varray (4,2)
[0 1]
[2 0]
[1 2]
[4]
Varray (5,2)
[0 1]
[0 2]
[1 2]
[]
[3]
Varray (4,2)
[0 1]
[0 2]
[1 2]
[4]
<BLANKLINE>
>>> a = Varray([[-1,0,1],[0,2,-1],[2,1,1],[3,-2,0]])
>>> print(a.inverse())
Varray (4,3)
[0 1 3]
[0 2 2]
[1 2]
[3]
<BLANKLINE>
"""
nrows, ncols = self.shape
if nrows <= 0:
# allow inverse of empty Varray
va = Varray()
else:
# Create a row index for each value of the data
row = np.arange(nrows).repeat(self.lengths)
s = self.data.argsort()
t = self.data[s]
u = row[s]
# Now search for the start of every row number
v = t.searchsorted(np.arange(t.max() + 1))
if v[0] > 0:
# There were negative numbers: remove them
u = u[v[0]:]
v -= v[0]
va = Varray(u, v)
if sort:
# TODO: this could be avoided by using a stable
# sort algorithm above
va.sort()
if expand:
va = va.toArray()
return va
def __repr__(self):
"""String representation of the Varray"""
return "%s(%s)" % (self.__class__.__name__, self.toList())
def __str__(self):
"""Nicely print the Varray"""
s = "%s (%s,%s)\n" % (self.__class__.__name__, self.nrows, self.width)
for row in self:
s += ' ' + row.__str__() + '\n'
return s
# End