#  Source code for rc.base.models

#  BSD 3-Clause License.
#
#  Copyright (c) 2019-2025 Robert A. Milton. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without modification, are permitted provided that
#  the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
#  following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
#  following disclaimer in the documentation and/or other materials provided with the distribution.
#
#  3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or
#  promote products derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
#  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
#  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
#  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
#  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
#  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

""" Abstract and concrete base classes for RomCom Models."""

from __future__ import annotations

from .definitions import *

from shutil import copyfile, copytree, rmtree
from json import load, dump


# Alias for a plain ``dict`` keyed by ``str`` with values of any type.
MetaData = dict[str, Any]
"""Type for passing metadata as ``**kwargs``."""

# Union of the three matrix-like types. NOTE(review): ``Pd``, ``Np`` and ``Tc`` are presumably namespaces
# exported by ``.definitions`` (star-imported above) -- confirm against that module.
Matrix = Union[Pd.DataFrame, Np.Matrix, Tc.Matrix]
"""Types which a DataBase Table accepts."""


[docs] class Store(ABC): """ Base class for any stored class. Users are not expected to subclass this class directly.""" Path = Path | str """ Class attribute aliasing Types used to specify the ``path`` to a Store. Do not override.""" ext: str = '' """Class attribute specifying the file extension terminating ``self.path``. Override if and only if the derived class must be stored in a file. Otherwise, ``cls.ext == ''`` and the derived class is stored in a folder.""" @property def path(self) -> Path: """ The ``Path`` to this ``Store``, without ``cls.ext``. File extension is internal, meaning ``self._path = self._path + cls.ext``.""" return self._path.with_suffix('') if self.ext else self._path
[docs] def __repr__(self) -> str: """ The ``Path`` to this ``Store``. :meta public: """ return str(self._path)
[docs] def __str__(self) -> str: """ The ``Path`` to this ``Store``, abbreviated. :meta public: """ return self._path.stem if self.ext else self._path.name
[docs] @abstractmethod def __call__(self, **data) -> Self: """ Update and store ``self``. Args: **data: Data to update. Returns: ``self``. """ raise NotImplementedError()
@abstractmethod def __init__(self, path: Path): """ Construct ``self``. Overrides should call ``super(Store).__init__(path)`` as a matter of priority. Then they should read ``self`` from ``self._path`` or write ``self`` in ``self._path``. Args: path: The ``Path`` to ``self``. Do not include an extension. """ self._path = self.mkdir(path)
[docs] @classmethod def extAppend(cls, path: Path) -> Path: """ Append ``cls.ext`` to ``path.name``. Args: path: The path to append ``cls.ext`` to. Returns: ``Path(path)`` with ``cls.ext`` appended. """ path = Path(path) if cls.ext: path = path.with_name(path.name + cls.ext) return path
[docs] @classmethod def mkdir(cls, path: Path) -> Path: """ Create ``path.parent``, with a subfolder ``path`` if ``cls.ext == ''``. Args: path: The folder to create, or a child file of the folder to create. Returns: ``Path(path)`` with ``cls.ext`` appended. """ path = cls.extAppend(path) if cls.ext: path.parent.mkdir(mode=0o777, parents=True, exist_ok=True) else: path.mkdir(mode=0o777, parents=True, exist_ok=True) return path
[docs] @classmethod @abstractmethod def create(cls, path: Path) -> Self | Path: """ Create a folder (and its parents) if it doesn't already exist. Overrides should create and return an instance of ``cls``. Args: path: Where to create the folder. If ``cls.ext != ''``, the parent folder of ``path`` is created. Returns: ``path`` with extension ``f'.{cls.ext}'``. Raises: FileExistsError: If attempting to overwrite a file with a folder. """ return cls.mkdir(path)
[docs] @classmethod @abstractmethod def copy(cls, src: Path, dst: Path) -> Self | Path: """ Copy ``src`` to ``dst``, overwriting only files in common. Overrides should copy an instance of ``cls`` called ``src`` to ``Store.create(dst)``, and return the copy. Args: src: The source ``Path``, which must be a folder or a file. dst: The destination ``Path``, which may or may not exist. Returns: ``dst``. Raises: FileNotFoundError: If ``src`` does not exist. FileExistsError: If attempting to overwrite a file with a folder. """ src, dst = cls.extAppend(src), cls.mkdir(dst) if src.is_dir(): copytree(src=src, dst=dst, dirs_exist_ok=True) else: copyfile(src, dst) return dst
[docs] @classmethod def delete(cls, path: Path) -> Path: """ Delete any file or folder at ``path``. Args: path: The ``Path`` to delete. Returns: ``path``, which no longer exists. """ path = cls.extAppend(path) if path.is_dir(): rmtree(path, ignore_errors=True) else: path.unlink(missing_ok=False) return path
class Meta(Store, dict):
    """ Concrete class encapsulating metadata stored in a ``.json`` file.

    A ``Meta`` is a ``dict`` which writes itself to ``self._path`` whenever it is called or an item is set
    through the indexer.
    """

    ext: str = '.json'  #: ext: Class attribute specifying the file extension of Meta instances.

    def __call__(self, **data: Any) -> Self:
        """ Update and store ``self``, overwriting.

        Args:
            **data: Data to update ``self`` with.
        Returns: ``self``.
        """
        self.update(data)
        with open(self._path, mode='w') as file:
            dump(self, file, indent=4)
        return self

    def __setitem__(self, key, value):
        """ Indexer sets the ``value`` indexed by ``key``, then stores ``self``."""
        super().__setitem__(key, value)
        self()

    def __init__(self, path: Store.Path, **data: Any):
        """ Construct ``self`` from a ``.json`` file or ``MetaData``.

        Args:
            path: The Path (file) to store ``self``. A ``.json`` extension is automatically appended.
            **data: The ``MetaData`` to store. If absent, ``self`` is read from ``path``,
                otherwise ``data`` is stored in ``path`` (which is overwritten if existing).
        Raises:
            FileNotFoundError: If ``data`` is absent and ``path`` does not exist.
        """
        super(Meta, self).__init__(path)  # Store.__init__ sets self._path.
        if not data:
            with open(self._path, mode='r') as file:
                data = load(file)
        super(Store, self).__init__(**data)  # Skips Store in the MRO to reach dict.__init__.
        self()

    @classmethod
    def _overwrite(cls, path: Store.Path, **data: Any) -> Self:
        """ Construct a ``Meta`` at ``path`` which holds exactly ``data``, even when ``data`` is empty."""
        if not data:
            # ``__init__`` interprets "no data" as "read ``path``", which would resurrect stale contents or raise
            # FileNotFoundError. Storing an empty ``Meta`` beforehand makes the subsequent read return ``{}``.
            with open(cls.mkdir(path), mode='w') as file:
                dump({}, file, indent=4)
        return cls(path, **data)

    @classmethod
    def create(cls, path: Store.Path, **data: Any) -> Self:
        """ Create a ``Meta`` at ``path``, overwriting.

        Args:
            path: The ``Path`` (file) to store ``self``, overwritten if existing.
                A ``.json`` extension is automatically appended.
            **data: The ``MetaData`` to store. May be empty, in which case an empty ``Meta`` is stored.
        Returns: The ``Meta`` created.
        """
        return cls._overwrite(path, **data)

    @classmethod
    def copy(cls, src: Meta, dst: Store.Path) -> Self:
        """ Copy ``src`` to ``dst``, overwriting.

        Args:
            src: The source ``Meta``.
            dst: The destination ``Path``, overwritten if existing. A ``.json`` extension is automatically appended.
        Returns: The ``Meta`` now stored at ``dst.json``.
        """
        return cls._overwrite(dst, **src)
class Table(Store):
    """ Concrete class encapsulating a ``pd.DataFrame`` backed by a ``.csv`` file.

    This class may be usefully overridden to provide bespoke read and write options for file operations.
    Subclasses should follow the template (copy and paste it)::

        class MyTable(Table):

            class Options(NamedTuple):
                read: MetaData = {'index_col': 0}    #: Read options passed to ``pd.read_csv``.
                write: MetaData = {}    #: Write options passed to ``pd.DataFrame.to_csv``.

                @classmethod
                def default(cls) -> MetaData:
                    \"\"\" Returns the default Options as ``cls.read | cls.write``.\"\"\"
                    return cls._field_defaults['read'] | cls._field_defaults['write']
    """

    class Options(NamedTuple):
        read: MetaData = {'index_col': 0}    #: Read options passed to ``pd.read_csv``.
        write: MetaData = {}    #: Write options passed to ``pd.DataFrame.to_csv``.

        @classmethod
        def default(cls) -> MetaData:
            """ Returns the default Options as ``cls.read | cls.write``."""
            return cls._field_defaults['read'] | cls._field_defaults['write']

    ext: str = '.csv'    #: Class attribute specifying the file extension of Table objects.

    writeOptions: list[str] = ['sep', 'na_rep', 'float_format']
    """ Class attribute listing kwargs which will be interpreted as write options.
    All other kwargs are interpreted as read options.
    To specify a separator, use ``delimiter`` as read option and ``sep`` as write option.
    """

    @property
    def options(self) -> MetaData:
        """ A ``dict`` of options for file operations involving ``self``.
        Any option not in ``Table.writeOptions`` is stored in ``self._options.read`` and passed to ``pd.read_csv``.
        Any option in ``Table.writeOptions`` is stored in ``self._options.write`` and passed to
        ``pd.DataFrame.to_csv``.
        The setter updates via logical or ``|=``, so existing values are retained unless explicitly updated.
        """
        return self._options.read | self._options.write

    @options.setter
    def options(self, update: MetaData):
        # Partition without mutating the caller's dict.
        write = {key: value for key, value in update.items() if key in self.writeOptions}
        read = {key: value for key, value in update.items() if key not in self.writeOptions}
        # ``NamedTuple._replace`` returns a new tuple, so the result must be re-bound for the update to stick.
        self._options = self._options._replace(read=self._options.read | read,
                                               write=self._options.write | write)

    @property
    def pd(self) -> Pd.DataFrame:
        """ The ``Pd.DataFrame`` stored in ``self``."""
        return self._pd

    @property
    def np(self) -> Np.Matrix:
        """ The ``Np.Matrix`` stored in ``self``."""
        return self.pd.to_numpy()

    @property
    def tc(self) -> Tc.Matrix:
        """ The ``Tc.Matrix`` stored in ``self``."""
        return tc.from_numpy(self.np)

    def broadcast_to(self, target_shape: Tuple[int, int], is_diagonal: bool = True) -> Self:
        """ Broadcast ``self``, then store it.

        Args:
            target_shape: The shape to broadcast to.
            is_diagonal: Whether to zero the off-diagonal elements of a square matrix.
        Returns: ``self``.
        Raises:
            IndexError: If broadcasting is impossible.
        """
        try:
            data = np.array(np.broadcast_to(self.np, target_shape))
        except ValueError as error:
            raise IndexError(f'{repr(self)} has shape {self._pd.shape} '
                             f'which cannot be broadcast to {target_shape}.') from error
        if is_diagonal and target_shape[0] > 1:
            # NOTE(review): for a non-square ``target_shape`` this yields a square matrix of the smaller dimension.
            data = np.diag(np.diagonal(data))
        if data.shape == self._pd.shape:
            return self(data)
        # In-place ``iloc`` assignment cannot resize a DataFrame, so build a new one,
        # retaining the labels of any axis whose length is unchanged.
        index = self._pd.index if data.shape[0] == self._pd.shape[0] else None
        columns = self._pd.columns if data.shape[1] == self._pd.shape[1] else None
        return self(pd.DataFrame(data, index=index, columns=columns))

    def __call__(self, data: Self | Matrix | None, **options: Any) -> Self:
        """ Update and store ``self``, overwriting.

        Args:
            data: The data updates. A ``Table`` or ``pd.DataFrame`` replaces the stored DataFrame with a copy,
                a ``Np.Matrix`` or ``Tc.Matrix`` overwrites the stored values in place (shapes must agree).
                ``None`` leaves the stored data unchanged.
            **options: Updates ``self.options``, before storing ``self``.
        Returns: ``self``.
        """
        if isinstance(data, Table):
            self._pd = data.pd.copy()
        elif isinstance(data, pd.DataFrame):
            self._pd = data.copy()
        elif isinstance(data, Np.Matrix):
            self._pd.iloc[:, :] = data
        elif isinstance(data, Tc.Matrix):
            self._pd.iloc[:, :] = data.numpy()
        self.options = options
        self._pd.to_csv(self._path, **self._options.write)
        return self

    def __init__(self, path: Store.Path, data: Self | Pd.DataFrame | None = None, **options: Any):
        """ Construct ``self`` from a ``.csv`` file or ``Pd.DataFrame``.

        Args:
            path: The ``Path`` (file) to store ``self``. A ``.csv`` extension is automatically appended.
            data: The data to store. If ``None``, ``data`` is read from ``path``,
                otherwise ``data`` is stored in ``path`` (which is overwritten if existing).
            **options: Updates the default ``self.Options()``. Keys in ``Table.writeOptions`` are write options,
                all other keys are read options.
        """
        super().__init__(path)
        self._options = self.Options()
        self.options = options
        if data is None:
            data = pd.read_csv(self._path, **self._options.read)
        self(data)

    @classmethod
    def create(cls, path: Store.Path, data: Self | Matrix | None = None, index: Pd.Index | Np.Array = None,
               columns: Pd.Index | Np.Array = None, dtype: Np.DType | None = None, copy: bool | None = None,
               **metadata) -> Self:
        """ Create a ``Table`` at ``path``, overwriting.

        Args:
            path: The ``Path`` to store this ``Table``, overwritten if existing.
                A ``.csv`` extension is automatically appended.
            data: The data to store, passed to the ``pd.DataFrame`` constructor together with ``index``,
                ``columns``, ``dtype`` and ``copy``. A ``Table`` contributes its ``pd``. See `pd.DataFrame
                <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
            index: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
            columns: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
            dtype: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
            copy: See `pd.DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_.
            **metadata: Options passed to `pd.read_csv
                <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html>`_
                or `pd.DataFrame.to_csv`_.
        Returns: The ``Table`` created.

        .. _pd.DataFrame.to_csv: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
        """
        source = data.pd if isinstance(data, Table) else data
        return cls(path, pd.DataFrame(source, index, columns, dtype, copy), **metadata)

    @classmethod
    def copy(cls, src: Self, dst: Store.Path) -> Self:
        """ Copy ``src`` to ``dst``, overwriting.

        Args:
            src: The source ``Table``.
            dst: The destination ``Path``, overwritten if existing. A ``.csv`` extension is automatically appended.
        Returns: The ``Table`` now stored at ``dst.csv``.
        """
        return cls(dst, src.pd, **src.options)
class DataBase(Store):
    """ ``NamedTables(NamedTuple)`` in a folder alongside ``Meta``. Abstract base class for any model.

    ``DataBase`` subclasses must be implemented according to the template (copy and paste it)::

        class MyDataBase(DataBase):

            class NT(NamedTuple):
                names[i]: Table | Matrix | MetaData = pd.DataFrame(defaults[names[i]].pd)    #: Comment
                ...

                def __call__(self, name: str) -> Table | Matrix | MetaData:
                    \"\"\" Returns the Table named ``name``.\"\"\"
                    return getattr(self, name)

            options: NamedTables[MetaData] = NamedTables(**{name: table.options for name, table in {}.items()})
            \"\"\" Class attribute of the form ``NamedTables(**{names[i]: options[i], ...})``.
            Override as necessary for bespoke ``Table.options``. Elements of ``options[i]`` found in
            ``Table.writeOptions`` populate ``self[i].options.write``,
            the remainder populate ``self[i].options.read``.\"\"\"

            defaultMetaData: MetaData = {'Tables': Tables.options._asdict()}
    """

    class NamedTables(NamedTuple):
        """ Must be overridden. """
        NotImplemented: Table | Matrix = pd.DataFrame(((f'Attribute type should be Table in '
                                                         f'any implementation.',),))    #: :meta private:

        def __call__(self, name: str) -> Table | Matrix | MetaData:
            """ Returns the Table named ``name``."""
            return getattr(self, name)

    options: NamedTables[MetaData] = NamedTables(**{name: table.options for name, table in {}.items()})
    """ Class attribute of the form ``NamedTables(**{names[i]: options[i], ...})``.
    Override as necessary for bespoke ``Table.options``. Elements of ``options[i]`` found in
    ``Table.writeOptions`` populate ``self[i].options.write``, the remainder populate ``self[i].options.read``."""

    #: Class attribute. Should be overridden.
    defaultMetaData: MetaData = {'Tables': options._asdict()}

    @property
    def nt(self) -> NamedTables:
        """ The ``NamedTables`` currently in ``self``."""
        return self._nt

    @property
    def meta(self) -> Meta:
        """ The ``Meta`` currently in ``self``."""
        return self._meta

    def __len__(self) -> int:
        """ Counts the ``Table`` s in ``self``. """
        return len(self._nt)

    def __getitem__(self, name: str | slice) -> Table | Tuple[Table, ...]:
        """ Indexer returns the ``Table`` (s) named or sliced by ``name``. """
        if isinstance(name, str):
            return self._nt(name)
        return self._nt[name]

    def __setitem__(self, name: str | slice, tables: Table | Matrix | Tuple[Table | Matrix, ...]):
        """ Indexer sets the ``Table`` (s) named or sliced by ``name``."""
        if isinstance(name, str):
            updates = {name: tables}
        else:
            # Pair each sliced name with the update in the same position.
            updates = {named: tables[i] for i, named in enumerate(self.names()[name])}
        self(**updates)

    def __call__(self, **tables: Table | Matrix) -> Self:
        """ Update and store ``self``, overwriting.

        Args:
            **tables: Updates to ``self`` in the form ``names[i]=Table[i], ...``.
        Returns: ``self``.
        """
        for name, table in tables.items():
            self._nt(name)(table, **self.options(name))
        return self

    def __init__(self, path: Store.Path, **tables: Table | Pd.DataFrame):
        """ Read the ``DataBase`` in ``path``. Reading is lazy: If ``names[i]`` occurs in ``**tables``
        its ``Table`` is not read, just updated.
        Overrides must call ``super().__init__(path, **tables)`` as a matter of priority.

        Args:
            path: The ``Path`` to read from.
            **tables: ``Table`` s to update those read, in the form ``names[i]=tables[i], ...``.
                Names which are absent or ``None`` are read from file. Names not in ``self.names()`` are ignored.
        Raises:
            FileNotFoundError: If ``path`` lacks ``self.meta``
                or any member of ``self.names()`` not mentioned in ``**tables``.
        """
        super().__init__(path)
        folder = Path(path)  # ``Store.Path`` admits ``str``, which does not support ``/``.
        try:
            self._meta = Meta(self._meta_in(folder))
            # ``Table`` reads from file when its data is ``None``, which is what ``tables.get`` supplies
            # for any name that is absent.
            self._nt = self.NamedTables(**{name: Table(folder / name, tables.get(name), **self.options(name))
                                           for name in self.names()})
        except FileNotFoundError:
            print(f'DataBase "{self}" is trying to read a non-existent Table. Did your script mean to call '
                  f'{type(self).__qualname__}.create("{str(self)}") '
                  f'instead of {type(self).__qualname__}("{str(self)}")?')
            raise

    @classmethod    # Class Property
    def names(cls) -> Tuple[str, ...]:
        """ ``(names[i], ...)`` of table names for this ``Tables`` class."""
        return cls.NamedTables._fields

    @classmethod    # Class Property
    def defaults(cls) -> Dict[str, Pd.DataFrame]:
        """ ``{names[i]: Pd.DataFrame[i], ...}`` of default tables for this ``Tables`` class."""
        return cls.NamedTables._field_defaults

    @classmethod
    def create(cls, path: Store.Path, **tables_and_meta: Table | Pd.DataFrame | MetaData) -> Self:
        """ Create a ``DataBase`` in ``path``.

        Args:
            path: The folder to store the ``DataBase`` in.
                Need not exist, any existing ``Tables`` will be overwritten if it does.
            **tables_and_meta: Data to update ``cls.defaults()``, in the form ``names[i]=tables[i]``,
                and optional ``MetaData`` to update ``cls.defaultMetaData`` in the form ``meta=MetaData``.
        Returns: The ``DataBase`` created.
        """
        # ``meta`` must be popped before the remaining kwargs are merged into the table defaults.
        meta = tables_and_meta.pop('meta', {})
        Meta.create(cls._meta_in(path), **(cls.defaultMetaData | meta))
        return cls(path, **(cls.defaults() | tables_and_meta))

    @classmethod
    def copy(cls, src: Self, dst: Store.Path) -> Self:
        """ Copy ``src`` to ``dst``, overwriting any files in common.

        Args:
            src: The source ``DataBase``.
            dst: The destination ``Path``, which may or may not exist.
        Returns: The ``DataBase`` now stored in ``dst``.
        """
        return cls.create(dst, meta=src.meta, **src.nt._asdict())

    @classmethod
    def delete(cls, path: Store.Path) -> Path:
        """ Delete all ``DataBase`` files in ``path``, retaining ``path`` and any other files it contains.
        If you wish to delete ``path`` entirely, use ``Store.delete(path)`` instead.

        Args:
            path: ``Path`` to the ``DataBase`` to delete.
        Returns: ``path``, which still exists.
        """
        path = Path(path)
        Meta.delete(cls._meta_in(path))
        for name in cls.names():
            Table.delete(path / name)
        return path

    @staticmethod
    def _meta_in(path: Store.Path) -> Path:
        """ The extension-less ``Path`` of the ``Meta`` belonging to the ``DataBase`` in ``path``."""
        return Path(path) / 'meta'