3
0
Fork 0

feat: add basic batch feature, data model and partial handler

hopefully data model is complete enough for now, but handler does not
yet have all methods, e.g. execute()
This commit is contained in:
Lance Edgar 2024-12-13 20:38:00 -06:00
parent 51accc5a93
commit a514d9cfba
9 changed files with 813 additions and 1 deletions

View file

@ -0,0 +1,6 @@
``wuttjamaican.batch``
======================
.. automodule:: wuttjamaican.batch
   :members:

View file

@ -0,0 +1,6 @@
``wuttjamaican.db.model.batch``
===============================
.. automodule:: wuttjamaican.db.model.batch
   :members:

View file

@ -76,6 +76,42 @@ Glossary
See also :class:`~wuttjamaican.auth.AuthHandler`.
batch
This refers to a process whereby bulk data operations may be
performed, with preview and other tools to allow the user to
refine as needed before "executing" the batch.
The term "batch" may refer to such a feature overall, or the
:term:`data model` used, or the specific data for a single batch,
etc.
See also :term:`batch handler` and :term:`batch row`, and the
:class:`~wuttjamaican.db.model.batch.BatchMixin` base class.
batch handler
This refers to a :term:`handler` meant to process a given type of
:term:`batch`.
There may be multiple handlers registered for a given
:term:`batch type`, but (usually) only one will be configured for
use.
batch row
A row of data within a :term:`batch`.
May also refer to the :term:`data model` class used for such a row.
See also the :class:`~wuttjamaican.db.model.batch.BatchRowMixin`
base class.
batch type
This term distinguishes :term:`batches <batch>` according to which
underlying table is used to store their data.
For instance a "pricing batch" would use one table, whereas an
"inventory batch" would use another. And each "type" would be
managed by its own :term:`batch handler`.
command
A top-level command line interface for the app. Note that
top-level commands don't usually "do" anything per se, and are

View file

@ -64,6 +64,7 @@ Contents
api/wuttjamaican.app
api/wuttjamaican.auth
api/wuttjamaican.batch
api/wuttjamaican.cli
api/wuttjamaican.cli.base
api/wuttjamaican.cli.make_appdir
@ -75,6 +76,7 @@ Contents
api/wuttjamaican.db.model
api/wuttjamaican.db.model.auth
api/wuttjamaican.db.model.base
api/wuttjamaican.db.model.batch
api/wuttjamaican.db.model.upgrades
api/wuttjamaican.db.sess
api/wuttjamaican.db.util

217
src/wuttjamaican/batch.py Normal file
View file

@ -0,0 +1,217 @@
# -*- coding: utf-8; -*-
################################################################################
#
# WuttJamaican -- Base package for Wutta Framework
# Copyright © 2023-2024 Lance Edgar
#
# This file is part of Wutta Framework.
#
# Wutta Framework is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# Wutta Framework is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Wutta Framework. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
Batch Handlers
"""
from wuttjamaican.app import GenericHandler
class BatchHandler(GenericHandler):
    """
    Base class and *partial* default implementation for :term:`batch
    handlers <batch handler>`.

    This handler class "works as-is" but does not actually do
    anything.  Subclass must implement logic for various things as
    needed, e.g.:

    * :attr:`model_class`
    * :meth:`init_batch()`
    * :meth:`should_populate()`
    * :meth:`populate()`
    * :meth:`refresh_row()`
    """

    @property
    def model_class(self):
        """
        Reference to the batch :term:`data model` class which this
        batch handler is meant to work with.

        This is expected to be a subclass of
        :class:`~wuttjamaican.db.model.batch.BatchMixin` (among other
        classes).

        Subclass must define this; default is not implemented.

        :raises NotImplementedError: Always, unless the subclass
           overrides this attribute.
        """
        raise NotImplementedError("You must set the 'model_class' attribute "
                                  f"for class '{self.__class__.__name__}'")

    def make_batch(self, session, progress=None, **kwargs):
        """
        Make and return a new batch (:attr:`model_class`) instance.

        This will create the new batch, and auto-assign its
        :attr:`~wuttjamaican.db.model.batch.BatchMixin.id` value
        (unless caller specifies it) by calling
        :meth:`consume_batch_id()`.

        It then will call :meth:`init_batch()` to perform any custom
        initialization needed.

        Therefore callers should use this ``make_batch()`` method, but
        subclass should override :meth:`init_batch()` instead (if
        needed).

        :param session: Current :term:`db session`.

        :param progress: Optional progress indicator factory.

        :param \\**kwargs: Additional kwargs to pass to the batch
           constructor.  These are also passed along to
           :meth:`init_batch()`.

        :returns: New batch; instance of :attr:`model_class`.
        """
        # generate new ID unless caller specifies
        if 'id' not in kwargs:
            kwargs['id'] = self.consume_batch_id(session)

        # make batch
        batch = self.model_class(**kwargs)
        self.init_batch(batch, session=session, progress=progress, **kwargs)
        return batch

    def consume_batch_id(self, session, as_str=False):
        """
        Fetch a new batch ID from the counter, and return it.

        This may be called automatically from :meth:`make_batch()`.

        :param session: Current :term:`db session`.

        :param as_str: Indicates the return value should be a string
           instead of integer.

        :returns: Batch ID as integer, or zero-padded 8-char string.
        """
        db = self.app.get_db_handler()
        batch_id = db.next_counter_value(session, 'batch_id')
        if as_str:
            return f'{batch_id:08d}'
        return batch_id

    def init_batch(self, batch, session=None, progress=None, **kwargs):
        """
        Initialize a new batch.

        This is called automatically from :meth:`make_batch()`.

        Default logic does nothing; subclass should override if needed.

        .. note::
           *Population* of the new batch should **not** happen here;
           see instead :meth:`populate()`.

        :param batch: The newly-made batch.

        :param session: Current :term:`db session`.

        :param progress: Optional progress indicator factory.

        :param \\**kwargs: Same kwargs which were given to the batch
           constructor by :meth:`make_batch()`.
        """

    def should_populate(self, batch):
        """
        Must return true or false, indicating whether the given batch
        should be populated from initial data source(s).

        So, true means fill the batch with data up front - by calling
        :meth:`do_populate()` - and false means the batch will start
        empty.

        Default logic here always returns false; subclass should
        override if needed.
        """
        return False

    def do_populate(self, batch, progress=None):
        """
        Populate the batch from initial data source(s).

        This method is a convenience wrapper, which ultimately will
        call :meth:`populate()` for the implementation logic.

        Therefore callers should use this ``do_populate()`` method,
        but subclass should override :meth:`populate()` instead (if
        needed).

        See also :meth:`should_populate()` - you should check that
        before calling ``do_populate()``.

        :param batch: The batch to populate.

        :param progress: Optional progress indicator factory.
        """
        self.populate(batch, progress=progress)

    def populate(self, batch, progress=None):
        """
        Populate the batch from initial data source(s).

        It is assumed that the data source(s) to be used will be known
        by inspecting various properties of the batch itself.

        Subclass should override this method to provide the
        implementation logic.  It may populate some batches
        differently based on the batch attributes, or it may populate
        them all the same.  Whatever is needed.

        Callers should always use :meth:`do_populate()` instead of
        calling ``populate()`` directly.

        Default logic does nothing.
        """

    def make_row(self, **kwargs):
        """
        Make a new row for the batch.  This will be an instance of
        :attr:`~wuttjamaican.db.model.batch.BatchMixin.__row_class__`.

        Note that the row will **not** be added to the batch; that
        should be done with :meth:`add_row()`.

        :param \\**kwargs: Passed as-is to the row class constructor.

        :returns: A new row object, which does *not* yet belong to any batch.
        """
        return self.model_class.__row_class__(**kwargs)

    def add_row(self, batch, row):
        """
        Add the given row to the given batch.

        This assumes a *new* row which does not yet belong to a batch,
        as returned by :meth:`make_row()`.

        It will add it to batch
        :attr:`~wuttjamaican.db.model.batch.BatchMixin.rows`, call
        :meth:`refresh_row()` for it, and update the
        :attr:`~wuttjamaican.db.model.batch.BatchMixin.row_count`.

        If the batch belongs to a session, autoflush is suspended
        while the row is appended and refreshed.
        """
        import contextlib

        session = self.app.get_session(batch)

        # NOTE(review): get_session() presumably returns None when the
        # batch is not attached to any session - confirm.  In that
        # case there is no autoflush to suppress, so skip the context
        # manager instead of failing on ``None.no_autoflush``.
        with contextlib.ExitStack() as stack:
            if session is not None:
                stack.enter_context(session.no_autoflush)
            batch.rows.append(row)
            self.refresh_row(row)

        # row_count may still be None for a batch which has not been
        # flushed yet, hence the fallback to zero
        batch.row_count = (batch.row_count or 0) + 1

    def refresh_row(self, row):
        """
        Update the given batch row as needed, to reflect latest data.

        This method is a bit of a catch-all in that it could be used
        to do any of the following (etc.):

        * fetch latest "live" data for comparison with batch input data
        * calculate some data values based on the previous step
        * set row status based on other row attributes

        This method is called when the row is first added to the batch
        via :meth:`add_row()` - but may be called multiple times after
        that depending on the workflow.

        Default logic does nothing.
        """

View file

@ -30,6 +30,7 @@ This namespace exposes the following:
* :class:`~wuttjamaican.db.model.base.Base`
* :func:`~wuttjamaican.db.util.uuid_column()`
* :func:`~wuttjamaican.db.util.uuid_fk_column()`
* :class:`~wuttjamaican.db.util.UUID`
And the :term:`data models <data model>`:
@ -40,10 +41,16 @@ And the :term:`data models <data model>`:
* :class:`~wuttjamaican.db.model.auth.User`
* :class:`~wuttjamaican.db.model.auth.UserRole`
* :class:`~wuttjamaican.db.model.upgrades.Upgrade`
And the :term:`batch` model base/mixin classes:
* :class:`~wuttjamaican.db.model.batch.BatchMixin`
* :class:`~wuttjamaican.db.model.batch.BatchRowMixin`
"""
from wuttjamaican.db.util import uuid_column, uuid_fk_column
from wuttjamaican.db.util import uuid_column, uuid_fk_column, UUID
from .base import Base, Setting, Person
from .auth import Role, Permission, User, UserRole
from .upgrades import Upgrade
from .batch import BatchMixin, BatchRowMixin

View file

@ -0,0 +1,416 @@
# -*- coding: utf-8; -*-
################################################################################
#
# WuttJamaican -- Base package for Wutta Framework
# Copyright © 2023-2024 Lance Edgar
#
# This file is part of Wutta Framework.
#
# Wutta Framework is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# Wutta Framework is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Wutta Framework. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
Batch data models
"""
import datetime
import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.ext.orderinglist import ordering_list
from wuttjamaican.db.model import uuid_column, uuid_fk_column, User
from wuttjamaican.db.util import UUID
class BatchMixin:
    """
    Mixin base class for any :term:`data model` which represents a
    :term:`batch`.

    The companion :class:`BatchRowMixin` should be used for the row
    model.

    A batch model (table) is only useful once at least one
    :term:`batch handler` exists which can process data for that
    :term:`batch type`.

    .. attribute:: __row_class__

       Reference to the specific :term:`data model` class used for the
       :term:`batch rows <batch row>`.

       This will be a subclass of :class:`BatchRowMixin` (among other
       classes).

       There is no need to set this when defining the batch model; it
       is assigned automatically based on
       :attr:`BatchRowMixin.__batch_class__`.

    .. attribute:: id

       Numeric ID for the batch, unique across all batches (regardless
       of type).

       See also :attr:`id_str`.

    .. attribute:: description

       Simple description for the batch.

    .. attribute:: notes

       Arbitrary notes for the batch.

    .. attribute:: rows

       List of data rows for the batch, aka. :term:`batch rows <batch
       row>`.

       Each will be an instance of :class:`BatchRowMixin` (among other
       base classes).

    .. attribute:: row_count

       Cached row count for the batch, i.e. how many :attr:`rows` it has.

       No guarantees perhaps, but this should ideally be accurate (it
       ultimately depends on the :term:`batch handler`
       implementation).

    .. attribute:: STATUS

       Dict of possible batch status codes and their human-readable
       names.

       Each key will be a possible :attr:`status_code` and the
       corresponding value will be the human-readable name.

       See also :attr:`status_text` for when more detail/subtlety is
       needed.

       Typically each "key" (code) is also defined as its own
       "constant" on the model class.  For instance::

          from collections import OrderedDict
          from wuttjamaican.db import model

          class MyBatch(model.BatchMixin, model.Base):
              \""" my custom batch \"""

              STATUS_INCOMPLETE = 1
              STATUS_EXECUTABLE = 2

              STATUS = OrderedDict([
                  (STATUS_INCOMPLETE, "incomplete"),
                  (STATUS_EXECUTABLE, "executable"),
              ])

              # TODO: column definitions...

       And in fact, the above status definition is the built-in
       default.  However it is expected for subclass to overwrite the
       definition entirely (in similar fashion to above) when needed.

       .. note::
          There is not any built-in logic around these integer codes;
          subclass can use whichever codes the developer prefers.

          Of course, once you define one, if any live batches use it,
          you should not then change its fundamental meaning (although
          you can change the human-readable text).

          It's recommended to use
          :class:`~python:collections.OrderedDict` (as shown above) to
          ensure the possible status codes are displayed in the
          correct order, when applicable.

    .. attribute:: status_code

       Status code for the batch as a whole.  This indicates whether
       the batch is "okay" and ready to execute, or (why) not etc.

       This must correspond to an existing key within the
       :attr:`STATUS` dict.

       See also :attr:`status_text`.

    .. attribute:: status_text

       Text which may (briefly) further explain the batch
       :attr:`status_code`, if needed.

       For example, assuming built-in default :attr:`STATUS`
       definition::

          batch.status_code = batch.STATUS_INCOMPLETE
          batch.status_text = "cannot execute batch because it is missing something"

    .. attribute:: created

       When the batch was first created.

    .. attribute:: created_by

       Reference to the :class:`~wuttjamaican.db.model.auth.User` who
       first created the batch.

    .. attribute:: executed

       When the batch was executed.

    .. attribute:: executed_by

       Reference to the :class:`~wuttjamaican.db.model.auth.User` who
       executed the batch.
    """

    @declared_attr
    def __table_args__(cls):
        return cls.__default_table_args__()

    @classmethod
    def __default_table_args__(cls):
        # indirection point; the default is just the batch constraints
        return cls.__batch_table_args__()

    @classmethod
    def __batch_table_args__(cls):
        # one foreign key to the user table per "who did it" column
        user_columns = ('created_by_uuid', 'executed_by_uuid')
        return tuple(sa.ForeignKeyConstraint([column], ['user.uuid'])
                     for column in user_columns)

    @declared_attr
    def batch_key(cls):
        # the table name is what identifies the batch type
        return cls.__tablename__

    uuid = uuid_column()

    id = sa.Column(sa.Integer(), nullable=False)
    description = sa.Column(sa.String(length=255), nullable=True)
    notes = sa.Column(sa.Text(), nullable=True)
    row_count = sa.Column(sa.Integer(), nullable=True, default=0)

    STATUS_INCOMPLETE = 1
    STATUS_EXECUTABLE = 2

    STATUS = {
        STATUS_INCOMPLETE: "incomplete",
        STATUS_EXECUTABLE: "executable",
    }

    status_code = sa.Column(sa.Integer(), nullable=True)
    status_text = sa.Column(sa.String(length=255), nullable=True)

    # NOTE(review): datetime.now() without a tz yields a naive local
    # timestamp, while the column is declared timezone-aware - confirm
    # this is intended.
    created = sa.Column(sa.DateTime(timezone=True), nullable=False,
                        default=datetime.datetime.now)
    created_by_uuid = sa.Column(UUID(), nullable=False)

    @declared_attr
    def created_by(cls):
        # lambdas defer evaluation until the mappers are configured
        return orm.relationship(
            User,
            primaryjoin=lambda: User.uuid == cls.created_by_uuid,
            foreign_keys=lambda: [cls.created_by_uuid])

    executed = sa.Column(sa.DateTime(timezone=True), nullable=True)
    executed_by_uuid = sa.Column(UUID(), nullable=True)

    @declared_attr
    def executed_by(cls):
        # lambdas defer evaluation until the mappers are configured
        return orm.relationship(
            User,
            primaryjoin=lambda: User.uuid == cls.executed_by_uuid,
            foreign_keys=lambda: [cls.executed_by_uuid])

    def __repr__(self):
        return f"{self.__class__.__name__}(uuid={self.uuid!r})"

    def __str__(self):
        if not self.id:
            return "(new)"
        return self.id_str

    @property
    def id_str(self):
        """
        The batch :attr:`id` rendered as a string, zero-padded to 8
        digits::

           batch.id = 42
           print(batch.id_str) # => '00000042'

        Evaluates to ``None`` when the batch has no (truthy) ID yet.
        """
        if not self.id:
            return None
        return f'{self.id:08d}'
class BatchRowMixin:
    """
    Mixin base class for any :term:`data model` which represents a
    :term:`batch row`.

    The companion :class:`BatchMixin` should be used for the (parent)
    batch model.

    .. attribute:: __batch_class__

       Reference to the :term:`data model` for the parent
       :term:`batch` class.

       This will be a subclass of :class:`BatchMixin` (among other
       classes).

       When defining the batch row model, you must set this attribute
       explicitly!  And then :attr:`BatchMixin.__row_class__` will be
       set automatically to match.

    .. attribute:: batch

       Reference to the parent :term:`batch` to which the row belongs.

       This will be an instance of :class:`BatchMixin` (among other
       base classes).

    .. attribute:: sequence

       Sequence (aka. line) number for the row, within the parent
       batch.  This is 1-based so the first row has sequence 1, etc.

    .. attribute:: STATUS

       Dict of possible row status codes and their human-readable
       names.

       Each key will be a possible :attr:`status_code` and the
       corresponding value will be the human-readable name.

       See also :attr:`status_text` for when more detail/subtlety is
       needed.

       Typically each "key" (code) is also defined as its own
       "constant" on the model class.  For instance::

          from collections import OrderedDict
          from wuttjamaican.db import model

          class MyBatchRow(model.BatchRowMixin, model.Base):
              \""" my custom batch row \"""

              STATUS_INVALID = 1
              STATUS_GOOD_TO_GO = 2

              STATUS = OrderedDict([
                  (STATUS_INVALID, "invalid"),
                  (STATUS_GOOD_TO_GO, "good to go"),
              ])

              # TODO: column definitions...

       Whereas there is a built-in default for the
       :attr:`BatchMixin.STATUS`, there is no built-in default defined
       for the ``BatchRowMixin.STATUS``.  Subclass must overwrite the
       definition entirely, in similar fashion to above.

       .. note::
          There is not any built-in logic around these integer codes;
          subclass can use whichever codes the developer prefers.

          Of course, once you define one, if any live batches use it,
          you should not then change its fundamental meaning (although
          you can change the human-readable text).

          It's recommended to use
          :class:`~python:collections.OrderedDict` (as shown above) to
          ensure the possible status codes are displayed in the
          correct order, when applicable.

    .. attribute:: status_code

       Current status code for the row.  This indicates if the row is
       "good to go" or has "warnings" or is outright "invalid" etc.

       This must correspond to an existing key within the
       :attr:`STATUS` dict.

       See also :attr:`status_text`.

    .. attribute:: status_text

       Text which may (briefly) further explain the row
       :attr:`status_code`, if needed.

       For instance, assuming the example :attr:`STATUS` definition
       shown above::

          row.status_code = row.STATUS_INVALID
          row.status_text = "input data for this row is missing fields: foo, bar"

    .. attribute:: modified

       Last modification time of the row.  This should be
       automatically set when the row is first created, as well as
       anytime it's updated thereafter.
    """

    uuid = uuid_column()

    @declared_attr
    def __table_args__(cls):
        return cls.__default_table_args__()

    @classmethod
    def __default_table_args__(cls):
        # indirection point; the default is just the row constraints
        return cls.__batchrow_table_args__()

    @classmethod
    def __batchrow_table_args__(cls):
        # the row table points back at whichever table the parent
        # batch class is stored in
        parent_table = cls.__batch_class__.__tablename__
        parent_fk = sa.ForeignKeyConstraint(['batch_uuid'],
                                            [f'{parent_table}.uuid'])
        return (parent_fk,)

    batch_uuid = sa.Column(UUID(), nullable=False)

    @declared_attr
    def batch(cls):
        parent = cls.__batch_class__

        # tell the parent batch class which row class belongs to it
        parent.__row_class__ = cls

        # `Batch.rows` has to be established from here rather than
        # within the batch mixin, since the row class does not exist
        # yet at the time the batch class is defined.  The ordering
        # list maintains `sequence` starting from 1.
        parent.rows = orm.relationship(
            cls,
            order_by=lambda: cls.sequence,
            collection_class=ordering_list('sequence', count_from=1),
            cascade='all, delete-orphan',
            back_populates='batch')

        # and this is the `BatchRow.batch` side of the same relationship
        return orm.relationship(parent, back_populates='rows')

    sequence = sa.Column(sa.Integer(), nullable=False)

    # intentionally empty; subclass is expected to replace this
    STATUS = {}

    status_code = sa.Column(sa.Integer(), nullable=True)
    status_text = sa.Column(sa.String(length=255), nullable=True)

    modified = sa.Column(sa.DateTime(timezone=True), nullable=True,
                         default=datetime.datetime.now,
                         onupdate=datetime.datetime.now)

View file

@ -0,0 +1,52 @@
# -*- coding: utf-8; -*-
import uuid as _uuid
from wuttjamaican.testing import DataTestCase
try:
    import sqlalchemy as sa
    from wuttjamaican.db import model
    from wuttjamaican.db.model import batch as mod
except ImportError:
    # the tests below are only defined when the db dependencies import
    pass
else:

    class TestBatchMixin(DataTestCase):
        """Tests for the batch model mixin."""

        def test_basic(self):

            class MyBatch(mod.BatchMixin, model.Base):
                __tablename__ = 'testing_mybatch'

            # the new table should get created along with the rest
            engine = self.session.bind
            model.Base.metadata.create_all(bind=engine)
            reflected = sa.MetaData()
            reflected.reflect(engine)
            self.assertIn('testing_mybatch', reflected.tables)

            # string representations
            batch_uuid = _uuid.UUID('0675cdac-ffc9-7690-8000-6023de1c8cfd')
            batch = MyBatch(id=42, uuid=batch_uuid)
            self.assertEqual(
                repr(batch),
                "MyBatch(uuid=UUID('0675cdac-ffc9-7690-8000-6023de1c8cfd'))")
            self.assertEqual(str(batch), "00000042")

    class TestBatchRowMixin(DataTestCase):
        """Tests for the batch row model mixin."""

        def test_basic(self):

            class MyBatch2(mod.BatchMixin, model.Base):
                __tablename__ = 'testing_mybatch2'

            class MyBatchRow2(mod.BatchRowMixin, model.Base):
                __tablename__ = 'testing_mybatch_row2'
                __batch_class__ = MyBatch2

            # both new tables should get created along with the rest
            engine = self.session.bind
            model.Base.metadata.create_all(bind=engine)
            reflected = sa.MetaData()
            reflected.reflect(engine)
            for table_name in ('testing_mybatch2', 'testing_mybatch_row2'):
                self.assertIn(table_name, reflected.tables)

            # nb. this gives coverage but doesn't really test much
            batch_uuid = _uuid.UUID('0675cdac-ffc9-7690-8000-6023de1c8cfd')
            batch = MyBatch2(id=42, uuid=batch_uuid)
            row = MyBatchRow2()
            batch.rows.append(row)

70
tests/test_batch.py Normal file
View file

@ -0,0 +1,70 @@
# -*- coding: utf-8; -*-
from wuttjamaican import batch as mod
try:
    import sqlalchemy as sa
    from wuttjamaican.db import model
    from wuttjamaican.testing import DataTestCase
except ImportError:
    # the tests below are only defined when the db dependencies import
    pass
else:

    class MockBatch(model.BatchMixin, model.Base):
        __tablename__ = 'testing_batch_mock'

    class MockBatchRow(model.BatchRowMixin, model.Base):
        __tablename__ = 'testing_batch_mock_row'
        __batch_class__ = MockBatch

    class MockBatchHandler(mod.BatchHandler):
        model_class = MockBatch

    class TestBatchHandler(DataTestCase):
        """Tests for the base batch handler, via a mock subclass."""

        def make_handler(self, **kwargs):
            return MockBatchHandler(self.config, **kwargs)

        def test_model_class(self):
            # nb. the base handler does not define a model class
            handler = mod.BatchHandler(self.config)
            with self.assertRaises(NotImplementedError):
                getattr(handler, 'model_class')

        def test_make_batch(self):
            handler = self.make_handler()
            new_batch = handler.make_batch(self.session)
            self.assertIsInstance(new_batch, MockBatch)

        def test_consume_batch_id(self):
            handler = self.make_handler()

            # integer IDs increment by one
            first = handler.consume_batch_id(self.session)
            second = handler.consume_batch_id(self.session)
            self.assertEqual(second, first + 1)

            # string form is zero-padded
            third = handler.consume_batch_id(self.session, as_str=True)
            expected = f'{first + 2:08d}'
            self.assertEqual(third, expected)

        def test_should_populate(self):
            handler = self.make_handler()
            new_batch = handler.make_batch(self.session)
            self.assertFalse(handler.should_populate(new_batch))

        def test_do_populate(self):
            handler = self.make_handler()
            new_batch = handler.make_batch(self.session)
            # nb. coverage only; tests nothing
            handler.do_populate(new_batch)

        def test_make_row(self):
            handler = self.make_handler()
            new_row = handler.make_row()
            self.assertIsInstance(new_row, MockBatchRow)

        def test_add_row(self):
            handler = self.make_handler()
            new_batch = handler.make_batch(self.session)
            self.session.add(new_batch)
            new_row = handler.make_row()

            # count is not set until the first row is added
            self.assertIsNone(new_batch.row_count)
            handler.add_row(new_batch, new_row)
            self.assertEqual(new_batch.row_count, 1)