From 3585eca65b79c0d08155edaf63131898bf33ad70 Mon Sep 17 00:00:00 2001 From: Lance Edgar Date: Sat, 14 Dec 2024 19:39:02 -0600 Subject: [PATCH] fix: add basic execution methods for batch handler also logic for batch data files, and deletion --- docs/conf.py | 1 + docs/glossary.rst | 6 + pyproject.toml | 1 + src/wuttjamaican/app.py | 18 +++ src/wuttjamaican/batch.py | 233 ++++++++++++++++++++++++++++- src/wuttjamaican/db/model/batch.py | 9 +- tests/test_app.py | 8 + tests/test_batch.py | 128 ++++++++++++++++ 8 files changed, 402 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 56ce5da..8dbfd79 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,6 +30,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] intersphinx_mapping = { 'alembic': ('https://alembic.sqlalchemy.org/en/latest/', None), + 'humanize': ('https://humanize.readthedocs.io/en/stable/', None), 'mako': ('https://docs.makotemplates.org/en/latest/', None), 'packaging': ('https://packaging.python.org/en/latest/', None), 'python': ('https://docs.python.org/3/', None), diff --git a/docs/glossary.rst b/docs/glossary.rst index 137aefe..58484a2 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -112,6 +112,12 @@ Glossary "inventory batch" would use another. And each "type" would be managed by its own :term:`batch handler`. + The batch type is set on the model class but is also available on + the handler: + + * :attr:`wuttjamaican.db.model.batch.BatchMixin.batch_type` + * :attr:`wuttjamaican.batch.BatchHandler.batch_type` + command A top-level command line interface for the app. Note that top-level commands don't usually "do" anything per se, and are diff --git a/pyproject.toml b/pyproject.toml index ec67103..efb2cfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ ] requires-python = ">= 3.8" dependencies = [ + "humanize", 'importlib-metadata; python_version < "3.10"', "importlib_resources ; python_version < '3.9'", "Mako", diff --git a/src/wuttjamaican/app.py b/src/wuttjamaican/app.py index 19766eb..102d5db 100644 --- a/src/wuttjamaican/app.py +++ b/src/wuttjamaican/app.py @@ -24,11 +24,14 @@ WuttJamaican - app handler """ +import datetime import importlib import os import sys import warnings +import humanize + from wuttjamaican.util import (load_entry_points, load_object, make_title, make_uuid, make_true_uuid, progress_loop, resource_path) @@ -714,6 +717,21 @@ class AppHandler: if value is not None: return value.strftime(self.display_format_datetime) + def render_time_ago(self, value): + """ + Return a human-friendly string, indicating how long ago + something occurred. + + Default logic uses :func:`humanize:humanize.naturaltime()` for + the rendering. + + :param value: Instance of :class:`python:datetime.datetime` or + :class:`python:datetime.timedelta`. + + :returns: Text to display. + """ + return humanize.naturaltime(value) + ############################## # getters for other handlers ############################## diff --git a/src/wuttjamaican/batch.py b/src/wuttjamaican/batch.py index ec0f708..98325ff 100644 --- a/src/wuttjamaican/batch.py +++ b/src/wuttjamaican/batch.py @@ -24,6 +24,10 @@ Batch Handlers """ +import datetime +import os +import shutil + from wuttjamaican.app import GenericHandler @@ -58,6 +62,17 @@ class BatchHandler(GenericHandler): raise NotImplementedError("You must set the 'model_class' attribute " f"for class '{self.__class__.__name__}'") + @property + def batch_type(self): + """ + Convenience property to return the :term:`batch type` which + the current handler is meant to process. + + This is effectively an alias to + :attr:`~wuttjamaican.db.model.batch.BatchMixin.batch_type`. + """ + return self.model_class.batch_type + def make_batch(self, session, progress=None, **kwargs): """ Make and return a new batch (:attr:`model_class`) instance. @@ -124,6 +139,71 @@ class BatchHandler(GenericHandler): see instead :meth:`populate()`. """ + def get_data_path(self, batch=None, filename=None, makedirs=False): + """ + Returns a path to batch data file(s). + + This can be used to return any of the following, depending on + how it's called: + + * path to root data dir for handler's :attr:`batch_type` + * path to data dir for specific batch + * path to specific filename, for specific batch + + For instance:: + + # nb. assuming batch_type = 'inventory' + batch = handler.make_batch(session, created_by=user) + + handler.get_data_path() + # => env/app/data/batch/inventory + + handler.get_data_path(batch) + # => env/app/data/batch/inventory/03/7721fe56c811ef9223743af49773a4 + + handler.get_data_path(batch, 'counts.csv') + # => env/app/data/batch/inventory/03/7721fe56c811ef9223743af49773a4/counts.csv + + :param batch: Optional batch instance. If specified, will + return path for this batch in particular. Otherwise will + return the "generic" path for handler's batch type. + + :param filename: Optional filename, in context of the batch. + If set, the returned path will include this filename. Only + relevant if ``batch`` is also specified. + + :param makedirs: Whether the folder(s) should be created, if + not already present. + + :returns: Path to root data dir for handler's batch type. + """ + # get root storage path + rootdir = self.config.get(f'{self.config.appname}.batch.storage_path') + if not rootdir: + appdir = self.app.get_appdir() + rootdir = os.path.join(appdir, 'data', 'batch') + + # get path for this batch type + path = os.path.join(rootdir, self.batch_type) + + # give more precise path, if batch was specified + if batch: + uuid = batch.uuid.hex + # nb. we use *last 2 chars* for first part of batch uuid + # path. this is because uuid7 is mostly sequential, so + # first 2 chars do not vary enough. + path = os.path.join(path, uuid[-2:], uuid[:-2]) + + # maybe create data dir + if makedirs and not os.path.exists(path): + os.makedirs(path) + + # append filename if applicable + if batch and filename: + path = os.path.join(path, filename) + + return path + def should_populate(self, batch): """ Must return true or false, indicating whether the given batch @@ -208,10 +288,161 @@ class BatchHandler(GenericHandler): to do any of the following (etc.): * fetch latest "live" data for comparison with batch input data - * calculate some data values based on the previous step + * (re-)calculate row values based on latest data * set row status based on other row attributes This method is called when the row is first added to the batch via :meth:`add_row()` - but may be called multiple times after that depending on the workflow. """ + + def why_not_execute(self, batch, user=None, **kwargs): + """ + Returns text indicating the reason (if any) that a given batch + should *not* be executed. + + By default the only reason a batch cannot be executed, is if + it has already been executed. But in some cases it should be + more restrictive; hence this method. + + A "brief but descriptive" message should be returned, which + may be displayed to the user e.g. so they understand why the + execute feature is not allowed for the batch. (There is no + need to check if batch is already executed since other logic + handles that.) + + If no text is returned, the assumption will be made that this + batch is safe to execute. + + :param batch: The batch in question; potentially eligible for + execution. + + :param user: :class:`~wuttjamaican.db.model.auth.User` who + might choose to execute the batch. + + :param \**kwargs: Execution kwargs for the batch, if known. + Should be similar to those for :meth:`execute()`. + + :returns: Text reason to prevent execution, or ``None``. + + The user interface should normally check this and if it + returns anything, that should be shown and the user should be + prevented from executing the batch. + + However :meth:`do_execute()` will also call this method, and + raise a ``RuntimeError`` if text was returned. This is done + out of safety, to avoid relying on the user interface. + """ + + def describe_execution(self, batch, user=None, **kwargs): + """ + This should return some text which briefly describes what will + happen when the given batch is executed. + + Note that Markdown is supported here, e.g.:: + + def describe_execution(self, batch, **kwargs): + return \""" + + This batch does some crazy things! + + **you cannot possibly fathom it** + + here are a few of them: + + - first + - second + - third + \""" + + Nothing is returned by default; subclass should define. + + :param batch: The batch in question; eligible for execution. + + :param user: Reference to current user who might choose to + execute the batch. + + :param \**kwargs: Execution kwargs for the batch; should be + similar to those for :meth:`execute()`. + + :returns: Markdown text describing batch execution. + """ + + def do_execute(self, batch, user, progress=None, **kwargs): + """ + Perform the execution steps for a batch. + + This first calls :meth:`why_not_execute()` to make sure this + is even allowed. + + If so, it calls :meth:`execute()` and then updates + :attr:`~wuttjamaican.db.model.batch.BatchMixin.executed` and + :attr:`~wuttjamaican.db.model.batch.BatchMixin.executed_by` on + the batch, to reflect current time+user. + + So, callers should use ``do_execute()``, and subclass should + override :meth:`execute()`. + + :param batch: The :term:`batch` to execute; instance of + :class:`~wuttjamaican.db.model.batch.BatchMixin` (among + other classes). + + :param user: :class:`~wuttjamaican.db.model.auth.User` who is + executing the batch. + + :param progress: Optional progress indicator factory. + + :param \**kwargs: Additional kwargs as needed. These are + passed as-is to :meth:`why_not_execute()` and + :meth:`execute()`. + """ + if batch.executed: + raise ValueError(f"batch has already been executed: {batch}") + + reason = self.why_not_execute(batch, user=user, **kwargs) + if reason: + raise RuntimeError(f"batch execution not allowed: {reason}") + + self.execute(batch, user=user, progress=progress, **kwargs) + batch.executed = datetime.datetime.now() + batch.executed_by = user + + def execute(self, batch, user=None, progress=None, **kwargs): + """ + Execute the given batch. + + Callers should use :meth:`do_execute()` instead, which calls + this method automatically. + + This does nothing by default; subclass must define logic. + + :param batch: A :term:`batch`; instance of + :class:`~wuttjamaican.db.model.batch.BatchMixin` (among + other classes). + + :param user: :class:`~wuttjamaican.db.model.auth.User` who is + executing the batch. + + :param progress: Optional progress indicator factory. + + :param \**kwargs: Additional kwargs which may affect the batch + execution behavior. There are none by default, but some + handlers may declare/use them. + """ + + def do_delete(self, batch, user, dry_run=False, progress=None, **kwargs): + """ + Delete the given batch entirely. + + This will delete the batch proper, all data rows, and any + files which may be associated with it. + """ + session = self.app.get_session(batch) + + # remove data files + path = self.get_data_path(batch) + if os.path.exists(path) and not dry_run: + shutil.rmtree(path) + + # remove batch proper + session.delete(batch) diff --git a/src/wuttjamaican/db/model/batch.py b/src/wuttjamaican/db/model/batch.py index 76801b8..f6ba154 100644 --- a/src/wuttjamaican/db/model/batch.py +++ b/src/wuttjamaican/db/model/batch.py @@ -47,6 +47,13 @@ class BatchMixin: handler` must be defined, which is able to process data for that :term:`batch type`. + .. attribute:: batch_type + + This is the canonical :term:`batch type` for the batch model. + + By default this will match the underlying table name for the + batch, but the model class can set it explicitly to override. + .. attribute:: __row_class__ Reference to the specific :term:`data model` class used for the @@ -194,7 +201,7 @@ class BatchMixin: ) @declared_attr - def batch_key(cls): + def batch_type(cls): return cls.__tablename__ uuid = uuid_column() diff --git a/tests/test_app.py b/tests/test_app.py index ca8c952..71a4066 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -434,6 +434,14 @@ app_title = WuttaTest dt = datetime.datetime(2024, 12, 11, 8, 30, tzinfo=datetime.timezone.utc) self.assertEqual(self.app.render_datetime(dt), '2024-12-11 08:30+0000') + def test_render_time_ago(self): + with patch.object(mod, 'humanize') as humanize: + humanize.naturaltime.return_value = 'now' + now = datetime.datetime.now() + result = self.app.render_time_ago(now) + self.assertEqual(result, 'now') + humanize.naturaltime.assert_called_once_with(now) + def test_get_person(self): people = self.app.get_people_handler() with patch.object(people, 'get_person') as get_person: diff --git a/tests/test_batch.py b/tests/test_batch.py index b29c42b..6075fd0 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -1,5 +1,8 @@ # -*- coding: utf-8; -*- +import os +from unittest.mock import patch + from wuttjamaican import batch as mod try: @@ -29,6 +32,11 @@ else: handler = mod.BatchHandler(self.config) self.assertRaises(NotImplementedError, getattr, handler, 'model_class') + def test_batch_type(self): + with patch.object(mod.BatchHandler, 'model_class', new=MockBatch): + handler = mod.BatchHandler(self.config) + self.assertEqual(handler.batch_type, 'testing_batch_mock') + def test_make_batch(self): handler = self.make_handler() batch = handler.make_batch(self.session) @@ -44,6 +52,44 @@ else: third = handler.consume_batch_id(self.session, as_str=True) self.assertEqual(third, f'{first + 2:08d}') + def test_get_data_path(self): + model = self.app.model + user = model.User(username='barney') + self.session.add(user) + + with patch.object(mod.BatchHandler, 'model_class', new=MockBatch): + handler = self.make_handler() + + # root storage (default) + with patch.object(self.app, 'get_appdir', return_value=self.tempdir): + path = handler.get_data_path() + self.assertEqual(path, os.path.join(self.tempdir, 'data', 'batch', 'testing_batch_mock')) + + # root storage (configured) + self.config.setdefault('wutta.batch.storage_path', self.tempdir) + path = handler.get_data_path() + self.assertEqual(path, os.path.join(self.tempdir, 'testing_batch_mock')) + + batch = handler.make_batch(self.session, created_by=user) + self.session.add(batch) + self.session.flush() + + # batch-specific + path = handler.get_data_path(batch) + uuid = batch.uuid.hex + final = os.path.join(uuid[-2:], uuid[:-2]) + self.assertEqual(path, os.path.join(self.tempdir, 'testing_batch_mock', final)) + + # with filename + path = handler.get_data_path(batch, 'input.csv') + self.assertEqual(path, os.path.join(self.tempdir, 'testing_batch_mock', final, 'input.csv')) + + # makedirs + path = handler.get_data_path(batch) + self.assertFalse(os.path.exists(path)) + path = handler.get_data_path(batch, makedirs=True) + self.assertTrue(os.path.exists(path)) + def test_should_populate(self): handler = self.make_handler() batch = handler.make_batch(self.session) @@ -68,3 +114,85 @@ else: self.assertIsNone(batch.row_count) handler.add_row(batch, row) self.assertEqual(batch.row_count, 1) + + def test_do_execute(self): + model = self.app.model + user = model.User(username='barney') + self.session.add(user) + + handler = self.make_handler() + batch = handler.make_batch(self.session, created_by=user) + self.session.add(batch) + self.session.flush() + + # error if execution not allowed + with patch.object(handler, 'why_not_execute', return_value="bad batch"): + self.assertRaises(RuntimeError, handler.do_execute, batch, user) + + # nb. coverage only; tests nothing + self.assertIsNone(batch.executed) + self.assertIsNone(batch.executed_by) + handler.do_execute(batch, user) + self.assertIsNotNone(batch.executed) + self.assertIs(batch.executed_by, user) + + # error if execution already happened + self.assertRaises(ValueError, handler.do_execute, batch, user) + + def test_do_delete(self): + model = self.app.model + handler = self.make_handler() + + user = model.User(username='barney') + self.session.add(user) + + # simple delete + batch = handler.make_batch(self.session, created_by=user) + self.session.add(batch) + self.session.flush() + self.assertEqual(self.session.query(MockBatch).count(), 1) + handler.do_delete(batch, user) + self.assertEqual(self.session.query(MockBatch).count(), 0) + + # delete w/ rows + batch = handler.make_batch(self.session, created_by=user) + self.session.add(batch) + for i in range(5): + row = handler.make_row() + handler.add_row(batch, row) + self.session.flush() + self.assertEqual(self.session.query(MockBatch).count(), 1) + handler.do_delete(batch, user) + self.assertEqual(self.session.query(MockBatch).count(), 0) + + # delete w/ files + self.config.setdefault('wutta.batch.storage_path', self.tempdir) + batch = handler.make_batch(self.session, created_by=user) + self.session.add(batch) + self.session.flush() + path = handler.get_data_path(batch, 'data.txt', makedirs=True) + with open(path, 'wt') as f: + f.write('foo=bar') + self.assertEqual(self.session.query(MockBatch).count(), 1) + path = handler.get_data_path(batch) + self.assertTrue(os.path.exists(path)) + handler.do_delete(batch, user) + self.assertEqual(self.session.query(MockBatch).count(), 0) + self.assertFalse(os.path.exists(path)) + + # delete w/ files (dry-run) + self.config.setdefault('wutta.batch.storage_path', self.tempdir) + batch = handler.make_batch(self.session, created_by=user) + self.session.add(batch) + self.session.flush() + path = handler.get_data_path(batch, 'data.txt', makedirs=True) + with open(path, 'wt') as f: + f.write('foo=bar') + self.assertEqual(self.session.query(MockBatch).count(), 1) + path = handler.get_data_path(batch) + self.assertTrue(os.path.exists(path)) + handler.do_delete(batch, user, dry_run=True) + # nb. batch appears missing from session even in dry-run + self.assertEqual(self.session.query(MockBatch).count(), 0) + # nb. but its files remain intact + self.assertTrue(os.path.exists(path))