diff --git a/CHANGELOG.md b/CHANGELOG.md index 22559be..05f5ef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ All notable changes to WuttaSync will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## v0.2.0 (2024-12-07) + +### Feat + +- add `wutta import-csv` command + +### Fix + +- expose `ToWuttaHandler`, `ToWutta` in `wuttasync.importing` namespace +- implement deletion logic; add cli params for max changes +- add `--key` (or `--keys`) param for import/export commands +- add `--list-models` option for import/export commands +- require latest wuttjamaican +- add `--fields` and `--exclude` params for import/export cli + ## v0.1.0 (2024-12-05) ### Feat diff --git a/docs/glossary.rst b/docs/glossary.rst new file mode 100644 index 0000000..c58e3d6 --- /dev/null +++ b/docs/glossary.rst @@ -0,0 +1,30 @@ +.. _glossary: + +Glossary +======== + +.. glossary:: + :sorted: + + import handler + This is a type of :term:`handler` which is responsible for a + particular set of data import/export task(s). + + The import handler manages data connections and transactions, and + invokes one or more :term:`importers ` to process the + data. See also :ref:`import-handler-vs-importer`. + + Note that "import/export handler" is the more proper term to use + here but it is often shortened to just "import handler" for + convenience. + + importer + This refers to a Python class/instance responsible for processing + a particular :term:`data model` for an import/export job. + + For instance there is usually one importer per table, when + importing to the :term:`app database` (regardless of source). + See also :ref:`import-handler-vs-importer`. + + Note that "importer/exporter" is the more proper term to use here + but it is often shortened to just "importer" for convenience. 
diff --git a/docs/index.rst b/docs/index.rst index b8bf248..ea00f77 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,9 +5,11 @@ WuttaSync This package adds data import/export and real-time sync utilities for the `Wutta Framework `_. -The primary use cases here are: +*(NB. the real-time sync has not been added yet.)* -* keep "operational" data in sync between e.g. various business systems +The primary use cases in mind are: + +* keep operational data in sync between various business systems * import data from user-specified file * export to file @@ -22,8 +24,11 @@ database`, it may be used for any "source → target" data flow. :maxdepth: 2 :caption: Documentation + glossary narr/install - narr/cli + narr/cli/index + narr/concepts + narr/custom/index .. toctree:: :maxdepth: 1 diff --git a/docs/narr/cli.rst b/docs/narr/cli/builtin.rst similarity index 74% rename from docs/narr/cli.rst rename to docs/narr/cli/builtin.rst index 4bee64d..0630c94 100644 --- a/docs/narr/cli.rst +++ b/docs/narr/cli/builtin.rst @@ -1,10 +1,12 @@ -Built-in Commands -================= +=================== + Built-in Commands +=================== -WuttaSync adds some built-in ``wutta`` :term:`subcommands `. +Below are the :term:`subcommands ` which come with +WuttaSync. -See also :doc:`wuttjamaican:narr/cli/index`. +It is fairly simple to add more; see :doc:`custom`. .. _wutta-import-csv: diff --git a/docs/narr/cli/custom.rst b/docs/narr/cli/custom.rst new file mode 100644 index 0000000..837a70c --- /dev/null +++ b/docs/narr/cli/custom.rst @@ -0,0 +1,64 @@ + +================= + Custom Commands +================= + +This section describes how to add a custom :term:`subcommand` which +wraps a particular :term:`import handler`. + +See also :doc:`wuttjamaican:narr/cli/custom` for more information +on the general concepts etc. 
+ + +Basic Import/Export +------------------- + +Here we'll assume you have a typical "Poser" app based on Wutta +Framework, and the "Foo → Poser" (``FromFooToPoser`` handler) import +logic is defined in the ``poser.importing.foo`` module. + +We'll also assume you already have a ``poser`` top-level +:term:`command` (in ``poser.cli``), and our task now is to add the +``poser import-foo`` subcommand to wrap the import handler. + +And finally we'll assume this is just a "typical" import handler and +we do not need any custom CLI params exposed. + +Here is the code and we'll explain below:: + + from poser.cli import poser_typer + from wuttasync.cli import import_command, ImportCommandHandler + + @poser_typer.command() + @import_command + def import_foo(ctx, **kwargs): + """ + Import data from Foo API to Poser DB + """ + config = ctx.parent.wutta_config + handler = ImportCommandHandler( + config, import_handler='poser.importing.foo:FromFooToPoser') + handler.run(ctx.params) + +Hopefully it's straightforward but to be clear: + +* subcommand is really just a function, **with desired name** +* wrap with ``@poser_typer.command()`` to register as subcommand +* wrap with ``@import_command`` to get typical CLI params +* call ``ImportCommandHandler.run()`` with import handler spec + +So really - in addition to +:func:`~wuttasync.cli.base.import_command()` - the +:class:`~wuttasync.cli.base.ImportCommandHandler` is doing the heavy +lifting for all import/export subcommands, it just needs to know which +:term:`import handler` to use. + +.. note:: + + If your new subcommand is defined in a different module than is the + top-level command (e.g. as in example above) then you may need to + "eagerly" import the subcommand module. (Otherwise auto-discovery + may not find it.) + + This is usually done from within the top-level command's module, + since it is always imported early due to the entry point. 
diff --git a/docs/narr/cli/index.rst b/docs/narr/cli/index.rst new file mode 100644 index 0000000..96be6c7 --- /dev/null +++ b/docs/narr/cli/index.rst @@ -0,0 +1,23 @@ + +======================== + Command Line Interface +======================== + +The primary way of using the import/export framework day to day is via +the command line. + +WuttJamaican defines the ``wutta`` :term:`command` and WuttaSync comes +with some extra :term:`subcommands ` for importing to / +exporting from the Wutta :term:`app database`. + +It is fairly simple to add a dedicated subcommand for any +:term:`import handler`; see below. + +And for more general info about CLI see +:doc:`wuttjamaican:narr/cli/index`. + +.. toctree:: + :maxdepth: 2 + + builtin + custom diff --git a/docs/narr/concepts.rst b/docs/narr/concepts.rst new file mode 100644 index 0000000..93d09a3 --- /dev/null +++ b/docs/narr/concepts.rst @@ -0,0 +1,54 @@ + +Concepts +======== + +Things hopefully are straightforward but it's important to get the +following straight in your head; the rest will come easier if you do. + + +Source vs. Target +----------------- + +Data always flows from source to target, it is the #1 rule. + +Docs and command output will always reflect this, e.g. **CSV → +Wutta**. + +Source and target can be anything as long as the :term:`import +handler` and :term:`importer(s) ` implement the desired +logic. The :term:`app database` is often involved but not always. + + +Import vs. Export +----------------- + +Surprise, there is no difference. After all from target's perspective +everything is really an import. + +Sometimes it's more helpful to think of it as an export, e.g. **Wutta +→ CSV** really seems like an export. In such cases the +:attr:`~wuttasync.importing.handlers.ImportHandler.orientation` may be +set to reflect the distinction. + + +.. _import-handler-vs-importer: + +Import Handler vs. 
Importer +--------------------------- + +The :term:`import handler` is sort of the "wrapper" around one or more +:term:`importers ` and the latter contain the table-specific +sync logic. + +In a DB or similar context, the import handler will make the +connection, then invoke all requested importers, then commit +transaction at the end (or rollback if dry-run). + +And each importer will read data from source, and usually also read +data from target, then compare data sets and finally write data to +target as needed. But each would usually do this for just one table. + +See also the base classes for each: + +* :class:`~wuttasync.importing.handlers.ImportHandler` +* :class:`~wuttasync.importing.base.Importer` diff --git a/docs/narr/custom/command.rst b/docs/narr/custom/command.rst new file mode 100644 index 0000000..39eaeae --- /dev/null +++ b/docs/narr/custom/command.rst @@ -0,0 +1,9 @@ + +Define Command +============== + +Now that you have defined the import handler plus any importers +required, you'll want to define a command line interface to use it. + +This section is here for completeness but the process is described +elsewhere; see :doc:`/narr/cli/custom`. diff --git a/docs/narr/custom/conventions.rst b/docs/narr/custom/conventions.rst new file mode 100644 index 0000000..3ce686a --- /dev/null +++ b/docs/narr/custom/conventions.rst @@ -0,0 +1,90 @@ + +Conventions +=========== + +Below are recommended conventions for structuring and naming the files +in your project relating to import/export. + +The intention for these rules is that they are "intuitive" based on +the fact that all data flows from source to target and therefore can +be thought of as "importing" in virtually all cases. + +But there are a lot of edge cases out there so YMMV. 
+ + +"The Rules" +----------- + +There are exceptions to these of course, but in general: + +* regarding how to think about these conventions: + + * always look at it from target's perspective + + * always look at it as an *import*, not export + +* "final" logic is always a combo of: + + * "base" logic for how target data read/write happens generally + + * "specific" logic for how that happens using a particular data source + +* targets each get their own subpackage within project + + * and within that, also an ``importing`` (nested) subpackage + + * and within *that* is where the files live, referenced next + + * target ``model.py`` should contain ``ToTarget`` importer base class + + * also may have misc. per-model base classes, e.g. ``WidgetImporter`` + + * also may have ``ToTargetHandler`` base class if applicable + + * sources each get their own module, named after the source + + * should contain the "final" handler class, e.g. ``FromSourceToTarget`` + + * also contains "final" importer classes needed by handler (e.g. ``WidgetImporter``) + + +Example +------- + +That's a lot of rules so let's see it. Here we assume a Wutta-based +app named Poser and it integrates with a Foo API in the cloud. Data +should flow both ways so we will be thinking of this as: + +* **Foo → Poser import** +* **Poser → Foo export** + +Here is the suggested file layout: + +.. code-block:: none + + poser/ + ├── foo/ + │ ├── __init__.py + │ ├── api.py + │ └── importing/ + │ ├── __init__.py + │ ├── model.py + │ └── poser.py + └── importing/ + ├── __init__.py + ├── foo.py + └── model.py + +And the module breakdown: + +* ``poser.foo.api`` has e.g. ``FooAPI`` interface logic + +**Foo → Poser import** (aka. "Poser imports from Foo") + +* ``poser.importing.model`` has ``ToPoserHandler``, ``ToPoser`` and per-model base importers +* ``poser.importing.foo`` has ``FromFooToPoser`` plus final importers + +**Poser → Foo export** (aka. 
"Foo imports from Poser") + +* ``poser.foo.importing.model`` has ``ToFooHandler``, ``ToFoo`` and per-model base importer +* ``poser.foo.importing.poser`` has ``FromPoserToFoo`` plus final importers diff --git a/docs/narr/custom/handler.rst b/docs/narr/custom/handler.rst new file mode 100644 index 0000000..cb2b74d --- /dev/null +++ b/docs/narr/custom/handler.rst @@ -0,0 +1,93 @@ + +Define Import Handler +===================== + +The obvious step here is to define a new :term:`import handler`, which +ultimately inherits from +:class:`~wuttasync.importing.handlers.ImportHandler`. But the choice +of which class(es) *specifically* to inherit from, is a bit more +complicated. + + +Choose the Base Class(es) +------------------------- + +If all else fails, or to get started simply, you can always just +inherit from :class:`~wuttasync.importing.handlers.ImportHandler` +directly as the only base class. You'll have to define any methods +needed to implement desired behavior. + +However depending on your particular source and/or target, there may +be existing base classes defined somewhere from which you can inherit. +This may save you some effort, and/or is just a good idea to share +code where possible. + +Keep in mind your import handler can inherit from multiple base +classes, and often will - one base for the source side, and another +for the target side. For instance:: + + from wuttasync.importing import FromFileHandler, ToWuttaHandler + + class FromExcelToPoser(FromFileHandler, ToWuttaHandler): + """ + Handler for Excel file → Poser app DB + """ + +You generally will still need to define/override some methods to +customize behavior. + +All built-in base classes live under :mod:`wuttasync.importing`. + + +.. _register-importer: + +Register Importer(s) +-------------------- + +If nothing else, most custom handlers must override +:meth:`~wuttasync.importing.handlers.ImportHandler.define_importers()` +to "register" importer(s) as appropriate. 
There are two primary goals +here: + +* add "new" (totally custom) importers +* override "existing" importers (inherited from base class) + +Obviously for this to actually work the importer(s) must exist in +code; see :doc:`importer`. + +As an example let's say there's a ``FromFooToWutta`` handler which +defines a ``Widget`` importer. + +And let's say you want to customize that, by tweaking slightly the +logic for ``WidgetImporter`` and adding a new ``SprocketImporter``:: + + from somewhere_else import (FromFooToWutta, ToWutta, + WidgetImporter as WidgetImporterBase) + + class FromFooToPoser(FromFooToWutta): + """ + Handler for Foo -> Poser + """ + + def define_importers(self): + + # base class defines the initial set + importers = super().define_importers() + + # override widget importer + importers['Widget'] = WidgetImporter + + # add sprocket importer + importers['Sprocket'] = SprocketImporter + + return importers + + class SprocketImporter(ToWutta): + """ + Sprocket importer for Foo -> Poser + """ + + class WidgetImporter(WidgetImporterBase): + """ + Widget importer for Foo -> Poser + """ diff --git a/docs/narr/custom/importer.rst b/docs/narr/custom/importer.rst new file mode 100644 index 0000000..c9b6674 --- /dev/null +++ b/docs/narr/custom/importer.rst @@ -0,0 +1,149 @@ + +Define Importer(s) +================== + +Here we'll describe how to make a custom :term:`importer/exporter +`, which can process a given :term:`data model`. + +.. + The example will assume a **Foo → Poser import** for the ``Widget`` + :term:`data model`. + + +Choose the Base Class(es) +------------------------- + +As with the :term:`import handler`, the importer "usually" will have +two base classes: one for the target side and another for the source. + +The base class for target side is generally more fleshed out, with +logic to read/write data for the given target model. Whereas the base +class for the source side could just be a stub. 
In the latter case, +one might choose to skip it and inherit only from the target base +class. + +In any case the final importer class you define can override any/all +logic from either base class if needed. + + +Example: Foo → Poser import +--------------------------- + +Here we'll assume a Wutta-based app named "Poser" which will be +importing "Widget" data from the "Foo API" cloud service. + +In this case we will inherit from a base class for the target side, +which already knows how to talk to the :term:`app database` via +SQLAlchemy ORM. + +But for the source side, there is no existing base class for the Foo +API service, since that is just made-up - so we will also define our +own base class for that:: + + from wuttasync.importing import Importer, ToWutta + + # nb. this is not real of course, but an example + from poser.foo.api import FooAPI + + class FromFoo(Importer): + """ + Base class for importers using Foo API as source + """ + + def setup(self): + """ + Establish connection to Foo API + """ + self.foo_api = FooAPI(self.config) + + class WidgetImporter(FromFoo, ToWutta): + """ + Widget importer for Foo -> Poser + """ + + def get_source_objects(self): + """ + Fetch all "raw" widgets from Foo API + """ + # nb. also not real, just example + return self.foo_api.get_widgets() + + def normalize_source_object(self, widget): + """ + Convert the "raw" widget we receive from Foo API, to a + "normalized" dict with data for all fields which are part of + the processing request. + """ + return { + 'id': widget.id, + 'name': widget.name, + } + + +Example: Poser → Foo export +--------------------------- + +In the previous scenario we imported data from Foo to Poser, and here +we'll do the reverse, exporting from Poser to Foo. + +As of writing the base class logic for exporting from Wutta :term:`app +database` does not yet exist. 
And the Foo API is just made-up so +we'll add one-off base classes for both sides:: + + from wuttasync.importing import Importer + + class FromWutta(Importer): + """ + Base class for importers using Wutta DB as source + """ + + class ToFoo(Importer): + """ + Base class for exporters targeting Foo API + """ + + class WidgetImporter(FromWutta, ToFoo): + """ + Widget exporter for Poser -> Foo + """ + + def get_source_objects(self): + """ + Fetch all widgets from the Poser app DB. + + (see note below regarding the db session) + """ + model = self.app.model + return self.source_session.query(model.Widget).all() + + def normalize_source_object(self, widget): + """ + Convert the "raw" widget from Poser app (ORM) to a + "normalized" dict with data for all fields which are part of + the processing request. + """ + return { + 'id': widget.id, + 'name': widget.name, + } + +Note that the ``get_source_objects()`` method shown above makes use of +a ``source_session`` attribute - where did that come from? + +This is actually not part of the importer proper, but rather this +attribute is set by the :term:`import handler`. And that will only +happen if the importer is being invoked by a handler which supports +it. So none of that is shown here, but FYI. + +(And again, that logic isn't written yet, but there will "soon" be a +``FromSqlalchemyHandler`` class defined which implements this.) + + +Register with Import Handler +---------------------------- + +After you define the importer/exporter class (as shown above) you also +must "register" it within the import/export handler. + +This section is here for completeness but the process is described +elsewhere; see :ref:`register-importer`. diff --git a/docs/narr/custom/index.rst b/docs/narr/custom/index.rst new file mode 100644 index 0000000..7e75146 --- /dev/null +++ b/docs/narr/custom/index.rst @@ -0,0 +1,21 @@ + +Custom Import/Export +==================== + +This section explains what's required to make your own import/export +tasks. 
+ +See also :doc:`/narr/concepts` for some terminology etc. + +.. + The examples throughout the sections below will often involve a + theoretical **Foo → Poser** import, where Poser is a typical + Wutta-based app and Foo is some API in the cloud. + +.. toctree:: + :maxdepth: 2 + + conventions + handler + importer + command diff --git a/pyproject.toml b/pyproject.toml index ac28730..a198b0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" [project] name = "WuttaSync" -version = "0.1.0" +version = "0.2.0" description = "Wutta Framework for data import/export and real-time sync" readme = "README.md" authors = [{name = "Lance Edgar", email = "lance@wuttaproject.org"}] diff --git a/src/wuttasync/cli/__init__.py b/src/wuttasync/cli/__init__.py index 70de7ac..c77a4e2 100644 --- a/src/wuttasync/cli/__init__.py +++ b/src/wuttasync/cli/__init__.py @@ -25,11 +25,12 @@ WuttaSync - ``wutta`` subcommands This namespace exposes the following: -* :func:`~wuttasync.cli.base.importer_command()` -* :func:`~wuttasync.cli.base.file_importer_command()` +* :func:`~wuttasync.cli.base.import_command()` +* :func:`~wuttasync.cli.base.file_import_command()` +* :class:`~wuttasync.cli.base.ImportCommandHandler` """ -from .base import importer_command, file_importer_command +from .base import import_command, file_import_command, ImportCommandHandler # nb. must bring in all modules for discovery to work from . 
import import_csv diff --git a/src/wuttasync/cli/base.py b/src/wuttasync/cli/base.py index 008dd5b..6368c6a 100644 --- a/src/wuttasync/cli/base.py +++ b/src/wuttasync/cli/base.py @@ -25,6 +25,8 @@ """ import inspect +import logging +import sys from pathlib import Path from typing import List, Optional from typing_extensions import Annotated @@ -32,41 +34,167 @@ from typing_extensions import Annotated import makefun import typer +from wuttjamaican.app import GenericHandler +from wuttasync.importing import ImportHandler -def importer_command_template( - # model keys +log = logging.getLogger(__name__) + + +class ImportCommandHandler(GenericHandler): + """ + This is the :term:`handler` responsible for import/export command + line runs. + + Normally, the command (actually :term:`subcommand`) logic will + create this handler and call its :meth:`run()` method. + + This handler does not know how to import/export data, but it knows + how to make its :attr:`import_handler` do it. + + :param import_handler: During construction, caller can specify the + :attr:`import_handler` as any of: + + * import handler instance + * import handler factory (e.g. class) + * import handler spec (cf. :func:`~wuttjamaican:wuttjamaican.util.load_object()`) + + For example:: + + handler = ImportCommandHandler( + config, import_handler='wuttasync.importing.csv:FromCsvToWutta') + """ + + import_handler = None + """ + Reference to the :term:`import handler` instance, which is to be + invoked when command runs. See also :meth:`run()`. 
+ """ + + def __init__(self, config, import_handler=None): + super().__init__(config) + + if import_handler: + if isinstance(import_handler, ImportHandler): + self.import_handler = import_handler + elif callable(import_handler): + self.import_handler = import_handler(self.config) + else: # spec + factory = self.app.load_object(import_handler) + self.import_handler = factory(self.config) + + def run(self, params, progress=None): + """ + Run the import/export job(s) based on command line params. + + This mostly just calls + :meth:`~wuttasync.importing.handlers.ImportHandler.process_data()` + for the :attr:`import_handler`. + + Unless ``--list-models`` was specified on the command line in + which case we do :meth:`list_models()` instead. + + :param params: Dict of params from command line. This must + include a ``'models'`` key, the rest are optional. + + :param progress: Optional progress indicator factory. + """ + + # maybe just list models and bail + if params.get('list_models'): + self.list_models(params) + return + + # otherwise process some data + kw = dict(params) + models = kw.pop('models') + log.debug("using handler: %s", self.import_handler.get_spec()) + # TODO: need to use all/default models if none specified + # (and should know models by now for logging purposes) + log.debug("running %s %s for: %s", + self.import_handler, + self.import_handler.orientation.value, + ', '.join(models)) + log.debug("params are: %s", kw) + self.import_handler.process_data(*models, **kw) + + def list_models(self, params): + """ + Query the :attr:`import_handler`'s supported target models and + print the info to stdout. + + This is what happens when command line has ``--list-models``. 
+ """ + sys.stdout.write("ALL MODELS:\n") + sys.stdout.write("==============================\n") + for key in self.import_handler.importers: + sys.stdout.write(key) + sys.stdout.write("\n") + sys.stdout.write("==============================\n") + + +def import_command_template( + models: Annotated[ Optional[List[str]], typer.Argument(help="Model(s) to process. Can specify one or more, " - "or omit to process all default models.")] = None, + "or omit to process default models.")] = None, + + list_models: Annotated[ + bool, + typer.Option('--list-models', '-l', + help="List available target models and exit.")] = False, - # allow create? create: Annotated[ bool, - typer.Option(help="Allow new target records to be created.")] = True, + typer.Option(help="Allow new target records to be created. " + "See also --max-create.")] = True, - # allow update? update: Annotated[ bool, - typer.Option(help="Allow existing target records to be updated.")] = True, + typer.Option(help="Allow existing target records to be updated. " + "See also --max-update.")] = True, - # allow delete? delete: Annotated[ bool, - typer.Option(help="Allow existing target records to be deleted.")] = False, + typer.Option(help="Allow existing target records to be deleted. " + "See also --max-delete.")] = False, - # fields fields: Annotated[ str, typer.Option('--fields', - help="List of fields to process. See also --exclude.")] = None, + help="List of fields to process. See also --exclude and --key.")] = None, + excluded_fields: Annotated[ str, typer.Option('--exclude', help="List of fields *not* to process. See also --fields.")] = None, - # dry run? + keys: Annotated[ + str, + typer.Option('--key', '--keys', + help="List of fields to use as record key/identifier. " + "See also --fields.")] = None, + + max_create: Annotated[ + int, + typer.Option(help="Max number of target records to create (per model). 
" + "See also --create.")] = None, + + max_update: Annotated[ + int, + typer.Option(help="Max number of target records to update (per model). " + "See also --update.")] = None, + + max_delete: Annotated[ + int, + typer.Option(help="Max number of target records to delete (per model). " + "See also --delete.")] = None, + + max_total: Annotated[ + int, + typer.Option(help="Max number of *any* target record changes which may occur (per model).")] = None, + dry_run: Annotated[ bool, typer.Option('--dry-run', @@ -75,22 +203,22 @@ def importer_command_template( ): """ Stub function which provides a common param signature; used with - :func:`importer_command()`. + :func:`import_command()`. """ -def importer_command(fn): +def import_command(fn): """ Decorator for import/export commands. Adds common params based on - :func:`importer_command_template()`. + :func:`import_command_template()`. To use this, e.g. for ``poser import-foo`` command:: from poser.cli import poser_typer - from wuttasync.cli import importer_command + from wuttasync.cli import import_command, ImportCommandHandler @poser_typer.command() - @importer_command + @import_command def import_foo( ctx: typer.Context, **kwargs @@ -98,16 +226,15 @@ def importer_command(fn): \""" Import data from Foo API to Poser DB \""" - from poser.importing.foo import FromFooToPoser - config = ctx.parent.wutta_config - kw = dict(ctx.params) - models = kw.pop('models') - handler = FromFooToPoser(config) - handler.process_data(*models, **kw) + handler = ImportCommandHandler( + config, import_handler='poser.importing.foo:FromFooToPoser') + handler.run(ctx.params) + + See also :class:`ImportCommandHandler`. 
""" original_sig = inspect.signature(fn) - reference_sig = inspect.signature(importer_command_template) + reference_sig = inspect.signature(import_command_template) params = list(original_sig.parameters.values()) for i, param in enumerate(reference_sig.parameters.values()): @@ -120,7 +247,8 @@ def importer_command(fn): return makefun.create_function(final_sig, fn) -def file_importer_command_template( +def file_import_command_template( + input_file_path: Annotated[ Path, typer.Option('--input-path', @@ -128,27 +256,28 @@ def file_importer_command_template( help="Path to input file(s). Can be a folder " "if app logic can guess the filename(s); " "otherwise must be complete file path.")] = ..., + ): """ Stub function to provide signature for import/export commands which require input file. Used with - :func:`file_importer_command()`. + :func:`file_import_command()`. """ -def file_importer_command(fn): +def file_import_command(fn): """ Decorator for import/export commands which require input file. Adds common params based on - :func:`file_importer_command_template()`. + :func:`file_import_command_template()`. To use this, it's the same method as shown for - :func:`importer_command()` except in this case you would use the - ``file_importer_command`` decorator. + :func:`import_command()` except in this case you would use the + ``file_import_command`` decorator. 
""" original_sig = inspect.signature(fn) - plain_import_sig = inspect.signature(importer_command_template) - file_import_sig = inspect.signature(file_importer_command_template) + plain_import_sig = inspect.signature(import_command_template) + file_import_sig = inspect.signature(file_import_command_template) desired_params = ( list(plain_import_sig.parameters.values()) + list(file_import_sig.parameters.values())) diff --git a/src/wuttasync/cli/import_csv.py b/src/wuttasync/cli/import_csv.py index 7600d5f..50c2a83 100644 --- a/src/wuttasync/cli/import_csv.py +++ b/src/wuttasync/cli/import_csv.py @@ -30,11 +30,11 @@ import typer from wuttjamaican.cli import wutta_typer -from .base import file_importer_command +from .base import file_import_command, ImportCommandHandler @wutta_typer.command() -@file_importer_command +@file_import_command def import_csv( ctx: typer.Context, **kwargs @@ -42,10 +42,7 @@ def import_csv( """ Import data from CSV file(s) to Wutta DB """ - from wuttasync.importing.csv import FromCsvToWutta - config = ctx.parent.wutta_config - kw = dict(ctx.params) - models = kw.pop('models') - handler = FromCsvToWutta(config) - handler.process_data(*models, **kw) + handler = ImportCommandHandler( + config, import_handler='wuttasync.importing.csv:FromCsvToWutta') + handler.run(ctx.params) diff --git a/src/wuttasync/importing/__init__.py b/src/wuttasync/importing/__init__.py index b43d5d3..03a421f 100644 --- a/src/wuttasync/importing/__init__.py +++ b/src/wuttasync/importing/__init__.py @@ -22,7 +22,27 @@ ################################################################################ """ Data Import / Export Framework + +This namespace exposes the following: + +* :enum:`~wuttasync.importing.handlers.Orientation` + +And some :term:`import handler` base classes: + +* :class:`~wuttasync.importing.handlers.ImportHandler` +* :class:`~wuttasync.importing.handlers.FromFileHandler` +* :class:`~wuttasync.importing.handlers.ToSqlalchemyHandler` +* 
:class:`~wuttasync.importing.wutta.ToWuttaHandler` + +And some :term:`importer` base classes: + +* :class:`~wuttasync.importing.base.Importer` +* :class:`~wuttasync.importing.base.FromFile` +* :class:`~wuttasync.importing.base.ToSqlalchemy` +* :class:`~wuttasync.importing.model.ToWutta` """ from .handlers import Orientation, ImportHandler, FromFileHandler, ToSqlalchemyHandler from .base import Importer, FromFile, ToSqlalchemy +from .model import ToWutta +from .wutta import ToWuttaHandler diff --git a/src/wuttasync/importing/base.py b/src/wuttasync/importing/base.py index d46ae67..59017d7 100644 --- a/src/wuttasync/importing/base.py +++ b/src/wuttasync/importing/base.py @@ -26,6 +26,7 @@ Data Importer base class import os import logging +from collections import OrderedDict from sqlalchemy import orm from sqlalchemy_utils.functions import get_primary_keys, get_columns @@ -36,6 +37,13 @@ from wuttasync.util import data_diffs log = logging.getLogger(__name__) +class ImportLimitReached(Exception): + """ + Exception raised when an import/export job reaches the max number + of changes allowed. + """ + + class Importer: """ Base class for all data importers / exporters. @@ -174,6 +182,11 @@ class Importer: :meth:`get_target_cache()`. """ + max_create = None + max_update = None + max_delete = None + max_total = None + def __init__(self, config, **kwargs): self.config = config self.app = self.config.get_app() @@ -309,10 +322,17 @@ class Importer: :returns: List of "key" field names. """ - if hasattr(self, 'key'): + keys = None + # nb. prefer 'keys' but use 'key' as fallback + if hasattr(self, 'keys'): + keys = self.keys + elif hasattr(self, 'key'): keys = self.key + if keys: if isinstance(keys, str): - keys = [keys] + keys = self.config.parse_list(keys) + # nb. 
save for next time + self.keys = keys return keys return list(get_primary_keys(self.model_class)) @@ -347,9 +367,26 @@ class Importer: Note that subclass generally should not override this method, but instead some of the others. - :param source_data: Optional sequence of normalized source - data. If not specified, it is obtained from - :meth:`normalize_source_data()`. + This first calls :meth:`setup()` to prepare things as needed. + + If no source data is specified, it calls + :meth:`normalize_source_data()` to get that. Regardless, it + also calls :meth:`get_unique_data()` to discard any + duplicates. + + If :attr:`caches_target` is set, it calls + :meth:`get_target_cache()` and assigns result to + :attr:`cached_target`. + + Then depending on values for :attr:`create`, :attr:`update` + and :attr:`delete` it may call: + + * :meth:`do_create_update()` + * :meth:`do_delete()` + + And finally it calls :meth:`teardown()` for cleanup. + + :param source_data: Sequence of normalized source data, if known. :param progress: Optional progress indicator factory. @@ -359,13 +396,6 @@ class Importer: * ``created`` - list of records created on the target * ``updated`` - list of records updated on the target * ``deleted`` - list of records deleted on the target - - See also these methods which this one calls: - - * :meth:`setup()` - * :meth:`do_create_update()` - * :meth:`do_delete()` - * :meth:`teardown()` """ # TODO: should add try/catch around this all? and teardown() in finally: clause? self.setup() @@ -373,12 +403,15 @@ class Importer: updated = [] deleted = [] + log.debug("using key fields: %s", ', '.join(self.get_keys())) + # get complete set of normalized source data if source_data is None: source_data = self.normalize_source_data(progress=progress) - # TODO: should exclude duplicate source records - # source_data, unique = self.get_unique_data(source_data) + # nb. 
prune duplicate records from source data + source_data, source_keys = self.get_unique_data(source_data) + model_title = self.get_model_title() log.debug(f"got %s {model_title} records from source", len(source_data)) @@ -393,7 +426,12 @@ class Importer: # delete target data if self.delete: - deleted = self.do_delete(source_data) + changes = len(created) + len(updated) + if self.max_total and changes >= self.max_total: + log.debug("max of %s total changes already reached; skipping deletions", + self.max_total) + else: + deleted = self.do_delete(source_keys, changes, progress=progress) self.teardown() return created, updated, deleted @@ -451,6 +489,16 @@ class Importer: target_data=target_data) updated.append((target_object, target_data, source_data)) + # stop if we reach max allowed + if self.max_update and len(updated) >= self.max_update: + log.warning("max of %s *updated* records has been reached; stopping now", + self.max_update) + raise ImportLimitReached() + elif self.max_total and (len(created) + len(updated)) >= self.max_total: + log.warning("max of %s *total changes* has been reached; stopping now", + self.max_total) + raise ImportLimitReached() + elif not target_object and self.create: # target object not yet present, so create it @@ -464,23 +512,94 @@ class Importer: # 'object': target_object, # 'data': self.normalize_target_object(target_object), # } + + # stop if we reach max allowed + if self.max_create and len(created) >= self.max_create: + log.warning("max of %s *created* records has been reached; stopping now", + self.max_create) + raise ImportLimitReached() + elif self.max_total and (len(created) + len(updated)) >= self.max_total: + log.warning("max of %s *total changes* has been reached; stopping now", + self.max_total) + raise ImportLimitReached() + else: log.debug("did NOT create new %s for key: %s", model_title, key) actioning = self.actioning.capitalize() target_title = self.handler.get_target_title() - self.app.progress_loop(create_update, 
all_source_data, progress, - message=f"{actioning} {model_title} data to {target_title}") + try: + self.app.progress_loop(create_update, all_source_data, progress, + message=f"{actioning} {model_title} data to {target_title}") + except ImportLimitReached: + pass return created, updated - def do_delete(self, source_data, progress=None): + def do_delete(self, source_keys, changes=None, progress=None): """ - TODO: not yet implemented + Delete records from the target side as needed, per the given + source data. - :returns: List of records deleted on the target. + This will call :meth:`get_deletable_keys()` to discover which + keys existing on the target side could theoretically allow + being deleted. + + From that set it will remove all the given source keys - since + such keys still exist on the source, they should not be + deleted from target. + + If any "deletable" keys remain, their corresponding objects + are removed from target via :meth:`delete_target_object()`. + + :param source_keys: A ``set`` of keys for all source records. + Essentially this is just the list of keys for which target + records should *not* be deleted - since they still exist in + the data source. + + :param changes: Number of changes which have already been made + on the target side. Used to enforce max allowed changes, + if applicable. + + :param progress: Optional progress indicator factory. + + :returns: List of target records which were deleted. """ - return [] + deleted = [] + changes = changes or 0 + + # which target records are deletable? potentially all target + # records may be eligible, but anything also found in source + # is *not* eligible. 
+ deletable = self.get_deletable_keys() - source_keys + log.debug("found %s records to delete", len(deletable)) + + def delete(key, i): + cached = self.cached_target.pop(key) + obj = cached['object'] + + # delete target object + if self.delete_target_object(obj): + deleted.append((obj, cached['data'])) + + # stop if we reach max allowed + if self.max_delete and len(deleted) >= self.max_delete: + log.warning("max of %s *deleted* records has been reached; stopping now", + self.max_delete) + raise ImportLimitReached() + elif self.max_total and (changes + len(deleted)) >= self.max_total: + log.warning("max of %s *total changes* has been reached; stopping now", + self.max_total) + raise ImportLimitReached() + + try: + model_title = self.get_model_title() + self.app.progress_loop(delete, sorted(deletable), progress, + message=f"Deleting {model_title} records") + except ImportLimitReached: + pass + + return deleted def get_record_key(self, data): """ @@ -570,6 +689,49 @@ class Importer: message=f"Reading {model_title} data from {source_title}") return normalized + def get_unique_data(self, source_data): + """ + Return a copy of the given source data, with any duplicate + records removed. + + This looks for duplicates based on the effective key fields, + cf. :meth:`get_keys()`. The first record found with a given + key is kept; subsequent records with that key are discarded. + + This is called from :meth:`process_data()` and is done largely + for sanity's sake, to avoid indeterminate behavior when source + data contains duplicates. For instance: + + Problem #1: If source contains 2 records with key 'X' it makes + no sense to create both records on the target side. + + Problem #2: if the 2 source records have different data (apart + from their key) then which should target reflect? 
+ + So the main point of this method is to discard the duplicates + to avoid problem #1, but do it in a deterministic way so at + least the "choice" of which record is kept will not vary + across runs; hence "pseudo-resolve" problem #2. + + :param source_data: Sequence of normalized source data. + + :returns: A 2-tuple of ``(source_data, unique_keys)`` where: + + * ``source_data`` is the final list of source data + * ``unique_keys`` is a :class:`python:set` of the source record keys + """ + unique = OrderedDict() + for data in source_data: + key = self.get_record_key(data) + if key in unique: + log.warning("duplicate %s records detected from %s for key: %s", + self.get_model_title(), + self.handler.get_source_title(), + key) + else: + unique[key] = data + return list(unique.values()), set(unique) + def get_source_objects(self): """ This method (if applicable) should return a sequence of "raw" @@ -745,6 +907,38 @@ class Importer: for field in fields]) return data + def get_deletable_keys(self, progress=None): + """ + Return a set of record keys from the target side, which are + *potentially* eligible for deletion. + + Inclusion in this set does not imply a given record/key + *should* be deleted, only that app logic (e.g. business rules) + does not prevent it. + + Default logic here will look in the :attr:`cached_target` and + then call :meth:`can_delete_object()` for each record in the + cache. If that call returns true for a given key, it is + included in the result. + + :returns: The ``set`` of target record keys eligible for + deletion. 
+ """ + if not self.caches_target: + return set() + + keys = set() + + def check(key, i): + data = self.cached_target[key]['data'] + obj = self.cached_target[key]['object'] + if self.can_delete_object(obj, data): + keys.add(key) + + self.app.progress_loop(check, set(self.cached_target), progress, + message="Determining which objects can be deleted") + return keys + ############################## # CRUD methods ############################## @@ -850,6 +1044,40 @@ class Importer: return obj + def can_delete_object(self, obj, data=None): + """ + Should return true or false indicating whether the given + object "can" be deleted. Default is to return true in all + cases. + + If you return false then the importer will know not to call + :meth:`delete_target_object()` even if the data sets imply + that it should. + + :param obj: Raw object on the target side. + + :param data: Normalized data dict for the target record, if + known. + + :returns: ``True`` if object can be deleted, else ``False``. + """ + return True + + def delete_target_object(self, obj): + """ + Delete the given raw object from the target side, and return + true if successful. + + This is called from :meth:`do_delete()`. + + Default logic for this method just returns false; subclass + should override if needed. + + :returns: Should return ``True`` if deletion succeeds, or + ``False`` if deletion failed or was skipped. + """ + return False + class FromFile(Importer): """ @@ -996,10 +1224,9 @@ class ToSqlalchemy(Importer): """ Tries to fetch the object from target DB using ORM query. 
""" - # first the default logic in case target object is cached - obj = super().get_target_object(key) - if obj: - return obj + # use default logic to fetch from cache, if applicable + if self.caches_target: + return super().get_target_object(key) # okay now we must fetch via query query = self.target_session.query(self.model_class) @@ -1010,15 +1237,6 @@ class ToSqlalchemy(Importer): except orm.exc.NoResultFound: pass - def create_target_object(self, key, source_data): - """ """ - with self.target_session.no_autoflush: - obj = super().create_target_object(key, source_data) - if obj: - # nb. add new object to target db session - self.target_session.add(obj) - return obj - def get_target_objects(self, source_data=None, progress=None): """ Fetches target objects via the ORM query from @@ -1034,3 +1252,17 @@ class ToSqlalchemy(Importer): :meth:`get_target_objects()`. """ return self.target_session.query(self.model_class) + + def create_target_object(self, key, source_data): + """ """ + with self.target_session.no_autoflush: + obj = super().create_target_object(key, source_data) + if obj: + # nb. add new object to target db session + self.target_session.add(obj) + return obj + + def delete_target_object(self, obj): + """ """ + self.target_session.delete(obj) + return True diff --git a/src/wuttasync/importing/csv.py b/src/wuttasync/importing/csv.py index 1c62818..e1937b5 100644 --- a/src/wuttasync/importing/csv.py +++ b/src/wuttasync/importing/csv.py @@ -26,11 +26,12 @@ Importing from CSV import csv import logging +import uuid as _uuid from collections import OrderedDict from sqlalchemy_utils.functions import get_primary_keys -from wuttjamaican.db.util import make_topo_sortkey +from wuttjamaican.db.util import make_topo_sortkey, UUID from .base import FromFile from .handlers import FromFileHandler @@ -138,7 +139,54 @@ class FromCsv(FromFile): class FromCsvToSqlalchemyMixin: """ - Mixin handler class for CSV → SQLAlchemy ORM import/export. 
+ Mixin class for CSV → SQLAlchemy ORM :term:`importers `. + + Meant to be used by :class:`FromCsvToSqlalchemyHandlerMixin`. + + This mixin adds some logic to better handle ``uuid`` key fields + which are of :class:`~wuttjamaican:wuttjamaican.db.util.UUID` data + type (i.e. on the target side). Namely, when reading ``uuid`` + values as string from CSV, convert them to proper UUID instances, + so the key matching between source and target will behave as + expected. + """ + + def __init__(self, config, **kwargs): + super().__init__(config, **kwargs) + + # nb. keep track of any key fields which use proper UUID type + self.uuid_keys = [] + for field in self.get_keys(): + attr = getattr(self.model_class, field) + if len(attr.prop.columns) == 1: + if isinstance(attr.prop.columns[0].type, UUID): + self.uuid_keys.append(field) + + def normalize_source_object(self, obj): + """ """ + data = dict(obj) + + # nb. convert to proper UUID values so key matching will work + # properly, where applicable + for key in self.uuid_keys: + uuid = data[key] + if uuid and not isinstance(uuid, _uuid.UUID): + data[key] = _uuid.UUID(uuid) + + return data + + +class FromCsvToSqlalchemyHandlerMixin: + """ + Mixin class for CSV → SQLAlchemy ORM :term:`import handlers + `. + + This knows how to dynamically generate :term:`importer` classes to + target the particular ORM involved. Such classes will inherit + from :class:`FromCsvToSqlalchemyMixin`, in addition to whatever + :attr:`FromImporterBase` and :attr:`ToImporterBase` reference. + + This all happens within :meth:`define_importers()`. """ source_key = 'csv' generic_source_title = "CSV" @@ -201,30 +249,39 @@ class FromCsvToSqlalchemyMixin: return importers - def make_importer_factory(self, cls, name): + def make_importer_factory(self, model_class, name): """ - Generate and return a new importer/exporter class, targeting - the given data model class. 
+ Generate and return a new :term:`importer` class, targeting + the given :term:`data model` class. - :param cls: A data model class. + The newly-created class will inherit from: - :param name: Optional "model name" override for the - importer/exporter. + * :class:`FromCsvToSqlalchemyMixin` + * :attr:`FromImporterBase` + * :attr:`ToImporterBase` - :returns: A new class, meant to process import/export - operations which target the given data model. The new - class will inherit from both :attr:`FromImporterBase` and - :attr:`ToImporterBase`. + :param model_class: A data model class. + + :param name: The "model name" for the importer/exporter. New + class name will be based on this, so e.g. ``Widget`` model + name becomes ``WidgetImporter`` class name. + + :returns: The new class, meant to process import/export + targeting the given data model. """ - return type(f'{name}Importer', (FromCsv, self.ToImporterBase), { - 'model_class': cls, - 'key': list(get_primary_keys(cls)), + return type(f'{name}Importer', + (FromCsvToSqlalchemyMixin, self.FromImporterBase, self.ToImporterBase), { + 'model_class': model_class, + 'key': list(get_primary_keys(model_class)), }) -class FromCsvToWutta(FromCsvToSqlalchemyMixin, FromFileHandler, ToWuttaHandler): +class FromCsvToWutta(FromCsvToSqlalchemyHandlerMixin, FromFileHandler, ToWuttaHandler): """ Handler for CSV → Wutta :term:`app database` import. + + This uses :class:`FromCsvToSqlalchemyHandlerMixin` for most of the + heavy lifting. 
""" ToImporterBase = ToWutta diff --git a/tests/cli/test_base.py b/tests/cli/test_base.py index b43f7d7..69af1b8 100644 --- a/tests/cli/test_base.py +++ b/tests/cli/test_base.py @@ -2,8 +2,59 @@ import inspect from unittest import TestCase +from unittest.mock import patch from wuttasync.cli import base as mod +from wuttjamaican.testing import DataTestCase + + +class TestImportCommandHandler(DataTestCase): + + def make_handler(self, **kwargs): + return mod.ImportCommandHandler(self.config, **kwargs) + + def test_import_handler(self): + + # none + handler = self.make_handler() + self.assertIsNone(handler.import_handler) + + FromCsvToWutta = self.app.load_object('wuttasync.importing.csv:FromCsvToWutta') + + # as spec + handler = self.make_handler(import_handler=FromCsvToWutta.get_spec()) + self.assertIsInstance(handler.import_handler, FromCsvToWutta) + + # as factory + handler = self.make_handler(import_handler=FromCsvToWutta) + self.assertIsInstance(handler.import_handler, FromCsvToWutta) + + # as instance + myhandler = FromCsvToWutta(self.config) + handler = self.make_handler(import_handler=myhandler) + self.assertIs(handler.import_handler, myhandler) + + def test_run(self): + handler = self.make_handler(import_handler='wuttasync.importing.csv:FromCsvToWutta') + + with patch.object(handler, 'list_models') as list_models: + handler.run({'list_models': True}) + list_models.assert_called_once_with({'list_models': True}) + + with patch.object(handler, 'import_handler') as import_handler: + handler.run({'models': []}) + import_handler.process_data.assert_called_once_with() + + def test_list_models(self): + handler = self.make_handler(import_handler='wuttasync.importing.csv:FromCsvToWutta') + + with patch.object(mod, 'sys') as sys: + handler.list_models({}) + # just test a few random things we expect to see + self.assertTrue(sys.stdout.write.has_call('ALL MODELS:\n')) + self.assertTrue(sys.stdout.write.has_call('Person')) + 
self.assertTrue(sys.stdout.write.has_call('User')) + self.assertTrue(sys.stdout.write.has_call('Upgrade')) class TestImporterCommand(TestCase): @@ -15,7 +66,7 @@ class TestImporterCommand(TestCase): sig1 = inspect.signature(myfunc) self.assertIn('kwargs', sig1.parameters) self.assertNotIn('dry_run', sig1.parameters) - wrapt = mod.importer_command(myfunc) + wrapt = mod.import_command(myfunc) sig2 = inspect.signature(wrapt) self.assertNotIn('kwargs', sig2.parameters) self.assertIn('dry_run', sig2.parameters) @@ -31,7 +82,7 @@ class TestFileImporterCommand(TestCase): self.assertIn('kwargs', sig1.parameters) self.assertNotIn('dry_run', sig1.parameters) self.assertNotIn('input_file_path', sig1.parameters) - wrapt = mod.file_importer_command(myfunc) + wrapt = mod.file_import_command(myfunc) sig2 = inspect.signature(wrapt) self.assertNotIn('kwargs', sig2.parameters) self.assertIn('dry_run', sig2.parameters) diff --git a/tests/cli/test_import_csv.py b/tests/cli/test_import_csv.py index a4371df..f856947 100644 --- a/tests/cli/test_import_csv.py +++ b/tests/cli/test_import_csv.py @@ -1,24 +1,19 @@ #-*- coding: utf-8; -*- -import os from unittest import TestCase from unittest.mock import MagicMock, patch -from wuttasync.cli import import_csv as mod -from wuttasync.importing.csv import FromCsvToWutta +from wuttasync.cli import import_csv as mod, ImportCommandHandler -here = os.path.dirname(__file__) -example_conf = os.path.join(here, 'example.conf') - class TestImportCsv(TestCase): def test_basic(self): - ctx = MagicMock(params={'models': [], - 'create': True, 'update': True, 'delete': False, - 'dry_run': True}) - with patch.object(FromCsvToWutta, 'process_data') as process_data: + params = {'models': [], + 'create': True, 'update': True, 'delete': False, + 'dry_run': True} + ctx = MagicMock(params=params) + with patch.object(ImportCommandHandler, 'run') as run: mod.import_csv(ctx) - process_data.assert_called_once_with(create=True, update=True, delete=False, - dry_run=True) + 
run.assert_called_once_with(params) diff --git a/tests/importing/test_base.py b/tests/importing/test_base.py index 0648dc9..ff2ca6e 100644 --- a/tests/importing/test_base.py +++ b/tests/importing/test_base.py @@ -82,65 +82,244 @@ class TestImporter(DataTestCase): model = self.app.model imp = self.make_importer(model_class=model.Setting) self.assertEqual(imp.get_keys(), ['name']) - imp.key = 'value' - self.assertEqual(imp.get_keys(), ['value']) + with patch.multiple(imp, create=True, key='value'): + self.assertEqual(imp.get_keys(), ['value']) + with patch.multiple(imp, create=True, keys=['foo', 'bar']): + self.assertEqual(imp.get_keys(), ['foo', 'bar']) def test_process_data(self): model = self.app.model - imp = self.make_importer(model_class=model.Setting, caches_target=True) + imp = self.make_importer(model_class=model.Setting, caches_target=True, + delete=True) - # empty data set / just for coverage - with patch.object(imp, 'normalize_source_data') as normalize_source_data: - normalize_source_data.return_value = [] + def make_cache(): + setting1 = model.Setting(name='foo1', value='bar1') + setting2 = model.Setting(name='foo2', value='bar2') + setting3 = model.Setting(name='foo3', value='bar3') + cache = { + ('foo1',): { + 'object': setting1, + 'data': {'name': 'foo1', 'value': 'bar1'}, + }, + ('foo2',): { + 'object': setting2, + 'data': {'name': 'foo2', 'value': 'bar2'}, + }, + ('foo3',): { + 'object': setting3, + 'data': {'name': 'foo3', 'value': 'bar3'}, + }, + } + return cache - with patch.object(imp, 'get_target_cache') as get_target_cache: - get_target_cache.return_value = {} + # nb. 
delete always succeeds + with patch.object(imp, 'delete_target_object', return_value=True): - result = imp.process_data() - self.assertEqual(result, ([], [], [])) + # create + update + delete all as needed + with patch.object(imp, 'get_target_cache', return_value=make_cache()): + created, updated, deleted = imp.process_data([ + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + {'name': 'foo5', 'value': 'BAR5'}, + ]) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 1) + self.assertEqual(len(deleted), 2) + + # same but with --max-total so delete gets skipped + with patch.object(imp, 'get_target_cache', return_value=make_cache()): + with patch.object(imp, 'max_total', new=3): + created, updated, deleted = imp.process_data([ + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + {'name': 'foo5', 'value': 'BAR5'}, + ]) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 1) + self.assertEqual(len(deleted), 0) + + # delete all if source data empty + with patch.object(imp, 'get_target_cache', return_value=make_cache()): + created, updated, deleted = imp.process_data() + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 0) + self.assertEqual(len(deleted), 3) def test_do_create_update(self): model = self.app.model + imp = self.make_importer(model_class=model.Setting, caches_target=True) + + def make_cache(): + setting1 = model.Setting(name='foo1', value='bar1') + setting2 = model.Setting(name='foo2', value='bar2') + cache = { + ('foo1',): { + 'object': setting1, + 'data': {'name': 'foo1', 'value': 'bar1'}, + }, + ('foo2',): { + 'object': setting2, + 'data': {'name': 'foo2', 'value': 'bar2'}, + }, + } + return cache + + # change nothing if data matches + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'bar1'}, + {'name': 'foo2', 'value': 'bar2'}, + ]) + self.assertEqual(len(created), 0) + 
self.assertEqual(len(updated), 0) + + # update all as needed + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'BAR1'}, + {'name': 'foo2', 'value': 'BAR2'}, + ]) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 2) + + # update all, with --max-update + with patch.multiple(imp, create=True, cached_target=make_cache(), max_update=1): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'BAR1'}, + {'name': 'foo2', 'value': 'BAR2'}, + ]) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 1) + + # update all, with --max-total + with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'BAR1'}, + {'name': 'foo2', 'value': 'BAR2'}, + ]) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 1) + + # create all as needed + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'bar1'}, + {'name': 'foo2', 'value': 'bar2'}, + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + ]) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 0) + + # what happens when create gets skipped + with patch.multiple(imp, create=True, cached_target=make_cache()): + with patch.object(imp, 'create_target_object', return_value=None): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'bar1'}, + {'name': 'foo2', 'value': 'bar2'}, + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + ]) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 0) + + # create all, with --max-create + with patch.multiple(imp, create=True, cached_target=make_cache(), max_create=1): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'bar1'}, + {'name': 'foo2', 'value': 
'bar2'}, + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + ]) + self.assertEqual(len(created), 1) + self.assertEqual(len(updated), 0) + + # create all, with --max-total + with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'bar1'}, + {'name': 'foo2', 'value': 'bar2'}, + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + ]) + self.assertEqual(len(created), 1) + self.assertEqual(len(updated), 0) + + # create + update all as needed + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'BAR1'}, + {'name': 'foo2', 'value': 'BAR2'}, + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + ]) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 2) + + # create + update all, with --max-total + with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): + created, updated = imp.do_create_update([ + {'name': 'foo1', 'value': 'BAR1'}, + {'name': 'foo2', 'value': 'BAR2'}, + {'name': 'foo3', 'value': 'BAR3'}, + {'name': 'foo4', 'value': 'BAR4'}, + ]) + # nb. 
foo1 is updated first + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 1) + + def test_do_delete(self): + model = self.app.model # this requires a mock target cache + setting1 = model.Setting(name='foo1', value='bar1') + setting2 = model.Setting(name='foo2', value='bar2') imp = self.make_importer(model_class=model.Setting, caches_target=True) - setting = model.Setting(name='foo', value='bar') - imp.cached_target = { - ('foo',): { - 'object': setting, - 'data': {'name': 'foo', 'value': 'bar'}, + cache = { + ('foo1',): { + 'object': setting1, + 'data': {'name': 'foo1', 'value': 'bar1'}, + }, + ('foo2',): { + 'object': setting2, + 'data': {'name': 'foo2', 'value': 'bar2'}, }, } - # will update the one record - result = imp.do_create_update([{'name': 'foo', 'value': 'baz'}]) - self.assertIs(result[1][0][0], setting) - self.assertEqual(result, ([], [(setting, - # nb. target - {'name': 'foo', 'value': 'bar'}, - # nb. source - {'name': 'foo', 'value': 'baz'})])) - self.assertEqual(setting.value, 'baz') + with patch.object(imp, 'delete_target_object') as delete_target_object: - # will create a new record - result = imp.do_create_update([{'name': 'blah', 'value': 'zay'}]) - self.assertIsNot(result[0][0][0], setting) - setting_new = result[0][0][0] - self.assertEqual(result, ([(setting_new, - # nb. 
source - {'name': 'blah', 'value': 'zay'})], - [])) - self.assertEqual(setting_new.name, 'blah') - self.assertEqual(setting_new.value, 'zay') + # delete nothing if source has same keys + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set(imp.cached_target) + result = imp.do_delete(source_keys) + self.assertFalse(delete_target_object.called) + self.assertEqual(result, []) - # but what if new record is *not* created - with patch.object(imp, 'create_target_object', return_value=None): - result = imp.do_create_update([{'name': 'another', 'value': 'one'}]) - self.assertEqual(result, ([], [])) + # delete both if source has no keys + delete_target_object.reset_mock() + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set() + result = imp.do_delete(source_keys) + self.assertEqual(delete_target_object.call_count, 2) + self.assertEqual(len(result), 2) - # def test_do_delete(self): - # model = self.app.model - # imp = self.make_importer(model_class=model.Setting) + # delete just one if --max-delete was set + delete_target_object.reset_mock() + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set() + with patch.object(imp, 'max_delete', new=1): + result = imp.do_delete(source_keys) + self.assertEqual(delete_target_object.call_count, 1) + self.assertEqual(len(result), 1) + + # delete just one if --max-total was set + delete_target_object.reset_mock() + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set() + with patch.object(imp, 'max_total', new=1): + result = imp.do_delete(source_keys) + self.assertEqual(delete_target_object.call_count, 1) + self.assertEqual(len(result), 1) def test_get_record_key(self): model = self.app.model @@ -180,6 +359,22 @@ class TestImporter(DataTestCase): # nb. 
default normalizer returns object as-is self.assertIs(data[0], setting) + def test_get_unique_data(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + + setting1 = model.Setting(name='foo', value='bar1') + setting2 = model.Setting(name='foo', value='bar2') + + result = imp.get_unique_data([setting2, setting1]) + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], list) + self.assertEqual(len(result[0]), 1) + self.assertIs(result[0][0], setting2) # nb. not setting1 + self.assertIsInstance(result[1], set) + self.assertEqual(result[1], {('foo',)}) + def test_get_source_objects(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) @@ -261,6 +456,34 @@ class TestImporter(DataTestCase): data = imp.normalize_target_object(setting) self.assertEqual(data, {'name': 'foo', 'value': 'bar'}) + def test_get_deletable_keys(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + + # empty set by default (nb. 
no target cache) + result = imp.get_deletable_keys() + self.assertIsInstance(result, set) + self.assertEqual(result, set()) + + setting = model.Setting(name='foo', value='bar') + cache = { + ('foo',): { + 'object': setting, + 'data': {'name': 'foo', 'value': 'bar'}, + }, + } + + with patch.multiple(imp, create=True, caches_target=True, cached_target=cache): + + # all are deletable by default + result = imp.get_deletable_keys() + self.assertEqual(result, {('foo',)}) + + # but some maybe can't be deleted + with patch.object(imp, 'can_delete_object', return_value=False): + result = imp.get_deletable_keys() + self.assertEqual(result, set()) + def test_create_target_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) @@ -299,6 +522,19 @@ class TestImporter(DataTestCase): self.assertIs(obj, setting) self.assertEqual(setting.value, 'bar') + def test_can_delete_object(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + setting = model.Setting(name='foo') + self.assertTrue(imp.can_delete_object(setting)) + + def test_delete_target_object(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + setting = model.Setting(name='foo') + # nb. 
default implementation always returns false + self.assertFalse(imp.delete_target_object(setting)) + class TestFromFile(DataTestCase): @@ -388,6 +624,20 @@ class TestToSqlalchemy(DataTestCase): kwargs.setdefault('handler', self.handler) return mod.ToSqlalchemy(self.config, **kwargs) + def test_get_target_objects(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting, target_session=self.session) + + setting1 = model.Setting(name='foo', value='bar') + self.session.add(setting1) + setting2 = model.Setting(name='foo2', value='bar2') + self.session.add(setting2) + self.session.commit() + + result = imp.get_target_objects() + self.assertEqual(len(result), 2) + self.assertEqual(set(result), {setting1, setting2}) + def test_get_target_object(self): model = self.app.model setting = model.Setting(name='foo', value='bar') @@ -414,15 +664,19 @@ class TestToSqlalchemy(DataTestCase): self.session.add(setting2) self.session.commit() - # then we should be able to fetch that via query - imp.target_session = self.session - result = imp.get_target_object(('foo2',)) - self.assertIsInstance(result, model.Setting) - self.assertIs(result, setting2) + # nb. 
disable target cache + with patch.multiple(imp, create=True, + target_session=self.session, + caches_target=False): - # but sometimes it will not be found - result = imp.get_target_object(('foo3',)) - self.assertIsNone(result) + # now we should be able to fetch that via query + result = imp.get_target_object(('foo2',)) + self.assertIsInstance(result, model.Setting) + self.assertIs(result, setting2) + + # but sometimes it will not be found + result = imp.get_target_object(('foo3',)) + self.assertIsNone(result) def test_create_target_object(self): model = self.app.model @@ -436,16 +690,13 @@ class TestToSqlalchemy(DataTestCase): self.assertEqual(setting.value, 'bar') self.assertIn(setting, self.session) - def test_get_target_objects(self): + def test_delete_target_object(self): model = self.app.model + + setting = model.Setting(name='foo', value='bar') + self.session.add(setting) + + self.assertEqual(self.session.query(model.Setting).count(), 1) imp = self.make_importer(model_class=model.Setting, target_session=self.session) - - setting1 = model.Setting(name='foo', value='bar') - self.session.add(setting1) - setting2 = model.Setting(name='foo2', value='bar2') - self.session.add(setting2) - self.session.commit() - - result = imp.get_target_objects() - self.assertEqual(len(result), 2) - self.assertEqual(set(result), {setting1, setting2}) + imp.delete_target_object(setting) + self.assertEqual(self.session.query(model.Setting).count(), 0) diff --git a/tests/importing/test_csv.py b/tests/importing/test_csv.py index 683215e..dc65e54 100644 --- a/tests/importing/test_csv.py +++ b/tests/importing/test_csv.py @@ -1,6 +1,7 @@ #-*- coding: utf-8; -*- import csv +import uuid as _uuid from unittest.mock import patch from wuttjamaican.testing import DataTestCase @@ -87,23 +88,74 @@ foo2,bar2 self.assertEqual(objects[1], {'name': 'foo2', 'value': 'bar2'}) -class MockMixinHandler(mod.FromCsvToSqlalchemyMixin, ToSqlalchemyHandler): - ToImporterBase = ToSqlalchemy +class 
MockMixinImporter(mod.FromCsvToSqlalchemyMixin, mod.FromCsv, ToSqlalchemy): + pass class TestFromCsvToSqlalchemyMixin(DataTestCase): + def setUp(self): + self.setup_db() + self.handler = ImportHandler(self.config) + + def make_importer(self, **kwargs): + kwargs.setdefault('handler', self.handler) + return MockMixinImporter(self.config, **kwargs) + + def test_constructor(self): + model = self.app.model + + # no uuid keys + imp = self.make_importer(model_class=model.Setting) + self.assertEqual(imp.uuid_keys, []) + + # typical + # nb. as of now Upgrade is the only table using proper UUID + imp = self.make_importer(model_class=model.Upgrade) + self.assertEqual(imp.uuid_keys, ['uuid']) + + def test_normalize_source_object(self): + model = self.app.model + + # no uuid keys + imp = self.make_importer(model_class=model.Setting) + result = imp.normalize_source_object({'name': 'foo', 'value': 'bar'}) + self.assertEqual(result, {'name': 'foo', 'value': 'bar'}) + + # source has proper UUID + # nb. as of now Upgrade is the only table using proper UUID + imp = self.make_importer(model_class=model.Upgrade, fields=['uuid', 'description']) + result = imp.normalize_source_object({'uuid': _uuid.UUID('06753693-d892-77f0-8000-ce71bf7ebbba'), + 'description': 'testing'}) + self.assertEqual(result, {'uuid': _uuid.UUID('06753693-d892-77f0-8000-ce71bf7ebbba'), + 'description': 'testing'}) + + # source has string uuid + # nb. 
as of now Upgrade is the only table using proper UUID + imp = self.make_importer(model_class=model.Upgrade, fields=['uuid', 'description']) + result = imp.normalize_source_object({'uuid': '06753693d89277f08000ce71bf7ebbba', + 'description': 'testing'}) + self.assertEqual(result, {'uuid': _uuid.UUID('06753693-d892-77f0-8000-ce71bf7ebbba'), + 'description': 'testing'}) + + +class MockMixinHandler(mod.FromCsvToSqlalchemyHandlerMixin, ToSqlalchemyHandler): + ToImporterBase = ToSqlalchemy + + +class TestFromCsvToSqlalchemyHandlerMixin(DataTestCase): + def make_handler(self, **kwargs): return MockMixinHandler(self.config, **kwargs) def test_get_target_model(self): - with patch.object(mod.FromCsvToSqlalchemyMixin, 'define_importers', return_value={}): + with patch.object(mod.FromCsvToSqlalchemyHandlerMixin, 'define_importers', return_value={}): handler = self.make_handler() self.assertRaises(NotImplementedError, handler.get_target_model) def test_define_importers(self): model = self.app.model - with patch.object(mod.FromCsvToSqlalchemyMixin, 'get_target_model', return_value=model): + with patch.object(mod.FromCsvToSqlalchemyHandlerMixin, 'get_target_model', return_value=model): handler = self.make_handler() importers = handler.define_importers() self.assertIn('Setting', importers) @@ -115,7 +167,7 @@ class TestFromCsvToSqlalchemyMixin(DataTestCase): def test_make_importer_factory(self): model = self.app.model - with patch.object(mod.FromCsvToSqlalchemyMixin, 'define_importers', return_value={}): + with patch.object(mod.FromCsvToSqlalchemyHandlerMixin, 'define_importers', return_value={}): handler = self.make_handler() factory = handler.make_importer_factory(model.Setting, 'Setting') self.assertTrue(issubclass(factory, mod.FromCsv))