diff --git a/CHANGELOG.md b/CHANGELOG.md index 05f5ef3..22559be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,21 +5,6 @@ All notable changes to WuttaSync will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## v0.2.0 (2024-12-07) - -### Feat - -- add `wutta import-csv` command - -### Fix - -- expose `ToWuttaHandler`, `ToWutta` in `wuttasync.importing` namespace -- implement deletion logic; add cli params for max changes -- add `--key` (or `--keys`) param for import/export commands -- add `--list-models` option for import/export commands -- require latest wuttjamaican -- add `--fields` and `--exclude` params for import/export cli - ## v0.1.0 (2024-12-05) ### Feat diff --git a/docs/glossary.rst b/docs/glossary.rst deleted file mode 100644 index c58e3d6..0000000 --- a/docs/glossary.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _glossary: - -Glossary -======== - -.. glossary:: - :sorted: - - import handler - This a type of :term:`handler` which is responsible for a - particular set of data import/export task(s). - - The import handler manages data connections and transactions, and - invokes one or more :term:`importers ` to process the - data. See also :ref:`import-handler-vs-importer`. - - Note that "import/export handler" is the more proper term to use - here but it is often shortened to just "import handler" for - convenience. - - importer - This refers to a Python class/instance responsible for processing - a particular :term:`data model` for an import/export job. - - For instance there is usually one importer per table, when - importing to the :term:`app database` (regardless of source). - See also :ref:`import-handler-vs-importer`. - - Note that "importer/exporter" is the more proper term to use here - but it is often shortened to just "importer" for convenience. diff --git a/docs/index.rst b/docs/index.rst index ea00f77..b8bf248 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,11 +5,9 @@ WuttaSync This package adds data import/export and real-time sync utilities for the `Wutta Framework `_. -*(NB. the real-time sync has not been added yet.)* +The primary use cases here are: -The primary use cases in mind are: - -* keep operational data in sync between various business systems +* keep "operational" data in sync between e.g. various business systems * import data from user-specified file * export to file @@ -24,11 +22,8 @@ database`, it may be used for any "source → target" data flow. :maxdepth: 2 :caption: Documentation - glossary narr/install - narr/cli/index - narr/concepts - narr/custom/index + narr/cli .. toctree:: :maxdepth: 1 diff --git a/docs/narr/cli/builtin.rst b/docs/narr/cli.rst similarity index 74% rename from docs/narr/cli/builtin.rst rename to docs/narr/cli.rst index 0630c94..4bee64d 100644 --- a/docs/narr/cli/builtin.rst +++ b/docs/narr/cli.rst @@ -1,12 +1,10 @@ -=================== - Built-in Commands -=================== +Built-in Commands +================= -Below are the :term:`subcommands ` which come with -WuttaSync. +WuttaSync adds some built-in ``wutta`` :term:`subcommands `. -It is fairly simple to add more; see :doc:`custom`. +See also :doc:`wuttjamaican:narr/cli/index`. .. _wutta-import-csv: diff --git a/docs/narr/cli/custom.rst b/docs/narr/cli/custom.rst deleted file mode 100644 index 837a70c..0000000 --- a/docs/narr/cli/custom.rst +++ /dev/null @@ -1,64 +0,0 @@ - -================= - Custom Commands -================= - -This section describes how to add a custom :term:`subcommand` which -wraps a particular :term:`import handler`. - -See also :doc:`wuttjamaican:narr/cli/custom` for more information -on the general concepts etc. - - -Basic Import/Export -------------------- - -Here we'll assume you have a typical "Poser" app based on Wutta -Framework, and the "Foo → Poser" (``FromFooToPoser`` handler) import -logic is defined in the ``poser.importing.foo`` module. - -We'll also assume you already have a ``poser`` top-level -:term:`command` (in ``poser.cli``), and our task now is to add the -``poser import-foo`` subcommand to wrap the import handler. - -And finally we'll assume this is just a "typical" import handler and -we do not need any custom CLI params exposed. - -Here is the code and we'll explain below:: - - from poser.cli import poser_typer - from wuttasync.cli import import_command, ImportCommandHandler - - @poser_typer.command() - @import_command - def import_foo(ctx, **kwargs): - """ - Import data from Foo API to Poser DB - """ - config = ctx.parent.wutta_config - handler = ImportCommandHandler( - config, import_handler='poser.importing.foo:FromFooToPoser') - handler.run(ctx.params) - -Hopefully it's straightforward but to be clear: - -* subcommand is really just a function, **with desired name** -* wrap with ``@poser_typer.command()`` to register as subcomand -* wrap with ``@import_command`` to get typical CLI params -* call ``ImportCommandHandler.run()`` with import handler spec - -So really - in addition to -:func:`~wuttasync.cli.base.import_command()` - the -:class:`~wuttasync.cli.base.ImportCommandHandler` is doing the heavy -lifting for all import/export subcommands, it just needs to know which -:term:`import handler` to use. - -.. note:: - - If your new subcommand is defined in a different module than is the - top-level command (e.g. as in example above) then you may need to - "eagerly" import the subcommand module. (Otherwise auto-discovery - may not find it.) - - This is usually done from within the top-level command's module, - since it is always imported early due to the entry point. diff --git a/docs/narr/cli/index.rst b/docs/narr/cli/index.rst deleted file mode 100644 index 96be6c7..0000000 --- a/docs/narr/cli/index.rst +++ /dev/null @@ -1,23 +0,0 @@ - -======================== - Command Line Interface -======================== - -The primary way of using the import/export framework day to day is via -the command line. - -WuttJamaican defines the ``wutta`` :term:`command` and WuttaSync comes -with some extra :term:`subcommands ` for importing to / -exporting from the Wutta :term:`app database`. - -It is fairly simple to add a dedicated subcommand for any -:term:`import handler`; see below. - -And for more general info about CLI see -:doc:`wuttjamaican:narr/cli/index`. - -.. toctree:: - :maxdepth: 2 - - builtin - custom diff --git a/docs/narr/concepts.rst b/docs/narr/concepts.rst deleted file mode 100644 index 93d09a3..0000000 --- a/docs/narr/concepts.rst +++ /dev/null @@ -1,54 +0,0 @@ - -Concepts -======== - -Things hopefully are straightforward but it's important to get the -following straight in your head; the rest will come easier if you do. - - -Source vs. Target ------------------ - -Data always flows from source to target, it is the #1 rule. - -Docs and command output will always reflect this, e.g. **CSV → -Wutta**. - -Source and target can be anything as long as the :term:`import -handler` and :term:`importer(s) ` implement the desired -logic. The :term:`app database` is often involved but not always. - - -Import vs. Export ------------------ - -Surprise, there is no difference. After all from target's perspective -everything is really an import. - -Sometimes it's more helpful to think of it as an export, e.g. **Wutta -→ CSV** really seems like an export. In such cases the -:attr:`~wuttasync.importing.handlers.ImportHandler.orientation` may be -set to reflect the distinction. - - -.. _import-handler-vs-importer: - -Import Handler vs. Importer ---------------------------- - -The :term:`import handler` is sort of the "wrapper" around one or more -:term:`importers ` and the latter contain the table-specific -sync logic. - -In a DB or similar context, the import handler will make the -connection, then invoke all requested importers, then commit -transaction at the end (or rollback if dry-run). - -And each importer will read data from source, and usually also read -data from target, then compare data sets and finally write data to -target as needed. But each would usually do this for just one table. - -See also the base classes for each: - -* :class:`~wuttasync.importing.handlers.ImportHandler` -* :class:`~wuttasync.importing.base.Importer` diff --git a/docs/narr/custom/command.rst b/docs/narr/custom/command.rst deleted file mode 100644 index 39eaeae..0000000 --- a/docs/narr/custom/command.rst +++ /dev/null @@ -1,9 +0,0 @@ - -Define Command -============== - -Now that you have defined the import handler plus any importers -required, you'll want to define a command line interface to use it. - -This section is here for completeness but the process is described -elsewhere; see :doc:`/narr/cli/custom`. diff --git a/docs/narr/custom/conventions.rst b/docs/narr/custom/conventions.rst deleted file mode 100644 index 3ce686a..0000000 --- a/docs/narr/custom/conventions.rst +++ /dev/null @@ -1,90 +0,0 @@ - -Conventions -=========== - -Below are recommended conventions for structuring and naming the files -in your project relating to import/export. - -The intention for these rules is that they are "intuitive" based on -the fact that all data flows from source to target and therefore can -be thought of as "importing" in virtually all cases. - -But there are a lot of edge cases out there so YMMV. - - -"The Rules" ------------ - -There are exceptions to these of course, but in general: - -* regarding how to think about these conventions: - - * always look at it from target's perspective - - * always look at it as an *import*, not export - -* "final" logic is always a combo of: - - * "base" logic for how target data read/write happens generally - - * "specific" logic for how that happens using a particular data source - -* targets each get their own subpackage within project - - * and within that, also an ``importing`` (nested) subpackage - - * and within *that* is where the files live, referenced next - - * target ``model.py`` should contain ``ToTarget`` importer base class - - * also may have misc. per-model base classes, e.g. ``WidgetImporter`` - - * also may have ``ToTargetHandler`` base class if applicable - - * sources each get their own module, named after the source - - * should contain the "final" handler class, e.g. ``FromSourceToTarget`` - - * also contains "final" importer classes needed by handler (e.g. ``WidgetImporter``) - - -Example -------- - -That's a lot of rules so let's see it. Here we assume a Wutta-based -app named Poser and it integrates with a Foo API in the cloud. Data -should flow both ways so we will be thinking of this as: - -* **Foo → Poser import** -* **Poser → Foo export** - -Here is the suggested file layout: - -.. code-block:: none - - poser/ - ├── foo/ - │ ├── __init__.py - │ ├── api.py - │ └── importing/ - │ ├── __init__.py - │ ├── model.py - │ └── poser.py - └── importing/ - ├── __init__.py - ├── foo.py - └── model.py - -And the module breakdown: - -* ``poser.foo.api`` has e.g. ``FooAPI`` interface logic - -**Foo → Poser import** (aka. "Poser imports from Foo") - -* ``poser.importing.model`` has ``ToPoserHandler``, ``ToPoser`` and per-model base importers -* ``poser.importing.foo`` has ``FromFooToPoser`` plus final importers - -**Poser → Foo export** (aka. "Foo imports from Poser") - -* ``poser.foo.importing.model`` has ``ToFooHandler``, ``ToFoo`` and per-model base importer -* ``poser.foo.importing.poser`` has ``FromPoserToFoo`` plus final importers diff --git a/docs/narr/custom/handler.rst b/docs/narr/custom/handler.rst deleted file mode 100644 index cb2b74d..0000000 --- a/docs/narr/custom/handler.rst +++ /dev/null @@ -1,93 +0,0 @@ - -Define Import Handler -===================== - -The obvious step here is to define a new :term:`import handler`, which -ultimately inherits from -:class:`~wuttasync.importing.handlers.ImportHandler`. But the choice -of which class(es) *specifically* to inherit from, is a bit more -complicated. - - -Choose the Base Class(es) -------------------------- - -If all else fails, or to get started simply, you can always just -inherit from :class:`~wuttasync.importing.handlers.ImportHandler` -directly as the only base class. You'll have to define any methods -needed to implement desired behavior. - -However depending on your particular source and/or target, there may -be existing base classes defined somewhere from which you can inherit. -This may save you some effort, and/or is just a good idea to share -code where possible. - -Keep in mind your import handler can inherit from multiple base -classes, and often will - one base for the source side, and another -for the target side. For instance:: - - from wuttasync.importing import FromFileHandler, ToWuttaHandler - - class FromExcelToPoser(FromFileHandler, ToWuttaHandler): - """ - Handler for Excel file → Poser app DB - """ - -You generally will still need to define/override some methods to -customize behavior. - -All built-in base classes live under :mod:`wuttasync.importing`. - - -.. _register-importer: - -Register Importer(s) --------------------- - -If nothing else, most custom handlers must override -:meth:`~wuttasync.importing.handlers.ImportHandler.define_importers()` -to "register" importer(s) as appropriate. There are two primary goals -here: - -* add "new" (totally custom) importers -* override "existing" importers (inherited from base class) - -Obviously for this to actually work the importer(s) must exist in -code; see :doc:`importer`. - -As an example let's say there's a ``FromFooToWutta`` handler which -defines a ``Widget`` importer. - -And let's say you want to customize that, by tweaking slightly the -logic for ``WigdetImporter`` and adding a new ``SprocketImporter``:: - - from somewhere_else import (FromFooToWutta, ToWutta, - WidgetImporter as WidgetImporterBase) - - class FromFooToPoser(FromFooToWutta): - """ - Handler for Foo -> Poser - """ - - def define_importers(self): - - # base class defines the initial set - importers = super().define_importers() - - # override widget importer - importers['Widget'] = WidgetImporter - - # add sprocket importer - importers['Sprocket'] = SprocketImporter - - return importers - - class SprocketImporter(ToWutta): - """ - Sprocket importer for Foo -> Poser - """ - - class WidgetImporter(WidgetImporterBase): - """ - Widget importer for Foo -> Poser - """ diff --git a/docs/narr/custom/importer.rst b/docs/narr/custom/importer.rst deleted file mode 100644 index c9b6674..0000000 --- a/docs/narr/custom/importer.rst +++ /dev/null @@ -1,149 +0,0 @@ - -Define Importer(s) -================== - -Here we'll describe how to make a custom :term:`importer/exporter -`, which can process a given :term:`data model`. - -.. - The example will assume a **Foo → Poser import** for the ``Widget`` - :term:`data model`. - - -Choose the Base Class(es) -------------------------- - -As with the :term:`import handler`, the importer "usually" will have -two base classes: one for the target side and another for the source. - -The base class for target side is generally more fleshed out, with -logic to read/write data for the given target model. Whereas the base -class for the source side could just be a stub. In the latter case, -one might choose to skip it and inherit only from the target base -class. - -In any case the final importer class you define can override any/all -logic from either base class if needed. - - -Example: Foo → Poser import ---------------------------- - -Here we'll assume a Wutta-based app named "Poser" which will be -importing "Widget" data from the "Foo API" cloud service. - -In this case we will inherit from a base class for the target side, -which already knows how to talk to the :term:`app database` via -SQLAlchemy ORM. - -But for the source side, there is no existing base class for the Foo -API service, since that is just made-up - so we will also define our -own base class for that:: - - from wuttasync.importing import Importer, ToWutta - - # nb. this is not real of course, but an example - from poser.foo.api import FooAPI - - class FromFoo(Importer): - """ - Base class for importers using Foo API as source - """ - - def setup(self): - """ - Establish connection to Foo API - """ - self.foo_api = FooAPI(self.config) - - class WidgetImporter(FromFoo, ToWutta): - """ - Widget importer for Foo -> Poser - """ - - def get_source_objects(self): - """ - Fetch all "raw" widgets from Foo API - """ - # nb. also not real, just example - return self.foo_api.get_widgets() - - def normalize_source_object(self, widget): - """ - Convert the "raw" widget we receive from Foo API, to a - "normalized" dict with data for all fields which are part of - the processing request. - """ - return { - 'id': widget.id, - 'name': widget.name, - } - - -Example: Poser → Foo export ---------------------------- - -In the previous scenario we imported data from Foo to Poser, and here -we'll do the reverse, exporting from Poser to Foo. - -As of writing the base class logic for exporting from Wutta :term:`app -database` does not yet exist. And the Foo API is just made-up so -we'll add one-off base classes for both sides:: - - from wuttasync.importing import Importer - - class FromWutta(Importer): - """ - Base class for importers using Wutta DB as source - """ - - class ToFoo(Importer): - """ - Base class for exporters targeting Foo API - """ - - class WidgetImporter(FromWutta, ToFoo): - """ - Widget exporter for Poser -> Foo - """ - - def get_source_objects(self): - """ - Fetch all widgets from the Poser app DB. - - (see note below regarding the db session) - """ - model = self.app.model - return self.source_session.query(model.Widget).all() - - def normalize_source_object(self, widget): - """ - Convert the "raw" widget from Poser app (ORM) to a - "normalized" dict with data for all fields which are part of - the processing request. - """ - return { - 'id': widget.id, - 'name': widget.name, - } - -Note that the ``get_source_objects()`` method shown above makes use of -a ``source_session`` attribute - where did that come from? - -This is actually not part of the importer proper, but rather this -attribute is set by the :term:`import handler`. And that will ony -happen if the importer is being invoked by a handler which supports -it. So none of that is shown here, but FYI. - -(And again, that logic isn't written yet, but there will "soon" be a -``FromSqlalchemyHandler`` class defined which implements this.) - - -Regster with Import Handler ---------------------------- - -After you define the importer/exporter class (as shown above) you also -must "register" it within the import/export handler. - -This section is here for completeness but the process is described -elsewhere; see :ref:`register-importer`. diff --git a/docs/narr/custom/index.rst b/docs/narr/custom/index.rst deleted file mode 100644 index 7e75146..0000000 --- a/docs/narr/custom/index.rst +++ /dev/null @@ -1,21 +0,0 @@ - -Custom Import/Export -==================== - -This section explains what's required to make your own import/export -tasks. - -See also :doc:`/narr/concepts` for some terminology etc. - -.. - The examples throughout the sections below will often involve a - theoretical **Foo → Poser** import, where Poser is a typical - Wutta-based app and Foo is some API in the cloud. - -.. toctree:: - :maxdepth: 2 - - conventions - handler - importer - command diff --git a/pyproject.toml b/pyproject.toml index a198b0e..ac28730 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" [project] name = "WuttaSync" -version = "0.2.0" +version = "0.1.0" description = "Wutta Framework for data import/export and real-time sync" readme = "README.md" authors = [{name = "Lance Edgar", email = "lance@wuttaproject.org"}] diff --git a/src/wuttasync/cli/__init__.py b/src/wuttasync/cli/__init__.py index c77a4e2..70de7ac 100644 --- a/src/wuttasync/cli/__init__.py +++ b/src/wuttasync/cli/__init__.py @@ -25,12 +25,11 @@ WuttaSync - ``wutta`` subcommands This namespace exposes the following: -* :func:`~wuttasync.cli.base.import_command()` -* :func:`~wuttasync.cli.base.file_import_command()` -* :class:`~wuttasync.cli.base.ImportCommandHandler` +* :func:`~wuttasync.cli.base.importer_command()` +* :func:`~wuttasync.cli.base.file_importer_command()` """ -from .base import import_command, file_import_command, ImportCommandHandler +from .base import importer_command, file_importer_command # nb. must bring in all modules for discovery to work from . import import_csv diff --git a/src/wuttasync/cli/base.py b/src/wuttasync/cli/base.py index 6368c6a..008dd5b 100644 --- a/src/wuttasync/cli/base.py +++ b/src/wuttasync/cli/base.py @@ -25,8 +25,6 @@ """ import inspect -import logging -import sys from pathlib import Path from typing import List, Optional from typing_extensions import Annotated @@ -34,167 +32,41 @@ from typing_extensions import Annotated import makefun import typer -from wuttjamaican.app import GenericHandler -from wuttasync.importing import ImportHandler +def importer_command_template( -log = logging.getLogger(__name__) - - -class ImportCommandHandler(GenericHandler): - """ - This is the :term:`handler` responsible for import/export command - line runs. - - Normally, the command (actually :term:`subcommand`) logic will - create this handler and call its :meth:`run()` method. - - This handler does not know how to import/export data, but it knows - how to make its :attr:`import_handler` do it. - - :param import_handler: During construction, caller can specify the - :attr:`import_handler` as any of: - - * import handler instance - * import handler factory (e.g. class) - * import handler spec (cf. :func:`~wuttjamaican:wuttjamaican.util.load_object()`) - - For example:: - - handler = ImportCommandHandler( - config, import_handler='wuttasync.importing.csv:FromCsvToWutta') - """ - - import_handler = None - """ - Reference to the :term:`import handler` instance, which is to be - invoked when command runs. See also :meth:`run()`. - """ - - def __init__(self, config, import_handler=None): - super().__init__(config) - - if import_handler: - if isinstance(import_handler, ImportHandler): - self.import_handler = import_handler - elif callable(import_handler): - self.import_handler = import_handler(self.config) - else: # spec - factory = self.app.load_object(import_handler) - self.import_handler = factory(self.config) - - def run(self, params, progress=None): - """ - Run the import/export job(s) based on command line params. - - This mostly just calls - :meth:`~wuttasync.importing.handlers.ImportHandler.process_data()` - for the :attr:`import_handler`. - - Unless ``--list-models`` was specified on the command line in - which case we do :meth:`list_models()` instead. - - :param params: Dict of params from command line. This must - include a ``'models'`` key, the rest are optional. - - :param progress: Optional progress indicator factory. - """ - - # maybe just list models and bail - if params.get('list_models'): - self.list_models(params) - return - - # otherwise process some data - kw = dict(params) - models = kw.pop('models') - log.debug("using handler: %s", self.import_handler.get_spec()) - # TODO: need to use all/default models if none specified - # (and should know models by now for logging purposes) - log.debug("running %s %s for: %s", - self.import_handler, - self.import_handler.orientation.value, - ', '.join(models)) - log.debug("params are: %s", kw) - self.import_handler.process_data(*models, **kw) - - def list_models(self, params): - """ - Query the :attr:`import_handler`'s supported target models and - print the info to stdout. - - This is what happens when command line has ``--list-models``. - """ - sys.stdout.write("ALL MODELS:\n") - sys.stdout.write("==============================\n") - for key in self.import_handler.importers: - sys.stdout.write(key) - sys.stdout.write("\n") - sys.stdout.write("==============================\n") - - -def import_command_template( - + # model keys models: Annotated[ Optional[List[str]], typer.Argument(help="Model(s) to process. Can specify one or more, " - "or omit to process default models.")] = None, - - list_models: Annotated[ - bool, - typer.Option('--list-models', '-l', - help="List available target models and exit.")] = False, + "or omit to process all default models.")] = None, + # allow create? create: Annotated[ bool, - typer.Option(help="Allow new target records to be created. " - "See aso --max-create.")] = True, + typer.Option(help="Allow new target records to be created.")] = True, + # allow update? update: Annotated[ bool, - typer.Option(help="Allow existing target records to be updated. " - "See also --max-update.")] = True, + typer.Option(help="Allow existing target records to be updated.")] = True, + # allow delete? delete: Annotated[ bool, - typer.Option(help="Allow existing target records to be deleted. " - "See also --max-delete.")] = False, + typer.Option(help="Allow existing target records to be deleted.")] = False, + # fields fields: Annotated[ str, typer.Option('--fields', - help="List of fields to process. See also --exclude and --key.")] = None, - + help="List of fields to process. See also --exclude.")] = None, excluded_fields: Annotated[ str, typer.Option('--exclude', help="List of fields *not* to process. See also --fields.")] = None, - keys: Annotated[ - str, - typer.Option('--key', '--keys', - help="List of fields to use as record key/identifier. " - "See also --fields.")] = None, - - max_create: Annotated[ - int, - typer.Option(help="Max number of target records to create (per model). " - "See also --create.")] = None, - - max_update: Annotated[ - int, - typer.Option(help="Max number of target records to update (per model). " - "See also --update.")] = None, - - max_delete: Annotated[ - int, - typer.Option(help="Max number of target records to delete (per model). " - "See also --delete.")] = None, - - max_total: Annotated[ - int, - typer.Option(help="Max number of *any* target record changes which may occur (per model).")] = None, - + # dry run? dry_run: Annotated[ bool, typer.Option('--dry-run', @@ -203,22 +75,22 @@ def import_command_template( ): """ Stub function which provides a common param signature; used with - :func:`import_command()`. + :func:`importer_command()`. """ -def import_command(fn): +def importer_command(fn): """ Decorator for import/export commands. Adds common params based on - :func:`import_command_template()`. + :func:`importer_command_template()`. To use this, e.g. for ``poser import-foo`` command:: from poser.cli import poser_typer - from wuttasync.cli import import_command, ImportCommandHandler + from wuttasync.cli import importer_command @poser_typer.command() - @import_command + @importer_command def import_foo( ctx: typer.Context, **kwargs @@ -226,15 +98,16 @@ def import_command(fn): \""" Import data from Foo API to Poser DB \""" - config = ctx.parent.wutta_config - handler = ImportCommandHandler( - config, import_handler='poser.importing.foo:FromFooToPoser') - handler.run(ctx.params) + from poser.importing.foo import FromFooToPoser - See also :class:`ImportCommandHandler`. + config = ctx.parent.wutta_config + kw = dict(ctx.params) + models = kw.pop('models') + handler = FromFooToPoser(config) + handler.process_data(*models, **kw) """ original_sig = inspect.signature(fn) - reference_sig = inspect.signature(import_command_template) + reference_sig = inspect.signature(importer_command_template) params = list(original_sig.parameters.values()) for i, param in enumerate(reference_sig.parameters.values()): @@ -247,8 +120,7 @@ def import_command(fn): return makefun.create_function(final_sig, fn) -def file_import_command_template( - +def file_importer_command_template( input_file_path: Annotated[ Path, typer.Option('--input-path', @@ -256,28 +128,27 @@ def file_import_command_template( help="Path to input file(s). Can be a folder " "if app logic can guess the filename(s); " "otherwise must be complete file path.")] = ..., - ): """ Stub function to provide signature for import/export commands which require input file. Used with - :func:`file_import_command()`. + :func:`file_importer_command()`. """ -def file_import_command(fn): +def file_importer_command(fn): """ Decorator for import/export commands which require input file. Adds common params based on - :func:`file_import_command_template()`. + :func:`file_importer_command_template()`. To use this, it's the same method as shown for - :func:`import_command()` except in this case you would use the - ``file_import_command`` decorator. + :func:`importer_command()` except in this case you would use the + ``file_importer_command`` decorator. """ original_sig = inspect.signature(fn) - plain_import_sig = inspect.signature(import_command_template) - file_import_sig = inspect.signature(file_import_command_template) + plain_import_sig = inspect.signature(importer_command_template) + file_import_sig = inspect.signature(file_importer_command_template) desired_params = ( list(plain_import_sig.parameters.values()) + list(file_import_sig.parameters.values())) diff --git a/src/wuttasync/cli/import_csv.py b/src/wuttasync/cli/import_csv.py index 50c2a83..7600d5f 100644 --- a/src/wuttasync/cli/import_csv.py +++ b/src/wuttasync/cli/import_csv.py @@ -30,11 +30,11 @@ import typer from wuttjamaican.cli import wutta_typer -from .base import file_import_command, ImportCommandHandler +from .base import file_importer_command @wutta_typer.command() -@file_import_command +@file_importer_command def import_csv( ctx: typer.Context, **kwargs @@ -42,7 +42,10 @@ def import_csv( """ Import data from CSV file(s) to Wutta DB """ + from wuttasync.importing.csv import FromCsvToWutta + config = ctx.parent.wutta_config - handler = ImportCommandHandler( - config, import_handler='wuttasync.importing.csv:FromCsvToWutta') - handler.run(ctx.params) + kw = dict(ctx.params) + models = kw.pop('models') + handler = FromCsvToWutta(config) + handler.process_data(*models, **kw) diff --git a/src/wuttasync/importing/__init__.py b/src/wuttasync/importing/__init__.py index 03a421f..b43d5d3 100644 --- a/src/wuttasync/importing/__init__.py +++ b/src/wuttasync/importing/__init__.py @@ -22,27 +22,7 @@ ################################################################################ """ Data Import / Export Framework - -This namespace exposes the following: - -* :enum:`~wuttasync.importing.handlers.Orientation` - -And some :term:`import handler` base classes: - -* :class:`~wuttasync.importing.handlers.ImportHandler` -* :class:`~wuttasync.importing.handlers.FromFileHandler` -* :class:`~wuttasync.importing.handlers.ToSqlalchemyHandler` -* :class:`~wuttasync.importing.wutta.ToWuttaHandler` - -And some :term:`importer` base classes: - -* :class:`~wuttasync.importing.base.Importer` -* :class:`~wuttasync.importing.base.FromFile` -* :class:`~wuttasync.importing.base.ToSqlalchemy` -* :class:`~wuttasync.importing.model.ToWutta` """ from .handlers import Orientation, ImportHandler, FromFileHandler, ToSqlalchemyHandler from .base import Importer, FromFile, ToSqlalchemy -from .model import ToWutta -from .wutta import ToWuttaHandler diff --git a/src/wuttasync/importing/base.py b/src/wuttasync/importing/base.py index 59017d7..d46ae67 100644 --- a/src/wuttasync/importing/base.py +++ b/src/wuttasync/importing/base.py @@ -26,7 +26,6 @@ Data Importer base class import os import logging -from collections import OrderedDict from sqlalchemy import orm from sqlalchemy_utils.functions import get_primary_keys, get_columns @@ -37,13 +36,6 @@ from wuttasync.util import data_diffs log = logging.getLogger(__name__) -class ImportLimitReached(Exception): - """ - Exception raised when an import/export job reaches the max number - of changes allowed. - """ - - class Importer: """ Base class for all data importers / exporters. @@ -182,11 +174,6 @@ class Importer: :meth:`get_target_cache()`. """ - max_create = None - max_update = None - max_delete = None - max_total = None - def __init__(self, config, **kwargs): self.config = config self.app = self.config.get_app() @@ -322,17 +309,10 @@ class Importer: :returns: List of "key" field names. """ - keys = None - # nb. prefer 'keys' but use 'key' as fallback - if hasattr(self, 'keys'): - keys = self.keys - elif hasattr(self, 'key'): + if hasattr(self, 'key'): keys = self.key - if keys: if isinstance(keys, str): - keys = self.config.parse_list(keys) - # nb. save for next time - self.keys = keys + keys = [keys] return keys return list(get_primary_keys(self.model_class)) @@ -367,26 +347,9 @@ class Importer: Note that subclass generally should not override this method, but instead some of the others. - This first calls :meth:`setup()` to prepare things as needed. - - If no source data is specified, it calls - :meth:`normalize_source_data()` to get that. Regardless, it - also calls :meth:`get_unique_data()` to discard any - duplicates. - - If :attr:`caches_target` is set, it calls - :meth:`get_target_cache()` and assigns result to - :attr:`cached_target`. - - Then depending on values for :attr:`create`, :attr:`update` - and :attr:`delete` it may call: - - * :meth:`do_create_update()` - * :meth:`do_delete()` - - And finally it calls :meth:`teardown()` for cleanup. - - :param source_data: Sequence of normalized source data, if known. + :param source_data: Optional sequence of normalized source + data. If not specified, it is obtained from + :meth:`normalize_source_data()`. :param progress: Optional progress indicator factory. @@ -396,6 +359,13 @@ class Importer: * ``created`` - list of records created on the target * ``updated`` - list of records updated on the target * ``deleted`` - list of records deleted on the target + + See also these methods which this one calls: + + * :meth:`setup()` + * :meth:`do_create_update()` + * :meth:`do_delete()` + * :meth:`teardown()` """ # TODO: should add try/catch around this all? and teardown() in finally: clause? self.setup() @@ -403,15 +373,12 @@ class Importer: updated = [] deleted = [] - log.debug("using key fields: %s", ', '.join(self.get_keys())) - # get complete set of normalized source data if source_data is None: source_data = self.normalize_source_data(progress=progress) - # nb. prune duplicate records from source data - source_data, source_keys = self.get_unique_data(source_data) - + # TODO: should exclude duplicate source records + # source_data, unique = self.get_unique_data(source_data) model_title = self.get_model_title() log.debug(f"got %s {model_title} records from source", len(source_data)) @@ -426,12 +393,7 @@ class Importer: # delete target data if self.delete: - changes = len(created) + len(updated) - if self.max_total and changes >= self.max_total: - log.debug("max of %s total changes already reached; skipping deletions", - self.max_total) - else: - deleted = self.do_delete(source_keys, changes, progress=progress) + deleted = self.do_delete(source_data) self.teardown() return created, updated, deleted @@ -489,16 +451,6 @@ class Importer: target_data=target_data) updated.append((target_object, target_data, source_data)) - # stop if we reach max allowed - if self.max_update and len(updated) >= self.max_update: - log.warning("max of %s *updated* records has been reached; stopping now", - self.max_update) - raise ImportLimitReached() - elif self.max_total and (len(created) + len(updated)) >= self.max_total: - log.warning("max of %s *total changes* has been reached; stopping now", - self.max_total) - raise ImportLimitReached() - elif not target_object and self.create: # target object not yet present, so create it @@ -512,94 +464,23 @@ class Importer: # 'object': target_object, # 'data': self.normalize_target_object(target_object), # } - - # stop if we reach max allowed - if self.max_create and len(created) >= self.max_create: - log.warning("max of %s *created* records has been reached; stopping now", - self.max_create) - raise ImportLimitReached() - elif self.max_total and (len(created) + len(updated)) >= self.max_total: - log.warning("max of %s *total changes* has been reached; stopping now", - self.max_total) - raise ImportLimitReached() - else: log.debug("did NOT create new %s for key: %s", model_title, key) actioning = self.actioning.capitalize() target_title = self.handler.get_target_title() - try: - self.app.progress_loop(create_update, all_source_data, progress, - message=f"{actioning} {model_title} data to {target_title}") - except ImportLimitReached: - pass + self.app.progress_loop(create_update, all_source_data, progress, + message=f"{actioning} {model_title} data to {target_title}") return created, updated - def do_delete(self, source_keys, changes=None, progress=None): + def do_delete(self, source_data, progress=None): """ - Delete records from the target side as needed, per the given - source data. + TODO: not yet implemented - This will call :meth:`get_deletable_keys()` to discover which - keys existing on the target side could theoretically allow - being deleted. - - From that set it will remove all the given source keys - since - such keys still exist on the source, they should not be - deleted from target. - - If any "deletable" keys remain, their corresponding objects - are removed from target via :meth:`delete_target_object()`. - - :param source_keys: A ``set`` of keys for all source records. - Essentially this is just the list of keys for which target - records should *not* be deleted - since they still exist in - the data source. - - :param changes: Number of changes which have already been made - on the target side. Used to enforce max allowed changes, - if applicable. - - :param progress: Optional progress indicator factory. - - :returns: List of target records which were deleted. + :returns: List of records deleted on the target. """ - deleted = [] - changes = changes or 0 - - # which target records are deletable? potentially all target - # records may be eligible, but anything also found in source - # is *not* eligible. - deletable = self.get_deletable_keys() - source_keys - log.debug("found %s records to delete", len(deletable)) - - def delete(key, i): - cached = self.cached_target.pop(key) - obj = cached['object'] - - # delete target object - if self.delete_target_object(obj): - deleted.append((obj, cached['data'])) - - # stop if we reach max allowed - if self.max_delete and len(deleted) >= self.max_delete: - log.warning("max of %s *deleted* records has been reached; stopping now", - self.max_delete) - raise ImportLimitReached() - elif self.max_total and (changes + len(deleted)) >= self.max_total: - log.warning("max of %s *total changes* has been reached; stopping now", - self.max_total) - raise ImportLimitReached() - - try: - model_title = self.get_model_title() - self.app.progress_loop(delete, sorted(deletable), progress, - message=f"Deleting {model_title} records") - except ImportLimitReached: - pass - - return deleted + return [] def get_record_key(self, data): """ @@ -689,49 +570,6 @@ class Importer: message=f"Reading {model_title} data from {source_title}") return normalized - def get_unique_data(self, source_data): - """ - Return a copy of the given source data, with any duplicate - records removed. - - This looks for duplicates based on the effective key fields, - cf. :meth:`get_keys()`. The first record found with a given - key is kept; subsequent records with that key are discarded. - - This is called from :meth:`process_data()` and is done largely - for sanity's sake, to avoid indeterminate behavior when source - data contains duplicates. For instance: - - Problem #1: If source contains 2 records with key 'X' it makes - no sense to create both records on the target side. - - Problem #2: if the 2 source records have different data (apart - from their key) then which should target reflect? - - So the main point of this method is to discard the duplicates - to avoid problem #1, but do it in a deterministic way so at - least the "choice" of which record is kept will not vary - across runs; hence "pseudo-resolve" problem #2. - - :param source_data: Sequence of normalized source data. - - :returns: A 2-tuple of ``(source_data, unique_keys)`` where: - - * ``source_data`` is the final list of source data - * ``unique_keys`` is a :class:`python:set` of the source record keys - """ - unique = OrderedDict() - for data in source_data: - key = self.get_record_key(data) - if key in unique: - log.warning("duplicate %s records detected from %s for key: %s", - self.get_model_title(), - self.handler.get_source_title(), - key) - else: - unique[key] = data - return list(unique.values()), set(unique) - def get_source_objects(self): """ This method (if applicable) should return a sequence of "raw" @@ -907,38 +745,6 @@ class Importer: for field in fields]) return data - def get_deletable_keys(self, progress=None): - """ - Return a set of record keys from the target side, which are - *potentially* eligible for deletion. - - Inclusion in this set does not imply a given record/key - *should* be deleted, only that app logic (e.g. business rules) - does not prevent it. - - Default logic here will look in the :attr:`cached_target` and - then call :meth:`can_delete_object()` for each record in the - cache. If that call returns true for a given key, it is - included in the result. - - :returns: The ``set`` of target record keys eligible for - deletion. - """ - if not self.caches_target: - return set() - - keys = set() - - def check(key, i): - data = self.cached_target[key]['data'] - obj = self.cached_target[key]['object'] - if self.can_delete_object(obj, data): - keys.add(key) - - self.app.progress_loop(check, set(self.cached_target), progress, - message="Determining which objects can be deleted") - return keys - ############################## # CRUD methods ############################## @@ -1044,40 +850,6 @@ class Importer: return obj - def can_delete_object(self, obj, data=None): - """ - Should return true or false indicating whether the given - object "can" be deleted. Default is to return true in all - cases. - - If you return false then the importer will know not to call - :meth:`delete_target_object()` even if the data sets imply - that it should. - - :param obj: Raw object on the target side. - - :param data: Normalized data dict for the target record, if - known. - - :returns: ``True`` if object can be deleted, else ``False``. - """ - return True - - def delete_target_object(self, obj): - """ - Delete the given raw object from the target side, and return - true if successful. - - This is called from :meth:`do_delete()`. - - Default logic for this method just returns false; subclass - should override if needed. - - :returns: Should return ``True`` if deletion succeeds, or - ``False`` if deletion failed or was skipped. - """ - return False - class FromFile(Importer): """ @@ -1224,9 +996,10 @@ class ToSqlalchemy(Importer): """ Tries to fetch the object from target DB using ORM query. """ - # use default logic to fetch from cache, if applicable - if self.caches_target: - return super().get_target_object(key) + # first the default logic in case target object is cached + obj = super().get_target_object(key) + if obj: + return obj # okay now we must fetch via query query = self.target_session.query(self.model_class) @@ -1237,6 +1010,15 @@ class ToSqlalchemy(Importer): except orm.exc.NoResultFound: pass + def create_target_object(self, key, source_data): + """ """ + with self.target_session.no_autoflush: + obj = super().create_target_object(key, source_data) + if obj: + # nb. add new object to target db session + self.target_session.add(obj) + return obj + def get_target_objects(self, source_data=None, progress=None): """ Fetches target objects via the ORM query from @@ -1252,17 +1034,3 @@ class ToSqlalchemy(Importer): :meth:`get_target_objects()`. """ return self.target_session.query(self.model_class) - - def create_target_object(self, key, source_data): - """ """ - with self.target_session.no_autoflush: - obj = super().create_target_object(key, source_data) - if obj: - # nb. add new object to target db session - self.target_session.add(obj) - return obj - - def delete_target_object(self, obj): - """ """ - self.target_session.delete(obj) - return True diff --git a/src/wuttasync/importing/csv.py b/src/wuttasync/importing/csv.py index e1937b5..1c62818 100644 --- a/src/wuttasync/importing/csv.py +++ b/src/wuttasync/importing/csv.py @@ -26,12 +26,11 @@ Importing from CSV import csv import logging -import uuid as _uuid from collections import OrderedDict from sqlalchemy_utils.functions import get_primary_keys -from wuttjamaican.db.util import make_topo_sortkey, UUID +from wuttjamaican.db.util import make_topo_sortkey from .base import FromFile from .handlers import FromFileHandler @@ -139,54 +138,7 @@ class FromCsv(FromFile): class FromCsvToSqlalchemyMixin: """ - Mixin class for CSV → SQLAlchemy ORM :term:`importers `. - - Meant to be used by :class:`FromCsvToSqlalchemyHandlerMixin`. - - This mixin adds some logic to better handle ``uuid`` key fields - which are of :class:`~wuttjamaican:wuttjamaican.db.util.UUID` data - type (i.e. on the target side). Namely, when reading ``uuid`` - values as string from CSV, convert them to proper UUID instances, - so the key matching between source and target will behave as - expected. - """ - - def __init__(self, config, **kwargs): - super().__init__(config, **kwargs) - - # nb. keep track of any key fields which use proper UUID type - self.uuid_keys = [] - for field in self.get_keys(): - attr = getattr(self.model_class, field) - if len(attr.prop.columns) == 1: - if isinstance(attr.prop.columns[0].type, UUID): - self.uuid_keys.append(field) - - def normalize_source_object(self, obj): - """ """ - data = dict(obj) - - # nb. convert to proper UUID values so key matching will work - # properly, where applicable - for key in self.uuid_keys: - uuid = data[key] - if uuid and not isinstance(uuid, _uuid.UUID): - data[key] = _uuid.UUID(uuid) - - return data - - -class FromCsvToSqlalchemyHandlerMixin: - """ - Mixin class for CSV → SQLAlchemy ORM :term:`import handlers - `. - - This knows how to dynamically generate :term:`importer` classes to - target the particular ORM involved. Such classes will inherit - from :class:`FromCsvToSqlalchemyMixin`, in addition to whatever - :attr:`FromImporterBase` and :attr:`ToImporterBase` reference. - - This all happens within :meth:`define_importers()`. + Mixin handler class for CSV → SQLAlchemy ORM import/export. """ source_key = 'csv' generic_source_title = "CSV" @@ -249,39 +201,30 @@ class FromCsvToSqlalchemyHandlerMixin: return importers - def make_importer_factory(self, model_class, name): + def make_importer_factory(self, cls, name): """ - Generate and return a new :term:`importer` class, targeting - the given :term:`data model` class. + Generate and return a new importer/exporter class, targeting + the given data model class. - The newly-created class will inherit from: + :param cls: A data model class. - * :class:`FromCsvToSqlalchemyMixin` - * :attr:`FromImporterBase` - * :attr:`ToImporterBase` + :param name: Optional "model name" override for the + importer/exporter. - :param model_class: A data model class. - - :param name: The "model name" for the importer/exporter. New - class name will be based on this, so e.g. ``Widget`` model - name becomes ``WidgetImporter`` class name. - - :returns: The new class, meant to process import/export - targeting the given data model. + :returns: A new class, meant to process import/export + operations which target the given data model. The new + class will inherit from both :attr:`FromImporterBase` and + :attr:`ToImporterBase`. """ - return type(f'{name}Importer', - (FromCsvToSqlalchemyMixin, self.FromImporterBase, self.ToImporterBase), { - 'model_class': model_class, - 'key': list(get_primary_keys(model_class)), + return type(f'{name}Importer', (FromCsv, self.ToImporterBase), { + 'model_class': cls, + 'key': list(get_primary_keys(cls)), }) -class FromCsvToWutta(FromCsvToSqlalchemyHandlerMixin, FromFileHandler, ToWuttaHandler): +class FromCsvToWutta(FromCsvToSqlalchemyMixin, FromFileHandler, ToWuttaHandler): """ Handler for CSV → Wutta :term:`app database` import. - - This uses :class:`FromCsvToSqlalchemyHandlerMixin` for most of the - heavy lifting. """ ToImporterBase = ToWutta diff --git a/tests/cli/test_base.py b/tests/cli/test_base.py index 69af1b8..b43f7d7 100644 --- a/tests/cli/test_base.py +++ b/tests/cli/test_base.py @@ -2,59 +2,8 @@ import inspect from unittest import TestCase -from unittest.mock import patch from wuttasync.cli import base as mod -from wuttjamaican.testing import DataTestCase - - -class TestImportCommandHandler(DataTestCase): - - def make_handler(self, **kwargs): - return mod.ImportCommandHandler(self.config, **kwargs) - - def test_import_handler(self): - - # none - handler = self.make_handler() - self.assertIsNone(handler.import_handler) - - FromCsvToWutta = self.app.load_object('wuttasync.importing.csv:FromCsvToWutta') - - # as spec - handler = self.make_handler(import_handler=FromCsvToWutta.get_spec()) - self.assertIsInstance(handler.import_handler, FromCsvToWutta) - - # as factory - handler = self.make_handler(import_handler=FromCsvToWutta) - self.assertIsInstance(handler.import_handler, FromCsvToWutta) - - # as instance - myhandler = FromCsvToWutta(self.config) - handler = self.make_handler(import_handler=myhandler) - self.assertIs(handler.import_handler, myhandler) - - def test_run(self): - handler = self.make_handler(import_handler='wuttasync.importing.csv:FromCsvToWutta') - - with patch.object(handler, 'list_models') as list_models: - handler.run({'list_models': True}) - list_models.assert_called_once_with({'list_models': True}) - - with patch.object(handler, 'import_handler') as import_handler: - handler.run({'models': []}) - import_handler.process_data.assert_called_once_with() - - def test_list_models(self): - handler = self.make_handler(import_handler='wuttasync.importing.csv:FromCsvToWutta') - - with patch.object(mod, 'sys') as sys: - handler.list_models({}) - # just test a few random things we expect to see - self.assertTrue(sys.stdout.write.has_call('ALL MODELS:\n')) - self.assertTrue(sys.stdout.write.has_call('Person')) - self.assertTrue(sys.stdout.write.has_call('User')) - self.assertTrue(sys.stdout.write.has_call('Upgrade')) class TestImporterCommand(TestCase): @@ -66,7 +15,7 @@ class TestImporterCommand(TestCase): sig1 = inspect.signature(myfunc) self.assertIn('kwargs', sig1.parameters) self.assertNotIn('dry_run', sig1.parameters) - wrapt = mod.import_command(myfunc) + wrapt = mod.importer_command(myfunc) sig2 = inspect.signature(wrapt) self.assertNotIn('kwargs', sig2.parameters) self.assertIn('dry_run', sig2.parameters) @@ -82,7 +31,7 @@ class TestFileImporterCommand(TestCase): self.assertIn('kwargs', sig1.parameters) self.assertNotIn('dry_run', sig1.parameters) self.assertNotIn('input_file_path', sig1.parameters) - wrapt = mod.file_import_command(myfunc) + wrapt = mod.file_importer_command(myfunc) sig2 = inspect.signature(wrapt) self.assertNotIn('kwargs', sig2.parameters) self.assertIn('dry_run', sig2.parameters) diff --git a/tests/cli/test_import_csv.py b/tests/cli/test_import_csv.py index f856947..a4371df 100644 --- a/tests/cli/test_import_csv.py +++ b/tests/cli/test_import_csv.py @@ -1,19 +1,24 @@ #-*- coding: utf-8; -*- +import os from unittest import TestCase from unittest.mock import MagicMock, patch -from wuttasync.cli import import_csv as mod, ImportCommandHandler +from wuttasync.cli import import_csv as mod +from wuttasync.importing.csv import FromCsvToWutta +here = os.path.dirname(__file__) +example_conf = os.path.join(here, 'example.conf') + class TestImportCsv(TestCase): def test_basic(self): - params = {'models': [], - 'create': True, 'update': True, 'delete': False, - 'dry_run': True} - ctx = MagicMock(params=params) - with patch.object(ImportCommandHandler, 'run') as run: + ctx = MagicMock(params={'models': [], + 'create': True, 'update': True, 'delete': False, + 'dry_run': True}) + with patch.object(FromCsvToWutta, 'process_data') as process_data: mod.import_csv(ctx) - run.assert_called_once_with(params) + process_data.assert_called_once_with(create=True, update=True, delete=False, + dry_run=True) diff --git a/tests/importing/test_base.py b/tests/importing/test_base.py index ff2ca6e..0648dc9 100644 --- a/tests/importing/test_base.py +++ b/tests/importing/test_base.py @@ -82,244 +82,65 @@ class TestImporter(DataTestCase): model = self.app.model imp = self.make_importer(model_class=model.Setting) self.assertEqual(imp.get_keys(), ['name']) - with patch.multiple(imp, create=True, key='value'): - self.assertEqual(imp.get_keys(), ['value']) - with patch.multiple(imp, create=True, keys=['foo', 'bar']): - self.assertEqual(imp.get_keys(), ['foo', 'bar']) + imp.key = 'value' + self.assertEqual(imp.get_keys(), ['value']) def test_process_data(self): model = self.app.model - imp = self.make_importer(model_class=model.Setting, caches_target=True, - delete=True) + imp = self.make_importer(model_class=model.Setting, caches_target=True) - def make_cache(): - setting1 = model.Setting(name='foo1', value='bar1') - setting2 = model.Setting(name='foo2', value='bar2') - setting3 = model.Setting(name='foo3', value='bar3') - cache = { - ('foo1',): { - 'object': setting1, - 'data': {'name': 'foo1', 'value': 'bar1'}, - }, - ('foo2',): { - 'object': setting2, - 'data': {'name': 'foo2', 'value': 'bar2'}, - }, - ('foo3',): { - 'object': setting3, - 'data': {'name': 'foo3', 'value': 'bar3'}, - }, - } - return cache + # empty data set / just for coverage + with patch.object(imp, 'normalize_source_data') as normalize_source_data: + normalize_source_data.return_value = [] - # nb. delete always succeeds - with patch.object(imp, 'delete_target_object', return_value=True): + with patch.object(imp, 'get_target_cache') as get_target_cache: + get_target_cache.return_value = {} - # create + update + delete all as needed - with patch.object(imp, 'get_target_cache', return_value=make_cache()): - created, updated, deleted = imp.process_data([ - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - {'name': 'foo5', 'value': 'BAR5'}, - ]) - self.assertEqual(len(created), 2) - self.assertEqual(len(updated), 1) - self.assertEqual(len(deleted), 2) - - # same but with --max-total so delete gets skipped - with patch.object(imp, 'get_target_cache', return_value=make_cache()): - with patch.object(imp, 'max_total', new=3): - created, updated, deleted = imp.process_data([ - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - {'name': 'foo5', 'value': 'BAR5'}, - ]) - self.assertEqual(len(created), 2) - self.assertEqual(len(updated), 1) - self.assertEqual(len(deleted), 0) - - # delete all if source data empty - with patch.object(imp, 'get_target_cache', return_value=make_cache()): - created, updated, deleted = imp.process_data() - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 0) - self.assertEqual(len(deleted), 3) + result = imp.process_data() + self.assertEqual(result, ([], [], [])) def test_do_create_update(self): model = self.app.model - imp = self.make_importer(model_class=model.Setting, caches_target=True) - - def make_cache(): - setting1 = model.Setting(name='foo1', value='bar1') - setting2 = model.Setting(name='foo2', value='bar2') - cache = { - ('foo1',): { - 'object': setting1, - 'data': {'name': 'foo1', 'value': 'bar1'}, - }, - ('foo2',): { - 'object': setting2, - 'data': {'name': 'foo2', 'value': 'bar2'}, - }, - } - return cache - - # change nothing if data matches - with patch.multiple(imp, create=True, cached_target=make_cache()): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'bar1'}, - {'name': 'foo2', 'value': 'bar2'}, - ]) - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 0) - - # update all as needed - with patch.multiple(imp, create=True, cached_target=make_cache()): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'BAR1'}, - {'name': 'foo2', 'value': 'BAR2'}, - ]) - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 2) - - # update all, with --max-update - with patch.multiple(imp, create=True, cached_target=make_cache(), max_update=1): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'BAR1'}, - {'name': 'foo2', 'value': 'BAR2'}, - ]) - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 1) - - # update all, with --max-total - with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'BAR1'}, - {'name': 'foo2', 'value': 'BAR2'}, - ]) - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 1) - - # create all as needed - with patch.multiple(imp, create=True, cached_target=make_cache()): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'bar1'}, - {'name': 'foo2', 'value': 'bar2'}, - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - ]) - self.assertEqual(len(created), 2) - self.assertEqual(len(updated), 0) - - # what happens when create gets skipped - with patch.multiple(imp, create=True, cached_target=make_cache()): - with patch.object(imp, 'create_target_object', return_value=None): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'bar1'}, - {'name': 'foo2', 'value': 'bar2'}, - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - ]) - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 0) - - # create all, with --max-create - with patch.multiple(imp, create=True, cached_target=make_cache(), max_create=1): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'bar1'}, - {'name': 'foo2', 'value': 'bar2'}, - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - ]) - self.assertEqual(len(created), 1) - self.assertEqual(len(updated), 0) - - # create all, with --max-total - with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'bar1'}, - {'name': 'foo2', 'value': 'bar2'}, - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - ]) - self.assertEqual(len(created), 1) - self.assertEqual(len(updated), 0) - - # create + update all as needed - with patch.multiple(imp, create=True, cached_target=make_cache()): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'BAR1'}, - {'name': 'foo2', 'value': 'BAR2'}, - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - ]) - self.assertEqual(len(created), 2) - self.assertEqual(len(updated), 2) - - # create + update all, with --max-total - with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): - created, updated = imp.do_create_update([ - {'name': 'foo1', 'value': 'BAR1'}, - {'name': 'foo2', 'value': 'BAR2'}, - {'name': 'foo3', 'value': 'BAR3'}, - {'name': 'foo4', 'value': 'BAR4'}, - ]) - # nb. foo1 is updated first - self.assertEqual(len(created), 0) - self.assertEqual(len(updated), 1) - - def test_do_delete(self): - model = self.app.model # this requires a mock target cache - setting1 = model.Setting(name='foo1', value='bar1') - setting2 = model.Setting(name='foo2', value='bar2') imp = self.make_importer(model_class=model.Setting, caches_target=True) - cache = { - ('foo1',): { - 'object': setting1, - 'data': {'name': 'foo1', 'value': 'bar1'}, - }, - ('foo2',): { - 'object': setting2, - 'data': {'name': 'foo2', 'value': 'bar2'}, + setting = model.Setting(name='foo', value='bar') + imp.cached_target = { + ('foo',): { + 'object': setting, + 'data': {'name': 'foo', 'value': 'bar'}, }, } - with patch.object(imp, 'delete_target_object') as delete_target_object: + # will update the one record + result = imp.do_create_update([{'name': 'foo', 'value': 'baz'}]) + self.assertIs(result[1][0][0], setting) + self.assertEqual(result, ([], [(setting, + # nb. target + {'name': 'foo', 'value': 'bar'}, + # nb. source + {'name': 'foo', 'value': 'baz'})])) + self.assertEqual(setting.value, 'baz') - # delete nothing if source has same keys - with patch.multiple(imp, create=True, cached_target=dict(cache)): - source_keys = set(imp.cached_target) - result = imp.do_delete(source_keys) - self.assertFalse(delete_target_object.called) - self.assertEqual(result, []) + # will create a new record + result = imp.do_create_update([{'name': 'blah', 'value': 'zay'}]) + self.assertIsNot(result[0][0][0], setting) + setting_new = result[0][0][0] + self.assertEqual(result, ([(setting_new, + # nb. source + {'name': 'blah', 'value': 'zay'})], + [])) + self.assertEqual(setting_new.name, 'blah') + self.assertEqual(setting_new.value, 'zay') - # delete both if source has no keys - delete_target_object.reset_mock() - with patch.multiple(imp, create=True, cached_target=dict(cache)): - source_keys = set() - result = imp.do_delete(source_keys) - self.assertEqual(delete_target_object.call_count, 2) - self.assertEqual(len(result), 2) + # but what if new record is *not* created + with patch.object(imp, 'create_target_object', return_value=None): + result = imp.do_create_update([{'name': 'another', 'value': 'one'}]) + self.assertEqual(result, ([], [])) - # delete just one if --max-delete was set - delete_target_object.reset_mock() - with patch.multiple(imp, create=True, cached_target=dict(cache)): - source_keys = set() - with patch.object(imp, 'max_delete', new=1): - result = imp.do_delete(source_keys) - self.assertEqual(delete_target_object.call_count, 1) - self.assertEqual(len(result), 1) - - # delete just one if --max-total was set - delete_target_object.reset_mock() - with patch.multiple(imp, create=True, cached_target=dict(cache)): - source_keys = set() - with patch.object(imp, 'max_total', new=1): - result = imp.do_delete(source_keys) - self.assertEqual(delete_target_object.call_count, 1) - self.assertEqual(len(result), 1) + # def test_do_delete(self): + # model = self.app.model + # imp = self.make_importer(model_class=model.Setting) def test_get_record_key(self): model = self.app.model @@ -359,22 +180,6 @@ class TestImporter(DataTestCase): # nb. default normalizer returns object as-is self.assertIs(data[0], setting) - def test_get_unique_data(self): - model = self.app.model - imp = self.make_importer(model_class=model.Setting) - - setting1 = model.Setting(name='foo', value='bar1') - setting2 = model.Setting(name='foo', value='bar2') - - result = imp.get_unique_data([setting2, setting1]) - self.assertIsInstance(result, tuple) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], list) - self.assertEqual(len(result[0]), 1) - self.assertIs(result[0][0], setting2) # nb. not setting1 - self.assertIsInstance(result[1], set) - self.assertEqual(result[1], {('foo',)}) - def test_get_source_objects(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) @@ -456,34 +261,6 @@ class TestImporter(DataTestCase): data = imp.normalize_target_object(setting) self.assertEqual(data, {'name': 'foo', 'value': 'bar'}) - def test_get_deletable_keys(self): - model = self.app.model - imp = self.make_importer(model_class=model.Setting) - - # empty set by default (nb. no target cache) - result = imp.get_deletable_keys() - self.assertIsInstance(result, set) - self.assertEqual(result, set()) - - setting = model.Setting(name='foo', value='bar') - cache = { - ('foo',): { - 'object': setting, - 'data': {'name': 'foo', 'value': 'bar'}, - }, - } - - with patch.multiple(imp, create=True, caches_target=True, cached_target=cache): - - # all are deletable by default - result = imp.get_deletable_keys() - self.assertEqual(result, {('foo',)}) - - # but some maybe can't be deleted - with patch.object(imp, 'can_delete_object', return_value=False): - result = imp.get_deletable_keys() - self.assertEqual(result, set()) - def test_create_target_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) @@ -522,19 +299,6 @@ class TestImporter(DataTestCase): self.assertIs(obj, setting) self.assertEqual(setting.value, 'bar') - def test_can_delete_object(self): - model = self.app.model - imp = self.make_importer(model_class=model.Setting) - setting = model.Setting(name='foo') - self.assertTrue(imp.can_delete_object(setting)) - - def test_delete_target_object(self): - model = self.app.model - imp = self.make_importer(model_class=model.Setting) - setting = model.Setting(name='foo') - # nb. default implementation always returns false - self.assertFalse(imp.delete_target_object(setting)) - class TestFromFile(DataTestCase): @@ -624,20 +388,6 @@ class TestToSqlalchemy(DataTestCase): kwargs.setdefault('handler', self.handler) return mod.ToSqlalchemy(self.config, **kwargs) - def test_get_target_objects(self): - model = self.app.model - imp = self.make_importer(model_class=model.Setting, target_session=self.session) - - setting1 = model.Setting(name='foo', value='bar') - self.session.add(setting1) - setting2 = model.Setting(name='foo2', value='bar2') - self.session.add(setting2) - self.session.commit() - - result = imp.get_target_objects() - self.assertEqual(len(result), 2) - self.assertEqual(set(result), {setting1, setting2}) - def test_get_target_object(self): model = self.app.model setting = model.Setting(name='foo', value='bar') @@ -664,19 +414,15 @@ class TestToSqlalchemy(DataTestCase): self.session.add(setting2) self.session.commit() - # nb. disable target cache - with patch.multiple(imp, create=True, - target_session=self.session, - caches_target=False): + # then we should be able to fetch that via query + imp.target_session = self.session + result = imp.get_target_object(('foo2',)) + self.assertIsInstance(result, model.Setting) + self.assertIs(result, setting2) - # now we should be able to fetch that via query - result = imp.get_target_object(('foo2',)) - self.assertIsInstance(result, model.Setting) - self.assertIs(result, setting2) - - # but sometimes it will not be found - result = imp.get_target_object(('foo3',)) - self.assertIsNone(result) + # but sometimes it will not be found + result = imp.get_target_object(('foo3',)) + self.assertIsNone(result) def test_create_target_object(self): model = self.app.model @@ -690,13 +436,16 @@ class TestToSqlalchemy(DataTestCase): self.assertEqual(setting.value, 'bar') self.assertIn(setting, self.session) - def test_delete_target_object(self): + def test_get_target_objects(self): model = self.app.model - - setting = model.Setting(name='foo', value='bar') - self.session.add(setting) - - self.assertEqual(self.session.query(model.Setting).count(), 1) imp = self.make_importer(model_class=model.Setting, target_session=self.session) - imp.delete_target_object(setting) - self.assertEqual(self.session.query(model.Setting).count(), 0) + + setting1 = model.Setting(name='foo', value='bar') + self.session.add(setting1) + setting2 = model.Setting(name='foo2', value='bar2') + self.session.add(setting2) + self.session.commit() + + result = imp.get_target_objects() + self.assertEqual(len(result), 2) + self.assertEqual(set(result), {setting1, setting2}) diff --git a/tests/importing/test_csv.py b/tests/importing/test_csv.py index dc65e54..683215e 100644 --- a/tests/importing/test_csv.py +++ b/tests/importing/test_csv.py @@ -1,7 +1,6 @@ #-*- coding: utf-8; -*- import csv -import uuid as _uuid from unittest.mock import patch from wuttjamaican.testing import DataTestCase @@ -88,74 +87,23 @@ foo2,bar2 self.assertEqual(objects[1], {'name': 'foo2', 'value': 'bar2'}) -class MockMixinImporter(mod.FromCsvToSqlalchemyMixin, mod.FromCsv, ToSqlalchemy): - pass - - -class TestFromCsvToSqlalchemyMixin(DataTestCase): - - def setUp(self): - self.setup_db() - self.handler = ImportHandler(self.config) - - def make_importer(self, **kwargs): - kwargs.setdefault('handler', self.handler) - return MockMixinImporter(self.config, **kwargs) - - def test_constructor(self): - model = self.app.model - - # no uuid keys - imp = self.make_importer(model_class=model.Setting) - self.assertEqual(imp.uuid_keys, []) - - # typical - # nb. as of now Upgrade is the only table using proper UUID - imp = self.make_importer(model_class=model.Upgrade) - self.assertEqual(imp.uuid_keys, ['uuid']) - - def test_normalize_source_object(self): - model = self.app.model - - # no uuid keys - imp = self.make_importer(model_class=model.Setting) - result = imp.normalize_source_object({'name': 'foo', 'value': 'bar'}) - self.assertEqual(result, {'name': 'foo', 'value': 'bar'}) - - # source has proper UUID - # nb. as of now Upgrade is the only table using proper UUID - imp = self.make_importer(model_class=model.Upgrade, fields=['uuid', 'description']) - result = imp.normalize_source_object({'uuid': _uuid.UUID('06753693-d892-77f0-8000-ce71bf7ebbba'), - 'description': 'testing'}) - self.assertEqual(result, {'uuid': _uuid.UUID('06753693-d892-77f0-8000-ce71bf7ebbba'), - 'description': 'testing'}) - - # source has string uuid - # nb. as of now Upgrade is the only table using proper UUID - imp = self.make_importer(model_class=model.Upgrade, fields=['uuid', 'description']) - result = imp.normalize_source_object({'uuid': '06753693d89277f08000ce71bf7ebbba', - 'description': 'testing'}) - self.assertEqual(result, {'uuid': _uuid.UUID('06753693-d892-77f0-8000-ce71bf7ebbba'), - 'description': 'testing'}) - - -class MockMixinHandler(mod.FromCsvToSqlalchemyHandlerMixin, ToSqlalchemyHandler): +class MockMixinHandler(mod.FromCsvToSqlalchemyMixin, ToSqlalchemyHandler): ToImporterBase = ToSqlalchemy -class TestFromCsvToSqlalchemyHandlerMixin(DataTestCase): +class TestFromCsvToSqlalchemyMixin(DataTestCase): def make_handler(self, **kwargs): return MockMixinHandler(self.config, **kwargs) def test_get_target_model(self): - with patch.object(mod.FromCsvToSqlalchemyHandlerMixin, 'define_importers', return_value={}): + with patch.object(mod.FromCsvToSqlalchemyMixin, 'define_importers', return_value={}): handler = self.make_handler() self.assertRaises(NotImplementedError, handler.get_target_model) def test_define_importers(self): model = self.app.model - with patch.object(mod.FromCsvToSqlalchemyHandlerMixin, 'get_target_model', return_value=model): + with patch.object(mod.FromCsvToSqlalchemyMixin, 'get_target_model', return_value=model): handler = self.make_handler() importers = handler.define_importers() self.assertIn('Setting', importers) @@ -167,7 +115,7 @@ class TestFromCsvToSqlalchemyHandlerMixin(DataTestCase): def test_make_importer_factory(self): model = self.app.model - with patch.object(mod.FromCsvToSqlalchemyHandlerMixin, 'define_importers', return_value={}): + with patch.object(mod.FromCsvToSqlalchemyMixin, 'define_importers', return_value={}): handler = self.make_handler() factory = handler.make_importer_factory(model.Setting, 'Setting') self.assertTrue(issubclass(factory, mod.FromCsv))