diff --git a/.gitignore b/.gitignore index 2aef550..953e3c6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *~ *.pyc .coverage +.tox/ +dist/ docs/_build/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..7eb5e2c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,4 @@ +# -*- mode: conf; -*- + +[MESSAGES CONTROL] +disable=fixme diff --git a/CHANGELOG.md b/CHANGELOG.md index 22559be..4b1fd1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,29 @@ All notable changes to WuttaSync will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## v0.2.1 (2025-06-29) + +### Fix + +- avoid empty keys for importer +- do not assign simple/supported fields in Importer constructor +- make `--input-path` optional for import/export commands + +## v0.2.0 (2024-12-07) + +### Feat + +- add `wutta import-csv` command + +### Fix + +- expose `ToWuttaHandler`, `ToWutta` in `wuttasync.importing` namespace +- implement deletion logic; add cli params for max changes +- add `--key` (or `--keys`) param for import/export commands +- add `--list-models` option for import/export commands +- require latest wuttjamaican +- add `--fields` and `--exclude` params for import/export cli + ## v0.1.0 (2024-12-05) ### Feat diff --git a/README.md b/README.md index 4b4ff38..7ad12cb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # WuttaSync -Wutta framework for data import/export and real-time sync +Wutta Framework for data import/export and real-time sync -See docs at https://rattailproject.org/docs/wuttasync/ +See docs at https://docs.wuttaproject.org/wuttasync/ diff --git a/docs/_static/.keepme b/docs/_static/.keepme new file mode 100644 index 0000000..e69de29 diff --git a/docs/api/wuttasync.cli.base.rst b/docs/api/wuttasync.cli.base.rst new file mode 100644 index 0000000..a411eef --- /dev/null +++ b/docs/api/wuttasync.cli.base.rst @@ 
-0,0 +1,6 @@ + +``wuttasync.cli.base`` +====================== + +.. automodule:: wuttasync.cli.base + :members: diff --git a/docs/api/wuttasync.cli.import_csv.rst b/docs/api/wuttasync.cli.import_csv.rst new file mode 100644 index 0000000..c5104b2 --- /dev/null +++ b/docs/api/wuttasync.cli.import_csv.rst @@ -0,0 +1,6 @@ + +``wuttasync.cli.import_csv`` +============================ + +.. automodule:: wuttasync.cli.import_csv + :members: diff --git a/docs/api/wuttasync.cli.rst b/docs/api/wuttasync.cli.rst new file mode 100644 index 0000000..92dddb5 --- /dev/null +++ b/docs/api/wuttasync.cli.rst @@ -0,0 +1,6 @@ + +``wuttasync.cli`` +================= + +.. automodule:: wuttasync.cli + :members: diff --git a/docs/conf.py b/docs/conf.py index c5d923c..2b47550 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -8,33 +8,35 @@ from importlib.metadata import version as get_version -project = 'WuttaSync' -copyright = '2024, Lance Edgar' -author = 'Lance Edgar' -release = get_version('WuttaSync') +project = "WuttaSync" +copyright = "2024, Lance Edgar" +author = "Lance Edgar" +release = get_version("WuttaSync") # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.viewcode', - 'sphinx.ext.todo', - 'enum_tools.autoenum', + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", + "sphinx.ext.todo", + "enum_tools.autoenum", + "sphinxcontrib.programoutput", ] -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'wuttjamaican': ('https://rattailproject.org/docs/wuttjamaican/', None), + "python": ("https://docs.python.org/3/", None), + "rattail-manual": 
("https://docs.wuttaproject.org/rattail-manual/", None), + "wuttjamaican": ("https://docs.wuttaproject.org/wuttjamaican/", None), } # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'furo' -html_static_path = ['_static'] +html_theme = "furo" +html_static_path = ["_static"] diff --git a/docs/glossary.rst b/docs/glossary.rst new file mode 100644 index 0000000..c58e3d6 --- /dev/null +++ b/docs/glossary.rst @@ -0,0 +1,30 @@ +.. _glossary: + +Glossary +======== + +.. glossary:: + :sorted: + + import handler + This a type of :term:`handler` which is responsible for a + particular set of data import/export task(s). + + The import handler manages data connections and transactions, and + invokes one or more :term:`importers ` to process the + data. See also :ref:`import-handler-vs-importer`. + + Note that "import/export handler" is the more proper term to use + here but it is often shortened to just "import handler" for + convenience. + + importer + This refers to a Python class/instance responsible for processing + a particular :term:`data model` for an import/export job. + + For instance there is usually one importer per table, when + importing to the :term:`app database` (regardless of source). + See also :ref:`import-handler-vs-importer`. + + Note that "importer/exporter" is the more proper term to use here + but it is often shortened to just "importer" for convenience. diff --git a/docs/index.rst b/docs/index.rst index ac6be84..9eb2d93 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,24 +2,77 @@ WuttaSync ========= -This package adds data import/export and real-time sync utilities for -the `Wutta Framework `_. +This provides a "batteries included" way to handle data sync between +arbitrary source and target. 
-While it of course supports import/export to/from the Wutta :term:`app -database`, it may be used for any "source → target" data flow. +This builds / depends on :doc:`WuttJamaican `, for +sake of a common :term:`config object` and :term:`handler` interface. +It was originally designed for import to / export from the :term:`app +database` but **both** the source and target can be "anything" - +e.g. CSV or Excel file, cloud API, another DB. + +The basic idea is as follows: + +* read a data set from "source" +* read corresponding data from "target" +* compare the two data sets +* where they differ, create/update/delete records on the target + +Although in some cases (e.g. export to CSV) the target has no +meaningful data so all source records are "created" on / written to +the target. + +.. note:: + + You may already have guessed, that this approach may not work for + "big data" - and indeed, it is designed for "small" data sets, + ideally 500K records or smaller. It reads both (source/target) + data sets into memory so that is the limiting factor. + + You can work around this to some extent, by limiting the data sets + to a particular date range (or other "partitionable" aspect of the + data), and only syncing that portion. + + However this is not meant to be an ETL engine involving a data + lake/warehouse. It is for more "practical" concerns where some + disparate "systems" must be kept in sync, or basic import from / + export to file. + +The general "source → target" concept can be used for both import and +export, since "everything is an import" from the target's perspective. + +In addition to the import/export framework proper, a CLI framework is +also provided. + +A "real-time sync" framework is also (eventually) planned, similar to +the one developed in the Rattail Project; +cf. :doc:`rattail-manual:data/sync/index`. + +.. image:: https://img.shields.io/badge/linting-pylint-yellowgreen + :target: https://github.com/pylint-dev/pylint + +.. 
image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/psf/black .. toctree:: :maxdepth: 2 :caption: Documentation + glossary narr/install + narr/cli/index + narr/concepts + narr/custom/index .. toctree:: :maxdepth: 1 :caption: API api/wuttasync + api/wuttasync.cli + api/wuttasync.cli.base + api/wuttasync.cli.import_csv api/wuttasync.importing api/wuttasync.importing.base api/wuttasync.importing.csv diff --git a/docs/narr/cli/builtin.rst b/docs/narr/cli/builtin.rst new file mode 100644 index 0000000..0630c94 --- /dev/null +++ b/docs/narr/cli/builtin.rst @@ -0,0 +1,27 @@ + +=================== + Built-in Commands +=================== + +Below are the :term:`subcommands ` which come with +WuttaSync. + +It is fairly simple to add more; see :doc:`custom`. + + +.. _wutta-import-csv: + +``wutta import-csv`` +-------------------- + +Import data from CSV file(s) to the Wutta :term:`app database`. + +This *should* be able to automatically target any table mapped in the +:term:`app model`. The only caveat is that it is "dumb" and does not +have any special field handling. This means the column headers in the +CSV file must be named the same as in the target table, and some data +types may not behave as expected etc. + +Defined in: :mod:`wuttasync.cli.import_csv` + +.. program-output:: wutta import-csv --help diff --git a/docs/narr/cli/custom.rst b/docs/narr/cli/custom.rst new file mode 100644 index 0000000..837a70c --- /dev/null +++ b/docs/narr/cli/custom.rst @@ -0,0 +1,64 @@ + +================= + Custom Commands +================= + +This section describes how to add a custom :term:`subcommand` which +wraps a particular :term:`import handler`. + +See also :doc:`wuttjamaican:narr/cli/custom` for more information +on the general concepts etc. 
+ + +Basic Import/Export +------------------- + +Here we'll assume you have a typical "Poser" app based on Wutta +Framework, and the "Foo → Poser" (``FromFooToPoser`` handler) import +logic is defined in the ``poser.importing.foo`` module. + +We'll also assume you already have a ``poser`` top-level +:term:`command` (in ``poser.cli``), and our task now is to add the +``poser import-foo`` subcommand to wrap the import handler. + +And finally we'll assume this is just a "typical" import handler and +we do not need any custom CLI params exposed. + +Here is the code and we'll explain below:: + + from poser.cli import poser_typer + from wuttasync.cli import import_command, ImportCommandHandler + + @poser_typer.command() + @import_command + def import_foo(ctx, **kwargs): + """ + Import data from Foo API to Poser DB + """ + config = ctx.parent.wutta_config + handler = ImportCommandHandler( + config, import_handler='poser.importing.foo:FromFooToPoser') + handler.run(ctx.params) + +Hopefully it's straightforward but to be clear: + +* subcommand is really just a function, **with desired name** +* wrap with ``@poser_typer.command()`` to register as subcommand +* wrap with ``@import_command`` to get typical CLI params +* call ``ImportCommandHandler.run()`` with import handler spec + +So really - in addition to +:func:`~wuttasync.cli.base.import_command()` - the +:class:`~wuttasync.cli.base.ImportCommandHandler` is doing the heavy +lifting for all import/export subcommands, it just needs to know which +:term:`import handler` to use. + +.. note:: + + If your new subcommand is defined in a different module than is the + top-level command (e.g. as in example above) then you may need to + "eagerly" import the subcommand module. (Otherwise auto-discovery + may not find it.) + + This is usually done from within the top-level command's module, + since it is always imported early due to the entry point. 
diff --git a/docs/narr/cli/index.rst b/docs/narr/cli/index.rst new file mode 100644 index 0000000..96be6c7 --- /dev/null +++ b/docs/narr/cli/index.rst @@ -0,0 +1,23 @@ + +======================== + Command Line Interface +======================== + +The primary way of using the import/export framework day to day is via +the command line. + +WuttJamaican defines the ``wutta`` :term:`command` and WuttaSync comes +with some extra :term:`subcommands ` for importing to / +exporting from the Wutta :term:`app database`. + +It is fairly simple to add a dedicated subcommand for any +:term:`import handler`; see below. + +And for more general info about CLI see +:doc:`wuttjamaican:narr/cli/index`. + +.. toctree:: + :maxdepth: 2 + + builtin + custom diff --git a/docs/narr/concepts.rst b/docs/narr/concepts.rst new file mode 100644 index 0000000..93d09a3 --- /dev/null +++ b/docs/narr/concepts.rst @@ -0,0 +1,54 @@ + +Concepts +======== + +Things hopefully are straightforward but it's important to get the +following straight in your head; the rest will come easier if you do. + + +Source vs. Target +----------------- + +Data always flows from source to target, it is the #1 rule. + +Docs and command output will always reflect this, e.g. **CSV → +Wutta**. + +Source and target can be anything as long as the :term:`import +handler` and :term:`importer(s) ` implement the desired +logic. The :term:`app database` is often involved but not always. + + +Import vs. Export +----------------- + +Surprise, there is no difference. After all from target's perspective +everything is really an import. + +Sometimes it's more helpful to think of it as an export, e.g. **Wutta +→ CSV** really seems like an export. In such cases the +:attr:`~wuttasync.importing.handlers.ImportHandler.orientation` may be +set to reflect the distinction. + + +.. _import-handler-vs-importer: + +Import Handler vs. 
Importer +--------------------------- + +The :term:`import handler` is sort of the "wrapper" around one or more +:term:`importers ` and the latter contain the table-specific +sync logic. + +In a DB or similar context, the import handler will make the +connection, then invoke all requested importers, then commit +transaction at the end (or rollback if dry-run). + +And each importer will read data from source, and usually also read +data from target, then compare data sets and finally write data to +target as needed. But each would usually do this for just one table. + +See also the base classes for each: + +* :class:`~wuttasync.importing.handlers.ImportHandler` +* :class:`~wuttasync.importing.base.Importer` diff --git a/docs/narr/custom/command.rst b/docs/narr/custom/command.rst new file mode 100644 index 0000000..39eaeae --- /dev/null +++ b/docs/narr/custom/command.rst @@ -0,0 +1,9 @@ + +Define Command +============== + +Now that you have defined the import handler plus any importers +required, you'll want to define a command line interface to use it. + +This section is here for completeness but the process is described +elsewhere; see :doc:`/narr/cli/custom`. diff --git a/docs/narr/custom/conventions.rst b/docs/narr/custom/conventions.rst new file mode 100644 index 0000000..3ce686a --- /dev/null +++ b/docs/narr/custom/conventions.rst @@ -0,0 +1,90 @@ + +Conventions +=========== + +Below are recommended conventions for structuring and naming the files +in your project relating to import/export. + +The intention for these rules is that they are "intuitive" based on +the fact that all data flows from source to target and therefore can +be thought of as "importing" in virtually all cases. + +But there are a lot of edge cases out there so YMMV. 
+ + +"The Rules" +----------- + +There are exceptions to these of course, but in general: + +* regarding how to think about these conventions: + + * always look at it from target's perspective + + * always look at it as an *import*, not export + +* "final" logic is always a combo of: + + * "base" logic for how target data read/write happens generally + + * "specific" logic for how that happens using a particular data source + +* targets each get their own subpackage within project + + * and within that, also an ``importing`` (nested) subpackage + + * and within *that* is where the files live, referenced next + + * target ``model.py`` should contain ``ToTarget`` importer base class + + * also may have misc. per-model base classes, e.g. ``WidgetImporter`` + + * also may have ``ToTargetHandler`` base class if applicable + + * sources each get their own module, named after the source + + * should contain the "final" handler class, e.g. ``FromSourceToTarget`` + + * also contains "final" importer classes needed by handler (e.g. ``WidgetImporter``) + + +Example +------- + +That's a lot of rules so let's see it. Here we assume a Wutta-based +app named Poser and it integrates with a Foo API in the cloud. Data +should flow both ways so we will be thinking of this as: + +* **Foo → Poser import** +* **Poser → Foo export** + +Here is the suggested file layout: + +.. code-block:: none + + poser/ + ├── foo/ + │ ├── __init__.py + │ ├── api.py + │ └── importing/ + │ ├── __init__.py + │ ├── model.py + │ └── poser.py + └── importing/ + ├── __init__.py + ├── foo.py + └── model.py + +And the module breakdown: + +* ``poser.foo.api`` has e.g. ``FooAPI`` interface logic + +**Foo → Poser import** (aka. "Poser imports from Foo") + +* ``poser.importing.model`` has ``ToPoserHandler``, ``ToPoser`` and per-model base importers +* ``poser.importing.foo`` has ``FromFooToPoser`` plus final importers + +**Poser → Foo export** (aka. 
"Foo imports from Poser") + +* ``poser.foo.importing.model`` has ``ToFooHandler``, ``ToFoo`` and per-model base importer +* ``poser.foo.importing.poser`` has ``FromPoserToFoo`` plus final importers diff --git a/docs/narr/custom/handler.rst b/docs/narr/custom/handler.rst new file mode 100644 index 0000000..cb2b74d --- /dev/null +++ b/docs/narr/custom/handler.rst @@ -0,0 +1,93 @@ + +Define Import Handler +===================== + +The obvious step here is to define a new :term:`import handler`, which +ultimately inherits from +:class:`~wuttasync.importing.handlers.ImportHandler`. But the choice +of which class(es) *specifically* to inherit from, is a bit more +complicated. + + +Choose the Base Class(es) +------------------------- + +If all else fails, or to get started simply, you can always just +inherit from :class:`~wuttasync.importing.handlers.ImportHandler` +directly as the only base class. You'll have to define any methods +needed to implement desired behavior. + +However depending on your particular source and/or target, there may +be existing base classes defined somewhere from which you can inherit. +This may save you some effort, and/or is just a good idea to share +code where possible. + +Keep in mind your import handler can inherit from multiple base +classes, and often will - one base for the source side, and another +for the target side. For instance:: + + from wuttasync.importing import FromFileHandler, ToWuttaHandler + + class FromExcelToPoser(FromFileHandler, ToWuttaHandler): + """ + Handler for Excel file → Poser app DB + """ + +You generally will still need to define/override some methods to +customize behavior. + +All built-in base classes live under :mod:`wuttasync.importing`. + + +.. _register-importer: + +Register Importer(s) +-------------------- + +If nothing else, most custom handlers must override +:meth:`~wuttasync.importing.handlers.ImportHandler.define_importers()` +to "register" importer(s) as appropriate. 
There are two primary goals +here: + +* add "new" (totally custom) importers +* override "existing" importers (inherited from base class) + +Obviously for this to actually work the importer(s) must exist in +code; see :doc:`importer`. + +As an example let's say there's a ``FromFooToWutta`` handler which +defines a ``Widget`` importer. + +And let's say you want to customize that, by tweaking slightly the +logic for ``WidgetImporter`` and adding a new ``SprocketImporter``:: + + from somewhere_else import (FromFooToWutta, ToWutta, + WidgetImporter as WidgetImporterBase) + + class FromFooToPoser(FromFooToWutta): + """ + Handler for Foo -> Poser + """ + + def define_importers(self): + + # base class defines the initial set + importers = super().define_importers() + + # override widget importer + importers['Widget'] = WidgetImporter + + # add sprocket importer + importers['Sprocket'] = SprocketImporter + + return importers + + class SprocketImporter(ToWutta): + """ + Sprocket importer for Foo -> Poser + """ + + class WidgetImporter(WidgetImporterBase): + """ + Widget importer for Foo -> Poser + """ diff --git a/docs/narr/custom/importer.rst b/docs/narr/custom/importer.rst new file mode 100644 index 0000000..c9b6674 --- /dev/null +++ b/docs/narr/custom/importer.rst @@ -0,0 +1,149 @@ + +Define Importer(s) +================== + +Here we'll describe how to make a custom :term:`importer/exporter +`, which can process a given :term:`data model`. + +.. + The example will assume a **Foo → Poser import** for the ``Widget`` + :term:`data model`. + + +Choose the Base Class(es) +------------------------- + +As with the :term:`import handler`, the importer "usually" will have +two base classes: one for the target side and another for the source. + +The base class for target side is generally more fleshed out, with +logic to read/write data for the given target model. Whereas the base +class for the source side could just be a stub. 
In the latter case, +one might choose to skip it and inherit only from the target base +class. + +In any case the final importer class you define can override any/all +logic from either base class if needed. + + +Example: Foo → Poser import +--------------------------- + +Here we'll assume a Wutta-based app named "Poser" which will be +importing "Widget" data from the "Foo API" cloud service. + +In this case we will inherit from a base class for the target side, +which already knows how to talk to the :term:`app database` via +SQLAlchemy ORM. + +But for the source side, there is no existing base class for the Foo +API service, since that is just made-up - so we will also define our +own base class for that:: + + from wuttasync.importing import Importer, ToWutta + + # nb. this is not real of course, but an example + from poser.foo.api import FooAPI + + class FromFoo(Importer): + """ + Base class for importers using Foo API as source + """ + + def setup(self): + """ + Establish connection to Foo API + """ + self.foo_api = FooAPI(self.config) + + class WidgetImporter(FromFoo, ToWutta): + """ + Widget importer for Foo -> Poser + """ + + def get_source_objects(self): + """ + Fetch all "raw" widgets from Foo API + """ + # nb. also not real, just example + return self.foo_api.get_widgets() + + def normalize_source_object(self, widget): + """ + Convert the "raw" widget we receive from Foo API, to a + "normalized" dict with data for all fields which are part of + the processing request. + """ + return { + 'id': widget.id, + 'name': widget.name, + } + + +Example: Poser → Foo export +--------------------------- + +In the previous scenario we imported data from Foo to Poser, and here +we'll do the reverse, exporting from Poser to Foo. + +As of writing the base class logic for exporting from Wutta :term:`app +database` does not yet exist. 
And the Foo API is just made-up so +we'll add one-off base classes for both sides:: + + from wuttasync.importing import Importer + + class FromWutta(Importer): + """ + Base class for importers using Wutta DB as source + """ + + class ToFoo(Importer): + """ + Base class for exporters targeting Foo API + """ + + class WidgetImporter(FromWutta, ToFoo): + """ + Widget exporter for Poser -> Foo + """ + + def get_source_objects(self): + """ + Fetch all widgets from the Poser app DB. + + (see note below regarding the db session) + """ + model = self.app.model + return self.source_session.query(model.Widget).all() + + def normalize_source_object(self, widget): + """ + Convert the "raw" widget from Poser app (ORM) to a + "normalized" dict with data for all fields which are part of + the processing request. + """ + return { + 'id': widget.id, + 'name': widget.name, + } + +Note that the ``get_source_objects()`` method shown above makes use of +a ``source_session`` attribute - where did that come from? + +This is actually not part of the importer proper, but rather this +attribute is set by the :term:`import handler`. And that will ony +happen if the importer is being invoked by a handler which supports +it. So none of that is shown here, but FYI. + +(And again, that logic isn't written yet, but there will "soon" be a +``FromSqlalchemyHandler`` class defined which implements this.) + + +Regster with Import Handler +--------------------------- + +After you define the importer/exporter class (as shown above) you also +must "register" it within the import/export handler. + +This section is here for completeness but the process is described +elsewhere; see :ref:`register-importer`. diff --git a/docs/narr/custom/index.rst b/docs/narr/custom/index.rst new file mode 100644 index 0000000..7e75146 --- /dev/null +++ b/docs/narr/custom/index.rst @@ -0,0 +1,21 @@ + +Custom Import/Export +==================== + +This section explains what's required to make your own import/export +tasks. 
+ +See also :doc:`/narr/concepts` for some terminology etc. + +.. + The examples throughout the sections below will often involve a + theoretical **Foo → Poser** import, where Poser is a typical + Wutta-based app and Foo is some API in the cloud. + +.. toctree:: + :maxdepth: 2 + + conventions + handler + importer + command diff --git a/pyproject.toml b/pyproject.toml index 294e3f3..a48b949 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,8 +6,8 @@ build-backend = "hatchling.build" [project] name = "WuttaSync" -version = "0.1.0" -description = "Wutta framework for data import/export and real-time sync" +version = "0.2.1" +description = "Wutta Framework for data import/export and real-time sync" readme = "README.md" authors = [{name = "Lance Edgar", email = "lance@wuttaproject.org"}] license = {text = "GNU GPL v3+"} @@ -26,14 +26,19 @@ classifiers = [ ] requires-python = ">= 3.8" dependencies = [ + "makefun", "SQLAlchemy-Utils", - "WuttJamaican[db]", + "WuttJamaican[db]>=0.16.2", ] [project.optional-dependencies] -docs = ["Sphinx", "enum-tools[sphinx]", "furo"] -tests = ["pytest-cov", "tox"] +docs = ["Sphinx", "enum-tools[sphinx]", "furo", "sphinxcontrib-programoutput"] +tests = ["pylint", "pytest", "pytest-cov", "tox"] + + +[project.entry-points."wutta.typer_imports"] +wuttasync = "wuttasync.cli" [project.urls] diff --git a/src/wuttasync/__init__.py b/src/wuttasync/__init__.py index 69a8e83..6cced18 100644 --- a/src/wuttasync/__init__.py +++ b/src/wuttasync/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync +# WuttaSync -- Wutta Framework for data import/export and real-time sync # Copyright © 2024 Lance Edgar # # This file is part of Wutta Framework. 
diff --git a/src/wuttasync/_version.py b/src/wuttasync/_version.py index 6432bbf..4881c5c 100644 --- a/src/wuttasync/_version.py +++ b/src/wuttasync/_version.py @@ -1,6 +1,9 @@ # -*- coding: utf-8; -*- +""" +Package Version +""" from importlib.metadata import version -__version__ = version('WuttaSync') +__version__ = version("WuttaSync") diff --git a/src/wuttasync/cli/__init__.py b/src/wuttasync/cli/__init__.py new file mode 100644 index 0000000..c77a4e2 --- /dev/null +++ b/src/wuttasync/cli/__init__.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8; -*- +################################################################################ +# +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024 Lance Edgar +# +# This file is part of Wutta Framework. +# +# Wutta Framework is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Wutta Framework is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# Wutta Framework. If not, see . +# +################################################################################ +""" +WuttaSync - ``wutta`` subcommands + +This namespace exposes the following: + +* :func:`~wuttasync.cli.base.import_command()` +* :func:`~wuttasync.cli.base.file_import_command()` +* :class:`~wuttasync.cli.base.ImportCommandHandler` +""" + +from .base import import_command, file_import_command, ImportCommandHandler + +# nb. must bring in all modules for discovery to work +from . 
import import_csv diff --git a/src/wuttasync/cli/base.py b/src/wuttasync/cli/base.py new file mode 100644 index 0000000..08fa4f5 --- /dev/null +++ b/src/wuttasync/cli/base.py @@ -0,0 +1,319 @@ +# -*- coding: utf-8; -*- +################################################################################ +# +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024-2025 Lance Edgar +# +# This file is part of Wutta Framework. +# +# Wutta Framework is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Wutta Framework is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# Wutta Framework. If not, see . +# +################################################################################ +""" +``wutta import-csv`` command +""" + +import inspect +import logging +import sys +from pathlib import Path +from typing import List, Optional +from typing_extensions import Annotated + +import makefun +import typer + +from wuttjamaican.app import GenericHandler +from wuttasync.importing import ImportHandler + + +log = logging.getLogger(__name__) + + +class ImportCommandHandler(GenericHandler): + """ + This is the :term:`handler` responsible for import/export command + line runs. + + Normally, the command (actually :term:`subcommand`) logic will + create this handler and call its :meth:`run()` method. + + This handler does not know how to import/export data, but it knows + how to make its :attr:`import_handler` do it. 
+ + :param import_handler: During construction, caller can specify the + :attr:`import_handler` as any of: + + * import handler instance + * import handler factory (e.g. class) + * import handler spec (cf. :func:`~wuttjamaican:wuttjamaican.util.load_object()`) + + For example:: + + handler = ImportCommandHandler( + config, import_handler='wuttasync.importing.csv:FromCsvToWutta') + """ + + import_handler = None + """ + Reference to the :term:`import handler` instance, which is to be + invoked when command runs. See also :meth:`run()`. + """ + + def __init__(self, config, import_handler=None): + super().__init__(config) + + if import_handler: + if isinstance(import_handler, ImportHandler): + self.import_handler = import_handler + elif callable(import_handler): + self.import_handler = import_handler(self.config) + else: # spec + factory = self.app.load_object(import_handler) + self.import_handler = factory(self.config) + + def run(self, params, progress=None): # pylint: disable=unused-argument + """ + Run the import/export job(s) based on command line params. + + This mostly just calls + :meth:`~wuttasync.importing.handlers.ImportHandler.process_data()` + for the :attr:`import_handler`. + + Unless ``--list-models`` was specified on the command line in + which case we do :meth:`list_models()` instead. + + :param params: Dict of params from command line. This must + include a ``'models'`` key, the rest are optional. + + :param progress: Optional progress indicator factory. 
+ """ + + # maybe just list models and bail + if params.get("list_models"): + self.list_models(params) + return + + # otherwise process some data + kw = dict(params) + models = kw.pop("models") + log.debug("using handler: %s", self.import_handler.get_spec()) + # TODO: need to use all/default models if none specified + # (and should know models by now for logging purposes) + log.debug( + "running %s %s for: %s", + self.import_handler, + self.import_handler.orientation.value, + ", ".join(models), + ) + log.debug("params are: %s", kw) + self.import_handler.process_data(*models, **kw) + + def list_models(self, params): # pylint: disable=unused-argument + """ + Query the :attr:`import_handler`'s supported target models and + print the info to stdout. + + This is what happens when command line has ``--list-models``. + """ + sys.stdout.write("ALL MODELS:\n") + sys.stdout.write("==============================\n") + for key in self.import_handler.importers: + sys.stdout.write(key) + sys.stdout.write("\n") + sys.stdout.write("==============================\n") + + +def import_command_template( # pylint: disable=unused-argument,too-many-arguments,too-many-positional-arguments + models: Annotated[ + Optional[List[str]], + typer.Argument( + help="Model(s) to process. Can specify one or more, " + "or omit to process default models." + ), + ] = None, + list_models: Annotated[ + bool, + typer.Option( + "--list-models", "-l", help="List available target models and exit." + ), + ] = False, + create: Annotated[ + bool, + typer.Option( + help="Allow new target records to be created. " "See also --max-create." + ), + ] = True, + update: Annotated[ + bool, + typer.Option( + help="Allow existing target records to be updated. " + "See also --max-update." + ), + ] = True, + delete: Annotated[ + bool, + typer.Option( + help="Allow existing target records to be deleted. " + "See also --max-delete."
+ ), + ] = False, + fields: Annotated[ + str, + typer.Option( + "--fields", help="List of fields to process. See also --exclude and --key." + ), + ] = None, + excluded_fields: Annotated[ + str, + typer.Option( + "--exclude", help="List of fields *not* to process. See also --fields." + ), + ] = None, + keys: Annotated[ + str, + typer.Option( + "--key", + "--keys", + help="List of fields to use as record key/identifier. " + "See also --fields.", + ), + ] = None, + max_create: Annotated[ + int, + typer.Option( + help="Max number of target records to create (per model). " + "See also --create." + ), + ] = None, + max_update: Annotated[ + int, + typer.Option( + help="Max number of target records to update (per model). " + "See also --update." + ), + ] = None, + max_delete: Annotated[ + int, + typer.Option( + help="Max number of target records to delete (per model). " + "See also --delete." + ), + ] = None, + max_total: Annotated[ + int, + typer.Option( + help="Max number of *any* target record changes which may occur (per model)." + ), + ] = None, + dry_run: Annotated[ + bool, + typer.Option( + "--dry-run", help="Go through the motions, but rollback the transaction." + ), + ] = False, +): + """ + Stub function which provides a common param signature; used with + :func:`import_command()`. + """ + + +def import_command(fn): + """ + Decorator for import/export commands. Adds common params based on + :func:`import_command_template()`. + + To use this, e.g. for ``poser import-foo`` command:: + + from poser.cli import poser_typer + from wuttasync.cli import import_command, ImportCommandHandler + + @poser_typer.command() + @import_command + def import_foo( + ctx: typer.Context, + **kwargs + ): + \""" + Import data from Foo API to Poser DB + \""" + config = ctx.parent.wutta_config + handler = ImportCommandHandler( + config, import_handler='poser.importing.foo:FromFooToPoser') + handler.run(ctx.params) + + See also :class:`ImportCommandHandler`. 
+ """ + original_sig = inspect.signature(fn) + reference_sig = inspect.signature(import_command_template) + + params = list(original_sig.parameters.values()) + for i, param in enumerate(reference_sig.parameters.values()): + params.insert(i + 1, param) + + # remove the **kwargs param + params.pop(-1) + + final_sig = original_sig.replace(parameters=params) + return makefun.create_function(final_sig, fn) + + +def file_import_command_template( # pylint: disable=unused-argument + input_file_path: Annotated[ + Path, + typer.Option( + "--input-path", + exists=True, + file_okay=True, + dir_okay=True, + help="Path to input file(s). Can be a folder " + "if app logic can guess the filename(s); " + "otherwise must be complete file path.", + ), + ] = None, +): + """ + Stub function to provide signature for import/export commands + which require input file. Used with + :func:`file_import_command()`. + """ + + +def file_import_command(fn): + """ + Decorator for import/export commands which require input file. + Adds common params based on + :func:`file_import_command_template()`. + + To use this, it's the same method as shown for + :func:`import_command()` except in this case you would use the + ``file_import_command`` decorator. 
+ """ + original_sig = inspect.signature(fn) + plain_import_sig = inspect.signature(import_command_template) + file_import_sig = inspect.signature(file_import_command_template) + desired_params = list(plain_import_sig.parameters.values()) + list( + file_import_sig.parameters.values() + ) + + params = list(original_sig.parameters.values()) + for i, param in enumerate(desired_params): + params.insert(i + 1, param) + + # remove the **kwargs param + params.pop(-1) + + final_sig = original_sig.replace(parameters=params) + return makefun.create_function(final_sig, fn) diff --git a/src/wuttasync/cli/import_csv.py b/src/wuttasync/cli/import_csv.py new file mode 100644 index 0000000..d3c8047 --- /dev/null +++ b/src/wuttasync/cli/import_csv.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8; -*- +################################################################################ +# +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024-2025 Lance Edgar +# +# This file is part of Wutta Framework. +# +# Wutta Framework is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Wutta Framework is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# Wutta Framework. If not, see . 
+# +################################################################################ +""" +See also: :ref:`wutta-import-csv` +""" + +import typer + +from wuttjamaican.cli import wutta_typer + +from .base import file_import_command, ImportCommandHandler + + +@wutta_typer.command() +@file_import_command +def import_csv(ctx: typer.Context, **kwargs): # pylint: disable=unused-argument + """ + Import data from CSV file(s) to Wutta DB + """ + config = ctx.parent.wutta_config + handler = ImportCommandHandler( + config, import_handler="wuttasync.importing.csv:FromCsvToWutta" + ) + handler.run(ctx.params) diff --git a/src/wuttasync/importing/__init__.py b/src/wuttasync/importing/__init__.py index 615b2d3..03a421f 100644 --- a/src/wuttasync/importing/__init__.py +++ b/src/wuttasync/importing/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync +# WuttaSync -- Wutta Framework for data import/export and real-time sync # Copyright © 2024 Lance Edgar # # This file is part of Wutta Framework. 
@@ -22,7 +22,27 @@ ################################################################################ """ Data Import / Export Framework + +This namespace exposes the following: + +* :enum:`~wuttasync.importing.handlers.Orientation` + +And some :term:`import handler` base classes: + +* :class:`~wuttasync.importing.handlers.ImportHandler` +* :class:`~wuttasync.importing.handlers.FromFileHandler` +* :class:`~wuttasync.importing.handlers.ToSqlalchemyHandler` +* :class:`~wuttasync.importing.wutta.ToWuttaHandler` + +And some :term:`importer` base classes: + +* :class:`~wuttasync.importing.base.Importer` +* :class:`~wuttasync.importing.base.FromFile` +* :class:`~wuttasync.importing.base.ToSqlalchemy` +* :class:`~wuttasync.importing.model.ToWutta` """ from .handlers import Orientation, ImportHandler, FromFileHandler, ToSqlalchemyHandler from .base import Importer, FromFile, ToSqlalchemy +from .model import ToWutta +from .wutta import ToWuttaHandler diff --git a/src/wuttasync/importing/base.py b/src/wuttasync/importing/base.py index 0c83d70..629ead6 100644 --- a/src/wuttasync/importing/base.py +++ b/src/wuttasync/importing/base.py @@ -1,8 +1,8 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync -# Copyright © 2024 Lance Edgar +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024-2025 Lance Edgar # # This file is part of Wutta Framework. 
# @@ -23,10 +23,13 @@ """ Data Importer base class """ +# pylint: disable=too-many-lines import os import logging +from collections import OrderedDict +import sqlalchemy as sa from sqlalchemy import orm from sqlalchemy_utils.functions import get_primary_keys, get_columns @@ -36,7 +39,14 @@ from wuttasync.util import data_diffs log = logging.getLogger(__name__) -class Importer: +class ImportLimitReached(Exception): + """ + Exception raised when an import/export job reaches the max number + of changes allowed. + """ + + +class Importer: # pylint: disable=too-many-instance-attributes,too-many-public-methods """ Base class for all data importers / exporters. @@ -71,6 +81,25 @@ class Importer: It is primarily (only?) used when the target side of the import/export uses SQLAlchemy ORM. + + .. attribute:: fields + + This is the official list of "effective" fields to be processed + for the current import/export job. + + Code theoretically should not access this directly but instead + call :meth:`get_fields()`. However it is often convenient to + overwrite this attribute directly, for dynamic fields. If so + then ``get_fields()`` will return the new value. And really, + it's probably just as safe to read this attribute directly too. + + .. attribute:: excluded_fields + + This attribute will often not exist, but is mentioned here for + reference. + + It may be specified via constructor param in which case each + field listed therein will be removed from :attr:`fields`. """ allow_create = True @@ -155,23 +184,43 @@ class Importer: :meth:`get_target_cache()`. 
""" + max_create = None + max_update = None + max_delete = None + max_total = None + + handler = None + model_class = None + def __init__(self, config, **kwargs): self.config = config self.app = self.config.get_app() - self.create = kwargs.pop('create', - kwargs.pop('allow_create', self.allow_create)) - self.update = kwargs.pop('update', - kwargs.pop('allow_update', self.allow_update)) - self.delete = kwargs.pop('delete', - kwargs.pop('allow_delete', self.allow_delete)) + self.create = kwargs.pop( + "create", kwargs.pop("allow_create", self.allow_create) + ) + self.update = kwargs.pop( + "update", kwargs.pop("allow_update", self.allow_update) + ) + self.delete = kwargs.pop( + "delete", kwargs.pop("allow_delete", self.allow_delete) + ) self.__dict__.update(kwargs) - self.simple_fields = self.get_simple_fields() - self.supported_fields = self.get_supported_fields() self.fields = self.get_fields() + # fields could be comma-delimited string from cli param + if isinstance(self.fields, str): + self.fields = self.config.parse_list(self.fields) + + # discard any fields caller asked to exclude + excluded = getattr(self, "excluded_fields", None) + if excluded: + if isinstance(excluded, str): + excluded = self.config.parse_list(excluded) + self.fields = [f for f in self.fields if f not in excluded] + @property def orientation(self): """ @@ -203,7 +252,7 @@ class Importer: """ Returns the display title for the target data model. """ - if hasattr(self, 'model_title'): + if hasattr(self, "model_title"): return self.model_title # TODO: this will fail if not using a model class, obviously.. @@ -222,10 +271,13 @@ class Importer: :returns: Possibly empty list of "simple" field names. 
""" - if hasattr(self, 'simple_fields'): + if hasattr(self, "simple_fields"): return self.simple_fields - fields = get_columns(self.model_class) + try: + fields = get_columns(self.model_class) + except sa.exc.NoInspectionAvailable: + return [] return list(fields.keys()) def get_supported_fields(self): @@ -245,7 +297,7 @@ class Importer: :returns: List of all "supported" field names. """ - if hasattr(self, 'supported_fields'): + if hasattr(self, "supported_fields"): return self.supported_fields return self.get_simple_fields() @@ -255,6 +307,8 @@ class Importer: This should return the "effective" list of fields which are to be used for the import/export. + See also :attr:`fields` which is normally what this returns. + All fields in this list should also be found in the output for :meth:`get_supported_fields()`. @@ -262,7 +316,7 @@ class Importer: :returns: List of "effective" field names. """ - if hasattr(self, 'fields'): + if hasattr(self, "fields") and self.fields is not None: return self.fields return self.get_supported_fields() @@ -276,10 +330,17 @@ class Importer: :returns: List of "key" field names. """ - if hasattr(self, 'key'): - keys = self.key + keys = None + # nb. prefer 'keys' but use 'key' as fallback + if "keys" in self.__dict__: + keys = self.__dict__["keys"] + elif "key" in self.__dict__: + keys = self.__dict__["key"] + if keys: if isinstance(keys, str): - keys = [keys] + keys = self.config.parse_list(keys) + # nb. save for next time + self.__dict__["keys"] = keys return keys return list(get_primary_keys(self.model_class)) @@ -314,9 +375,26 @@ class Importer: Note that subclass generally should not override this method, but instead some of the others. - :param source_data: Optional sequence of normalized source - data. If not specified, it is obtained from - :meth:`normalize_source_data()`. + This first calls :meth:`setup()` to prepare things as needed. + + If no source data is specified, it calls + :meth:`normalize_source_data()` to get that. 
Regardless, it + also calls :meth:`get_unique_data()` to discard any + duplicates. + + If :attr:`caches_target` is set, it calls + :meth:`get_target_cache()` and assigns result to + :attr:`cached_target`. + + Then depending on values for :attr:`create`, :attr:`update` + and :attr:`delete` it may call: + + * :meth:`do_create_update()` + * :meth:`do_delete()` + + And finally it calls :meth:`teardown()` for cleanup. + + :param source_data: Sequence of normalized source data, if known. :param progress: Optional progress indicator factory. @@ -326,25 +404,24 @@ class Importer: * ``created`` - list of records created on the target * ``updated`` - list of records updated on the target * ``deleted`` - list of records deleted on the target - - See also these methods which this one calls: - - * :meth:`setup()` - * :meth:`do_create_update()` - * :meth:`do_delete()` - * :meth:`teardown()` """ + # TODO: should add try/catch around this all? and teardown() in finally: clause? self.setup() created = [] updated = [] deleted = [] + log.debug("using key fields: %s", ", ".join(self.get_keys())) + # get complete set of normalized source data if source_data is None: source_data = self.normalize_source_data(progress=progress) - # TODO: should exclude duplicate source records - # source_data, unique = self.get_unique_data(source_data) + # nb. 
prune duplicate records from source data + source_data, source_keys = self.get_unique_data(source_data) + + model_title = self.get_model_title() + log.debug(f"got %s {model_title} records from source", len(source_data)) # maybe cache existing target data if self.caches_target: @@ -356,7 +433,14 @@ class Importer: # delete target data if self.delete: - deleted = self.do_delete(source_data) + changes = len(created) + len(updated) + if self.max_total and changes >= self.max_total: + log.debug( + "max of %s total changes already reached; skipping deletions", + self.max_total, + ) + else: + deleted = self.do_delete(source_keys, changes, progress=progress) self.teardown() return created, updated, deleted @@ -394,7 +478,7 @@ class Importer: # cache the set of fields to use for diff checks fields = set(self.get_fields()) - set(self.get_keys()) - def create_update(source_data, i): + def create_update(source_data, i): # pylint: disable=unused-argument # try to fetch target object per source key key = self.get_record_key(source_data) @@ -407,13 +491,34 @@ class Importer: if diffs: # data differs, so update target object - log.debug("fields (%s) differed for target data: %s and source data: %s", - ','.join(diffs), target_data, source_data) - target_object = self.update_target_object(target_object, - source_data, - target_data=target_data) + log.debug( + "fields (%s) differed for target data: %s and source data: %s", + ",".join(diffs), + target_data, + source_data, + ) + target_object = self.update_target_object( + target_object, source_data, target_data=target_data + ) updated.append((target_object, target_data, source_data)) + # stop if we reach max allowed + if self.max_update and len(updated) >= self.max_update: + log.warning( + "max of %s *updated* records has been reached; stopping now", + self.max_update, + ) + raise ImportLimitReached() + if ( + self.max_total + and (len(created) + len(updated)) >= self.max_total + ): + log.warning( + "max of %s *total changes* has been 
reached; stopping now", + self.max_total, + ) + raise ImportLimitReached() + elif not target_object and self.create: # target object not yet present, so create it @@ -427,23 +532,115 @@ class Importer: # 'object': target_object, # 'data': self.normalize_target_object(target_object), # } + + # stop if we reach max allowed + if self.max_create and len(created) >= self.max_create: + log.warning( + "max of %s *created* records has been reached; stopping now", + self.max_create, + ) + raise ImportLimitReached() + if ( + self.max_total + and (len(created) + len(updated)) >= self.max_total + ): + log.warning( + "max of %s *total changes* has been reached; stopping now", + self.max_total, + ) + raise ImportLimitReached() + else: log.debug("did NOT create new %s for key: %s", model_title, key) actioning = self.actioning.capitalize() target_title = self.handler.get_target_title() - self.app.progress_loop(create_update, all_source_data, progress, - message=f"{actioning} {model_title} data to {target_title}") + try: + self.app.progress_loop( + create_update, + all_source_data, + progress, + message=f"{actioning} {model_title} data to {target_title}", + ) + except ImportLimitReached: + pass return created, updated - def do_delete(self, source_data, progress=None): + def do_delete(self, source_keys, changes=None, progress=None): """ - TODO: not yet implemented + Delete records from the target side as needed, per the given + source data. - :returns: List of records deleted on the target. + This will call :meth:`get_deletable_keys()` to discover which + keys existing on the target side could theoretically allow + being deleted. + + From that set it will remove all the given source keys - since + such keys still exist on the source, they should not be + deleted from target. + + If any "deletable" keys remain, their corresponding objects + are removed from target via :meth:`delete_target_object()`. + + :param source_keys: A ``set`` of keys for all source records. 
+ Essentially this is just the list of keys for which target + records should *not* be deleted - since they still exist in + the data source. + + :param changes: Number of changes which have already been made + on the target side. Used to enforce max allowed changes, + if applicable. + + :param progress: Optional progress indicator factory. + + :returns: List of target records which were deleted. """ - return [] + model_title = self.get_model_title() + deleted = [] + changes = changes or 0 + + # which target records are deletable? potentially all target + # records may be eligible, but anything also found in source + # is *not* eligible. + deletable = self.get_deletable_keys() - source_keys + log.debug("found %s records to delete", len(deletable)) + + def delete(key, i): # pylint: disable=unused-argument + cached = self.cached_target.pop(key) + obj = cached["object"] + + # delete target object + log.debug("deleting %s %s: %s", model_title, key, obj) + if self.delete_target_object(obj): + deleted.append((obj, cached["data"])) + + # stop if we reach max allowed + if self.max_delete and len(deleted) >= self.max_delete: + log.warning( + "max of %s *deleted* records has been reached; stopping now", + self.max_delete, + ) + raise ImportLimitReached() + if self.max_total and (changes + len(deleted)) >= self.max_total: + log.warning( + "max of %s *total changes* has been reached; stopping now", + self.max_total, + ) + raise ImportLimitReached() + + try: + model_title = self.get_model_title() + self.app.progress_loop( + delete, + sorted(deletable), + progress, + message=f"Deleting {model_title} records", + ) + except ImportLimitReached: + pass + + return deleted def get_record_key(self, data): """ @@ -522,17 +719,66 @@ class Importer: source_objects = self.get_source_objects() normalized = [] - def normalize(obj, i): + def normalize(obj, i): # pylint: disable=unused-argument data = self.normalize_source_object_all(obj) if data: normalized.extend(data) model_title = 
self.get_model_title() source_title = self.handler.get_source_title() - self.app.progress_loop(normalize, source_objects, progress, - message=f"Reading {model_title} data from {source_title}") + self.app.progress_loop( + normalize, + source_objects, + progress, + message=f"Reading {model_title} data from {source_title}", + ) return normalized + def get_unique_data(self, source_data): + """ + Return a copy of the given source data, with any duplicate + records removed. + + This looks for duplicates based on the effective key fields, + cf. :meth:`get_keys()`. The first record found with a given + key is kept; subsequent records with that key are discarded. + + This is called from :meth:`process_data()` and is done largely + for sanity's sake, to avoid indeterminate behavior when source + data contains duplicates. For instance: + + Problem #1: If source contains 2 records with key 'X' it makes + no sense to create both records on the target side. + + Problem #2: if the 2 source records have different data (apart + from their key) then which should target reflect? + + So the main point of this method is to discard the duplicates + to avoid problem #1, but do it in a deterministic way so at + least the "choice" of which record is kept will not vary + across runs; hence "pseudo-resolve" problem #2. + + :param source_data: Sequence of normalized source data. 
+ + :returns: A 2-tuple of ``(source_data, unique_keys)`` where: + + * ``source_data`` is the final list of source data + * ``unique_keys`` is a :class:`python:set` of the source record keys + """ + unique = OrderedDict() + for data in source_data: + key = self.get_record_key(data) + if key in unique: + log.warning( + "duplicate %s records detected from %s for key: %s", + self.get_model_title(), + self.handler.get_source_title(), + key, + ) + else: + unique[key] = data + return list(unique.values()), set(unique) + def get_source_objects(self): """ This method (if applicable) should return a sequence of "raw" @@ -567,6 +813,7 @@ class Importer: data = self.normalize_source_object(obj) if data: return [data] + return None def normalize_source_object(self, obj): """ @@ -627,16 +874,21 @@ class Importer: objects = self.get_target_objects(source_data=source_data) cached = {} - def cache(obj, i): + def cache(obj, i): # pylint: disable=unused-argument data = self.normalize_target_object(obj) if data: key = self.get_record_key(data) - cached[key] = {'object': obj, 'data': data} + cached[key] = {"object": obj, "data": data} model_title = self.get_model_title() target_title = self.handler.get_target_title() - self.app.progress_loop(cache, objects, progress, - message=f"Reading {model_title} data from {target_title}") + self.app.progress_loop( + cache, + objects, + progress, + message=f"Reading {model_title} data from {target_title}", + ) + log.debug(f"cached %s {model_title} records from target", len(cached)) return cached def get_target_objects(self, source_data=None, progress=None): @@ -677,7 +929,8 @@ class Importer: """ if self.caches_target and self.cached_target is not None: cached = self.cached_target.get(key) - return cached['object'] if cached else None + return cached["object"] if cached else None + return None def normalize_target_object(self, obj): """ @@ -701,12 +954,46 @@ class Importer: :returns: Dict of normalized data fields, or ``None``. 
""" fields = self.get_fields() - fields = [f for f in self.get_simple_fields() - if f in fields] - data = dict([(field, getattr(obj, field)) - for field in fields]) + fields = [f for f in self.get_simple_fields() if f in fields] + data = {field: getattr(obj, field) for field in fields} return data + def get_deletable_keys(self, progress=None): + """ + Return a set of record keys from the target side, which are + *potentially* eligible for deletion. + + Inclusion in this set does not imply a given record/key + *should* be deleted, only that app logic (e.g. business rules) + does not prevent it. + + Default logic here will look in the :attr:`cached_target` and + then call :meth:`can_delete_object()` for each record in the + cache. If that call returns true for a given key, it is + included in the result. + + :returns: The ``set`` of target record keys eligible for + deletion. + """ + if not self.caches_target: + return set() + + keys = set() + + def check(key, i): # pylint: disable=unused-argument + data = self.cached_target[key]["data"] + obj = self.cached_target[key]["object"] + if self.can_delete_object(obj, data): + keys.add(key) + + self.app.progress_loop( + check, + set(self.cached_target), + progress, + message="Determining which objects can be deleted", + ) + return keys + ############################## # CRUD methods ############################## @@ -722,12 +1009,11 @@ class Importer: :returns: New object for the target side, or ``None``. """ - if source_data.get('__ignoreme__'): - return + if source_data.get("__ignoreme__"): + return None obj = self.make_empty_object(key) - if obj: - return self.update_target_object(obj, source_data) + return self.update_target_object(obj, source_data) def make_empty_object(self, key): """ @@ -756,7 +1042,9 @@ class Importer: Default logic will make a new instance of :attr:`model_class`. 
""" - return self.model_class() + if callable(self.model_class): + return self.model_class() # pylint: disable=not-callable + raise AttributeError("model_class is not callable!") def update_target_object(self, obj, source_data, target_data=None): """ @@ -795,23 +1083,59 @@ class Importer: # object key(s) should already be populated continue - # elif field not in source_data: + # if field not in source_data: # # no source data for field # continue - elif field in fields: + if field in fields: # field is eligible for update generally, so compare # values between records - if (not target_data + if ( + not target_data or field not in target_data - or target_data[field] != source_data[field]): + or target_data[field] != source_data[field] + ): # data mismatch; update field for target object setattr(obj, field, source_data[field]) return obj + def can_delete_object(self, obj, data=None): # pylint: disable=unused-argument + """ + Should return true or false indicating whether the given + object "can" be deleted. Default is to return true in all + cases. + + If you return false then the importer will know not to call + :meth:`delete_target_object()` even if the data sets imply + that it should. + + :param obj: Raw object on the target side. + + :param data: Normalized data dict for the target record, if + known. + + :returns: ``True`` if object can be deleted, else ``False``. + """ + return True + + def delete_target_object(self, obj): # pylint: disable=unused-argument + """ + Delete the given raw object from the target side, and return + true if successful. + + This is called from :meth:`do_delete()`. + + Default logic for this method just returns false; subclass + should override if needed. + + :returns: Should return ``True`` if deletion succeeds, or + ``False`` if deletion failed or was skipped. + """ + return False + class FromFile(Importer): """ @@ -861,6 +1185,8 @@ class FromFile(Importer): :meth:`close_input_file()`. 
""" + input_file = None + def setup(self): """ Open the input file. See also :meth:`open_input_file()`. @@ -884,7 +1210,7 @@ class FromFile(Importer): :returns: Path to input file. """ - if hasattr(self, 'input_file_path'): + if hasattr(self, "input_file_path"): return self.input_file_path folder = self.get_input_file_dir() @@ -900,7 +1226,7 @@ class FromFile(Importer): :returns: Path to folder with input file(s). """ - if hasattr(self, 'input_file_dir'): + if hasattr(self, "input_file_dir"): return self.input_file_dir raise NotImplementedError("can't guess path to input file(s) folder") @@ -914,7 +1240,7 @@ class FromFile(Importer): :returns: Input filename, sans folder path. """ - if hasattr(self, 'input_file_name'): + if hasattr(self, "input_file_name"): return self.input_file_name raise NotImplementedError("can't guess input filename") @@ -952,16 +1278,17 @@ class ToSqlalchemy(Importer): """ caches_target = True - "" # nb. suppress sphinx docs + "" # nb. suppress sphinx docs + + target_session = None def get_target_object(self, key): """ Tries to fetch the object from target DB using ORM query. """ - # first the default logic in case target object is cached - obj = super().get_target_object(key) - if obj: - return obj + # use default logic to fetch from cache, if applicable + if self.caches_target: + return super().get_target_object(key) # okay now we must fetch via query query = self.target_session.query(self.model_class) @@ -970,16 +1297,7 @@ class ToSqlalchemy(Importer): try: return query.one() except orm.exc.NoResultFound: - pass - - def create_target_object(self, key, source_data): - """ """ - with self.target_session.no_autoflush: - obj = super().create_target_object(key, source_data) - if obj: - # nb. 
add new object to target db session - self.target_session.add(obj) - return obj + return None def get_target_objects(self, source_data=None, progress=None): """ @@ -989,10 +1307,25 @@ class ToSqlalchemy(Importer): query = self.get_target_query(source_data=source_data) return query.all() - def get_target_query(self, source_data=None): + def get_target_query(self, source_data=None): # pylint: disable=unused-argument """ Returns an ORM query suitable to fetch existing objects from the target side. This is called from :meth:`get_target_objects()`. """ return self.target_session.query(self.model_class) + + def create_target_object(self, key, source_data): # pylint: disable=empty-docstring + """ """ + with self.target_session.no_autoflush: + obj = super().create_target_object(key, source_data) + if obj: + # nb. add new object to target db session + self.target_session.add(obj) + return obj + return None + + def delete_target_object(self, obj): # pylint: disable=empty-docstring + """ """ + self.target_session.delete(obj) + return True diff --git a/src/wuttasync/importing/csv.py b/src/wuttasync/importing/csv.py index 7bbc727..1d6946d 100644 --- a/src/wuttasync/importing/csv.py +++ b/src/wuttasync/importing/csv.py @@ -1,8 +1,8 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync -# Copyright © 2024 Lance Edgar +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024-2025 Lance Edgar # # This file is part of Wutta Framework. 
# @@ -25,11 +25,13 @@ Importing from CSV """ import csv +import logging +import uuid as _uuid from collections import OrderedDict from sqlalchemy_utils.functions import get_primary_keys -from wuttjamaican.db.util import make_topo_sortkey +from wuttjamaican.db.util import make_topo_sortkey, UUID from .base import FromFile from .handlers import FromFileHandler @@ -37,7 +39,10 @@ from .wutta import ToWuttaHandler from .model import ToWutta -class FromCsv(FromFile): +log = logging.getLogger(__name__) + + +class FromCsv(FromFile): # pylint: disable=abstract-method """ Base class for importer/exporter using CSV file as data source. @@ -56,7 +61,9 @@ class FromCsv(FromFile): :class:`python:csv.DictReader` instance. """ - csv_encoding = 'utf_8' + input_reader = None + + csv_encoding = "utf_8" """ Encoding used by the CSV input file. @@ -73,11 +80,11 @@ class FromCsv(FromFile): :meth:`~wuttasync.importing.base.Importer.get_model_title()` to obtain the model name. """ - if hasattr(self, 'input_file_name'): + if hasattr(self, "input_file_name"): return self.input_file_name model_title = self.get_model_title() - return f'{model_title}.csv' + return f"{model_title}.csv" def open_input_file(self): """ @@ -86,12 +93,36 @@ class FromCsv(FromFile): This tracks the file handle via :attr:`~wuttasync.importing.base.FromFile.input_file` and the CSV reader via :attr:`input_reader`. + + It also updates the effective + :attr:`~wuttasync.importing.base.Importer.fields` list per the + following logic: + + First get the current effective field list, e.g. as defined by + the class and/or from caller params. Then read the column + header list from CSV file, and discard any which are not found + in the first list. The result becomes the new effective field + list. 
""" path = self.get_input_file_path() - self.input_file = open(path, 'rt', encoding=self.csv_encoding) + log.debug("opening input file: %s", path) + self.input_file = open( # pylint: disable=consider-using-with + path, "rt", encoding=self.csv_encoding + ) self.input_reader = csv.DictReader(self.input_file) - def close_input_file(self): + # nb. importer may have all supported fields by default, so + # must prune to the subset also present in the input file + fields = self.get_fields() + orientation = self.orientation.value + log.debug(f"supported fields for {orientation}: %s", fields) + self.fields = [f for f in self.input_reader.fieldnames or [] if f in fields] + log.debug("fields present in source data: %s", self.fields) + if not self.fields: + self.input_file.close() + raise ValueError("input file has no recognized fields") + + def close_input_file(self): # pylint: disable=empty-docstring """ """ self.input_file.close() del self.input_reader @@ -109,11 +140,59 @@ class FromCsv(FromFile): return list(self.input_reader) -class FromCsvToSqlalchemyMixin: +class FromCsvToSqlalchemyMixin: # pylint: disable=too-few-public-methods """ - Mixin handler class for CSV → SQLAlchemy ORM import/export. + Mixin class for CSV → SQLAlchemy ORM :term:`importers `. + + Meant to be used by :class:`FromCsvToSqlalchemyHandlerMixin`. + + This mixin adds some logic to better handle ``uuid`` key fields + which are of :class:`~wuttjamaican:wuttjamaican.db.util.UUID` data + type (i.e. on the target side). Namely, when reading ``uuid`` + values as string from CSV, convert them to proper UUID instances, + so the key matching between source and target will behave as + expected. """ - source_key = 'csv' + + def __init__(self, config, **kwargs): + super().__init__(config, **kwargs) + + # nb. 
keep track of any key fields which use proper UUID type + self.uuid_keys = [] + for field in self.get_keys(): + attr = getattr(self.model_class, field) + if len(attr.prop.columns) == 1: + if isinstance(attr.prop.columns[0].type, UUID): + self.uuid_keys.append(field) + + def normalize_source_object(self, obj): # pylint: disable=empty-docstring + """ """ + data = dict(obj) + + # nb. convert to proper UUID values so key matching will work + # properly, where applicable + for key in self.uuid_keys: + uuid = data[key] + if uuid and not isinstance(uuid, _uuid.UUID): + data[key] = _uuid.UUID(uuid) + + return data + + +class FromCsvToSqlalchemyHandlerMixin: + """ + Mixin class for CSV → SQLAlchemy ORM :term:`import handlers + `. + + This knows how to dynamically generate :term:`importer` classes to + target the particular ORM involved. Such classes will inherit + from :class:`FromCsvToSqlalchemyMixin`, in addition to whatever + :attr:`FromImporterBase` and :attr:`ToImporterBase` reference. + + This all happens within :meth:`define_importers()`. 
+ """ + + source_key = "csv" generic_source_title = "CSV" FromImporterBase = FromCsv @@ -162,45 +241,61 @@ class FromCsvToSqlalchemyMixin: # mostly try to make an importer for every data model for name in dir(model): cls = getattr(model, name) - if isinstance(cls, type) and issubclass(cls, model.Base) and cls is not model.Base: + if ( + isinstance(cls, type) + and issubclass(cls, model.Base) + and cls is not model.Base + ): importers[name] = self.make_importer_factory(cls, name) # sort importers according to schema topography topo_sortkey = make_topo_sortkey(model) - importers = OrderedDict([ - (name, importers[name]) - for name in sorted(importers, key=topo_sortkey) - ]) + importers = OrderedDict( + [(name, importers[name]) for name in sorted(importers, key=topo_sortkey)] + ) return importers - def make_importer_factory(self, cls, name): + def make_importer_factory(self, model_class, name): """ - Generate and return a new importer/exporter class, targeting - the given data model class. + Generate and return a new :term:`importer` class, targeting + the given :term:`data model` class. - :param cls: A data model class. + The newly-created class will inherit from: - :param name: Optional "model name" override for the - importer/exporter. + * :class:`FromCsvToSqlalchemyMixin` + * :attr:`FromImporterBase` + * :attr:`ToImporterBase` - :returns: A new class, meant to process import/export - operations which target the given data model. The new - class will inherit from both :attr:`FromImporterBase` and - :attr:`ToImporterBase`. + :param model_class: A data model class. + + :param name: The "model name" for the importer/exporter. New + class name will be based on this, so e.g. ``Widget`` model + name becomes ``WidgetImporter`` class name. + + :returns: The new class, meant to process import/export + targeting the given data model. 
""" - return type(f'{name}Importer', (FromCsv, self.ToImporterBase), { - 'model_class': cls, - 'key': list(get_primary_keys(cls)), - }) + return type( + f"{name}Importer", + (FromCsvToSqlalchemyMixin, self.FromImporterBase, self.ToImporterBase), + { + "model_class": model_class, + "key": list(get_primary_keys(model_class)), + }, + ) -class FromCsvToWutta(FromCsvToSqlalchemyMixin, ToWuttaHandler): +class FromCsvToWutta(FromCsvToSqlalchemyHandlerMixin, FromFileHandler, ToWuttaHandler): """ Handler for CSV → Wutta :term:`app database` import. + + This uses :class:`FromCsvToSqlalchemyHandlerMixin` for most of the + heavy lifting. """ + ToImporterBase = ToWutta - def get_target_model(self): + def get_target_model(self): # pylint: disable=empty-docstring """ """ return self.app.model diff --git a/src/wuttasync/importing/handlers.py b/src/wuttasync/importing/handlers.py index 87e0ac3..e9c6ac3 100644 --- a/src/wuttasync/importing/handlers.py +++ b/src/wuttasync/importing/handlers.py @@ -1,8 +1,8 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync -# Copyright © 2024 Lance Edgar +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024-2025 Lance Edgar # # This file is part of Wutta Framework. # @@ -25,6 +25,7 @@ Data Import / Export Handlers """ import logging +import os from collections import OrderedDict from enum import Enum @@ -38,8 +39,9 @@ class Orientation(Enum): """ Enum values for :attr:`ImportHandler.orientation`. 
""" - IMPORT = 'import' - EXPORT = 'export' + + IMPORT = "import" + EXPORT = "export" class ImportHandler(GenericHandler): @@ -157,7 +159,7 @@ class ImportHandler(GenericHandler): * ``'importing'`` * ``'exporting'`` """ - return f'{self.orientation.value}ing' + return f"{self.orientation.value}ing" @classmethod def get_key(cls): @@ -173,7 +175,7 @@ class ImportHandler(GenericHandler): here; but only one will be configured as the "default" handler for that key. See also :meth:`get_spec()`. """ - return f'to_{cls.target_key}.from_{cls.source_key}.{cls.orientation.value}' + return f"to_{cls.target_key}.from_{cls.source_key}.{cls.orientation.value}" @classmethod def get_spec(cls): @@ -187,7 +189,7 @@ class ImportHandler(GenericHandler): See also :meth:`get_key()`. """ - return f'{cls.__module__}:{cls.__name__}' + return f"{cls.__module__}:{cls.__name__}" def get_title(self): """ @@ -209,9 +211,9 @@ class ImportHandler(GenericHandler): See also :meth:`get_title()` and :meth:`get_target_title()`. """ - if hasattr(self, 'source_title'): + if hasattr(self, "source_title"): return self.source_title - if hasattr(self, 'generic_source_title'): + if hasattr(self, "generic_source_title"): return self.generic_source_title return self.source_key @@ -221,9 +223,9 @@ class ImportHandler(GenericHandler): See also :meth:`get_title()` and :meth:`get_source_title()`. """ - if hasattr(self, 'target_title'): + if hasattr(self, "target_title"): return self.target_title - if hasattr(self, 'generic_target_title'): + if hasattr(self, "generic_target_title"): return self.generic_target_title return self.target_key @@ -231,7 +233,7 @@ class ImportHandler(GenericHandler): """ Run import/export operations for the specified models. - :param \*keys: One or more importer/exporter (model) keys, as + :param \\*keys: One or more importer/exporter (model) keys, as defined by the handler. 
Each key specified must be present in :attr:`importers` and @@ -268,10 +270,12 @@ class ImportHandler(GenericHandler): msg = "%s: added %d; updated %d; deleted %d %s records" if self.dry_run: msg += " (dry run)" - log.info(msg, self.get_title(), len(created), len(updated), len(deleted), key) + log.info( + msg, self.get_title(), len(created), len(updated), len(deleted), key + ) except: - # TODO: what should happen here? + log.exception("what should happen here?") # TODO raise else: @@ -307,8 +311,8 @@ class ImportHandler(GenericHandler): :returns: Dict of kwargs, "post-consumption." """ - if 'dry_run' in kwargs: - self.dry_run = kwargs['dry_run'] + if "dry_run" in kwargs: + self.dry_run = kwargs["dry_run"] return kwargs @@ -459,6 +463,9 @@ class ImportHandler(GenericHandler): Returns an importer/exporter instance corresponding to the given key. + Note that this will always create a *new* instance; they are + not cached. + The key will be the "model name" mapped to a particular importer/exporter class and thus must be present in :attr:`importers`. @@ -471,6 +478,8 @@ class ImportHandler(GenericHandler): :param key: Model key for desired importer/exporter. + :param \\**kwargs: Extra/override kwargs for the importer. + :returns: Instance of (subclass of) :class:`~wuttasync.importing.base.Importer`. """ @@ -479,11 +488,16 @@ class ImportHandler(GenericHandler): raise KeyError(f"unknown {orientation} key: {key}") kwargs = self.get_importer_kwargs(key, **kwargs) - kwargs['handler'] = self + kwargs["handler"] = self + + # nb. default logic should (normally) determine keys + if "keys" in kwargs and not kwargs["keys"]: + del kwargs["keys"] + factory = self.importers[key] return factory(self.config, **kwargs) - def get_importer_kwargs(self, key, **kwargs): + def get_importer_kwargs(self, key, **kwargs): # pylint: disable=unused-argument """ Returns a dict of kwargs to be used when construcing an importer/exporter with the given key. 
        This is normally called
@@ -492,7 +506,7 @@ class ImportHandler(GenericHandler):
         :param key: Model key for the desired importer/exporter,
           e.g. ``'Widget'``

-        :param \**kwargs: Any kwargs we have so collected far.
+        :param \\**kwargs: Any kwargs we have collected so far.

         :returns: Final kwargs dict for new importer/exporter.
         """
@@ -501,9 +515,28 @@ class ImportHandler(GenericHandler):

 class FromFileHandler(ImportHandler):
     """
-    Handler for import/export which uses an input file as data source.
+    Handler for import/export which uses input file(s) as data source.
+
+    This handler assumes its importer/exporter classes inherit from
+    :class:`~wuttasync.importing.base.FromFile` for source parent
+    logic.
     """

+    def process_data(self, *keys, **kwargs):  # pylint: disable=empty-docstring
+        """ """
+
+        # interpret file vs. folder path
+        # nb. this assumes FromFile importer/exporter
+        path = kwargs.pop("input_file_path", None)
+        if path:
+            if not kwargs.get("input_file_dir") and os.path.isdir(path):
+                kwargs["input_file_dir"] = path
+            else:
+                kwargs["input_file_path"] = path
+
+        # and carry on
+        super().process_data(*keys, **kwargs)
+

 class ToSqlalchemyHandler(ImportHandler):
     """
@@ -553,8 +586,8 @@ class ToSqlalchemyHandler(ImportHandler):
         """
         raise NotImplementedError

-    def get_importer_kwargs(self, key, **kwargs):
+    def get_importer_kwargs(self, key, **kwargs):  # pylint: disable=empty-docstring
         """ """
         kwargs = super().get_importer_kwargs(key, **kwargs)
-        kwargs.setdefault('target_session', self.target_session)
+        kwargs.setdefault("target_session", self.target_session)
         return kwargs
diff --git a/src/wuttasync/importing/model.py b/src/wuttasync/importing/model.py
index 7a7c554..341e092 100644
--- a/src/wuttasync/importing/model.py
+++ b/src/wuttasync/importing/model.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8; -*-
 ################################################################################
 #
-# WuttaSync -- Wutta framework for data import/export and real-time sync
+# 
WuttaSync -- Wutta Framework for data import/export and real-time sync # Copyright © 2024 Lance Edgar # # This file is part of Wutta Framework. diff --git a/src/wuttasync/importing/wutta.py b/src/wuttasync/importing/wutta.py index be5786c..9de4822 100644 --- a/src/wuttasync/importing/wutta.py +++ b/src/wuttasync/importing/wutta.py @@ -1,8 +1,8 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync -# Copyright © 2024 Lance Edgar +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024-2025 Lance Edgar # # This file is part of Wutta Framework. # @@ -33,15 +33,15 @@ class ToWuttaHandler(ToSqlalchemyHandler): database`). """ - target_key = 'wutta' - "" # nb. suppress docs + target_key = "wutta" + "" # nb. suppress docs - def get_target_title(self): + def get_target_title(self): # pylint: disable=empty-docstring """ """ # nb. we override parent to use app title as default - if hasattr(self, 'target_title'): + if hasattr(self, "target_title"): return self.target_title - if hasattr(self, 'generic_target_title'): + if hasattr(self, "generic_target_title"): return self.generic_target_title return self.app.get_title() diff --git a/src/wuttasync/util.py b/src/wuttasync/util.py index 9f9eccb..8cfd0d4 100644 --- a/src/wuttasync/util.py +++ b/src/wuttasync/util.py @@ -1,7 +1,7 @@ # -*- coding: utf-8; -*- ################################################################################ # -# WuttaSync -- Wutta framework for data import/export and real-time sync +# WuttaSync -- Wutta Framework for data import/export and real-time sync # Copyright © 2024 Lance Edgar # # This file is part of Wutta Framework. 
diff --git a/tasks.py b/tasks.py index 78a4ece..56a7e1d 100644 --- a/tasks.py +++ b/tasks.py @@ -15,10 +15,10 @@ def release(c, skip_tests=False): Release a new version of WuttaSync """ if not skip_tests: - c.run('pytest') + c.run("pytest") - if os.path.exists('dist'): - shutil.rmtree('dist') + if os.path.exists("dist"): + shutil.rmtree("dist") - c.run('python -m build --sdist') - c.run('twine upload dist/*') + c.run("python -m build --sdist") + c.run("twine upload dist/*") diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cli/example.conf b/tests/cli/example.conf new file mode 100644 index 0000000..e69de29 diff --git a/tests/cli/test_base.py b/tests/cli/test_base.py new file mode 100644 index 0000000..991358e --- /dev/null +++ b/tests/cli/test_base.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8; -*- + +import inspect +from unittest import TestCase +from unittest.mock import patch + +from wuttasync.cli import base as mod +from wuttjamaican.testing import DataTestCase + + +class TestImportCommandHandler(DataTestCase): + + def make_handler(self, **kwargs): + return mod.ImportCommandHandler(self.config, **kwargs) + + def test_import_handler(self): + + # none + handler = self.make_handler() + self.assertIsNone(handler.import_handler) + + FromCsvToWutta = self.app.load_object("wuttasync.importing.csv:FromCsvToWutta") + + # as spec + handler = self.make_handler(import_handler=FromCsvToWutta.get_spec()) + self.assertIsInstance(handler.import_handler, FromCsvToWutta) + + # as factory + handler = self.make_handler(import_handler=FromCsvToWutta) + self.assertIsInstance(handler.import_handler, FromCsvToWutta) + + # as instance + myhandler = FromCsvToWutta(self.config) + handler = self.make_handler(import_handler=myhandler) + self.assertIs(handler.import_handler, myhandler) + + def test_run(self): + handler = self.make_handler( + import_handler="wuttasync.importing.csv:FromCsvToWutta" + ) + + with 
patch.object(handler, "list_models") as list_models: + handler.run({"list_models": True}) + list_models.assert_called_once_with({"list_models": True}) + + with patch.object(handler, "import_handler") as import_handler: + handler.run({"models": []}) + import_handler.process_data.assert_called_once_with() + + def test_list_models(self): + handler = self.make_handler( + import_handler="wuttasync.importing.csv:FromCsvToWutta" + ) + + with patch.object(mod, "sys") as sys: + handler.list_models({}) + # just test a few random things we expect to see + self.assertTrue(sys.stdout.write.has_call("ALL MODELS:\n")) + self.assertTrue(sys.stdout.write.has_call("Person")) + self.assertTrue(sys.stdout.write.has_call("User")) + self.assertTrue(sys.stdout.write.has_call("Upgrade")) + + +class TestImporterCommand(TestCase): + + def test_basic(self): + def myfunc(ctx, **kwargs): + pass + + sig1 = inspect.signature(myfunc) + self.assertIn("kwargs", sig1.parameters) + self.assertNotIn("dry_run", sig1.parameters) + wrapt = mod.import_command(myfunc) + sig2 = inspect.signature(wrapt) + self.assertNotIn("kwargs", sig2.parameters) + self.assertIn("dry_run", sig2.parameters) + + +class TestFileImporterCommand(TestCase): + + def test_basic(self): + def myfunc(ctx, **kwargs): + pass + + sig1 = inspect.signature(myfunc) + self.assertIn("kwargs", sig1.parameters) + self.assertNotIn("dry_run", sig1.parameters) + self.assertNotIn("input_file_path", sig1.parameters) + wrapt = mod.file_import_command(myfunc) + sig2 = inspect.signature(wrapt) + self.assertNotIn("kwargs", sig2.parameters) + self.assertIn("dry_run", sig2.parameters) + self.assertIn("input_file_path", sig2.parameters) diff --git a/tests/cli/test_import_csv.py b/tests/cli/test_import_csv.py new file mode 100644 index 0000000..5623176 --- /dev/null +++ b/tests/cli/test_import_csv.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8; -*- + +from unittest import TestCase +from unittest.mock import MagicMock, patch + +from wuttasync.cli import 
import_csv as mod, ImportCommandHandler + + +class TestImportCsv(TestCase): + + def test_basic(self): + params = { + "models": [], + "create": True, + "update": True, + "delete": False, + "dry_run": True, + } + ctx = MagicMock(params=params) + with patch.object(ImportCommandHandler, "run") as run: + mod.import_csv(ctx) + run.assert_called_once_with(params) diff --git a/tests/importing/test_base.py b/tests/importing/test_base.py index abe121b..08c37a2 100644 --- a/tests/importing/test_base.py +++ b/tests/importing/test_base.py @@ -1,4 +1,4 @@ -#-*- coding: utf-8; -*- +# -*- coding: utf-8; -*- from unittest.mock import patch @@ -14,7 +14,7 @@ class TestImporter(DataTestCase): self.handler = ImportHandler(self.config) def make_importer(self, **kwargs): - kwargs.setdefault('handler', self.handler) + kwargs.setdefault("handler", self.handler) return mod.Importer(self.config, **kwargs) def test_constructor(self): @@ -24,132 +24,348 @@ class TestImporter(DataTestCase): imp = self.make_importer(model_class=model.Setting) # fields - self.assertEqual(imp.supported_fields, ['name', 'value']) - self.assertEqual(imp.simple_fields, ['name', 'value']) - self.assertEqual(imp.fields, ['name', 'value']) + self.assertEqual(imp.fields, ["name", "value"]) # orientation etc. 
self.assertEqual(imp.orientation, Orientation.IMPORT) - self.assertEqual(imp.actioning, 'importing') + self.assertEqual(imp.actioning, "importing") self.assertTrue(imp.create) self.assertTrue(imp.update) self.assertTrue(imp.delete) self.assertFalse(imp.dry_run) + def test_constructor_fields(self): + model = self.app.model + + # basic importer + imp = self.make_importer(model_class=model.Setting, fields="name") + self.assertEqual(imp.fields, ["name"]) + + def test_constructor_excluded_fields(self): + model = self.app.model + + # basic importer + imp = self.make_importer(model_class=model.Setting, excluded_fields="value") + self.assertEqual(imp.fields, ["name"]) + def test_get_model_title(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - self.assertEqual(imp.get_model_title(), 'Setting') + self.assertEqual(imp.get_model_title(), "Setting") imp.model_title = "SeTtInG" - self.assertEqual(imp.get_model_title(), 'SeTtInG') + self.assertEqual(imp.get_model_title(), "SeTtInG") def test_get_simple_fields(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - self.assertEqual(imp.get_simple_fields(), ['name', 'value']) - imp.simple_fields = ['name'] - self.assertEqual(imp.get_simple_fields(), ['name']) + self.assertEqual(imp.get_simple_fields(), ["name", "value"]) + imp.simple_fields = ["name"] + self.assertEqual(imp.get_simple_fields(), ["name"]) def test_get_supported_fields(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - self.assertEqual(imp.get_supported_fields(), ['name', 'value']) - imp.supported_fields = ['name'] - self.assertEqual(imp.get_supported_fields(), ['name']) + self.assertEqual(imp.get_supported_fields(), ["name", "value"]) + imp.supported_fields = ["name"] + self.assertEqual(imp.get_supported_fields(), ["name"]) def test_get_fields(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - self.assertEqual(imp.get_fields(), ['name', 
'value']) - imp.fields = ['name'] - self.assertEqual(imp.get_fields(), ['name']) + self.assertEqual(imp.get_fields(), ["name", "value"]) + imp.fields = ["name"] + self.assertEqual(imp.get_fields(), ["name"]) def test_get_keys(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - self.assertEqual(imp.get_keys(), ['name']) - imp.key = 'value' - self.assertEqual(imp.get_keys(), ['value']) + self.assertEqual(imp.get_keys(), ["name"]) + with patch.multiple(imp, create=True, key="value"): + self.assertEqual(imp.get_keys(), ["value"]) + with patch.multiple(imp, create=True, keys=["foo", "bar"]): + self.assertEqual(imp.get_keys(), ["foo", "bar"]) def test_process_data(self): model = self.app.model - imp = self.make_importer(model_class=model.Setting, caches_target=True) + imp = self.make_importer( + model_class=model.Setting, caches_target=True, delete=True + ) - # empty data set / just for coverage - with patch.object(imp, 'normalize_source_data') as normalize_source_data: - normalize_source_data.return_value = [] + def make_cache(): + setting1 = model.Setting(name="foo1", value="bar1") + setting2 = model.Setting(name="foo2", value="bar2") + setting3 = model.Setting(name="foo3", value="bar3") + cache = { + ("foo1",): { + "object": setting1, + "data": {"name": "foo1", "value": "bar1"}, + }, + ("foo2",): { + "object": setting2, + "data": {"name": "foo2", "value": "bar2"}, + }, + ("foo3",): { + "object": setting3, + "data": {"name": "foo3", "value": "bar3"}, + }, + } + return cache - with patch.object(imp, 'get_target_cache') as get_target_cache: - get_target_cache.return_value = {} + # nb. 
delete always succeeds + with patch.object(imp, "delete_target_object", return_value=True): - result = imp.process_data() - self.assertEqual(result, ([], [], [])) + # create + update + delete all as needed + with patch.object(imp, "get_target_cache", return_value=make_cache()): + created, updated, deleted = imp.process_data( + [ + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + {"name": "foo5", "value": "BAR5"}, + ] + ) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 1) + self.assertEqual(len(deleted), 2) + + # same but with --max-total so delete gets skipped + with patch.object(imp, "get_target_cache", return_value=make_cache()): + with patch.object(imp, "max_total", new=3): + created, updated, deleted = imp.process_data( + [ + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + {"name": "foo5", "value": "BAR5"}, + ] + ) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 1) + self.assertEqual(len(deleted), 0) + + # delete all if source data empty + with patch.object(imp, "get_target_cache", return_value=make_cache()): + created, updated, deleted = imp.process_data() + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 0) + self.assertEqual(len(deleted), 3) def test_do_create_update(self): model = self.app.model + imp = self.make_importer(model_class=model.Setting, caches_target=True) + + def make_cache(): + setting1 = model.Setting(name="foo1", value="bar1") + setting2 = model.Setting(name="foo2", value="bar2") + cache = { + ("foo1",): { + "object": setting1, + "data": {"name": "foo1", "value": "bar1"}, + }, + ("foo2",): { + "object": setting2, + "data": {"name": "foo2", "value": "bar2"}, + }, + } + return cache + + # change nothing if data matches + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "bar1"}, + {"name": "foo2", "value": "bar2"}, + ] + ) + 
self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 0) + + # update all as needed + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "BAR1"}, + {"name": "foo2", "value": "BAR2"}, + ] + ) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 2) + + # update all, with --max-update + with patch.multiple(imp, create=True, cached_target=make_cache(), max_update=1): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "BAR1"}, + {"name": "foo2", "value": "BAR2"}, + ] + ) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 1) + + # update all, with --max-total + with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "BAR1"}, + {"name": "foo2", "value": "BAR2"}, + ] + ) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 1) + + # create all as needed + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "bar1"}, + {"name": "foo2", "value": "bar2"}, + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + ] + ) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 0) + + # what happens when create gets skipped + with patch.multiple(imp, create=True, cached_target=make_cache()): + with patch.object(imp, "create_target_object", return_value=None): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "bar1"}, + {"name": "foo2", "value": "bar2"}, + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + ] + ) + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 0) + + # create all, with --max-create + with patch.multiple(imp, create=True, cached_target=make_cache(), max_create=1): + created, updated = imp.do_create_update( + [ 
+ {"name": "foo1", "value": "bar1"}, + {"name": "foo2", "value": "bar2"}, + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + ] + ) + self.assertEqual(len(created), 1) + self.assertEqual(len(updated), 0) + + # create all, with --max-total + with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "bar1"}, + {"name": "foo2", "value": "bar2"}, + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + ] + ) + self.assertEqual(len(created), 1) + self.assertEqual(len(updated), 0) + + # create + update all as needed + with patch.multiple(imp, create=True, cached_target=make_cache()): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "BAR1"}, + {"name": "foo2", "value": "BAR2"}, + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + ] + ) + self.assertEqual(len(created), 2) + self.assertEqual(len(updated), 2) + + # create + update all, with --max-total + with patch.multiple(imp, create=True, cached_target=make_cache(), max_total=1): + created, updated = imp.do_create_update( + [ + {"name": "foo1", "value": "BAR1"}, + {"name": "foo2", "value": "BAR2"}, + {"name": "foo3", "value": "BAR3"}, + {"name": "foo4", "value": "BAR4"}, + ] + ) + # nb. 
foo1 is updated first + self.assertEqual(len(created), 0) + self.assertEqual(len(updated), 1) + + def test_do_delete(self): + model = self.app.model # this requires a mock target cache + setting1 = model.Setting(name="foo1", value="bar1") + setting2 = model.Setting(name="foo2", value="bar2") imp = self.make_importer(model_class=model.Setting, caches_target=True) - setting = model.Setting(name='foo', value='bar') - imp.cached_target = { - ('foo',): { - 'object': setting, - 'data': {'name': 'foo', 'value': 'bar'}, + cache = { + ("foo1",): { + "object": setting1, + "data": {"name": "foo1", "value": "bar1"}, + }, + ("foo2",): { + "object": setting2, + "data": {"name": "foo2", "value": "bar2"}, }, } - # will update the one record - result = imp.do_create_update([{'name': 'foo', 'value': 'baz'}]) - self.assertIs(result[1][0][0], setting) - self.assertEqual(result, ([], [(setting, - # nb. target - {'name': 'foo', 'value': 'bar'}, - # nb. source - {'name': 'foo', 'value': 'baz'})])) - self.assertEqual(setting.value, 'baz') + with patch.object(imp, "delete_target_object") as delete_target_object: - # will create a new record - result = imp.do_create_update([{'name': 'blah', 'value': 'zay'}]) - self.assertIsNot(result[0][0][0], setting) - setting_new = result[0][0][0] - self.assertEqual(result, ([(setting_new, - # nb. 
source - {'name': 'blah', 'value': 'zay'})], - [])) - self.assertEqual(setting_new.name, 'blah') - self.assertEqual(setting_new.value, 'zay') + # delete nothing if source has same keys + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set(imp.cached_target) + result = imp.do_delete(source_keys) + self.assertFalse(delete_target_object.called) + self.assertEqual(result, []) - # but what if new record is *not* created - with patch.object(imp, 'create_target_object', return_value=None): - result = imp.do_create_update([{'name': 'another', 'value': 'one'}]) - self.assertEqual(result, ([], [])) + # delete both if source has no keys + delete_target_object.reset_mock() + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set() + result = imp.do_delete(source_keys) + self.assertEqual(delete_target_object.call_count, 2) + self.assertEqual(len(result), 2) - # def test_do_delete(self): - # model = self.app.model - # imp = self.make_importer(model_class=model.Setting) + # delete just one if --max-delete was set + delete_target_object.reset_mock() + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set() + with patch.object(imp, "max_delete", new=1): + result = imp.do_delete(source_keys) + self.assertEqual(delete_target_object.call_count, 1) + self.assertEqual(len(result), 1) + + # delete just one if --max-total was set + delete_target_object.reset_mock() + with patch.multiple(imp, create=True, cached_target=dict(cache)): + source_keys = set() + with patch.object(imp, "max_total", new=1): + result = imp.do_delete(source_keys) + self.assertEqual(delete_target_object.call_count, 1) + self.assertEqual(len(result), 1) def test_get_record_key(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - record = {'name': 'foo', 'value': 'bar'} - self.assertEqual(imp.get_record_key(record), ('foo',)) - imp.key = ('name', 'value') - 
self.assertEqual(imp.get_record_key(record), ('foo', 'bar')) + record = {"name": "foo", "value": "bar"} + self.assertEqual(imp.get_record_key(record), ("foo",)) + imp.key = ("name", "value") + self.assertEqual(imp.get_record_key(record), ("foo", "bar")) def test_data_diffs(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) # 2 identical records - rec1 = {'name': 'foo', 'value': 'bar'} - rec2 = {'name': 'foo', 'value': 'bar'} + rec1 = {"name": "foo", "value": "bar"} + rec2 = {"name": "foo", "value": "bar"} result = imp.data_diffs(rec1, rec2) self.assertEqual(result, []) # now they're different - rec2['value'] = 'baz' + rec2["value"] = "baz" result = imp.data_diffs(rec1, rec2) - self.assertEqual(result, ['value']) + self.assertEqual(result, ["value"]) def test_normalize_source_data(self): model = self.app.model @@ -160,12 +376,28 @@ class TestImporter(DataTestCase): self.assertEqual(data, []) # now with 1 record - setting = model.Setting(name='foo', value='bar') + setting = model.Setting(name="foo", value="bar") data = imp.normalize_source_data(source_objects=[setting]) self.assertEqual(len(data), 1) # nb. default normalizer returns object as-is self.assertIs(data[0], setting) + def test_get_unique_data(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + + setting1 = model.Setting(name="foo", value="bar1") + setting2 = model.Setting(name="foo", value="bar2") + + result = imp.get_unique_data([setting2, setting1]) + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], list) + self.assertEqual(len(result[0]), 1) + self.assertIs(result[0][0], setting2) # nb. 
not setting1 + self.assertIsInstance(result[1], set) + self.assertEqual(result[1], {("foo",)}) + def test_get_source_objects(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) @@ -174,11 +406,18 @@ class TestImporter(DataTestCase): def test_normalize_source_object_all(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) + + # normal setting = model.Setting() result = imp.normalize_source_object_all(setting) self.assertEqual(len(result), 1) self.assertIs(result[0], setting) + # unwanted (normalized is None) + with patch.object(imp, "normalize_source_object", return_value=None): + result = imp.normalize_source_object_all(setting) + self.assertIsNone(result) + def test_normalize_source_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) @@ -190,7 +429,7 @@ class TestImporter(DataTestCase): model = self.app.model imp = self.make_importer(model_class=model.Setting) - with patch.object(imp, 'get_target_objects') as get_target_objects: + with patch.object(imp, "get_target_objects") as get_target_objects: get_target_objects.return_value = [] # empty cache @@ -198,16 +437,16 @@ class TestImporter(DataTestCase): self.assertEqual(cache, {}) # cache w/ one record - setting = model.Setting(name='foo', value='bar') + setting = model.Setting(name="foo", value="bar") get_target_objects.return_value = [setting] cache = imp.get_target_cache() self.assertEqual(len(cache), 1) - self.assertIn(('foo',), cache) - foo = cache[('foo',)] + self.assertIn(("foo",), cache) + foo = cache[("foo",)] self.assertEqual(len(foo), 2) - self.assertEqual(set(foo), {'object', 'data'}) - self.assertIs(foo['object'], setting) - self.assertEqual(foo['data'], {'name': 'foo', 'value': 'bar'}) + self.assertEqual(set(foo), {"object", "data"}) + self.assertIs(foo["object"], setting) + self.assertEqual(foo["data"], {"name": "foo", "value": "bar"}) def test_get_target_objects(self): model = self.app.model @@ -216,74 
+455,122 @@ class TestImporter(DataTestCase): def test_get_target_object(self): model = self.app.model - setting = model.Setting(name='foo', value='bar') + setting = model.Setting(name="foo", value="bar") # nb. must mock up a target cache for this one imp = self.make_importer(model_class=model.Setting, caches_target=True) imp.cached_target = { - ('foo',): { - 'object': setting, - 'data': {'name': 'foo', 'value': 'bar'}, + ("foo",): { + "object": setting, + "data": {"name": "foo", "value": "bar"}, }, } # returns same object - result = imp.get_target_object(('foo',)) + result = imp.get_target_object(("foo",)) self.assertIs(result, setting) # and one more time just for kicks - result = imp.get_target_object(('foo',)) + result = imp.get_target_object(("foo",)) self.assertIs(result, setting) # but then not if cache flag is off imp.caches_target = False - result = imp.get_target_object(('foo',)) + result = imp.get_target_object(("foo",)) self.assertIsNone(result) def test_normalize_target_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - setting = model.Setting(name='foo', value='bar') + setting = model.Setting(name="foo", value="bar") data = imp.normalize_target_object(setting) - self.assertEqual(data, {'name': 'foo', 'value': 'bar'}) + self.assertEqual(data, {"name": "foo", "value": "bar"}) + + def test_get_deletable_keys(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + + # empty set by default (nb. 
no target cache) + result = imp.get_deletable_keys() + self.assertIsInstance(result, set) + self.assertEqual(result, set()) + + setting = model.Setting(name="foo", value="bar") + cache = { + ("foo",): { + "object": setting, + "data": {"name": "foo", "value": "bar"}, + }, + } + + with patch.multiple(imp, create=True, caches_target=True, cached_target=cache): + + # all are deletable by default + result = imp.get_deletable_keys() + self.assertEqual(result, {("foo",)}) + + # but some maybe can't be deleted + with patch.object(imp, "can_delete_object", return_value=False): + result = imp.get_deletable_keys() + self.assertEqual(result, set()) def test_create_target_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) # basic - setting = imp.create_target_object(('foo',), {'name': 'foo', 'value': 'bar'}) + setting = imp.create_target_object(("foo",), {"name": "foo", "value": "bar"}) self.assertIsInstance(setting, model.Setting) - self.assertEqual(setting.name, 'foo') - self.assertEqual(setting.value, 'bar') + self.assertEqual(setting.name, "foo") + self.assertEqual(setting.value, "bar") # will skip if magic delete flag is set - setting = imp.create_target_object(('foo',), {'name': 'foo', 'value': 'bar', - '__ignoreme__': True}) + setting = imp.create_target_object( + ("foo",), {"name": "foo", "value": "bar", "__ignoreme__": True} + ) self.assertIsNone(setting) def test_make_empty_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - obj = imp.make_empty_object(('foo',)) + obj = imp.make_empty_object(("foo",)) self.assertIsInstance(obj, model.Setting) - self.assertEqual(obj.name, 'foo') + self.assertEqual(obj.name, "foo") def test_make_object(self): model = self.app.model + + # normal imp = self.make_importer(model_class=model.Setting) obj = imp.make_object() self.assertIsInstance(obj, model.Setting) + # no model_class + imp = self.make_importer() + self.assertRaises(AttributeError, imp.make_object) + 
def test_update_target_object(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - setting = model.Setting(name='foo') + setting = model.Setting(name="foo") # basic logic for updating *new* object - obj = imp.update_target_object(setting, {'name': 'foo', 'value': 'bar'}) + obj = imp.update_target_object(setting, {"name": "foo", "value": "bar"}) self.assertIs(obj, setting) - self.assertEqual(setting.value, 'bar') + self.assertEqual(setting.value, "bar") + + def test_can_delete_object(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + setting = model.Setting(name="foo") + self.assertTrue(imp.can_delete_object(setting)) + + def test_delete_target_object(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting) + setting = model.Setting(name="foo") + # nb. default implementation always returns false + self.assertFalse(imp.delete_target_object(setting)) class TestFromFile(DataTestCase): @@ -293,20 +580,20 @@ class TestFromFile(DataTestCase): self.handler = ImportHandler(self.config) def make_importer(self, **kwargs): - kwargs.setdefault('handler', self.handler) + kwargs.setdefault("handler", self.handler) return mod.FromFile(self.config, **kwargs) def test_setup(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - with patch.object(imp, 'open_input_file') as open_input_file: + with patch.object(imp, "open_input_file") as open_input_file: imp.setup() open_input_file.assert_called_once_with() def test_teardown(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - with patch.object(imp, 'close_input_file') as close_input_file: + with patch.object(imp, "close_input_file") as close_input_file: imp.teardown() close_input_file.assert_called_once_with() @@ -315,13 +602,13 @@ class TestFromFile(DataTestCase): imp = self.make_importer(model_class=model.Setting) # path is guessed from dir+filename - path = 
self.write_file('data.txt', '') + path = self.write_file("data.txt", "") imp.input_file_dir = self.tempdir - imp.input_file_name = 'data.txt' + imp.input_file_name = "data.txt" self.assertEqual(imp.get_input_file_path(), path) # path can be explicitly set - path2 = self.write_file('data2.txt', '') + path2 = self.write_file("data2.txt", "") imp.input_file_path = path2 self.assertEqual(imp.get_input_file_path(), path2) @@ -344,8 +631,8 @@ class TestFromFile(DataTestCase): self.assertRaises(NotImplementedError, imp.get_input_file_name) # name can be explicitly set - imp.input_file_name = 'data.txt' - self.assertEqual(imp.get_input_file_name(), 'data.txt') + imp.input_file_name = "data.txt" + self.assertEqual(imp.get_input_file_name(), "data.txt") def test_open_input_file(self): model = self.app.model @@ -356,10 +643,10 @@ class TestFromFile(DataTestCase): model = self.app.model imp = self.make_importer(model_class=model.Setting) - path = self.write_file('data.txt', '') - with open(path, 'rt') as f: + path = self.write_file("data.txt", "") + with open(path, "rt") as f: imp.input_file = f - with patch.object(f, 'close') as close: + with patch.object(f, "close") as close: imp.close_input_file() close.assert_called_once_with() @@ -371,67 +658,89 @@ class TestToSqlalchemy(DataTestCase): self.handler = ImportHandler(self.config) def make_importer(self, **kwargs): - kwargs.setdefault('handler', self.handler) + kwargs.setdefault("handler", self.handler) return mod.ToSqlalchemy(self.config, **kwargs) - def test_get_target_object(self): - model = self.app.model - setting = model.Setting(name='foo', value='bar') - - # nb. 
must mock up a target cache for this one - imp = self.make_importer(model_class=model.Setting, caches_target=True) - imp.cached_target = { - ('foo',): { - 'object': setting, - 'data': {'name': 'foo', 'value': 'bar'}, - }, - } - - # returns same object - result = imp.get_target_object(('foo',)) - self.assertIs(result, setting) - - # and one more time just for kicks - result = imp.get_target_object(('foo',)) - self.assertIs(result, setting) - - # now let's put a 2nd setting in the db - setting2 = model.Setting(name='foo2', value='bar2') - self.session.add(setting2) - self.session.commit() - - # then we should be able to fetch that via query - imp.target_session = self.session - result = imp.get_target_object(('foo2',)) - self.assertIsInstance(result, model.Setting) - self.assertIs(result, setting2) - - # but sometimes it will not be found - result = imp.get_target_object(('foo3',)) - self.assertIsNone(result) - - def test_create_target_object(self): - model = self.app.model - imp = self.make_importer(model_class=model.Setting, target_session=self.session) - setting = model.Setting(name='foo', value='bar') - - # new object is added to session - setting = imp.create_target_object(('foo',), {'name': 'foo', 'value': 'bar'}) - self.assertIsInstance(setting, model.Setting) - self.assertEqual(setting.name, 'foo') - self.assertEqual(setting.value, 'bar') - self.assertIn(setting, self.session) - def test_get_target_objects(self): model = self.app.model imp = self.make_importer(model_class=model.Setting, target_session=self.session) - setting1 = model.Setting(name='foo', value='bar') + setting1 = model.Setting(name="foo", value="bar") self.session.add(setting1) - setting2 = model.Setting(name='foo2', value='bar2') + setting2 = model.Setting(name="foo2", value="bar2") self.session.add(setting2) self.session.commit() result = imp.get_target_objects() self.assertEqual(len(result), 2) self.assertEqual(set(result), {setting1, setting2}) + + def test_get_target_object(self): + model 
= self.app.model + setting = model.Setting(name="foo", value="bar") + + # nb. must mock up a target cache for this one + imp = self.make_importer(model_class=model.Setting, caches_target=True) + imp.cached_target = { + ("foo",): { + "object": setting, + "data": {"name": "foo", "value": "bar"}, + }, + } + + # returns same object + result = imp.get_target_object(("foo",)) + self.assertIs(result, setting) + + # and one more time just for kicks + result = imp.get_target_object(("foo",)) + self.assertIs(result, setting) + + # now let's put a 2nd setting in the db + setting2 = model.Setting(name="foo2", value="bar2") + self.session.add(setting2) + self.session.commit() + + # nb. disable target cache + with patch.multiple( + imp, create=True, target_session=self.session, caches_target=False + ): + + # now we should be able to fetch that via query + result = imp.get_target_object(("foo2",)) + self.assertIsInstance(result, model.Setting) + self.assertIs(result, setting2) + + # but sometimes it will not be found + result = imp.get_target_object(("foo3",)) + self.assertIsNone(result) + + def test_create_target_object(self): + model = self.app.model + imp = self.make_importer(model_class=model.Setting, target_session=self.session) + setting = model.Setting(name="foo", value="bar") + + # normal; new object is added to session + setting = imp.create_target_object(("foo",), {"name": "foo", "value": "bar"}) + self.assertIsInstance(setting, model.Setting) + self.assertEqual(setting.name, "foo") + self.assertEqual(setting.value, "bar") + self.assertIn(setting, self.session) + + # unwanted; parent class does not create the object + with patch.object(mod.Importer, "create_target_object", return_value=None): + setting = imp.create_target_object( + ("foo",), {"name": "foo", "value": "bar"} + ) + self.assertIsNone(setting) + + def test_delete_target_object(self): + model = self.app.model + + setting = model.Setting(name="foo", value="bar") + self.session.add(setting) + + 
self.assertEqual(self.session.query(model.Setting).count(), 1) + imp = self.make_importer(model_class=model.Setting, target_session=self.session) + imp.delete_target_object(setting) + self.assertEqual(self.session.query(model.Setting).count(), 0) diff --git a/tests/importing/test_csv.py b/tests/importing/test_csv.py index cf0a302..8544d63 100644 --- a/tests/importing/test_csv.py +++ b/tests/importing/test_csv.py @@ -1,11 +1,17 @@ -#-*- coding: utf-8; -*- +# -*- coding: utf-8; -*- import csv +import uuid as _uuid from unittest.mock import patch from wuttjamaican.testing import DataTestCase -from wuttasync.importing import csv as mod, ImportHandler, ToSqlalchemyHandler, ToSqlalchemy +from wuttasync.importing import ( + csv as mod, + ImportHandler, + ToSqlalchemyHandler, + ToSqlalchemy, +) class TestFromCsv(DataTestCase): @@ -14,8 +20,17 @@ class TestFromCsv(DataTestCase): self.setup_db() self.handler = ImportHandler(self.config) + self.data_path = self.write_file( + "data.txt", + """\ +name,value +foo,bar +foo2,bar2 +""", + ) + def make_importer(self, **kwargs): - kwargs.setdefault('handler', self.handler) + kwargs.setdefault("handler", self.handler) return mod.FromCsv(self.config, **kwargs) def test_get_input_file_name(self): @@ -23,83 +38,173 @@ class TestFromCsv(DataTestCase): imp = self.make_importer(model_class=model.Setting) # name can be guessed - self.assertEqual(imp.get_input_file_name(), 'Setting.csv') + self.assertEqual(imp.get_input_file_name(), "Setting.csv") # name can be explicitly set - imp.input_file_name = 'data.txt' - self.assertEqual(imp.get_input_file_name(), 'data.txt') + imp.input_file_name = "data.txt" + self.assertEqual(imp.get_input_file_name(), "data.txt") def test_open_input_file(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - path = self.write_file('data.txt', '') - imp.input_file_path = path + # normal operation, input file includes all fields + imp = self.make_importer( + model_class=model.Setting, 
input_file_path=self.data_path + ) + self.assertEqual(imp.fields, ["name", "value"]) imp.open_input_file() - self.assertEqual(imp.input_file.name, path) + self.assertEqual(imp.input_file.name, self.data_path) self.assertIsInstance(imp.input_reader, csv.DictReader) + self.assertEqual(imp.fields, ["name", "value"]) imp.input_file.close() + # this file is missing a field, plus we'll pretend more are + # supported - but should wind up with just the one field + missing = self.write_file("missing.txt", "name") + imp = self.make_importer(model_class=model.Setting, input_file_path=missing) + imp.fields.extend(["lots", "more"]) + self.assertEqual(imp.fields, ["name", "value", "lots", "more"]) + imp.open_input_file() + self.assertEqual(imp.fields, ["name"]) + imp.input_file.close() + + # and what happens when no known fields are found + bogus = self.write_file("bogus.txt", "blarg") + imp = self.make_importer(model_class=model.Setting, input_file_path=bogus) + self.assertEqual(imp.fields, ["name", "value"]) + self.assertRaises(ValueError, imp.open_input_file) + def test_close_input_file(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - path = self.write_file('data.txt', '') - imp.input_file_path = path + imp.input_file_path = self.data_path imp.open_input_file() imp.close_input_file() - self.assertFalse(hasattr(imp, 'input_reader')) - self.assertFalse(hasattr(imp, 'input_file')) + self.assertIsNone(imp.input_reader) + self.assertIsNone(imp.input_file) def test_get_source_objects(self): model = self.app.model imp = self.make_importer(model_class=model.Setting) - path = self.write_file('data.csv', """\ -name,value -foo,bar -foo2,bar2 -""") - imp.input_file_path = path + imp.input_file_path = self.data_path imp.open_input_file() objects = imp.get_source_objects() imp.close_input_file() self.assertEqual(len(objects), 2) - self.assertEqual(objects[0], {'name': 'foo', 'value': 'bar'}) - self.assertEqual(objects[1], {'name': 'foo2', 'value': 
'bar2'}) + self.assertEqual(objects[0], {"name": "foo", "value": "bar"}) + self.assertEqual(objects[1], {"name": "foo2", "value": "bar2"}) -class MockMixinHandler(mod.FromCsvToSqlalchemyMixin, ToSqlalchemyHandler): - ToImporterBase = ToSqlalchemy +class MockMixinImporter(mod.FromCsvToSqlalchemyMixin, mod.FromCsv, ToSqlalchemy): + pass class TestFromCsvToSqlalchemyMixin(DataTestCase): + def setUp(self): + self.setup_db() + self.handler = ImportHandler(self.config) + + def make_importer(self, **kwargs): + kwargs.setdefault("handler", self.handler) + return MockMixinImporter(self.config, **kwargs) + + def test_constructor(self): + model = self.app.model + + # no uuid keys + imp = self.make_importer(model_class=model.Setting) + self.assertEqual(imp.uuid_keys, []) + + # typical + # nb. as of now Upgrade is the only table using proper UUID + imp = self.make_importer(model_class=model.Upgrade) + self.assertEqual(imp.uuid_keys, ["uuid"]) + + def test_normalize_source_object(self): + model = self.app.model + + # no uuid keys + imp = self.make_importer(model_class=model.Setting) + result = imp.normalize_source_object({"name": "foo", "value": "bar"}) + self.assertEqual(result, {"name": "foo", "value": "bar"}) + + # source has proper UUID + # nb. as of now Upgrade is the only table using proper UUID + imp = self.make_importer( + model_class=model.Upgrade, fields=["uuid", "description"] + ) + result = imp.normalize_source_object( + { + "uuid": _uuid.UUID("06753693-d892-77f0-8000-ce71bf7ebbba"), + "description": "testing", + } + ) + self.assertEqual( + result, + { + "uuid": _uuid.UUID("06753693-d892-77f0-8000-ce71bf7ebbba"), + "description": "testing", + }, + ) + + # source has string uuid + # nb. 
as of now Upgrade is the only table using proper UUID + imp = self.make_importer( + model_class=model.Upgrade, fields=["uuid", "description"] + ) + result = imp.normalize_source_object( + {"uuid": "06753693d89277f08000ce71bf7ebbba", "description": "testing"} + ) + self.assertEqual( + result, + { + "uuid": _uuid.UUID("06753693-d892-77f0-8000-ce71bf7ebbba"), + "description": "testing", + }, + ) + + +class MockMixinHandler(mod.FromCsvToSqlalchemyHandlerMixin, ToSqlalchemyHandler): + ToImporterBase = ToSqlalchemy + + +class TestFromCsvToSqlalchemyHandlerMixin(DataTestCase): + def make_handler(self, **kwargs): return MockMixinHandler(self.config, **kwargs) def test_get_target_model(self): - with patch.object(mod.FromCsvToSqlalchemyMixin, 'define_importers', return_value={}): + with patch.object( + mod.FromCsvToSqlalchemyHandlerMixin, "define_importers", return_value={} + ): handler = self.make_handler() self.assertRaises(NotImplementedError, handler.get_target_model) def test_define_importers(self): model = self.app.model - with patch.object(mod.FromCsvToSqlalchemyMixin, 'get_target_model', return_value=model): + with patch.object( + mod.FromCsvToSqlalchemyHandlerMixin, "get_target_model", return_value=model + ): handler = self.make_handler() importers = handler.define_importers() - self.assertIn('Setting', importers) - self.assertTrue(issubclass(importers['Setting'], mod.FromCsv)) - self.assertTrue(issubclass(importers['Setting'], ToSqlalchemy)) - self.assertIn('User', importers) - self.assertIn('Person', importers) - self.assertIn('Role', importers) + self.assertIn("Setting", importers) + self.assertTrue(issubclass(importers["Setting"], mod.FromCsv)) + self.assertTrue(issubclass(importers["Setting"], ToSqlalchemy)) + self.assertIn("User", importers) + self.assertIn("Person", importers) + self.assertIn("Role", importers) def test_make_importer_factory(self): model = self.app.model - with patch.object(mod.FromCsvToSqlalchemyMixin, 'define_importers', return_value={}): + 
with patch.object( + mod.FromCsvToSqlalchemyHandlerMixin, "define_importers", return_value={} + ): handler = self.make_handler() - factory = handler.make_importer_factory(model.Setting, 'Setting') + factory = handler.make_importer_factory(model.Setting, "Setting") self.assertTrue(issubclass(factory, mod.FromCsv)) self.assertTrue(issubclass(factory, ToSqlalchemy)) diff --git a/tests/importing/test_handlers.py b/tests/importing/test_handlers.py index 67d861f..9bd0157 100644 --- a/tests/importing/test_handlers.py +++ b/tests/importing/test_handlers.py @@ -1,4 +1,4 @@ -#-*- coding: utf-8; -*- +# -*- coding: utf-8; -*- from collections import OrderedDict from unittest.mock import patch @@ -17,34 +17,36 @@ class TestImportHandler(DataTestCase): handler = self.make_handler() self.assertEqual(str(handler), "None → None") - handler.source_title = 'CSV' - handler.target_title = 'Wutta' + handler.source_title = "CSV" + handler.target_title = "Wutta" self.assertEqual(str(handler), "CSV → Wutta") def test_actioning(self): handler = self.make_handler() - self.assertEqual(handler.actioning, 'importing') + self.assertEqual(handler.actioning, "importing") handler.orientation = mod.Orientation.EXPORT - self.assertEqual(handler.actioning, 'exporting') + self.assertEqual(handler.actioning, "exporting") def test_get_key(self): handler = self.make_handler() - self.assertEqual(handler.get_key(), 'to_None.from_None.import') + self.assertEqual(handler.get_key(), "to_None.from_None.import") - with patch.multiple(mod.ImportHandler, source_key='csv', target_key='wutta'): - self.assertEqual(handler.get_key(), 'to_wutta.from_csv.import') + with patch.multiple(mod.ImportHandler, source_key="csv", target_key="wutta"): + self.assertEqual(handler.get_key(), "to_wutta.from_csv.import") def test_get_spec(self): handler = self.make_handler() - self.assertEqual(handler.get_spec(), 'wuttasync.importing.handlers:ImportHandler') + self.assertEqual( + handler.get_spec(), 
"wuttasync.importing.handlers:ImportHandler" + ) def test_get_title(self): handler = self.make_handler() self.assertEqual(handler.get_title(), "None → None") - handler.source_title = 'CSV' - handler.target_title = 'Wutta' + handler.source_title = "CSV" + handler.target_title = "Wutta" self.assertEqual(handler.get_title(), "CSV → Wutta") def test_get_source_title(self): @@ -54,16 +56,16 @@ class TestImportHandler(DataTestCase): self.assertIsNone(handler.get_source_title()) # which is really using source_key as fallback - handler.source_key = 'csv' - self.assertEqual(handler.get_source_title(), 'csv') + handler.source_key = "csv" + self.assertEqual(handler.get_source_title(), "csv") # can also use (defined) generic fallback - handler.generic_source_title = 'CSV' - self.assertEqual(handler.get_source_title(), 'CSV') + handler.generic_source_title = "CSV" + self.assertEqual(handler.get_source_title(), "CSV") # or can set explicitly - handler.source_title = 'XXX' - self.assertEqual(handler.get_source_title(), 'XXX') + handler.source_title = "XXX" + self.assertEqual(handler.get_source_title(), "XXX") def test_get_target_title(self): handler = self.make_handler() @@ -72,23 +74,23 @@ class TestImportHandler(DataTestCase): self.assertIsNone(handler.get_target_title()) # which is really using target_key as fallback - handler.target_key = 'wutta' - self.assertEqual(handler.get_target_title(), 'wutta') + handler.target_key = "wutta" + self.assertEqual(handler.get_target_title(), "wutta") # can also use (defined) generic fallback - handler.generic_target_title = 'Wutta' - self.assertEqual(handler.get_target_title(), 'Wutta') + handler.generic_target_title = "Wutta" + self.assertEqual(handler.get_target_title(), "Wutta") # or can set explicitly - handler.target_title = 'XXX' - self.assertEqual(handler.get_target_title(), 'XXX') + handler.target_title = "XXX" + self.assertEqual(handler.get_target_title(), "XXX") def test_process_data(self): model = self.app.model handler = 
self.make_handler() # empy/no-op should commit (not fail) - with patch.object(handler, 'commit_transaction') as commit_transaction: + with patch.object(handler, "commit_transaction") as commit_transaction: handler.process_data() commit_transaction.assert_called_once_with() @@ -96,8 +98,8 @@ class TestImportHandler(DataTestCase): handler.process_data() # dry-run should rollback - with patch.object(handler, 'commit_transaction') as commit_transaction: - with patch.object(handler, 'rollback_transaction') as rollback_transaction: + with patch.object(handler, "commit_transaction") as commit_transaction: + with patch.object(handler, "rollback_transaction") as rollback_transaction: handler.process_data(dry_run=True) self.assertFalse(commit_transaction.called) rollback_transaction.assert_called_once_with() @@ -106,36 +108,38 @@ class TestImportHandler(DataTestCase): handler.process_data(dry_run=True) # outright error should cause rollback - with patch.object(handler, 'commit_transaction') as commit_transaction: - with patch.object(handler, 'rollback_transaction') as rollback_transaction: - with patch.object(handler, 'get_importer', side_effect=RuntimeError): - self.assertRaises(RuntimeError, handler.process_data, 'BlahBlah') + with patch.object(handler, "commit_transaction") as commit_transaction: + with patch.object(handler, "rollback_transaction") as rollback_transaction: + with patch.object(handler, "get_importer", side_effect=RuntimeError): + self.assertRaises(RuntimeError, handler.process_data, "BlahBlah") self.assertFalse(commit_transaction.called) rollback_transaction.assert_called_once_with() # fake importer class/data - mock_source_objects = [{'name': 'foo', 'value': 'bar'}] + mock_source_objects = [{"name": "foo", "value": "bar"}] + class SettingImporter(ToSqlalchemy): model_class = model.Setting target_session = self.session + def get_source_objects(self): return mock_source_objects # now for a "normal" one - handler.importers['Setting'] = SettingImporter + 
handler.importers["Setting"] = SettingImporter self.assertEqual(self.session.query(model.Setting).count(), 0) - handler.process_data('Setting') + handler.process_data("Setting") self.assertEqual(self.session.query(model.Setting).count(), 1) # then add another mock record - mock_source_objects.append({'name': 'foo2', 'value': 'bar2'}) - handler.process_data('Setting') + mock_source_objects.append({"name": "foo2", "value": "bar2"}) + handler.process_data("Setting") self.assertEqual(self.session.query(model.Setting).count(), 2) # nb. even if dry-run, record is added # (rollback would happen later in that case) - mock_source_objects.append({'name': 'foo3', 'value': 'bar3'}) - handler.process_data('Setting', dry_run=True) + mock_source_objects.append({"name": "foo3", "value": "bar3"}) + handler.process_data("Setting", dry_run=True) self.assertEqual(self.session.query(model.Setting).count(), 3) def test_consume_kwargs(self): @@ -148,10 +152,10 @@ class TestImportHandler(DataTestCase): # captures dry-run flag self.assertFalse(handler.dry_run) - kw['dry_run'] = True + kw["dry_run"] = True result = handler.consume_kwargs(kw) self.assertIs(result, kw) - self.assertTrue(kw['dry_run']) + self.assertTrue(kw["dry_run"]) self.assertTrue(handler.dry_run) def test_define_importers(self): @@ -165,12 +169,48 @@ class TestImportHandler(DataTestCase): handler = self.make_handler() # normal - handler.importers['Setting'] = Importer - importer = handler.get_importer('Setting', model_class=model.Setting) + handler.importers["Setting"] = Importer + importer = handler.get_importer("Setting", model_class=model.Setting) + self.assertIsInstance(importer, Importer) + + # specifying empty keys + handler.importers["Setting"] = Importer + importer = handler.get_importer("Setting", model_class=model.Setting, keys=None) + self.assertIsInstance(importer, Importer) + importer = handler.get_importer("Setting", model_class=model.Setting, keys="") + self.assertIsInstance(importer, Importer) + importer = 
handler.get_importer("Setting", model_class=model.Setting, keys=[]) self.assertIsInstance(importer, Importer) # key not found - self.assertRaises(KeyError, handler.get_importer, 'BunchOfNonsense', model_class=model.Setting) + self.assertRaises( + KeyError, handler.get_importer, "BunchOfNonsense", model_class=model.Setting + ) + + +class TestFromFileHandler(DataTestCase): + + def make_handler(self, **kwargs): + return mod.FromFileHandler(self.config, **kwargs) + + def test_process_data(self): + handler = self.make_handler() + path = self.write_file("data.txt", "") + with patch.object(mod.ImportHandler, "process_data") as process_data: + + # bare + handler.process_data() + process_data.assert_called_once_with() + + # with file path + process_data.reset_mock() + handler.process_data(input_file_path=path) + process_data.assert_called_once_with(input_file_path=path) + + # with folder + process_data.reset_mock() + handler.process_data(input_file_path=self.tempdir) + process_data.assert_called_once_with(input_file_dir=self.tempdir) class TestToSqlalchemyHandler(DataTestCase): @@ -180,7 +220,7 @@ class TestToSqlalchemyHandler(DataTestCase): def test_begin_target_transaction(self): handler = self.make_handler() - with patch.object(handler, 'make_target_session') as make_target_session: + with patch.object(handler, "make_target_session") as make_target_session: make_target_session.return_value = self.session self.assertIsNone(handler.target_session) handler.begin_target_transaction() @@ -188,7 +228,7 @@ class TestToSqlalchemyHandler(DataTestCase): def test_rollback_target_transaction(self): handler = self.make_handler() - with patch.object(handler, 'make_target_session') as make_target_session: + with patch.object(handler, "make_target_session") as make_target_session: make_target_session.return_value = self.session self.assertIsNone(handler.target_session) handler.begin_target_transaction() @@ -198,7 +238,7 @@ class TestToSqlalchemyHandler(DataTestCase): def 
test_commit_target_transaction(self): handler = self.make_handler() - with patch.object(handler, 'make_target_session') as make_target_session: + with patch.object(handler, "make_target_session") as make_target_session: make_target_session.return_value = self.session self.assertIsNone(handler.target_session) handler.begin_target_transaction() @@ -213,6 +253,6 @@ class TestToSqlalchemyHandler(DataTestCase): def test_get_importer_kwargs(self): handler = self.make_handler() handler.target_session = self.session - kw = handler.get_importer_kwargs('Setting') - self.assertIn('target_session', kw) - self.assertIs(kw['target_session'], self.session) + kw = handler.get_importer_kwargs("Setting") + self.assertIn("target_session", kw) + self.assertIs(kw["target_session"], self.session) diff --git a/tests/importing/test_model.py b/tests/importing/test_model.py index ea74a43..d27abc2 100644 --- a/tests/importing/test_model.py +++ b/tests/importing/test_model.py @@ -1,3 +1,3 @@ -#-*- coding: utf-8; -*- +# -*- coding: utf-8; -*- from wuttasync.importing import model as mod diff --git a/tests/importing/test_wutta.py b/tests/importing/test_wutta.py index ec5df50..4d6fdd2 100644 --- a/tests/importing/test_wutta.py +++ b/tests/importing/test_wutta.py @@ -1,4 +1,4 @@ -#-*- coding: utf-8; -*- +# -*- coding: utf-8; -*- from unittest.mock import patch @@ -16,22 +16,22 @@ class TestToWuttaHandler(DataTestCase): handler = self.make_handler() # uses app title by default - self.config.setdefault('wutta.app_title', "What About This") - self.assertEqual(handler.get_target_title(), 'What About This') + self.config.setdefault("wutta.app_title", "What About This") + self.assertEqual(handler.get_target_title(), "What About This") # or generic default if present handler.generic_target_title = "WHATABOUTTHIS" - self.assertEqual(handler.get_target_title(), 'WHATABOUTTHIS') + self.assertEqual(handler.get_target_title(), "WHATABOUTTHIS") # but prefer specific title if present handler.target_title = 
"what_about_this" - self.assertEqual(handler.get_target_title(), 'what_about_this') + self.assertEqual(handler.get_target_title(), "what_about_this") def test_make_target_session(self): handler = self.make_handler() # makes "new" (mocked in our case) app session - with patch.object(self.app, 'make_session') as make_session: + with patch.object(self.app, "make_session") as make_session: make_session.return_value = self.session session = handler.make_target_session() make_session.assert_called_once_with() diff --git a/tests/test_util.py b/tests/test_util.py index fc0476c..4b01777 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -8,22 +8,24 @@ from wuttasync import util as mod class TestDataDiffs(TestCase): def test_source_missing_field(self): - source = {'foo': 'bar'} - target = {'baz': 'xyz', 'foo': 'bar'} + source = {"foo": "bar"} + target = {"baz": "xyz", "foo": "bar"} self.assertRaises(KeyError, mod.data_diffs, source, target) def test_target_missing_field(self): - source = {'foo': 'bar', 'baz': 'xyz'} - target = {'baz': 'xyz'} - self.assertRaises(KeyError, mod.data_diffs, source, target, fields=['foo', 'baz']) + source = {"foo": "bar", "baz": "xyz"} + target = {"baz": "xyz"} + self.assertRaises( + KeyError, mod.data_diffs, source, target, fields=["foo", "baz"] + ) def test_no_diffs(self): - source = {'foo': 'bar', 'baz': 'xyz'} - target = {'baz': 'xyz', 'foo': 'bar'} + source = {"foo": "bar", "baz": "xyz"} + target = {"baz": "xyz", "foo": "bar"} self.assertFalse(mod.data_diffs(source, target)) def test_with_diffs(self): - source = {'foo': 'bar', 'baz': 'xyz'} - target = {'baz': 'xyz', 'foo': 'BAR'} + source = {"foo": "bar", "baz": "xyz"} + target = {"baz": "xyz", "foo": "BAR"} result = mod.data_diffs(source, target) - self.assertEqual(result, ['foo']) + self.assertEqual(result, ["foo"]) diff --git a/tox.ini b/tox.ini index 78d41eb..a9472fb 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,10 @@ envlist = py38, py39, py310, py311 extras = tests commands = 
pytest {posargs} +[testenv:pylint] +basepython = python3.11 +commands = pylint wuttasync + [testenv:coverage] basepython = python3.11 commands = pytest --cov=wuttasync --cov-report=html --cov-fail-under=100