From b3e4e91df81ada6137f2b83cef358770f68ede2a Mon Sep 17 00:00:00 2001 From: Lance Edgar Date: Sat, 7 Dec 2024 18:14:11 -0600 Subject: [PATCH] docs: add some narrative docs to explain basic concepts still needs a lot of work i'm sure..gotta start somewhere --- docs/glossary.rst | 13 +-- docs/index.rst | 10 +- docs/narr/{cli.rst => cli/builtin.rst} | 10 +- docs/narr/cli/custom.rst | 64 +++++++++++ docs/narr/cli/index.rst | 23 ++++ docs/narr/concepts.rst | 54 +++++++++ docs/narr/custom/command.rst | 9 ++ docs/narr/custom/conventions.rst | 90 +++++++++++++++ docs/narr/custom/handler.rst | 93 +++++++++++++++ docs/narr/custom/importer.rst | 149 +++++++++++++++++++++++++ docs/narr/custom/index.rst | 21 ++++ 11 files changed, 522 insertions(+), 14 deletions(-) rename docs/narr/{cli.rst => cli/builtin.rst} (74%) create mode 100644 docs/narr/cli/custom.rst create mode 100644 docs/narr/cli/index.rst create mode 100644 docs/narr/concepts.rst create mode 100644 docs/narr/custom/command.rst create mode 100644 docs/narr/custom/conventions.rst create mode 100644 docs/narr/custom/handler.rst create mode 100644 docs/narr/custom/importer.rst create mode 100644 docs/narr/custom/index.rst diff --git a/docs/glossary.rst b/docs/glossary.rst index 9bf2b30..c58e3d6 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -12,20 +12,19 @@ Glossary The import handler manages data connections and transactions, and invokes one or more :term:`importers <importer>` to process the - data. + data. See also :ref:`import-handler-vs-importer`. Note that "import/export handler" is the more proper term to use here but it is often shortened to just "import handler" for convenience. importer - In the context of WuttaSync, this refers to a type of object - which can process data for an import/export job, i.e. create, - update or delete records on the "target" based on the "source" - data it reads. 
+ This refers to a Python class/instance responsible for processing + a particular :term:`data model` for an import/export job. - See also :term:`import handler` which can "contain" one or more - importers. + For instance there is usually one importer per table, when + importing to the :term:`app database` (regardless of source). + See also :ref:`import-handler-vs-importer`. Note that "importer/exporter" is the more proper term to use here but it is often shortened to just "importer" for convenience. diff --git a/docs/index.rst b/docs/index.rst index c8bc0cb..ea00f77 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,9 +5,11 @@ WuttaSync This package adds data import/export and real-time sync utilities for the `Wutta Framework `_. -The primary use cases here are: +*(NB. the real-time sync has not been added yet.)* -* keep "operational" data in sync between e.g. various business systems +The primary use cases in mind are: + +* keep operational data in sync between various business systems * import data from user-specified file * export to file @@ -24,7 +26,9 @@ database`, it may be used for any "source → target" data flow. glossary narr/install - narr/cli + narr/cli/index + narr/concepts + narr/custom/index .. toctree:: :maxdepth: 1 diff --git a/docs/narr/cli.rst b/docs/narr/cli/builtin.rst similarity index 74% rename from docs/narr/cli.rst rename to docs/narr/cli/builtin.rst index 4bee64d..0630c94 100644 --- a/docs/narr/cli.rst +++ b/docs/narr/cli/builtin.rst @@ -1,10 +1,12 @@ -Built-in Commands -================= +=================== + Built-in Commands +=================== -WuttaSync adds some built-in ``wutta`` :term:`subcommands <subcommand>`. +Below are the :term:`subcommands <subcommand>` which come with +WuttaSync. -See also :doc:`wuttjamaican:narr/cli/index`. +It is fairly simple to add more; see :doc:`custom`. .. 
_wutta-import-csv: diff --git a/docs/narr/cli/custom.rst b/docs/narr/cli/custom.rst new file mode 100644 index 0000000..837a70c --- /dev/null +++ b/docs/narr/cli/custom.rst @@ -0,0 +1,64 @@ + +================= + Custom Commands +================= + +This section describes how to add a custom :term:`subcommand` which +wraps a particular :term:`import handler`. + +See also :doc:`wuttjamaican:narr/cli/custom` for more information +on the general concepts etc. + + +Basic Import/Export +------------------- + +Here we'll assume you have a typical "Poser" app based on Wutta +Framework, and the "Foo → Poser" (``FromFooToPoser`` handler) import +logic is defined in the ``poser.importing.foo`` module. + +We'll also assume you already have a ``poser`` top-level +:term:`command` (in ``poser.cli``), and our task now is to add the +``poser import-foo`` subcommand to wrap the import handler. + +And finally we'll assume this is just a "typical" import handler and +we do not need any custom CLI params exposed. 
+ +Here is the code and we'll explain below:: + + from poser.cli import poser_typer + from wuttasync.cli import import_command, ImportCommandHandler + + @poser_typer.command() + @import_command + def import_foo(ctx, **kwargs): + """ + Import data from Foo API to Poser DB + """ + config = ctx.parent.wutta_config + handler = ImportCommandHandler( + config, import_handler='poser.importing.foo:FromFooToPoser') + handler.run(ctx.params) + +Hopefully it's straightforward but to be clear: + +* subcommand is really just a function, **with desired name** +* wrap with ``@poser_typer.command()`` to register as subcommand +* wrap with ``@import_command`` to get typical CLI params +* call ``ImportCommandHandler.run()`` with import handler spec + +So really - in addition to +:func:`~wuttasync.cli.base.import_command()` - the +:class:`~wuttasync.cli.base.ImportCommandHandler` is doing the heavy +lifting for all import/export subcommands, it just needs to know which +:term:`import handler` to use. + +.. note:: + + If your new subcommand is defined in a different module than is the + top-level command (e.g. as in example above) then you may need to + "eagerly" import the subcommand module. (Otherwise auto-discovery + may not find it.) + + This is usually done from within the top-level command's module, + since it is always imported early due to the entry point. diff --git a/docs/narr/cli/index.rst b/docs/narr/cli/index.rst new file mode 100644 index 0000000..96be6c7 --- /dev/null +++ b/docs/narr/cli/index.rst @@ -0,0 +1,23 @@ + +======================== + Command Line Interface +======================== + +The primary way of using the import/export framework day to day is via +the command line. + +WuttJamaican defines the ``wutta`` :term:`command` and WuttaSync comes +with some extra :term:`subcommands <subcommand>` for importing to / +exporting from the Wutta :term:`app database`. + +It is fairly simple to add a dedicated subcommand for any +:term:`import handler`; see below. 
+ +And for more general info about CLI see +:doc:`wuttjamaican:narr/cli/index`. + +.. toctree:: + :maxdepth: 2 + + builtin + custom diff --git a/docs/narr/concepts.rst b/docs/narr/concepts.rst new file mode 100644 index 0000000..93d09a3 --- /dev/null +++ b/docs/narr/concepts.rst @@ -0,0 +1,54 @@ + +Concepts +======== + +Things hopefully are straightforward but it's important to get the +following straight in your head; the rest will come easier if you do. + + +Source vs. Target +----------------- + +Data always flows from source to target; it is the #1 rule. + +Docs and command output will always reflect this, e.g. **CSV → +Wutta**. + +Source and target can be anything as long as the :term:`import +handler` and :term:`importer(s) <importer>` implement the desired +logic. The :term:`app database` is often involved but not always. + + +Import vs. Export +----------------- + +Surprise, there is no difference. After all from target's perspective +everything is really an import. + +Sometimes it's more helpful to think of it as an export, e.g. **Wutta +→ CSV** really seems like an export. In such cases the +:attr:`~wuttasync.importing.handlers.ImportHandler.orientation` may be +set to reflect the distinction. + + +.. _import-handler-vs-importer: + +Import Handler vs. Importer +--------------------------- + +The :term:`import handler` is sort of the "wrapper" around one or more +:term:`importers <importer>` and the latter contain the table-specific +sync logic. + +In a DB or similar context, the import handler will make the +connection, then invoke all requested importers, then commit +transaction at the end (or rollback if dry-run). + +And each importer will read data from source, and usually also read +data from target, then compare data sets and finally write data to +target as needed. But each would usually do this for just one table. 
+ +See also the base classes for each: + +* :class:`~wuttasync.importing.handlers.ImportHandler` +* :class:`~wuttasync.importing.base.Importer` diff --git a/docs/narr/custom/command.rst b/docs/narr/custom/command.rst new file mode 100644 index 0000000..39eaeae --- /dev/null +++ b/docs/narr/custom/command.rst @@ -0,0 +1,9 @@ + +Define Command +============== + +Now that you have defined the import handler plus any importers +required, you'll want to define a command line interface to use it. + +This section is here for completeness but the process is described +elsewhere; see :doc:`/narr/cli/custom`. diff --git a/docs/narr/custom/conventions.rst b/docs/narr/custom/conventions.rst new file mode 100644 index 0000000..3ce686a --- /dev/null +++ b/docs/narr/custom/conventions.rst @@ -0,0 +1,90 @@ + +Conventions +=========== + +Below are recommended conventions for structuring and naming the files +in your project relating to import/export. + +The intention for these rules is that they are "intuitive" based on +the fact that all data flows from source to target and therefore can +be thought of as "importing" in virtually all cases. + +But there are a lot of edge cases out there so YMMV. + + +"The Rules" +----------- + +There are exceptions to these of course, but in general: + +* regarding how to think about these conventions: + + * always look at it from target's perspective + + * always look at it as an *import*, not export + +* "final" logic is always a combo of: + + * "base" logic for how target data read/write happens generally + + * "specific" logic for how that happens using a particular data source + +* targets each get their own subpackage within project + + * and within that, also an ``importing`` (nested) subpackage + + * and within *that* is where the files live, referenced next + + * target ``model.py`` should contain ``ToTarget`` importer base class + + * also may have misc. per-model base classes, e.g. 
``WidgetImporter`` + + * also may have ``ToTargetHandler`` base class if applicable + + * sources each get their own module, named after the source + + * should contain the "final" handler class, e.g. ``FromSourceToTarget`` + + * also contains "final" importer classes needed by handler (e.g. ``WidgetImporter``) + + +Example +------- + +That's a lot of rules so let's see it. Here we assume a Wutta-based +app named Poser and it integrates with a Foo API in the cloud. Data +should flow both ways so we will be thinking of this as: + +* **Foo → Poser import** +* **Poser → Foo export** + +Here is the suggested file layout: + +.. code-block:: none + + poser/ + ├── foo/ + │ ├── __init__.py + │ ├── api.py + │ └── importing/ + │ ├── __init__.py + │ ├── model.py + │ └── poser.py + └── importing/ + ├── __init__.py + ├── foo.py + └── model.py + +And the module breakdown: + +* ``poser.foo.api`` has e.g. ``FooAPI`` interface logic + +**Foo → Poser import** (aka. "Poser imports from Foo") + +* ``poser.importing.model`` has ``ToPoserHandler``, ``ToPoser`` and per-model base importers +* ``poser.importing.foo`` has ``FromFooToPoser`` plus final importers + +**Poser → Foo export** (aka. "Foo imports from Poser") + +* ``poser.foo.importing.model`` has ``ToFooHandler``, ``ToFoo`` and per-model base importer +* ``poser.foo.importing.poser`` has ``FromPoserToFoo`` plus final importers diff --git a/docs/narr/custom/handler.rst b/docs/narr/custom/handler.rst new file mode 100644 index 0000000..cb2b74d --- /dev/null +++ b/docs/narr/custom/handler.rst @@ -0,0 +1,93 @@ + +Define Import Handler +===================== + +The obvious step here is to define a new :term:`import handler`, which +ultimately inherits from +:class:`~wuttasync.importing.handlers.ImportHandler`. But the choice +of which class(es) *specifically* to inherit from, is a bit more +complicated. 
+ + +Choose the Base Class(es) +------------------------- + +If all else fails, or to get started simply, you can always just +inherit from :class:`~wuttasync.importing.handlers.ImportHandler` +directly as the only base class. You'll have to define any methods +needed to implement desired behavior. + +However depending on your particular source and/or target, there may +be existing base classes defined somewhere from which you can inherit. +This may save you some effort, and/or is just a good idea to share +code where possible. + +Keep in mind your import handler can inherit from multiple base +classes, and often will - one base for the source side, and another +for the target side. For instance:: + + from wuttasync.importing import FromFileHandler, ToWuttaHandler + + class FromExcelToPoser(FromFileHandler, ToWuttaHandler): + """ + Handler for Excel file → Poser app DB + """ + +You generally will still need to define/override some methods to +customize behavior. + +All built-in base classes live under :mod:`wuttasync.importing`. + + +.. _register-importer: + +Register Importer(s) +-------------------- + +If nothing else, most custom handlers must override +:meth:`~wuttasync.importing.handlers.ImportHandler.define_importers()` +to "register" importer(s) as appropriate. There are two primary goals +here: + +* add "new" (totally custom) importers +* override "existing" importers (inherited from base class) + +Obviously for this to actually work the importer(s) must exist in +code; see :doc:`importer`. + +As an example let's say there's a ``FromFooToWutta`` handler which +defines a ``Widget`` importer. 
+ +And let's say you want to customize that, by tweaking slightly the +logic for ``WidgetImporter`` and adding a new ``SprocketImporter``:: + + from somewhere_else import (FromFooToWutta, ToWutta, + WidgetImporter as WidgetImporterBase) + + class FromFooToPoser(FromFooToWutta): + """ + Handler for Foo -> Poser + """ + + def define_importers(self): + + # base class defines the initial set + importers = super().define_importers() + + # override widget importer + importers['Widget'] = WidgetImporter + + # add sprocket importer + importers['Sprocket'] = SprocketImporter + + return importers + + class SprocketImporter(ToWutta): + """ + Sprocket importer for Foo -> Poser + """ + + class WidgetImporter(WidgetImporterBase): + """ + Widget importer for Foo -> Poser + """ diff --git a/docs/narr/custom/importer.rst b/docs/narr/custom/importer.rst new file mode 100644 index 0000000..c9b6674 --- /dev/null +++ b/docs/narr/custom/importer.rst @@ -0,0 +1,149 @@ + +Define Importer(s) +================== + +Here we'll describe how to make a custom :term:`importer/exporter +<importer>`, which can process a given :term:`data model`. + +.. + The example will assume a **Foo → Poser import** for the ``Widget`` + :term:`data model`. + + +Choose the Base Class(es) +------------------------- + +As with the :term:`import handler`, the importer "usually" will have +two base classes: one for the target side and another for the source. + +The base class for target side is generally more fleshed out, with +logic to read/write data for the given target model. Whereas the base +class for the source side could just be a stub. In the latter case, +one might choose to skip it and inherit only from the target base +class. + +In any case the final importer class you define can override any/all +logic from either base class if needed. 
+ + +Example: Foo → Poser import +--------------------------- + +Here we'll assume a Wutta-based app named "Poser" which will be +importing "Widget" data from the "Foo API" cloud service. + +In this case we will inherit from a base class for the target side, +which already knows how to talk to the :term:`app database` via +SQLAlchemy ORM. + +But for the source side, there is no existing base class for the Foo +API service, since that is just made-up - so we will also define our +own base class for that:: + + from wuttasync.importing import Importer, ToWutta + + # nb. this is not real of course, but an example + from poser.foo.api import FooAPI + + class FromFoo(Importer): + """ + Base class for importers using Foo API as source + """ + + def setup(self): + """ + Establish connection to Foo API + """ + self.foo_api = FooAPI(self.config) + + class WidgetImporter(FromFoo, ToWutta): + """ + Widget importer for Foo -> Poser + """ + + def get_source_objects(self): + """ + Fetch all "raw" widgets from Foo API + """ + # nb. also not real, just example + return self.foo_api.get_widgets() + + def normalize_source_object(self, widget): + """ + Convert the "raw" widget we receive from Foo API, to a + "normalized" dict with data for all fields which are part of + the processing request. + """ + return { + 'id': widget.id, + 'name': widget.name, + } + + +Example: Poser → Foo export +--------------------------- + +In the previous scenario we imported data from Foo to Poser, and here +we'll do the reverse, exporting from Poser to Foo. + +As of writing the base class logic for exporting from Wutta :term:`app +database` does not yet exist. 
And the Foo API is just made-up so +we'll add one-off base classes for both sides:: + + from wuttasync.importing import Importer + + class FromWutta(Importer): + """ + Base class for importers using Wutta DB as source + """ + + class ToFoo(Importer): + """ + Base class for exporters targeting Foo API + """ + + class WidgetImporter(FromWutta, ToFoo): + """ + Widget exporter for Poser -> Foo + """ + + def get_source_objects(self): + """ + Fetch all widgets from the Poser app DB. + + (see note below regarding the db session) + """ + model = self.app.model + return self.source_session.query(model.Widget).all() + + def normalize_source_object(self, widget): + """ + Convert the "raw" widget from Poser app (ORM) to a + "normalized" dict with data for all fields which are part of + the processing request. + """ + return { + 'id': widget.id, + 'name': widget.name, + } + +Note that the ``get_source_objects()`` method shown above makes use of +a ``source_session`` attribute - where did that come from? + +This is actually not part of the importer proper, but rather this +attribute is set by the :term:`import handler`. And that will only +happen if the importer is being invoked by a handler which supports +it. So none of that is shown here, but FYI. + +(And again, that logic isn't written yet, but there will "soon" be a +``FromSqlalchemyHandler`` class defined which implements this.) + + +Register with Import Handler +---------------------------- + +After you define the importer/exporter class (as shown above) you also +must "register" it within the import/export handler. + +This section is here for completeness but the process is described +elsewhere; see :ref:`register-importer`. diff --git a/docs/narr/custom/index.rst b/docs/narr/custom/index.rst new file mode 100644 index 0000000..7e75146 --- /dev/null +++ b/docs/narr/custom/index.rst @@ -0,0 +1,21 @@ + +Custom Import/Export +==================== + +This section explains what's required to make your own import/export +tasks. 
+ +See also :doc:`/narr/concepts` for some terminology etc. + +.. + The examples throughout the sections below will often involve a + theoretical **Foo → Poser** import, where Poser is a typical + Wutta-based app and Foo is some API in the cloud. + +.. toctree:: + :maxdepth: 2 + + conventions + handler + importer + command