feat: add wutta import-csv command

2024-12-05 21:19:06 -06:00 · 2024-12-05 21:19:06 -06:00 · f43a066341
commit f43a066341
parent 84a8beaf46
19 changed files with 500 additions and 15 deletions
--- a/docs/api/wuttasync.cli.base.rst
+++ b/docs/api/wuttasync.cli.base.rst
@ -0,0 +1,6 @@
 ``wuttasync.cli.base``
 ======================
 .. automodule:: wuttasync.cli.base
   :members:
--- a/docs/api/wuttasync.cli.import_csv.rst
+++ b/docs/api/wuttasync.cli.import_csv.rst
@ -0,0 +1,6 @@
 ``wuttasync.cli.import_csv``
 ============================
 .. automodule:: wuttasync.cli.import_csv
   :members:
--- a/docs/api/wuttasync.cli.rst
+++ b/docs/api/wuttasync.cli.rst
@ -0,0 +1,6 @@
 ``wuttasync.cli``
 =================
 .. automodule:: wuttasync.cli
   :members:
--- a/docs/conf.py
+++ b/docs/conf.py
@ -22,6 +22,7 @@ extensions = [
    'sphinx.ext.viewcode',
    'sphinx.ext.todo',
    'enum_tools.autoenum',
    'sphinxcontrib.programoutput',
 ]
 templates_path = ['_templates']
--- a/docs/index.rst
+++ b/docs/index.rst
@ -5,6 +5,15 @@ WuttaSync
 This package adds data import/export and real-time sync utilities for
 the `Wutta Framework <https://wuttaproject.org>`_.
 The primary use cases here are:
 * keep "operational" data in sync between e.g. various business systems
 * import data from user-specified file
 * export to file
 This isn't really meant to replace typical ETL tools; it is smaller
 scale and (hopefully) more flexible.
 While it of course supports import/export to/from the Wutta :term:`app
 database`, it may be used for any "source → target" data flow.
@ -14,12 +23,16 @@ database`, it may be used for any "source → target" data flow.
   :caption: Documentation
   narr/install
   narr/cli
 .. toctree::
   :maxdepth: 1
   :caption: API
   api/wuttasync
   api/wuttasync.cli
   api/wuttasync.cli.base
   api/wuttasync.cli.import_csv
   api/wuttasync.importing
   api/wuttasync.importing.base
   api/wuttasync.importing.csv
--- a/docs/narr/cli.rst
+++ b/docs/narr/cli.rst
@ -0,0 +1,25 @@
 Built-in Commands
 =================
 WuttaSync adds some built-in ``wutta`` :term:`subcommands <subcommand>`.
 See also :doc:`wuttjamaican:narr/cli/index`.
 .. _wutta-import-csv:
 ``wutta import-csv``
 --------------------
 Import data from CSV file(s) to the Wutta :term:`app database`.
 This *should* be able to automatically target any table mapped in the
 :term:`app model`.  The only caveat is that it is "dumb" and does not
 have any special field handling.  This means the column headers in the
 CSV file must exactly match the column names of the target table, and
 values for some data types may not be converted as expected.
 Defined in: :mod:`wuttasync.cli.import_csv`
 .. program-output:: wutta import-csv --help
--- a/pyproject.toml
+++ b/pyproject.toml
@ -26,16 +26,21 @@ classifiers = [
 ]
 requires-python = ">= 3.8"
 dependencies = [
        "makefun",
        "SQLAlchemy-Utils",
        "WuttJamaican[db]",
 ]
 [project.optional-dependencies]
-docs = ["Sphinx", "enum-tools[sphinx]", "furo"]
+docs = ["Sphinx", "enum-tools[sphinx]", "furo", "sphinxcontrib-programoutput"]
 tests = ["pytest-cov", "tox"]
 [project.entry-points."wutta.typer_imports"]
 wuttasync = "wuttasync.cli"
 [project.urls]
 Homepage = "https://wuttaproject.org/"
 Repository = "https://forgejo.wuttaproject.org/wutta/wuttasync"
--- a/src/wuttasync/cli/__init__.py
+++ b/src/wuttasync/cli/__init__.py
@ -0,0 +1,35 @@
 # -*- coding: utf-8; -*-
 ################################################################################
 #
 #  WuttaSync -- Wutta Framework for data import/export and real-time sync
 #  Copyright © 2024 Lance Edgar
 #
 #  This file is part of Wutta Framework.
 #
 #  Wutta Framework is free software: you can redistribute it and/or modify it
 #  under the terms of the GNU General Public License as published by the Free
 #  Software Foundation, either version 3 of the License, or (at your option) any
 #  later version.
 #
 #  Wutta Framework is distributed in the hope that it will be useful, but
 #  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 #  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 #  more details.
 #
 #  You should have received a copy of the GNU General Public License along with
 #  Wutta Framework.  If not, see <http://www.gnu.org/licenses/>.
 #
 ################################################################################
 """
 WuttaSync - ``wutta`` subcommands
 This namespace exposes the following:
 * :func:`~wuttasync.cli.base.importer_command()`
 * :func:`~wuttasync.cli.base.file_importer_command()`
 """
 from .base import importer_command, file_importer_command
 # nb. must bring in all modules for discovery to work
 from . import import_csv
--- a/src/wuttasync/cli/base.py
+++ b/src/wuttasync/cli/base.py
@ -0,0 +1,167 @@
 # -*- coding: utf-8; -*-
 ################################################################################
 #
 #  WuttaSync -- Wutta Framework for data import/export and real-time sync
 #  Copyright © 2024 Lance Edgar
 #
 #  This file is part of Wutta Framework.
 #
 #  Wutta Framework is free software: you can redistribute it and/or modify it
 #  under the terms of the GNU General Public License as published by the Free
 #  Software Foundation, either version 3 of the License, or (at your option) any
 #  later version.
 #
 #  Wutta Framework is distributed in the hope that it will be useful, but
 #  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 #  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 #  more details.
 #
 #  You should have received a copy of the GNU General Public License along with
 #  Wutta Framework.  If not, see <http://www.gnu.org/licenses/>.
 #
 ################################################################################
 """
 Base logic for ``wutta`` import/export subcommands
 """
 import inspect
 from pathlib import Path
 from typing import List, Optional
 from typing_extensions import Annotated
 import makefun
 import typer
 def importer_command_template(
        # model keys (positional CLI argument; may be omitted entirely)
        models: Annotated[
            Optional[List[str]],
            typer.Argument(help="Model(s) to process.  Can specify one or more, "
                           "or omit to process all default models.")] = None,
        # allow create?  (enabled by default)
        create: Annotated[
            bool,
            typer.Option(help="Allow new target records to be created.")] = True,
        # allow update?  (enabled by default)
        update: Annotated[
            bool,
            typer.Option(help="Allow existing target records to be updated.")] = True,
        # allow delete?  (disabled by default)
        delete: Annotated[
            bool,
            typer.Option(help="Allow existing target records to be deleted.")] = False,
        # dry run?  (disabled by default)
        dry_run: Annotated[
            bool,
            typer.Option('--dry-run',
                         help="Go through the motions, but rollback the transaction.")] = False,
        # TODO: field selection params below are not enabled yet
        # fields: Annotated[
        #     str,
        #     typer.Option('--fields',
        #                  help="List of fields to process.  If specified, "
        #                  "any field not listed is excluded regardless "
        #                  "of --exclude.")] = None,
        # exclude_fields: Annotated[
        #     str,
        #     typer.Option('--exclude',
        #                  help="List of fields not to process.  If "
        #                  "specified, any field not listed is (not?) included "
        #                  "based on app logic and/or --fields.")] = None,
 ):
    """
    Stub function which declares the common param signature shared by
    import/export commands.

    This function has no body of its own; only its signature matters.
    The signature is read via :func:`python:inspect.signature()` by
    :func:`importer_command()` and :func:`file_importer_command()`,
    which copy these params onto the decorated command function.

    :param models: Optional list of model keys to process.

    :param create: Whether new target records may be created.

    :param update: Whether existing target records may be updated.

    :param delete: Whether existing target records may be deleted.

    :param dry_run: Whether the transaction should be rolled back
       after going through the motions.
    """
 def importer_command(fn):
    """
    Decorator for import/export commands, which grafts the common
    params declared by :func:`importer_command_template()` onto the
    decorated function.

    The decorated function is expected to declare a leading ``ctx``
    param followed by a trailing ``**kwargs`` catch-all.  The common
    params are placed right after the first param, and the final
    param (the catch-all) is removed from the resulting signature.

    To use this, e.g. for ``poser import-foo`` command::

       from poser.cli import poser_typer
       from wuttasync.cli import importer_command

       @poser_typer.command()
       @importer_command
       def import_foo(
               ctx: typer.Context,
               **kwargs
       ):
           \"""
           Import data from Foo API to Poser DB
           \"""
           from poser.importing.foo import FromFooToPoser

           config = ctx.parent.wutta_config
           kw = dict(ctx.params)
           models = kw.pop('models')
           handler = FromFooToPoser(config)
           handler.process_data(*models, **kw)

    :param fn: The command function to be wrapped.

    :returns: New function which has the combined signature, and
       which invokes ``fn`` when called.
    """
    declared = inspect.signature(fn)
    template = inspect.signature(importer_command_template)

    # keep the first declared param (``ctx``) in front, with the
    # common params coming right after it
    own_params = list(declared.parameters.values())
    common_params = list(template.parameters.values())
    merged = own_params[:1] + common_params + own_params[1:]

    # remove the **kwargs param, which is assumed to be last
    del merged[-1]

    return makefun.create_function(declared.replace(parameters=merged), fn)
 def file_importer_command_template(
        # nb. a default value is needed here to keep the combined
        # signature valid, since file_importer_command() places this
        # param after the common ones, all of which have defaults
        # NOTE(review): presumably Typer treats the ``...`` default as
        # "required option" - confirm against Typer docs
        input_file_path: Annotated[
            Path,
            typer.Option('--input-path',
                         exists=True, file_okay=True, dir_okay=True,
                         help="Path to input file(s).  Can be a folder "
                         "if app logic can guess the filename(s); "
                         "otherwise must be complete file path.")] = ...,
 ):
    """
    Stub function which declares the extra param signature for
    import/export commands which require input file.

    This function has no body of its own; only its signature matters.
    The signature is read via :func:`python:inspect.signature()` by
    :func:`file_importer_command()`.

    :param input_file_path: Path to the input file or folder, given
       on the command line as ``--input-path``.
    """
 def file_importer_command(fn):
    """
    Decorator for import/export commands which require input file.

    This grafts onto the decorated function all params declared by
    :func:`importer_command_template()`, followed by those declared
    by :func:`file_importer_command_template()`.

    Usage is the same as shown for :func:`importer_command()`; just
    apply the ``file_importer_command`` decorator instead.  The
    decorated function is likewise expected to declare a leading
    ``ctx`` param and a trailing ``**kwargs`` catch-all.

    :param fn: The command function to be wrapped.

    :returns: New function which has the combined signature, and
       which invokes ``fn`` when called.
    """
    declared = inspect.signature(fn)

    # common params come first, then the file-specific ones
    extra_params = [
        *inspect.signature(importer_command_template).parameters.values(),
        *inspect.signature(file_importer_command_template).parameters.values(),
    ]

    # splice the extra params in right after the first (``ctx``) param
    merged = list(declared.parameters.values())
    merged[1:1] = extra_params

    # remove the **kwargs param, which is assumed to be last
    merged.pop()

    return makefun.create_function(declared.replace(parameters=merged), fn)
--- a/src/wuttasync/cli/import_csv.py
+++ b/src/wuttasync/cli/import_csv.py
@ -0,0 +1,51 @@
 # -*- coding: utf-8; -*-
 ################################################################################
 #
 #  WuttaSync -- Wutta Framework for data import/export and real-time sync
 #  Copyright © 2024 Lance Edgar
 #
 #  This file is part of Wutta Framework.
 #
 #  Wutta Framework is free software: you can redistribute it and/or modify it
 #  under the terms of the GNU General Public License as published by the Free
 #  Software Foundation, either version 3 of the License, or (at your option) any
 #  later version.
 #
 #  Wutta Framework is distributed in the hope that it will be useful, but
 #  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 #  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 #  more details.
 #
 #  You should have received a copy of the GNU General Public License along with
 #  Wutta Framework.  If not, see <http://www.gnu.org/licenses/>.
 #
 ################################################################################
 """
 See also: :ref:`wutta-import-csv`
 """
 import os
 import typer
 from wuttjamaican.cli import wutta_typer
 from .base import file_importer_command
@wutta_typer.command()
@file_importer_command
 def import_csv(ctx: typer.Context, **kwargs):
    """
    Import data from CSV file(s) to Wutta DB
    """
    # nb. the handler class is only imported once the command runs
    from wuttasync.importing.csv import FromCsvToWutta

    config = ctx.parent.wutta_config

    # all parsed params become handler kwargs, except for the model
    # keys which are passed along as positional args
    options = dict(ctx.params)
    model_keys = options.pop('models')

    FromCsvToWutta(config).process_data(*model_keys, **options)
--- a/src/wuttasync/importing/base.py
+++ b/src/wuttasync/importing/base.py
@ -71,6 +71,17 @@ class Importer:
       It is primarily (only?) used when the target side of the
       import/export uses SQLAlchemy ORM.
    .. attribute:: fields
       This is the official list of "effective" fields to be processed
       for the current import/export job.
       Code theoretically should not access this directly but instead
       call :meth:`get_fields()`.  However it is often convenient to
       overwrite this attribute directly, for dynamic fields.  If so
       then ``get_fields()`` will return the new value.  And really,
       it's probably just as safe to read this attribute directly too.
    """
    allow_create = True
@ -255,6 +266,8 @@ class Importer:
        This should return the "effective" list of fields which are to
        be used for the import/export.
        See also :attr:`fields` which is normally what this returns.
        All fields in this list should also be found in the output for
        :meth:`get_supported_fields()`.
@ -262,7 +275,7 @@ class Importer:
        :returns: List of "effective" field names.
        """
-        if hasattr(self, 'fields'):
+        if hasattr(self, 'fields') and self.fields is not None:
            return self.fields
        return self.get_supported_fields()
@ -334,6 +347,7 @@ class Importer:
        * :meth:`do_delete()`
        * :meth:`teardown()`
        """
        # TODO: should add try/catch around this all? and teardown() in finally: clause?
        self.setup()
        created = []
        updated = []
@ -345,6 +359,9 @@ class Importer:
        # TODO: should exclude duplicate source records
        # source_data, unique = self.get_unique_data(source_data)
        model_title = self.get_model_title()
        log.debug(f"got %s {model_title} records from source",
                  len(source_data))
        # maybe cache existing target data
        if self.caches_target:
@ -637,6 +654,7 @@ class Importer:
        target_title = self.handler.get_target_title()
        self.app.progress_loop(cache, objects, progress,
                               message=f"Reading {model_title} data from {target_title}")
        log.debug(f"cached %s {model_title} records from target", len(cached))
        return cached
    def get_target_objects(self, source_data=None, progress=None):
--- a/src/wuttasync/importing/csv.py
+++ b/src/wuttasync/importing/csv.py
@ -25,6 +25,7 @@ Importing from CSV
 """
 import csv
 import logging
 from collections import OrderedDict
 from sqlalchemy_utils.functions import get_primary_keys
@ -37,6 +38,9 @@ from .wutta import ToWuttaHandler
 from .model import ToWutta
 log = logging.getLogger(__name__)
 class FromCsv(FromFile):
    """
    Base class for importer/exporter using CSV file as data source.
@ -86,11 +90,34 @@ class FromCsv(FromFile):
        This tracks the file handle via
        :attr:`~wuttasync.importing.base.FromFile.input_file` and the
        CSV reader via :attr:`input_reader`.
        It also updates the effective
        :attr:`~wuttasync.importing.base.Importer.fields` list per the
        following logic:
        First get the current effective field list, e.g. as defined by
        the class and/or from caller params.  Then read the column
        header list from CSV file, and discard any which are not found
        in the first list.  The result becomes the new effective field
        list.
        """
        path = self.get_input_file_path()
        log.debug("opening input file: %s", path)
        self.input_file = open(path, 'rt', encoding=self.csv_encoding)
        self.input_reader = csv.DictReader(self.input_file)
        # nb. importer may have all supported fields by default, so
        # must prune to the subset also present in the input file
        fields = self.get_fields()
        orientation = self.orientation.value
        log.debug(f"supported fields for {orientation}: %s", fields)
        self.fields = [f for f in self.input_reader.fieldnames or []
                       if f in fields]
        log.debug("fields present in source data: %s", self.fields)
        if not self.fields:
            self.input_file.close()
            raise ValueError("input file has no recognized fields")
    def close_input_file(self):
        """ """
        self.input_file.close()
@ -195,7 +222,7 @@ class FromCsvToSqlalchemyMixin:
        })
-class FromCsvToWutta(FromCsvToSqlalchemyMixin, ToWuttaHandler):
+class FromCsvToWutta(FromCsvToSqlalchemyMixin, FromFileHandler, ToWuttaHandler):
    """
    Handler for CSV → Wutta :term:`app database` import.
    """
--- a/src/wuttasync/importing/handlers.py
+++ b/src/wuttasync/importing/handlers.py
@ -25,6 +25,7 @@ Data Import / Export Handlers
 """
 import logging
 import os
 from collections import OrderedDict
 from enum import Enum
@ -501,9 +502,28 @@ class ImportHandler(GenericHandler):
 class FromFileHandler(ImportHandler):
    """
-    Handler for import/export which uses an input file as data source.
+    Handler for import/export which uses input file(s) as data source.
    This handler assumes its importer/exporter classes inherit from
    :class:`~wuttasync.importing.base.FromFile` for source parent
    logic.
    """
    def process_data(self, *keys, **kwargs):
        """ """
        # nb. this assumes FromFile importer/exporter; the caller may
        # give either a file or a folder as ``input_file_path``
        given = kwargs.pop('input_file_path', None)
        if given:
            # only treat the path as a folder when caller did not
            # already specify one, and it really is a directory
            is_folder = not kwargs.get('input_file_dir') and os.path.isdir(given)
            kwargs['input_file_dir' if is_folder else 'input_file_path'] = given

        # and carry on
        super().process_data(*keys, **kwargs)
 class ToSqlalchemyHandler(ImportHandler):
    """
--- a/tests/cli/__init__.py
+++ b/tests/cli/__init__.py
--- a/tests/cli/example.conf
+++ b/tests/cli/example.conf
--- a/tests/cli/test_base.py
+++ b/tests/cli/test_base.py
@ -0,0 +1,38 @@
 #-*- coding: utf-8; -*-
 import inspect
 from unittest import TestCase
 from wuttasync.cli import base as mod
 class TestImporterCommand(TestCase):

    def test_basic(self):

        def myfunc(ctx, **kwargs):
            pass

        # plain function has the catch-all, but no common params
        before = inspect.signature(myfunc).parameters
        self.assertIn('kwargs', before)
        self.assertNotIn('dry_run', before)

        # decorated function has common params in place of the catch-all
        after = inspect.signature(mod.importer_command(myfunc)).parameters
        self.assertNotIn('kwargs', after)
        self.assertIn('dry_run', after)
 class TestFileImporterCommand(TestCase):

    def test_basic(self):

        def myfunc(ctx, **kwargs):
            pass

        # plain function has the catch-all, but none of the added params
        before = inspect.signature(myfunc).parameters
        self.assertIn('kwargs', before)
        for name in ('dry_run', 'input_file_path'):
            self.assertNotIn(name, before)

        # decorated function has common + file params, and no catch-all
        after = inspect.signature(mod.file_importer_command(myfunc)).parameters
        self.assertNotIn('kwargs', after)
        for name in ('dry_run', 'input_file_path'):
            self.assertIn(name, after)
--- a/tests/cli/test_import_csv.py
+++ b/tests/cli/test_import_csv.py
@ -0,0 +1,24 @@
 #-*- coding: utf-8; -*-
 import os
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
 from wuttasync.cli import import_csv as mod
 from wuttasync.importing.csv import FromCsvToWutta
 here = os.path.dirname(__file__)
 example_conf = os.path.join(here, 'example.conf')
 class TestImportCsv(TestCase):

    def test_basic(self):
        # nb. these stand in for the params which typer would have parsed
        params = {
            'models': [],
            'create': True,
            'update': True,
            'delete': False,
            'dry_run': True,
        }
        ctx = MagicMock(params=params)

        with patch.object(FromCsvToWutta, 'process_data') as process_data:
            mod.import_csv(ctx)
            # model keys go as positional args (none here); the rest as kwargs
            process_data.assert_called_once_with(
                create=True, update=True, delete=False, dry_run=True)
--- a/tests/importing/test_csv.py
+++ b/tests/importing/test_csv.py
@ -14,6 +14,12 @@ class TestFromCsv(DataTestCase):
        self.setup_db()
        self.handler = ImportHandler(self.config)
        self.data_path = self.write_file('data.txt', """\
 name,value
 foo,bar
 foo2,bar2
 """)
    def make_importer(self, **kwargs):
        kwargs.setdefault('handler', self.handler)
        return mod.FromCsv(self.config, **kwargs)
@ -33,19 +39,36 @@ class TestFromCsv(DataTestCase):
        model = self.app.model
        imp = self.make_importer(model_class=model.Setting)
-        path = self.write_file('data.txt', '')
+        # normal operation, input file includes all fields
-        imp.input_file_path = path
+        imp = self.make_importer(model_class=model.Setting, input_file_path=self.data_path)
        self.assertEqual(imp.fields, ['name', 'value'])
        imp.open_input_file()
-        self.assertEqual(imp.input_file.name, path)
+        self.assertEqual(imp.input_file.name, self.data_path)
        self.assertIsInstance(imp.input_reader, csv.DictReader)
        self.assertEqual(imp.fields, ['name', 'value'])
        imp.input_file.close()
        # this file is missing a field, plus we'll pretend more are
        # supported - but should wind up with just the one field
        missing = self.write_file('missing.txt', 'name')
        imp = self.make_importer(model_class=model.Setting, input_file_path=missing)
        imp.fields.extend(['lots', 'more'])
        self.assertEqual(imp.fields, ['name', 'value', 'lots', 'more'])
        imp.open_input_file()
        self.assertEqual(imp.fields, ['name'])
        imp.input_file.close()
        # and what happens when no known fields are found
        bogus = self.write_file('bogus.txt', 'blarg')
        imp = self.make_importer(model_class=model.Setting, input_file_path=bogus)
        self.assertEqual(imp.fields, ['name', 'value'])
        self.assertRaises(ValueError, imp.open_input_file)
    def test_close_input_file(self):
        model = self.app.model
        imp = self.make_importer(model_class=model.Setting)
-        path = self.write_file('data.txt', '')
+        imp.input_file_path = self.data_path
        imp.input_file_path = path
        imp.open_input_file()
        imp.close_input_file()
        self.assertFalse(hasattr(imp, 'input_reader'))
@ -55,12 +78,7 @@ class TestFromCsv(DataTestCase):
        model = self.app.model
        imp = self.make_importer(model_class=model.Setting)
-        path = self.write_file('data.csv', """\
+        imp.input_file_path = self.data_path
 name,value
 foo,bar
 foo2,bar2
 """)
        imp.input_file_path = path
        imp.open_input_file()
        objects = imp.get_source_objects()
        imp.close_input_file()
--- a/tests/importing/test_handlers.py
+++ b/tests/importing/test_handlers.py
@ -173,6 +173,31 @@ class TestImportHandler(DataTestCase):
        self.assertRaises(KeyError, handler.get_importer, 'BunchOfNonsense', model_class=model.Setting)
 class TestFromFileHandler(DataTestCase):

    def make_handler(self, **kwargs):
        return mod.FromFileHandler(self.config, **kwargs)

    def test_process_data(self):
        handler = self.make_handler()
        path = self.write_file('data.txt', '')

        # each case pairs the kwargs given to our handler, with the
        # kwargs which the parent class should then receive
        cases = [
            # bare
            ({}, {}),
            # with file path
            ({'input_file_path': path}, {'input_file_path': path}),
            # with folder
            ({'input_file_path': self.tempdir}, {'input_file_dir': self.tempdir}),
        ]

        with patch.object(mod.ImportHandler, 'process_data') as parent_process:
            for given, expected in cases:
                parent_process.reset_mock()
                handler.process_data(**given)
                parent_process.assert_called_once_with(**expected)
 class TestToSqlalchemyHandler(DataTestCase):
    def make_handler(self, **kwargs):