diff --git a/docs/api/wuttasync.cli.base.rst b/docs/api/wuttasync.cli.base.rst new file mode 100644 index 0000000..a411eef --- /dev/null +++ b/docs/api/wuttasync.cli.base.rst @@ -0,0 +1,6 @@ + +``wuttasync.cli.base`` +====================== + +.. automodule:: wuttasync.cli.base + :members: diff --git a/docs/api/wuttasync.cli.import_csv.rst b/docs/api/wuttasync.cli.import_csv.rst new file mode 100644 index 0000000..c5104b2 --- /dev/null +++ b/docs/api/wuttasync.cli.import_csv.rst @@ -0,0 +1,6 @@ + +``wuttasync.cli.import_csv`` +============================ + +.. automodule:: wuttasync.cli.import_csv + :members: diff --git a/docs/api/wuttasync.cli.rst b/docs/api/wuttasync.cli.rst new file mode 100644 index 0000000..92dddb5 --- /dev/null +++ b/docs/api/wuttasync.cli.rst @@ -0,0 +1,6 @@ + +``wuttasync.cli`` +================= + +.. automodule:: wuttasync.cli + :members: diff --git a/docs/conf.py b/docs/conf.py index c5d923c..9abf338 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,6 +22,7 @@ extensions = [ 'sphinx.ext.viewcode', 'sphinx.ext.todo', 'enum_tools.autoenum', + 'sphinxcontrib.programoutput', ] templates_path = ['_templates'] diff --git a/docs/index.rst b/docs/index.rst index ac6be84..b8bf248 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,6 +5,15 @@ WuttaSync This package adds data import/export and real-time sync utilities for the `Wutta Framework <https://wuttaproject.org/>`_. +The primary use cases here are: + +* keep "operational" data in sync between e.g. various business systems +* import data from user-specified file +* export to file + +This isn't really meant to replace typical ETL tools; it is smaller +scale and (hopefully) more flexible. + While it of course supports import/export to/from the Wutta :term:`app database`, it may be used for any "source → target" data flow. @@ -14,12 +23,16 @@ database`, it may be used for any "source → target" data flow. :caption: Documentation narr/install + narr/cli ..
toctree:: :maxdepth: 1 :caption: API api/wuttasync + api/wuttasync.cli + api/wuttasync.cli.base + api/wuttasync.cli.import_csv api/wuttasync.importing api/wuttasync.importing.base api/wuttasync.importing.csv diff --git a/docs/narr/cli.rst b/docs/narr/cli.rst new file mode 100644 index 0000000..4bee64d --- /dev/null +++ b/docs/narr/cli.rst @@ -0,0 +1,25 @@ + +Built-in Commands +================= + +WuttaSync adds some built-in ``wutta`` :term:`subcommands <subcommand>`. + +See also :doc:`wuttjamaican:narr/cli/index`. + + +.. _wutta-import-csv: + +``wutta import-csv`` +-------------------- + +Import data from CSV file(s) to the Wutta :term:`app database`. + +This *should* be able to automatically target any table mapped in the +:term:`app model`. The only caveat is that it is "dumb" and does not +have any special field handling. This means the column headers in the +CSV file must be named the same as in the target table, and some data +types may not behave as expected etc. + +Defined in: :mod:`wuttasync.cli.import_csv` + +..
program-output:: wutta import-csv --help diff --git a/pyproject.toml b/pyproject.toml index 6561b9e..e531391 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,16 +26,21 @@ classifiers = [ ] requires-python = ">= 3.8" dependencies = [ + "makefun", "SQLAlchemy-Utils", "WuttJamaican[db]", ] [project.optional-dependencies] -docs = ["Sphinx", "enum-tools[sphinx]", "furo"] +docs = ["Sphinx", "enum-tools[sphinx]", "furo", "sphinxcontrib-programoutput"] tests = ["pytest-cov", "tox"] +[project.entry-points."wutta.typer_imports"] +wuttasync = "wuttasync.cli" + + [project.urls] Homepage = "https://wuttaproject.org/" Repository = "https://forgejo.wuttaproject.org/wutta/wuttasync" diff --git a/src/wuttasync/cli/__init__.py b/src/wuttasync/cli/__init__.py new file mode 100644 index 0000000..70de7ac --- /dev/null +++ b/src/wuttasync/cli/__init__.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8; -*- +################################################################################ +# +# WuttaSync -- Wutta Framework for data import/export and real-time sync +# Copyright © 2024 Lance Edgar +# +# This file is part of Wutta Framework. +# +# Wutta Framework is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Wutta Framework is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# Wutta Framework. If not, see <http://www.gnu.org/licenses/>.
def importer_command_template(

        # model keys
        models: Annotated[
            Optional[List[str]],
            typer.Argument(help="Model(s) to process. Can specify one or more, "
                           "or omit to process all default models.")] = None,

        # allow create?
        create: Annotated[
            bool,
            typer.Option(help="Allow new target records to be created.")] = True,

        # allow update?
        update: Annotated[
            bool,
            typer.Option(help="Allow existing target records to be updated.")] = True,

        # allow delete?
        delete: Annotated[
            bool,
            typer.Option(help="Allow existing target records to be deleted.")] = False,

        # dry run?
        dry_run: Annotated[
            bool,
            typer.Option('--dry-run',
                         help="Go through the motions, but rollback the transaction.")] = False,

        # # fields
        # fields: Annotated[
        #     str,
        #     typer.Option('--fields',
        #                  help="List of fields to process. If specified, "
        #                  "any field not listed is excluded regardless "
        #                  "of --exclude.")] = None,
        # exclude_fields: Annotated[
        #     str,
        #     typer.Option('--exclude',
        #                  help="List of fields not to process. If "
        #                  "specified, any field not listed is (not?) included "
        #                  "based on app logic and/or --fields.")] = None,
):
    """
    Stub function which provides a common param signature; used with
    :func:`importer_command()`.

    This function has no body and is not meant to be called.  Only
    its signature is inspected, so that the params declared here
    (``models``, ``create``, ``update``, ``delete``, ``dry_run``) can
    be copied onto a real command function.
    """


def _apply_command_templates(fn, *templates):
    """
    Shared logic for :func:`importer_command()` and
    :func:`file_importer_command()`.

    Builds a new signature from that of ``fn``, with the params from
    each of the ``templates`` (in the order given) inserted just
    after the first param of ``fn``, and with the catch-all
    ``**kwargs`` param removed.  A wrapper function with that
    signature is then made via ``makefun.create_function()``.

    :param fn: The command function being decorated.  Per the usage
       shown for :func:`importer_command()` it is declared as
       ``fn(ctx, **kwargs)``.

    :param \\*templates: Stub function(s) whose params should be
       added, e.g. :func:`importer_command_template()`.

    :returns: The wrapper function as created by ``makefun``.
    """
    original_sig = inspect.signature(fn)

    added = []
    for template in templates:
        added.extend(inspect.signature(template).parameters.values())

    # nb. the **kwargs param is identified by its kind, not by its
    # position; blindly popping the last param would silently discard
    # a real one whenever fn was declared without **kwargs
    params = [param for param in original_sig.parameters.values()
              if param.kind is not inspect.Parameter.VAR_KEYWORD]

    # template params go right after the first (context) param
    params[1:1] = added

    final_sig = original_sig.replace(parameters=params)
    return makefun.create_function(final_sig, fn)


def importer_command(fn):
    """
    Decorator for import/export commands.  Adds common params based on
    :func:`importer_command_template()`.

    The decorated function should accept ``**kwargs``; that catch-all
    is replaced by the explicit template params in the signature of
    the function which is returned.

    To use this, e.g. for ``poser import-foo`` command::

       from poser.cli import poser_typer
       from wuttasync.cli import importer_command

       @poser_typer.command()
       @importer_command
       def import_foo(
               ctx: typer.Context,
               **kwargs
       ):
           \"""
           Import data from Foo API to Poser DB
           \"""
           from poser.importing.foo import FromFooToPoser

           config = ctx.parent.wutta_config
           kw = dict(ctx.params)
           models = kw.pop('models')
           handler = FromFooToPoser(config)
           handler.process_data(*models, **kw)

    :param fn: The command function to decorate.

    :returns: New function with the expanded signature.
    """
    return _apply_command_templates(fn, importer_command_template)


def file_importer_command_template(
        input_file_path: Annotated[
            Path,
            typer.Option('--input-path',
                         exists=True, file_okay=True, dir_okay=True,
                         help="Path to input file(s). Can be a folder "
                         "if app logic can guess the filename(s); "
                         "otherwise must be complete file path.")] = ...,
):
    """
    Stub function to provide signature for import/export commands
    which require input file.  Used with
    :func:`file_importer_command()`.

    As with :func:`importer_command_template()` this has no body;
    only its signature (the ``input_file_path`` param) is used.
    """


def file_importer_command(fn):
    """
    Decorator for import/export commands which require input file.
    Adds common params based on :func:`importer_command_template()`
    followed by those from :func:`file_importer_command_template()`.

    To use this, it's the same method as shown for
    :func:`importer_command()` except in this case you would use the
    ``file_importer_command`` decorator.

    :param fn: The command function to decorate.

    :returns: New function with the expanded signature.
    """
    return _apply_command_templates(fn,
                                    importer_command_template,
                                    file_importer_command_template)
@wutta_typer.command()
@file_importer_command
def import_csv(
        ctx: typer.Context,
        **kwargs
):
    """
    Import data from CSV file(s) to Wutta DB
    """
    # nb. the docstring above is left untouched since typer
    # presumably shows it as the help text for this command

    # nb. handler is imported only when the command actually runs
    from wuttasync.importing.csv import FromCsvToWutta

    config = ctx.parent.wutta_config

    # work from a copy of the parsed params; the model keys are given
    # to the handler positionally, everything else as keyword args
    params = dict(ctx.params)
    model_keys = params.pop('models')

    FromCsvToWutta(config).process_data(*model_keys, **params)
""" - if hasattr(self, 'fields'): + if hasattr(self, 'fields') and self.fields is not None: return self.fields return self.get_supported_fields() @@ -334,6 +347,7 @@ class Importer: * :meth:`do_delete()` * :meth:`teardown()` """ + # TODO: should add try/catch around this all? and teardown() in finally: clause? self.setup() created = [] updated = [] @@ -345,6 +359,9 @@ class Importer: # TODO: should exclude duplicate source records # source_data, unique = self.get_unique_data(source_data) + model_title = self.get_model_title() + log.debug(f"got %s {model_title} records from source", + len(source_data)) # maybe cache existing target data if self.caches_target: @@ -637,6 +654,7 @@ class Importer: target_title = self.handler.get_target_title() self.app.progress_loop(cache, objects, progress, message=f"Reading {model_title} data from {target_title}") + log.debug(f"cached %s {model_title} records from target", len(cached)) return cached def get_target_objects(self, source_data=None, progress=None): diff --git a/src/wuttasync/importing/csv.py b/src/wuttasync/importing/csv.py index f81652c..1c62818 100644 --- a/src/wuttasync/importing/csv.py +++ b/src/wuttasync/importing/csv.py @@ -25,6 +25,7 @@ Importing from CSV """ import csv +import logging from collections import OrderedDict from sqlalchemy_utils.functions import get_primary_keys @@ -37,6 +38,9 @@ from .wutta import ToWuttaHandler from .model import ToWutta +log = logging.getLogger(__name__) + + class FromCsv(FromFile): """ Base class for importer/exporter using CSV file as data source. @@ -86,11 +90,34 @@ class FromCsv(FromFile): This tracks the file handle via :attr:`~wuttasync.importing.base.FromFile.input_file` and the CSV reader via :attr:`input_reader`. + + It also updates the effective + :attr:`~wuttasync.importing.base.Importer.fields` list per the + following logic: + + First get the current effective field list, e.g. as defined by + the class and/or from caller params. 
Then read the column + header list from CSV file, and discard any which are not found + in the first list. The result becomes the new effective field + list. """ path = self.get_input_file_path() + log.debug("opening input file: %s", path) self.input_file = open(path, 'rt', encoding=self.csv_encoding) self.input_reader = csv.DictReader(self.input_file) + # nb. importer may have all supported fields by default, so + # must prune to the subset also present in the input file + fields = self.get_fields() + orientation = self.orientation.value + log.debug(f"supported fields for {orientation}: %s", fields) + self.fields = [f for f in self.input_reader.fieldnames or [] + if f in fields] + log.debug("fields present in source data: %s", self.fields) + if not self.fields: + self.input_file.close() + raise ValueError("input file has no recognized fields") + def close_input_file(self): """ """ self.input_file.close() @@ -195,7 +222,7 @@ class FromCsvToSqlalchemyMixin: }) -class FromCsvToWutta(FromCsvToSqlalchemyMixin, ToWuttaHandler): +class FromCsvToWutta(FromCsvToSqlalchemyMixin, FromFileHandler, ToWuttaHandler): """ Handler for CSV → Wutta :term:`app database` import. """ diff --git a/src/wuttasync/importing/handlers.py b/src/wuttasync/importing/handlers.py index a1e5152..ac3f89f 100644 --- a/src/wuttasync/importing/handlers.py +++ b/src/wuttasync/importing/handlers.py @@ -25,6 +25,7 @@ Data Import / Export Handlers """ import logging +import os from collections import OrderedDict from enum import Enum @@ -501,9 +502,28 @@ class ImportHandler(GenericHandler): class FromFileHandler(ImportHandler): """ - Handler for import/export which uses an input file as data source. + Handler for import/export which uses input file(s) as data source. + + This handler assumes its importer/exporter classes inherit from + :class:`~wuttasync.importing.base.FromFile` for source parent + logic. """ + def process_data(self, *keys, **kwargs): + """ """ + + # interpret file vs. folder path + # nb. 
this assumes FromFile importer/exporter + path = kwargs.pop('input_file_path', None) + if path: + if not kwargs.get('input_file_dir') and os.path.isdir(path): + kwargs['input_file_dir'] = path + else: + kwargs['input_file_path'] = path + + # and carry on + super().process_data(*keys, **kwargs) + class ToSqlalchemyHandler(ImportHandler): """ diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cli/example.conf b/tests/cli/example.conf new file mode 100644 index 0000000..e69de29 diff --git a/tests/cli/test_base.py b/tests/cli/test_base.py new file mode 100644 index 0000000..b43f7d7 --- /dev/null +++ b/tests/cli/test_base.py @@ -0,0 +1,38 @@ +#-*- coding: utf-8; -*- + +import inspect +from unittest import TestCase + +from wuttasync.cli import base as mod + + +class TestImporterCommand(TestCase): + + def test_basic(self): + def myfunc(ctx, **kwargs): + pass + + sig1 = inspect.signature(myfunc) + self.assertIn('kwargs', sig1.parameters) + self.assertNotIn('dry_run', sig1.parameters) + wrapt = mod.importer_command(myfunc) + sig2 = inspect.signature(wrapt) + self.assertNotIn('kwargs', sig2.parameters) + self.assertIn('dry_run', sig2.parameters) + + +class TestFileImporterCommand(TestCase): + + def test_basic(self): + def myfunc(ctx, **kwargs): + pass + + sig1 = inspect.signature(myfunc) + self.assertIn('kwargs', sig1.parameters) + self.assertNotIn('dry_run', sig1.parameters) + self.assertNotIn('input_file_path', sig1.parameters) + wrapt = mod.file_importer_command(myfunc) + sig2 = inspect.signature(wrapt) + self.assertNotIn('kwargs', sig2.parameters) + self.assertIn('dry_run', sig2.parameters) + self.assertIn('input_file_path', sig2.parameters) diff --git a/tests/cli/test_import_csv.py b/tests/cli/test_import_csv.py new file mode 100644 index 0000000..a4371df --- /dev/null +++ b/tests/cli/test_import_csv.py @@ -0,0 +1,24 @@ +#-*- coding: utf-8; -*- + +import os +from unittest import TestCase +from 
class TestImportCsv(TestCase):

    def test_basic(self):
        # stand-in for the typer context, carrying the parsed params
        params = {
            'models': [],
            'create': True,
            'update': True,
            'delete': False,
            'dry_run': True,
        }
        ctx = MagicMock(params=params)

        # the handler method is patched out, so only the call which
        # the command makes is checked here
        with patch.object(FromCsvToWutta, 'process_data') as process_data:
            mod.import_csv(ctx)

        # nb. 'models' was empty so there are no positional args
        process_data.assert_called_once_with(create=True, update=True,
                                             delete=False, dry_run=True)
class TestFromFileHandler(DataTestCase):

    def make_handler(self, **kwargs):
        return mod.FromFileHandler(self.config, **kwargs)

    def test_process_data(self):
        handler = self.make_handler()
        path = self.write_file('data.txt', '')

        # each case is (kwargs given to handler, kwargs expected by parent)
        cases = [
            # bare
            ({}, {}),
            # with file path
            ({'input_file_path': path}, {'input_file_path': path}),
            # with folder
            ({'input_file_path': self.tempdir}, {'input_file_dir': self.tempdir}),
        ]

        with patch.object(mod.ImportHandler, 'process_data') as process_data:
            for given, expected in cases:
                process_data.reset_mock()
                handler.process_data(**given)
                process_data.assert_called_once_with(**expected)