feat: add wutta import-csv command

This commit is contained in:
Lance Edgar 2024-12-05 21:19:06 -06:00
parent 84a8beaf46
commit f43a066341
19 changed files with 500 additions and 15 deletions

View file

@ -0,0 +1,6 @@
``wuttasync.cli.base``
======================
.. automodule:: wuttasync.cli.base
:members:

View file

@ -0,0 +1,6 @@
``wuttasync.cli.import_csv``
============================
.. automodule:: wuttasync.cli.import_csv
:members:

View file

@ -0,0 +1,6 @@
``wuttasync.cli``
=================
.. automodule:: wuttasync.cli
:members:

View file

@ -22,6 +22,7 @@ extensions = [
'sphinx.ext.viewcode', 'sphinx.ext.viewcode',
'sphinx.ext.todo', 'sphinx.ext.todo',
'enum_tools.autoenum', 'enum_tools.autoenum',
'sphinxcontrib.programoutput',
] ]
templates_path = ['_templates'] templates_path = ['_templates']

View file

@ -5,6 +5,15 @@ WuttaSync
This package adds data import/export and real-time sync utilities for This package adds data import/export and real-time sync utilities for
the `Wutta Framework <https://wuttaproject.org>`_. the `Wutta Framework <https://wuttaproject.org>`_.
The primary use cases here are:
* keep "operational" data in sync between e.g. various business systems
* import data from user-specified file
* export to file
This isn't really meant to replace typical ETL tools; it is smaller
scale and (hopefully) more flexible.
While it of course supports import/export to/from the Wutta :term:`app While it of course supports import/export to/from the Wutta :term:`app
database`, it may be used for any "source → target" data flow. database`, it may be used for any "source → target" data flow.
@ -14,12 +23,16 @@ database`, it may be used for any "source → target" data flow.
:caption: Documentation :caption: Documentation
narr/install narr/install
narr/cli
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
:caption: API :caption: API
api/wuttasync api/wuttasync
api/wuttasync.cli
api/wuttasync.cli.base
api/wuttasync.cli.import_csv
api/wuttasync.importing api/wuttasync.importing
api/wuttasync.importing.base api/wuttasync.importing.base
api/wuttasync.importing.csv api/wuttasync.importing.csv

25
docs/narr/cli.rst Normal file
View file

@ -0,0 +1,25 @@
Built-in Commands
=================
WuttaSync adds some built-in ``wutta`` :term:`subcommands <subcommand>`.
See also :doc:`wuttjamaican:narr/cli/index`.
.. _wutta-import-csv:
``wutta import-csv``
--------------------
Import data from CSV file(s) to the Wutta :term:`app database`.
This *should* be able to automatically target any table mapped in the
:term:`app model`. The only caveat is that it is "dumb" and does not
have any special field handling. This means the column headers in the
CSV file must be named the same as in the target table, and some data
types may not behave as expected etc.
Defined in: :mod:`wuttasync.cli.import_csv`
.. program-output:: wutta import-csv --help

View file

@ -26,16 +26,21 @@ classifiers = [
] ]
requires-python = ">= 3.8" requires-python = ">= 3.8"
dependencies = [ dependencies = [
"makefun",
"SQLAlchemy-Utils", "SQLAlchemy-Utils",
"WuttJamaican[db]", "WuttJamaican[db]",
] ]
[project.optional-dependencies] [project.optional-dependencies]
docs = ["Sphinx", "enum-tools[sphinx]", "furo"] docs = ["Sphinx", "enum-tools[sphinx]", "furo", "sphinxcontrib-programoutput"]
tests = ["pytest-cov", "tox"] tests = ["pytest-cov", "tox"]
[project.entry-points."wutta.typer_imports"]
wuttasync = "wuttasync.cli"
[project.urls] [project.urls]
Homepage = "https://wuttaproject.org/" Homepage = "https://wuttaproject.org/"
Repository = "https://forgejo.wuttaproject.org/wutta/wuttasync" Repository = "https://forgejo.wuttaproject.org/wutta/wuttasync"

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8; -*-
################################################################################
#
# WuttaSync -- Wutta Framework for data import/export and real-time sync
# Copyright © 2024 Lance Edgar
#
# This file is part of Wutta Framework.
#
# Wutta Framework is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# Wutta Framework is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Wutta Framework. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
WuttaSync - ``wutta`` subcommands
This namespace exposes the following:
* :func:`~wuttasync.cli.base.importer_command()`
* :func:`~wuttasync.cli.base.file_importer_command()`
"""
from .base import importer_command, file_importer_command
# nb. must bring in all modules for discovery to work
from . import import_csv

167
src/wuttasync/cli/base.py Normal file
View file

@ -0,0 +1,167 @@
# -*- coding: utf-8; -*-
################################################################################
#
# WuttaSync -- Wutta Framework for data import/export and real-time sync
# Copyright © 2024 Lance Edgar
#
# This file is part of Wutta Framework.
#
# Wutta Framework is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# Wutta Framework is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Wutta Framework. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
Base logic for ``wutta`` import/export subcommands
"""
import inspect
from pathlib import Path
from typing import List, Optional
from typing_extensions import Annotated
import makefun
import typer
def importer_command_template(

        # model keys
        models: Annotated[
            Optional[List[str]],
            typer.Argument(help="Model(s) to process. Can specify one or more, "
                           "or omit to process all default models.")] = None,

        # allow create?
        create: Annotated[
            bool,
            typer.Option(help="Allow new target records to be created.")] = True,

        # allow update?
        update: Annotated[
            bool,
            typer.Option(help="Allow existing target records to be updated.")] = True,

        # allow delete?
        delete: Annotated[
            bool,
            typer.Option(help="Allow existing target records to be deleted.")] = False,

        # dry run?
        dry_run: Annotated[
            bool,
            typer.Option('--dry-run',
                         help="Go through the motions, but rollback the transaction.")] = False,

        # NOTE: the field selection options below are currently
        # disabled; they are kept here for reference only.

        # # fields
        # fields: Annotated[
        #     str,
        #     typer.Option('--fields',
        #                  help="List of fields to process. If specified, "
        #                  "any field not listed is excluded regardless "
        #                  "of --exclude.")] = None,
        # exclude_fields: Annotated[
        #     str,
        #     typer.Option('--exclude',
        #                  help="List of fields not to process. If "
        #                  "specified, any field not listed is (not?) included "
        #                  "based on app logic and/or --fields.")] = None,
):
    """
    Stub function which provides a common param signature; used with
    :func:`importer_command()`.

    This function has no body of its own.  Its signature is read via
    ``inspect.signature()`` and its parameters are copied onto the
    decorated command function.

    :param models: Model(s) to process.  May specify one or more; if
       omitted (default ``None``) all default models are processed.

    :param create: Allow new target records to be created.  Default
       is ``True``.

    :param update: Allow existing target records to be updated.
       Default is ``True``.

    :param delete: Allow existing target records to be deleted.
       Default is ``False``.

    :param dry_run: Exposed as ``--dry-run``.  Go through the motions,
       but rollback the transaction.  Default is ``False``.
    """
def importer_command(fn):
    """
    Decorator for import/export commands.  Adds common params based on
    :func:`importer_command_template()`.

    To use this, e.g. for ``poser import-foo`` command::

       from poser.cli import poser_typer
       from wuttasync.cli import importer_command

       @poser_typer.command()
       @importer_command
       def import_foo(
               ctx: typer.Context,
               **kwargs
       ):
           \"""
           Import data from Foo API to Poser DB
           \"""
           from poser.importing.foo import FromFooToPoser

           config = ctx.parent.wutta_config
           kw = dict(ctx.params)
           models = kw.pop('models')
           handler = FromFooToPoser(config)
           handler.process_data(*models, **kw)

    The common params are spliced in directly after the first
    parameter of ``fn`` (``ctx`` in the example above), and the
    ``**kwargs`` placeholder is omitted from the final signature.

    :param fn: Command function to be decorated.  It is expected to
       declare ``**kwargs`` as a placeholder for the common params.

    :returns: New function, generated by ``makefun``, which exposes
       the final signature and wraps ``fn``.
    """
    original_sig = inspect.signature(fn)
    reference_sig = inspect.signature(importer_command_template)

    # nb. remove the **kwargs param by kind rather than by position,
    # so a function declared without one does not silently lose its
    # last real parameter
    params = [param for param in original_sig.parameters.values()
              if param.kind is not inspect.Parameter.VAR_KEYWORD]

    # insert the common params just after the first param
    params[1:1] = reference_sig.parameters.values()

    final_sig = original_sig.replace(parameters=params)
    return makefun.create_function(final_sig, fn)
def file_importer_command_template(
        input_file_path: Annotated[
            Path,
            typer.Option('--input-path',
                         exists=True, file_okay=True, dir_okay=True,
                         help="Path to input file(s). Can be a folder "
                         "if app logic can guess the filename(s); "
                         "otherwise must be complete file path.")] = ...,
):
    """
    Stub function to provide signature for import/export commands
    which require input file.  Used with
    :func:`file_importer_command()`.

    This function has no body of its own.  Its signature is read via
    ``inspect.signature()`` and its parameters are copied onto the
    decorated command function.

    :param input_file_path: Exposed as ``--input-path``.  Path to
       input file(s); may be either a file or a folder.  The default
       is ``...`` (Ellipsis) - presumably this marks the option as
       required, per Typer convention.
    """
def file_importer_command(fn):
    """
    Decorator for import/export commands which require input file.
    Adds common params based on
    :func:`file_importer_command_template()`.

    To use this, it's the same method as shown for
    :func:`importer_command()` except in this case you would use the
    ``file_importer_command`` decorator.

    The params from :func:`importer_command_template()` come first,
    followed by those from :func:`file_importer_command_template()`.
    All are spliced in directly after the first parameter of ``fn``,
    and the ``**kwargs`` placeholder is omitted from the final
    signature.

    :param fn: Command function to be decorated.  It is expected to
       declare ``**kwargs`` as a placeholder for the common params.

    :returns: New function, generated by ``makefun``, which exposes
       the final signature and wraps ``fn``.
    """
    original_sig = inspect.signature(fn)
    plain_import_sig = inspect.signature(importer_command_template)
    file_import_sig = inspect.signature(file_importer_command_template)
    desired_params = (
        list(plain_import_sig.parameters.values())
        + list(file_import_sig.parameters.values()))

    # nb. remove the **kwargs param by kind rather than by position,
    # so a function declared without one does not silently lose its
    # last real parameter
    params = [param for param in original_sig.parameters.values()
              if param.kind is not inspect.Parameter.VAR_KEYWORD]

    # insert the common params just after the first param
    params[1:1] = desired_params

    final_sig = original_sig.replace(parameters=params)
    return makefun.create_function(final_sig, fn)

View file

@ -0,0 +1,51 @@
# -*- coding: utf-8; -*-
################################################################################
#
# WuttaSync -- Wutta Framework for data import/export and real-time sync
# Copyright © 2024 Lance Edgar
#
# This file is part of Wutta Framework.
#
# Wutta Framework is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# Wutta Framework is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Wutta Framework. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
See also: :ref:`wutta-import-csv`
"""
import os
import typer
from wuttjamaican.cli import wutta_typer
from .base import file_importer_command
@wutta_typer.command()
@file_importer_command
def import_csv(
        ctx: typer.Context,
        **kwargs
):
    """
    Import data from CSV file(s) to Wutta DB
    """
    # nb. the handler is imported only when the command actually runs
    from wuttasync.importing.csv import FromCsvToWutta

    config = ctx.parent.wutta_config

    # work on a copy of the parsed params; model keys become
    # positional args and everything else is passed as keywords
    options = dict(ctx.params)
    model_keys = options.pop('models')

    FromCsvToWutta(config).process_data(*model_keys, **options)

View file

@ -71,6 +71,17 @@ class Importer:
It is primarily (only?) used when the target side of the It is primarily (only?) used when the target side of the
import/export uses SQLAlchemy ORM. import/export uses SQLAlchemy ORM.
.. attribute:: fields
This is the official list of "effective" fields to be processed
for the current import/export job.
Code theoretically should not access this directly but instead
call :meth:`get_fields()`. However it is often convenient to
overwrite this attribute directly, for dynamic fields. If so
then ``get_fields()`` will return the new value. And really,
it's probably just as safe to read this attribute directly too.
""" """
allow_create = True allow_create = True
@ -255,6 +266,8 @@ class Importer:
This should return the "effective" list of fields which are to This should return the "effective" list of fields which are to
be used for the import/export. be used for the import/export.
See also :attr:`fields` which is normally what this returns.
All fields in this list should also be found in the output for All fields in this list should also be found in the output for
:meth:`get_supported_fields()`. :meth:`get_supported_fields()`.
@ -262,7 +275,7 @@ class Importer:
:returns: List of "effective" field names. :returns: List of "effective" field names.
""" """
if hasattr(self, 'fields'): if hasattr(self, 'fields') and self.fields is not None:
return self.fields return self.fields
return self.get_supported_fields() return self.get_supported_fields()
@ -334,6 +347,7 @@ class Importer:
* :meth:`do_delete()` * :meth:`do_delete()`
* :meth:`teardown()` * :meth:`teardown()`
""" """
# TODO: should add try/catch around this all? and teardown() in finally: clause?
self.setup() self.setup()
created = [] created = []
updated = [] updated = []
@ -345,6 +359,9 @@ class Importer:
# TODO: should exclude duplicate source records # TODO: should exclude duplicate source records
# source_data, unique = self.get_unique_data(source_data) # source_data, unique = self.get_unique_data(source_data)
model_title = self.get_model_title()
log.debug(f"got %s {model_title} records from source",
len(source_data))
# maybe cache existing target data # maybe cache existing target data
if self.caches_target: if self.caches_target:
@ -637,6 +654,7 @@ class Importer:
target_title = self.handler.get_target_title() target_title = self.handler.get_target_title()
self.app.progress_loop(cache, objects, progress, self.app.progress_loop(cache, objects, progress,
message=f"Reading {model_title} data from {target_title}") message=f"Reading {model_title} data from {target_title}")
log.debug(f"cached %s {model_title} records from target", len(cached))
return cached return cached
def get_target_objects(self, source_data=None, progress=None): def get_target_objects(self, source_data=None, progress=None):

View file

@ -25,6 +25,7 @@ Importing from CSV
""" """
import csv import csv
import logging
from collections import OrderedDict from collections import OrderedDict
from sqlalchemy_utils.functions import get_primary_keys from sqlalchemy_utils.functions import get_primary_keys
@ -37,6 +38,9 @@ from .wutta import ToWuttaHandler
from .model import ToWutta from .model import ToWutta
log = logging.getLogger(__name__)
class FromCsv(FromFile): class FromCsv(FromFile):
""" """
Base class for importer/exporter using CSV file as data source. Base class for importer/exporter using CSV file as data source.
@ -86,11 +90,34 @@ class FromCsv(FromFile):
This tracks the file handle via This tracks the file handle via
:attr:`~wuttasync.importing.base.FromFile.input_file` and the :attr:`~wuttasync.importing.base.FromFile.input_file` and the
CSV reader via :attr:`input_reader`. CSV reader via :attr:`input_reader`.
It also updates the effective
:attr:`~wuttasync.importing.base.Importer.fields` list per the
following logic:
First get the current effective field list, e.g. as defined by
the class and/or from caller params. Then read the column
header list from CSV file, and discard any which are not found
in the first list. The result becomes the new effective field
list.
""" """
path = self.get_input_file_path() path = self.get_input_file_path()
log.debug("opening input file: %s", path)
self.input_file = open(path, 'rt', encoding=self.csv_encoding) self.input_file = open(path, 'rt', encoding=self.csv_encoding)
self.input_reader = csv.DictReader(self.input_file) self.input_reader = csv.DictReader(self.input_file)
# nb. importer may have all supported fields by default, so
# must prune to the subset also present in the input file
fields = self.get_fields()
orientation = self.orientation.value
log.debug(f"supported fields for {orientation}: %s", fields)
self.fields = [f for f in self.input_reader.fieldnames or []
if f in fields]
log.debug("fields present in source data: %s", self.fields)
if not self.fields:
self.input_file.close()
raise ValueError("input file has no recognized fields")
def close_input_file(self): def close_input_file(self):
""" """ """ """
self.input_file.close() self.input_file.close()
@ -195,7 +222,7 @@ class FromCsvToSqlalchemyMixin:
}) })
class FromCsvToWutta(FromCsvToSqlalchemyMixin, ToWuttaHandler): class FromCsvToWutta(FromCsvToSqlalchemyMixin, FromFileHandler, ToWuttaHandler):
""" """
Handler for CSV Wutta :term:`app database` import. Handler for CSV Wutta :term:`app database` import.
""" """

View file

@ -25,6 +25,7 @@ Data Import / Export Handlers
""" """
import logging import logging
import os
from collections import OrderedDict from collections import OrderedDict
from enum import Enum from enum import Enum
@ -501,9 +502,28 @@ class ImportHandler(GenericHandler):
class FromFileHandler(ImportHandler): class FromFileHandler(ImportHandler):
""" """
Handler for import/export which uses an input file as data source. Handler for import/export which uses input file(s) as data source.
This handler assumes its importer/exporter classes inherit from
:class:`~wuttasync.importing.base.FromFile` for source parent
logic.
""" """
def process_data(self, *keys, **kwargs):
    """ """
    # nb. this assumes FromFile importer/exporter
    location = kwargs.pop('input_file_path', None)

    if location:
        # the path is handed along as a folder only when the caller
        # gave no explicit folder and the path really is a folder;
        # in every other case it is handed along as a file path
        if kwargs.get('input_file_dir') or not os.path.isdir(location):
            kwargs['input_file_path'] = location
        else:
            kwargs['input_file_dir'] = location

    # parent logic takes it from here
    super().process_data(*keys, **kwargs)
class ToSqlalchemyHandler(ImportHandler): class ToSqlalchemyHandler(ImportHandler):
""" """

0
tests/cli/__init__.py Normal file
View file

0
tests/cli/example.conf Normal file
View file

38
tests/cli/test_base.py Normal file
View file

@ -0,0 +1,38 @@
#-*- coding: utf-8; -*-
import inspect
from unittest import TestCase
from wuttasync.cli import base as mod
class TestImporterCommand(TestCase):
    """
    Tests for the ``importer_command`` decorator.
    """

    def test_basic(self):

        def command(ctx, **kwargs):
            pass

        # undecorated function has the placeholder but no common params
        before = inspect.signature(command).parameters
        self.assertIn('kwargs', before)
        self.assertNotIn('dry_run', before)

        # decorated function swaps the placeholder for common params
        decorated = mod.importer_command(command)
        after = inspect.signature(decorated).parameters
        self.assertNotIn('kwargs', after)
        self.assertIn('dry_run', after)
class TestFileImporterCommand(TestCase):
    """
    Tests for the ``file_importer_command`` decorator.
    """

    def test_basic(self):

        def command(ctx, **kwargs):
            pass

        # undecorated function has the placeholder but no common params
        before = inspect.signature(command).parameters
        self.assertIn('kwargs', before)
        self.assertNotIn('dry_run', before)
        self.assertNotIn('input_file_path', before)

        # decorated function swaps the placeholder for common params,
        # including the one for input file
        decorated = mod.file_importer_command(command)
        after = inspect.signature(decorated).parameters
        self.assertNotIn('kwargs', after)
        self.assertIn('dry_run', after)
        self.assertIn('input_file_path', after)

View file

@ -0,0 +1,24 @@
#-*- coding: utf-8; -*-
import os
from unittest import TestCase
from unittest.mock import MagicMock, patch
from wuttasync.cli import import_csv as mod
from wuttasync.importing.csv import FromCsvToWutta
here = os.path.dirname(__file__)
example_conf = os.path.join(here, 'example.conf')
class TestImportCsv(TestCase):
    """
    Tests for the ``import_csv`` command function.
    """

    def test_basic(self):
        params = {
            'models': [],
            'create': True,
            'update': True,
            'delete': False,
            'dry_run': True,
        }
        ctx = MagicMock(params=params)

        # nb. handler logic is mocked, so no actual import happens
        with patch.object(FromCsvToWutta, 'process_data') as handler_call:
            mod.import_csv(ctx)

            # empty model list means no positional args; the other
            # params are passed along as keywords
            handler_call.assert_called_once_with(create=True, update=True,
                                                 delete=False, dry_run=True)

View file

@ -14,6 +14,12 @@ class TestFromCsv(DataTestCase):
self.setup_db() self.setup_db()
self.handler = ImportHandler(self.config) self.handler = ImportHandler(self.config)
self.data_path = self.write_file('data.txt', """\
name,value
foo,bar
foo2,bar2
""")
def make_importer(self, **kwargs): def make_importer(self, **kwargs):
kwargs.setdefault('handler', self.handler) kwargs.setdefault('handler', self.handler)
return mod.FromCsv(self.config, **kwargs) return mod.FromCsv(self.config, **kwargs)
@ -33,19 +39,36 @@ class TestFromCsv(DataTestCase):
model = self.app.model model = self.app.model
imp = self.make_importer(model_class=model.Setting) imp = self.make_importer(model_class=model.Setting)
path = self.write_file('data.txt', '') # normal operation, input file includes all fields
imp.input_file_path = path imp = self.make_importer(model_class=model.Setting, input_file_path=self.data_path)
self.assertEqual(imp.fields, ['name', 'value'])
imp.open_input_file() imp.open_input_file()
self.assertEqual(imp.input_file.name, path) self.assertEqual(imp.input_file.name, self.data_path)
self.assertIsInstance(imp.input_reader, csv.DictReader) self.assertIsInstance(imp.input_reader, csv.DictReader)
self.assertEqual(imp.fields, ['name', 'value'])
imp.input_file.close() imp.input_file.close()
# this file is missing a field, plus we'll pretend more are
# supported - but should wind up with just the one field
missing = self.write_file('missing.txt', 'name')
imp = self.make_importer(model_class=model.Setting, input_file_path=missing)
imp.fields.extend(['lots', 'more'])
self.assertEqual(imp.fields, ['name', 'value', 'lots', 'more'])
imp.open_input_file()
self.assertEqual(imp.fields, ['name'])
imp.input_file.close()
# and what happens when no known fields are found
bogus = self.write_file('bogus.txt', 'blarg')
imp = self.make_importer(model_class=model.Setting, input_file_path=bogus)
self.assertEqual(imp.fields, ['name', 'value'])
self.assertRaises(ValueError, imp.open_input_file)
def test_close_input_file(self): def test_close_input_file(self):
model = self.app.model model = self.app.model
imp = self.make_importer(model_class=model.Setting) imp = self.make_importer(model_class=model.Setting)
path = self.write_file('data.txt', '') imp.input_file_path = self.data_path
imp.input_file_path = path
imp.open_input_file() imp.open_input_file()
imp.close_input_file() imp.close_input_file()
self.assertFalse(hasattr(imp, 'input_reader')) self.assertFalse(hasattr(imp, 'input_reader'))
@ -55,12 +78,7 @@ class TestFromCsv(DataTestCase):
model = self.app.model model = self.app.model
imp = self.make_importer(model_class=model.Setting) imp = self.make_importer(model_class=model.Setting)
path = self.write_file('data.csv', """\ imp.input_file_path = self.data_path
name,value
foo,bar
foo2,bar2
""")
imp.input_file_path = path
imp.open_input_file() imp.open_input_file()
objects = imp.get_source_objects() objects = imp.get_source_objects()
imp.close_input_file() imp.close_input_file()

View file

@ -173,6 +173,31 @@ class TestImportHandler(DataTestCase):
self.assertRaises(KeyError, handler.get_importer, 'BunchOfNonsense', model_class=model.Setting) self.assertRaises(KeyError, handler.get_importer, 'BunchOfNonsense', model_class=model.Setting)
class TestFromFileHandler(DataTestCase):
    """
    Tests for the ``FromFileHandler`` class.
    """

    def make_handler(self, **kwargs):
        return mod.FromFileHandler(self.config, **kwargs)

    def test_process_data(self):
        handler = self.make_handler()
        file_path = self.write_file('data.txt', '')

        # nb. parent logic is mocked, to inspect what gets passed along
        with patch.object(mod.ImportHandler, 'process_data') as parent_call:

            # no args at all
            handler.process_data()
            parent_call.assert_called_once_with()

            # path to a file is passed along as-is
            parent_call.reset_mock()
            handler.process_data(input_file_path=file_path)
            parent_call.assert_called_once_with(input_file_path=file_path)

            # path to a folder is passed along as the input dir
            parent_call.reset_mock()
            handler.process_data(input_file_path=self.tempdir)
            parent_call.assert_called_once_with(input_file_dir=self.tempdir)
class TestToSqlalchemyHandler(DataTestCase): class TestToSqlalchemyHandler(DataTestCase):
def make_handler(self, **kwargs): def make_handler(self, **kwargs):