Refactor Catapult export a bit, for the sake of "duplicate" item_id handling

also improve certain warnings and exclusion rules
2020-04-10 12:59:09 -05:00 · 2020-04-10 12:59:09 -05:00 · ae58b7c55a
commit ae58b7c55a
parent 7e450e11b1
1 changed files with 138 additions and 31 deletions
--- a/rattail_corepos/corepos/importing/db/exporters/catapult_inventory.py
+++ b/rattail_corepos/corepos/importing/db/exporters/catapult_inventory.py
@ -24,6 +24,7 @@
 CORE-POS -> Catapult Inventory Workbook
 """
 import re
 import datetime
 import logging
@ -36,6 +37,7 @@ from corepos.db.office_op import model as corepos
 from corepos.db.util import table_exists
 from rattail.gpc import GPC
 from rattail.core import get_uuid
 from rattail.util import OrderedDict
 from rattail.importing.handlers import ToFileHandler
 from rattail_corepos.corepos.importing.db.corepos import FromCoreHandler, FromCore
@ -64,7 +66,11 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
    Inventory Item data importer.
    """
    host_model_class = corepos.Product
    # note that we use a "dummy" uuid key here, so logic will consider each row
    # to be unique, even when duplicate item_id's are present
    key = 'uuid'
    supported_fields = [
        'uuid',
        'item_id',
        'dept_id',
        'dept_name',
@ -116,15 +122,32 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
        'scale_ingredient_text',
    ]
    # we want to add a "duplicate" column at the end
    include_duplicate_column = True
    type2_upc_pattern = re.compile(r'^2(\d{5})00000\d')
    def setup(self):
        super(InventoryItemImporter, self).setup()
        # this is used for sorting, when a value has no date
        self.old_datetime = datetime.datetime(1900, 1, 1)
        self.exclude_invalid_upc = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.exclude_invalid_upc',
            default=False)
        self.warn_invalid_upc = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.warn_invalid_upc',
            default=True)
        self.ignored_upcs = self.config.getlist(
            'corepos', 'exporting.catapult_inventory.ignored_upcs')
        self.exclude_missing_department = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.exclude_missing_department',
            default=False)
        self.warn_missing_department = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.warn_missing_department',
            default=True)
@ -145,6 +168,18 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
            'corepos', 'exporting.catapult_inventory.warn_unknown_deposit',
            default=True)
        self.warn_scale_label_non_plu = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.warn_scale_label_non_plu',
            default=True)
        self.warn_scale_label_short_plu = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.warn_scale_label_short_plu',
            default=True)
        self.warn_weight_profile_non_plu = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.warn_weight_profile_non_plu',
            default=True)
        self.warn_multiple_vendor_items = self.config.getbool(
            'corepos', 'exporting.catapult_inventory.warn_multiple_vendor_items',
            default=True)
@ -216,45 +251,113 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
                                  .joinedload(corepos.ProductPhysicalLocation.floor_section))
        return query
    def normalize_host_data(self, host_objects=None):
        """
        Normalize all host rows, then re-order them and flag duplicates.

        The rows produced by the parent class are re-sorted by ``item_id``:
        rows whose ``item_id`` is empty or non-numeric come first, followed by
        the numeric ones in ascending integer order.  Every row which shares
        its ``item_id`` with at least one other row then gets a
        ``'__duplicate__'`` key set to ``True``.  No rows are removed.

        :param host_objects: Passed through unchanged to the parent class.

        :returns: The re-ordered list of normalized row dicts.
        """
        rows = super(InventoryItemImporter, self).normalize_host_data(
            host_objects=host_objects)

        def has_numeric_id(row):
            item_id = row['item_id']
            return bool(item_id) and item_id.isdigit()

        # the item_id may differ from the original CORE UPC (e.g. it may have
        # been replaced with a PLU), so the incoming order can't be trusted;
        # split rows into numeric vs. everything else, in a single pass
        partitions = {True: [], False: []}
        for row in rows:
            partitions[has_numeric_id(row)].append(row)

        by_number = partitions[True]
        by_number.sort(key=lambda row: int(row['item_id']))
        by_text = partitions[False]
        by_text.sort(key=lambda row: row['item_id'])

        # non-numeric rows lead the output, to bring them to user's attention
        rows = by_text + by_number

        # every row must be preserved, so duplicates are only *marked*, never
        # pruned; start by bucketing the rows according to their item_id
        rows_by_item_id = {}

        def bucket(row, i):
            rows_by_item_id.setdefault(row['item_id'], []).append(row)

        self.progress_loop(bucket, rows,
                           message="Grouping rows by Item ID")

        # any bucket holding 2+ rows means a duplicated item_id.  the bucket
        # lists hold the very same dict objects as ``rows``, so flagging them
        # here also updates the rows we're about to return
        def flag(siblings, i):
            if len(siblings) < 2:
                return
            for row in siblings:
                row['__duplicate__'] = True

        self.progress_loop(flag, list(rows_by_item_id.values()),
                           message="Marking any duplicate Item IDs")

        return rows
    def normalize_host_object(self, product):
        item_id = product.upc
        if not item_id:
            log.warning("product id %s has no upc: %s",
                        product.id, product)
            return
        if not item_id.isdigit():
            log.debug("product %s has non-numeric upc: %s",
                      product.upc, product)
            return
        if self.ignored_upcs and item_id in self.ignored_upcs:
            log.debug("ignoring UPC %s for product: %s", product.upc, product)
            return
        if not item_id:
            logger = log.warning if self.warn_invalid_upc else log.debug
            logger("product id %s has no upc: %s", product.id, product)
            if self.exclude_invalid_upc:
                return
        if not item_id.isdigit():
            logger = log.warning if self.warn_invalid_upc else log.debug
            logger("product %s has non-numeric upc: %s",
                   product.upc, product)
            if self.exclude_invalid_upc:
                return
        # convert item_id either to a PLU, or formatted UPC
        is_plu = False
-        if len(str(int(item_id))) < 6:
+        if item_id.isdigit():   # can only convert if it's numeric!
-            is_plu = True
+            if len(str(int(item_id))) < 6:
-            item_id = str(int(item_id))
+                is_plu = True
-        else: # must add check digit, and re-format
+                item_id = str(int(item_id))
-            upc = GPC(item_id, calc_check_digit='upc')
+            else: # must add check digit, and re-format
-            item_id = str(upc)
+                upc = GPC(item_id, calc_check_digit='upc')
-            assert len(item_id) == 14
+                item_id = str(upc)
-            # drop leading zero(s)
+                assert len(item_id) == 14
-            if item_id[1] == '0': # UPC-A
+                # drop leading zero(s)
-                item_id = item_id[2:]
+                if item_id[1] == '0': # UPC-A
-                assert len(item_id) == 12
+                    item_id = item_id[2:]
-            else: # EAN13
+                    assert len(item_id) == 12
-                item_id = item_id[1:]
+                else: # EAN13
-                assert len(item_id) == 13
+                    item_id = item_id[1:]
                    assert len(item_id) == 13
        # figure out the "scale label" data, which may also affect item_id
        scale_item = product.scale_item
        scale_label = None
        if scale_item:
            scale_label = 'Y'
            if item_id.isdigit():
                if len(item_id) < 5:
                    logger = log.warning if self.warn_scale_label_short_plu else log.debug
                    logger("product %s has scale label, but PLU is less than 5 digits (%s): %s",
                           product.upc, item_id, product)
                elif len(item_id) > 5:
                    match = self.type2_upc_pattern.match(item_id)
                    if match:
                        # convert type-2 UPC to PLU
                        is_plu = True
                        item_id = str(int(match.group(1)))
                        log.debug("converted type-2 UPC %s to PLU %s for: %s",
                                  product.upc, item_id, product)
                    else:
                        logger = log.warning if self.warn_scale_label_non_plu else log.debug
                        logger("product %s has scale label, but non-PLU item_id: %s",
                               product.upc, product)
        department = product.department
        if not department:
            logger = log.warning if self.warn_missing_department else log.debug
            logger("product %s has no department: %s", product.upc, product)
-            return
+            if self.exclude_missing_department:
                return
        size = product.size
        # TODO: this logic may actually be client-specific?  i just happened to
@ -290,7 +393,11 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
            sold_by_ea_or_lb = 'LB' if product.scale else 'EA'
        weight_profile = None
-        if product.scale and len(item_id) == 12 and item_id[0] == '2':
+        if product.scale or scale_item:
            if not is_plu:
                logger = log.warning if self.warn_weight_profile_non_plu else log.debug
                logger("product %s has weight profile, but non-PLU item_id %s: %s",
                       product.upc, item_id, product)
            weight_profile = 'LBNT'
        # calculate tax rates according to configured "mappings"
@ -428,7 +535,6 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
                   product.upc, len(memo), memo)
            memo = memo[:254]
        scale_item = product.scale_item
        scale_ingredient_text = None
        if scale_item:
            scale_ingredient_text = scale_item.text
@ -439,9 +545,10 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
                scale_ingredient_text = scale_ingredient_text.replace("\n", " ")
        return {
            'uuid': get_uuid(),
            'item_id': item_id,
-            'dept_id': department.number,
+            'dept_id': department.number if department else None,
-            'dept_name': department.name,
+            'dept_name': department.name if department else None,
            'receipt_alias': product.description,
            'brand': product.brand,
            'item_name': product.description,
@ -453,7 +560,7 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
            'last_cost': product.cost,
            'price_divider': price_divider,
            'base_price': product.normal_price,
-            'ideal_margin': department.margin * 100 if department.margin else None,
+            'ideal_margin': department.margin * 100 if department and department.margin else None,
            # TODO: does CORE have these?
            # 'disc_mult': None,
@ -463,7 +570,7 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
            # TODO: does CORE have this?
            # 'pos_menu_group': None,
-            'scale_label': 'Y' if scale_item else None,
+            'scale_label': scale_label,
            'sold_by_ea_or_lb': sold_by_ea_or_lb,
            'quantity_required': 'Y' if product.quantity_enforced else None,
            'weight_profile': weight_profile,