Refactor Catapult export a bit, for sake of "duplicate" item_id

also improve certain warnings and exclusion rules
This commit is contained in:
Lance Edgar 2020-04-10 12:59:09 -05:00
parent 7e450e11b1
commit ae58b7c55a

View file

@ -24,6 +24,7 @@
CORE-POS -> Catapult Inventory Workbook CORE-POS -> Catapult Inventory Workbook
""" """
import re
import datetime import datetime
import logging import logging
@ -36,6 +37,7 @@ from corepos.db.office_op import model as corepos
from corepos.db.util import table_exists from corepos.db.util import table_exists
from rattail.gpc import GPC from rattail.gpc import GPC
from rattail.core import get_uuid
from rattail.util import OrderedDict from rattail.util import OrderedDict
from rattail.importing.handlers import ToFileHandler from rattail.importing.handlers import ToFileHandler
from rattail_corepos.corepos.importing.db.corepos import FromCoreHandler, FromCore from rattail_corepos.corepos.importing.db.corepos import FromCoreHandler, FromCore
@ -64,7 +66,11 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
Inventory Item data importer. Inventory Item data importer.
""" """
host_model_class = corepos.Product host_model_class = corepos.Product
# note that we use a "dummy" uuid key here, so logic will consider each row
# to be unique, even when duplicate item_id's are present
key = 'uuid'
supported_fields = [ supported_fields = [
'uuid',
'item_id', 'item_id',
'dept_id', 'dept_id',
'dept_name', 'dept_name',
@ -116,15 +122,32 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
'scale_ingredient_text', 'scale_ingredient_text',
] ]
# we want to add a "duplicate" column at the end
include_duplicate_column = True
type2_upc_pattern = re.compile(r'^2(\d{5})00000\d')
def setup(self): def setup(self):
super(InventoryItemImporter, self).setup() super(InventoryItemImporter, self).setup()
# this is used for sorting, when a value has no date # this is used for sorting, when a value has no date
self.old_datetime = datetime.datetime(1900, 1, 1) self.old_datetime = datetime.datetime(1900, 1, 1)
self.exclude_invalid_upc = self.config.getbool(
'corepos', 'exporting.catapult_inventory.exclude_invalid_upc',
default=False)
self.warn_invalid_upc = self.config.getbool(
'corepos', 'exporting.catapult_inventory.warn_invalid_upc',
default=True)
self.ignored_upcs = self.config.getlist( self.ignored_upcs = self.config.getlist(
'corepos', 'exporting.catapult_inventory.ignored_upcs') 'corepos', 'exporting.catapult_inventory.ignored_upcs')
self.exclude_missing_department = self.config.getbool(
'corepos', 'exporting.catapult_inventory.exclude_missing_department',
default=False)
self.warn_missing_department = self.config.getbool( self.warn_missing_department = self.config.getbool(
'corepos', 'exporting.catapult_inventory.warn_missing_department', 'corepos', 'exporting.catapult_inventory.warn_missing_department',
default=True) default=True)
@ -145,6 +168,18 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
'corepos', 'exporting.catapult_inventory.warn_unknown_deposit', 'corepos', 'exporting.catapult_inventory.warn_unknown_deposit',
default=True) default=True)
self.warn_scale_label_non_plu = self.config.getbool(
'corepos', 'exporting.catapult_inventory.warn_scale_label_non_plu',
default=True)
self.warn_scale_label_short_plu = self.config.getbool(
'corepos', 'exporting.catapult_inventory.warn_scale_label_short_plu',
default=True)
self.warn_weight_profile_non_plu = self.config.getbool(
'corepos', 'exporting.catapult_inventory.warn_weight_profile_non_plu',
default=True)
self.warn_multiple_vendor_items = self.config.getbool( self.warn_multiple_vendor_items = self.config.getbool(
'corepos', 'exporting.catapult_inventory.warn_multiple_vendor_items', 'corepos', 'exporting.catapult_inventory.warn_multiple_vendor_items',
default=True) default=True)
@ -216,45 +251,113 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
.joinedload(corepos.ProductPhysicalLocation.floor_section)) .joinedload(corepos.ProductPhysicalLocation.floor_section))
return query return query
def normalize_host_data(self, host_objects=None):
    """
    Normalize all host rows, re-order them by ``item_id``, and flag any
    rows which share an ``item_id`` with another row.

    Rows with an empty or non-numeric ``item_id`` are placed first (sorted
    by the raw value), followed by the all-digit ones (sorted by integer
    value).  No row is ever removed; rows whose ``item_id`` occurs more
    than once simply get a ``'__duplicate__'`` key set to ``True``.

    :param host_objects: Passed through unchanged to the parent class
       implementation.

    :returns: The re-ordered list of normalized row dicts.
    """
    rows = super(InventoryItemImporter, self).normalize_host_data(
        host_objects=host_objects)

    # the item_id of a row may differ from the original CORE UPC (e.g. it
    # may have been replaced with a PLU), so ordering must be redone here.
    # non-numeric values go first, to bring them to the user's attention
    digit_rows, other_rows = [], []
    for entry in rows:
        item_id = entry['item_id']
        if not item_id or not item_id.isdigit():
            other_rows.append(entry)
        else:
            digit_rows.append(entry)
    digit_rows.sort(key=lambda entry: int(entry['item_id']))
    other_rows.sort(key=lambda entry: entry['item_id'])
    ordered = other_rows + digit_rows

    # bucket every row under its item_id, so duplicates can be detected
    by_item_id = {}

    def group(entry, i):
        item_id = entry['item_id']
        if item_id not in by_item_id:
            by_item_id[item_id] = []
        by_item_id[item_id].append(entry)

    self.progress_loop(group, ordered,
                       message="Grouping rows by Item ID")

    # any bucket holding 2+ rows means a duplicated item_id; the row dicts
    # are shared with ``ordered`` so marking them here affects the result
    def flag(bucket, i):
        if len(bucket) < 2:
            return
        for entry in bucket:
            entry['__duplicate__'] = True

    self.progress_loop(flag, list(by_item_id.values()),
                       message="Marking any duplicate Item IDs")

    return ordered
def normalize_host_object(self, product): def normalize_host_object(self, product):
item_id = product.upc item_id = product.upc
if not item_id:
log.warning("product id %s has no upc: %s",
product.id, product)
return
if not item_id.isdigit():
log.debug("product %s has non-numeric upc: %s",
product.upc, product)
return
if self.ignored_upcs and item_id in self.ignored_upcs: if self.ignored_upcs and item_id in self.ignored_upcs:
log.debug("ignoring UPC %s for product: %s", product.upc, product) log.debug("ignoring UPC %s for product: %s", product.upc, product)
return return
if not item_id:
logger = log.warning if self.warn_invalid_upc else log.debug
logger("product id %s has no upc: %s", product.id, product)
if self.exclude_invalid_upc:
return
if not item_id.isdigit():
logger = log.warning if self.warn_invalid_upc else log.debug
logger("product %s has non-numeric upc: %s",
product.upc, product)
if self.exclude_invalid_upc:
return
# convert item_id either to a PLU, or formatted UPC # convert item_id either to a PLU, or formatted UPC
is_plu = False is_plu = False
if len(str(int(item_id))) < 6: if item_id.isdigit(): # can only convert if it's numeric!
is_plu = True if len(str(int(item_id))) < 6:
item_id = str(int(item_id)) is_plu = True
else: # must add check digit, and re-format item_id = str(int(item_id))
upc = GPC(item_id, calc_check_digit='upc') else: # must add check digit, and re-format
item_id = str(upc) upc = GPC(item_id, calc_check_digit='upc')
assert len(item_id) == 14 item_id = str(upc)
# drop leading zero(s) assert len(item_id) == 14
if item_id[1] == '0': # UPC-A # drop leading zero(s)
item_id = item_id[2:] if item_id[1] == '0': # UPC-A
assert len(item_id) == 12 item_id = item_id[2:]
else: # EAN13 assert len(item_id) == 12
item_id = item_id[1:] else: # EAN13
assert len(item_id) == 13 item_id = item_id[1:]
assert len(item_id) == 13
# figure out the "scale label" data, which may also affect item_id
scale_item = product.scale_item
scale_label = None
if scale_item:
scale_label = 'Y'
if item_id.isdigit():
if len(item_id) < 5:
logger = log.warning if self.warn_scale_label_short_plu else log.debug
logger("product %s has scale label, but PLU is less than 5 digits (%s): %s",
product.upc, item_id, product)
elif len(item_id) > 5:
match = self.type2_upc_pattern.match(item_id)
if match:
# convert type-2 UPC to PLU
is_plu = True
item_id = str(int(match.group(1)))
log.debug("converted type-2 UPC %s to PLU %s for: %s",
product.upc, item_id, product)
else:
logger = log.warning if self.warn_scale_label_non_plu else log.debug
logger("product %s has scale label, but non-PLU item_id: %s",
product.upc, product)
department = product.department department = product.department
if not department: if not department:
logger = log.warning if self.warn_missing_department else log.debug logger = log.warning if self.warn_missing_department else log.debug
logger("product %s has no department: %s", product.upc, product) logger("product %s has no department: %s", product.upc, product)
return if self.exclude_missing_department:
return
size = product.size size = product.size
# TODO: this logic may actually be client-specific? i just happened to # TODO: this logic may actually be client-specific? i just happened to
@ -290,7 +393,11 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
sold_by_ea_or_lb = 'LB' if product.scale else 'EA' sold_by_ea_or_lb = 'LB' if product.scale else 'EA'
weight_profile = None weight_profile = None
if product.scale and len(item_id) == 12 and item_id[0] == '2': if product.scale or scale_item:
if not is_plu:
logger = log.warning if self.warn_weight_profile_non_plu else log.debug
logger("product %s has weight profile, but non-PLU item_id %s: %s",
product.upc, item_id, product)
weight_profile = 'LBNT' weight_profile = 'LBNT'
# calculate tax rates according to configured "mappings" # calculate tax rates according to configured "mappings"
@ -428,7 +535,6 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
product.upc, len(memo), memo) product.upc, len(memo), memo)
memo = memo[:254] memo = memo[:254]
scale_item = product.scale_item
scale_ingredient_text = None scale_ingredient_text = None
if scale_item: if scale_item:
scale_ingredient_text = scale_item.text scale_ingredient_text = scale_item.text
@ -439,9 +545,10 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
scale_ingredient_text = scale_ingredient_text.replace("\n", " ") scale_ingredient_text = scale_ingredient_text.replace("\n", " ")
return { return {
'uuid': get_uuid(),
'item_id': item_id, 'item_id': item_id,
'dept_id': department.number, 'dept_id': department.number if department else None,
'dept_name': department.name, 'dept_name': department.name if department else None,
'receipt_alias': product.description, 'receipt_alias': product.description,
'brand': product.brand, 'brand': product.brand,
'item_name': product.description, 'item_name': product.description,
@ -453,7 +560,7 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
'last_cost': product.cost, 'last_cost': product.cost,
'price_divider': price_divider, 'price_divider': price_divider,
'base_price': product.normal_price, 'base_price': product.normal_price,
'ideal_margin': department.margin * 100 if department.margin else None, 'ideal_margin': department.margin * 100 if department and department.margin else None,
# TODO: does CORE have these? # TODO: does CORE have these?
# 'disc_mult': None, # 'disc_mult': None,
@ -463,7 +570,7 @@ class InventoryItemImporter(FromCore, catapult_importing.model.InventoryItemImpo
# TODO: does CORE have this? # TODO: does CORE have this?
# 'pos_menu_group': None, # 'pos_menu_group': None,
'scale_label': 'Y' if scale_item else None, 'scale_label': scale_label,
'sold_by_ea_or_lb': sold_by_ea_or_lb, 'sold_by_ea_or_lb': sold_by_ea_or_lb,
'quantity_required': 'Y' if product.quantity_enforced else None, 'quantity_required': 'Y' if product.quantity_enforced else None,
'weight_profile': weight_profile, 'weight_profile': weight_profile,