Add per-status timeouts and tracking for probe status

i.e. this lets us keep track of when a probe becomes "high temp", so that if it
later becomes "critical high temp" we still know how long it has been high.
This commit is contained in:
Lance Edgar 2018-10-19 14:58:30 -05:00
parent 8be64c0580
commit 19553edda6
6 changed files with 324 additions and 41 deletions

View file

@ -0,0 +1,51 @@
# -*- coding: utf-8; -*-
"""add more timeouts
Revision ID: b02c531caca5
Revises: 5f2b87474433
Create Date: 2018-10-19 13:51:54.422490
"""
from __future__ import unicode_literals, absolute_import
# revision identifiers, used by Alembic.
revision = 'b02c531caca5'
down_revision = u'5f2b87474433'
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
import rattail.db.types
def upgrade():
    """
    Add a nullable "started" timestamp and a nullable "timeout" integer
    column to the ``probe`` table, for each tracked probe status.
    """
    # probe
    status_prefixes = (
        'critical_max',
        'critical_min',
        'error',
        'good_max',
        'good_min',
    )
    for prefix in status_prefixes:
        # each status gets its "started" column first, then its "timeout"
        op.add_column('probe', sa.Column(prefix + '_started', sa.DateTime(), nullable=True))
        op.add_column('probe', sa.Column(prefix + '_timeout', sa.Integer(), nullable=True))
def downgrade():
    """
    Drop the per-status "timeout" and "started" columns from the ``probe``
    table, in the reverse of the order in which ``upgrade()`` adds them.
    """
    # probe
    status_prefixes = (
        'good_min',
        'good_max',
        'error',
        'critical_min',
        'critical_max',
    )
    for prefix in status_prefixes:
        # "timeout" is dropped before "started", mirroring the upgrade
        op.drop_column('probe', prefix + '_timeout')
        op.drop_column('probe', prefix + '_started')

View file

@ -2,7 +2,7 @@
################################################################################
#
# Rattail -- Retail Software Framework
# Copyright © 2010-2017 Lance Edgar
# Copyright © 2010-2018 Lance Edgar
#
# This file is part of Rattail.
#
@ -33,6 +33,7 @@ import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declarative_base
from rattail import enum
from rattail.db.model import uuid_column
from rattail.db.model.core import ModelBase
@ -130,11 +131,88 @@ class Probe(Base):
device_path = sa.Column(sa.String(length=255), nullable=True)
enabled = sa.Column(sa.Boolean(), nullable=False, default=True)
good_temp_min = sa.Column(sa.Integer(), nullable=False)
good_temp_max = sa.Column(sa.Integer(), nullable=False)
critical_temp_min = sa.Column(sa.Integer(), nullable=False)
critical_temp_max = sa.Column(sa.Integer(), nullable=False)
critical_temp_max = sa.Column(sa.Integer(), nullable=False, doc="""
Maximum high temperature; when a reading is greater than or equal to this
value, the probe's status becomes "critical high temp".
""")
critical_max_started = sa.Column(sa.DateTime(), nullable=True, doc="""
Timestamp when the probe readings started to indicate "critical high temp"
status. This should be null unless the probe currently has that status.
""")
critical_max_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
Number of minutes the probe is allowed to have "critical high temp" status,
before the first email alert is sent for that. If empty, there will be no
delay and the first email will go out as soon as that status is reached.
If set, should probably be a *low* number.
""")
good_temp_max = sa.Column(sa.Integer(), nullable=False, doc="""
Maximum good temperature; when a reading is greater than or equal to this
value, the probe's status becomes "high temp" (unless the reading also
breaches the :attr:`critical_temp_max` threshold).
""")
good_max_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
Number of minutes the probe is allowed to have "high temp" status, before
the first email alert is sent for that. This is typically meant to account
for the length of the defrost cycle, so may be a rather large number.
""")
good_max_started = sa.Column(sa.DateTime(), nullable=True, doc="""
Timestamp when the probe readings started to indicate "high temp" status.
This should be null unless the probe currently has either "high temp" or
"critical high temp" status.
""")
good_temp_min = sa.Column(sa.Integer(), nullable=False, doc="""
Minimum good temperature; when a reading is less than or equal to this
value, the probe's status becomes "low temp" (unless the reading also
breaches the :attr:`critical_temp_min` threshold).
""")
good_min_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
Number of minutes the probe is allowed to have "low temp" status, before
the first email alert is sent for that.
""")
good_min_started = sa.Column(sa.DateTime(), nullable=True, doc="""
Timestamp when the probe readings started to indicate "low temp" status.
This should be null unless the probe currently has either "low temp" or
"critical low temp" status.
""")
# Required threshold column.  The delay before the first "critical low temp"
# alert is configured separately, via ``critical_min_timeout``.
critical_temp_min = sa.Column(sa.Integer(), nullable=False, doc="""
Minimum low temperature; when a reading is less than or equal to this
value, the probe's status becomes "critical low temp".
""")
critical_min_started = sa.Column(sa.DateTime(), nullable=True, doc="""
Timestamp when the probe readings started to indicate "critical low temp"
status. This should be null unless the probe currently has that status.
""")
critical_min_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
Number of minutes the probe is allowed to have "critical low temp" status,
before the first email alert is sent for that. If empty, there will be no
delay and the first email will go out as soon as that status is reached.
""")
error_started = sa.Column(sa.DateTime(), nullable=True, doc="""
Timestamp when the probe readings started to indicate "error" status. This
should be null unless the probe currently has that status.
""")
error_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
Number of minutes the probe is allowed to have "error" status, before the
first email alert is sent for that. If empty, there will be no delay and
the first email will go out as soon as that status is reached.
""")
# TODO: deprecate / remove this
therm_status_timeout = sa.Column(sa.Integer(), nullable=False, doc="""
Number of minutes the temperature is allowed to be "high" before the first
"high temp" email alert is sent. This is typically meant to account for
@ -159,6 +237,90 @@ class Probe(Base):
def __str__(self):
return self.description
def start_status(self, status, time):
    """
    Record ``time`` as the moment at which the given status began.

    Exactly one "started" timestamp field is set, chosen according to the
    status.  Certain other "started" fields are reset to ``None`` at the
    same time; which ones depends on the (new) status.  This lets us track
    e.g. crossing the "high temp" threshold as a separate event from
    reaching the "critical high temp" threshold.

    A status which is not recognized leaves all fields untouched.
    """
    if status in (enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
                  enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP):
        # "high temp" start time is deliberately preserved here
        begun = 'critical_max_started'
        reset = ('good_min_started',
                 'critical_min_started',
                 'error_started')

    elif status == enum.TEMPMON_PROBE_STATUS_HIGH_TEMP:
        begun = 'good_max_started'
        reset = ('critical_max_started',
                 'good_min_started',
                 'critical_min_started',
                 'error_started')

    elif status == enum.TEMPMON_PROBE_STATUS_LOW_TEMP:
        begun = 'good_min_started'
        reset = ('critical_max_started',
                 'good_max_started',
                 'critical_min_started',
                 'error_started')

    elif status == enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP:
        # "low temp" start time is deliberately preserved here
        begun = 'critical_min_started'
        reset = ('critical_max_started',
                 'good_max_started',
                 'error_started')

    elif status == enum.TEMPMON_PROBE_STATUS_ERROR:
        # an error does not disturb any of the temperature status times
        begun = 'error_started'
        reset = ()

    else:
        return

    for field in reset:
        setattr(self, field, None)
    setattr(self, begun, time)
def status_started(self, status):
    """
    Look up the "started" timestamp which corresponds to the given status.

    Returns the value of the matching timestamp field, or ``None`` when the
    status is not one of the recognized ones.
    """
    # checked in order; first match wins
    fields_by_status = (
        ((enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
          enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP), 'critical_max_started'),
        ((enum.TEMPMON_PROBE_STATUS_HIGH_TEMP,), 'good_max_started'),
        ((enum.TEMPMON_PROBE_STATUS_LOW_TEMP,), 'good_min_started'),
        ((enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP,), 'critical_min_started'),
        ((enum.TEMPMON_PROBE_STATUS_ERROR,), 'error_started'),
    )
    for statuses, field in fields_by_status:
        if status in statuses:
            return getattr(self, field)
    return None
def timeout_for_status(self, status):
    """
    Return the timeout value for the given status.  This is the number of
    minutes by which the initial email for the status should be delayed.

    Returns ``None`` when the status is not one of the recognized ones.
    """
    if status in (enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
                  enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP):
        return self.critical_max_timeout

    if status == enum.TEMPMON_PROBE_STATUS_HIGH_TEMP:
        # an empty (or zero) value falls back to the legacy timeout
        specific = self.good_max_timeout
        return specific if specific else self.therm_status_timeout

    if status == enum.TEMPMON_PROBE_STATUS_LOW_TEMP:
        # an empty (or zero) value falls back to the legacy timeout
        specific = self.good_min_timeout
        return specific if specific else self.therm_status_timeout

    if status == enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP:
        return self.critical_min_timeout

    if status == enum.TEMPMON_PROBE_STATUS_ERROR:
        return self.error_timeout

    return None
@six.python_2_unicode_compatible
class Reading(Base):