Add per-status timeouts and tracking for probe status
i.e. this lets us keep track of when a probe becomes "high temp" and then later if it becomes "critical high temp" we can still know how long it's been high
This commit is contained in:
parent
8be64c0580
commit
19553edda6
|
@ -0,0 +1,51 @@
|
||||||
|
# -*- coding: utf-8; -*-
|
||||||
|
"""add more timeouts
|
||||||
|
|
||||||
|
Revision ID: b02c531caca5
|
||||||
|
Revises: 5f2b87474433
|
||||||
|
Create Date: 2018-10-19 13:51:54.422490
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import unicode_literals, absolute_import
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'b02c531caca5'
|
||||||
|
down_revision = u'5f2b87474433'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
import rattail.db.types
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """
    Add the per-status "started" and "timeout" columns to the ``probe`` table.

    Every new column is nullable; the "started" columns are timestamps and the
    "timeout" columns are integers.
    """
    # probe
    new_columns = (
        ('critical_max_started', sa.DateTime),
        ('critical_max_timeout', sa.Integer),
        ('critical_min_started', sa.DateTime),
        ('critical_min_timeout', sa.Integer),
        ('error_started', sa.DateTime),
        ('error_timeout', sa.Integer),
        ('good_max_started', sa.DateTime),
        ('good_max_timeout', sa.Integer),
        ('good_min_started', sa.DateTime),
        ('good_min_timeout', sa.Integer),
    )
    for column_name, column_type in new_columns:
        op.add_column('probe', sa.Column(column_name, column_type(), nullable=True))
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """
    Drop the per-status "started" and "timeout" columns from the ``probe``
    table, in the reverse of the order in which :func:`upgrade` adds them.
    """
    # probe
    doomed_columns = (
        'good_min_timeout',
        'good_min_started',
        'good_max_timeout',
        'good_max_started',
        'error_timeout',
        'error_started',
        'critical_min_timeout',
        'critical_min_started',
        'critical_max_timeout',
        'critical_max_started',
    )
    for column_name in doomed_columns:
        op.drop_column('probe', column_name)
|
|
@ -2,7 +2,7 @@
|
||||||
################################################################################
|
################################################################################
|
||||||
#
|
#
|
||||||
# Rattail -- Retail Software Framework
|
# Rattail -- Retail Software Framework
|
||||||
# Copyright © 2010-2017 Lance Edgar
|
# Copyright © 2010-2018 Lance Edgar
|
||||||
#
|
#
|
||||||
# This file is part of Rattail.
|
# This file is part of Rattail.
|
||||||
#
|
#
|
||||||
|
@ -33,6 +33,7 @@ import sqlalchemy as sa
|
||||||
from sqlalchemy import orm
|
from sqlalchemy import orm
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
|
from rattail import enum
|
||||||
from rattail.db.model import uuid_column
|
from rattail.db.model import uuid_column
|
||||||
from rattail.db.model.core import ModelBase
|
from rattail.db.model.core import ModelBase
|
||||||
|
|
||||||
|
@ -130,11 +131,88 @@ class Probe(Base):
|
||||||
device_path = sa.Column(sa.String(length=255), nullable=True)
|
device_path = sa.Column(sa.String(length=255), nullable=True)
|
||||||
enabled = sa.Column(sa.Boolean(), nullable=False, default=True)
|
enabled = sa.Column(sa.Boolean(), nullable=False, default=True)
|
||||||
|
|
||||||
good_temp_min = sa.Column(sa.Integer(), nullable=False)
|
critical_temp_max = sa.Column(sa.Integer(), nullable=False, doc="""
|
||||||
good_temp_max = sa.Column(sa.Integer(), nullable=False)
|
Maximum high temperature; when a reading is greater than or equal to this
|
||||||
critical_temp_min = sa.Column(sa.Integer(), nullable=False)
|
value, the probe's status becomes "critical high temp".
|
||||||
critical_temp_max = sa.Column(sa.Integer(), nullable=False)
|
""")
|
||||||
|
|
||||||
|
critical_max_started = sa.Column(sa.DateTime(), nullable=True, doc="""
|
||||||
|
Timestamp when the probe readings started to indicate "critical high temp"
|
||||||
|
status. This should be null unless the probe currently has that status.
|
||||||
|
""")
|
||||||
|
|
||||||
|
critical_max_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
|
||||||
|
Number of minutes the probe is allowed to have "critical high temp" status,
|
||||||
|
before the first email alert is sent for that. If empty, there will be no
|
||||||
|
delay and the first email will go out as soon as that status is reached.
|
||||||
|
If set, should probably be a *low* number.
|
||||||
|
""")
|
||||||
|
|
||||||
|
good_temp_max = sa.Column(sa.Integer(), nullable=False, doc="""
|
||||||
|
Maximum good temperature; when a reading is greater than or equal to this
|
||||||
|
value, the probe's status becomes "high temp" (unless the reading also
|
||||||
|
breaches the :attr:`critical_temp_max` threshold).
|
||||||
|
""")
|
||||||
|
|
||||||
|
good_max_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
|
||||||
|
Number of minutes the probe is allowed to have "high temp" status, before
|
||||||
|
the first email alert is sent for that. This is typically meant to account
|
||||||
|
for the length of the defrost cycle, so may be a rather large number.
|
||||||
|
""")
|
||||||
|
|
||||||
|
good_max_started = sa.Column(sa.DateTime(), nullable=True, doc="""
|
||||||
|
Timestamp when the probe readings started to indicate "high temp" status.
|
||||||
|
This should be null unless the probe currently has either "high temp" or
|
||||||
|
"critical high temp" status.
|
||||||
|
""")
|
||||||
|
|
||||||
|
good_temp_min = sa.Column(sa.Integer(), nullable=False, doc="""
|
||||||
|
Minimum good temperature; when a reading is less than or equal to this
|
||||||
|
value, the probe's status becomes "low temp" (unless the reading also
|
||||||
|
breaches the :attr:`critical_temp_min` threshold).
|
||||||
|
""")
|
||||||
|
|
||||||
|
good_min_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
|
||||||
|
Number of minutes the probe is allowed to have "low temp" status, before
|
||||||
|
the first email alert is sent for that.
|
||||||
|
""")
|
||||||
|
|
||||||
|
good_min_started = sa.Column(sa.DateTime(), nullable=True, doc="""
|
||||||
|
Timestamp when the probe readings started to indicate "low temp" status.
|
||||||
|
This should be null unless the probe currently has either "low temp" or
|
||||||
|
"critical low temp" status.
|
||||||
|
""")
|
||||||
|
|
||||||
|
critical_temp_min = sa.Column(sa.Integer(), nullable=False, doc="""
    Minimum low temperature; when a reading is less than or equal to this
    value, the probe's status becomes "critical low temp".
    """)
|
||||||
|
|
||||||
|
critical_min_started = sa.Column(sa.DateTime(), nullable=True, doc="""
|
||||||
|
Timestamp when the probe readings started to indicate "critical low temp"
|
||||||
|
status. This should be null unless the probe currently has that status.
|
||||||
|
""")
|
||||||
|
|
||||||
|
critical_min_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
|
||||||
|
Number of minutes the probe is allowed to have "critical low temp" status,
|
||||||
|
before the first email alert is sent for that. If empty, there will be no
|
||||||
|
delay and the first email will go out as soon as that status is reached.
|
||||||
|
""")
|
||||||
|
|
||||||
|
error_started = sa.Column(sa.DateTime(), nullable=True, doc="""
|
||||||
|
Timestamp when the probe readings started to indicate "error" status. This
|
||||||
|
should be null unless the probe currently has that status.
|
||||||
|
""")
|
||||||
|
|
||||||
|
error_timeout = sa.Column(sa.Integer(), nullable=True, doc="""
|
||||||
|
Number of minutes the probe is allowed to have "error" status, before the
|
||||||
|
first email alert is sent for that. If empty, there will be no delay and
|
||||||
|
the first email will go out as soon as that status is reached.
|
||||||
|
""")
|
||||||
|
|
||||||
|
# TODO: deprecate / remove this
|
||||||
therm_status_timeout = sa.Column(sa.Integer(), nullable=False, doc="""
|
therm_status_timeout = sa.Column(sa.Integer(), nullable=False, doc="""
|
||||||
Number of minutes the temperature is allowed to be "high" before the first
|
Number of minutes the temperature is allowed to be "high" before the first
|
||||||
"high temp" email alert is sent. This is typically meant to account for
|
"high temp" email alert is sent. This is typically meant to account for
|
||||||
|
@ -159,6 +237,90 @@ class Probe(Base):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.description
|
return self.description
|
||||||
|
|
||||||
|
def start_status(self, status, time):
    """
    Update the "started" timestamp field for the given status.  This is
    used to track e.g. when we cross the "high temp" threshold, as a
    separate event from when the "critical high temp" threshold is reached.

    Note that in addition to setting the appropriate timestamp field, this
    also will clear out other timestamp fields, according to the specific
    (new) status.  When the new status is "good temp", all of the
    timestamp fields are cleared, since none of the tracked statuses is
    in effect any longer.

    :param status: The new status code for the probe; expected to be one
       of the ``enum.TEMPMON_PROBE_STATUS_*`` values.  Any status not
       handled below leaves all timestamp fields untouched.

    :param time: Timestamp to record as the start of the new status.
    """
    if status in (enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
                  enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP):
        self.critical_max_started = time
        # note, we don't clear out "high temp" time
        self.good_min_started = None
        self.critical_min_started = None
        self.error_started = None

    elif status == enum.TEMPMON_PROBE_STATUS_HIGH_TEMP:
        self.critical_max_started = None
        self.good_max_started = time
        self.good_min_started = None
        self.critical_min_started = None
        self.error_started = None

    elif status == enum.TEMPMON_PROBE_STATUS_LOW_TEMP:
        self.critical_max_started = None
        self.good_max_started = None
        self.good_min_started = time
        self.critical_min_started = None
        self.error_started = None

    elif status == enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP:
        self.critical_max_started = None
        self.good_max_started = None
        # note, we don't clear out "low temp" time
        self.critical_min_started = time
        self.error_started = None

    elif status == enum.TEMPMON_PROBE_STATUS_ERROR:
        # note, we don't clear out any other status times
        self.error_started = time

    elif status == enum.TEMPMON_PROBE_STATUS_GOOD_TEMP:
        # back to normal; each "started" field is documented as being null
        # unless the probe currently has the matching status, so none of
        # them may keep a stale value here
        self.critical_max_started = None
        self.good_max_started = None
        self.good_min_started = None
        self.critical_min_started = None
        self.error_started = None
|
||||||
|
|
||||||
|
def status_started(self, status):
    """
    Look up when the given status started for this probe.

    Returns the value of whichever "started" timestamp field corresponds
    to ``status``, or ``None`` when the status has no such field.
    """
    # checked in order; the first entry whose statuses include the given
    # one decides which field is returned
    fields_by_status = (
        ((enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
          enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP), 'critical_max_started'),
        ((enum.TEMPMON_PROBE_STATUS_HIGH_TEMP,), 'good_max_started'),
        ((enum.TEMPMON_PROBE_STATUS_LOW_TEMP,), 'good_min_started'),
        ((enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP,), 'critical_min_started'),
        ((enum.TEMPMON_PROBE_STATUS_ERROR,), 'error_started'),
    )
    for statuses, field_name in fields_by_status:
        if status in statuses:
            return getattr(self, field_name)
    return None
|
||||||
|
|
||||||
|
def timeout_for_status(self, status):
    """
    Return the timeout value for the given status, i.e. the number of
    minutes by which the initial email for that status should be delayed.

    For the "high temp" and "low temp" statuses, the value of
    ``therm_status_timeout`` is returned instead whenever the
    status-specific timeout is empty (or zero).  Returns ``None`` when the
    status has no timeout field.
    """
    critical_high = (enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
                     enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP)
    if status in critical_high:
        return self.critical_max_timeout

    if status == enum.TEMPMON_PROBE_STATUS_HIGH_TEMP:
        specific = self.good_max_timeout
        return specific if specific else self.therm_status_timeout

    if status == enum.TEMPMON_PROBE_STATUS_LOW_TEMP:
        specific = self.good_min_timeout
        return specific if specific else self.therm_status_timeout

    if status == enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP:
        return self.critical_min_timeout

    if status == enum.TEMPMON_PROBE_STATUS_ERROR:
        return self.error_timeout

    return None
|
||||||
|
|
||||||
|
|
||||||
@six.python_2_unicode_compatible
|
@six.python_2_unicode_compatible
|
||||||
class Reading(Base):
|
class Reading(Base):
|
||||||
|
|
|
@ -54,6 +54,30 @@ class TempmonBase(object):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class tempmon_critical_high_temp(TempmonBase, Email):
    """
    Email alert for a tempmon probe which has taken a "critical high"
    temperature reading.
    """
    default_subject = "CRITICAL HIGH Temperature"

    def sample_data(self, request):
        """
        Return the parent class's sample data, with ``status`` replaced by
        the enum's entry for the "critical high temp" status.
        """
        sample = super(tempmon_critical_high_temp, self).sample_data(request)
        status_code = self.enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP
        sample['status'] = self.enum.TEMPMON_PROBE_STATUS[status_code]
        return sample
|
||||||
|
|
||||||
|
|
||||||
|
class tempmon_critical_low_temp(TempmonBase, Email):
    """
    Email alert for a tempmon probe which has taken a "critical low"
    temperature reading.
    """
    default_subject = "CRITICAL LOW Temperature"

    def sample_data(self, request):
        """
        Return the parent class's sample data, with ``status`` replaced by
        the enum's entry for the "critical low temp" status.
        """
        sample = super(tempmon_critical_low_temp, self).sample_data(request)
        status_code = self.enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP
        sample['status'] = self.enum.TEMPMON_PROBE_STATUS[status_code]
        return sample
|
||||||
|
|
||||||
|
|
||||||
class tempmon_critical_temp(TempmonBase, Email):
|
class tempmon_critical_temp(TempmonBase, Email):
|
||||||
"""
|
"""
|
||||||
Sent when a tempmon probe takes a reading which is "critical" in either the
|
Sent when a tempmon probe takes a reading which is "critical" in either the
|
||||||
|
@ -92,7 +116,7 @@ class tempmon_good_temp(TempmonBase, Email):
|
||||||
Sent whenever a tempmon probe first takes a "good temp" reading, after
|
Sent whenever a tempmon probe first takes a "good temp" reading, after
|
||||||
having previously had some bad reading(s).
|
having previously had some bad reading(s).
|
||||||
"""
|
"""
|
||||||
default_subject = "Good temperature detected"
|
default_subject = "OK Temperature"
|
||||||
|
|
||||||
def sample_data(self, request):
|
def sample_data(self, request):
|
||||||
data = super(tempmon_good_temp, self).sample_data(request)
|
data = super(tempmon_good_temp, self).sample_data(request)
|
||||||
|
@ -105,7 +129,7 @@ class tempmon_high_temp(TempmonBase, Email):
|
||||||
Sent when a tempmon probe takes a reading which is above the "maximum good
|
Sent when a tempmon probe takes a reading which is above the "maximum good
|
||||||
temp" range, but still below the "critically high temp" threshold.
|
temp" range, but still below the "critically high temp" threshold.
|
||||||
"""
|
"""
|
||||||
default_subject = "High temperature detected"
|
default_subject = "HIGH Temperature"
|
||||||
|
|
||||||
def sample_data(self, request):
|
def sample_data(self, request):
|
||||||
data = super(tempmon_high_temp, self).sample_data(request)
|
data = super(tempmon_high_temp, self).sample_data(request)
|
||||||
|
@ -118,7 +142,7 @@ class tempmon_low_temp(TempmonBase, Email):
|
||||||
Sent when a tempmon probe takes a reading which is below the "minimum good
|
Sent when a tempmon probe takes a reading which is below the "minimum good
|
||||||
temp" range, but still above the "critically low temp" threshold.
|
temp" range, but still above the "critically low temp" threshold.
|
||||||
"""
|
"""
|
||||||
default_subject = "Low temperature detected"
|
default_subject = "LOW Temperature"
|
||||||
|
|
||||||
def sample_data(self, request):
|
def sample_data(self, request):
|
||||||
data = super(tempmon_low_temp, self).sample_data(request)
|
data = super(tempmon_low_temp, self).sample_data(request)
|
||||||
|
|
|
@ -124,7 +124,7 @@ class TempmonServerDaemon(Daemon):
|
||||||
cutoff = self.now - datetime.timedelta(seconds=delay + 60)
|
cutoff = self.now - datetime.timedelta(seconds=delay + 60)
|
||||||
online = False
|
online = False
|
||||||
for probe in client.enabled_probes():
|
for probe in client.enabled_probes():
|
||||||
if self.check_readings_for_probe(session, probe, cutoff) and not online:
|
if self.check_readings_for_probe(session, probe, cutoff):
|
||||||
online = True
|
online = True
|
||||||
|
|
||||||
# if client was previously marked online, but we have no "new"
|
# if client was previously marked online, but we have no "new"
|
||||||
|
@ -147,7 +147,6 @@ class TempmonServerDaemon(Daemon):
|
||||||
'now': localtime(self.config, self.now, from_utc=True),
|
'now': localtime(self.config, self.now, from_utc=True),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def check_readings_for_probe(self, session, probe, cutoff):
|
def check_readings_for_probe(self, session, probe, cutoff):
|
||||||
"""
|
"""
|
||||||
Check readings for the given probe, within the time window defined by
|
Check readings for the given probe, within the time window defined by
|
||||||
|
@ -161,18 +160,21 @@ class TempmonServerDaemon(Daemon):
|
||||||
.first()
|
.first()
|
||||||
if reading:
|
if reading:
|
||||||
|
|
||||||
# is reading below critical min, or above critical max?
|
# is reading above critical max?
|
||||||
if (reading.degrees_f <= probe.critical_temp_min or
|
if reading.degrees_f >= probe.critical_temp_max:
|
||||||
reading.degrees_f >= probe.critical_temp_max):
|
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP, reading)
|
||||||
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP, reading)
|
|
||||||
|
|
||||||
# is reading below "good" min?
|
# is reading above good max?
|
||||||
elif reading.degrees_f < probe.good_temp_min:
|
elif reading.degrees_f >= probe.good_temp_max:
|
||||||
|
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_HIGH_TEMP, reading)
|
||||||
|
|
||||||
|
# is reading below good min?
|
||||||
|
elif reading.degrees_f <= probe.good_temp_min:
|
||||||
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_LOW_TEMP, reading)
|
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_LOW_TEMP, reading)
|
||||||
|
|
||||||
# is reading above "good" max?
|
# is reading below critical min?
|
||||||
elif reading.degrees_f > probe.good_temp_max:
|
elif reading.degrees_f <= probe.critical_temp_min:
|
||||||
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_HIGH_TEMP, reading)
|
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP, reading)
|
||||||
|
|
||||||
else: # temp is good
|
else: # temp is good
|
||||||
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_GOOD_TEMP, reading)
|
self.update_status(probe, self.enum.TEMPMON_PROBE_STATUS_GOOD_TEMP, reading)
|
||||||
|
@ -196,13 +198,25 @@ class TempmonServerDaemon(Daemon):
|
||||||
prev_alert_sent = probe.status_alert_sent
|
prev_alert_sent = probe.status_alert_sent
|
||||||
if probe.status != status:
|
if probe.status != status:
|
||||||
probe.status = status
|
probe.status = status
|
||||||
|
probe.start_status(status, self.now)
|
||||||
probe.status_changed = self.now
|
probe.status_changed = self.now
|
||||||
probe.status_alert_sent = None
|
probe.status_alert_sent = None
|
||||||
|
|
||||||
# send email when things go back to normal, after being bad
|
# send "high temp" email if previous status was critical, even if
|
||||||
|
# we haven't been high for that long overall
|
||||||
|
if (status == self.enum.TEMPMON_PROBE_STATUS_HIGH_TEMP
|
||||||
|
and prev_status in (self.enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP,
|
||||||
|
self.enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP)
|
||||||
|
and prev_alert_sent):
|
||||||
|
send_email(self.config, 'tempmon_high_temp', data)
|
||||||
|
probe.status_alert_sent = self.now
|
||||||
|
return
|
||||||
|
|
||||||
|
# send email when things go back to normal (i.e. from any other status)
|
||||||
if status == self.enum.TEMPMON_PROBE_STATUS_GOOD_TEMP and prev_alert_sent:
|
if status == self.enum.TEMPMON_PROBE_STATUS_GOOD_TEMP and prev_alert_sent:
|
||||||
send_email(self.config, 'tempmon_good_temp', data)
|
send_email(self.config, 'tempmon_good_temp', data)
|
||||||
probe.status_alert_sent = self.now
|
probe.status_alert_sent = self.now
|
||||||
|
return
|
||||||
|
|
||||||
# no (more) email if status is good
|
# no (more) email if status is good
|
||||||
if status == self.enum.TEMPMON_PROBE_STATUS_GOOD_TEMP:
|
if status == self.enum.TEMPMON_PROBE_STATUS_GOOD_TEMP:
|
||||||
|
@ -215,19 +229,39 @@ class TempmonServerDaemon(Daemon):
|
||||||
return
|
return
|
||||||
|
|
||||||
# delay even the first email, until configured threshold is reached
|
# delay even the first email, until configured threshold is reached
|
||||||
# unless we have a critical status
|
timeout = probe.timeout_for_status(status) or 0
|
||||||
if status != self.enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP:
|
timeout = datetime.timedelta(minutes=timeout)
|
||||||
timeout = datetime.timedelta(minutes=probe.therm_status_timeout)
|
started = probe.status_started(status) or probe.status_changed
|
||||||
if (self.now - probe.status_changed) <= timeout:
|
if (self.now - started) <= timeout:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
msgtypes = {
|
||||||
|
self.enum.TEMPMON_PROBE_STATUS_CRITICAL_HIGH_TEMP : 'tempmon_critical_high_temp',
|
||||||
|
self.enum.TEMPMON_PROBE_STATUS_HIGH_TEMP : 'tempmon_high_temp',
|
||||||
|
self.enum.TEMPMON_PROBE_STATUS_LOW_TEMP : 'tempmon_low_temp',
|
||||||
|
self.enum.TEMPMON_PROBE_STATUS_CRITICAL_LOW_TEMP : 'tempmon_critical_low_temp',
|
||||||
|
self.enum.TEMPMON_PROBE_STATUS_ERROR : 'tempmon_error',
|
||||||
|
}
|
||||||
|
|
||||||
|
self.send_email(status, msgtypes[status], data)
|
||||||
|
|
||||||
|
# maybe send more emails if config said so
|
||||||
|
for msgtype in self.extra_emails:
|
||||||
|
self.send_email(status, msgtype, data)
|
||||||
|
|
||||||
|
probe.status_alert_sent = self.now
|
||||||
|
|
||||||
|
def send_email(self, status, template, data):
|
||||||
|
probe = data['probe']
|
||||||
|
started = probe.status_started(status) or probe.status_changed
|
||||||
|
|
||||||
# determine URL for probe, if possible
|
# determine URL for probe, if possible
|
||||||
url = self.config.get('tailbone', 'url.tempmon.probe', default='#')
|
url = self.config.get('tailbone', 'url.tempmon.probe', default='#')
|
||||||
data['probe_url'] = url.format(uuid=probe.uuid)
|
data['probe_url'] = url.format(uuid=probe.uuid)
|
||||||
|
|
||||||
since = localtime(self.config, probe.status_changed, from_utc=True)
|
since = localtime(self.config, started, from_utc=True)
|
||||||
data['status_since'] = since.strftime('%I:%M %p')
|
data['status_since'] = since.strftime('%I:%M %p')
|
||||||
data['status_since_delta'] = humanize.naturaltime(self.now - probe.status_changed)
|
data['status_since_delta'] = humanize.naturaltime(self.now - started)
|
||||||
|
|
||||||
# fetch last 90 minutes of readings
|
# fetch last 90 minutes of readings
|
||||||
session = orm.object_session(probe)
|
session = orm.object_session(probe)
|
||||||
|
@ -241,20 +275,7 @@ class TempmonServerDaemon(Daemon):
|
||||||
data['recent_readings'] = readings
|
data['recent_readings'] = readings
|
||||||
data['pretty_time'] = lambda dt: localtime(self.config, dt, from_utc=True).strftime('%Y-%m-%d %I:%M %p')
|
data['pretty_time'] = lambda dt: localtime(self.config, dt, from_utc=True).strftime('%Y-%m-%d %I:%M %p')
|
||||||
|
|
||||||
msgtypes = {
|
send_email(self.config, template, data)
|
||||||
self.enum.TEMPMON_PROBE_STATUS_LOW_TEMP : 'tempmon_low_temp',
|
|
||||||
self.enum.TEMPMON_PROBE_STATUS_HIGH_TEMP : 'tempmon_high_temp',
|
|
||||||
self.enum.TEMPMON_PROBE_STATUS_CRITICAL_TEMP : 'tempmon_critical_temp',
|
|
||||||
self.enum.TEMPMON_PROBE_STATUS_ERROR : 'tempmon_error',
|
|
||||||
}
|
|
||||||
|
|
||||||
send_email(self.config, msgtypes[status], data)
|
|
||||||
|
|
||||||
# maybe send more emails if config said so
|
|
||||||
for msgtype in self.extra_emails:
|
|
||||||
send_email(self.config, msgtype, data)
|
|
||||||
|
|
||||||
probe.status_alert_sent = self.now
|
|
||||||
|
|
||||||
|
|
||||||
def make_daemon(config, pidfile=None):
|
def make_daemon(config, pidfile=None):
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
## -*- coding: utf-8 -*-
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
<b>This is an alert from ${probe}!</b><br>
|
||||||
|
The status of ${probe} is: ${status}.<br>
|
||||||
|
The current temperature is: ${reading.degrees_f}.<br>
|
||||||
|
The temperature should never be this high.
|
||||||
|
Investigate Immediately!<br>
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Notes: <br>
|
||||||
|
Frozen food that is above 40 degrees needs to be thrown away<br>
|
||||||
|
if it remains at that temperature for two hours or more.<br>
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Check out <a href="http://www.fsis.usda.gov/wps/portal/fsis/topics/food-safety-education/get-answers/food-safety-fact-sheets/safe-food-handling/freezing-and-food-safety/CT_Index/!ut/p/a1/jZFRT8IwEIB_DY9dbw7J8G1ZYtiUTYJK2Qsp7NYt2dqlrU759RZ8UQJK-9LefV-ud6UFZbSQ_L0R3DZK8vZwLyYbWMDEn8aQ5lP_HpLsdZE_xDGEy1sHrP8AsuBK_8KK4D8_vaLAjZ7Hc0GLntuaNLJSlAm0hEszoDaUVUqVxPAK7Sep-M4SUyNalzjEyDFbc1m2jRQO1oh7d3J6SX6YlMXPm0SW-EFXtPj9KvDdTrJgOZ6lWQD5-BQ4M7Zv4PJcXOOiVdvjH60juQ1C16HGCjVq7027cG1tb-5GMIJhGDyhlGjR26nunFArYyk74fruhe0foxk0T90qNNEXiOIqAA!!/#16">this USDA link</a> for useful information
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This email will repeat every ${probe.status_alert_timeout} minutes until the issue<br>
|
||||||
|
has been resolved.
|
||||||
|
</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Reference in a new issue