diff --git a/rattail_tempmon/client.py b/rattail_tempmon/client.py index c3d8b63..2b154cb 100644 --- a/rattail_tempmon/client.py +++ b/rattail_tempmon/client.py @@ -1,8 +1,8 @@ -# -*- coding: utf-8 -*- +# -*- coding: utf-8; -*- ################################################################################ # # Rattail -- Retail Software Framework -# Copyright © 2010-2017 Lance Edgar +# Copyright © 2010-2018 Lance Edgar # # This file is part of Rattail. # @@ -32,6 +32,8 @@ import random import socket import logging +import six +from sqlalchemy.exc import OperationalError from sqlalchemy.orm.exc import NoResultFound from rattail.daemon import Daemon @@ -71,6 +73,7 @@ class TempmonClient(Daemon): session.close() # main loop: take readings, pause, repeat + self.failed_checks = 0 while True: self.take_readings(client_uuid) time.sleep(self.delay) @@ -79,6 +82,7 @@ class TempmonClient(Daemon): """ Take new readings for all enabled probes on this client. """ + # log.debug("taking readings") session = Session() try: @@ -87,20 +91,43 @@ class TempmonClient(Daemon): if client.enabled: for probe in client.enabled_probes(): self.take_reading(session, probe) + session.flush() - except: - log.exception("Failed to read/record temperature data (but will keep trying)") - session.rollback() - - else: - # make sure we show as being online + # one more thing, make sure our client appears "online" if not client.online: client.online = True - try: - session.commit() - except: + + except Exception as error: + log_error = True + self.failed_checks += 1 + session.rollback() + + # our goal here is to suppress logging when we see connection + # errors which are due to a simple postgres restart. but if they + # keep coming then we'll go ahead and log them (sending email) + if isinstance(error, OperationalError): + + # this first test works upon first DB restart, as well as the + # first time after DB stop. but in the case of DB stop, + # subsequent errors will instead match the second test + if error.connection_invalidated or ( + 'could not connect to server: Connection refused' in six.text_type(error)): + + # only suppress logging for 3 failures, after that we let them go + # TODO: should make the max attempts configurable + if self.failed_checks < 4: + log_error = False + log.debug("database connection failure #%s: %s", + self.failed_checks, + six.text_type(error)) + + # send error email unless we're suppressing it for now + if log_error: log.exception("Failed to read/record temperature data (but will keep trying)") - session.rollback() + + else: # taking readings was successful + self.failed_checks = 0 + session.commit() finally: session.close()