implement grace time for fails and permanent states

This commit is contained in:
Yannik Schmidt
2021-11-27 13:34:50 +01:00
parent 720bc9f25a
commit 41cbb00ce5

View File

@@ -28,7 +28,7 @@ class Service(db.Model):
class Status(db.Model): class Status(db.Model):
__tablename__ = "states" __tablename__ = "states"
service = Column(String, primary_key=True) service = Column(String, primary_key=True)
timestamp = Column(Integer) timestamp = Column(Integer, primary_key=True)
status = Column(String) status = Column(String)
info_text = Column(String) info_text = Column(String)
@@ -65,25 +65,42 @@ def default():
# check status # # check status #
response = None response = None
status = db.session.query(Status).filter(Status.service == serviceObj.service).first() query = db.session.query(Status).filter(Status.service == serviceObj.service)
if not status: lastSuccess = query.filter(Status.status == "OK").order_by(
return flask.jsonify(buildReponseDict(status)) sqlalchemy.desc(Status.timestamp)).first()
lastFail = query.filter(Status.status != "OK").order_by(
sqlalchemy.desc(Status.timestamp)).first()
if not lastSuccess and not lastFail:
return flask.jsonify(buildReponseDict(None))
elif not lastSuccess and lastFail:
return flask.jsonify(buildReponseDict(lastFail))
else: else:
# if status is ok check for timeout constrainsts, otherwise return last state # timeParsed = datetime.datetime.fromtimestamp(lastSuccess.timestamp)
if status.status == "OK": totalSeconds = (datetime.datetime.now() - timeParsed).total_seconds()
timeParsed = datetime.datetime.fromtimestamp(status.timestamp) delta = datetime.timedelta(seconds=int(totalSeconds))
totalSeconds = (datetime.datetime.now() - timeParsed).total_seconds() timeout = datetime.timedelta(seconds=serviceObj.timeout)
delta = datetime.timedelta(seconds=int(totalSeconds))
timeout = datetime.timedelta(seconds=serviceObj.timeout)
if not status.timestamp == 0 and timeout < delta:
status.info_text = "Service {} overdue since {}".format(service, str(delta))
if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12):
status.status = "WARNING"
else:
status.status = "CRITICAL"
return flask.jsonify(buildReponseDict(status)) latestInfoIsSuccess = not lastFail or lastFail.timestamp < lastSuccess.timestamp
if not lastSuccess.timestamp == 0 and delta > timeout and latestInfoIsSuccess:
# latest info is success but timed out #
lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta))
if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12):
lastSuccess.status = "WARNING"
else:
lastSuccess.status = "CRITICAL"
return flask.jsonify(buildReponseDict(lastSuccess))
elif latestInfoIsSuccess:
return flask.jsonify(buildReponseDict(lastSuccess))
elif delta < timeout and not latestInfoIsSuccess:
return flask.jsonify(buildReponseDict(lastSuccess))
else:
return flask.jsonify(buildReponseDict(lastFail))
elif flask.request.method == "POST": elif flask.request.method == "POST":
@@ -94,12 +111,17 @@ def default():
text = flask.request.json["info"] text = flask.request.json["info"]
timestamp = datetime.datetime.now().timestamp() timestamp = datetime.datetime.now().timestamp()
if not service:
return ("'service' ist empty field in json", 400)
elif not token:
return ("'token' ist empty field in json", 400)
# verify token & service in config # # verify token & service in config #
verifiedServiceObj = db.session.query(Service).filter( verifiedServiceObj = db.session.query(Service).filter(
and_(Service.service == service, Service.token == token)).first() or_(Service.service == service, Service.token == token)).first()
if not verifiedServiceObj: if not verifiedServiceObj:
return ("Bad service name or token", 401) return ("Service with this token not found in DB", 401)
else: else:
status = Status(service=service, timestamp=timestamp, status=status, info_text=text) status = Status(service=service, timestamp=timestamp, status=status, info_text=text)
db.session.merge(status) db.session.merge(status)