mirror of
https://github.com/FAUSheppy/icinga-webhook-gateway
synced 2025-12-06 07:21:38 +01:00
feat: add SMART monitoring support
This commit is contained in:
141
server.py
141
server.py
@@ -12,7 +12,7 @@ import secrets
|
|||||||
|
|
||||||
import flask_wtf
|
import flask_wtf
|
||||||
from flask_wtf import FlaskForm
|
from flask_wtf import FlaskForm
|
||||||
from wtforms import StringField, SubmitField, BooleanField, DecimalField, HiddenField
|
from wtforms import StringField, SubmitField, BooleanField, DecimalField, HiddenField, SelectField
|
||||||
from wtforms.validators import DataRequired, Length
|
from wtforms.validators import DataRequired, Length
|
||||||
|
|
||||||
from sqlalchemy import Column, Integer, String, Boolean, or_, and_
|
from sqlalchemy import Column, Integer, String, Boolean, or_, and_
|
||||||
@@ -24,6 +24,7 @@ from flask_sqlalchemy import SQLAlchemy
|
|||||||
from sqlalchemy.sql.expression import func
|
from sqlalchemy.sql.expression import func
|
||||||
|
|
||||||
import icingatools
|
import icingatools
|
||||||
|
import smarttools
|
||||||
|
|
||||||
app = flask.Flask("Icinga Report In Gateway")
|
app = flask.Flask("Icinga Report In Gateway")
|
||||||
|
|
||||||
@@ -41,6 +42,7 @@ class Service(db.Model):
|
|||||||
token = Column(String)
|
token = Column(String)
|
||||||
timeout = Column(Integer)
|
timeout = Column(Integer)
|
||||||
owner = Column(String)
|
owner = Column(String)
|
||||||
|
special_type = Column(String)
|
||||||
|
|
||||||
staticly_configured = Column(Boolean)
|
staticly_configured = Column(Boolean)
|
||||||
|
|
||||||
@@ -57,6 +59,22 @@ class Status(db.Model):
|
|||||||
dt = datetime.datetime.fromtimestamp(self.timestamp)
|
dt = datetime.datetime.fromtimestamp(self.timestamp)
|
||||||
return dt.strftime("%d. %B %Y at %H:%M")
|
return dt.strftime("%d. %B %Y at %H:%M")
|
||||||
|
|
||||||
|
class SMARTStatus(db.Model):
    """One recorded SMART report for a service.

    A row is identified by the service name together with the timestamp at
    which the report was received, so every submission creates a new row.
    """

    __tablename__ = "smart"

    # composite primary key: which service reported, and when #
    service = Column(String, primary_key=True)
    timestamp = Column(Integer, primary_key=True)

    # health values taken from the submitted SMART data #
    temperature = Column(Integer)
    available_spare = Column(Integer)
    unsafe_shutdowns = Column(Integer)
    critical_warning = Column(Integer)
    model_number = Column(String)
    # NOTE: this column used to be declared twice in this class; the earlier
    # declaration was overwritten by this one, so only this one is kept.
    power_cycles = Column(Integer)
    power_on_hours = Column(Integer)
    wearleveling_count = Column(Integer)
|
||||||
|
|
||||||
def buildReponseDict(status, service=None):
|
def buildReponseDict(status, service=None):
|
||||||
|
|
||||||
if not status:
|
if not status:
|
||||||
@@ -108,6 +126,7 @@ class EntryForm(FlaskForm):
|
|||||||
|
|
||||||
service = StringField("Service Name")
|
service = StringField("Service Name")
|
||||||
service_hidden = HiddenField("service_hidden")
|
service_hidden = HiddenField("service_hidden")
|
||||||
|
special_type = SelectField("Type", choices=["Default", "SMART"])
|
||||||
timeout = DecimalField("Timeout in days", default=30)
|
timeout = DecimalField("Timeout in days", default=30)
|
||||||
|
|
||||||
def create_entry(form, user):
|
def create_entry(form, user):
|
||||||
@@ -124,8 +143,13 @@ def create_entry(form, user):
|
|||||||
raise AssertionError("WTF Service without Token {}".format(service_name))
|
raise AssertionError("WTF Service without Token {}".format(service_name))
|
||||||
|
|
||||||
day_delta = datetime.timedelta(days=int(form.timeout.data))
|
day_delta = datetime.timedelta(days=int(form.timeout.data))
|
||||||
|
|
||||||
|
special_type = form.special_type.data
|
||||||
|
if form.special_type == "Default":
|
||||||
|
special_type = None
|
||||||
|
|
||||||
service = Service(service=service_name, timeout=day_delta.total_seconds(),
|
service = Service(service=service_name, timeout=day_delta.total_seconds(),
|
||||||
owner=user, token=token)
|
owner=user, token=token, special_type=special_type)
|
||||||
|
|
||||||
# service.data set = create, service_hidden.data = modify #
|
# service.data set = create, service_hidden.data = modify #
|
||||||
if form.service.data:
|
if form.service.data:
|
||||||
@@ -155,8 +179,11 @@ def service_details():
|
|||||||
icinga_link = icingatools.build_icinga_link_for_service(user, service.service,
|
icinga_link = icingatools.build_icinga_link_for_service(user, service.service,
|
||||||
service.staticly_configured, app)
|
service.staticly_configured, app)
|
||||||
|
|
||||||
|
smart_entry_list = db.session.query(SMARTStatus).filter(SMARTStatus.service==service.service)
|
||||||
|
smart_entry = smart_entry_list.order_by(SMARTStatus.timestamp.desc()).first()
|
||||||
|
|
||||||
return flask.render_template("service_info.html", service=service, flask=flask,
|
return flask.render_template("service_info.html", service=service, flask=flask,
|
||||||
user=user, status_list=status_list, icinga_link=icinga_link)
|
user=user, status_list=status_list, icinga_link=icinga_link, smart=smart_entry)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/entry-form", methods=["GET", "POST", "DELETE"])
|
@app.route("/entry-form", methods=["GET", "POST", "DELETE"])
|
||||||
@@ -165,7 +192,7 @@ def create_interface():
|
|||||||
user = str(flask.request.headers.get("X-Forwarded-Preferred-Username"))
|
user = str(flask.request.headers.get("X-Forwarded-Preferred-Username"))
|
||||||
|
|
||||||
# check if is delete #
|
# check if is delete #
|
||||||
operation = flask.request.args.get("operation")
|
operation = flask.request.args.get("operation")
|
||||||
if operation and operation == "delete" :
|
if operation and operation == "delete" :
|
||||||
|
|
||||||
service_delete_name = flask.request.args.get("service")
|
service_delete_name = flask.request.args.get("service")
|
||||||
@@ -182,13 +209,14 @@ def create_interface():
|
|||||||
return flask.redirect("/overview")
|
return flask.redirect("/overview")
|
||||||
|
|
||||||
form = EntryForm()
|
form = EntryForm()
|
||||||
|
|
||||||
# handle modification #
|
# handle modification #
|
||||||
modify_service_name = flask.request.args.get("service")
|
modify_service_name = flask.request.args.get("service")
|
||||||
if modify_service_name:
|
if modify_service_name:
|
||||||
service = db.session.query(Service).filter(Service.service == modify_service_name).first()
|
service = db.session.query(Service).filter(Service.service == modify_service_name).first()
|
||||||
if service and service.owner == user:
|
if service and service.owner == user:
|
||||||
form.service.default = service.service
|
form.service.default = service.service
|
||||||
|
form.special_type.default = service.special_type
|
||||||
form.timeout.default = datetime.timedelta(seconds=service.timeout).days
|
form.timeout.default = datetime.timedelta(seconds=service.timeout).days
|
||||||
form.service_hidden.default = service.service
|
form.service_hidden.default = service.service
|
||||||
form.process()
|
form.process()
|
||||||
@@ -259,7 +287,7 @@ def default():
|
|||||||
if not lastSuccess.timestamp == 0 and delta > timeout and latestInfoIsSuccess:
|
if not lastSuccess.timestamp == 0 and delta > timeout and latestInfoIsSuccess:
|
||||||
|
|
||||||
# lastes info is success but timed out #
|
# lastes info is success but timed out #
|
||||||
lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta))
|
lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta))
|
||||||
if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12):
|
if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12):
|
||||||
lastSuccess.status = "WARNING"
|
lastSuccess.status = "WARNING"
|
||||||
else:
|
else:
|
||||||
@@ -277,16 +305,27 @@ def default():
|
|||||||
elif flask.request.method == "POST":
|
elif flask.request.method == "POST":
|
||||||
|
|
||||||
# get variables #
|
# get variables #
|
||||||
service = flask.request.json["service"]
|
service = flask.request.json.get("service")
|
||||||
token = flask.request.json["token"]
|
token = flask.request.json.get("token")
|
||||||
status = flask.request.json["status"]
|
status = flask.request.json.get("status")
|
||||||
text = flask.request.json["info"]
|
text = flask.request.json.get("info") or "no_info"
|
||||||
timestamp = datetime.datetime.now().timestamp()
|
timestamp = datetime.datetime.now().timestamp()
|
||||||
|
|
||||||
|
smart = flask.request.json.get("smart")
|
||||||
|
|
||||||
|
# check smart json quoting problems #
|
||||||
|
if smart and type(smart) == str:
|
||||||
|
try:
|
||||||
|
smart = json.loads(smart)
|
||||||
|
except json.decoder.JSONDecodeError as e:
|
||||||
|
return ("Error in SMART-json {}".format(e), 415)
|
||||||
|
|
||||||
if not service:
|
if not service:
|
||||||
return ("'service' ist empty field in json", 400)
|
return ("'service' ist empty field in json", 400)
|
||||||
elif not token:
|
elif not token:
|
||||||
return ("'token' ist empty field in json", 400)
|
return ("'token' ist empty field in json", 400)
|
||||||
|
elif not status and not smart:
|
||||||
|
return ("'status' is empty field in json", 400)
|
||||||
|
|
||||||
# verify token & service in config #
|
# verify token & service in config #
|
||||||
verifiedServiceObj = db.session.query(Service).filter(
|
verifiedServiceObj = db.session.query(Service).filter(
|
||||||
@@ -295,15 +334,93 @@ def default():
|
|||||||
if not verifiedServiceObj:
|
if not verifiedServiceObj:
|
||||||
return ("Service ({}) with this token ({}) not found in DB".format(service, token), 401)
|
return ("Service ({}) with this token ({}) not found in DB".format(service, token), 401)
|
||||||
else:
|
else:
|
||||||
status = Status(service=service, timestamp=timestamp, status=status, info_text=text)
|
|
||||||
|
# handle a SMART-record submission (with errorhandling) #
|
||||||
|
if smart and not verifiedServiceObj.special_type == "SMART":
|
||||||
|
return ("SMART Field for non-SMART type service", 415)
|
||||||
|
elif smart:
|
||||||
|
text, status = record_and_check_smart(verifiedServiceObj,
|
||||||
|
timestamp, smart)
|
||||||
|
|
||||||
|
status = Status(service=service, timestamp=timestamp, status=status,
|
||||||
|
info_text=text)
|
||||||
db.session.merge(status)
|
db.session.merge(status)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return ("", 204)
|
return ("", 204)
|
||||||
else:
|
else:
|
||||||
return ("Method not implemented: {}".format(flask.request.method), 405)
|
return ("Method not implemented: {}".format(flask.request.method), 405)
|
||||||
|
|
||||||
|
def _evaluate_smart_records(smart_last, smart_second_last, smart_old):
    """Derive an (info_text, status) tuple from recorded SMART rows.

    smart_last is the newest row, smart_second_last the one before it (None
    if there is only one row) and smart_old the oldest row inside the
    comparison window (None if there is none). Checks are evaluated in
    order and the first one that matches determines the result.
    """

    # critical != 0 #
    if smart_last.critical_warning != 0:
        return ("SMART reports disk critical => oO better do something about this", "CRITICAL")

    # wearleveling <= 20% (SAMSUNG only) #
    # compare against None explicitly: a wear level of 0 is the worst case
    # and must not be skipped just because it is falsy
    wear_level = smart_last.wearleveling_count
    if wear_level is not None and wear_level <= 20:
        return ("SMART report prefail disk (wear_level < 20%)", "CRITICAL")

    # temp max > X #
    if smart_last.temperature is not None and smart_last.temperature > 50:
        return ("Disk Temperatur {}".format(smart_last.temperature), "CRITICAL")

    # available_SSD spare #
    # available_spare is optional when recording, so it may be None here
    spare = smart_last.available_spare
    if spare is not None:

        spare_change = 0
        if smart_old is not None and smart_old.available_spare is not None:
            spare_change = smart_old.available_spare - spare

        if spare <= 25:
            return ("SSD spare <25 ({}) YOUR DISK WILL DIE SOON".format(spare_change),
                        "CRITICAL")
        elif spare <= 50:
            return ("SSD spare <50 ({})".format(spare_change), "WARNING")
        elif spare_change >= 10:
            return ("Strong degration in SSD spare space ({} in under 6 months)".format(
                        spare_change), "WARNING")

    # unsafe_shutdowns +1 #
    # the counter grows over time, so the increase is newest minus previous;
    # on the very first report there is no previous row to compare with
    if (smart_second_last is not None
            and smart_last.unsafe_shutdowns is not None
            and smart_second_last.unsafe_shutdowns is not None):
        new_unsafe_shutdowns = (smart_last.unsafe_shutdowns
                                    - smart_second_last.unsafe_shutdowns)
        if new_unsafe_shutdowns >= 1:
            return ("Disk had {} unsafe shutdowns".format(smart_last.unsafe_shutdowns),
                        "WARNING")

    return ("{} - no problems detected".format(smart_last.model_number), "OK")


def record_and_check_smart(service, timestamp, smart):
    """Store a submitted SMART report and evaluate the disk's health.

    Parameters:
        service   -- Service row the report belongs to (special_type must be "SMART")
        timestamp -- time of the submission, stored as part of the row's key
        smart     -- parsed SMART JSON (dict) as submitted by the client

    Returns a tuple (info_text, status) where status is one of
    "OK", "WARNING" or "CRITICAL".

    Raises AssertionError if the service is not a SMART-type service and
    KeyError if a mandatory health field is missing from the report.
    """

    if not service.special_type == "SMART":
        raise AssertionError("Trying to record SMART-record for non-SMART service")

    # NVMe reports already carry a flat health log, everything else is normalized #
    if "nvme_smart_health_information_log" in smart:
        health_info = smart["nvme_smart_health_information_log"]
    else:
        health_info = smarttools.normalize(smart)

    # record the status #
    smart_status = SMARTStatus(service=service.service, timestamp=timestamp,
                        temperature=health_info["temperature"],
                        critical_warning=health_info["critical_warning"],
                        unsafe_shutdowns=health_info["unsafe_shutdowns"],
                        power_cycles=health_info["power_cycles"],
                        power_on_hours=health_info["power_on_hours"],
                        available_spare=health_info.get("available_spare"),
                        model_number=smart.get("model_name"),
                        wearleveling_count=health_info.get("wearleveling_count"))

    db.session.add(smart_status)
    db.session.commit()

    # check the status #
    service_records = db.session.query(SMARTStatus).filter(
                        SMARTStatus.service == service.service)
    newest_first = service_records.order_by(SMARTStatus.timestamp.desc())
    smart_last = newest_first.first()
    smart_second_last = newest_first.offset(1).first()

    # oldest record (max 6 months ago) #
    six_months_ago = datetime.datetime.now() - datetime.timedelta(days=180)
    smart_old = service_records.filter(
                        SMARTStatus.timestamp > six_months_ago.timestamp()).order_by(
                        SMARTStatus.timestamp.asc()).first()

    return _evaluate_smart_records(smart_last, smart_second_last, smart_old)
|
||||||
|
|
||||||
|
|
||||||
def create_app():
|
def create_app():
|
||||||
|
|
||||||
db.create_all()
|
db.create_all()
|
||||||
config = {}
|
config = {}
|
||||||
|
|
||||||
|
|||||||
49
smarttools.py
Normal file
49
smarttools.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
def normalize(smart):
    '''Load different types of SMART outputs

    Takes the parsed smartctl JSON (dict) and returns a flat dict with the
    keys "temperature", "critical_warning", "unsafe_shutdowns",
    "power_cycles", "power_on_hours", "available_spare" and
    "wearleveling_count". Values that cannot be found in the input keep
    their defaults (0 for counters, 100 for the percentage-like values).'''

    ret = {
        "temperature": 0,
        "critical_warning": 0,
        "unsafe_shutdowns": 0,
        "power_cycles": 0,
        "power_on_hours": 0,
        "available_spare": 100,
        "wearleveling_count": 100,
    }

    # only ATA-style attribute tables are understood, otherwise keep defaults #
    if "ata_smart_attributes" not in smart:
        return ret

    # lower-cased ATA attribute name -> (key in return dict, use raw value) #
    # counters need the raw value, health indicators the normalized one
    attribute_map = {
        "used_rsvd_blk_cnt_tot": ("available_spare", False),
        "power_cycle_count": ("power_cycles", True),
        "power_on_hours": ("power_on_hours", True),
        "wear_leveling_count": ("wearleveling_count", False),
    }

    # temperatur #
    ret["temperature"] = (smart.get("temperature") or {}).get("current", 0)

    # get main table #
    table = smart["ata_smart_attributes"].get("table") or []

    for el in table:

        # look for relevant metrics #
        name = el["name"].lower()

        # unmapped attributes keep their own name and the normalized value #
        target_name, use_raw = attribute_map.get(name, (name, False))

        # check if metric should be recorded #
        if target_name not in ret:
            continue

        # set return dict #
        if use_raw:
            ret[target_name] = el["raw"]["value"]
        else:
            ret[target_name] = el["value"]

    return ret
|
||||||
@@ -155,6 +155,22 @@ body{
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Banner used for SMART monitoring hints on the service page. */
.smart-info{
    font-family: monospace;

    padding-top: 2px;
    padding-left: 5px;

    color: black;

    border: none;
    outline: none;
    cursor: auto;
}
|
||||||
|
|
||||||
.box{
|
.box{
|
||||||
border-style: solid;
|
border-style: solid;
|
||||||
border-width: 1px;
|
border-width: 1px;
|
||||||
|
|||||||
@@ -37,6 +37,9 @@
|
|||||||
</br>
|
</br>
|
||||||
{{ form.timeout.label }} {{ form.timeout() }} </br>
|
{{ form.timeout.label }} {{ form.timeout() }} </br>
|
||||||
|
|
||||||
|
</br>
|
||||||
|
{{ form.special_type.label }} {{ form.special_type() }} </br>
|
||||||
|
|
||||||
{% if is_modification %}
|
{% if is_modification %}
|
||||||
<input class="form-button mt-4" type="submit" value="Send Modification">
|
<input class="form-button mt-4" type="submit" value="Send Modification">
|
||||||
{% else %}
|
{% else %}
|
||||||
|
|||||||
@@ -71,8 +71,39 @@
|
|||||||
class="service-token">Secret Token: {{ service.token }}</div>
|
class="service-token">Secret Token: {{ service.token }}</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if service.special_type == "SMART" %}
|
||||||
|
<div class="clear smart-info mt-3" style="background-color: orange;">
|
||||||
|
Smart Monitor {% if smart %} for: {{ smart.model_number }} {% endif %}
|
||||||
|
</div>
|
||||||
|
<div class="clear smart-info mt-3" style="background-color: orange;">
|
||||||
|
Example below requires smartmontools ("smartctl") in PATH.
|
||||||
|
On Linux this is usually available via the package manager,
|
||||||
|
on Windows install it from the
|
||||||
|
<a style="text-decoration: underline; color: #5000e1; font-weight: bold;" href="https://www.smartmontools.org/wiki/Download#InstalltheWindowspackage">offical page</a>.
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if smart %}
|
||||||
|
<h5 class="clear my-4">Linux</h5>
|
||||||
|
{% else %}
|
||||||
<h5 class="clear my-4">Curl</h5>
|
<h5 class="clear my-4">Curl</h5>
|
||||||
|
{% endif %}
|
||||||
<div class="ml-3 example">
|
<div class="ml-3 example">
|
||||||
|
{% if smart %}
|
||||||
|
SMART='{ <br>
|
||||||
|
<div class="example-indent">
|
||||||
|
"service" : "{{ service.service }}", <br>
|
||||||
|
"token" : "{{ service.token }}", <br>
|
||||||
|
"status" : "N/A", <br>
|
||||||
|
"smart" : '$(/sbin/smartctl -a /dev/nvme0n1 --json)' <br>
|
||||||
|
</div>
|
||||||
|
}' <br><br>
|
||||||
|
curl -X POST -H "Content-Type: application/json" \ <br>
|
||||||
|
<div class="example-indent">
|
||||||
|
--data "${SMART}" \ <br>
|
||||||
|
{{ flask.request.url_root.replace("http://", "https://" )}}report
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
curl -X POST \ <br>
|
curl -X POST \ <br>
|
||||||
<div class="example-indent">
|
<div class="example-indent">
|
||||||
-H "Content-Type: application/json" \ <br>
|
-H "Content-Type: application/json" \ <br>
|
||||||
@@ -81,8 +112,23 @@
|
|||||||
"status" : "OK", "info" : "Free Text Information here" }' \<br>
|
"status" : "OK", "info" : "Free Text Information here" }' \<br>
|
||||||
{{ flask.request.url_root.replace("http://", "https://" )}}report
|
{{ flask.request.url_root.replace("http://", "https://" )}}report
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if smart %}
|
||||||
|
<h5 class="my-4">Windows</h5>
|
||||||
|
<div class="ml-3 example">
|
||||||
|
$SMART = @{ <br>
|
||||||
|
<div class="example-indent">
|
||||||
|
service = "{{ service.service }}"<br>
|
||||||
|
token = "{{ service.token }}"<br>
|
||||||
|
status = "N/A"<br>
|
||||||
|
smart = "$(smartctl -a C: --json | Out-String)"<br>
|
||||||
|
</div>
|
||||||
|
} | ConvertTo-Json<br><br>
|
||||||
|
Invoke-RestMethod -TimeoutSec 2 -Uri "{{ flask.request.url_root.replace("http://", "https://" )}}report" -Method Post -Headers @{"Content-Type"="application/json"} -Body $SMART
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
<h5 class="my-4">Python</h5>
|
<h5 class="my-4">Python</h5>
|
||||||
<div class="ml-3 example">
|
<div class="ml-3 example">
|
||||||
import requests<br>
|
import requests<br>
|
||||||
@@ -97,6 +143,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<table class="mb-4 mt-5 status-table">
|
<table class="mb-4 mt-5 status-table">
|
||||||
<thead>
|
<thead>
|
||||||
|
|||||||
Reference in New Issue
Block a user