mirror of
https://github.com/FAUSheppy/icinga-webhook-gateway
synced 2025-12-05 23:11:43 +01:00
feat: add SMART monitoring support
This commit is contained in:
141
server.py
141
server.py
@@ -12,7 +12,7 @@ import secrets
|
||||
|
||||
import flask_wtf
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import StringField, SubmitField, BooleanField, DecimalField, HiddenField
|
||||
from wtforms import StringField, SubmitField, BooleanField, DecimalField, HiddenField, SelectField
|
||||
from wtforms.validators import DataRequired, Length
|
||||
|
||||
from sqlalchemy import Column, Integer, String, Boolean, or_, and_
|
||||
@@ -24,6 +24,7 @@ from flask_sqlalchemy import SQLAlchemy
|
||||
from sqlalchemy.sql.expression import func
|
||||
|
||||
import icingatools
|
||||
import smarttools
|
||||
|
||||
app = flask.Flask("Icinga Report In Gateway")
|
||||
|
||||
@@ -41,6 +42,7 @@ class Service(db.Model):
|
||||
token = Column(String)
|
||||
timeout = Column(Integer)
|
||||
owner = Column(String)
|
||||
special_type = Column(String)
|
||||
|
||||
staticly_configured = Column(Boolean)
|
||||
|
||||
@@ -57,6 +59,22 @@ class Status(db.Model):
|
||||
dt = datetime.datetime.fromtimestamp(self.timestamp)
|
||||
return dt.strftime("%d. %B %Y at %H:%M")
|
||||
|
||||
class SMARTStatus(db.Model):
|
||||
|
||||
__tablename__ = "smart"
|
||||
|
||||
service = Column(String, primary_key=True)
|
||||
timestamp = Column(Integer, primary_key=True)
|
||||
power_cycles = Column(Integer)
|
||||
temperature = Column(Integer)
|
||||
available_spare = Column(Integer)
|
||||
unsafe_shutdowns = Column(Integer)
|
||||
critical_warning = Column(Integer)
|
||||
model_number = Column(String)
|
||||
power_cycles = Column(Integer)
|
||||
power_on_hours = Column(Integer)
|
||||
wearleveling_count = Column(Integer)
|
||||
|
||||
def buildReponseDict(status, service=None):
|
||||
|
||||
if not status:
|
||||
@@ -108,6 +126,7 @@ class EntryForm(FlaskForm):
|
||||
|
||||
service = StringField("Service Name")
|
||||
service_hidden = HiddenField("service_hidden")
|
||||
special_type = SelectField("Type", choices=["Default", "SMART"])
|
||||
timeout = DecimalField("Timeout in days", default=30)
|
||||
|
||||
def create_entry(form, user):
|
||||
@@ -124,8 +143,13 @@ def create_entry(form, user):
|
||||
raise AssertionError("WTF Service without Token {}".format(service_name))
|
||||
|
||||
day_delta = datetime.timedelta(days=int(form.timeout.data))
|
||||
|
||||
special_type = form.special_type.data
|
||||
if form.special_type == "Default":
|
||||
special_type = None
|
||||
|
||||
service = Service(service=service_name, timeout=day_delta.total_seconds(),
|
||||
owner=user, token=token)
|
||||
owner=user, token=token, special_type=special_type)
|
||||
|
||||
# service.data set = create, service_hidden.data = modify #
|
||||
if form.service.data:
|
||||
@@ -155,8 +179,11 @@ def service_details():
|
||||
icinga_link = icingatools.build_icinga_link_for_service(user, service.service,
|
||||
service.staticly_configured, app)
|
||||
|
||||
smart_entry_list = db.session.query(SMARTStatus).filter(SMARTStatus.service==service.service)
|
||||
smart_entry = smart_entry_list.order_by(SMARTStatus.timestamp.desc()).first()
|
||||
|
||||
return flask.render_template("service_info.html", service=service, flask=flask,
|
||||
user=user, status_list=status_list, icinga_link=icinga_link)
|
||||
user=user, status_list=status_list, icinga_link=icinga_link, smart=smart_entry)
|
||||
|
||||
|
||||
@app.route("/entry-form", methods=["GET", "POST", "DELETE"])
|
||||
@@ -165,7 +192,7 @@ def create_interface():
|
||||
user = str(flask.request.headers.get("X-Forwarded-Preferred-Username"))
|
||||
|
||||
# check if is delete #
|
||||
operation = flask.request.args.get("operation")
|
||||
operation = flask.request.args.get("operation")
|
||||
if operation and operation == "delete" :
|
||||
|
||||
service_delete_name = flask.request.args.get("service")
|
||||
@@ -182,13 +209,14 @@ def create_interface():
|
||||
return flask.redirect("/overview")
|
||||
|
||||
form = EntryForm()
|
||||
|
||||
|
||||
# handle modification #
|
||||
modify_service_name = flask.request.args.get("service")
|
||||
if modify_service_name:
|
||||
service = db.session.query(Service).filter(Service.service == modify_service_name).first()
|
||||
if service and service.owner == user:
|
||||
form.service.default = service.service
|
||||
form.special_type.default = service.special_type
|
||||
form.timeout.default = datetime.timedelta(seconds=service.timeout).days
|
||||
form.service_hidden.default = service.service
|
||||
form.process()
|
||||
@@ -259,7 +287,7 @@ def default():
|
||||
if not lastSuccess.timestamp == 0 and delta > timeout and latestInfoIsSuccess:
|
||||
|
||||
# lastes info is success but timed out #
|
||||
lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta))
|
||||
lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta))
|
||||
if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12):
|
||||
lastSuccess.status = "WARNING"
|
||||
else:
|
||||
@@ -277,16 +305,27 @@ def default():
|
||||
elif flask.request.method == "POST":
|
||||
|
||||
# get variables #
|
||||
service = flask.request.json["service"]
|
||||
token = flask.request.json["token"]
|
||||
status = flask.request.json["status"]
|
||||
text = flask.request.json["info"]
|
||||
service = flask.request.json.get("service")
|
||||
token = flask.request.json.get("token")
|
||||
status = flask.request.json.get("status")
|
||||
text = flask.request.json.get("info") or "no_info"
|
||||
timestamp = datetime.datetime.now().timestamp()
|
||||
|
||||
smart = flask.request.json.get("smart")
|
||||
|
||||
# check smart json quoting problems #
|
||||
if smart and type(smart) == str:
|
||||
try:
|
||||
smart = json.loads(smart)
|
||||
except json.decoder.JSONDecodeError as e:
|
||||
return ("Error in SMART-json {}".format(e), 415)
|
||||
|
||||
if not service:
|
||||
return ("'service' ist empty field in json", 400)
|
||||
elif not token:
|
||||
return ("'token' ist empty field in json", 400)
|
||||
elif not status and not smart:
|
||||
return ("'status' is empty field in json", 400)
|
||||
|
||||
# verify token & service in config #
|
||||
verifiedServiceObj = db.session.query(Service).filter(
|
||||
@@ -295,15 +334,93 @@ def default():
|
||||
if not verifiedServiceObj:
|
||||
return ("Service ({}) with this token ({}) not found in DB".format(service, token), 401)
|
||||
else:
|
||||
status = Status(service=service, timestamp=timestamp, status=status, info_text=text)
|
||||
|
||||
# handle a SMART-record submission (with errorhandling) #
|
||||
if smart and not verifiedServiceObj.special_type == "SMART":
|
||||
return ("SMART Field for non-SMART type service", 415)
|
||||
elif smart:
|
||||
text, status = record_and_check_smart(verifiedServiceObj,
|
||||
timestamp, smart)
|
||||
|
||||
status = Status(service=service, timestamp=timestamp, status=status,
|
||||
info_text=text)
|
||||
db.session.merge(status)
|
||||
db.session.commit()
|
||||
return ("", 204)
|
||||
else:
|
||||
return ("Method not implemented: {}".format(flask.request.method), 405)
|
||||
|
||||
def record_and_check_smart(service, timestamp, smart):
|
||||
|
||||
if "nvme_smart_health_information_log" in smart:
|
||||
health_info = smart["nvme_smart_health_information_log"]
|
||||
else:
|
||||
health_info = smarttools.normalize(smart)
|
||||
|
||||
if not service.special_type == "SMART":
|
||||
raise AssertionError("Trying to record SMART-record for non-SMART service")
|
||||
|
||||
# record the status #
|
||||
smart_status = SMARTStatus(service=service.service, timestamp=timestamp,
|
||||
temperature=health_info["temperature"],
|
||||
critical_warning=health_info["critical_warning"],
|
||||
unsafe_shutdowns=health_info["unsafe_shutdowns"],
|
||||
power_cycles=health_info["power_cycles"],
|
||||
power_on_hours=health_info["power_on_hours"],
|
||||
available_spare=health_info.get("available_spare"),
|
||||
model_number=smart.get("model_name"),
|
||||
wearleveling_count=health_info.get("wearleveling_count"))
|
||||
|
||||
db.session.add(smart_status)
|
||||
db.session.commit()
|
||||
|
||||
# check the status #
|
||||
smart_last_query = db.session.query(SMARTStatus)
|
||||
smart_last_query = smart_last_query.filter(SMARTStatus.service==service.service)
|
||||
smart_last = smart_last_query.order_by(sqlalchemy.desc(SMARTStatus.timestamp)).first()
|
||||
smart_second_last = smart_last_query.order_by(sqlalchemy.desc(
|
||||
SMARTStatus.timestamp)).offset(1).first()
|
||||
|
||||
# last record (max 6 months ago) #
|
||||
timestampt_minus_6m = datetime.datetime.now() - datetime.timedelta(days=180)
|
||||
smart_old_query = smart_last_query.filter(
|
||||
SMARTStatus.timestamp > timestampt_minus_6m.timestamp())
|
||||
smart_old = smart_old_query.order_by(sqlalchemy.asc(SMARTStatus.timestamp)).first()
|
||||
|
||||
# critial != 0 #
|
||||
if smart_last.critical_warning != 0:
|
||||
return ("SMART reports disk critical => oO better do something about this", "CRITICAL")
|
||||
|
||||
# wearleveling < 20% (SAMSUNG only) #
|
||||
if smart_last.wearleveling_count and smart_last.wearleveling_count <= 20:
|
||||
return ("SMART report prefail disk (wear_level < 20%)", "CRITICAL")
|
||||
|
||||
# temp max > X #
|
||||
if smart_last.temperature > 50:
|
||||
return ("Disk Temperatur {}".format(smart_last.temperature), "CRITICAL")
|
||||
|
||||
# available_SSD spare #
|
||||
spare_change = smart_old.available_spare - smart_last.available_spare
|
||||
|
||||
if smart_last.available_spare <= 25:
|
||||
return ("SSD spare <25 ({}) YOUR DISK WILL DIE SOON".format(spare_change),
|
||||
"CRITICAL")
|
||||
elif smart_last.available_spare <= 50:
|
||||
return ("SSD spare <50 ({})".format(spare_change), "WARNING")
|
||||
elif spare_change >= 10:
|
||||
return ("Strong degration in SSD spare space ({} in under 6 months)".format(
|
||||
spare_change), "WARNING")
|
||||
|
||||
# unsafe_shutdowns +1 #
|
||||
if smart_second_last.unsafe_shutdowns - smart_last.unsafe_shutdowns >= 1:
|
||||
return ("Disk had {} unsafe shutdowns".format(smart_last.unsafe_shutdowns),
|
||||
"WARNING")
|
||||
|
||||
return ("{} - no problems detected".format(smart_last.model_number), "OK")
|
||||
|
||||
|
||||
def create_app():
|
||||
|
||||
|
||||
db.create_all()
|
||||
config = {}
|
||||
|
||||
|
||||
49
smarttools.py
Normal file
49
smarttools.py
Normal file
@@ -0,0 +1,49 @@
|
||||
def normalize(smart):
|
||||
'''Load different types of SMART outputs'''
|
||||
|
||||
ret = dict()
|
||||
ret.update({ "temperature" : 0 })
|
||||
ret.update({ "critical_warning" : 0 })
|
||||
ret.update({ "unsafe_shutdowns" : 0 })
|
||||
ret.update({ "power_cycles" : 0 })
|
||||
ret.update({ "power_on_hours" : 0 })
|
||||
ret.update({ "available_spare" : 100 })
|
||||
ret.update({ "wearleveling_count" : 100 })
|
||||
|
||||
if "ata_smart_attributes" in smart:
|
||||
|
||||
# get main table #
|
||||
table = smart["ata_smart_attributes"]["table"]
|
||||
|
||||
# temperatur #
|
||||
ret["temperature"] = smart["temperature"]["current"]
|
||||
|
||||
for el in table:
|
||||
|
||||
# look for relevant metrics #
|
||||
name = el["name"].lower()
|
||||
target_name = el["name"].lower() # name in return map
|
||||
|
||||
# handle value mapping #
|
||||
use_raw = False
|
||||
if name == "used_rsvd_blk_cnt_tot":
|
||||
target_name = "available_spare"
|
||||
elif name == "power_cylce_count":
|
||||
target_name = "power_cycles"
|
||||
use_raw = True
|
||||
elif name == "power_on_hours":
|
||||
target_name = "power_on_hours"
|
||||
use_raw = True
|
||||
|
||||
# check if metric should be recorded #
|
||||
if target_name in ret:
|
||||
|
||||
# set return dict #
|
||||
if use_raw:
|
||||
value = el["raw"]["value"]
|
||||
else:
|
||||
value = el["value"]
|
||||
|
||||
ret[target_name] = value
|
||||
|
||||
return ret
|
||||
@@ -155,6 +155,22 @@ body{
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.smart-info{
|
||||
font-family: monospace;
|
||||
|
||||
padding-top: 2px;
|
||||
padding-top: 2px;
|
||||
padding-left: 5px;
|
||||
|
||||
padding-left: 5px;
|
||||
|
||||
color: black;
|
||||
|
||||
border: none;
|
||||
outline: none;
|
||||
cursor: auto;
|
||||
}
|
||||
|
||||
.box{
|
||||
border-style: solid;
|
||||
border-width: 1px;
|
||||
|
||||
@@ -37,6 +37,9 @@
|
||||
</br>
|
||||
{{ form.timeout.label }} {{ form.timeout() }} </br>
|
||||
|
||||
</br>
|
||||
{{ form.special_type.label }} {{ form.special_type() }} </br>
|
||||
|
||||
{% if is_modification %}
|
||||
<input class="form-button mt-4" type="submit" value="Send Modification">
|
||||
{% else %}
|
||||
|
||||
@@ -71,8 +71,39 @@
|
||||
class="service-token">Secret Token: {{ service.token }}</div>
|
||||
</div>
|
||||
|
||||
{% if service.special_type == "SMART" %}
|
||||
<div class="clear smart-info mt-3" style="background-color: orange;">
|
||||
Smart Monitor {% if smart %} for: {{ smart.model_number }} {% endif %}
|
||||
</div>
|
||||
<div class="clear smart-info mt-3" style="background-color: orange;">
|
||||
Example below requires smartmontools ("smartctl") in PATH.
|
||||
On Linux this is usually available via the package manager,
|
||||
on Windows install it from the
|
||||
<a style="text-decoration: underline; color: #5000e1; font-weight: bold;" href="https://www.smartmontools.org/wiki/Download#InstalltheWindowspackage">offical page</a>.
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if smart %}
|
||||
<h5 class="clear my-4">Linux</h5>
|
||||
{% else %}
|
||||
<h5 class="clear my-4">Curl</h5>
|
||||
{% endif %}
|
||||
<div class="ml-3 example">
|
||||
{% if smart %}
|
||||
SMART='{ <br>
|
||||
<div class="example-indent">
|
||||
"service" : "{{ service.service }}", <br>
|
||||
"token" : "{{ service.token }}", <br>
|
||||
"status" : "N/A", <br>
|
||||
"smart" : '$(/sbin/smartctl -a /dev/nvme0n1 --json)' <br>
|
||||
</div>
|
||||
}' <br><br>
|
||||
curl -X POST -H "Content-Type: application/json" \ <br>
|
||||
<div class="example-indent">
|
||||
--data "${SMART}" \ <br>
|
||||
{{ flask.request.url_root.replace("http://", "https://" )}}report
|
||||
</div>
|
||||
{% else %}
|
||||
curl -X POST \ <br>
|
||||
<div class="example-indent">
|
||||
-H "Content-Type: application/json" \ <br>
|
||||
@@ -81,8 +112,23 @@
|
||||
"status" : "OK", "info" : "Free Text Information here" }' \<br>
|
||||
{{ flask.request.url_root.replace("http://", "https://" )}}report
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
{% if smart %}
|
||||
<h5 class="my-4">Windows</h5>
|
||||
<div class="ml-3 example">
|
||||
$SMART = @{ <br>
|
||||
<div class="example-indent">
|
||||
service = "{{ service.service }}"<br>
|
||||
token = "{{ service.token }}"<br>
|
||||
status = "N/A"<br>
|
||||
smart = "$(smartctl -a C: --json | Out-String)"<br>
|
||||
</div>
|
||||
} | ConvertTo-Json<br><br>
|
||||
Invoke-RestMethod -TimeoutSec 2 -Uri "{{ flask.request.url_root.replace("http://", "https://" )}}report" -Method Post -Headers @{"Content-Type"="application/json"} -Body $SMART
|
||||
</div>
|
||||
{% else %}
|
||||
<h5 class="my-4">Python</h5>
|
||||
<div class="ml-3 example">
|
||||
import requests<br>
|
||||
@@ -97,6 +143,7 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<table class="mb-4 mt-5 status-table">
|
||||
<thead>
|
||||
|
||||
Reference in New Issue
Block a user