feat: add SMART monitoring support

This commit is contained in:
2024-01-03 14:41:11 +01:00
parent 0842818cbc
commit 683ebefbb0
5 changed files with 244 additions and 12 deletions

141
server.py
View File

@@ -12,7 +12,7 @@ import secrets
import flask_wtf import flask_wtf
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, BooleanField, DecimalField, HiddenField from wtforms import StringField, SubmitField, BooleanField, DecimalField, HiddenField, SelectField
from wtforms.validators import DataRequired, Length from wtforms.validators import DataRequired, Length
from sqlalchemy import Column, Integer, String, Boolean, or_, and_ from sqlalchemy import Column, Integer, String, Boolean, or_, and_
@@ -24,6 +24,7 @@ from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.sql.expression import func from sqlalchemy.sql.expression import func
import icingatools import icingatools
import smarttools
app = flask.Flask("Icinga Report In Gateway") app = flask.Flask("Icinga Report In Gateway")
@@ -41,6 +42,7 @@ class Service(db.Model):
token = Column(String) token = Column(String)
timeout = Column(Integer) timeout = Column(Integer)
owner = Column(String) owner = Column(String)
special_type = Column(String)
staticly_configured = Column(Boolean) staticly_configured = Column(Boolean)
@@ -57,6 +59,22 @@ class Status(db.Model):
dt = datetime.datetime.fromtimestamp(self.timestamp) dt = datetime.datetime.fromtimestamp(self.timestamp)
return dt.strftime("%d. %B %Y at %H:%M") return dt.strftime("%d. %B %Y at %H:%M")
class SMARTStatus(db.Model):
    '''One recorded SMART report of a service, stored in table "smart".

    A row is identified by the reporting service together with the
    timestamp of the submission; the remaining columns hold the health
    values extracted from the submitted SMART data.
    '''

    __tablename__ = "smart"

    # composite primary key: one row per service and submission time #
    service = Column(String, primary_key=True)
    timestamp = Column(Integer, primary_key=True)

    # health values taken from the submitted report #
    temperature = Column(Integer)
    available_spare = Column(Integer)
    unsafe_shutdowns = Column(Integer)
    critical_warning = Column(Integer)
    model_number = Column(String)
    # was declared twice; only this (later) declaration ever took effect #
    power_cycles = Column(Integer)
    power_on_hours = Column(Integer)
    wearleveling_count = Column(Integer)
def buildReponseDict(status, service=None): def buildReponseDict(status, service=None):
if not status: if not status:
@@ -108,6 +126,7 @@ class EntryForm(FlaskForm):
service = StringField("Service Name") service = StringField("Service Name")
service_hidden = HiddenField("service_hidden") service_hidden = HiddenField("service_hidden")
special_type = SelectField("Type", choices=["Default", "SMART"])
timeout = DecimalField("Timeout in days", default=30) timeout = DecimalField("Timeout in days", default=30)
def create_entry(form, user): def create_entry(form, user):
@@ -124,8 +143,13 @@ def create_entry(form, user):
raise AssertionError("WTF Service without Token {}".format(service_name)) raise AssertionError("WTF Service without Token {}".format(service_name))
day_delta = datetime.timedelta(days=int(form.timeout.data)) day_delta = datetime.timedelta(days=int(form.timeout.data))
special_type = form.special_type.data
if form.special_type == "Default":
special_type = None
service = Service(service=service_name, timeout=day_delta.total_seconds(), service = Service(service=service_name, timeout=day_delta.total_seconds(),
owner=user, token=token) owner=user, token=token, special_type=special_type)
# service.data set = create, service_hidden.data = modify # # service.data set = create, service_hidden.data = modify #
if form.service.data: if form.service.data:
@@ -155,8 +179,11 @@ def service_details():
icinga_link = icingatools.build_icinga_link_for_service(user, service.service, icinga_link = icingatools.build_icinga_link_for_service(user, service.service,
service.staticly_configured, app) service.staticly_configured, app)
smart_entry_list = db.session.query(SMARTStatus).filter(SMARTStatus.service==service.service)
smart_entry = smart_entry_list.order_by(SMARTStatus.timestamp.desc()).first()
return flask.render_template("service_info.html", service=service, flask=flask, return flask.render_template("service_info.html", service=service, flask=flask,
user=user, status_list=status_list, icinga_link=icinga_link) user=user, status_list=status_list, icinga_link=icinga_link, smart=smart_entry)
@app.route("/entry-form", methods=["GET", "POST", "DELETE"]) @app.route("/entry-form", methods=["GET", "POST", "DELETE"])
@@ -165,7 +192,7 @@ def create_interface():
user = str(flask.request.headers.get("X-Forwarded-Preferred-Username")) user = str(flask.request.headers.get("X-Forwarded-Preferred-Username"))
# check if is delete # # check if is delete #
operation = flask.request.args.get("operation") operation = flask.request.args.get("operation")
if operation and operation == "delete" : if operation and operation == "delete" :
service_delete_name = flask.request.args.get("service") service_delete_name = flask.request.args.get("service")
@@ -182,13 +209,14 @@ def create_interface():
return flask.redirect("/overview") return flask.redirect("/overview")
form = EntryForm() form = EntryForm()
# handle modification # # handle modification #
modify_service_name = flask.request.args.get("service") modify_service_name = flask.request.args.get("service")
if modify_service_name: if modify_service_name:
service = db.session.query(Service).filter(Service.service == modify_service_name).first() service = db.session.query(Service).filter(Service.service == modify_service_name).first()
if service and service.owner == user: if service and service.owner == user:
form.service.default = service.service form.service.default = service.service
form.special_type.default = service.special_type
form.timeout.default = datetime.timedelta(seconds=service.timeout).days form.timeout.default = datetime.timedelta(seconds=service.timeout).days
form.service_hidden.default = service.service form.service_hidden.default = service.service
form.process() form.process()
@@ -259,7 +287,7 @@ def default():
if not lastSuccess.timestamp == 0 and delta > timeout and latestInfoIsSuccess: if not lastSuccess.timestamp == 0 and delta > timeout and latestInfoIsSuccess:
# lastes info is success but timed out # # lastes info is success but timed out #
lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta)) lastSuccess.info_text = "Service {} overdue since {}".format(service, str(delta))
if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12): if timeout/delta > 0.9 or (delta - timeout) < datetime.timedelta(hours=12):
lastSuccess.status = "WARNING" lastSuccess.status = "WARNING"
else: else:
@@ -277,16 +305,27 @@ def default():
elif flask.request.method == "POST": elif flask.request.method == "POST":
# get variables # # get variables #
service = flask.request.json["service"] service = flask.request.json.get("service")
token = flask.request.json["token"] token = flask.request.json.get("token")
status = flask.request.json["status"] status = flask.request.json.get("status")
text = flask.request.json["info"] text = flask.request.json.get("info") or "no_info"
timestamp = datetime.datetime.now().timestamp() timestamp = datetime.datetime.now().timestamp()
smart = flask.request.json.get("smart")
# check smart json quoting problems #
if smart and type(smart) == str:
try:
smart = json.loads(smart)
except json.decoder.JSONDecodeError as e:
return ("Error in SMART-json {}".format(e), 415)
if not service: if not service:
return ("'service' ist empty field in json", 400) return ("'service' ist empty field in json", 400)
elif not token: elif not token:
return ("'token' ist empty field in json", 400) return ("'token' ist empty field in json", 400)
elif not status and not smart:
return ("'status' is empty field in json", 400)
# verify token & service in config # # verify token & service in config #
verifiedServiceObj = db.session.query(Service).filter( verifiedServiceObj = db.session.query(Service).filter(
@@ -295,15 +334,93 @@ def default():
if not verifiedServiceObj: if not verifiedServiceObj:
return ("Service ({}) with this token ({}) not found in DB".format(service, token), 401) return ("Service ({}) with this token ({}) not found in DB".format(service, token), 401)
else: else:
status = Status(service=service, timestamp=timestamp, status=status, info_text=text)
# handle a SMART-record submission (with errorhandling) #
if smart and not verifiedServiceObj.special_type == "SMART":
return ("SMART Field for non-SMART type service", 415)
elif smart:
text, status = record_and_check_smart(verifiedServiceObj,
timestamp, smart)
status = Status(service=service, timestamp=timestamp, status=status,
info_text=text)
db.session.merge(status) db.session.merge(status)
db.session.commit() db.session.commit()
return ("", 204) return ("", 204)
else: else:
return ("Method not implemented: {}".format(flask.request.method), 405) return ("Method not implemented: {}".format(flask.request.method), 405)
def record_and_check_smart(service, timestamp, smart):
    '''Store one SMART report for a service and rate the disk's health.

    The report is written to the "smart" table first; afterwards the newest
    stored record is compared against fixed thresholds and against older
    records of the same service.

    Parameters:
        service:   service row the report belongs to; its ``special_type``
                   must be "SMART" and its ``service`` name is used as key.
        timestamp: submission time stored with the record.
        smart:     parsed SMART report (dict). If it contains
                   "nvme_smart_health_information_log" that log is used
                   directly, otherwise the report goes through
                   ``smarttools.normalize``.

    Returns:
        Tuple ``(info_text, status)`` where status is one of "OK",
        "WARNING" or "CRITICAL".

    Raises:
        AssertionError: if the service is not of type "SMART".
        KeyError: if the health data lacks one of the mandatory fields
                  (temperature, critical_warning, unsafe_shutdowns,
                  power_cycles, power_on_hours).
    '''
    # refuse before touching the input or the database #
    if not service.special_type == "SMART":
        raise AssertionError("Trying to record SMART-record for non-SMART service")

    if "nvme_smart_health_information_log" in smart:
        health_info = smart["nvme_smart_health_information_log"]
    else:
        health_info = smarttools.normalize(smart)

    # record the status #
    smart_status = SMARTStatus(service=service.service, timestamp=timestamp,
                               temperature=health_info["temperature"],
                               critical_warning=health_info["critical_warning"],
                               unsafe_shutdowns=health_info["unsafe_shutdowns"],
                               power_cycles=health_info["power_cycles"],
                               power_on_hours=health_info["power_on_hours"],
                               available_spare=health_info.get("available_spare"),
                               model_number=smart.get("model_name"),
                               wearleveling_count=health_info.get("wearleveling_count"))
    db.session.add(smart_status)
    db.session.commit()

    # load the records needed for the checks #
    history = db.session.query(SMARTStatus).filter(
        SMARTStatus.service == service.service)
    newest_first = history.order_by(SMARTStatus.timestamp.desc())
    smart_last = newest_first.first()
    # None when this is the very first report of the service #
    smart_second_last = newest_first.offset(1).first()

    # oldest record that is at most 6 months (180 days) old #
    six_months_ago = datetime.datetime.now() - datetime.timedelta(days=180)
    smart_old = history.filter(
        SMARTStatus.timestamp > six_months_ago.timestamp()
    ).order_by(SMARTStatus.timestamp.asc()).first()

    # critical != 0 #
    if smart_last.critical_warning != 0:
        return ("SMART reports disk critical => oO better do something about this", "CRITICAL")

    # wearleveling <= 20% (SAMSUNG only) #
    # compare against None explicitly: a value of 0 is the worst case and
    # must not be skipped by a truthiness test
    wear_level = smart_last.wearleveling_count
    if wear_level is not None and wear_level <= 20:
        return ("SMART report prefail disk (wear_level < 20%)", "CRITICAL")

    # temp max > X #
    if smart_last.temperature > 50:
        return ("Disk Temperatur {}".format(smart_last.temperature), "CRITICAL")

    # available SSD spare (optional field, may be missing in the report) #
    spare = smart_last.available_spare
    if spare is not None:
        spare_change = 0
        if smart_old is not None and smart_old.available_spare is not None:
            spare_change = smart_old.available_spare - spare

        if spare <= 25:
            return ("SSD spare <25 ({}) YOUR DISK WILL DIE SOON".format(spare_change),
                    "CRITICAL")
        elif spare <= 50:
            return ("SSD spare <50 ({})".format(spare_change), "WARNING")
        elif spare_change >= 10:
            return ("Strong degration in SSD spare space ({} in under 6 months)".format(
                spare_change), "WARNING")

    # unsafe_shutdowns +1 since the previous report #
    # the counter only grows, so the increase is newest minus previous
    if (smart_second_last is not None
            and smart_last.unsafe_shutdowns is not None
            and smart_second_last.unsafe_shutdowns is not None):
        new_unsafe = smart_last.unsafe_shutdowns - smart_second_last.unsafe_shutdowns
        if new_unsafe >= 1:
            return ("Disk had {} unsafe shutdowns".format(smart_last.unsafe_shutdowns),
                    "WARNING")

    return ("{} - no problems detected".format(smart_last.model_number), "OK")
def create_app(): def create_app():
db.create_all() db.create_all()
config = {} config = {}

49
smarttools.py Normal file
View File

@@ -0,0 +1,49 @@
# lower-cased ATA attribute name -> (key in the result dict, use raw value?) #
_ATA_ATTRIBUTE_MAP = {
    "used_rsvd_blk_cnt_tot": ("available_spare", False),
    "power_cycle_count": ("power_cycles", True),
    "power_on_hours": ("power_on_hours", True),
    "wear_leveling_count": ("wearleveling_count", False),
}


def normalize(smart):
    '''Reduce a parsed SMART report to the health values the server records.

    Parameters:
        smart: dict parsed from a SMART JSON report. Only the keys
               "ata_smart_attributes" (with its "table" list) and
               "temperature" (with "current") are read.

    Returns:
        dict with the keys temperature, critical_warning, unsafe_shutdowns,
        power_cycles, power_on_hours, available_spare and wearleveling_count.
        Every key is always present; values not found in the report keep
        their defaults (0 for counters and temperature, 100 for
        available_spare and wearleveling_count).
    '''
    ret = {
        "temperature": 0,
        "critical_warning": 0,
        "unsafe_shutdowns": 0,
        "power_cycles": 0,
        "power_on_hours": 0,
        "available_spare": 100,
        "wearleveling_count": 100,
    }

    ata_attributes = smart.get("ata_smart_attributes")
    if not ata_attributes:
        return ret

    # temperature lives outside the attribute table; keep the default if absent #
    current_temperature = (smart.get("temperature") or {}).get("current")
    if current_temperature is not None:
        ret["temperature"] = current_temperature

    for el in ata_attributes.get("table") or []:
        name = el["name"].lower()

        # attributes without a mapping are stored under their own name,
        # using the normalized value, if that name is a known result key
        target_name, use_raw = _ATA_ATTRIBUTE_MAP.get(name, (name, False))
        if target_name not in ret:
            continue

        ret[target_name] = el["raw"]["value"] if use_raw else el["value"]

    return ret

View File

@@ -155,6 +155,22 @@ body{
cursor: pointer; cursor: pointer;
} }
/* Monospace info box shown on the detail page of SMART services. */
.smart-info{
    font-family: monospace;
    padding-top: 2px;
    padding-left: 5px;
    color: black;
    border: none;
    outline: none;
    cursor: auto;
}
.box{ .box{
border-style: solid; border-style: solid;
border-width: 1px; border-width: 1px;

View File

@@ -37,6 +37,9 @@
</br> </br>
{{ form.timeout.label }} {{ form.timeout() }} </br> {{ form.timeout.label }} {{ form.timeout() }} </br>
</br>
{{ form.special_type.label }} {{ form.special_type() }} </br>
{% if is_modification %} {% if is_modification %}
<input class="form-button mt-4" type="submit" value="Send Modification"> <input class="form-button mt-4" type="submit" value="Send Modification">
{% else %} {% else %}

View File

@@ -71,8 +71,39 @@
class="service-token">Secret Token: {{ service.token }}</div> class="service-token">Secret Token: {{ service.token }}</div>
</div> </div>
{% if service.special_type == "SMART" %}
<div class="clear smart-info mt-3" style="background-color: orange;">
Smart Monitor {% if smart %} for: {{ smart.model_number }} {% endif %}
</div>
<div class="clear smart-info mt-3" style="background-color: orange;">
Example below requires smartmontools ("smartctl") in PATH.
On Linux this is usually available via the package manager,
on Windows install it from the
<a style="text-decoration: underline; color: #5000e1; font-weight: bold;" href="https://www.smartmontools.org/wiki/Download#InstalltheWindowspackage">official page</a>.
</div>
{% endif %}
{% if smart %}
<h5 class="clear my-4">Linux</h5>
{% else %}
<h5 class="clear my-4">Curl</h5> <h5 class="clear my-4">Curl</h5>
{% endif %}
<div class="ml-3 example"> <div class="ml-3 example">
{% if smart %}
SMART='{ <br>
<div class="example-indent">
"service" : "{{ service.service }}", <br>
"token" : "{{ service.token }}", <br>
"status" : "N/A", <br>
"smart" : '$(/sbin/smartctl -a /dev/nvme0n1 --json)' <br>
</div>
}' <br><br>
curl -X POST -H "Content-Type: application/json" \ <br>
<div class="example-indent">
--data "${SMART}" \ <br>
{{ flask.request.url_root.replace("http://", "https://" )}}report
</div>
{% else %}
curl -X POST \ <br> curl -X POST \ <br>
<div class="example-indent"> <div class="example-indent">
-H "Content-Type: application/json" \ <br> -H "Content-Type: application/json" \ <br>
@@ -81,8 +112,23 @@
"status" : "OK", "info" : "Free Text Information here" }' \<br> "status" : "OK", "info" : "Free Text Information here" }' \<br>
{{ flask.request.url_root.replace("http://", "https://" )}}report {{ flask.request.url_root.replace("http://", "https://" )}}report
</div> </div>
{% endif %}
</div> </div>
{% if smart %}
<h5 class="my-4">Windows</h5>
<div class="ml-3 example">
$SMART = @{ <br>
<div class="example-indent">
service = "{{ service.service }}"<br>
token = "{{ service.token }}"<br>
status = "N/A"<br>
smart = "$(smartctl -a C: --json | Out-String)"<br>
</div>
} | ConvertTo-Json<br><br>
Invoke-RestMethod -TimeoutSec 2 -Uri "{{ flask.request.url_root.replace("http://", "https://" )}}report" -Method Post -Headers @{"Content-Type"="application/json"} -Body $SMART
</div>
{% else %}
<h5 class="my-4">Python</h5> <h5 class="my-4">Python</h5>
<div class="ml-3 example"> <div class="ml-3 example">
import requests<br> import requests<br>
@@ -97,6 +143,7 @@
</div> </div>
</div> </div>
</div> </div>
{% endif %}
<table class="mb-4 mt-5 status-table"> <table class="mb-4 mt-5 status-table">
<thead> <thead>