From 792806838e6ccf8878189a3cd90cbc88bdf90adc Mon Sep 17 00:00:00 2001 From: Yannik Schmidt Date: Mon, 13 Sep 2021 19:08:59 +0200 Subject: [PATCH 1/3] update for error resistance --- eg_geiss_bauherren.py | 17 +++++++++++++---- server.py | 10 +++++++++- templates/index.html | 4 ++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/eg_geiss_bauherren.py b/eg_geiss_bauherren.py index 5a543ef..efd114b 100644 --- a/eg_geiss_bauherren.py +++ b/eg_geiss_bauherren.py @@ -7,6 +7,7 @@ import os.path BLOCK_TUP_TEXT = 4 def load(filename): + print(filename) doc = fitz.open(filename) FIRST_P = True @@ -32,6 +33,7 @@ def load(filename): if "Bauort:" in text: location += text.split("Bauort:")[1] + kwErrorInfo = None if "Thermoscan" in text: kwParts = text.split("\n") kw = "" @@ -42,7 +44,10 @@ def load(filename): if not pClean: continue elif not kw: - kw = int(pClean.split(". KW")[0]) + try: + kw = int(pClean.split(". KW")[0]) + except ValueError: + kwErrorInfo = "Kalenderwochen Info nicht gefunden." elif not title: title = pClean elif not contractor: @@ -50,10 +55,14 @@ def load(filename): ISO_CAL_KW_LOC = 1 kwStartDate = startDateParsed.isocalendar()[ISO_CAL_KW_LOC] - if kw < kwStartDate: - blowerdoorDate = "{} KW-{:02d}".format(startDateParsed.year +1, kw) + + if kwErrorInfo: + blowerdoorDate = None else: - blowerdoorDate = "{} KW-{}".format(startDateParsed.year, kw) + if kw < kwStartDate: + blowerdoorDate = "{} KW-{:02d}".format(startDateParsed.year +1, kw) + else: + blowerdoorDate = "{} KW-{}".format(startDateParsed.year, kw) diff --git a/server.py b/server.py index 57f8ec9..ab5dfa6 100644 --- a/server.py +++ b/server.py @@ -2,6 +2,9 @@ import flask import argparse import glob import os +from data import BlowerdoorData +import datetime +import os.path import eg_geiss_bauherren as parserBackend @@ -10,8 +13,13 @@ app = flask.Flask("THS-Raven") @app.route("/") def root(): allFiles = [] + loaded = None for filename in glob.glob("static/files/*.pdf"): - allFiles.append(parserBackend.load(filename)) + try: + loaded = parserBackend.load(filename) + except Exception: + loaded = BlowerdoorData(os.path.basename(filename), os.path.basename(filename), "", "", datetime.datetime.now(), datetime.datetime.now()) + allFiles.append(loaded) return flask.render_template("index.html", listContent=allFiles) diff --git a/templates/index.html b/templates/index.html index c3caa56..318198a 100644 --- a/templates/index.html +++ b/templates/index.html @@ -23,7 +23,11 @@ {{ bd.docName }} {{ bd.location }} + {% if bd.blowerdoorDate %} {{ bd.blowerdoorDate }} + {% else %} + 0000 - Keine Informationen Gefunden + {% endif %} {{ bd.customer }} {{ bd.pdfDate.strftime("%Y/%m/%d") }} From 8b0fee599495dfe533327f778648e013ce6a56c6 Mon Sep 17 00:00:00 2001 From: Yannik Schmidt Date: Mon, 13 Sep 2021 19:47:47 +0200 Subject: [PATCH 2/3] outdated --- data.py | 5 ++++- eg_geiss_bauherren.py | 27 ++++++++++++++++++++++++++- server.py | 14 ++++++++++++++ templates/index.html | 4 +++- 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/data.py b/data.py index 8d56825..872f2df 100644 --- a/data.py +++ b/data.py @@ -1,11 +1,14 @@ class BlowerdoorData: - def __init__(self, path, docName, location, customer, pdfDate, blowerdoorDate): + def __init__(self, path, docName, location, customer, pdfDate, blowerdoorDate, inDocumentDate=None): self.path = path self.docName = docName self.location = location self.customer = customer self.blowerdoorDate = blowerdoorDate self.pdfDate = pdfDate + self.inDocumentDate = inDocumentDate + + self.outdated = False #print("Bauort: " + location) diff --git a/eg_geiss_bauherren.py b/eg_geiss_bauherren.py index efd114b..864752c 100644 --- a/eg_geiss_bauherren.py +++ b/eg_geiss_bauherren.py @@ -14,21 +14,46 @@ def load(filename): # pop vars customer = "NOT_FOUND" location = "" + inDocumentDate = None startDate = doc.metadata["creationDate"].split("D:")[1].split("+")[0] startDateParsed = dateutil.parser.parse(startDate) blowerdoorDate = "NOT_FOUND" + datumNext = False + page = -1 for p in doc: + page += 1 blocks = p.get_text("blocks") for i in range(0, len(blocks)): + text = blocks[i][BLOCK_TUP_TEXT] textNoSpaceNewline = text.replace("\n", "") textNoSpaceNewline = textNoSpaceNewline.replace(" ", "") + + + if datumNext and page == 0: + try: + #if "Bauablaufplan11.pdf" in filename: + # print(textNoSpaceNewline) + inDocumentDate= dateutil.parser.parse(textNoSpaceNewline) + datumNext = False + except ValueError: + try: + split = textNoSpaceNewline.split(".de")[1] + inDocumentDate = dateutil.parser.parse(split) + except ValueError: + pass + except IndexError: + pass + if FIRST_P and i < 3 and textNoSpaceNewline: FIRST_P = False customer = text + + if "Datum:" in text: + datumNext = True if "Bauort:" in text: location += text.split("Bauort:")[1] @@ -72,4 +97,4 @@ def load(filename): filename = filename.replace("\\","/") return data.BlowerdoorData(filename, os.path.basename(filename), location, - customer, startDateParsed, blowerdoorDate) + customer, startDateParsed, blowerdoorDate, inDocumentDate) diff --git a/server.py b/server.py index ab5dfa6..77f9135 100644 --- a/server.py +++ b/server.py @@ -15,12 +15,26 @@ def root(): allFiles = [] loaded = None for filename in glob.glob("static/files/*.pdf"): + loaded = parserBackend.load(filename) try: loaded = parserBackend.load(filename) except Exception: loaded = BlowerdoorData(os.path.basename(filename), os.path.basename(filename), "", "", datetime.datetime.now(), datetime.datetime.now()) allFiles.append(loaded) + + # check duplicates + duplicateCheckMap = dict() + for f in allFiles: + if f.inDocumentDate: + duplicateCheckMap.update({ f.customer + f.location : f }) + for f in allFiles: + key = f.customer + f.location + if key in duplicateCheckMap and not f is duplicateCheckMap[key]: + if f.inDocumentDate <= duplicateCheckMap[key].inDocumentDate: + f.outdated = True + + return flask.render_template("index.html", listContent=allFiles) @app.route("/get-file") diff --git a/templates/index.html b/templates/index.html index 318198a..fb0f24d 100644 --- a/templates/index.html +++ b/templates/index.html @@ -21,7 +21,9 @@ {% for bd in listContent %} - {{ bd.docName }} + {{ bd.docName }} + {% if bd.outdated %}

(älter: {{ bd.inDocumentDate.strftime("%d.%m.%Y") }})

{% endif %} + {{ bd.location }} {% if bd.blowerdoorDate %} {{ bd.blowerdoorDate }} From bfe02023995bb48fc312f5f20b001346275b3852 Mon Sep 17 00:00:00 2001 From: Yannik Schmidt Date: Mon, 13 Sep 2021 19:48:52 +0200 Subject: [PATCH 3/3] change info message --- templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/index.html b/templates/index.html index fb0f24d..69703fd 100644 --- a/templates/index.html +++ b/templates/index.html @@ -22,7 +22,7 @@ {% for bd in listContent %} {{ bd.docName }} - {% if bd.outdated %}

(älter: {{ bd.inDocumentDate.strftime("%d.%m.%Y") }})

{% endif %} + {% if bd.outdated %}

(neueres Dokument verfügbar: {{ bd.inDocumentDate.strftime("%d.%m.%Y") }})

{% endif %} {{ bd.location }} {% if bd.blowerdoorDate %}