diff --git a/data.py b/data.py index 8d56825..872f2df 100644 --- a/data.py +++ b/data.py @@ -1,11 +1,14 @@ class BlowerdoorData: - def __init__(self, path, docName, location, customer, pdfDate, blowerdoorDate): + def __init__(self, path, docName, location, customer, pdfDate, blowerdoorDate, inDocumentDate=None): self.path = path self.docName = docName self.location = location self.customer = customer self.blowerdoorDate = blowerdoorDate self.pdfDate = pdfDate + self.inDocumentDate = inDocumentDate + + self.outdated = False #print("Bauort: " + location) diff --git a/eg_geiss_bauherren.py b/eg_geiss_bauherren.py index 5a543ef..864752c 100644 --- a/eg_geiss_bauherren.py +++ b/eg_geiss_bauherren.py @@ -7,31 +7,58 @@ import os.path BLOCK_TUP_TEXT = 4 def load(filename): + print(filename) doc = fitz.open(filename) FIRST_P = True # pop vars customer = "NOT_FOUND" location = "" + inDocumentDate = None startDate = doc.metadata["creationDate"].split("D:")[1].split("+")[0] startDateParsed = dateutil.parser.parse(startDate) blowerdoorDate = "NOT_FOUND" + datumNext = False + page = -1 for p in doc: + page += 1 blocks = p.get_text("blocks") for i in range(0, len(blocks)): + text = blocks[i][BLOCK_TUP_TEXT] textNoSpaceNewline = text.replace("\n", "") textNoSpaceNewline = textNoSpaceNewline.replace(" ", "") + + + if datumNext and page == 0: + try: + #if "Bauablaufplan11.pdf" in filename: + # print(textNoSpaceNewline) + inDocumentDate= dateutil.parser.parse(textNoSpaceNewline) + datumNext = False + except ValueError: + try: + split = textNoSpaceNewline.split(".de")[1] + inDocumentDate = dateutil.parser.parse(split) + except ValueError: + pass + except IndexError: + pass + if FIRST_P and i < 3 and textNoSpaceNewline: FIRST_P = False customer = text + + if "Datum:" in text: + datumNext = True if "Bauort:" in text: location += text.split("Bauort:")[1] + kwErrorInfo = None if "Thermoscan" in text: kwParts = text.split("\n") kw = "" @@ -42,7 +69,10 @@ def load(filename): if not pClean: continue elif not kw: - kw = int(pClean.split(". KW")[0]) + try: + kw = int(pClean.split(". KW")[0]) + except ValueError: + kwErrorInfo = "Kalenderwochen Info nicht gefunden." elif not title: title = pClean elif not contractor: @@ -50,10 +80,14 @@ def load(filename): ISO_CAL_KW_LOC = 1 kwStartDate = startDateParsed.isocalendar()[ISO_CAL_KW_LOC] - if kw < kwStartDate: - blowerdoorDate = "{} KW-{:02d}".format(startDateParsed.year +1, kw) + + if kwErrorInfo: + blowerdoorDate = None else: - blowerdoorDate = "{} KW-{}".format(startDateParsed.year, kw) + if kw < kwStartDate: + blowerdoorDate = "{} KW-{:02d}".format(startDateParsed.year +1, kw) + else: + blowerdoorDate = "{} KW-{}".format(startDateParsed.year, kw) @@ -63,4 +97,4 @@ def load(filename): filename = filename.replace("\\","/") return data.BlowerdoorData(filename, os.path.basename(filename), location, - customer, startDateParsed, blowerdoorDate) + customer, startDateParsed, blowerdoorDate, inDocumentDate) diff --git a/server.py b/server.py index 57f8ec9..77f9135 100644 --- a/server.py +++ b/server.py @@ -2,6 +2,9 @@ import flask import argparse import glob import os +from data import BlowerdoorData +import datetime +import os.path import eg_geiss_bauherren as parserBackend @@ -10,9 +13,28 @@ app = flask.Flask("THS-Raven") @app.route("/") def root(): allFiles = [] + loaded = None for filename in glob.glob("static/files/*.pdf"): - allFiles.append(parserBackend.load(filename)) + loaded = parserBackend.load(filename) + try: + loaded = parserBackend.load(filename) + except Exception: + loaded = BlowerdoorData(os.path.basename(filename), os.path.basename(filename), "", "", datetime.datetime.now(), datetime.datetime.now()) + allFiles.append(loaded) + + # check duplicates + duplicateCheckMap = dict() + for f in allFiles: + if f.inDocumentDate: + duplicateCheckMap.update({ f.customer + f.location : f }) + for f in allFiles: + key = f.customer + f.location + if key in duplicateCheckMap and not f is duplicateCheckMap[key]: + if f.inDocumentDate <= duplicateCheckMap[key].inDocumentDate: + f.outdated = True + + return flask.render_template("index.html", listContent=allFiles) @app.route("/get-file") diff --git a/templates/index.html b/templates/index.html index c3caa56..69703fd 100644 --- a/templates/index.html +++ b/templates/index.html @@ -21,9 +21,15 @@
{% for bd in listContent %}(neueres Dokument verfügbar: {{ bd.inDocumentDate.strftime("%d.%m.%Y") }})
{% endif %} +