# Recovered from git patch 92408f29e9e7654941d0a5735c0c8322753817b6
# ("[PATCH] Initial", Yannik Schmidt, Wed, 8 Sep 2021 10:46:42 +0200), which
# created this script as eg-geiss-bauherren.py together with a .gitignore
# containing the two entries "*.pdf" and "*.sqlite".

"""Print client, building site and Blowerdoor week from a schedule PDF.

The script reads a construction schedule ("Bauablaufplan") and prints three
lines:

* ``Bauort``     - every text fragment that follows a ``Bauort:`` label,
* ``Bauherr``    - the first non-blank text block among the first three
                   blocks of a page (earliest page wins),
* ``Blowerdoor`` - the calendar week ("KW") named in the last text block
                   that mentions ``Thermoscan``, combined with a year derived
                   from the PDF creation date.

Usage::

    python eg-geiss-bauherren.py [path/to/schedule.pdf]

Without an argument the file ``Bauablaufplan.pdf`` in the current directory
is read.
"""

import re
import sys

# NOTE: the third-party modules (PyMuPDF as ``fitz`` and ``dateutil``) are
# imported inside ``main`` so that the text-only helpers below stay importable
# on machines where those packages are not installed.

# Index of the text payload inside a tuple returned by page.get_text("blocks").
BLOCK_TUP_TEXT = 4
# File that is read when no path is given on the command line.
DEFAULT_PDF = "Bauablaufplan.pdf"
# Placeholder printed for values that could not be located in the document.
NOT_FOUND = "NOT_FOUND"
# The client is only looked for among this many leading blocks of a page.
CUSTOMER_SEARCH_BLOCKS = 3

# "<number>. KW" at the start of a line. Any whitespace (or none) is accepted
# around the period because the exact separator used by the PDF is unknown.
_KW_LINE = re.compile(r"(\d+)\s*\.\s*KW")
# Date part of a PDF date string, optionally followed by a UTC designator
# ("Z", "+hh'mm'" or "-hh'mm'").
_PDF_DATE = re.compile(r"(\d{4,14})(?:[Zz]|[+\-]\d{2})?(?:'?\d{2}'?)?")


def pdf_date_stamp(raw):
    """Return the date/time part of a PDF date string such as ``D:2021...``.

    Args:
        raw: Value of the ``creationDate`` metadata entry.

    Returns:
        The text after the ``D:`` marker with a trailing time-zone designator
        removed. Values that do not have the expected shape fall back to the
        text before the first ``+``.

    Raises:
        ValueError: If ``raw`` is empty or carries no ``D:`` marker.
    """
    _, marker, stamp = (raw or "").partition("D:")
    if not marker:
        raise ValueError(
            "PDF creation date {!r} lacks the 'D:' prefix".format(raw)
        )
    stamp = stamp.strip()
    match = _PDF_DATE.fullmatch(stamp)
    if match:
        return match.group(1)
    return stamp.split("+")[0]


def _is_blank(text):
    """Return True when ``text`` holds nothing but spaces and newlines."""
    return not text.replace("\n", "").replace(" ", "")


def _parse_calendar_week(block_text):
    """Return the week number of the first "<n>. KW" line, or None."""
    for line in block_text.split("\n"):
        match = _KW_LINE.match(line.strip())
        if match:
            return int(match.group(1))
    return None


def _format_blowerdoor_date(week, created):
    """Combine ``week`` with the year it most plausibly belongs to.

    A week earlier than the week in which the plan was created is assumed to
    lie in the following year. The ISO year is used (not the calendar year)
    so that plans created in the first days of January, which can still
    belong to the last ISO week of the previous year, are handled correctly.
    """
    iso_year, iso_week = created.isocalendar()[:2]
    if week < iso_week:
        return "{} KW-{} (assumed next year)".format(iso_year + 1, week)
    return "{} KW-{}".format(iso_year, week)


def _tidy(text):
    """Collapse blank lines and trim surrounding newlines."""
    return text.replace("\n \n", "\n").replace("\n\n", "\n").strip("\n")


def extract_plan_info(pages, created):
    """Extract building site, client and Blowerdoor week from block texts.

    Args:
        pages: Iterable of pages; each page is a sequence with the text of
            every block on that page, in document order.
        created: ``datetime`` (or ``date``) on which the plan was created.

    Returns:
        Tuple ``(location, customer, blowerdoor)`` of strings. ``customer``
        and ``blowerdoor`` are ``"NOT_FOUND"`` when nothing matched;
        ``location`` is empty in that case.
    """
    customer = NOT_FOUND
    location = ""
    blowerdoor = NOT_FOUND
    customer_pending = True

    for block_texts in pages:
        for index, text in enumerate(block_texts):
            if (
                customer_pending
                and index < CUSTOMER_SEARCH_BLOCKS
                and not _is_blank(text)
            ):
                customer_pending = False
                customer = text

            if "Bauort:" in text:
                location += text.split("Bauort:")[1]

            if "Thermoscan" in text:
                week = _parse_calendar_week(text)
                # A block without a recognisable week keeps the previous
                # result instead of aborting the whole run.
                if week is not None:
                    blowerdoor = _format_blowerdoor_date(week, created)

    return _tidy(location), _tidy(customer), blowerdoor


def main(argv=None):
    """Read the schedule PDF and print the extracted values.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, if any, is the PDF path.
    """
    import dateutil.parser
    import fitz

    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else DEFAULT_PDF

    # The context manager closes the document even if extraction fails.
    with fitz.open(path) as doc:
        created = dateutil.parser.parse(
            pdf_date_stamp(doc.metadata["creationDate"])
        )
        pages = [
            [block[BLOCK_TUP_TEXT] for block in page.get_text("blocks")]
            for page in doc
        ]

    location, customer, blowerdoor = extract_plan_info(pages, created)
    print("Bauort: " + location)
    print("Bauherr: " + customer)
    print("Blowerdoor: " + blowerdoor)


if __name__ == "__main__":
    main()