Files
ths-blowerdoor-raven/eg_geiss_bauherren.py
Yannik Schmidt 8b0fee5994 outdated
2021-09-13 19:47:47 +02:00

101 lines
3.2 KiB
Python

import fitz
import data
import dateutil.parser
import os.path
# Position of the text payload inside each tuple returned by
# ``page.get_text("blocks")``.
BLOCK_TUP_TEXT = 4


def _parse_document_date(compactText):
    """Return the date found in *compactText*, or ``None`` if none parses.

    *compactText* is a block's text with spaces and newlines removed.  The
    whole string is tried first; if that fails, the segment following the
    first ".de" is tried instead.
    """
    try:
        return dateutil.parser.parse(compactText)
    except ValueError:
        pass
    try:
        return dateutil.parser.parse(compactText.split(".de")[1])
    except (ValueError, IndexError):
        # IndexError: the block contains no ".de" at all.
        return None


def _parse_calendar_week(text):
    """Return the week number from the first non-empty line of *text*.

    The line must start with an integer followed by ". KW" (or be a bare
    integer); otherwise ``None`` is returned.
    """
    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            return int(line.split(". KW")[0])
        except ValueError:
            return None
    return None


def _format_blowerdoor_week(week, startDate):
    """Format *week* as "<year> KW-<ww>" relative to *startDate*.

    A week number smaller than the ISO week of *startDate* is taken to lie
    in the following year.
    """
    isoWeekOfStart = startDate.isocalendar()[1]
    year = startDate.year + 1 if week < isoWeekOfStart else startDate.year
    # Always zero-pad so both branches yield the same string layout.
    return "{} KW-{:02d}".format(year, week)


def load(filename):
    """Read a PDF and build a ``data.BlowerdoorData`` record from its text.

    The following values are collected while walking all text blocks:

    * customer       -- text of the first non-blank block among the first
                        three blocks of a page ("NOT_FOUND" if there is none)
    * location       -- everything following "Bauort:" in any block
    * inDocumentDate -- date parsed from a block that follows a "Datum:"
                        block on the first page (``None`` if not found)
    * blowerdoorDate -- "<year> KW-<ww>" taken from a block mentioning
                        "Thermoscan"; ``None`` if that block has no parsable
                        week, "NOT_FOUND" if no such block exists

    Args:
        filename: Path of the PDF file to read.

    Returns:
        ``data.BlowerdoorData(filename, basename, location, customer,
        startDateParsed, blowerdoorDate, inDocumentDate)`` where *filename*
        has backslashes replaced by forward slashes and *startDateParsed* is
        the PDF's creation date.

    Raises:
        KeyError, IndexError: If the PDF metadata has no "creationDate" entry
            or the entry has no "D:" prefix.
        ValueError: If the creation date cannot be parsed.
    """
    print(filename)
    doc = fitz.open(filename)
    try:
        # PDF dates look like "D:20210913194747+02'00'"; keep the part
        # between the "D:" prefix and the "+" of the UTC offset.
        startDate = doc.metadata["creationDate"].split("D:")[1].split("+")[0]
        startDateParsed = dateutil.parser.parse(startDate)

        customer = "NOT_FOUND"
        location = ""
        inDocumentDate = None
        blowerdoorDate = "NOT_FOUND"

        customerFound = False
        datumNext = False

        for pageNumber, pdfPage in enumerate(doc):
            blocks = pdfPage.get_text("blocks")
            for blockIndex, block in enumerate(blocks):
                text = block[BLOCK_TUP_TEXT]
                compactText = text.replace("\n", "").replace(" ", "")

                # A "Datum:" label was seen earlier: keep trying blocks of
                # the first page until one of them yields a date.
                if datumNext and pageNumber == 0:
                    parsedDate = _parse_document_date(compactText)
                    if parsedDate is not None:
                        inDocumentDate = parsedDate
                        datumNext = False

                if not customerFound and blockIndex < 3 and compactText:
                    customerFound = True
                    customer = text
                if "Datum:" in text:
                    datumNext = True
                if "Bauort:" in text:
                    location += text.split("Bauort:")[1]

                if "Thermoscan" in text:
                    week = _parse_calendar_week(text)
                    if week is None:
                        blowerdoorDate = None
                    else:
                        blowerdoorDate = _format_blowerdoor_week(
                            week, startDateParsed)
    finally:
        # Release the file handle even if parsing fails half-way.
        doc.close()

    # Collapse blank lines and trim surrounding newlines.
    location = location.replace("\n\n", "\n").strip("\n")
    customer = customer.replace("\n \n", "\n").replace("\n\n", "\n")
    customer = customer.strip("\n")

    filename = filename.replace("\\", "/")

    return data.BlowerdoorData(filename, os.path.basename(filename), location,
                               customer, startDateParsed, blowerdoorDate,
                               inDocumentDate)