This commit is contained in:
Yannik Schmidt
2021-09-13 19:47:47 +02:00
parent 792806838e
commit 8b0fee5994
4 changed files with 47 additions and 3 deletions

View File

@@ -14,21 +14,46 @@ def load(filename):
# pop vars
customer = "NOT_FOUND"
location = ""
inDocumentDate = None
startDate = doc.metadata["creationDate"].split("D:")[1].split("+")[0]
startDateParsed = dateutil.parser.parse(startDate)
blowerdoorDate = "NOT_FOUND"
datumNext = False
page = -1
for p in doc:
page += 1
blocks = p.get_text("blocks")
for i in range(0, len(blocks)):
text = blocks[i][BLOCK_TUP_TEXT]
textNoSpaceNewline = text.replace("\n", "")
textNoSpaceNewline = textNoSpaceNewline.replace(" ", "")
if datumNext and page == 0:
try:
#if "Bauablaufplan11.pdf" in filename:
# print(textNoSpaceNewline)
inDocumentDate= dateutil.parser.parse(textNoSpaceNewline)
datumNext = False
except ValueError:
try:
split = textNoSpaceNewline.split(".de")[1]
inDocumentDate = dateutil.parser.parse(split)
except ValueError:
pass
except IndexError:
pass
if FIRST_P and i < 3 and textNoSpaceNewline:
FIRST_P = False
customer = text
if "Datum:" in text:
datumNext = True
if "Bauort:" in text:
location += text.split("Bauort:")[1]
@@ -72,4 +97,4 @@ def load(filename):
filename = filename.replace("\\","/")
return data.BlowerdoorData(filename, os.path.basename(filename), location,
customer, startDateParsed, blowerdoorDate)
customer, startDateParsed, blowerdoorDate, inDocumentDate)