mirror of https://github.com/FAUSheppy/ths-reference-data-collector
synced 2025-12-06 15:01:35 +01:00
feat: support manual dwd data
.gitignore (vendored) | 2

@@ -1,3 +1,5 @@
__pycache__/
dwd/
cache/
*.csv
*.swp

fallback_csv.py (new file) | 114

@@ -0,0 +1,114 @@
import glob
import datetime
import os

SKIP_LINES = 13

def cache_content(from_time, to_time, data, dtype):

    return_string = ""

    skip_count = 0

    for i in range(0, SKIP_LINES):
        return_string += "\n"

    for d in data:

        date, primary, secondary = d

        # skip outside timeframe #
        if date < from_time or date > to_time:
            continue

        if dtype in ["lufttemperatur-aussen", "windgeschwindigkeit", "niederschlagsmenge"]:
            content_number = primary
        elif dtype in ["luftfeuchte", "windrichtung"]:
            content_number = secondary
        elif dtype == "luftdruck":
            content_number = -1
        else:
            raise ValueError("Bad dtype: {}".format(dtype))

        date_cache_format = date.strftime("%d.%m.%Y %H:%M")
        content_str = "{:.1f}".format(content_number).replace(".", ",")
        return_string += "{};{}\n".format(date_cache_format, content_str)

    return return_string
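
For orientation, a minimal usage sketch of cache_content (not part of the commit; the sample rows and values are invented, and it assumes the module is importable as fallback_csv):

import datetime

import fallback_csv

# invented sample rows: (timestamp, primary value, secondary value)
sample = [
    (datetime.datetime(2025, 1, 1, 12), 3.4, 87.0),
    (datetime.datetime(2025, 1, 1, 13), 3.9, 85.0),
]

text = fallback_csv.cache_content(
    datetime.datetime(2025, 1, 1),
    datetime.datetime(2025, 1, 2),
    sample,
    "lufttemperatur-aussen",
)

# the first SKIP_LINES lines are empty, the data rows use a German date format
# and decimal comma, e.g. ['01.01.2025 12:00;3,4', '01.01.2025 13:00;3,9']
print(text.splitlines()[fallback_csv.SKIP_LINES:])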

def generate(master_dir, from_time, to_time, cache_file, dtype):

    if dtype == "lufttemperatur-aussen" or dtype == "luftfeuchte":
        base_name = "/produkt_tu_stunde*.txt"
    elif dtype == "windgeschwindigkeit" or dtype == "windrichtung":
        base_name = "/produkt_ff_stunde*.txt"
    elif dtype == "niederschlagsmenge":
        base_name = "/produkt_rr_stunde*.txt"
    elif dtype == "luftdruck":
        base_name = "/produkt_tu_stunde*.txt"  # <- placeholder because the pressure product is missing
    else:
        raise ValueError("Unsupported D-Type: {}".format(dtype))

    timeframes = []

    if not os.path.isdir(master_dir):
        os.mkdir(master_dir)

    # read files
    files = glob.glob(master_dir + base_name)

    if not files:
        raise ValueError("Keine DWD_Datei für {} in: {} gefunden. Bitte herunterladen und entpacken! https://www.dwd.de/DE/leistungen/klimadatendeutschland/klarchivstunden.html;jsessionid=C423E76B30D18F24C43F4E7E36744C8C.live21073?nn=16102".format(dtype, os.getcwd() + ", " + master_dir))

    for fname in files:

        start = None
        end = None
        data = []

        # read file
        with open(fname) as f:
            first_line = True

            # iterate through csv #
            for line in f:

                # skip header
                if first_line:
                    first_line = False
                    continue

                # read the line #
                # temperature & humidity    => tu
                # wind speed & direction    => ff
                # precipitation & (nothing) => rr
                if dtype == "niederschlagsmenge":
                    station_id, fulldate, dunno, primary, secondary, na2, na3 = line.split(";")
                else:
                    station_id, fulldate, dunno, primary, secondary, na2 = line.split(";")

                # parse date #
                date = datetime.datetime.strptime(fulldate, "%Y%m%d%H")

                # append data #
                data.append((date, float(primary), float(secondary)))

                # set start and end #
                if not start and date:
                    start = date
                elif date:
                    end = date

        # save values #
        timeframes.append((start, end, data))
        print(dtype, start, end)
        print(dtype, from_time, to_time)

    # find a fitting frame #
    for start, end, data in timeframes:
        if from_time >= start and to_time <= end:
            return cache_content(from_time, to_time, data, dtype)

    if dtype.startswith("wind"):
        return ""
    raise ValueError("Keine Datei mit passenden Daten gefunden. Bitte Readme lesen")

main.py | 16

@@ -12,6 +12,7 @@ import openpyxl
import glob
import calendar

import fallback_csv

CSV_DIR = "csvfiles"
CACHE_DIR = "cache"

@@ -52,12 +53,20 @@ def downloadFlugfeldData(fromTime, toTime, dtype):
        r = requests.get(url)
        content = r.content.decode('utf-8', "ignore") # ignore bad bytes

        # check response code #
        if r.status_code != 200 or "nicht gefunden" in r.text.lower():
            print("Flugfeld kapott")
            content = fallback_csv.generate("./dwd", fromTime, toTime, cacheFile, dtype)
        else:
            content = r.content.decode('utf-8', "ignore") # ignore bad bytes

        # cache data
        if not os.path.isdir(cacheDir):
            os.mkdir(cacheDir)
        with open(fullpath, 'w') as f:
            f.write(content)
    else:

        if os.path.isfile(fullpath):
            with open(fullpath) as f:
                content = f.read()
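
The hunk above adds the fallback path: if the Flugfeld export cannot be fetched, the content is generated from local DWD files instead and then cached like a normal download. A condensed, self-contained sketch of that pattern (the helper name fetch_with_fallback and its parameters are invented for illustration):

import os

import requests

import fallback_csv

def fetch_with_fallback(url, cache_path, from_time, to_time, dtype, dwd_dir="./dwd"):
    # serve from cache if we already have the file
    if os.path.isfile(cache_path):
        with open(cache_path) as f:
            return f.read()

    # try the remote export first
    r = requests.get(url)
    if r.status_code != 200 or "nicht gefunden" in r.text.lower():
        # remote source unavailable: build the CSV from local DWD files
        content = fallback_csv.generate(dwd_dir, from_time, to_time, cache_path, dtype)
    else:
        content = r.content.decode("utf-8", "ignore")  # ignore bad bytes

    # cache whichever content we ended up with
    os.makedirs(os.path.dirname(cache_path) or ".", exist_ok=True)
    with open(cache_path, "w") as f:
        f.write(content)
    return content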

@@ -79,12 +88,15 @@ def checkLastMonths(backwardsMonths=6):
        start = dt.datetime(year=year, month=monthNumber, day=1)
        end = start + dateutil.relativedelta.relativedelta(months=+1, seconds=-1)

        # check special cases #
        if end > today:
            end = today - dt.timedelta(days=1)
            end = today - dt.timedelta(days=4)
        if start > end:
            return ""

        print(start, end)

        for dtype in dtypes:
            content = downloadFlugfeldData(start, end, dtype)
            dataList = parse(content, dtype)
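
The window logic above builds full calendar months and, for the current month, now stops four days before today instead of one. A small self-contained sketch of that computation (the helper name month_window and the safety_days parameter are invented for illustration):

import datetime as dt

import dateutil.relativedelta

def month_window(year, month, today=None, safety_days=4):
    today = today or dt.datetime.today()
    start = dt.datetime(year=year, month=month, day=1)
    # last second of the month: first of next month minus one second
    end = start + dateutil.relativedelta.relativedelta(months=+1, seconds=-1)
    # clamp the current (unfinished) month a few days back from today
    if end > today:
        end = today - dt.timedelta(days=safety_days)
    if start > end:
        return None  # window is still entirely in the future
    return start, end

print(month_window(2024, 1))
# (datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2024, 1, 31, 23, 59, 59))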