mirror of
https://github.com/FAUSheppy/ths-reference-data-collector
synced 2025-12-06 06:51:35 +01:00
feat: support manual dwd data
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
|||||||
|
__pycache__/
|
||||||
|
dwd/
|
||||||
cache/
|
cache/
|
||||||
*.csv
|
*.csv
|
||||||
*.swp
|
*.swp
|
||||||
|
|||||||
114
fallback_csv.py
Normal file
114
fallback_csv.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
import glob
|
||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Number of empty lines written at the top of generated cache content.
# NOTE(review): presumably mirrors the header length the cache reader skips
# in files downloaded from the primary source - confirm against the parser.
SKIP_LINES = 13


def cache_content(from_time, to_time, data, dtype):
    """Render DWD rows as the text content of a cache file.

    The result starts with ``SKIP_LINES`` empty lines, followed by one
    ``DD.MM.YYYY HH:MM;<value>`` line for every row of *data* whose
    timestamp lies within ``[from_time, to_time]`` (both bounds inclusive).
    Values are written with one decimal place and a decimal comma.

    Args:
        from_time: Inclusive lower bound, comparable with the row timestamps.
        to_time: Inclusive upper bound, comparable with the row timestamps.
        data: Iterable of ``(date, primary, secondary)`` tuples, where
            ``date`` supports ``strftime`` and the other two are numbers.
        dtype: Measurement name; decides which column is written.
            ``"luftdruck"`` has no backing column and is written as ``-1``.

    Returns:
        The cache file content as a single string.

    Raises:
        ValueError: If *dtype* is not a known measurement name and at least
            one row falls inside the requested timeframe.
    """
    primary_dtypes = ("lufttemperatur-aussen", "windgeschwindigkeit", "niederschlagsmenge")
    secondary_dtypes = ("luftfeuchte", "windrichtung")

    # Collect lines in a list and join once instead of growing a string.
    lines = [""] * SKIP_LINES

    for date, primary, secondary in data:

        # skip rows outside the requested timeframe #
        if date < from_time or date > to_time:
            continue

        if dtype in primary_dtypes:
            content_number = primary
        elif dtype in secondary_dtypes:
            content_number = secondary
        elif dtype == "luftdruck":
            # no pressure column available, emit a fixed placeholder value
            content_number = -1
        else:
            raise ValueError("Bad dtype: {}".format(dtype))

        date_cache_format = date.strftime("%d.%m.%Y %H:%M")
        # ".1f" = one decimal place; the former "1f" was a minimum width of 1
        # and therefore produced six decimals.
        content_str = "{:.1f}".format(content_number).replace(".", ",")
        lines.append("{};{}".format(date_cache_format, content_str))

    return "".join(line + "\n" for line in lines)
|
||||||
|
|
||||||
|
def generate(master_dir, from_time, to_time, cache_file, dtype):
    """Build cache file content for *dtype* from manually downloaded DWD files.

    Looks for hourly DWD product files (``produkt_*_stunde*.txt``) inside
    *master_dir*, parses each one, and renders the first file whose time
    range fully covers ``[from_time, to_time]`` via ``cache_content``.

    Args:
        master_dir: Directory holding the unpacked DWD files. It is created
            if it does not exist yet.
        from_time: Start of the requested timeframe (``datetime``).
        to_time: End of the requested timeframe (``datetime``).
        cache_file: Not used by this function; kept so existing callers
            keep working.
        dtype: Measurement name; selects the product file pattern and the
            column that ends up in the output.

    Returns:
        The cache content string, or ``""`` for wind dtypes when no file
        covers the requested timeframe.

    Raises:
        ValueError: If *dtype* is unsupported, if no matching DWD file is
            present, if a data line has an unexpected number of columns, or
            if no file covers the timeframe (non-wind dtypes only).
    """
    if dtype in ("lufttemperatur-aussen", "luftfeuchte"):
        base_name = "/produkt_tu_stunde*.txt"
    elif dtype in ("windgeschwindigkeit", "windrichtung"):
        base_name = "/produkt_ff_stunde*.txt"
    elif dtype == "niederschlagsmenge":
        base_name = "/produkt_rr_stunde*.txt"
    elif dtype == "luftdruck":
        base_name = "/produkt_tu_stunde*.txt"  # <- placeholder because pressure data is missing
    else:
        raise ValueError("Unsupported D-Type: {}".format(dtype))

    timeframes = []

    # makedirs also creates missing parent directories
    os.makedirs(master_dir, exist_ok=True)

    # read files
    files = glob.glob(master_dir + base_name)

    if not files:
        raise ValueError("Keine DWD_Datei für {} in: {} gefunden. Bitte herunterladen und entpacken! https://www.dwd.de/DE/leistungen/klimadatendeutschland/klarchivstunden.html;jsessionid=C423E76B30D18F24C43F4E7E36744C8C.live21073?nn=16102".format(dtype, os.getcwd() + ", " + master_dir))

    for fname in files:

        start = None
        end = None
        data = []

        # read file
        with open(fname) as f:

            # skip header
            next(f, None)

            # iterate through csv #
            for line in f:

                # tolerate blank (e.g. trailing) lines instead of failing the unpack
                if not line.strip():
                    continue

                # read the line #
                # temperature & humidity      => tu
                # wind speed & wind direction => ff
                # precipitation & (unused)    => rr
                if dtype == "niederschlagsmenge":
                    _station_id, fulldate, _quality, primary, secondary, _na2, _na3 = line.split(";")
                else:
                    _station_id, fulldate, _quality, primary, secondary, _na2 = line.split(";")

                # parse date #
                date = datetime.datetime.strptime(fulldate, "%Y%m%d%H")

                # append data #
                data.append((date, float(primary), float(secondary)))

                # set start and end #
                # end is updated on every row (including the first) so that a
                # file with a single data row still has a valid range
                if start is None:
                    start = date
                end = date

        # save values #
        timeframes.append((start, end, data))
        print(dtype, start, end)
        print(dtype, from_time, to_time)

    # find a fitting frame #
    for start, end, data in timeframes:
        # files without any data row cannot cover a timeframe
        if start is None or end is None:
            continue
        if from_time >= start and to_time <= end:
            return cache_content(from_time, to_time, data, dtype)

    if dtype.startswith("wind"):
        return ""

    raise ValueError("Keine Datei mit passenden Daten gefunden. Bitte Readme lesen")
|
||||||
16
main.py
16
main.py
@@ -12,6 +12,7 @@ import openpyxl
|
|||||||
import glob
|
import glob
|
||||||
import calendar
|
import calendar
|
||||||
|
|
||||||
|
import fallback_csv
|
||||||
|
|
||||||
CSV_DIR = "csvfiles"
|
CSV_DIR = "csvfiles"
|
||||||
CACHE_DIR = "cache"
|
CACHE_DIR = "cache"
|
||||||
@@ -52,12 +53,20 @@ def downloadFlugfeldData(fromTime, toTime, dtype):
|
|||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
content = r.content.decode('utf-8', "ignore") # ignore bad bytes
|
content = r.content.decode('utf-8', "ignore") # ignore bad bytes
|
||||||
|
|
||||||
|
# check response code #
|
||||||
|
if r.status_code != 200 or "nicht gefunden" in r.text.lower():
|
||||||
|
print("Flugfeld kapott")
|
||||||
|
content = fallback_csv.generate("./dwd", fromTime, toTime, cacheFile, dtype)
|
||||||
|
else:
|
||||||
|
content = r.content.decode('utf-8', "ignore") # ignore bad bytes
|
||||||
|
|
||||||
# cache data
|
# cache data
|
||||||
if not os.path.isdir(cacheDir):
|
if not os.path.isdir(cacheDir):
|
||||||
os.mkdir(cacheDir)
|
os.mkdir(cacheDir)
|
||||||
with open(fullpath, 'w') as f:
|
with open(fullpath, 'w') as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
else:
|
|
||||||
|
if os.path.isfile(fullpath):
|
||||||
with open(fullpath) as f:
|
with open(fullpath) as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
|
|
||||||
@@ -79,12 +88,15 @@ def checkLastMonths(backwardsMonths=6):
|
|||||||
start = dt.datetime(year=year, month=monthNumber, day=1)
|
start = dt.datetime(year=year, month=monthNumber, day=1)
|
||||||
end = start + dateutil.relativedelta.relativedelta(months=+1, seconds=-1)
|
end = start + dateutil.relativedelta.relativedelta(months=+1, seconds=-1)
|
||||||
|
|
||||||
|
|
||||||
# check special cases #
|
# check special cases #
|
||||||
if end > today:
|
if end > today:
|
||||||
end = today - dt.timedelta(days=1)
|
end = today - dt.timedelta(days=4)
|
||||||
if start > end:
|
if start > end:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
print(start, end)
|
||||||
|
|
||||||
for dtype in dtypes:
|
for dtype in dtypes:
|
||||||
content = downloadFlugfeldData(start, end, dtype)
|
content = downloadFlugfeldData(start, end, dtype)
|
||||||
dataList = parse(content, dtype)
|
dataList = parse(content, dtype)
|
||||||
|
|||||||
Reference in New Issue
Block a user