diff --git a/main.py b/main.py index 9b3feca..129c3af 100755 --- a/main.py +++ b/main.py @@ -3,6 +3,9 @@ import datetime as dt import dateutil.relativedelta import os +import requests +import timeutils +import csv CACHE_DIR = "cache" CACHE_FILE_TEMPLATE = "cache_{}_{}_{}.data" @@ -10,7 +13,8 @@ NFF_URL_TIMEFORMAT = "%d.%m.%Y" NFF_INPUT_TIMEFORMAT = "%d.%m.%Y %H:%M" OUTSIDE_DATA_URL = "http://umweltdaten.nuernberg.de/csv/wetterdaten/messstation-nuernberg-flugfeld/archiv/csv-export/SUN/nuernberg-flugfeld/{dtype}/individuell/{fromDate}/{toDate}/export.csv" -dtypes = [ "lufttemperatur-aussen", "luftfeuchte", "luftdruck" ] +headers = [ "Datum/Zeit", "Temperatur [°C]", "rel. Luftfeuchte [%]", "Luftdruck [mbar]", "Windgeschwindigkeit [m/s]", "Windrichtung N=0, O=90, S=180, W=270", "Niederschlag [mm = L/m2]"] +dtypes = [ "lufttemperatur-aussen", "luftfeuchte", "luftdruck", "windgeschwindigkeit", "windrichtung", "niederschlagsmenge" ] def downloadFlugfeldData(fromTime, toTime, dtype): @@ -20,13 +24,11 @@ def downloadFlugfeldData(fromTime, toTime, dtype): toTimeStr = toTime.strftime(NFF_URL_TIMEFORMAT) cacheFile = CACHE_FILE_TEMPLATE.format(dtype, fromTimeStr, toTimeStr) fullpath = os.path.join(cacheDir, cacheFile) - print(cacheFile) # check for cache file content = None if not os.path.isfile(fullpath): url = OUTSIDE_DATA_URL.format(dtype=dtype, fromDate=fromTimeStr, toDate=toTimeStr) - print(url) r = requests.get(url) content = r.content.decode('utf-8', "ignore") # ignore bad bytes @@ -42,7 +44,9 @@ def downloadFlugfeldData(fromTime, toTime, dtype): return content def checkLastMonths(backwardsMonths=6): - + + fullContentDict = dict() + today = dt.datetime.today() monthsToCheck = [ today.month - x for x in range(0, backwardsMonths) ] monthsToCheckFixed = list(map(lambda x: x if x > 0 else x + 12, monthsToCheck)) @@ -63,6 +67,63 @@ def checkLastMonths(backwardsMonths=6): for dtype in dtypes: content = downloadFlugfeldData(start, end, dtype) + dataList = parse(content, dtype) + for d in dataList: + if d.time in fullContentDict: + fullContentDict[d.time] += [d] + else: + fullContentDict.update({ d.time : [d] }) + + return fullContentDict + +def parse(content, dtype): + skipBecauseFirstLine = True + dataList = [] + for l in content.split("\n"): + if not ";" in l: + continue + elif not l.strip(): + continue + elif skipBecauseFirstLine: + skipBecauseFirstLine = False + continue + try: + timeStr, value = l.split(";") + timestamp = dt.datetime.strptime(timeStr, NFF_INPUT_TIMEFORMAT) + cleanFloat = value.replace(",",".") + + # - means the value is missing in the data set (happens sometimes) # + if cleanFloat.strip() == "-" or cleanFloat.strip() == "+": + continue + + dataList += [Data(dtype, float(cleanFloat), timestamp)] + + except ValueError as e: + print("Warning: {}".format(e)) + + return dataList + +class Data: + def __init__(self, dtype, value, time): + self.dtype = dtype + self.value = value + self.time = time + + def __str__(self): + return "Data: {} {} {}".format(self.dtype, self.time, self.value) if __name__ == "__main__": - checkLastMonths() + dictForCSV = checkLastMonths() + + with open('test.csv', 'w', newline='') as file: + + fieldnames = ["time"] + dtypes + writer = csv.DictWriter(file, fieldnames=fieldnames) + writer.writeheader() + + for key in dictForCSV.keys(): + rowdict = { "time" : key } + for data in dictForCSV[key]: + rowdict.update({ data.dtype : data.value }) + writer.writerow(rowdict) +