# Mirror of https://github.com/FAUSheppy/config
# Synced 2025-12-05 22:51:35 +01:00
# 207 lines, 5.4 KiB, Python
import json

import ezodf
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Open the spreadsheet "lebensmittel.ods" and use its first sheet as the
# data source for everything below.
d = ezodf.opendoc("lebensmittel.ods")
s = d.sheets[0]

# Zero-based column positions read from every spreadsheet row.
COL_GROUPS, COL_NAME, COL_PRICE = 0, 1, 4

# When true, the first row of the sheet is ignored by the row loop.
SKIP_FIRST_ROW = True

# name -> (group, summed price); filled while the sheet is read.
NAME_TO_GROUP_SUM = {}
# group -> summed price; filled while the sheet is read.
GROUP_SUMS = {}

# Named colors handed out to the Sankey links in order; consumers index this
# list modulo its length, so the palette repeats once it is exhausted.
COLOR_LIST = [
    "papayawhip", "indianred", "lime", "aqua",
    "aquamarine", "wheat", "sienna", "silver",
    "darksalmon", "pink", "tomato", "honeydew",
    "plum", "yellowgreen", "darkcyan", "cornflowerblue",
    "maroon", "azure", "crimson", "hotpink",
    "peachpuff", "violet", "mediumspringgreen", "teal",
    "tan", "darkgoldenrod", "chocolate", "mistyrose",
]

# Plotly-style description of the Sankey diagram.  The "node" lists and the
# "link" lists are parallel arrays that the loops below fill in step by step.
sankeyDict = {
    "data": [
        {
            "type": "sankey",
            "node": {"label": [], "color": []},
            "link": {
                "source": [],
                "target": [],
                "value": [],
                "color": [],
                "label": [],
            },
        }
    ]
}

# Read the sheet row by row and accumulate the prices per item name
# (NAME_TO_GROUP_SUM) and per group (GROUP_SUMS).
#
# The first-row skip is tracked in a local flag so that the SKIP_FIRST_ROW
# setting itself is not overwritten while iterating.
skip_pending = SKIP_FIRST_ROW
for row_number, r in enumerate(s.rows(), start=1):

    if skip_pending:
        skip_pending = False
        continue

    group = r[COL_GROUPS].value
    name = r[COL_NAME].value
    price = r[COL_PRICE].value

    # rows in which all three cells are empty/falsy carry no data
    if not any((group, name, price)):
        continue

    # A partially filled row without a price would otherwise fail inside
    # int() with a TypeError that does not say which row is broken.
    if price is None:
        raise ValueError(
            "Row {} has no price (group={!r}, name={!r})".format(row_number, group, name)
        )

    # normalize price (int() drops any fractional part)
    price = int(price)

    if name in NAME_TO_GROUP_SUM:
        # A name seen before keeps the group it was first stored with, and
        # that group also receives this row's price below.
        group, cur = NAME_TO_GROUP_SUM[name]
        NAME_TO_GROUP_SUM[name] = (group, cur + price)
    else:
        NAME_TO_GROUP_SUM[name] = (group, price)

    # group updates #
    GROUP_SUMS[group] = GROUP_SUMS.get(group, 0) + price

# nodes
# Register every group and every item name as a Sankey node label.  A group is
# added the first time one of its items shows up, directly before that item.
node_labels = sankeyDict["data"][0]["node"]["label"]
node_colors = sankeyDict["data"][0]["node"]["color"]
for name, (group, _) in NAME_TO_GROUP_SUM.items():

    # labels #
    if group not in node_labels:
        node_labels.append(group)
    node_labels.append(name)

    # one color entry is stored per item name
    node_colors.append("lightgray")

# Alias of the node label list; the position of a label in it is used as the
# node index of that label in the links.
LABELS_ALL = sankeyDict["data"][0]["node"]["label"]
COLOR_COUNTER = 0
# links
# One link per item, running from the item's group node to the item node and
# weighted with the item's summed price.
link_data = sankeyDict["data"][0]["link"]
for name, (group, summary) in NAME_TO_GROUP_SUM.items():

    print(group)
    # links #
    link_data["source"].append(LABELS_ALL.index(group))
    link_data["target"].append(LABELS_ALL.index(name))
    link_data["value"].append(summary)
    link_data["label"].append("{} €".format(summary))
    # colors are taken from COLOR_LIST in order, wrapping around at its end
    link_data["color"].append(COLOR_LIST[COLOR_COUNTER % len(COLOR_LIST)])
    COLOR_COUNTER += 1

# group base connection
# Add the root node and connect it to every group, weighted with the group's
# summed price.
base = "Lebensmittel"
sankeyDict["data"][0]["node"]["label"].append(base)

# Node colors are matched to node labels by position, but the node loop only
# stores one "lightgray" entry per item name, so the color list ends up
# shorter than the label list.  Fill it up so that every label (groups and
# the root included) has an entry.
node_colors = sankeyDict["data"][0]["node"]["color"]
missing_colors = len(sankeyDict["data"][0]["node"]["label"]) - len(node_colors)
node_colors.extend(["lightgray"] * missing_colors)

# the root index does not change inside the loop, so look it up only once
base_index = LABELS_ALL.index(base)
group_links = sankeyDict["data"][0]["link"]
for group, summary in GROUP_SUMS.items():
    group_links["source"].append(base_index)
    group_links["target"].append(LABELS_ALL.index(group))
    group_links["value"].append(summary)
    group_links["label"].append("{} €".format(summary))
    # continues the color sequence where the item links stopped
    group_links["color"].append(COLOR_LIST[COLOR_COUNTER % len(COLOR_LIST)])
    COLOR_COUNTER += 1

# Container for the data that is written to "sankey-tmp.json": a list of node
# entries and a list of link entries, both filled further below.
sankey_tmp = {"sankey": {"nodes": [], "links": []}}

# checks & validate
# Nodes are looked up by their label, so a label that is both an item name
# and a group name would be ambiguous.
for name in NAME_TO_GROUP_SUM:
    if name in GROUP_SUMS:
        # must be ValueError; the misspelled "ValuerError" raised NameError
        raise ValueError("Group must not exist as name: {}".format(name))

# build for external json
# Every label becomes one node entry.  Groups are put on layer 1, item names
# on layer 2, and the root node gets no "layer" key at all.
for name in LABELS_ALL:
    element = {"name": name}

    if name != base:
        element["layer"] = 1 if name in GROUP_SUMS else 2

    sankey_tmp["sankey"]["nodes"].append(element)

# Walk the parallel link lists once: copy every link into the export
# structure and validate it against the node list.
link_lists = sankeyDict["data"][0]["link"]
node_total = len(sankeyDict["data"][0]["node"]["label"])
for source, target, value, color, label in zip(
    link_lists["source"],
    link_lists["target"],
    link_lists["value"],
    link_lists["color"],
    link_lists["label"],
):

    # build for external json
    sankey_tmp["sankey"]["links"].append(
        {"fill": color, "source": source, "target": target, "value": value}
    )

    print(source, target)
    # both ends of a link must refer to an existing node index
    highest = max(source, target)
    if highest >= node_total:
        raise ValueError("Src or target out of bounds: {}".format(highest))

    # every field of a link has to be present
    if source < 0 or target < 0 or not color or value is None or label is None:
        raise ValueError("Missing mandatory value [source, target, color, value, label] [{}, {}, {}, {}, {}]".format(source, target, color, value, label))

    print(source, target, color)

# save file
# Write the export structure as indented JSON.  The encoding is given
# explicitly so the file does not depend on the platform's default encoding.
with open("sankey-tmp.json", "w", encoding="utf-8") as f:
    json.dump(sankey_tmp, f, indent=2)

# do sankey
# Build the plotly figure from the collected node and link lists, display it
# and additionally write it to "test.png".
node_data = sankeyDict['data'][0]['node']
link_data = sankeyDict['data'][0]['link']

sankey_trace = go.Sankey(
    valueformat=".0f",
    valuesuffix="EUR",
    # Define nodes
    node={
        "pad": 15,
        "thickness": 15,
        "line": {"color": "black", "width": 0.5},
        "label": node_data['label'],
        "color": node_data['color'],
    },
    # Add links
    link={
        "source": link_data['source'],
        "target": link_data['target'],
        "value": link_data['value'],
        "label": link_data['label'],
        "color": link_data['color'],
    },
)

fig = go.Figure(data=[sankey_trace])
fig.show()
fig.write_image("test.png")