Inspirace: Fakta o klimatu.
Data: ČHMÚ
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False
import pandas as pd
import plotly.express as px
from tropic_days.charting import *
from tropic_days.import_data import parse_sheet
from tropic_days.process_data import get_yearly_count, add_moving, add_line, add_line_check
# TODO fetch files automatically from CHMU
# https://www.chmi.cz/files/portal/docs/meteo/ok/denni_data/files/O1MOSN01.xls
# Colour palette; one entry is assigned to every station below.
colors = [
    "#56ebd3", "#851e39", "#65d04b", "#9b3ec8", "#a9c358",
    "#1c4c5e", "#99ceeb", "#1f3ca6", "#f67fec", "#458612",
]

# Stations keyed by a short code: display name and path of the .xls data file.
cities_old = {
    'prg': {'name': 'Praha', 'file_name': '../data/P1PRUZ01.xls'},
    'brn': {'name': 'Brno', 'file_name': '../data/B2BTUR01.xls'},
    'prb': {'name': 'Přibyslav', 'file_name': '../data/P3PRIB01.xls'},
    'pri': {'name': 'Přimda', 'file_name': '../data/L2PRIM01.xls'},
    'koc': {'name': 'Kocelovice', 'file_name': '../data/C1KOCE01.xls'},
    'mos': {'name': 'Mošnov', 'file_name': '../data/O1MOSN01.xls'},
    'lib': {'name': 'Liberec', 'file_name': '../data/U2LIBC01.xls'},
    'mil': {'name': 'Milešovka', 'file_name': '../data/U1MILE01.xls'},
    'lys': {'name': 'Lysá hora', 'file_name': '../data/O1LYSA01.xls'},
    'kra': {'name': 'Kramolín', 'file_name': '../data/P3KOSE01.xls'},
}

# Rebuild the mapping in alphabetical key order and give every station a colour.
# Colours are taken from the END of the palette backwards, which is the same
# assignment repeated `colors.pop()` calls produce. Indexing instead of popping
# leaves the palette intact, so this code can be re-run, and the palette wraps
# around instead of raising IndexError when stations outnumber colours.
city_keys = sorted(cities_old)
cities = {}
for position, city in enumerate(city_keys):
    cities[city] = cities_old[city]  # same dict object as in cities_old
    cities[city]['color'] = colors[-1 - (position % len(colors))]
# Sheet to read from every workbook ('teplota maximální' is Czech for
# "maximum temperature").
sheet_name = 'teplota maximální'

# Parse that sheet for every station and stack the results into one frame.
# The per-station frames are collected first and concatenated once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
station_frames = []
for city_key, city in cities.items():
    city['key'] = city_key  # stored on the record before it goes to parse_sheet
    station_frames.append(parse_sheet(city, sheet_name))
# pd.concat refuses an empty list, so fall back to an empty frame.
all_days = pd.concat(station_frames) if station_frames else pd.DataFrame()
# Flag every row as an ice day (value below 0) and/or a tropical day
# (value above 30), stored as 0/1 integers, following the definition
# on Fakta o klimatu.
# NOTE(review): both comparisons are strict - confirm that a maximum of
# exactly 30 is not meant to count as a tropical day.
all_days['ice_day'] = all_days.value.lt(0).astype(int)
all_days['tropical_day'] = all_days.value.gt(30).astype(int)
# Flag columns to aggregate per year.
var_names = ['ice_day', 'tropical_day']

# Build the master frame ALL by stacking the yearly counts of every flag.
# Frames are collected and concatenated once because DataFrame.append was
# removed in pandas 2.0.
yearly_frames = []
for var_name in var_names:
    yearly_frames.append(get_yearly_count(all_days, var_name))
ALL = pd.concat(yearly_frames)

# Series label: "<city>_<var_name>".
ALL['legend'] = ALL.city + '_' + ALL.var_name
# todo - this needs to be better :)
def content_stats():
    """Count the available values per city, variable type and calculation type.

    Helper for quickly checking that all necessary data is available.

    Splits ``ALL.var_name`` on ``'_'`` and stores the first piece in
    ``ALL['var_type']`` and the third piece in ``ALL['calc_type']`` (the
    module-level ``ALL`` frame is modified in place), then counts the
    non-null ``val`` entries of every (city, var_type, calc_type) group.

    Returns:
        DataFrame indexed by (city, var_type, calc_type) with one ``val``
        column holding the counts. Rows whose ``var_name`` has no third
        piece get a missing ``calc_type`` and are left out by the groupby.
    """
    # Split once, and make sure pieces 0..2 exist as columns even when no
    # var_name has that many parts (indexing the missing column raised
    # KeyError before).
    pieces = ALL.var_name.str.split('_', expand=True).reindex(columns=range(3))
    ALL['var_type'] = pieces[0]
    ALL['calc_type'] = pieces[2]
    group_keys = ['city', 'var_type', 'calc_type']
    return ALL[group_keys + ['val']].groupby(by=group_keys).count()
# Append the rows returned by add_moving for every station.
# The concat stays inside the loop because the growing ALL frame is passed
# back into add_moving on each iteration. pd.concat replaces DataFrame.append,
# which was removed in pandas 2.0.
for city in cities:
    ALL = pd.concat([ALL, add_moving(ALL, var_names, city)])

# Refresh the "<city>_<var_name>" label so the new rows get one as well.
ALL['legend'] = ALL.city + '_' + ALL.var_name
# Distinct stations and series names currently present in ALL.
known_cities = ALL.city.unique().tolist()
known_var_names = ALL.var_name.unique().tolist()


def _as_frame(rows):
    """Return *rows* as a DataFrame; a single dict/Series becomes one row."""
    if isinstance(rows, pd.DataFrame):
        return rows
    if isinstance(rows, (dict, pd.Series)):
        rows = [rows]
    return pd.DataFrame(rows)


# Run add_line for every '_src' series of every station. The returned lines
# are added to ALL; the validation results are gathered separately.
# NOTE(review): the types returned by add_line are not visible here - `lines`
# is assumed to be a DataFrame, `validation` may be a dict/Series/DataFrame
# (hence _as_frame). Confirm against tropic_days.process_data.
validation_frames = []
for var_name in known_var_names:
    if '_src' not in var_name:
        continue
    # drop existing line data in case it's there
    ALL = ALL[ALL.var_name != var_name.replace('_src', '_line')]
    for city in known_cities:
        lines, validation = add_line(ALL, city, var_name)
        # Concatenate per iteration: the updated ALL is passed to the next
        # add_line call. (DataFrame.append was removed in pandas 2.0.)
        ALL = pd.concat([ALL, lines])
        validation_frames.append(_as_frame(validation))

# One row block per (series, station), renumbered from 0; empty frame when
# nothing was fitted, because pd.concat refuses an empty list.
hold_out_validation = (
    pd.concat(validation_frames, ignore_index=True)
    if validation_frames
    else pd.DataFrame()
)
# try breaking the line to see how the fitting copes:
# run add_line_check (with 1997 as its last argument) for every '_src' series
# of every station and add the returned rows to ALL.
for var_name in known_var_names:
    if '_src' not in var_name:
        continue
    # drop existing '_lch' data in case it's there
    ALL = ALL[ALL.var_name != var_name.replace('_src', '_lch')]
    for city in known_cities:
        lines = add_line_check(ALL, city, var_name, 1997)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
        # it stays inside the loop because ALL is passed to the next call.
        ALL = pd.concat([ALL, lines])
# Absolute value of the 'mbe' column of the validation results.
hold_out_validation['abs_mbe'] = abs(hold_out_validation.mbe)

# Break every series name on '_' : the first piece becomes the variable type,
# the third piece the calculation type.
name_parts = ALL.var_name.str.split('_', expand=True)
ALL['var_type'] = name_parts[0]
ALL['calc_type'] = name_parts[2]
# Chart for all known stations, leaving out the rows whose calc_type is 'lch'
# (presumably the rows produced by add_line_check - confirm).
nice_chart_cities(ALL[ALL.calc_type != 'lch'], known_cities, cities)
# One chart per variable type, drawn from the full ALL frame.
chart_per_type(ALL, 'tropical', known_cities, cities)
chart_per_type(ALL, 'ice', known_cities, cities)
Notebook: tropic_days