Tropické a ledové dny

Inspirace: Fakta o klimatu.
Data: ČHMÚ

In [1]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

import pandas as pd
import plotly.express as px

from tropic_days.charting import *
from tropic_days.import_data import parse_sheet
from tropic_days.process_data import get_yearly_count, add_moving, add_line, add_line_check
In [2]:
# TODO: fetch the source files automatically from CHMU, e.g.
# https://www.chmi.cz/files/portal/docs/meteo/ok/denni_data/files/O1MOSN01.xls

# Colour palette; at least one entry per station is required.
colors = ["#56ebd3", "#851e39", "#65d04b", "#9b3ec8", "#a9c358", "#1c4c5e", "#99ceeb", "#1f3ca6", "#f67fec", "#458612"]

# Station key -> display name and path of the station's .xls file.
cities_old = {
    'prg' : {'name': 'Praha', 'file_name': '../data/P1PRUZ01.xls'},
    'brn' : {'name': 'Brno', 'file_name': '../data/B2BTUR01.xls'},
    'prb' : {'name': 'Přibyslav', 'file_name': '../data/P3PRIB01.xls'},
    'pri' : {'name': 'Přimda', 'file_name': '../data/L2PRIM01.xls'},
    'koc' : {'name': 'Kocelovice', 'file_name': '../data/C1KOCE01.xls'},
    'mos' : {'name': 'Mošnov', 'file_name': '../data/O1MOSN01.xls'},
    'lib' : {'name': 'Liberec', 'file_name': '../data/U2LIBC01.xls'},
    'mil' : {'name': 'Milešovka', 'file_name': '../data/U1MILE01.xls'},
    'lys' : {'name': 'Lysá hora', 'file_name': '../data/O1LYSA01.xls'},
    'kra' : {'name': 'Kramolín', 'file_name': '../data/P3KOSE01.xls'},
}

city_keys = sorted(cities_old.keys())

if len(colors) < len(city_keys):
    raise ValueError(
        f"need {len(city_keys)} colors for the configured stations, got {len(colors)}"
    )

# Stations in alphabetical key order, each with its own colour.
# Colours are taken from the END of the palette (the same assignment a
# successive colors.pop() would give), but without consuming `colors`, so the
# cell can be re-run safely. Each entry is a fresh dict, so `cities_old` is
# left untouched.
cities = {
    city: {**cities_old[city], 'color': color}
    for city, color in zip(city_keys, reversed(colors))
}

# Name of the spreadsheet sheet that is parsed for every station.
sheet_name = 'teplota maximální'
In [3]:
# Parse the selected sheet for every station and stack the results into one
# frame. The frames are collected first and concatenated once, because
# DataFrame.append was removed in pandas 2.0 (and was quadratic in a loop).
city_frames = []
for city_key, city in cities.items():
    # The station dict is given its own key before being handed to parse_sheet.
    city['key'] = city_key
    city_frames.append(parse_sheet(city, sheet_name))
all_days = pd.concat(city_frames)

# add columns for ice_day and tropical day according to the definition on Fakta o Klimatu
# (stored as 0/1 integers: value below 0 -> ice day, value above 30 -> tropical day)
all_days['ice_day'] = (all_days['value'] < 0).astype(int)
all_days['tropical_day'] = (all_days['value'] > 30).astype(int)
In [4]:
# Indicator columns for which get_yearly_count is evaluated.
var_names = ['ice_day', 'tropical_day']

# Stack the get_yearly_count result of every indicator into one long frame.
# A single pd.concat replaces the repeated DataFrame.append, which was
# removed in pandas 2.0.
ALL = pd.concat([get_yearly_count(all_days, var_name) for var_name in var_names])

# Series label: "<city>_<var_name>".
ALL['legend'] = ALL.city + '_' + ALL.var_name
In [5]:
# TODO: this helper needs to be better :)
def content_stats():
    """Count the `val` entries per (city, var_type, calc_type) combination.

    Quick check that all the necessary data is available.

    Side effect: writes the `var_type` and `calc_type` columns on the global
    `ALL` frame, taken from the first and third '_'-separated pieces of
    `var_name`.
    """
    name_parts = ALL.var_name.str.split('_', expand=True)
    ALL['var_type'] = name_parts[0]
    ALL['calc_type'] = name_parts[2]

    group_keys = ['city', 'var_type', 'calc_type']
    return ALL[group_keys + ['val']].groupby(by=group_keys).count()
In [6]:
# Extend ALL with the rows add_moving returns for each station.
# Kept iterative on purpose: add_moving receives the frame as it grows, exactly
# as before. pd.concat replaces DataFrame.append (removed in pandas 2.0).
# NOTE(review): re-running this cell on an already extended ALL appends the
# rows again — confirm whether add_moving guards against that.
for city in cities:
    ALL = pd.concat([ALL, add_moving(ALL, var_names, city)])

# Rebuild the series label so the newly added rows get one as well.
ALL['legend'] = ALL.city + '_' + ALL.var_name
In [7]:
known_cities = ALL.city.unique().tolist()
known_var_names = ALL.var_name.unique().tolist()

# Validation records returned by add_line, gathered and concatenated once below.
validation_parts = []

for var_name in known_var_names:
    if '_src' not in var_name:
        continue
    # drop existing line data in case it's there
    ALL = ALL[ALL.var_name != var_name.replace('_src', '_line')]
    for city in known_cities:
        lines, validation = add_line(ALL, city, var_name)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        ALL = pd.concat([ALL, lines])
        # DataFrame.append(..., ignore_index=True) also accepted a single
        # dict/Series record; pd.concat does not, so wrap such a record as a
        # one-row frame. NOTE(review): confirm the actual type add_line returns.
        if isinstance(validation, pd.DataFrame):
            validation_frame = validation
        else:
            validation_frame = pd.DataFrame([validation])
        validation_parts.append(validation_frame)

# An empty frame is kept as the fallback when no '_src' series exists.
hold_out_validation = (
    pd.concat(validation_parts, ignore_index=True)
    if validation_parts
    else pd.DataFrame()
)
In [8]:
for var_name in known_var_names:
    # try breaking the line to see how the fitting copes
    if '_src' in var_name:
        # drop existing line-check data in case it's there
        ALL = ALL[ALL.var_name != var_name.replace('_src', '_lch')]
        for city in known_cities:
            # NOTE(review): 1997 is presumably the year at which the line is
            # broken — confirm against add_line_check.
            lines = add_line_check(ALL, city, var_name, 1997)
            # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
            ALL = pd.concat([ALL, lines])
In [9]:
# Absolute value of the `mbe` column, stored alongside it as `abs_mbe`.
hold_out_validation['abs_mbe'] = hold_out_validation['mbe'].abs()
In [10]:
# var_type = first '_'-separated piece of var_name.
ALL['var_type'] = ALL['var_name'].str.split('_', expand=True)[0]
In [11]:
# calc_type = third '_'-separated piece of var_name.
ALL['calc_type'] = ALL['var_name'].str.split('_', expand=True)[2]

Tropické a ledové dny

In [19]:
# Combined chart for all stations; the 'lch' rows are left out.
nice_chart_cities(ALL.loc[ALL['calc_type'] != 'lch'], known_cities, cities)

Tropické dny

In [20]:
# Chart for the 'tropical' series of every known station.
# NOTE(review): unlike the nice_chart_cities call, the 'lch' rows are not
# filtered out of ALL here — confirm chart_per_type handles them as intended.
chart_per_type(ALL, 'tropical', known_cities, cities)

Ledové dny

In [21]:
# Chart for the 'ice' series of every known station.
# NOTE(review): unlike the nice_chart_cities call, the 'lch' rows are not
# filtered out of ALL here — confirm chart_per_type handles them as intended.
chart_per_type(ALL, 'ice', known_cities, cities)

Notebook: tropic_days