In [1]:
import os
import pandas as pd
import numpy as np
# Load the pickled observations from the local data directory.
# NOTE(review): unpickling can execute arbitrary code, so only load this
# file if it comes from a trusted source.
data_file = "su_smhi_daily_full.bz2"
pickle_path = os.path.join("data", data_file)
sthlm_data = pd.read_pickle(pickle_path)
# Independent NumPy copy of the frame's values, used for argmin/argmax below.
sthlm_array = np.array(sthlm_data.values)
In [2]:
# Print a concise summary of the loaded frame (index, columns, dtypes, non-null counts).
sthlm_data.info()
In [3]:
# Display descriptive statistics (count, mean, std, quartiles, min/max) of the loaded data.
sthlm_data.describe()
Out[3]:
In [4]:
# Print the rows holding the lowest and the highest recorded temperature.
#
# The search runs on the 1-D "temperature" column instead of the 2-D
# `sthlm_array`: np.argmin/np.argmax on a 2-D array return an index into the
# *flattened* array, which is only a valid row position when the frame has
# exactly one column.
# The nan-aware variants ignore missing readings (plain argmin/argmax would
# return the position of the first NaN); they raise ValueError if every
# value is NaN.
temperatures = sthlm_data["temperature"].values
print(sthlm_data.iloc[np.nanargmin(temperatures)])
print(sthlm_data.iloc[np.nanargmax(temperatures)])
Plots
In [5]:
from matplotlib import pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figures created afterwards.
sns.set()
January data
In [74]:
# Select every row from January 2020 via partial-string indexing on the
# datetime index. `.loc` is required here: selecting rows with
# `frame["2020-01"]` was deprecated and is no longer supported by
# pandas >= 2.0, where `[]` with a string looks up a column instead.
jan_this_year = sthlm_data.loc["2020-01"]
# Last expression of the cell: display the selected rows.
jan_this_year
Out[74]:
In [75]:
# Display the per-column mean of the January 2020 selection.
jan_this_year.mean()
Out[75]:
In [6]:
# Aggregate the observations to one mean value per calendar month.
# NOTE(review): the "M" alias is deprecated in favour of "ME" from pandas 2.2;
# it is kept here so the cell still runs on older pandas versions.
month_data = sthlm_data.resample("M").mean()

# Boolean mask that is True for the monthly rows falling in January.
month_jan_filter = month_data.index.month == 1
jan_data = month_data.loc[month_jan_filter]

# First column of the January means as a 1-D NumPy array.
jan_array = jan_data.values[:, 0]
In [7]:
# Display descriptive statistics of the monthly January means.
jan_data.describe()
Out[7]:
In [8]:
# Find the January with the lowest monthly mean and print "<year>: <value>".
jan_min_obs = jan_data.iloc[np.argmin(jan_array)]
# The row's name is its timestamp label; split the ISO date into
# ['YYYY', 'MM', 'DD'] without depending on the Timestamp's str() layout.
jan_min_date = jan_min_obs.name.date().isoformat().split('-')
# Explicit positional access: integer keys on a label-indexed Series
# (`obs[0]`) are deprecated in recent pandas and will be treated as labels.
jan_min = jan_min_obs.iloc[0]
print(jan_min_date[0]+':', jan_min)
In [9]:
# Find the January with the highest monthly mean and print "<year>: <value>".
jan_max_obs = jan_data.iloc[np.argmax(jan_array)]
# The row's name is its timestamp label; split the ISO date into
# ['YYYY', 'MM', 'DD'] without depending on the Timestamp's str() layout.
jan_max_date = jan_max_obs.name.date().isoformat().split('-')
# Explicit positional access: integer keys on a label-indexed Series
# (`obs[0]`) are deprecated in recent pandas and will be treated as labels.
jan_max = jan_max_obs.iloc[0]
print(jan_max_date[0]+':', jan_max)
In [12]:
# Line plot of the January monthly means over the years.
plt.figure(figsize=(20,10))
plt.title("Dygnsmedeltemperatur i Januari", fontsize=18)
plt.ylabel("Temperatur (°C)", fontsize=18)
# The label was mojibake ("Ã…r": UTF-8 bytes of "Å" decoded as cp1252);
# restored to the intended Swedish word for "Year".
plt.xlabel("År", fontsize=18)
plt.plot(jan_data)
plt.show()
In [13]:
# Scatter plot with a fitted regression line: January mean versus year.
years = np.asarray(jan_data.index.year).copy()
fig, ax = plt.subplots(figsize=(20, 10))
sns.regplot(x=years, y=jan_array, ax=ax)
plt.show()
In [14]:
# Box plot of the January monthly means.
plt.figure(figsize=(20,10))
# Pass the values as `x=` explicitly: the meaning of the first positional
# argument differs between seaborn releases (`x` in older versions, `data`
# from 0.12), so the keyword keeps the horizontal box plot on all of them.
sns.boxplot(x=jan_array)
plt.show()
In [15]:
# Histogram (normalised to a density) with a KDE overlay of the January means.
plt.figure(figsize=(20,10))
# `sns.distplot` is deprecated and scheduled for removal; `histplot` with
# `kde=True, stat="density"` is its documented replacement. `cut=3` lets the
# KDE curve extend past the data range as distplot's did.
# NOTE(review): requires seaborn >= 0.11; default binning may differ slightly.
sns.histplot(jan_array, kde=True, stat="density", kde_kws=dict(cut=3))
plt.show()
In [19]:
from scipy.stats import t
from scipy.stats import sem
# 95 % Student-t interval around the mean of the January values,
# using the standard error of the mean as scale and n - 1 degrees of freedom.
jan_mean = np.mean(jan_array)
jan_sem = sem(jan_array)
dof = len(jan_array) - 1
t.interval(0.95, dof, loc=jan_mean, scale=jan_sem)
Out[19]:
In [39]:
from scipy.stats import ks_2samp
# Two-sample Kolmogorov-Smirnov test: first 100 January means versus the last 100.
first_100 = jan_array[:100]
last_100 = jan_array[-100:]
ks_2samp(first_100, last_100)
Out[39]:
In [67]:
# Print "<date>: <value>" for every January whose monthly mean exceeds 2.
jan_dates = jan_data.index.date
jan_temps = jan_data["temperature"].values
is_warm = jan_temps > 2
for date, value in zip(jan_dates[is_warm], jan_temps[is_warm]):
    print(str(date)+":", value)
Daily data
In [43]:
# Boolean mask that is True for every observation dated in January.
day_filter = sthlm_data.index.month == 1
jan_day_data = sthlm_data.loc[day_filter]

# First column of the January observations as a 1-D NumPy array.
jan_day_array = jan_day_data.values[:, 0]
In [44]:
# Two-sample Kolmogorov-Smirnov test on the daily January values: the first
# 100 * 31 observations versus the last 100 * 31.
# Fix: the second sample previously sliced `jan_array` (the monthly means),
# so daily values were being compared against monthly averages.
# NOTE(review): 100 * 31 corresponds to 100 Januaries only if no days are
# missing from the record - confirm.
ks_2samp(jan_day_array[:100*31], jan_day_array[-100*31:])
Out[44]:
In [62]:
# Scatter plot with a fitted regression line: daily January value versus year.
years = np.asarray(jan_day_data.index.year).copy()
fig, ax = plt.subplots(figsize=(20, 10))
sns.regplot(x=years, y=jan_day_array, ax=ax)
plt.show()
Days above 7 °C
In [63]:
# Print "<date>: <value>" for every January day whose value exceeds 6.5.
# NOTE(review): the section heading says 7 °C while the threshold here is
# 6.5 - presumably chosen so values that round to 7 are included; confirm.
day_dates = jan_day_data.index.date
day_temps = jan_day_data["temperature"].values
is_hot = day_temps > 6.5
for date, value in zip(day_dates[is_hot], day_temps[is_hot]):
    print(str(date)+":", value)
In [50]:
Out[50]:
In [ ]: