4  Time Series Decomposition

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

# Load the AirPassengers dataset, parsing 'Month' as dates and using it as
# the index so the data carries a DatetimeIndex.
air_passengers = pd.read_csv(
    'data/AirPassengers.csv',
    parse_dates=['Month'],
    index_col='Month',
)

# Perform STL decomposition on the passenger counts.
#   period   - number of observations in one seasonal cycle (12 for monthly
#              data with a yearly pattern). It is stated explicitly instead
#              of relying on the frequency inferred from the index.
#   seasonal - length of the seasonal smoother; it must be an odd integer.
#              It controls how quickly the seasonal pattern may change over
#              time and is NOT the periodicity of the data.
stl = STL(air_passengers['#Passengers'], period=12, seasonal=13)
result = stl.fit()
result
<statsmodels.tsa.seasonal.DecomposeResult at 0x1b2ffd590a0>

4.1 Method 1: Plotting using matplotlib

# Draw the original series and its trend, seasonal and residual components
# as four stacked panels that share a single x-axis.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(10, 8), sharex=True)

# One entry per panel: (axes, data to draw, keyword arguments for the line).
# The first panel sets no colour, so it uses matplotlib's default.
panel_specs = [
    (ax1, air_passengers, {'label': 'Original'}),
    (ax2, result.trend, {'label': 'Trend', 'color': 'orange'}),
    (ax3, result.seasonal, {'label': 'Seasonal', 'color': 'green'}),
    (ax4, result.resid, {'label': 'Residual', 'color': 'red'}),
]
for axis, data, line_kwargs in panel_specs:
    axis.plot(data, **line_kwargs)
    axis.legend()

plt.suptitle('STL Decomposition of AirPassengers Dataset')
plt.show()

4.2 Method 2: Plotting using plotnine

from plotnine import ggplot, aes, geom_line, facet_wrap, ggtitle
# Gather the observed values and the three STL components side by side,
# together with an explicit 'Date' column taken from the index.
observed_values = air_passengers['#Passengers']
component_columns = {
    'Date': air_passengers.index,
    'Original': observed_values,
    'Trend': result.trend,
    'Seasonal': result.seasonal,
    'Residual': result.resid,
}
df_visualization = pd.DataFrame(component_columns)
df_visualization
Date Original Trend Seasonal Residual
Month
1949-01-01 1949-01-01 112 121.463327 -9.157113 -0.306215
1949-02-01 1949-02-01 118 122.392507 0.961357 -5.353864
1949-03-01 1949-03-01 132 123.284151 12.919571 -4.203722
1949-04-01 1949-04-01 129 124.139983 4.042554 0.817463
1949-05-01 1949-05-01 121 124.967180 -3.196646 -0.770534
... ... ... ... ... ...
1960-08-01 1960-08-01 606 481.142084 132.866128 -8.008212
1960-09-01 1960-09-01 508 484.574794 25.826563 -2.401357
1960-10-01 1960-10-01 461 487.984483 -33.766745 6.782262
1960-11-01 1960-11-01 390 491.372961 -97.319814 -4.053148
1960-12-01 1960-12-01 432 494.738728 -60.351183 -2.387545

144 rows × 5 columns

# Reshape from wide to long format: one row per (Date, Component) pair, with
# the component name in 'Component' and its number in 'Value'.
df_melted = pd.melt(
    df_visualization,
    id_vars='Date',
    var_name='Component',
    value_name='Value',
)
df_melted
Date Component Value
0 1949-01-01 Original 112.000000
1 1949-02-01 Original 118.000000
2 1949-03-01 Original 132.000000
3 1949-04-01 Original 129.000000
4 1949-05-01 Original 121.000000
... ... ... ...
571 1960-08-01 Residual -8.008212
572 1960-09-01 Residual -2.401357
573 1960-10-01 Residual 6.782262
574 1960-11-01 Residual -4.053148
575 1960-12-01 Residual -2.387545

576 rows × 3 columns

# Build the plotnine figure from named layers: one coloured line per
# component, each component in its own facet with an independent y-axis.
component_lines = ggplot(df_melted, aes(x='Date', y='Value', color='Component'))
component_facets = facet_wrap('~Component', scales='free_y')
plot_title = ggtitle('STL Decomposition of AirPassengers Dataset')
plot = component_lines + geom_line() + component_facets + plot_title

# Display the plot
print(plot)

# Same plotnine figure, but with the facets arranged in a single column
# (ncol=1) so the components are stacked vertically.
stacked_lines = ggplot(df_melted, aes(x='Date', y='Value', color='Component'))
stacked_facets = facet_wrap('~Component', scales='free_y', ncol=1)
stacked_title = ggtitle('STL Decomposition of AirPassengers Dataset')
plot = stacked_lines + geom_line() + stacked_facets + stacked_title

# Display the plot
print(plot)

Task: Take an hourly time series and perform an STL decomposition on it.