pip install holidays
pip install pandas
pip install matplotlib
pip install xlrd
pip install seaborn
pip install scikit-learn
pip install xgboost
pip install numpy
pip install prophet
pip install tensorflow

  Cell In[82], line 1
    pip install holidays
        ^
SyntaxError: invalid syntax

import pandas as pd
import glob
import os
import matplotlib
import matplotlib.pyplot as plt

import seaborn as sns
import xgboost as xgb
import numpy as np
import sklearn
import itertools
import datetime
import tensorflow as tf

from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, Dropout
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from prophet import Prophet
from prophet.diagnostics import cross_validation

sns.set_style("whitegrid")
small_size = 12
medium_size = 14
large_size = 16
matplotlib.rc("font", size=small_size)  
matplotlib.rc("axes", titlesize=small_size)
matplotlib.rc("axes", labelsize=medium_size)
matplotlib.rc("xtick", labelsize=small_size)
matplotlib.rc("ytick", labelsize=small_size)
matplotlib.rc("legend", fontsize=small_size)
matplotlib.rc("axes", titlesize=large_size)  

def mean_absolute_percentage_error(y_true, y_pred):
    """
    Calculate Mean Absolute Percentage Error (MAPE) between true and predicted values.

    Parameters
    ----------
    y_true : array-like
        True values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    mape : float
        MAPE value for the given predicted values.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mape

def clean_xy(X, y):
    """
    Clean feature and target dataframes for modeling.

    Parameters
    ----------
    X : pd.DataFrame
        Feature dataframe.
    y : pd.Series or pd.DataFrame
        Target values.

    Returns
    -------
    X_clean : pd.DataFrame
        Cleaned feature dataframe.
    y_clean : pd.Series
        Cleaned target values.
    """
    X = X.copy()
    for c in X.columns:
        if X[c].dtype == bool:
            X[c] = X[c].astype("int8")
        elif X[c].dtype.kind not in "iuif":
            X[c] = pd.to_numeric(X[c], errors="coerce")
    X = X.replace([np.inf, -np.inf], np.nan)
    y = pd.to_numeric(y, errors="coerce")
    y = y.replace([np.inf, -np.inf], np.nan)
    df = X.join(y.rename("_y_"), how="inner")
    df = df.dropna()
    return df[X.columns], df["_y_"]

def settlement_period(df):
    """
    Add settlement period column to dataframe index (30-minute intervals).

    Parameters
    ----------
    df : pd.DataFrame
        Time series dataframe containing dates.

    Returns
    -------
    df : pd.DataFrame
        Dataframe with settlement period column.
    """
    df = df.copy()
    settlement_period_array = (
        np.array(df.index.hour.to_list()) * 2 + np.array(df.index.minute.to_list()) / 30 + 1
    ).astype(int)
    df["settlement_period"] = settlement_period_array
    return df

def create_features(data):
    """
    Create time series features based on time series index.

    Parameters
    ----------
    data : pd.DataFrame
        Time series dataframe.

    Returns
    -------
    df : pd.DataFrame
        Dataframe with new features.
    """
    df = data.copy()
    df["settlement_period"] = df.index.hour * 2 + (df.index.minute // 30) + 1
    df["day_of_month"] = df.index.day
    df["day_of_week"] = df.index.day_of_week
    df["day_of_year"] = df.index.day_of_year
    df["quarter"] = df.index.quarter
    df["month"] = df.index.month
    df["year"] = df.index.year
    df["week_of_year"] = df.index.isocalendar().week.astype("int64")
    return df

def add_lags(df):
    """
    Add lag features to the dataframe based on the previous 3 years of data.

    Parameters
    ----------
    df : pd.DataFrame
        Time series dataframe.

    Returns
    -------
    df : pd.DataFrame
        Dataframe with new lag features.
    """
    target_map = df["consommation"].to_dict()
    df["lag1"] = (df.index - pd.Timedelta("364 days")).map(target_map)
    df["lag2"] = (df.index - pd.Timedelta("728 days")).map(target_map)
    df["lag3"] = (df.index - pd.Timedelta("1092 days")).map(target_map)
    return df

def to_prophet_df(y_series: pd.Series) -> pd.DataFrame:
    """
    Convert a Series with DatetimeIndex to a Prophet-compatible DataFrame.

    Parameters
    ----------
    y_series : pd.Series
        Series with DatetimeIndex and target values.

    Returns
    -------
    df : pd.DataFrame
        DataFrame with columns ['ds', 'y'] and valid datetimes.
    """
    df = (y_series.rename('y')
                  .to_frame()
                  .rename_axis('ds')
                  .reset_index())
    if not np.issubdtype(df['ds'].dtype, np.datetime64):
        ds_parsed = pd.to_datetime(df['ds'], errors='coerce')
        bad = df[ds_parsed.isna()]
        if not bad.empty:
            print("⚠️ Lignes retirées car 'ds' non convertible en datetime :")
            print(bad.head())
        df = df[ds_parsed.notna()].copy()
        df['ds'] = ds_parsed[ds_parsed.notna()]
    df = df.sort_values('ds').drop_duplicates(subset='ds', keep='last').reset_index(drop=True)
    return df

def root_mean_squared_error(y_true, y_pred):
    """
    Compute Root Mean Squared Error (RMSE) between true and predicted values.

    Parameters
    ----------
    y_true : tf.Tensor or array-like
        True values.
    y_pred : tf.Tensor or array-like
        Predicted values.

    Returns
    -------
    rmse : tf.Tensor or float
        RMSE value.
    """
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

DATA_PATH = "datasets"  
files = glob.glob(os.path.join(DATA_PATH, "eCO2mix_RTE_Annuel-Definitif_*.csv"))

dfs = []
for file in files:
    print(f"Chargement : {file}")
    df = pd.read_csv(
        file, sep=",", encoding="latin-1",
        low_memory=False,
        na_values=["", " ", "NA", "N/A", "ND", "n/d", "-", "--"]
    )
    df.columns = df.columns.str.strip().str.lower()

    # Construction of the datetime column
    df["datetime"] = pd.to_datetime(df["date"].astype(str) + " " + df["heures"].astype(str), errors="coerce")

    # Filter rows where minutes are 15 or 45
    df = df[~df["datetime"].dt.minute.isin([15, 45])]

    numeric_cols = [
        "consommation", "fioul", "charbon", "gaz", "nucléaire",
        "eolien", "solaire", "hydraulique", "pompage", "bioénergies"
    ]
    for c in numeric_cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")
    
    # Keep only a few useful columns
    keep = ["datetime", "consommation"] + [c for c in numeric_cols if c in df.columns and c != "consommation"]
    df = df[keep]
    df = df.dropna(subset=["datetime"])

    dfs.append(df)

# Concatenate + sort + remove duplicates
data = (
    pd.concat(dfs, ignore_index=True)
      .sort_values("datetime")
      .drop_duplicates(subset="datetime")
      .set_index("datetime")
)

# Take a first look at the data
data.info()
data.head()

Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2012.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2013.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2014.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2015.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2016.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2017.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2018.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2019.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2020.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2021.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2022.csv
Chargement : datasets\eCO2mix_RTE_Annuel-Definitif_2023.csv
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 210384 entries, 2012-01-01 00:00:00 to 2023-12-31 23:30:00
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   consommation  210384 non-null  float64
 1   fioul         210384 non-null  float64
 2   charbon       210384 non-null  float64
 3   gaz           210384 non-null  float64
 4   nucléaire     210384 non-null  float64
 5   eolien        210384 non-null  float64
 6   solaire       210384 non-null  float64
 7   hydraulique   210384 non-null  float64
 8   pompage       210384 non-null  float64
 9   bioénergies   210384 non-null  float64
dtypes: float64(10)
memory usage: 17.7 MB

data.sample(n=7)

data.describe()

data.shape

(210384, 10)

#Sort values by date
data = data.sort_values("datetime")

data.isna().any()

consommation    False
fioul           False
charbon         False
gaz             False
nucléaire       False
eolien          False
solaire         False
hydraulique     False
pompage         False
bioénergies     False
dtype: bool

from france import France
import pandas as pd

all_holidays = []
years = range(2012, 2024)
subdivisions = [None, "BL", "GES", "GP", "GY", "MF", "MQ", "NC", "PF", "RE", "WF", "YT"]


metropole = France(years=years)
metropole_days = set((pd.to_datetime(date), name) for date, name in metropole.items())

# metropole days
for date, name in metropole_days:
    all_holidays.append({"date": date})

holidays_df = pd.DataFrame(all_holidays).sort_values(["date"]).drop_duplicates(subset=["date"], keep="first").reset_index(drop=True)

holidays_df.head(5)

# Add a column is_holiday to data
data = data.copy()
data["is_holiday"] = data.index.normalize().isin(holidays_df["date"].dt.normalize()).astype(int)
data.sample(n=10)

df_plot = data.copy()

fig, ax = plt.subplots(figsize=(15, 5))
df_plot["consommation"].plot(
    style=".", ax=ax, title="Trasnmission System Demand", label="Timeries data"
)
(df_plot.query("is_holiday == 1")["is_holiday"] * 33000).plot(
    style=".", ax=ax, label="Bank holiday"
)
ax.legend();

df_plot.loc[(df_plot.index > "01-01-2020") & (df_plot.index < "01-08-2020")][
    "consommation"
].plot(figsize=(10, 5));

# Boxplot by hour
df_plot = data.copy()
df_plot["hour"] = df_plot.index.hour

fig, ax = plt.subplots(figsize=(15, 5))
sns.boxplot(x="hour", y="consommation", data=df_plot)

ax.set_xticks(range(0, 24, 2))
ax.set_xticklabels(range(0, 24, 2))
ax.set_xlabel("Hour")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Distribution of electricity consumption with hours");

# Boxplot by month
df_plot = data.copy()
df_plot["month"] = df_plot.index.month
fig, ax = plt.subplots(figsize=(15, 5))
sns.boxplot(x="month", y="consommation", data=df_plot)
ax.set_xlabel("Month")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Distribution of electricity consumption by month");

df_plot.loc[(df_plot.index > "01-01-2018") & (df_plot.index < "12-01-2018")]["consommation"].plot(
    figsize=(15, 5), ylabel="Electricity demand (MW)"
);

# Boxplot by day of week and holiday effect
df_plot = data.copy()
df_plot["day_of_week"] = df_plot.index.dayofweek  # 0=Monday, 6=Sunday
fig, ax = plt.subplots(figsize=(15, 5))
sns.boxplot(x="day_of_week", y="consommation", data=df_plot, hue="is_holiday", ax=ax)
ax.set_xticklabels(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]);
ax.set_xlabel("Day of the week")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Daily Distribution of electricity consumption and holiday effect");

C:\Users\guill\AppData\Local\Temp\ipykernel_23024\37080056.py:6: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]);

fig, ax = plt.subplots(figsize=(15, 5))
df_plot["year"] = df_plot.index.year
sns.boxplot(x="year", y="consommation", data=df_plot)

ax.set_xlabel("Year")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Distribution of electricity consumption with years");

fig, ax = plt.subplots(figsize=(15, 7))

ax.plot(
    range(len(df_plot.loc[(df_plot.index > "01-01-2012") & (df_plot.index < "12-01-2012")]["consommation"])),
    df_plot.loc[(df_plot.index > "01-01-2012") & (df_plot.index < "12-01-2012")]["consommation"],
    "o",
    label="2012",
)

ax.plot(
    range(len(df_plot.loc[(df_plot.index > "01-01-2023") & (df_plot.index < "12-01-2023")]["consommation"])),
    df_plot.loc[(df_plot.index > "01-01-2023") & (df_plot.index < "12-01-2023")]["consommation"],
    "o",
    alpha=0.5,
    label="2023",
)
ax.set_xlabel("Data sample")
ax.set_ylabel("Electricity demand (MW)")
ax.legend(loc="best")
ax.set_title("Demand comparison - 2012 and 2023");

#Define the features and target variable
df = data.copy()
df["settlement_period"] = df.index.hour * 2 + (df.index.minute // 30) + 1
df["day_of_month"] = df.index.day
df["day_of_week"] = df.index.day_of_week
df["day_of_year"] = df.index.day_of_year
df["quarter"] = df.index.quarter
df["month"] = df.index.month
df["year"] = df.index.year
df["week_of_year"] = df.index.isocalendar().week.astype("int64")

#Add three lags to the dataset containing information from the previous 3 years
target_map =df["consommation"].to_dict()

df["lag1"] = (df.index - pd.Timedelta("364 days")).map(target_map)
df["lag2"] = (df.index - pd.Timedelta("728 days")).map(target_map)
df["lag3"] = (df.index - pd.Timedelta("1092 days")).map(target_map)

threshold_date_1 = "2020-01-01"
threshold_date_2 = "2022-01-01"

train_data = df.loc[df.index < threshold_date_1].copy()
test_data = df.loc[(df.index >= threshold_date_1) & (df.index < threshold_date_2)].copy()
val_data = df.loc[df.index >= threshold_date_2].copy()

# Visualize the train, test and validation sets
fig, ax = plt.subplots(figsize=(15, 5))
train_data["consommation"].plot(ax=ax, label="Train data")
test_data["consommation"].plot(ax=ax, label="Test data")
val_data["consommation"].plot(ax=ax, label="Validation data")
ax.axvline(threshold_date_1, color="k", ls="--")
ax.axvline(threshold_date_2, color="k", ls=":")
ax.set_title("Train, Test and Validation data")
ax.legend();

Features = [
    "is_holiday",
    "settlement_period",
    "day_of_month",
    "day_of_week",
    "day_of_year",
    "quarter",
    "month",
    "year",
    "week_of_year",
]
Target = "consommation"

#Prepare the training, testing and validation datasets
X_train = train_data[Features]
y_train = train_data[Target]

X_test = test_data[Features]
y_test = test_data[Target]

X_val = val_data[Features]
y_val = val_data[Target]

#Initialize and train the model
xgb_simple = xgb.XGBRegressor(
    n_estimators=500,
    max_depth=3,
    learning_rate=0.01,
    early_stopping_rounds=50,
    random_state=42,
)

xgb_simple.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test), (X_val, y_val)],
    verbose=100,
);

[0]	validation_0-rmse:11606.69810	validation_1-rmse:11499.29421
[100]	validation_0-rmse:7205.44320	validation_1-rmse:7416.11380
[200]	validation_0-rmse:6019.95139	validation_1-rmse:6394.28095
[300]	validation_0-rmse:5567.05750	validation_1-rmse:6016.35048
[400]	validation_0-rmse:5369.47333	validation_1-rmse:5861.10099
[499]	validation_0-rmse:5233.35195	validation_1-rmse:5759.62455

# Evaluate the importance of each feature in the model.
para_importance = pd.DataFrame(
    data= xgb_simple.feature_importances_,
    index= xgb_simple.get_booster().feature_names,
    columns=["importance"]
)

para_importance.sort_values("importance", ascending=True, inplace=True)
para_importance.plot(kind="barh")

<Axes: >

# This cell creates a DataFrame from the test target values (`y_test`) and adds a new column with the predictions from the simple XGBoost model (`xgb_simple`). 
# This allows for easy comparison between actual and predicted electricity consumption values.
result_frame = y_test.to_frame()
result_frame["pred_xgb_simple"] = xgb_simple.predict(X_test)

# Plot the actual vs predicted values
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(result_frame.index, result_frame["consommation"], "o", label="Test set")
ax.plot(result_frame.index, result_frame["pred_xgb_simple"], "o", label="Prediction")

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

begin = "05-01-2020"
end = "05-14-2020"

fig, ax = plt.subplots(figsize=(15, 5))

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)]["consommation"],
    "o",
    label="Test set",
)

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)][
        "pred_xgb_simple"
    ],
    "o",
    label="Prediction",
)

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set - Two weeks")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

mape_xgboost_simple = mean_absolute_percentage_error(
    y_test, result_frame["pred_xgb_simple"]
)

rmse_xgboost_simple = np.sqrt(mean_squared_error(y_test, result_frame["pred_xgb_simple"]))

print(
    "Mean Absolute Percentage Error of the simple model is: %.2f" % mape_xgboost_simple
)

print(
    "Root Mean Squared Error of the simple models is: %.2f MW" % rmse_xgboost_simple
)

Mean Absolute Percentage Error of the simple model is: 8.24
Root Mean Squared Error of the simple models is: 5233.35 MW

"""
This cell performs a time series cross-validation using `TimeSeriesSplit` with 4 splits. 
For each fold, it separates the data into training and test sets, then plots the electricity consumption for both sets. 
Vertical lines indicate the start of each test period. This helps visualize how the data is split and how the model will be evaluated on different time intervals.
"""
n_years_test = 1
tss = TimeSeriesSplit(n_splits=4, test_size=48 * 365 * n_years_test, gap=48)

fig, axes = plt.subplots(4, 1, figsize=(15, 15), sharex=True)

fold = 0
for train_index, test_index in tss.split(df[df.index<threshold_date_1]):

    train = df.iloc[train_index]
    test = df.iloc[test_index]

    train["consommation"].plot(
        ax=axes[fold], label="Training set", title=f"Data Train-test split fold {fold}",
    )
    test["consommation"].plot(ax=axes[fold], label="Test set")
    axes[fold].axvline(test.index.min(), color="k", ls="--")
    axes[fold].legend(loc="center", bbox_to_anchor=(1.075, 0.5))

    axes[fold].set_title("Prediction on test set - week")
    axes[fold].set_ylabel("Energy Demand (MW)")
    axes[fold].set_xlabel("Date");
    fold += 1

train_data.index = pd.Index(train_data.index)
test_data.index = pd.Index(test_data.index)

FEATURES_CV = [
    "settlement_period",
    "day_of_month",
    "day_of_week",
    "day_of_year",
    "quarter",
    "month",
    "year",
    "week_of_year",
    "lag1",
    "lag2",
    "lag3",
    "is_holiday",
]
TARGET = "consommation"

X_train_cv, y_train_cv = clean_xy(train_data[FEATURES_CV], train_data[TARGET])
X_val_cv,   y_val_cv   = clean_xy(val_data[FEATURES_CV],   val_data[TARGET])
X_test_cv,  y_test_cv  = clean_xy(test_data[FEATURES_CV],  test_data[TARGET])  # si besoin

assert X_train_cv.shape[0] == y_train_cv.shape[0]
assert X_val_cv.shape[0]   == y_val_cv.shape[0]
assert X_train_cv.columns.tolist() == X_val_cv.columns.tolist()
assert np.isfinite(X_train_cv.values).all() and np.isfinite(y_train_cv.values).all()
assert np.isfinite(X_val_cv.values).all()   and np.isfinite(y_val_cv.values).all()

fit_params = {
    "eval_set": [(X_val_cv, y_val_cv)],
    "verbose": 500,  
}

estimator = xgb.XGBRegressor(
    objective="reg:squarederror",
    learning_rate=0.01,
    tree_method="hist",
    device="cpu",   
    random_state=43,
    n_jobs=-1,
            
)

param_search = {
    "max_depth": [3, 5],
    "n_estimators": [350, 500, 650],
    "subsample": [0.95, 0.8, 0.7],
}

xgb_search = GridSearchCV(
    estimator=estimator,
    cv=tss,
    param_grid=param_search,
    scoring="neg_root_mean_squared_error",
    error_score="raise",
    verbose=3,
    return_train_score=True,
)
xgb_search.fit(X_train_cv, y_train_cv)

best_params = xgb_search.best_params_

final_model = xgb.XGBRegressor(
    **best_params,
    objective="reg:squarederror",
    learning_rate=0.01,
    tree_method="hist",
    device="cpu",
    random_state=43,
    n_jobs=-1,
    early_stopping_rounds=50,
    eval_metric="rmse",
)

final_model.fit(
    pd.concat([X_train_cv, X_val_cv]),
    pd.concat([y_train_cv, y_val_cv]),
    eval_set=[(X_val_cv, y_val_cv)],
    verbose=False,
)

Fitting 4 folds for each of 18 candidates, totalling 72 fits
[CV 1/4] END max_depth=3, n_estimators=350, subsample=0.95;, score=(train=-2796.724, test=-4992.636) total time=   0.2s
[CV 2/4] END max_depth=3, n_estimators=350, subsample=0.95;, score=(train=-3305.896, test=-5260.361) total time=   0.3s
[CV 3/4] END max_depth=3, n_estimators=350, subsample=0.95;, score=(train=-3669.237, test=-5267.921) total time=   0.5s
[CV 4/4] END max_depth=3, n_estimators=350, subsample=0.95;, score=(train=-3994.843, test=-4539.460) total time=   0.5s
[CV 1/4] END max_depth=3, n_estimators=350, subsample=0.8;, score=(train=-2790.798, test=-4998.334) total time=   0.2s
[CV 2/4] END max_depth=3, n_estimators=350, subsample=0.8;, score=(train=-3298.165, test=-5248.703) total time=   0.3s
[CV 3/4] END max_depth=3, n_estimators=350, subsample=0.8;, score=(train=-3676.321, test=-5240.145) total time=   0.3s
[CV 4/4] END max_depth=3, n_estimators=350, subsample=0.8;, score=(train=-3999.440, test=-4541.069) total time=   0.4s
[CV 1/4] END max_depth=3, n_estimators=350, subsample=0.7;, score=(train=-2787.841, test=-4984.139) total time=   0.2s
[CV 2/4] END max_depth=3, n_estimators=350, subsample=0.7;, score=(train=-3305.565, test=-5253.318) total time=   0.3s
[CV 3/4] END max_depth=3, n_estimators=350, subsample=0.7;, score=(train=-3664.624, test=-5258.885) total time=   0.3s
[CV 4/4] END max_depth=3, n_estimators=350, subsample=0.7;, score=(train=-3992.699, test=-4537.044) total time=   0.4s
[CV 1/4] END max_depth=3, n_estimators=500, subsample=0.95;, score=(train=-2366.346, test=-5081.071) total time=   0.3s
[CV 2/4] END max_depth=3, n_estimators=500, subsample=0.95;, score=(train=-3008.899, test=-5119.812) total time=   0.5s
[CV 3/4] END max_depth=3, n_estimators=500, subsample=0.95;, score=(train=-3355.968, test=-5502.345) total time=   0.5s
[CV 4/4] END max_depth=3, n_estimators=500, subsample=0.95;, score=(train=-3758.991, test=-4575.550) total time=   0.6s
[CV 1/4] END max_depth=3, n_estimators=500, subsample=0.8;, score=(train=-2363.727, test=-5087.644) total time=   0.3s
[CV 2/4] END max_depth=3, n_estimators=500, subsample=0.8;, score=(train=-3002.442, test=-5103.410) total time=   0.4s
[CV 3/4] END max_depth=3, n_estimators=500, subsample=0.8;, score=(train=-3367.963, test=-5468.721) total time=   0.5s
[CV 4/4] END max_depth=3, n_estimators=500, subsample=0.8;, score=(train=-3758.875, test=-4578.361) total time=   0.6s
[CV 1/4] END max_depth=3, n_estimators=500, subsample=0.7;, score=(train=-2355.573, test=-5076.791) total time=   0.3s
[CV 2/4] END max_depth=3, n_estimators=500, subsample=0.7;, score=(train=-3011.597, test=-5091.429) total time=   0.4s
[CV 3/4] END max_depth=3, n_estimators=500, subsample=0.7;, score=(train=-3360.113, test=-5479.538) total time=   0.6s
[CV 4/4] END max_depth=3, n_estimators=500, subsample=0.7;, score=(train=-3752.666, test=-4582.562) total time=   0.6s
[CV 1/4] END max_depth=3, n_estimators=650, subsample=0.95;, score=(train=-2119.647, test=-5152.035) total time=   0.4s
[CV 2/4] END max_depth=3, n_estimators=650, subsample=0.95;, score=(train=-2811.347, test=-5037.584) total time=   0.5s
[CV 3/4] END max_depth=3, n_estimators=650, subsample=0.95;, score=(train=-3190.039, test=-5625.655) total time=   0.6s
[CV 4/4] END max_depth=3, n_estimators=650, subsample=0.95;, score=(train=-3603.880, test=-4652.473) total time=   0.8s
[CV 1/4] END max_depth=3, n_estimators=650, subsample=0.8;, score=(train=-2123.628, test=-5141.868) total time=   0.4s
[CV 2/4] END max_depth=3, n_estimators=650, subsample=0.8;, score=(train=-2782.229, test=-5021.846) total time=   0.5s
[CV 3/4] END max_depth=3, n_estimators=650, subsample=0.8;, score=(train=-3196.181, test=-5587.635) total time=   0.7s
[CV 4/4] END max_depth=3, n_estimators=650, subsample=0.8;, score=(train=-3604.668, test=-4633.413) total time=   0.8s
[CV 1/4] END max_depth=3, n_estimators=650, subsample=0.7;, score=(train=-2106.346, test=-5145.054) total time=   0.4s
[CV 2/4] END max_depth=3, n_estimators=650, subsample=0.7;, score=(train=-2784.585, test=-5004.094) total time=   0.6s
[CV 3/4] END max_depth=3, n_estimators=650, subsample=0.7;, score=(train=-3186.810, test=-5602.977) total time=   0.8s
[CV 4/4] END max_depth=3, n_estimators=650, subsample=0.7;, score=(train=-3595.402, test=-4620.239) total time=   1.0s
[CV 1/4] END max_depth=5, n_estimators=350, subsample=0.95;, score=(train=-1858.552, test=-5398.367) total time=   0.6s
[CV 2/4] END max_depth=5, n_estimators=350, subsample=0.95;, score=(train=-2487.312, test=-5213.230) total time=   0.5s
[CV 3/4] END max_depth=5, n_estimators=350, subsample=0.95;, score=(train=-2850.500, test=-5518.796) total time=   0.7s
[CV 4/4] END max_depth=5, n_estimators=350, subsample=0.95;, score=(train=-3251.436, test=-4737.254) total time=   0.8s
[CV 1/4] END max_depth=5, n_estimators=350, subsample=0.8;, score=(train=-1856.138, test=-5370.553) total time=   0.3s
[CV 2/4] END max_depth=5, n_estimators=350, subsample=0.8;, score=(train=-2472.046, test=-5217.688) total time=   0.4s
[CV 3/4] END max_depth=5, n_estimators=350, subsample=0.8;, score=(train=-2846.926, test=-5529.644) total time=   0.6s
[CV 4/4] END max_depth=5, n_estimators=350, subsample=0.8;, score=(train=-3231.357, test=-4784.413) total time=   0.7s
[CV 1/4] END max_depth=5, n_estimators=350, subsample=0.7;, score=(train=-1862.138, test=-5364.426) total time=   0.4s
[CV 2/4] END max_depth=5, n_estimators=350, subsample=0.7;, score=(train=-2474.529, test=-5201.567) total time=   0.4s
[CV 3/4] END max_depth=5, n_estimators=350, subsample=0.7;, score=(train=-2848.627, test=-5530.060) total time=   0.5s
[CV 4/4] END max_depth=5, n_estimators=350, subsample=0.7;, score=(train=-3243.027, test=-4765.379) total time=   0.8s
[CV 1/4] END max_depth=5, n_estimators=500, subsample=0.95;, score=(train=-1471.480, test=-5518.575) total time=   0.6s
[CV 2/4] END max_depth=5, n_estimators=500, subsample=0.95;, score=(train=-2074.477, test=-5119.300) total time=   0.8s
[CV 3/4] END max_depth=5, n_estimators=500, subsample=0.95;, score=(train=-2462.195, test=-5719.333) total time=   1.2s
[CV 4/4] END max_depth=5, n_estimators=500, subsample=0.95;, score=(train=-2814.225, test=-4958.086) total time=   1.2s
[CV 1/4] END max_depth=5, n_estimators=500, subsample=0.8;, score=(train=-1470.125, test=-5483.362) total time=   0.7s
[CV 2/4] END max_depth=5, n_estimators=500, subsample=0.8;, score=(train=-2069.948, test=-5098.783) total time=   0.8s
[CV 3/4] END max_depth=5, n_estimators=500, subsample=0.8;, score=(train=-2454.989, test=-5731.243) total time=   1.1s
[CV 4/4] END max_depth=5, n_estimators=500, subsample=0.8;, score=(train=-2792.782, test=-5005.907) total time=   1.4s
[CV 1/4] END max_depth=5, n_estimators=500, subsample=0.7;, score=(train=-1475.928, test=-5483.292) total time=   0.6s
[CV 2/4] END max_depth=5, n_estimators=500, subsample=0.7;, score=(train=-2062.832, test=-5074.372) total time=   0.6s
[CV 3/4] END max_depth=5, n_estimators=500, subsample=0.7;, score=(train=-2445.450, test=-5736.584) total time=   0.8s
[CV 4/4] END max_depth=5, n_estimators=500, subsample=0.7;, score=(train=-2778.701, test=-4989.689) total time=   0.9s
[CV 1/4] END max_depth=5, n_estimators=650, subsample=0.95;, score=(train=-1289.783, test=-5565.117) total time=   0.9s
[CV 2/4] END max_depth=5, n_estimators=650, subsample=0.95;, score=(train=-1846.126, test=-5097.290) total time=   0.9s
[CV 3/4] END max_depth=5, n_estimators=650, subsample=0.95;, score=(train=-2192.778, test=-5883.177) total time=   1.1s
[CV 4/4] END max_depth=5, n_estimators=650, subsample=0.95;, score=(train=-2599.247, test=-5035.907) total time=   1.4s
[CV 1/4] END max_depth=5, n_estimators=650, subsample=0.8;, score=(train=-1285.718, test=-5539.337) total time=   0.8s
[CV 2/4] END max_depth=5, n_estimators=650, subsample=0.8;, score=(train=-1840.932, test=-5079.778) total time=   0.9s
[CV 3/4] END max_depth=5, n_estimators=650, subsample=0.8;, score=(train=-2172.288, test=-5886.164) total time=   1.3s
[CV 4/4] END max_depth=5, n_estimators=650, subsample=0.8;, score=(train=-2567.608, test=-5098.550) total time=   1.4s
[CV 1/4] END max_depth=5, n_estimators=650, subsample=0.7;, score=(train=-1291.287, test=-5539.883) total time=   0.8s
[CV 2/4] END max_depth=5, n_estimators=650, subsample=0.7;, score=(train=-1825.495, test=-5055.009) total time=   0.9s
[CV 3/4] END max_depth=5, n_estimators=650, subsample=0.7;, score=(train=-2178.344, test=-5888.638) total time=   1.1s
[CV 4/4] END max_depth=5, n_estimators=650, subsample=0.7;, score=(train=-2558.338, test=-5069.684) total time=   1.4s

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device='cpu', early_stopping_rounds=50,
             enable_categorical=False, eval_metric='rmse', feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.01, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=3,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=350,
             n_jobs=-1, num_parallel_tree=None, ...)

xgb_search.best_params_

{'max_depth': 3, 'n_estimators': 350, 'subsample': 0.8}

result_frame["pred_xgb_cv_gs"] = xgb_search.predict(X_test_cv)

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(result_frame.index, result_frame["consommation"], "o", label="Test data")
ax.plot(
    result_frame.index,
    result_frame["pred_xgb_cv_gs"],
    "o",
    label="Prediction",
    alpha=0.5,
)

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

begin = "08-01-2020"
end = "08-7-2020"

fig, ax = plt.subplots(figsize=(16, 5))

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)]["consommation"],
    "o",
    label="Test set",
)

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)][
        "pred_xgb_simple"
    ],
    "o",
    label="Prediction simple",
)

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)][
        "pred_xgb_cv_gs"
    ],
    "o",
    label="Prediction cv gs",
)

ax.legend(loc="center", bbox_to_anchor=(1.1, 0.5))

ax.set_title("Prediction on test set - Two weeks")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

mape_xgboost_cv = mean_absolute_percentage_error(
    y_test, result_frame["pred_xgb_cv_gs"]
)

rmse_xgboost_cv = np.sqrt(mean_squared_error(y_test, result_frame["pred_xgb_cv_gs"]))
print(
    "Mean Absolute Percentage Error of the cross-validated model is: %.2f"
    % mape_xgboost_cv
)

print(
    "Root Mean Squared Error of the cross-validated model is: %.2f MW" % rmse_xgboost_cv
)

Mean Absolute Percentage Error of the cross-validated model is: 7.61
Root Mean Squared Error of the cross-validated model is: 5220.50 MW

X_all = df[FEATURES_CV]
y_all = df[TARGET]

xgb_best = xgb_search.best_estimator_
xgb_best.set_params(n_jobs=-1, early_stopping_rounds=50,verbose=100)
xgb_best.fit(
    X_all,
    y_all,
    eval_set=[(X_all, y_all)],
);

[0]	validation_0-rmse:11824.45655
[1]	validation_0-rmse:11740.82818
[2]	validation_0-rmse:11658.25922
[3]	validation_0-rmse:11576.61362
[4]	validation_0-rmse:11496.10728
[5]	validation_0-rmse:11416.66123
[6]	validation_0-rmse:11338.26749
[7]	validation_0-rmse:11260.61568
[8]	validation_0-rmse:11184.23015
[9]	validation_0-rmse:11108.41853
[10]	validation_0-rmse:11034.09765
[11]	validation_0-rmse:10960.42271
[12]	validation_0-rmse:10888.00126
[13]	validation_0-rmse:10815.93307
[14]	validation_0-rmse:10745.16571
[15]	validation_0-rmse:10674.97009
[16]	validation_0-rmse:10605.59842
[17]	validation_0-rmse:10537.17494
[18]	validation_0-rmse:10470.01907
[19]	validation_0-rmse:10403.43678
[20]	validation_0-rmse:10338.03835

c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\xgboost\callback.py:386: UserWarning: [13:57:12] WARNING: C:\actions-runner\_work\xgboost\xgboost\src\learner.cc:738: 
Parameters: { "verbose" } are not used.

  self.starting_round = model.num_boosted_rounds()

[21]	validation_0-rmse:10272.79686
[22]	validation_0-rmse:10208.96553
[23]	validation_0-rmse:10146.04907
[24]	validation_0-rmse:10081.79596
[25]	validation_0-rmse:10019.68557
[26]	validation_0-rmse:9957.02080
[27]	validation_0-rmse:9896.41345
[28]	validation_0-rmse:9835.31456
[29]	validation_0-rmse:9776.53931
[30]	validation_0-rmse:9718.95535
[31]	validation_0-rmse:9660.14497
[32]	validation_0-rmse:9603.44616
[33]	validation_0-rmse:9546.20481
[34]	validation_0-rmse:9489.86657
[35]	validation_0-rmse:9435.47792
[36]	validation_0-rmse:9380.45469
[37]	validation_0-rmse:9327.26968
[38]	validation_0-rmse:9274.71504
[39]	validation_0-rmse:9223.25379
[40]	validation_0-rmse:9170.85903
[41]	validation_0-rmse:9120.31827
[42]	validation_0-rmse:9070.91178
[43]	validation_0-rmse:9022.27798
[44]	validation_0-rmse:8972.55569
[45]	validation_0-rmse:8923.44662
[46]	validation_0-rmse:8875.04920
[47]	validation_0-rmse:8828.72693
[48]	validation_0-rmse:8782.68941
[49]	validation_0-rmse:8736.02389
[50]	validation_0-rmse:8691.60717
[51]	validation_0-rmse:8646.32759
[52]	validation_0-rmse:8602.58817
[53]	validation_0-rmse:8558.58372
[54]	validation_0-rmse:8515.10331
[55]	validation_0-rmse:8472.00886
[56]	validation_0-rmse:8430.62985
[57]	validation_0-rmse:8390.19250
[58]	validation_0-rmse:8348.92087
[59]	validation_0-rmse:8309.55203
[60]	validation_0-rmse:8269.22105
[61]	validation_0-rmse:8230.90738
[62]	validation_0-rmse:8191.83930
[63]	validation_0-rmse:8154.54910
[64]	validation_0-rmse:8117.56651
[65]	validation_0-rmse:8081.07863
[66]	validation_0-rmse:8041.94678
[67]	validation_0-rmse:8005.40641
[68]	validation_0-rmse:7970.55230
[69]	validation_0-rmse:7934.76745
[70]	validation_0-rmse:7899.61949
[71]	validation_0-rmse:7864.94316
[72]	validation_0-rmse:7830.94485
[73]	validation_0-rmse:7794.36782
[74]	validation_0-rmse:7761.06827
[75]	validation_0-rmse:7729.34212
[76]	validation_0-rmse:7698.12080
[77]	validation_0-rmse:7667.37747
[78]	validation_0-rmse:7635.74302
[79]	validation_0-rmse:7604.99193
[80]	validation_0-rmse:7574.42742
[81]	validation_0-rmse:7545.09540
[82]	validation_0-rmse:7512.13187
[83]	validation_0-rmse:7483.45454
[84]	validation_0-rmse:7451.82614
[85]	validation_0-rmse:7423.14638
[86]	validation_0-rmse:7395.03875
[87]	validation_0-rmse:7364.21565
[88]	validation_0-rmse:7336.65490
[89]	validation_0-rmse:7309.60384
[90]	validation_0-rmse:7280.54601
[91]	validation_0-rmse:7254.54006
[92]	validation_0-rmse:7228.47319
[93]	validation_0-rmse:7199.20022
[94]	validation_0-rmse:7170.31655
[95]	validation_0-rmse:7142.57338
[96]	validation_0-rmse:7114.61777
[97]	validation_0-rmse:7090.93862
[98]	validation_0-rmse:7067.66019
[99]	validation_0-rmse:7040.83966
[100]	validation_0-rmse:7014.40320
[101]	validation_0-rmse:6988.37910
[102]	validation_0-rmse:6963.27342
[103]	validation_0-rmse:6940.65065
[104]	validation_0-rmse:6915.70577
[105]	validation_0-rmse:6891.62233
[106]	validation_0-rmse:6867.83354
[107]	validation_0-rmse:6847.18593
[108]	validation_0-rmse:6824.25706
[109]	validation_0-rmse:6799.68796
[110]	validation_0-rmse:6779.78592
[111]	validation_0-rmse:6757.88286
[112]	validation_0-rmse:6734.34159
[113]	validation_0-rmse:6711.21206
[114]	validation_0-rmse:6690.25808
[115]	validation_0-rmse:6671.77319
[116]	validation_0-rmse:6649.51608
[117]	validation_0-rmse:6628.51860
[118]	validation_0-rmse:6606.92486
[119]	validation_0-rmse:6585.67626
[120]	validation_0-rmse:6565.57838
[121]	validation_0-rmse:6548.50574
[122]	validation_0-rmse:6531.50748
[123]	validation_0-rmse:6511.28808
[124]	validation_0-rmse:6494.96621
[125]	validation_0-rmse:6478.83126
[126]	validation_0-rmse:6459.43880
[127]	validation_0-rmse:6440.47275
[128]	validation_0-rmse:6422.21293
[129]	validation_0-rmse:6403.71155
[130]	validation_0-rmse:6388.55485
[131]	validation_0-rmse:6373.68557
[132]	validation_0-rmse:6358.69122
[133]	validation_0-rmse:6341.12445
[134]	validation_0-rmse:6324.28592
[135]	validation_0-rmse:6310.12837
[136]	validation_0-rmse:6293.31139
[137]	validation_0-rmse:6279.60407
[138]	validation_0-rmse:6263.61632
[139]	validation_0-rmse:6248.38766
[140]	validation_0-rmse:6232.36990
[141]	validation_0-rmse:6219.40306
[142]	validation_0-rmse:6204.71673
[143]	validation_0-rmse:6190.24145
[144]	validation_0-rmse:6175.97816
[145]	validation_0-rmse:6162.85456
[146]	validation_0-rmse:6148.95605
[147]	validation_0-rmse:6136.22111
[148]	validation_0-rmse:6124.32770
[149]	validation_0-rmse:6112.61741
[150]	validation_0-rmse:6099.53963
[151]	validation_0-rmse:6085.61265
[152]	validation_0-rmse:6072.63827
[153]	validation_0-rmse:6059.22045
[154]	validation_0-rmse:6046.03916
[155]	validation_0-rmse:6033.79741
[156]	validation_0-rmse:6020.99368
[157]	validation_0-rmse:6009.16623
[158]	validation_0-rmse:5997.54477
[159]	validation_0-rmse:5985.94873
[160]	validation_0-rmse:5974.33200
[161]	validation_0-rmse:5964.22310
[162]	validation_0-rmse:5952.95020
[163]	validation_0-rmse:5941.87995
[164]	validation_0-rmse:5929.97865
[165]	validation_0-rmse:5919.15068
[166]	validation_0-rmse:5908.19048
[167]	validation_0-rmse:5898.81289
[168]	validation_0-rmse:5887.95154
[169]	validation_0-rmse:5876.74994
[170]	validation_0-rmse:5862.65383
[171]	validation_0-rmse:5851.88152
[172]	validation_0-rmse:5838.23781
[173]	validation_0-rmse:5828.50794
[174]	validation_0-rmse:5817.23166
[175]	validation_0-rmse:5803.98200
[176]	validation_0-rmse:5794.60472
[177]	validation_0-rmse:5783.76231
[178]	validation_0-rmse:5771.01953
[179]	validation_0-rmse:5760.58579
[180]	validation_0-rmse:5748.24811
[181]	validation_0-rmse:5738.00683
[182]	validation_0-rmse:5728.73772
[183]	validation_0-rmse:5716.75932
[184]	validation_0-rmse:5706.67875
[185]	validation_0-rmse:5695.06709
[186]	validation_0-rmse:5683.67710
[187]	validation_0-rmse:5672.36144
[188]	validation_0-rmse:5663.68071
[189]	validation_0-rmse:5652.35982
[190]	validation_0-rmse:5641.56359
[191]	validation_0-rmse:5630.92272
[192]	validation_0-rmse:5621.79433
[193]	validation_0-rmse:5611.39431
[194]	validation_0-rmse:5601.07965
[195]	validation_0-rmse:5590.87408
[196]	validation_0-rmse:5580.68498
[197]	validation_0-rmse:5570.65203
[198]	validation_0-rmse:5562.67605
[199]	validation_0-rmse:5554.51282
[200]	validation_0-rmse:5544.87932
[201]	validation_0-rmse:5536.67442
[202]	validation_0-rmse:5528.68470
[203]	validation_0-rmse:5519.53109
[204]	validation_0-rmse:5511.77498
[205]	validation_0-rmse:5502.36033
[206]	validation_0-rmse:5493.55861
[207]	validation_0-rmse:5485.18850
[208]	validation_0-rmse:5476.37107
[209]	validation_0-rmse:5467.57066
[210]	validation_0-rmse:5459.20161
[211]	validation_0-rmse:5451.17733
[212]	validation_0-rmse:5442.08600
[213]	validation_0-rmse:5433.86159
[214]	validation_0-rmse:5425.67264
[215]	validation_0-rmse:5418.66935
[216]	validation_0-rmse:5410.73552
[217]	validation_0-rmse:5402.92479
[218]	validation_0-rmse:5394.93010
[219]	validation_0-rmse:5387.51052
[220]	validation_0-rmse:5379.84428
[221]	validation_0-rmse:5371.34198
[222]	validation_0-rmse:5363.82458
[223]	validation_0-rmse:5356.28889
[224]	validation_0-rmse:5348.99252
[225]	validation_0-rmse:5341.56543
[226]	validation_0-rmse:5333.90523
[227]	validation_0-rmse:5326.77380
[228]	validation_0-rmse:5319.98311
[229]	validation_0-rmse:5312.17441
[230]	validation_0-rmse:5305.26746
[231]	validation_0-rmse:5297.58280
[232]	validation_0-rmse:5290.53554
[233]	validation_0-rmse:5283.67583
[234]	validation_0-rmse:5276.96316
[235]	validation_0-rmse:5270.35916
[236]	validation_0-rmse:5263.31643
[237]	validation_0-rmse:5256.90535
[238]	validation_0-rmse:5250.37045
[239]	validation_0-rmse:5243.94478
[240]	validation_0-rmse:5237.65038
[241]	validation_0-rmse:5231.72659
[242]	validation_0-rmse:5224.64311
[243]	validation_0-rmse:5218.54733
[244]	validation_0-rmse:5211.53298
[245]	validation_0-rmse:5205.84989
[246]	validation_0-rmse:5199.02930
[247]	validation_0-rmse:5193.47666
[248]	validation_0-rmse:5187.42373
[249]	validation_0-rmse:5181.61440
[250]	validation_0-rmse:5175.79468
[251]	validation_0-rmse:5170.00759
[252]	validation_0-rmse:5163.52778
[253]	validation_0-rmse:5158.00930
[254]	validation_0-rmse:5152.88753
[255]	validation_0-rmse:5147.77395
[256]	validation_0-rmse:5141.50496
[257]	validation_0-rmse:5135.90135
[258]	validation_0-rmse:5130.53917
[259]	validation_0-rmse:5125.63739
[260]	validation_0-rmse:5120.24305
[261]	validation_0-rmse:5115.13427
[262]	validation_0-rmse:5110.05314
[263]	validation_0-rmse:5104.11833
[264]	validation_0-rmse:5098.99625
[265]	validation_0-rmse:5093.07648
[266]	validation_0-rmse:5087.86392
[267]	validation_0-rmse:5083.27810
[268]	validation_0-rmse:5077.76329
[269]	validation_0-rmse:5072.79817
[270]	validation_0-rmse:5067.46336
[271]	validation_0-rmse:5062.52666
[272]	validation_0-rmse:5057.64330
[273]	validation_0-rmse:5052.89425
[274]	validation_0-rmse:5047.21529
[275]	validation_0-rmse:5042.18465
[276]	validation_0-rmse:5037.85392
[277]	validation_0-rmse:5033.68754
[278]	validation_0-rmse:5029.12395
[279]	validation_0-rmse:5023.79614
[280]	validation_0-rmse:5018.64573
[281]	validation_0-rmse:5013.84675
[282]	validation_0-rmse:5009.86344
[283]	validation_0-rmse:5005.42978
[284]	validation_0-rmse:5000.76631
[285]	validation_0-rmse:4996.93011
[286]	validation_0-rmse:4990.86762
[287]	validation_0-rmse:4985.70733
[288]	validation_0-rmse:4981.95660
[289]	validation_0-rmse:4978.26064
[290]	validation_0-rmse:4972.37424
[291]	validation_0-rmse:4968.03042
[292]	validation_0-rmse:4964.49324
[293]	validation_0-rmse:4960.95219
[294]	validation_0-rmse:4955.95409
[295]	validation_0-rmse:4951.85220
[296]	validation_0-rmse:4946.17850
[297]	validation_0-rmse:4941.60404
[298]	validation_0-rmse:4938.16044
[299]	validation_0-rmse:4933.66062
[300]	validation_0-rmse:4929.74221
[301]	validation_0-rmse:4924.78222
[302]	validation_0-rmse:4920.79509
[303]	validation_0-rmse:4915.95483
[304]	validation_0-rmse:4910.49347
[305]	validation_0-rmse:4907.21229
[306]	validation_0-rmse:4903.36013
[307]	validation_0-rmse:4899.65854
[308]	validation_0-rmse:4894.90354
[309]	validation_0-rmse:4891.77794
[310]	validation_0-rmse:4886.49582
[311]	validation_0-rmse:4881.31661
[312]	validation_0-rmse:4877.80308
[313]	validation_0-rmse:4873.35692
[314]	validation_0-rmse:4869.67761
[315]	validation_0-rmse:4864.94168
[316]	validation_0-rmse:4861.47662
[317]	validation_0-rmse:4857.53407
[318]	validation_0-rmse:4854.02453
[319]	validation_0-rmse:4849.40812
[320]	validation_0-rmse:4846.13708
[321]	validation_0-rmse:4841.19329
[322]	validation_0-rmse:4836.32933
[323]	validation_0-rmse:4831.87325
[324]	validation_0-rmse:4828.72210
[325]	validation_0-rmse:4825.66616
[326]	validation_0-rmse:4821.31836
[327]	validation_0-rmse:4816.99409
[328]	validation_0-rmse:4814.25687
[329]	validation_0-rmse:4811.53109
[330]	validation_0-rmse:4808.68705
[331]	validation_0-rmse:4805.41696
[332]	validation_0-rmse:4801.23682
[333]	validation_0-rmse:4797.63581
[334]	validation_0-rmse:4793.01523
[335]	validation_0-rmse:4790.06002
[336]	validation_0-rmse:4786.99976
[337]	validation_0-rmse:4783.87576
[338]	validation_0-rmse:4779.37846
[339]	validation_0-rmse:4775.62810
[340]	validation_0-rmse:4772.78837
[341]	validation_0-rmse:4769.78039
[342]	validation_0-rmse:4765.37221
[343]	validation_0-rmse:4761.37872
[344]	validation_0-rmse:4758.63852
[345]	validation_0-rmse:4754.63676
[346]	validation_0-rmse:4750.66332
[347]	validation_0-rmse:4747.89998
[348]	validation_0-rmse:4745.48702
[349]	validation_0-rmse:4742.65470

# Create empty lists to store data
holiday_names = []
holiday_dates = []
holiday_names_observed = []
holiday_dates_observed = []

for row in holidays_df.itertuples():
    date = row.date
    name = getattr(row, 'name', None) 
    holiday_dates.append(date)
    holiday_names.append(name)
    if name and "Observed" in name:
        if holiday_dates_observed: holiday_dates_observed.pop()
        if holiday_names_observed: holiday_names_observed.pop()
    holiday_names_observed.append(name)
    holiday_dates_observed.append(np.datetime64(date))

holiday_dates_observed[:5]

[np.datetime64('2012-01-01T00:00:00.000000'),
 np.datetime64('2012-04-09T00:00:00.000000'),
 np.datetime64('2012-05-01T00:00:00.000000'),
 np.datetime64('2012-05-08T00:00:00.000000'),
 np.datetime64('2012-05-17T00:00:00.000000')]

prediction_days = 210

future = pd.date_range(
    str(df.index.max())[0:10],
    df.index.max() + datetime.timedelta(days=prediction_days),
    freq="30min",
)

future_df = pd.DataFrame(index=future)
future_df = settlement_period(future_df)
future_df["is_future"] = True
df["is_future"] = False

# Create a dataframe containing the original data and the predict df
df_and_future = pd.concat([df, future_df])

# add features and lag
# lag values will change w.r.t original dataframe
df_and_future = create_features(df_and_future)
df_and_future = add_lags(df_and_future)

# add bank holidays in future dataframe
df_and_future_wprediction = df_and_future.query("is_future").copy()

df_and_future_wprediction["settlement_date"] = df_and_future_wprediction.index.date
df_and_future_wprediction["is_holiday"] = df_and_future_wprediction["settlement_date"].apply(
    lambda x: pd.to_datetime(x) in holiday_dates_observed
)
df_and_future_wprediction["is_holiday"] = df_and_future_wprediction["is_holiday"].astype(int)

df_and_future_wprediction.tail()

df_and_future_wprediction["pred_xgboost"] = xgb_best.predict(
    df_and_future_wprediction[FEATURES_CV]
)


fig, ax = plt.subplots(figsize=(15, 5))
df_and_future_wprediction["pred_xgboost"].plot(figsize=(15, 5), style="-", ax=ax)
ax.set_xlabel("Date")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Future demand prediction");

train_data_prophet = to_prophet_df(y_train)
val_data_prophet   = to_prophet_df(y_val)
test_data_prophet  = to_prophet_df(y_test)
all_data_prophet   = to_prophet_df(y_all)

train_data_prophet.head()

%%time
prophet_model = Prophet()
prophet_model.fit(train_data_prophet)

np.random.seed(43)
prophet_predict_df = prophet_model.predict(test_data_prophet)
prophet_predict_df.head()

13:57:29 - cmdstanpy - INFO - Chain [1] start processing
13:59:39 - cmdstanpy - INFO - Chain [1] done processing

CPU times: total: 15.7 s
Wall time: 2min 25s

prophet_model.plot_components(prophet_predict_df);

fig, ax = plt.subplots(figsize=(12, 5))

prophet_model.plot(prophet_predict_df, ax=ax, include_legend=True)
ax.scatter(y_test.to_frame().index, y_test.to_frame()["consommation"], color='r', label="Test data")
ax.legend(bbox_to_anchor=(1.1, 0.5), loc="center", ncol=1)

ax.set_title("Prediction on test set - week")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

threshold_date = "2020-01-01"
lower = pd.to_datetime(threshold_date)

upper = lower + pd.Timedelta(days=7)

fig, ax = plt.subplots(figsize=(15,5))
prophet_model.plot(prophet_predict_df, ax=ax, include_legend=True)

x_idx = pd.to_datetime(y_test.index, errors="coerce")
y_test_clean = y_test.loc[x_idx.notna()].copy()
y_test_clean.index = x_idx[x_idx.notna()]

ax.scatter(y_test_clean.index, y_test_clean.to_numpy(), color='r', label="Test data")

ax.set_xlim(lower, upper)
ax.set_ylim(20000, 110000)
ax.legend(bbox_to_anchor=(1.1, 0.5), loc="center", ncol=1)
ax.set_title("First week of forecast vs actual data")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date")
plt.tight_layout()

threshold_date = "2021-01-01"
lower = pd.to_datetime(threshold_date)

upper = lower + pd.Timedelta(days=7)

fig, ax = plt.subplots(figsize=(15,5))
prophet_model.plot(prophet_predict_df, ax=ax, include_legend=True)

x_idx = pd.to_datetime(y_test.index, errors="coerce")
y_test_clean = y_test.loc[x_idx.notna()].copy()
y_test_clean.index = x_idx[x_idx.notna()]

ax.scatter(y_test_clean.index, y_test_clean.to_numpy(), color='r', label="Test data")

ax.set_xlim(lower, upper)
ax.set_ylim(20000, 110000)
ax.legend(bbox_to_anchor=(1.1, 0.5), loc="center", ncol=1)
ax.set_title("First week of forecast vs actual data")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date")
plt.tight_layout()

%%time
mape_prophet_simple = mean_absolute_percentage_error(
    y_test, prophet_predict_df["yhat"]
)
rmse_prophet_simple = np.sqrt(mean_squared_error(y_test, prophet_predict_df["yhat"]))
print(
    "Mean Absolute Percentage Error of the simple Prophet model is: %.2f"
    % mape_prophet_simple
)
print(
    "Root Mean Squared Error of the simple Prophet models is: %.2f MW" % rmse_prophet_simple
)

Mean Absolute Percentage Error of the simple Prophet model is: 7.44
Root Mean Squared Error of the simple Prophet models is: 5096.67 MW
CPU times: total: 15.6 ms
Wall time: 2.75 ms

holiday_df = (
    pd.DataFrame(list(metropole.items()), columns=["ds", "holiday"])
    .sort_values("ds")  
    .reset_index(drop=True)
)

holiday_df["ds"] = pd.to_datetime(holiday_df["ds"])

holiday_df.head(5)

prophet_hol_model = Prophet(holidays=holiday_df)
prophet_hol_model.fit(train_data_prophet)

np.random.seed(43)
prophet_hol_predict_df = prophet_hol_model.predict(test_data_prophet)
prophet_hol_model.plot_components(prophet_hol_predict_df);

13:59:59 - cmdstanpy - INFO - Chain [1] start processing
14:02:34 - cmdstanpy - INFO - Chain [1] done processing

fig, ax = plt.subplots(figsize=(12, 5))

prophet_hol_model.plot(prophet_hol_predict_df, ax=ax, include_legend=True)
ax.scatter(y_test.to_frame().index, y_test.to_frame()["consommation"], color='r', label="Test data")
ax.legend(bbox_to_anchor=(1.15, 0.5), loc="center", ncol=1)

ax.set_title("Prediction on test set")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

threshold_date_christmas_start = "2020-12-23"
threshold_date_christmas_end = "2020-12-28"
lower = pd.to_datetime(threshold_date_christmas_start)
upper = pd.to_datetime(threshold_date_christmas_end)

fig, ax = plt.subplots(figsize=(15,5))
prophet_model.plot(prophet_predict_df, ax=ax, include_legend=True)

ax.scatter(y_test.to_frame().index, y_test.to_frame()["consommation"], color='r', label="Test data")
ax.set_xlim(lower, upper)
ax.set_ylim(bottom=19000, top=104000)
ax.legend(bbox_to_anchor=(1.1, 0.5), loc="center", ncol=1);
ax.set_title("Christmas week 2020 - Model without holiday")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");


fig, ax = plt.subplots(figsize=(15,5))

prophet_model.plot(prophet_hol_predict_df, ax=ax, include_legend=True)
ax.scatter(y_test.to_frame().index, y_test.to_frame()["consommation"], color='r', label="Test data")
ax.set_xlim(lower, upper)
ax.set_ylim(bottom=10000, top=110000)
ax.legend(bbox_to_anchor=(1.1, 0.5), loc="center", ncol=1);
ax.set_title("Christmas week 2020 - Model with holiday")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

mape_prophet_holiday = mean_absolute_percentage_error(
    y_test, prophet_hol_predict_df["yhat"]
)
rmse_prophet_holiday = np.sqrt(mean_squared_error(y_test, prophet_hol_predict_df["yhat"]))
print(
    "Mean Absolute Percentage Error of the holiday Prophet model is: %.2f"
    % mape_prophet_holiday
)
print(
    "Root Mean Squared Error of the Prophet (with holiday) model is: %.2f MW" % rmse_prophet_holiday
)

Mean Absolute Percentage Error of the holiday Prophet model is: 7.43
Root Mean Squared Error of the Prophet (with holiday) model is: 5067.81 MW

n_years_test = 2
tss_prophet = TimeSeriesSplit(n_splits=2, test_size=48 * 365 * n_years_test, gap=48)

base = (
    y_train.loc[y_train.index < threshold_date_1]   
          .to_frame()                               
          .reset_index()                            
)

time_col = base.columns[0]   
val_col  = base.columns[1]   

base[time_col] = pd.to_datetime(base[time_col], errors="coerce")
base = base.dropna(subset=[time_col])

fig, axes = plt.subplots(2, 1, figsize=(15, 10), sharex=True)
fold = 0

for train_index, test_index in tss_prophet.split(base):
    train_data = (base.iloc[train_index]
                      .rename(columns={time_col: "ds", val_col: "y"}))
    test_data  = (base.iloc[test_index]
                      .rename(columns={time_col: "ds", val_col: "y"}))

    axes[fold].plot(train_data["ds"], train_data["y"], label="Training set")
    axes[fold].plot(test_data["ds"], test_data["y"], label="Test set")
    axes[fold].axvline(test_data["ds"].min(), color="k", ls="--")

    axes[fold].legend(loc="center", bbox_to_anchor=(1.075, 0.5))
    axes[fold].set_title(f"TimeSeriesSplit fold {fold+1}")
    axes[fold].set_ylabel("Energy Demand (MW)")
    axes[fold].set_xlabel("Date")
    fold += 1
plt.tight_layout()

# Define the parameter grid for the Prophet model
param_grid = {"changepoint_prior_scale": [0.05], "seasonality_prior_scale": [2.5, 10]}

# Create all possible combinations of the parameter grid
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

rmse_cv_gs = []

for params in all_params:
    rmse_cv = []
    
    for train_index, test_index in tss_prophet.split(df[df.index<threshold_date_1]):
        train_data = (base.iloc[train_index]
                      .rename(columns={time_col: "ds", val_col: "y"}))
        test_data  = (base.iloc[test_index]
                      .rename(columns={time_col: "ds", val_col: "y"}))

        model = Prophet(**params, holidays=holiday_df)
        model.fit(train_data)

        prediction = model.predict(test_data)
        mse_val = mean_squared_error(test_data["y"], prediction["yhat"])
        rmse_cv.append(np.sqrt(mse_val))
    
    rmse_cv_gs.append(np.mean(rmse_cv))
    
grid_search_results = pd.DataFrame(all_params)
grid_search_results['rmse'] = rmse_cv_gs

grid_search_results

14:02:49 - cmdstanpy - INFO - Chain [1] start processing
14:04:18 - cmdstanpy - INFO - Chain [1] done processing
14:04:33 - cmdstanpy - INFO - Chain [1] start processing
14:06:49 - cmdstanpy - INFO - Chain [1] done processing
14:07:01 - cmdstanpy - INFO - Chain [1] start processing
14:08:44 - cmdstanpy - INFO - Chain [1] done processing
14:08:58 - cmdstanpy - INFO - Chain [1] start processing
14:11:06 - cmdstanpy - INFO - Chain [1] done processing

best_params = all_params[np.argmin(rmse_cv_gs)]
print(best_params)

{'changepoint_prior_scale': 0.05, 'seasonality_prior_scale': 10}

# Create Prophet using the best parameters from the grid search
prophet_best = Prophet(**best_params, holidays=holiday_df)
prophet_best.fit(train_data_prophet)

prophet_best_predict_df = prophet_best.predict(test_data_prophet)

# Compute MAPE value
mape_prophet_cv_gs = mean_absolute_percentage_error(
    y_test, prophet_best_predict_df["yhat"]
)
rmse_prophet_cv_gs = np.sqrt(
    mean_squared_error(y_test, prophet_best_predict_df["yhat"])
)
print(
    "Mean Absolute Percentage Error of the best-parameters Prophet model is: %.2f"
    % mape_prophet_cv_gs
)
print(
    "Root Mean Squared Error of the Prophet (with cross-validation) model is: %.2f MW" % rmse_prophet_cv_gs
)

14:11:24 - cmdstanpy - INFO - Chain [1] start processing
14:14:03 - cmdstanpy - INFO - Chain [1] done processing

Mean Absolute Percentage Error of the best-parameters Prophet model is: 7.43
Root Mean Squared Error of the Prophet (with cross-validation) model is: 5067.81 MW

# Fit a new model using all the data as training data
prophet_future_model = Prophet(**best_params, holidays=holiday_df)
prophet_future_model.fit(all_data_prophet)

# Make predictions on future dataframe
prophet_future = prophet_future_model.make_future_dataframe(periods=210*24, freq="H", include_history=False)
prophet_fut_forecast = prophet_future_model.predict(prophet_future)

14:14:28 - cmdstanpy - INFO - Chain [1] start processing
14:19:55 - cmdstanpy - INFO - Chain [1] done processing
c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\prophet\forecaster.py:1872: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.
  dates = pd.date_range(

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(prophet_fut_forecast["ds"], prophet_fut_forecast["yhat"])

ax.set_xlabel("Date")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Future demand prediction");

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(
        df_and_future_wprediction.index, 
        df_and_future_wprediction["pred_xgboost"], 
        label = "XGBoost prediction"
        )
ax.plot(
        prophet_fut_forecast["ds"], 
        prophet_fut_forecast["yhat"], 
        label = "Prophet prediction", 
        alpha = 0.75
        )

ax.set_xlabel("Date")
ax.set_ylabel("Electricity demand (MW)")
ax.set_title("Prediction comparison")

ax.legend(bbox_to_anchor=(1.1, 0.5), loc="center", ncol=1);

train_data = df.loc[df.index < threshold_date_1]
test_data = df.loc[(df.index >= threshold_date_1) & (df.index < threshold_date_2)]
hold_out_data = df.loc[df.index >= threshold_date_2]

# Define the features and target variable
FEATURES = [
    "is_holiday",
    "settlement_period",
    "day_of_month",
    "day_of_week",
    "day_of_year",
    "quarter",
    "month",
    "year",
    "week_of_year",
]
TARGET = "consommation"

FEATURES_TARGET = FEATURES.copy()
FEATURES_TARGET.append(TARGET)
train_data_keras = train_data[FEATURES_TARGET]
test_data_keras = test_data[FEATURES_TARGET]

scaler = MinMaxScaler(feature_range=(0,1))
train_data_keras_s = scaler.fit_transform(train_data_keras.values)
test_data_keras_s = scaler.transform(test_data_keras.values)

X_train_keras = (
    train_data_keras_s[:,:-1].
    reshape(train_data_keras_s.shape[0],1,len(FEATURES))
)
y_train_keras = train_data_keras_s[:,-1]

X_test_keras = (
    test_data_keras_s[:,:-1].
    reshape(test_data_keras_s.shape[0],1,len(FEATURES))
)
y_test_keras = test_data_keras_s[:,-1]

# Define a random seed for reproducibility
tf.random.set_seed(221)

# Create and compite neural network
model = Sequential()
model.add(LSTM(256, input_shape=(X_train_keras.shape[1], X_train_keras.shape[2])))
model.add(Dropout(0.5))

model.add(Dense(1))
model.compile(loss = root_mean_squared_error, optimizer="adam")

# Define callbacks
monitor_param = root_mean_squared_error
mode="min"
early_stopping = EarlyStopping(monitor=root_mean_squared_error, patience=8, verbose=0, mode=mode)
checkpoint_save = ModelCheckpoint(
    "./models_data/simple_lstm/checkpoint.weights.h5",
    save_weights_only=True,
    monitor=monitor_param,
    mode=mode,
)
reduce_lr_loss = ReduceLROnPlateau(
    monitor=monitor_param, factor=0.1, patience=5, verbose=0, mode=mode
)

# Fit model
history = model.fit(
    X_train_keras,
    y_train_keras,
    epochs=100,
    batch_size=144,
    validation_data=(X_test_keras, y_test_keras),
    callbacks=[early_stopping, checkpoint_save, reduce_lr_loss]
)

Epoch 1/100

c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\keras\src\layers\rnn\rnn.py:199: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 8ms/step - loss: 0.1201 - val_loss: 0.0845 - learning_rate: 0.0010
Epoch 2/100
  8/974 ━━━━━━━━━━━━━━━━━━━━ 7s 8ms/step - loss: 0.1069

c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\keras\src\callbacks\early_stopping.py:99: UserWarning: Early stopping conditioned on metric `<function root_mean_squared_error at 0x0000020008AF0400>` which is not available. Available metrics are: loss,val_loss
  current = self.get_monitor_value(logs)
c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\keras\src\callbacks\callback_list.py:145: UserWarning: Learning rate reduction is conditioned on metric `<function root_mean_squared_error at 0x0000020008AF0400>` which is not available. Available metrics are: loss,val_loss,learning_rate.
  callback.on_epoch_end(epoch, logs)

974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0989 - val_loss: 0.0820 - learning_rate: 0.0010
Epoch 3/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0949 - val_loss: 0.0795 - learning_rate: 0.0010
Epoch 4/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0922 - val_loss: 0.0787 - learning_rate: 0.0010
Epoch 5/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0902 - val_loss: 0.0782 - learning_rate: 0.0010
Epoch 6/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0887 - val_loss: 0.0784 - learning_rate: 0.0010
Epoch 7/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0876 - val_loss: 0.0769 - learning_rate: 0.0010
Epoch 8/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0868 - val_loss: 0.0768 - learning_rate: 0.0010
Epoch 9/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0862 - val_loss: 0.0764 - learning_rate: 0.0010
Epoch 10/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0857 - val_loss: 0.0767 - learning_rate: 0.0010
Epoch 11/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 9ms/step - loss: 0.0853 - val_loss: 0.0761 - learning_rate: 0.0010
Epoch 12/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0847 - val_loss: 0.0765 - learning_rate: 0.0010
Epoch 13/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0843 - val_loss: 0.0762 - learning_rate: 0.0010
Epoch 14/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0840 - val_loss: 0.0757 - learning_rate: 0.0010
Epoch 15/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0836 - val_loss: 0.0748 - learning_rate: 0.0010
Epoch 16/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0831 - val_loss: 0.0744 - learning_rate: 0.0010
Epoch 17/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0823 - val_loss: 0.0754 - learning_rate: 0.0010
Epoch 18/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0816 - val_loss: 0.0749 - learning_rate: 0.0010
Epoch 19/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 10s 10ms/step - loss: 0.0812 - val_loss: 0.0740 - learning_rate: 0.0010
Epoch 20/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0808 - val_loss: 0.0739 - learning_rate: 0.0010
Epoch 21/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0806 - val_loss: 0.0743 - learning_rate: 0.0010
Epoch 22/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0803 - val_loss: 0.0745 - learning_rate: 0.0010
Epoch 23/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0800 - val_loss: 0.0737 - learning_rate: 0.0010
Epoch 24/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 9s 9ms/step - loss: 0.0798 - val_loss: 0.0740 - learning_rate: 0.0010
Epoch 25/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0796 - val_loss: 0.0743 - learning_rate: 0.0010
Epoch 26/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0793 - val_loss: 0.0736 - learning_rate: 0.0010
Epoch 27/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0792 - val_loss: 0.0738 - learning_rate: 0.0010
Epoch 28/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0790 - val_loss: 0.0731 - learning_rate: 0.0010
Epoch 29/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0788 - val_loss: 0.0732 - learning_rate: 0.0010
Epoch 30/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 8ms/step - loss: 0.0786 - val_loss: 0.0743 - learning_rate: 0.0010
Epoch 31/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0785 - val_loss: 0.0730 - learning_rate: 0.0010
Epoch 32/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0783 - val_loss: 0.0740 - learning_rate: 0.0010
Epoch 33/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0781 - val_loss: 0.0729 - learning_rate: 0.0010
Epoch 34/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0781 - val_loss: 0.0731 - learning_rate: 0.0010
Epoch 35/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0778 - val_loss: 0.0722 - learning_rate: 0.0010
Epoch 36/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0778 - val_loss: 0.0720 - learning_rate: 0.0010
Epoch 37/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0774 - val_loss: 0.0725 - learning_rate: 0.0010
Epoch 38/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 8ms/step - loss: 0.0773 - val_loss: 0.0719 - learning_rate: 0.0010
Epoch 39/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0774 - val_loss: 0.0727 - learning_rate: 0.0010
Epoch 40/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0770 - val_loss: 0.0720 - learning_rate: 0.0010
Epoch 41/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0770 - val_loss: 0.0713 - learning_rate: 0.0010
Epoch 42/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0768 - val_loss: 0.0713 - learning_rate: 0.0010
Epoch 43/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 8ms/step - loss: 0.0767 - val_loss: 0.0712 - learning_rate: 0.0010
Epoch 44/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 8ms/step - loss: 0.0766 - val_loss: 0.0709 - learning_rate: 0.0010
Epoch 45/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0762 - val_loss: 0.0706 - learning_rate: 0.0010
Epoch 46/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0761 - val_loss: 0.0702 - learning_rate: 0.0010
Epoch 47/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0757 - val_loss: 0.0692 - learning_rate: 0.0010
Epoch 48/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0753 - val_loss: 0.0688 - learning_rate: 0.0010
Epoch 49/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0750 - val_loss: 0.0701 - learning_rate: 0.0010
Epoch 50/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0747 - val_loss: 0.0679 - learning_rate: 0.0010
Epoch 51/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0742 - val_loss: 0.0672 - learning_rate: 0.0010
Epoch 52/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0740 - val_loss: 0.0665 - learning_rate: 0.0010
Epoch 53/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0737 - val_loss: 0.0657 - learning_rate: 0.0010
Epoch 54/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0733 - val_loss: 0.0663 - learning_rate: 0.0010
Epoch 55/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0731 - val_loss: 0.0656 - learning_rate: 0.0010
Epoch 56/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0728 - val_loss: 0.0654 - learning_rate: 0.0010
Epoch 57/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0726 - val_loss: 0.0646 - learning_rate: 0.0010
Epoch 58/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0723 - val_loss: 0.0642 - learning_rate: 0.0010
Epoch 59/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0723 - val_loss: 0.0640 - learning_rate: 0.0010
Epoch 60/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0720 - val_loss: 0.0646 - learning_rate: 0.0010
Epoch 61/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0718 - val_loss: 0.0638 - learning_rate: 0.0010
Epoch 62/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 8ms/step - loss: 0.0717 - val_loss: 0.0635 - learning_rate: 0.0010
Epoch 63/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - loss: 0.0715 - val_loss: 0.0641 - learning_rate: 0.0010
Epoch 64/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0714 - val_loss: 0.0637 - learning_rate: 0.0010
Epoch 65/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0714 - val_loss: 0.0640 - learning_rate: 0.0010
Epoch 66/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0711 - val_loss: 0.0636 - learning_rate: 0.0010
Epoch 67/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0711 - val_loss: 0.0641 - learning_rate: 0.0010
Epoch 68/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0708 - val_loss: 0.0638 - learning_rate: 0.0010
Epoch 69/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0708 - val_loss: 0.0625 - learning_rate: 0.0010
Epoch 70/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0706 - val_loss: 0.0632 - learning_rate: 0.0010
Epoch 71/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0705 - val_loss: 0.0635 - learning_rate: 0.0010
Epoch 72/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0703 - val_loss: 0.0637 - learning_rate: 0.0010
Epoch 73/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0701 - val_loss: 0.0630 - learning_rate: 0.0010
Epoch 74/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0701 - val_loss: 0.0637 - learning_rate: 0.0010
Epoch 75/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0700 - val_loss: 0.0633 - learning_rate: 0.0010
Epoch 76/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0698 - val_loss: 0.0636 - learning_rate: 0.0010
Epoch 77/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0696 - val_loss: 0.0636 - learning_rate: 0.0010
Epoch 78/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0695 - val_loss: 0.0627 - learning_rate: 0.0010
Epoch 79/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0694 - val_loss: 0.0639 - learning_rate: 0.0010
Epoch 80/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0694 - val_loss: 0.0626 - learning_rate: 0.0010
Epoch 81/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0692 - val_loss: 0.0626 - learning_rate: 0.0010
Epoch 82/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0692 - val_loss: 0.0638 - learning_rate: 0.0010
Epoch 83/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0690 - val_loss: 0.0624 - learning_rate: 0.0010
Epoch 84/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0690 - val_loss: 0.0628 - learning_rate: 0.0010
Epoch 85/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0690 - val_loss: 0.0623 - learning_rate: 0.0010
Epoch 86/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0687 - val_loss: 0.0625 - learning_rate: 0.0010
Epoch 87/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0687 - val_loss: 0.0623 - learning_rate: 0.0010
Epoch 88/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0685 - val_loss: 0.0624 - learning_rate: 0.0010
Epoch 89/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0686 - val_loss: 0.0638 - learning_rate: 0.0010
Epoch 90/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0684 - val_loss: 0.0636 - learning_rate: 0.0010
Epoch 91/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0685 - val_loss: 0.0615 - learning_rate: 0.0010
Epoch 92/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0682 - val_loss: 0.0624 - learning_rate: 0.0010
Epoch 93/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0682 - val_loss: 0.0627 - learning_rate: 0.0010
Epoch 94/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0681 - val_loss: 0.0625 - learning_rate: 0.0010
Epoch 95/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0680 - val_loss: 0.0631 - learning_rate: 0.0010
Epoch 96/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0680 - val_loss: 0.0614 - learning_rate: 0.0010
Epoch 97/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0680 - val_loss: 0.0617 - learning_rate: 0.0010
Epoch 98/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0678 - val_loss: 0.0607 - learning_rate: 0.0010
Epoch 99/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0676 - val_loss: 0.0611 - learning_rate: 0.0010
Epoch 100/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 7s 7ms/step - loss: 0.0676 - val_loss: 0.0616 - learning_rate: 0.0010

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    range(1, len(history.history["loss"]) + 1),
    history.history["loss"],
    label="training loss",
)
ax.plot(
    range(1, len(history.history["loss"]) + 1),
    history.history["val_loss"],
    label="validation loss",
)

ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Loss evolution")
ax.legend(loc="best");

# Prediction on test set
pred_lstm = model.predict(X_test_keras)

# Inverse transform the prediction
# Since scaler was fit using all the data (9 features + outcome variable)
# we need to store the prediction in the a copy of the original data
results_lstm = test_data_keras_s
results_lstm[:,-1] = pred_lstm.reshape(pred_lstm.shape[0])
results_lstm = scaler.inverse_transform(results_lstm)

# Store inverse transformed predictions in the result dataframe
result_frame["pred_lstm"] = results_lstm[:,-1]

1097/1097 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(result_frame.index, result_frame["consommation"], "o", label="Test set")
ax.plot(result_frame.index, result_frame["pred_lstm"], "o", label="Prediction")

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

begin = "05-01-2021"
end = "05-14-2021"

fig, ax = plt.subplots(figsize=(20, 5))

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)]["consommation"],
    "-o",
    label="Test set",
)

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)][
        "pred_lstm"
    ],
    "-s",
    label="LSTM",
)

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set - Two weeks")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

mape_lstm = mean_absolute_percentage_error(
    y_test, result_frame["pred_lstm"]
)

rmse_lstm = np.sqrt(mean_squared_error(y_test, result_frame["pred_lstm"]))

print(
    "Mean Absolute Percentage Error of the LSTM model is: %.2f" % mape_lstm
)

print(
    "Root Mean Squared Error of the LSTM model is: %.2f MW" % rmse_lstm
)

Mean Absolute Percentage Error of the LSTM model is: 7.65
Root Mean Squared Error of the LSTM model is: 5190.20 MW

# Create and compile neural network
model = Sequential()
model.add(LSTM(256, input_shape=(X_train_keras.shape[1], X_train_keras.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(32))
model.add(Dropout(0.5))

model.add(Dense(1))
model.compile(loss = root_mean_squared_error, optimizer="adam")

# Define callbacks
monitor_param = root_mean_squared_error
mode="min"
early_stopping = EarlyStopping(monitor=root_mean_squared_error, patience=8, verbose=0, mode=mode)
checkpoint_save = ModelCheckpoint(
    "./models_data/deep_lstm/checkpoint.weights.h5",
    save_weights_only=True,
    monitor=monitor_param,
    mode=mode,
)
reduce_lr_loss = ReduceLROnPlateau(
    monitor=monitor_param, factor=0.1, patience=5, verbose=0, mode=mode
)

# Fit model
history_deep_lstm = model.fit(
    X_train_keras,
    y_train_keras,
    epochs=100,
    batch_size=144,
    validation_data=(X_test_keras, y_test_keras),
    callbacks=[early_stopping, checkpoint_save, reduce_lr_loss]
)

Epoch 1/100

c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\keras\src\layers\rnn\rnn.py:199: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

974/974 ━━━━━━━━━━━━━━━━━━━━ 14s 11ms/step - loss: 0.1266 - val_loss: 0.0604 - learning_rate: 0.0010
Epoch 2/100
  6/974 ━━━━━━━━━━━━━━━━━━━━ 10s 11ms/step - loss: 0.1044

c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\keras\src\callbacks\early_stopping.py:99: UserWarning: Early stopping conditioned on metric `<function root_mean_squared_error at 0x0000020008AF0400>` which is not available. Available metrics are: loss,val_loss
  current = self.get_monitor_value(logs)
c:\Users\guill\OneDrive\Bureau\VSCode Project\Lib\site-packages\keras\src\callbacks\callback_list.py:145: UserWarning: Learning rate reduction is conditioned on metric `<function root_mean_squared_error at 0x0000020008AF0400>` which is not available. Available metrics are: loss,val_loss,learning_rate.
  callback.on_epoch_end(epoch, logs)

974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0971 - val_loss: 0.0490 - learning_rate: 0.0010
Epoch 3/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 12ms/step - loss: 0.0920 - val_loss: 0.0462 - learning_rate: 0.0010
Epoch 4/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0896 - val_loss: 0.0404 - learning_rate: 0.0010
Epoch 5/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0867 - val_loss: 0.0371 - learning_rate: 0.0010
Epoch 6/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0831 - val_loss: 0.0293 - learning_rate: 0.0010
Epoch 7/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - loss: 0.0790 - val_loss: 0.0253 - learning_rate: 0.0010
Epoch 8/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - loss: 0.0762 - val_loss: 0.0259 - learning_rate: 0.0010
Epoch 9/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0747 - val_loss: 0.0239 - learning_rate: 0.0010
Epoch 10/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0739 - val_loss: 0.0236 - learning_rate: 0.0010
Epoch 11/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 21s 13ms/step - loss: 0.0735 - val_loss: 0.0233 - learning_rate: 0.0010
Epoch 12/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - loss: 0.0730 - val_loss: 0.0239 - learning_rate: 0.0010
Epoch 13/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0725 - val_loss: 0.0238 - learning_rate: 0.0010
Epoch 14/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0724 - val_loss: 0.0242 - learning_rate: 0.0010
Epoch 15/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0720 - val_loss: 0.0239 - learning_rate: 0.0010
Epoch 16/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0719 - val_loss: 0.0242 - learning_rate: 0.0010
Epoch 17/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0714 - val_loss: 0.0235 - learning_rate: 0.0010
Epoch 18/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0712 - val_loss: 0.0240 - learning_rate: 0.0010
Epoch 19/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0710 - val_loss: 0.0244 - learning_rate: 0.0010
Epoch 20/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0707 - val_loss: 0.0245 - learning_rate: 0.0010
Epoch 21/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0706 - val_loss: 0.0239 - learning_rate: 0.0010
Epoch 22/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0702 - val_loss: 0.0244 - learning_rate: 0.0010
Epoch 23/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0702 - val_loss: 0.0250 - learning_rate: 0.0010
Epoch 24/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0702 - val_loss: 0.0242 - learning_rate: 0.0010
Epoch 25/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0698 - val_loss: 0.0247 - learning_rate: 0.0010
Epoch 26/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0696 - val_loss: 0.0250 - learning_rate: 0.0010
Epoch 27/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0695 - val_loss: 0.0247 - learning_rate: 0.0010
Epoch 28/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0693 - val_loss: 0.0253 - learning_rate: 0.0010
Epoch 29/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0693 - val_loss: 0.0254 - learning_rate: 0.0010
Epoch 30/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - loss: 0.0692 - val_loss: 0.0260 - learning_rate: 0.0010
Epoch 31/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 14s 14ms/step - loss: 0.0689 - val_loss: 0.0256 - learning_rate: 0.0010
Epoch 32/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 14s 14ms/step - loss: 0.0688 - val_loss: 0.0256 - learning_rate: 0.0010
Epoch 33/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 14s 14ms/step - loss: 0.0686 - val_loss: 0.0251 - learning_rate: 0.0010
Epoch 34/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 14s 14ms/step - loss: 0.0684 - val_loss: 0.0263 - learning_rate: 0.0010
Epoch 35/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0682 - val_loss: 0.0262 - learning_rate: 0.0010
Epoch 36/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0682 - val_loss: 0.0258 - learning_rate: 0.0010
Epoch 37/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 12ms/step - loss: 0.0679 - val_loss: 0.0258 - learning_rate: 0.0010
Epoch 38/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0678 - val_loss: 0.0262 - learning_rate: 0.0010
Epoch 39/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0676 - val_loss: 0.0269 - learning_rate: 0.0010
Epoch 40/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0674 - val_loss: 0.0273 - learning_rate: 0.0010
Epoch 41/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 12ms/step - loss: 0.0673 - val_loss: 0.0271 - learning_rate: 0.0010
Epoch 42/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0669 - val_loss: 0.0270 - learning_rate: 0.0010
Epoch 43/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 12ms/step - loss: 0.0669 - val_loss: 0.0268 - learning_rate: 0.0010
Epoch 44/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0669 - val_loss: 0.0270 - learning_rate: 0.0010
Epoch 45/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0666 - val_loss: 0.0274 - learning_rate: 0.0010
Epoch 46/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 12ms/step - loss: 0.0664 - val_loss: 0.0267 - learning_rate: 0.0010
Epoch 47/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 12ms/step - loss: 0.0662 - val_loss: 0.0273 - learning_rate: 0.0010
Epoch 48/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0661 - val_loss: 0.0274 - learning_rate: 0.0010
Epoch 49/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0659 - val_loss: 0.0285 - learning_rate: 0.0010
Epoch 50/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0659 - val_loss: 0.0277 - learning_rate: 0.0010
Epoch 51/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0655 - val_loss: 0.0287 - learning_rate: 0.0010
Epoch 52/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0657 - val_loss: 0.0291 - learning_rate: 0.0010
Epoch 53/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0654 - val_loss: 0.0297 - learning_rate: 0.0010
Epoch 54/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0652 - val_loss: 0.0284 - learning_rate: 0.0010
Epoch 55/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0651 - val_loss: 0.0295 - learning_rate: 0.0010
Epoch 56/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0651 - val_loss: 0.0293 - learning_rate: 0.0010
Epoch 57/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0648 - val_loss: 0.0294 - learning_rate: 0.0010
Epoch 58/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0647 - val_loss: 0.0297 - learning_rate: 0.0010
Epoch 59/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0645 - val_loss: 0.0301 - learning_rate: 0.0010
Epoch 60/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0645 - val_loss: 0.0302 - learning_rate: 0.0010
Epoch 61/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0645 - val_loss: 0.0306 - learning_rate: 0.0010
Epoch 62/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0642 - val_loss: 0.0305 - learning_rate: 0.0010
Epoch 63/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0640 - val_loss: 0.0309 - learning_rate: 0.0010
Epoch 64/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0641 - val_loss: 0.0312 - learning_rate: 0.0010
Epoch 65/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0639 - val_loss: 0.0317 - learning_rate: 0.0010
Epoch 66/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0639 - val_loss: 0.0310 - learning_rate: 0.0010
Epoch 67/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0638 - val_loss: 0.0323 - learning_rate: 0.0010
Epoch 68/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0636 - val_loss: 0.0329 - learning_rate: 0.0010
Epoch 69/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0634 - val_loss: 0.0335 - learning_rate: 0.0010
Epoch 70/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0632 - val_loss: 0.0339 - learning_rate: 0.0010
Epoch 71/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0631 - val_loss: 0.0336 - learning_rate: 0.0010
Epoch 72/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - loss: 0.0628 - val_loss: 0.0335 - learning_rate: 0.0010
Epoch 73/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0628 - val_loss: 0.0344 - learning_rate: 0.0010
Epoch 74/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - loss: 0.0628 - val_loss: 0.0346 - learning_rate: 0.0010
Epoch 75/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0626 - val_loss: 0.0351 - learning_rate: 0.0010
Epoch 76/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0623 - val_loss: 0.0347 - learning_rate: 0.0010
Epoch 77/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0624 - val_loss: 0.0360 - learning_rate: 0.0010
Epoch 78/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0624 - val_loss: 0.0369 - learning_rate: 0.0010
Epoch 79/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0620 - val_loss: 0.0381 - learning_rate: 0.0010
Epoch 80/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0619 - val_loss: 0.0366 - learning_rate: 0.0010
Epoch 81/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0618 - val_loss: 0.0369 - learning_rate: 0.0010
Epoch 82/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0618 - val_loss: 0.0395 - learning_rate: 0.0010
Epoch 83/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0616 - val_loss: 0.0395 - learning_rate: 0.0010
Epoch 84/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0615 - val_loss: 0.0405 - learning_rate: 0.0010
Epoch 85/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0612 - val_loss: 0.0385 - learning_rate: 0.0010
Epoch 86/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0612 - val_loss: 0.0415 - learning_rate: 0.0010
Epoch 87/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0609 - val_loss: 0.0402 - learning_rate: 0.0010
Epoch 88/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0609 - val_loss: 0.0424 - learning_rate: 0.0010
Epoch 89/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0608 - val_loss: 0.0436 - learning_rate: 0.0010
Epoch 90/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0606 - val_loss: 0.0444 - learning_rate: 0.0010
Epoch 91/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0606 - val_loss: 0.0443 - learning_rate: 0.0010
Epoch 92/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0605 - val_loss: 0.0469 - learning_rate: 0.0010
Epoch 93/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0605 - val_loss: 0.0444 - learning_rate: 0.0010
Epoch 94/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0601 - val_loss: 0.0469 - learning_rate: 0.0010
Epoch 95/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - loss: 0.0603 - val_loss: 0.0451 - learning_rate: 0.0010
Epoch 96/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 13s 13ms/step - loss: 0.0601 - val_loss: 0.0488 - learning_rate: 0.0010
Epoch 97/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0599 - val_loss: 0.0464 - learning_rate: 0.0010
Epoch 98/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 12s 12ms/step - loss: 0.0600 - val_loss: 0.0491 - learning_rate: 0.0010
Epoch 99/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0599 - val_loss: 0.0494 - learning_rate: 0.0010
Epoch 100/100
974/974 ━━━━━━━━━━━━━━━━━━━━ 11s 11ms/step - loss: 0.0598 - val_loss: 0.0502 - learning_rate: 0.0010

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    range(1, len(history_deep_lstm.history["loss"]) + 1),
    history_deep_lstm.history["loss"],
    label="training loss",
)
ax.plot(
    range(1, len(history_deep_lstm.history["loss"]) + 1),
    history_deep_lstm.history["val_loss"],
    label="validation loss",
)

ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Loss evolution")
ax.legend(loc="best");

# Prediction on test set
pred_deep_lstm = model.predict(X_test_keras)

# Inverse transform the prediction
# Since scaler was fit using all the data (9 features + outcome variable)
# we need to store the prediction in the a copy of the original data
results_deep_lstm = test_data_keras_s
results_deep_lstm[:,-1] = pred_deep_lstm.reshape(pred_deep_lstm.shape[0])
results_deep_lstm = scaler.inverse_transform(results_deep_lstm)

# Store inverse transformed predictions in the result dataframe
result_frame["pred_deep_lstm"] = results_deep_lstm[:,-1]

1097/1097 ━━━━━━━━━━━━━━━━━━━━ 3s 3ms/step

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(result_frame.index, result_frame["consommation"], "o", label="Test set")
ax.plot(result_frame.index, result_frame["pred_deep_lstm"], "o", label="Prediction")

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

begin = "05-01-2021"
end = "05-14-2021"

fig, ax = plt.subplots(figsize=(20, 5))

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)]["consommation"],
    "-o",
    label="Test set",
)

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)][
        "pred_lstm"
    ],
    "-s",
    label="LSTM",
)

ax.plot(
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)].index,
    result_frame.loc[(result_frame.index > begin) & (result_frame.index < end)][
        "pred_deep_lstm"
    ],
    "-d",
    label="Deep LSTM",
)

ax.legend(loc="center", bbox_to_anchor=(1.075, 0.5))

ax.set_title("Prediction on test set - Two weeks")
ax.set_ylabel("Energy Demand (MW)")
ax.set_xlabel("Date");

mape_deep_lstm = mean_absolute_percentage_error(
    y_test, result_frame["pred_deep_lstm"]
)

rmse_deep_lstm = np.sqrt(mean_squared_error(y_test, result_frame["pred_deep_lstm"]))

print(
    "Mean Absolute Percentage Error of the deep LSTM model is: %.2f" % mape_deep_lstm
)

print(
    "Root Mean Squared Error of the deep LSTM model is: %.2f MW" % rmse_deep_lstm
)

Mean Absolute Percentage Error of the deep LSTM model is: 7.91
Root Mean Squared Error of the deep LSTM model is: 5786.53 MW

summary_df = pd.DataFrame(
    {
        "XGBoost - Simple": [mape_xgboost_simple, rmse_xgboost_simple],
        "XGBoost - CV & GS": [mape_xgboost_cv, rmse_xgboost_cv],
        "Prophet - Simple": [mape_prophet_simple, rmse_prophet_simple],
        "Prophet - Holiday": [mape_prophet_holiday, rmse_prophet_holiday],
        "Prophet - CV & GS": [mape_prophet_cv_gs, rmse_prophet_cv_gs],
        "LSTM": [mape_lstm, rmse_lstm],
        "Deep LSTM": [mape_deep_lstm, rmse_deep_lstm],
        "Metric": ["MAPE", "RMSE"]
    }
)

summary_df.set_index("Metric", inplace=True)
display(summary_df)

	consommation	fioul	charbon	gaz	nucléaire	eolien	solaire	hydraulique	pompage	bioénergies
datetime
2012-01-01 00:00:00	58315.0	492.0	25.0	3816.0	52697.0	3588.0	0.0	7922.0	-1139.0	719.0
2012-01-01 00:30:00	58315.0	492.0	25.0	3816.0	52697.0	3588.0	0.0	7922.0	-1139.0	719.0
2012-01-01 01:00:00	56231.0	492.0	25.0	3834.0	51747.0	3536.0	0.0	7598.0	-1730.0	721.0
2012-01-01 01:30:00	56075.0	491.0	25.0	3832.0	51950.0	3526.0	0.0	7299.0	-2134.0	722.0
2012-01-01 02:00:00	55532.0	492.0	25.0	3839.0	51625.0	3535.0	0.0	7159.0	-2449.0	719.0

	consommation	fioul	charbon	gaz	nucléaire	eolien	solaire	hydraulique	pompage	bioénergies
datetime
2022-10-18 13:00:00	49807.0	173.0	244.0	4565.0	27934.0	1451.0	7760.0	2685.0	-355.0	1050.0
2016-05-03 11:00:00	58388.0	565.0	580.0	2805.0	42757.0	1734.0	2907.0	9902.0	-8.0	961.0
2015-11-25 01:30:00	63505.0	826.0	493.0	6086.0	52024.0	3668.0	0.0	5395.0	-2041.0	996.0
2023-05-06 07:00:00	35048.0	57.0	21.0	3207.0	34779.0	1583.0	216.0	5843.0	-172.0	1210.0
2014-06-02 23:30:00	48703.0	208.0	471.0	497.0	43775.0	626.0	0.0	8139.0	-48.0	755.0
2019-07-27 21:30:00	39821.0	312.0	25.0	4318.0	35747.0	4289.0	10.0	5199.0	-367.0	1127.0
2016-10-05 17:00:00	49904.0	605.0	1182.0	4922.0	38628.0	5056.0	2278.0	2268.0	-1379.0	894.0

	consommation	fioul	charbon	gaz	nucléaire	eolien	solaire	hydraulique	pompage	bioénergies
count	210384.000000	210384.000000	210384.000000	210384.000000	210384.000000	210384.000000	210384.000000	210384.000000	210384.000000	210384.000000
mean	53439.353416	258.528990	831.441383	3459.282754	42489.033857	3253.539547	1225.800175	7040.161514	-778.984081	1001.560741
std	11909.168287	277.336439	1096.143253	2465.190471	7917.716611	2900.773512	2035.774944	2788.824339	987.228009	186.389846
min	29124.000000	18.000000	0.000000	234.000000	19164.000000	21.000000	0.000000	1387.000000	-4086.000000	441.000000
25%	44390.000000	89.000000	15.000000	1181.000000	37750.000000	1229.000000	0.000000	4899.000000	-1458.000000	874.000000
50%	51675.000000	159.000000	377.000000	3040.000000	42082.000000	2296.000000	18.000000	6802.000000	-193.000000	1054.000000
75%	61479.000000	337.000000	1335.000000	5089.000000	47566.000000	4296.000000	1807.000000	8911.000000	-21.000000	1138.000000
max	102098.000000	5926.000000	6265.000000	10591.000000	61712.000000	18254.000000	13395.000000	17947.000000	-1.000000	3252.000000

	consommation	fioul	charbon	gaz	nucléaire	eolien	solaire	hydraulique	pompage	bioénergies	is_holiday
datetime
2012-02-21 09:00:00	82403.0	817.0	4632.0	8045.0	52522.0	603.0	373.0	11726.0	-19.0	654.0	0
2020-08-21 01:30:00	39820.0	75.0	13.0	3461.0	31260.0	5868.0	0.0	3709.0	-1090.0	1085.0	0
2022-12-21 12:30:00	61419.0	90.0	480.0	7335.0	40782.0	7949.0	3096.0	6783.0	-15.0	1289.0	0
2019-02-25 06:30:00	61764.0	411.0	2.0	7222.0	53212.0	2348.0	0.0	7584.0	-21.0	1155.0	0
2012-09-25 05:30:00	39713.0	274.0	1585.0	539.0	41340.0	3461.0	0.0	4169.0	-2705.0	670.0	0
2015-06-15 15:30:00	51054.0	226.0	13.0	568.0	44880.0	1043.0	2471.0	9046.0	-9.0	785.0	0
2016-08-15 00:30:00	39849.0	130.0	12.0	851.0	35347.0	2917.0	0.0	5124.0	-722.0	967.0	1
2016-05-01 09:00:00	49053.0	157.0	363.0	511.0	40248.0	3543.0	1365.0	6228.0	-2477.0	951.0	1
2023-04-21 18:30:00	47145.0	54.0	22.0	3848.0	35254.0	2059.0	3936.0	6189.0	-284.0	1134.0	0
2012-08-15 05:30:00	33138.0	279.0	1533.0	694.0	34569.0	2284.0	0.0	3323.0	-2420.0	708.0	1

	consommation	fioul	charbon	gaz	nucléaire	eolien	solaire	hydraulique	pompage	bioénergies	...	day_of_year	quarter	month	year	week_of_year	lag1	lag2	lag3	is_future	settlement_date
2024-07-28 21:30:00	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	210	3	7	2024	30	39082.0	41901.0	39165.0	True	2024-07-28
2024-07-28 22:00:00	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	210	3	7	2024	30	39345.0	42259.0	39603.0	True	2024-07-28
2024-07-28 22:30:00	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	210	3	7	2024	30	40048.0	43087.0	40419.0	True	2024-07-28
2024-07-28 23:00:00	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	210	3	7	2024	30	41254.0	44345.0	42166.0	True	2024-07-28
2024-07-28 23:30:00	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	210	3	7	2024	30	40183.0	43168.0	41250.0	True	2024-07-28

Training supervized prediction : Predict the electricity consumption of a region or company based on historical data.¶

Overview¶

Contents¶

Import Libraries¶

Functions¶

Data Load¶

EDA and Feature engineering¶

XGBoost¶

XGBoost Simple¶

XGBoost with Cross validation and Grid Search¶¶

Prophet¶

Simple Prophet¶

Prophet with holidays¶

Prophet with cross-validation and grid search¶

Prediction to the future¶

LTSM and deep LTSM¶

LTSM¶

Deep LTSM¶

Summary of the results and key insights¶

Next steps¶

	objective	'reg:squarederror'
	base_score	None
	booster	None
	callbacks	None
	colsample_bylevel	None
	colsample_bynode	None
	colsample_bytree	None
	device	'cpu'
	early_stopping_rounds	50
	enable_categorical	False
	eval_metric	'rmse'
	feature_types	None
	feature_weights	None
	gamma	None
	grow_policy	None
	importance_type	None
	interaction_constraints	None
	learning_rate	0.01
	max_bin	None
	max_cat_threshold	None
	max_cat_to_onehot	None
	max_delta_step	None
	max_depth	3
	max_leaves	None
	min_child_weight	None
	missing	nan
	monotone_constraints	None
	multi_strategy	None
	n_estimators	350
	n_jobs	-1
	num_parallel_tree	None
	random_state	43
	reg_alpha	None
	reg_lambda	None
	sampling_method	None
	scale_pos_weight	None
	subsample	0.8
	tree_method	'hist'
	validate_parameters	None
	verbosity	None

	ds	trend	yhat_lower	yhat_upper	trend_lower	trend_upper	additive_terms	additive_terms_lower	additive_terms_upper	daily	...	weekly	weekly_lower	weekly_upper	yearly	yearly_lower	yearly_upper	yhat
0	2020-01-01 00:00:00	53438.057037	58036.019666	69556.448229	53438.057037	53438.057037	10201.449197	10201.449197	10201.449197	-332.827228	...	2136.727547	2136.727547	2136.727547	8397.548878	8397.548878	8397.548878	63639.506235
1	2020-01-01 00:30:00	53438.038456	57478.582238	68717.715834	53438.038456	53438.038456	9712.469314	9712.469314	9712.469314	-827.910922	...	2137.296611	2137.296611	2137.296611	8403.083625	8403.083625	8403.083625	63150.507769
2	2020-01-01 01:00:00	53438.019874	56947.059826	67854.882380	53438.019874	53438.019874	8899.579954	8899.579954	8899.579954	-1648.146625	...	2139.082003	2139.082003	2139.082003	8408.644576	8408.644576	8408.644576	62337.599828
3	2020-01-01 01:30:00	53438.001293	55730.327096	67630.527838	53438.001293	53438.001293	7734.294235	7734.294235	7734.294235	-2822.006516	...	2142.069053	2142.069053	2142.069053	8414.231698	8414.231698	8414.231698	61172.295527
4	2020-01-01 02:00:00	53437.982711	54065.457546	65391.439227	53437.982711	53437.982711	6280.488451	6280.488451	6280.488451	-4285.594356	...	2146.237849	2146.237849	2146.237849	8419.844959	8419.844959	8419.844959	59718.471162

	ds	holiday
0	2012-01-01	Jour de l'an
1	2012-04-09	Lundi de Pâques
2	2012-05-01	Fête du Travail
3	2012-05-08	Fête de la Victoire
4	2012-05-17	Ascension

	XGBoost - Simple	XGBoost - CV & GS	Prophet - Simple	Prophet - Holiday	Prophet - CV & GS	LSTM	Deep LSTM
Metric
MAPE	8.239809	7.613463	7.443389	7.429350	7.429350	7.645418	7.907060
RMSE	5233.351946	5220.502605	5096.674932	5067.807116	5067.807116	5190.204585	5786.528073

	changepoint_prior_scale	seasonality_prior_scale	rmse
0	0.05	2.5	14185.706379
1	0.05	10.0	14142.896015