I have created a model in Python, but I don't understand how to use it for predictions. For example, FB Prophet lets you set the number of steps to predict. Could you please tell me what code I should run in order to predict 5 steps ahead with XGBoost?
I have built and evaluated the model; I just need to understand how to use it.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error
plt.style.use('fivethirtyeight')

# Raw string: the backslashes in the Windows path must not be read as
# escape sequences. The first CSV column is parsed as dates and used as index.
dfs = pd.read_csv(
    r'F:\TDG\Analysts\Ops Analyst\Files\885 OCtober 2016+ Daily.csv',
    index_col=[0],
    parse_dates=[0],
)

# Chronological split: rows up to and including split_date are used for
# training, everything after it for testing.
split_date = '1/1/2018'
dfs_train = dfs.loc[dfs.index <= split_date].copy()
dfs_test = dfs.loc[dfs.index > split_date].copy()

# Plot both partitions on one chart. The chain is wrapped in parentheses so
# it can span several lines; the result is bound to `_` to discard it.
_ = (
    dfs_test
    .rename(columns={'y': 'TEST SET'})
    .join(dfs_train.rename(columns={'y': 'TRAINING SET'}), how='outer')
    .plot(figsize=(15, 5), title='data', style='.')
)


def create_features(df, label=None):
    """
    Creates time series features from datetime index.

    Note that the feature columns (and a helper ``date`` column) are added
    to ``df`` in place.

    Args:
        df: DataFrame whose index supports the ``.dt`` accessor once copied
            into a column (i.e. a datetime index).
        label: Optional name of the target column in ``df``.

    Returns:
        ``X`` (the feature columns) when ``label`` is falsy, otherwise the
        tuple ``(X, y)`` where ``y`` is ``df[label]``.
    """
    df['date'] = df.index
    df['hour'] = df['date'].dt.hour
    df['dayofweek'] = df['date'].dt.dayofweek
    df['quarter'] = df['date'].dt.quarter
    df['month'] = df['date'].dt.month
    df['year'] = df['date'].dt.year
    df['dayofyear'] = df['date'].dt.dayofyear
    df['dayofmonth'] = df['date'].dt.day
    # Series.dt.weekofyear was removed in pandas 2.0; isocalendar().week is
    # its replacement. Cast to a plain integer dtype to match the other
    # feature columns.
    df['weekofyear'] = df['date'].dt.isocalendar().week.astype('int64')

    X = df[['hour', 'dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth', 'weekofyear']]
    if not label:
        return X
    y = df[label]
    return X, y
# Build the feature matrices and targets for both partitions.
X_train, y_train = create_features(dfs_train, label='y')
X_test, y_test = create_features(dfs_test, label='y')

reg = xgb.XGBRegressor(n_estimators=1000)
# NOTE(review): the test partition is passed in eval_set, so early stopping
# is presumably tuned on the same data that is later used for evaluation --
# consider a separate validation split.
# NOTE(review): newer xgboost releases expect early_stopping_rounds on the
# XGBRegressor constructor instead of fit(); confirm against the installed
# version.
reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    early_stopping_rounds=50,
    verbose=False,  # Change verbose to True if you want to see it train
)

# Feature-importance chart for the fitted model.
_ = plot_importance(reg, height=0.9)
# Forecast on Test Set
# Store the model's predictions for the test rows next to the actual values.
dfs_test['y_Prediction'] = reg.predict(X_test)

# Recombine both partitions; training rows have no 'y_Prediction' value.
dfs_all = pd.concat([dfs_test, dfs_train], sort=False)

# Plot actual vs. predicted values over the whole series.
_ = dfs_all[['y', 'y_Prediction']].plot(figsize=(15, 5))
# NOTE(review): these are bare expressions -- their values are only shown
# when run interactively (e.g. as the last statement of a notebook cell);
# wrap them in print() if this is run as a plain script.
mean_squared_error(y_true=dfs_test['y'],
                   y_pred=dfs_test['y_Prediction'])

mean_absolute_error(y_true=dfs_test['y'],
                    y_pred=dfs_test['y_Prediction'])


def mean_absolute_percentage_error(y_true, y_pred):
    """Calculates MAPE given y_true and y_pred.

    Both inputs are converted with ``np.array``. The result is the mean of
    ``|y_true - y_pred| / |y_true|`` expressed as a percentage. Entries where
    ``y_true`` is zero divide by zero and make the result inf/nan.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


mean_absolute_percentage_error(y_true=dfs_test['y'],
                               y_pred=dfs_test['y_Prediction'])