From 0d57cbcd9c99661ab18718f011f61938c8a59fe7 Mon Sep 17 00:00:00 2001
From: Anirban Mukherjee <raji08xd@gmail.com>
Date: Wed, 6 Oct 2021 11:45:38 +0530
Subject: [PATCH] Create notebook.ipynb

---
 7-TimeSeries/3-SVR/working/notebook.ipynb | 625 ++++++++++++++++++++++
 1 file changed, 625 insertions(+)
 create mode 100644 7-TimeSeries/3-SVR/working/notebook.ipynb

diff --git a/7-TimeSeries/3-SVR/working/notebook.ipynb b/7-TimeSeries/3-SVR/working/notebook.ipynb
new file mode 100644
index 00000000..60968530
--- /dev/null
+++ b/7-TimeSeries/3-SVR/working/notebook.ipynb
@@ -0,0 +1,625 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "fv9OoQsMFk5A"
+   },
+   "source": [
+    "# Time series prediction using Support Vector Regressor"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this notebook, we demonstrate how to:\n",
+    "\n",
+    "- prepare 2D time series data for training an SVM regressor model\n",
+    "- implement SVR using RBF kernel\n",
+    "- evaluate the model using plots and MAPE"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Importing modules"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "M687KNlQFp0-"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import warnings\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import datetime as dt\n",
+    "import math\n",
+    "\n",
+    "from sklearn.svm import SVR\n",
+    "from sklearn.preprocessing import MinMaxScaler\n",
+    "from common.utils import load_data, mape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Cj-kfVdMGjWP"
+   },
+   "source": [
+    "## Preparing data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "8fywSjC6GsRz"
+   },
+   "source": [
+    "### Load data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 363
+    },
+    "id": "aBDkEB11Fumg",
+    "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740"
+   },
+   "outputs": [],
+   "source": [
+    "energy = load_data('./data')[['load']]\n",
+    "energy.head(5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "O0BWP13rGnh4"
+   },
+   "source": [
+    "### Plot the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 486
+    },
+    "id": "hGaNPKu_Gidk",
+    "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d"
+   },
+   "outputs": [],
+   "source": [
+    "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n",
+    "plt.xlabel('timestamp', fontsize=12)\n",
+    "plt.ylabel('load', fontsize=12)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "IPuNor4eGwYY"
+   },
+   "source": [
+    "### Create training and testing data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ysvsNyONGt0Q"
+   },
+   "outputs": [],
+   "source": [
+    "train_start_dt = '2014-11-01 00:00:00'\n",
+    "test_start_dt = '2014-12-30 00:00:00'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 548
+    },
+    "id": "SsfdLoPyGy9w",
+    "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7"
+   },
+   "outputs": [],
+   "source": [
+    "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n",
+    "    .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n",
+    "    .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n",
+    "plt.xlabel('timestamp', fontsize=12)\n",
+    "plt.ylabel('load', fontsize=12)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "XbFTqBw6G1Ch"
+   },
+   "source": [
+    "### Preparing data for training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, you need to prepare the data for training by performing filtering and scaling of your data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "cYivRdQpHDj3",
+    "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1"
+   },
+   "outputs": [],
+   "source": [
+    "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n",
+    "test = energy.copy()[energy.index >= test_start_dt][['load']]\n",
+    "\n",
+    "print('Training data shape: ', train.shape)\n",
+    "print('Test data shape: ', test.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Scale the data to be in the range (0, 1)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 363
+    },
+    "id": "3DNntGQnZX8G",
+    "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c"
+   },
+   "outputs": [],
+   "source": [
+    "scaler = MinMaxScaler()\n",
+    "train['load'] = scaler.fit_transform(train)\n",
+    "train.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 206
+    },
+    "id": "26Yht-rzZexe",
+    "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301"
+   },
+   "outputs": [],
+   "source": [
+    "test['load'] = scaler.transform(test)\n",
+    "test.head(5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "x0n6jqxOQ41Z"
+   },
+   "source": [
+    "### Creating data with time-steps"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "fdmxTZtOQ8xs"
+   },
+   "source": [
+    " For our SVR, we transform the input data to be of the form `[batch, timesteps]`. So, we reshape the existing `train_data` and `test_data` such that there is a new dimension which refers to the timesteps. For our example, we take `timesteps = 5`. So, the inputs to the model are the data for the first 4 timesteps, and the output will be the data for the 5<sup>th</sup> timestep."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Rpju-Sc2HFm0"
+   },
+   "outputs": [],
+   "source": [
+    "# Converting to numpy arrays\n",
+    "\n",
+    "train_data = train.values\n",
+    "test_data = test.values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Selecting the timesteps\n",
+    "\n",
+    "timesteps=None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "O-JrsrsVJhUQ",
+    "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef"
+   },
+   "outputs": [],
+   "source": [
+    "# Converting data to 2D tensor\n",
+    "\n",
+    "train_data_timesteps=None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "exJD8AI7KE4g",
+    "outputId": "ce90260c-f327-427d-80f2-77307b5a6318"
+   },
+   "outputs": [],
+   "source": [
+    "# Converting test data to 2D tensor\n",
+    "\n",
+    "test_data_timesteps=None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "2u0R2sIsLuq5"
+   },
+   "outputs": [],
+   "source": [
+    "x_train, y_train = None\n",
+    "x_test, y_test = None\n",
+    "\n",
+    "print(x_train.shape, y_train.shape)\n",
+    "print(x_test.shape, y_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "8wIPOtAGLZlh"
+   },
+   "source": [
+    "## Creating SVR model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "EhA403BEPEiD"
+   },
+   "outputs": [],
+   "source": [
+    "# Create model using RBF kernel\n",
+    "\n",
+    "model = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "GS0UA3csMbqp",
+    "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d"
+   },
+   "outputs": [],
+   "source": [
+    "# Fit model on training data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Rz_x8S3UrlcF"
+   },
+   "source": [
+    "### Make model prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "XR0gnt3MnuYS",
+    "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364"
+   },
+   "outputs": [],
+   "source": [
+    "# Making predictions\n",
+    "\n",
+    "y_train_pred = None\n",
+    "y_test_pred = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_2epncg-SGzr"
+   },
+   "source": [
+    "## Analyzing model performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Scaling the predictions\n",
+    "\n",
+    "y_train_pred = scaler.inverse_transform(y_train_pred)\n",
+    "y_test_pred = scaler.inverse_transform(y_test_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "xmm_YLXhq7gV",
+    "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1"
+   },
+   "outputs": [],
+   "source": [
+    "# Scaling the original values\n",
+    "\n",
+    "y_train = scaler.inverse_transform(y_train)\n",
+    "y_test = scaler.inverse_transform(y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "u3LBj93coHEi",
+    "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4"
+   },
+   "outputs": [],
+   "source": [
+    "# Extract the timesteps for x-axis\n",
+    "\n",
+    "train_timestamps = None\n",
+    "test_timestamps = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(25,6))\n",
+    "# plot original output\n",
+    "# plot predicted output\n",
+    "plt.legend(['Actual','Predicted'])\n",
+    "plt.xlabel('Timestamp')\n",
+    "plt.title(\"Training data prediction\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "LnhzcnYtXHCm",
+    "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b"
+   },
+   "outputs": [],
+   "source": [
+    "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 225
+    },
+    "id": "53Q02FoqQH4V",
+    "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10,3))\n",
+    "# plot original output\n",
+    "# plot predicted output\n",
+    "plt.legend(['Actual','Predicted'])\n",
+    "plt.xlabel('Timestamp')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "clOAUH-SXCJG",
+    "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5"
+   },
+   "outputs": [],
+   "source": [
+    "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "DHlKvVCId5ue"
+   },
+   "source": [
+    "## Full dataset prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "cOFJ45vreO0N",
+    "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16"
+   },
+   "outputs": [],
+   "source": [
+    "# Extracting load values as numpy array\n",
+    "data = None\n",
+    "\n",
+    "# Scaling\n",
+    "data = None\n",
+    "\n",
+    "# Transforming to 2D tensor as per model input requirement\n",
+    "data_timesteps=None\n",
+    "\n",
+    "# Selecting inputs and outputs from data\n",
+    "X, Y = None, None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ESSAdQgwexIi"
+   },
+   "outputs": [],
+   "source": [
+    "# Make model predictions\n",
+    "\n",
+    "# Inverse scale and reshape\n",
+    "Y_pred = None\n",
+    "Y = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 328
+    },
+    "id": "M_qhihN0RVVX",
+    "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(30,8))\n",
+    "# plot original output\n",
+    "# plot predicted output\n",
+    "plt.legend(['Actual','Predicted'])\n",
+    "plt.xlabel('Timestamp')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "AcN7pMYXVGTK",
+    "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770"
+   },
+   "outputs": [],
+   "source": [
+    "print('MAPE: ', mape(Y_pred, Y)*100, '%')"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "Recurrent_Neural_Networks.ipynb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}