From 0d57cbcd9c99661ab18718f011f61938c8a59fe7 Mon Sep 17 00:00:00 2001 From: Anirban Mukherjee Date: Wed, 6 Oct 2021 11:45:38 +0530 Subject: [PATCH] Create notebook.ipynb --- 7-TimeSeries/3-SVR/working/notebook.ipynb | 625 ++++++++++++++++++++++ 1 file changed, 625 insertions(+) create mode 100644 7-TimeSeries/3-SVR/working/notebook.ipynb diff --git a/7-TimeSeries/3-SVR/working/notebook.ipynb b/7-TimeSeries/3-SVR/working/notebook.ipynb new file mode 100644 index 00000000..60968530 --- /dev/null +++ b/7-TimeSeries/3-SVR/working/notebook.ipynb @@ -0,0 +1,625 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fv9OoQsMFk5A" + }, + "source": [ + "# Time series prediction using Support Vector Regressor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate how to:\n", + "\n", + "- prepare 2D time series data for training an SVM regressor model\n", + "- implement SVR using RBF kernel\n", + "- evaluate the model using plots and MAPE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M687KNlQFp0-" + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import datetime as dt\n", + "import math\n", + "\n", + "from sklearn.svm import SVR\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from common.utils import load_data, mape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cj-kfVdMGjWP" + }, + "source": [ + "## Preparing data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fywSjC6GsRz" + }, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "aBDkEB11Fumg", + "outputId": "99cf7987-0509-4b73-8cc2-75d7da0d2740" + }, + "outputs": [], + "source": [ + "energy = load_data('./data')[['load']]\n", + "energy.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0BWP13rGnh4" + }, + "source": [ + "### Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "hGaNPKu_Gidk", + "outputId": "7f89b326-9057-4f49-efbe-cb100ebdf76d" + }, + "outputs": [], + "source": [ + "energy.plot(y='load', subplots=True, figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IPuNor4eGwYY" + }, + "source": [ + "### Create training and testing data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysvsNyONGt0Q" + }, + "outputs": [], + "source": [ + "train_start_dt = '2014-11-01 00:00:00'\n", + "test_start_dt = '2014-12-30 00:00:00'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "SsfdLoPyGy9w", + "outputId": "d6d6c25b-b1f4-47e5-91d1-707e043237d7" + }, + "outputs": [], + "source": [ + "energy[(energy.index < test_start_dt) & (energy.index >= train_start_dt)][['load']].rename(columns={'load':'train'}) \\\n", + " .join(energy[test_start_dt:][['load']].rename(columns={'load':'test'}), how='outer') \\\n", + " .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)\n", + "plt.xlabel('timestamp', fontsize=12)\n", + "plt.ylabel('load', fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XbFTqBw6G1Ch" + }, + "source": [ + "### Preparing data for training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, you need to prepare the data for training by performing filtering and scaling of your data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYivRdQpHDj3", + "outputId": "a138f746-461c-4fd6-bfa6-0cee094c4aa1" + }, + "outputs": [], + "source": [ + "train = energy.copy()[(energy.index >= train_start_dt) & (energy.index < test_start_dt)][['load']]\n", + "test = energy.copy()[energy.index >= test_start_dt][['load']]\n", + "\n", + "print('Training data shape: ', train.shape)\n", + "print('Test data shape: ', test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scale the data to be in the range (0, 1)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "3DNntGQnZX8G", + "outputId": "210046bc-7a66-4ccd-d70d-aa4a7309949c" + }, + "outputs": [], + "source": [ + "scaler = MinMaxScaler()\n", + "train['load'] = scaler.fit_transform(train)\n", + "train.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "26Yht-rzZexe", + "outputId": "20326077-a38a-4e78-cc5b-6fd7af95d301" + }, + "outputs": [], + "source": [ + "test['load'] = scaler.transform(test)\n", + "test.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x0n6jqxOQ41Z" + }, + "source": [ + "### Creating data with time-steps" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fdmxTZtOQ8xs" + }, + "source": [ + " For our SVR, we transform the input data to be of the form `[batch, timesteps]`. So, we reshape the existing `train_data` and `test_data` such that there is a new dimension which refers to the timesteps. For our example, we take `timesteps = 5`. So, the inputs to the model are the data for the first 4 timesteps, and the output will be the data for the 5th timestep." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rpju-Sc2HFm0" + }, + "outputs": [], + "source": [ + "# Converting to numpy arrays\n", + "\n", + "train_data = train.values\n", + "test_data = test.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the timesteps\n", + "\n", + "timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O-JrsrsVJhUQ", + "outputId": "c90dbe71-bacc-4ec4-b452-f82fe5aefaef" + }, + "outputs": [], + "source": [ + "# Converting data to 2D tensor\n", + "\n", + "train_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "exJD8AI7KE4g", + "outputId": "ce90260c-f327-427d-80f2-77307b5a6318" + }, + "outputs": [], + "source": [ + "# Converting test data to 2D tensor\n", + "\n", + "test_data_timesteps=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2u0R2sIsLuq5" + }, + "outputs": [], + "source": [ + "x_train, y_train = None\n", + "x_test, y_test = None\n", + "\n", + "print(x_train.shape, y_train.shape)\n", + "print(x_test.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8wIPOtAGLZlh" + }, + "source": [ + "## Creating SVR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EhA403BEPEiD" + }, + "outputs": [], + "source": [ + "# Create model using RBF kernel\n", + "\n", + "model = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GS0UA3csMbqp", + "outputId": "d86b6f05-5742-4c1d-c2db-c40510bd4f0d" + }, + "outputs": [], + "source": [ + "# Fit model on training data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz_x8S3UrlcF" + }, + "source": [ + "### Make model prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XR0gnt3MnuYS", + "outputId": "157e40ab-9a23-4b66-a885-0d52a24b2364" + }, + "outputs": [], + "source": [ + "# Making predictions\n", + "\n", + "y_train_pred = None\n", + "y_test_pred = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2epncg-SGzr" + }, + "source": [ + "## Analyzing model performance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scaling the predictions\n", + "\n", + "y_train_pred = scaler.inverse_transform(y_train_pred)\n", + "y_test_pred = scaler.inverse_transform(y_test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xmm_YLXhq7gV", + "outputId": "18392f64-4029-49ac-c71a-a4e2411152a1" + }, + "outputs": [], + "source": [ + "# Scaling the original values\n", + "\n", + "y_train = scaler.inverse_transform(y_train)\n", + "y_test = scaler.inverse_transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u3LBj93coHEi", + "outputId": "d4fd49e8-8c6e-4bb0-8ef9-ca0b26d725b4" + }, + "outputs": [], + "source": [ + "# Extract the timesteps for x-axis\n", + "\n", + "train_timestamps = None\n", + "test_timestamps = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(25,6))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.title(\"Training data prediction\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnhzcnYtXHCm", + "outputId": "f5f0d711-f18b-4788-ad21-d4470ea2c02b" + }, + "outputs": [], + "source": [ + "print('MAPE for training data: ', mape(y_train_pred, y_train)*100, '%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "53Q02FoqQH4V", + "outputId": "53e2d59b-5075-4765-ad9e-aed56c966583" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,3))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "clOAUH-SXCJG", + "outputId": "a3aa85ff-126a-4a4a-cd9e-90b9cc465ef5" + }, + "outputs": [], + "source": [ + "print('MAPE for testing data: ', mape(y_test_pred, y_test)*100, '%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHlKvVCId5ue" + }, + "source": [ + "## Full dataset prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOFJ45vreO0N", + "outputId": "35628e33-ecf9-4966-8036-f7ea86db6f16" + }, + "outputs": [], + "source": [ + "# Extracting load values as numpy array\n", + "data = None\n", + "\n", + "# Scaling\n", + "data = None\n", + "\n", + "# Transforming to 2D tensor as per model input requirement\n", + "data_timesteps=None\n", + "\n", + "# Selecting inputs and outputs from data\n", + "X, Y = None, None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESSAdQgwexIi" + }, + "outputs": [], + "source": [ + "# Make model predictions\n", + "\n", + "# Inverse scale and reshape\n", + "Y_pred = None\n", + "Y = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "id": "M_qhihN0RVVX", + "outputId": "a89cb23e-1d35-437f-9d63-8b8907e12f80" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(30,8))\n", + "# plot original output\n", + "# plot predicted output\n", + "plt.legend(['Actual','Predicted'])\n", + "plt.xlabel('Timestamp')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcN7pMYXVGTK", + "outputId": "7e1c2161-47ce-496c-9d86-7ad9ae0df770" + }, + "outputs": [], + "source": [ + "print('MAPE: ', mape(Y_pred, Y)*100, '%')" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Recurrent_Neural_Networks.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}