From 5a239eb27743029fa281d2969f77fc9c900b3810 Mon Sep 17 00:00:00 2001 From: Anirban Mukherjee Date: Wed, 6 Oct 2021 01:45:19 +0530 Subject: [PATCH] Delete utils.py --- 7-TimeSeries/3-SVR/common/utils.py | 147 ----------------------------- 1 file changed, 147 deletions(-) delete mode 100644 7-TimeSeries/3-SVR/common/utils.py diff --git a/7-TimeSeries/3-SVR/common/utils.py b/7-TimeSeries/3-SVR/common/utils.py deleted file mode 100644 index 4ab7623f..00000000 --- a/7-TimeSeries/3-SVR/common/utils.py +++ /dev/null @@ -1,147 +0,0 @@ -import numpy as np -import pandas as pd -import os -from collections import UserDict - -def load_data(data_dir): - """Load the GEFCom 2014 energy load data""" - - energy = pd.read_csv(os.path.join(data_dir, 'energy.csv'), parse_dates=['timestamp']) - - # Reindex the dataframe such that the dataframe has a record for every time point - # between the minimum and maximum timestamp in the time series. This helps to - # identify missing time periods in the data (there are none in this dataset). - - energy.index = energy['timestamp'] - energy = energy.reindex(pd.date_range(min(energy['timestamp']), - max(energy['timestamp']), - freq='H')) - energy = energy.drop('timestamp', axis=1) - - return energy - - -def mape(predictions, actuals): - """Mean absolute percentage error""" - predictions = np.array(predictions) - actuals = np.array(actuals) - return (np.absolute(predictions - actuals) / actuals).mean() - - -def create_evaluation_df(predictions, test_inputs, H, scaler): - """Create a data frame for easy evaluation""" - eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, H+1)]) - eval_df['timestamp'] = test_inputs.dataframe.index - eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h') - eval_df['actual'] = np.transpose(test_inputs['target']).ravel() - eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']]) - return eval_df - - -class TimeSeriesTensor(UserDict): - """A dictionary of tensors for input into the RNN model. - - Use this class to: - 1. Shift the values of the time series to create a Pandas dataframe containing all the data - for a single training example - 2. Discard any samples with missing values - 3. Transform this Pandas dataframe into a numpy array of shape - (samples, time steps, features) for input into Keras - - The class takes the following parameters: - - **dataset**: original time series - - **target** name of the target column - - **H**: the forecast horizon - - **tensor_structures**: a dictionary describing the tensor structure of the form - { 'tensor_name' : (range(max_backward_shift, max_forward_shift), [feature, feature, ...] ) } - if features are non-sequential and should not be shifted, use the form - { 'tensor_name' : (None, [feature, feature, ...])} - - **freq**: time series frequency (default 'H' - hourly) - - **drop_incomplete**: (Boolean) whether to drop incomplete samples (default True) - """ - - def __init__(self, dataset, target, H, tensor_structure, freq='H', drop_incomplete=True): - self.dataset = dataset - self.target = target - self.tensor_structure = tensor_structure - self.tensor_names = list(tensor_structure.keys()) - - self.dataframe = self._shift_data(H, freq, drop_incomplete) - self.data = self._df2tensors(self.dataframe) - - def _shift_data(self, H, freq, drop_incomplete): - - # Use the tensor_structures definitions to shift the features in the original dataset. - # The result is a Pandas dataframe with multi-index columns in the hierarchy - # tensor - the name of the input tensor - # feature - the input feature to be shifted - # time step - the time step for the RNN in which the data is input. These labels - # are centred on time t. the forecast creation time - df = self.dataset.copy() - - idx_tuples = [] - for t in range(1, H+1): - df['t+'+str(t)] = df[self.target].shift(t*-1, freq=freq) - idx_tuples.append(('target', 'y', 't+'+str(t))) - - for name, structure in self.tensor_structure.items(): - rng = structure[0] - dataset_cols = structure[1] - - for col in dataset_cols: - - # do not shift non-sequential 'static' features - if rng is None: - df['context_'+col] = df[col] - idx_tuples.append((name, col, 'static')) - - else: - for t in rng: - sign = '+' if t > 0 else '' - shift = str(t) if t != 0 else '' - period = 't'+sign+shift - shifted_col = name+'_'+col+'_'+period - df[shifted_col] = df[col].shift(t*-1, freq=freq) - idx_tuples.append((name, col, period)) - - df = df.drop(self.dataset.columns, axis=1) - idx = pd.MultiIndex.from_tuples(idx_tuples, names=['tensor', 'feature', 'time step']) - df.columns = idx - - if drop_incomplete: - df = df.dropna(how='any') - - return df - - def _df2tensors(self, dataframe): - - # Transform the shifted Pandas dataframe into the multidimensional numpy arrays. These - # arrays can be used to input into the keras model and can be accessed by tensor name. - # For example, for a TimeSeriesTensor object named "model_inputs" and a tensor named - # "target", the input tensor can be acccessed with model_inputs['target'] - - inputs = {} - y = dataframe['target'] - y = y.as_matrix() - inputs['target'] = y - - for name, structure in self.tensor_structure.items(): - rng = structure[0] - cols = structure[1] - tensor = dataframe[name][cols].as_matrix() - if rng is None: - tensor = tensor.reshape(tensor.shape[0], len(cols)) - else: - tensor = tensor.reshape(tensor.shape[0], len(cols), len(rng)) - tensor = np.transpose(tensor, axes=[0, 2, 1]) - inputs[name] = tensor - - return inputs - - def subset_data(self, new_dataframe): - - # Use this function to recreate the input tensors if the shifted dataframe - # has been filtered. - - self.dataframe = new_dataframe - self.data = self._df2tensors(self.dataframe)