Removed duplicates for 'common\utils.py'

Placed one 'common\utils.py' in the folder '7-TimeSeries'
4 years ago · 87f2cfb947
parent 910e735e98
commit 87f2cfb947
2 changed files with 0 additions and 147 deletions
--- a/7-TimeSeries/3-SVR/working/common/utils.py
+++ b/7-TimeSeries/3-SVR/working/common/utils.py
@ -1,147 +0,0 @@
-import numpy as np
-import pandas as pd
-import os
-from collections import UserDict
-
-def load_data(data_dir):
-    """Load the GEFCom 2014 energy load data"""
-
-    energy = pd.read_csv(os.path.join(data_dir, 'energy.csv'), parse_dates=['timestamp'])
-
-    # Reindex the dataframe such that the dataframe has a record for every time point
-    # between the minimum and maximum timestamp in the time series. This helps to 
-    # identify missing time periods in the data (there are none in this dataset).
-
-    energy.index = energy['timestamp']
-    energy = energy.reindex(pd.date_range(min(energy['timestamp']),
-                                          max(energy['timestamp']),
-                                          freq='H'))
-    energy = energy.drop('timestamp', axis=1)
-
-    return energy
-
-
-def mape(predictions, actuals):
-    """Mean absolute percentage error"""
-    predictions = np.array(predictions)
-    actuals = np.array(actuals)
-    return (np.absolute(predictions - actuals) / actuals).mean()
-
-
-def create_evaluation_df(predictions, test_inputs, H, scaler):
-    """Create a data frame for easy evaluation"""
-    eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, H+1)])
-    eval_df['timestamp'] = test_inputs.dataframe.index
-    eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')
-    eval_df['actual'] = np.transpose(test_inputs['target']).ravel()
-    eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])
-    return eval_df
-
-
-class TimeSeriesTensor(UserDict):
-    """A dictionary of tensors for input into the RNN model.
-    
-    Use this class to:
-      1. Shift the values of the time series to create a Pandas dataframe containing all the data
-         for a single training example
-      2. Discard any samples with missing values
-      3. Transform this Pandas dataframe into a numpy array of shape 
-         (samples, time steps, features) for input into Keras
-
-    The class takes the following parameters:
-       - **dataset**: original time series
-       - **target** name of the target column
-       - **H**: the forecast horizon
-       - **tensor_structures**: a dictionary describing the tensor structure of the form
-             { 'tensor_name' : (range(max_backward_shift, max_forward_shift), [feature, feature, ...] ) }
-             if features are non-sequential and should not be shifted, use the form
-             { 'tensor_name' : (None, [feature, feature, ...])}
-       - **freq**: time series frequency (default 'H' - hourly)
-       - **drop_incomplete**: (Boolean) whether to drop incomplete samples (default True)
-    """
-    
-    def __init__(self, dataset, target, H, tensor_structure, freq='H', drop_incomplete=True):
-        self.dataset = dataset
-        self.target = target
-        self.tensor_structure = tensor_structure
-        self.tensor_names = list(tensor_structure.keys())
-        
-        self.dataframe = self._shift_data(H, freq, drop_incomplete)
-        self.data = self._df2tensors(self.dataframe)
-    
-    def _shift_data(self, H, freq, drop_incomplete):
-        
-        # Use the tensor_structures definitions to shift the features in the original dataset.
-        # The result is a Pandas dataframe with multi-index columns in the hierarchy
-        #     tensor - the name of the input tensor
-        #     feature - the input feature to be shifted
-        #     time step - the time step for the RNN in which the data is input. These labels
-        #         are centred on time t. the forecast creation time
-        df = self.dataset.copy()
-        
-        idx_tuples = []
-        for t in range(1, H+1):
-            df['t+'+str(t)] = df[self.target].shift(t*-1, freq=freq)
-            idx_tuples.append(('target', 'y', 't+'+str(t)))
-
-        for name, structure in self.tensor_structure.items():
-            rng = structure[0]
-            dataset_cols = structure[1]
-            
-            for col in dataset_cols:
-            
-            # do not shift non-sequential 'static' features
-                if rng is None:
-                    df['context_'+col] = df[col]
-                    idx_tuples.append((name, col, 'static'))
-
-                else:
-                    for t in rng:
-                        sign = '+' if t > 0 else ''
-                        shift = str(t) if t != 0 else ''
-                        period = 't'+sign+shift
-                        shifted_col = name+'_'+col+'_'+period
-                        df[shifted_col] = df[col].shift(t*-1, freq=freq)
-                        idx_tuples.append((name, col, period))
-                
-        df = df.drop(self.dataset.columns, axis=1)
-        idx = pd.MultiIndex.from_tuples(idx_tuples, names=['tensor', 'feature', 'time step'])
-        df.columns = idx
-
-        if drop_incomplete:
-            df = df.dropna(how='any')
-
-        return df
-    
-    def _df2tensors(self, dataframe):
-        
-        # Transform the shifted Pandas dataframe into the multidimensional numpy arrays. These
-        # arrays can be used to input into the keras model and can be accessed by tensor name.
-        # For example, for a TimeSeriesTensor object named "model_inputs" and a tensor named
-        # "target", the input tensor can be acccessed with model_inputs['target']
-    
-        inputs = {}
-        y = dataframe['target']
-        y = y.as_matrix()
-        inputs['target'] = y
-
-        for name, structure in self.tensor_structure.items():
-            rng = structure[0]
-            cols = structure[1]
-            tensor = dataframe[name][cols].as_matrix()
-            if rng is None:
-                tensor = tensor.reshape(tensor.shape[0], len(cols))
-            else:
-                tensor = tensor.reshape(tensor.shape[0], len(cols), len(rng))
-                tensor = np.transpose(tensor, axes=[0, 2, 1])
-            inputs[name] = tensor
-
-        return inputs
-       
-    def subset_data(self, new_dataframe):
-        
-        # Use this function to recreate the input tensors if the shifted dataframe
-        # has been filtered.
-        
-        self.dataframe = new_dataframe
-        self.data = self._df2tensors(self.dataframe)
--- a/7-TimeSeries/3-SVR/solution/common/utils.py
+++ b/7-TimeSeries/3-SVR/solution/common/utils.py