You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							38 lines
						
					
					
						
							1.5 KiB
						
					
					
				
			
		
		
	
	
							38 lines
						
					
					
						
							1.5 KiB
						
					
					
				| import zipfile
 | |
| import os
 | |
| import sys
 | |
| import pandas as pd
 | |
| 
 | |
| # This function unzips the GEFCom2014 data zip file and extracts the 'extended'
 | |
| # load forecasting competition data. Data is saved in energy.csv
 | |
def extract_data(data_dir):
    """Unpack the GEFCom2014 archive and write the load data to energy.csv.

    The function expects ``GEFCom2014.zip`` to be present in ``data_dir``.
    It extracts that archive into ``<data_dir>/GEFCom2014``, then extracts the
    nested ``GEFCom2014-E_V2.zip`` into ``<data_dir>/GEFCom2014-E``, reads
    ``GEFCom2014-E.xlsx`` from there, and saves a three-column table
    (``timestamp``, ``load``, ``temp``) to ``<data_dir>/energy.csv``.

    Args:
        data_dir: Directory that contains ``GEFCom2014.zip`` and that receives
            the extracted folders and the resulting ``energy.csv``.

    Raises:
        SystemExit: If ``GEFCom2014.zip`` is not found in ``data_dir``; the
            exit message tells the user where to download it.
    """
    GEFCom_dir = os.path.join(data_dir, 'GEFCom2014', 'GEFCom2014 Data')

    GEFCom_zipfile = os.path.join(data_dir, 'GEFCom2014.zip')
    if not os.path.exists(GEFCom_zipfile):
        sys.exit(
            "Download GEFCom2014.zip from "
            "https://www.dropbox.com/s/pqenrr2mcvl0hk9/GEFCom2014.zip?dl=0 "
            "and save it to the '{}' directory.".format(data_dir)
        )

    # Unzip the root archive. The context manager guarantees the handle is
    # closed even if extraction fails part-way through.
    with zipfile.ZipFile(GEFCom_zipfile, 'r') as zip_ref:
        zip_ref.extractall(os.path.join(data_dir, 'GEFCom2014'))

    # Extract the extended competition data from the nested archive.
    with zipfile.ZipFile(os.path.join(GEFCom_dir, 'GEFCom2014-E_V2.zip'), 'r') as zip_ref:
        zip_ref.extractall(os.path.join(data_dir, 'GEFCom2014-E'))

    # Load the data from the Excel file. The keyword is `parse_dates` (plural);
    # the misspelled `parse_date` is rejected by pandas as an unknown argument.
    data = pd.read_excel(
        os.path.join(data_dir, 'GEFCom2014-E', 'GEFCom2014-E.xlsx'),
        parse_dates=['Date'],
    )

    # Create the timestamp from Date and Hour. Hour is shifted by one before
    # being added as an offset, so Hour == 1 maps to 00:00 of that date.
    data['timestamp'] = data['Date'].add(pd.to_timedelta(data['Hour'] - 1, unit='h'))
    data = data[['timestamp', 'load', 'T']]
    data = data.rename(columns={'T': 'temp'})

    # Remove the time period with no load data.
    data = data[data.timestamp >= '2012-01-01']

    # Save to csv.
    data.to_csv(os.path.join(data_dir, 'energy.csv'), index=False)
 |