From c67515eb6bfa6edc504d1f154fb43004b2bf60e4 Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Fri, 25 Dec 2020 10:42:31 +0800 Subject: [PATCH] =?UTF-8?q?Create=20=E5=BB=BA=E6=A8=A1=E4=B8=8E=E5=88=86?= =?UTF-8?q?=E6=9E=90=5F=E5=BB=BA=E7=AD=91=E8=83=BD=E6=BA=90=E5=88=A9?= =?UTF-8?q?=E7=94=A8=E7=8E=87=E9=A2=84=E6=B5=8B.ipynb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...分析_建筑能源利用率预测.ipynb | 943 ++++++++++++++++++ 1 file changed, 943 insertions(+) create mode 100644 机器学习竞赛实战_优胜解决方案/建筑能源利用率预测/建模与分析_建筑能源利用率预测.ipynb diff --git a/机器学习竞赛实战_优胜解决方案/建筑能源利用率预测/建模与分析_建筑能源利用率预测.ipynb b/机器学习竞赛实战_优胜解决方案/建筑能源利用率预测/建模与分析_建筑能源利用率预测.ipynb new file mode 100644 index 0000000..28335ad --- /dev/null +++ b/机器学习竞赛实战_优胜解决方案/建筑能源利用率预测/建模与分析_建筑能源利用率预测.ipynb @@ -0,0 +1,943 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 载入工具包" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "pd.options.mode.chained_assignment = None # 关闭 pandas 的链式赋值警告(SettingWithCopyWarning)\n", + "\n", + "pd.set_option('display.max_columns', 60) # 设置最大显示列为60\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "plt.rcParams['font.size'] = 24 # 设置字体大小\n", + "\n", + "from IPython.core.pylabtools import figsize # 设置画图大小\n", + "\n", + "import seaborn as sns # 画图工具\n", + "sns.set(font_scale=2)\n", + "\n", + "# 缺失值填补与特征缩放(注意:Imputer 已在 scikit-learn 0.22 中移除,新版本请改用 sklearn.impute.SimpleImputer)\n", + "from sklearn.preprocessing import Imputer, MinMaxScaler\n", + "\n", + "# 机器学习模型\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n", + "from sklearn.svm import SVR\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "\n", + "# 超参数调整\n", + "from sklearn.model_selection import RandomizedSearchCV, GridSearchCV" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Feature Size: (6622, 64)\n", + "Testing Feature Size: (2839, 64)\n", + "Training Labels Size: (6622, 1)\n", + "Testing Labels Size: (2839, 1)\n" + ] + } + ], + "source": [ + "# Read the data into DataFrames\n", + "train_features = pd.read_csv('data/training_features.csv')\n", + "test_features = pd.read_csv('data/testing_features.csv')\n", + "train_labels = pd.read_csv('data/training_labels.csv')\n", + "test_labels = pd.read_csv('data/testing_labels.csv')\n", + "\n", + "# Display sizes of data\n", + "print('Training Feature Size: ', train_features.shape)\n", + "print('Testing Feature Size: ', test_features.shape)\n", + "print('Training Labels Size: ', train_labels.shape)\n", + "print('Testing Labels Size: ', test_labels.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Order | \n", + "Property Id | \n", + "DOF Gross Floor Area | \n", + "Year Built | \n", + "Number of Buildings - Self-reported | \n", + "Occupancy | \n", + "Site EUI (kBtu/ft²) | \n", + "Weather Normalized Site Electricity Intensity (kWh/ft²) | \n", + "Weather Normalized Site Natural Gas Intensity (therms/ft²) | \n", + "Water Intensity (All Water Sources) (gal/ft²) | \n", + "Latitude | \n", + "Longitude | \n", + "Community Board | \n", + "Census Tract | \n", + "log_Direct GHG Emissions (Metric Tons CO2e) | \n", + "log_Water Intensity (All Water Sources) (gal/ft²) | \n", + "Borough_Staten Island | \n", + "Largest Property Use Type_Adult Education | \n", + "Largest Property Use Type_Automobile Dealership | \n", + "Largest Property Use Type_Bank Branch | \n", + "Largest Property Use Type_College/University | \n", + "Largest Property Use Type_Convenience Store without Gas Station | \n", + "Largest Property Use Type_Courthouse | \n", + "Largest Property Use Type_Distribution Center | \n", + "Largest Property Use Type_Enclosed Mall | \n", + "Largest Property Use Type_Financial Office | \n", + "Largest Property Use Type_Hospital (General Medical & Surgical) | \n", + "Largest Property Use Type_Hotel | \n", + "Largest Property Use Type_K-12 School | \n", + "Largest Property Use Type_Library | \n", + "... 
| \n", + "Largest Property Use Type_Multifamily Housing | \n", + "Largest Property Use Type_Museum | \n", + "Largest Property Use Type_Non-Refrigerated Warehouse | \n", + "Largest Property Use Type_Other | \n", + "Largest Property Use Type_Other - Education | \n", + "Largest Property Use Type_Other - Entertainment/Public Assembly | \n", + "Largest Property Use Type_Other - Lodging/Residential | \n", + "Largest Property Use Type_Other - Mall | \n", + "Largest Property Use Type_Other - Public Services | \n", + "Largest Property Use Type_Other - Recreation | \n", + "Largest Property Use Type_Other - Services | \n", + "Largest Property Use Type_Other - Specialty Hospital | \n", + "Largest Property Use Type_Outpatient Rehabilitation/Physical Therapy | \n", + "Largest Property Use Type_Parking | \n", + "Largest Property Use Type_Performing Arts | \n", + "Largest Property Use Type_Pre-school/Daycare | \n", + "Largest Property Use Type_Refrigerated Warehouse | \n", + "Largest Property Use Type_Repair Services (Vehicle, Shoe, Locksmith, etc.) | \n", + "Largest Property Use Type_Residence Hall/Dormitory | \n", + "Largest Property Use Type_Residential Care Facility | \n", + "Largest Property Use Type_Restaurant | \n", + "Largest Property Use Type_Retail Store | \n", + "Largest Property Use Type_Self-Storage Facility | \n", + "Largest Property Use Type_Senior Care Community | \n", + "Largest Property Use Type_Social/Meeting Hall | \n", + "Largest Property Use Type_Strip Mall | \n", + "Largest Property Use Type_Supermarket/Grocery Store | \n", + "Largest Property Use Type_Urgent Care/Clinic/Other Outpatient | \n", + "Largest Property Use Type_Wholesale Club/Supercenter | \n", + "Largest Property Use Type_Worship Facility | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "13276 | \n", + "5849784 | \n", + "90300.0 | \n", + "1950 | \n", + "1 | \n", + "100 | \n", + "126.0 | \n", + "5.2 | \n", + "1.2 | \n", + "99.41 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "6.088818 | \n", + "4.599253 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
1 | \n", + "7377 | \n", + "4398442 | \n", + "52000.0 | \n", + "1926 | \n", + "1 | \n", + "100 | \n", + "95.4 | \n", + "4.7 | \n", + "0.9 | \n", + "NaN | \n", + "40.835496 | \n", + "-73.887745 | \n", + "3.0 | \n", + "161.0 | \n", + "5.384036 | \n", + "NaN | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
2 | \n", + "9479 | \n", + "4665374 | \n", + "104700.0 | \n", + "1954 | \n", + "1 | \n", + "100 | \n", + "40.4 | \n", + "3.8 | \n", + "0.3 | \n", + "NaN | \n", + "40.663206 | \n", + "-73.949469 | \n", + "9.0 | \n", + "329.0 | \n", + "5.017280 | \n", + "NaN | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
3 | \n", + "14774 | \n", + "3393340 | \n", + "129333.0 | \n", + "1992 | \n", + "1 | \n", + "100 | \n", + "157.1 | \n", + "16.9 | \n", + "1.1 | \n", + "NaN | \n", + "40.622968 | \n", + "-74.078742 | \n", + "1.0 | \n", + "27.0 | \n", + "6.510853 | \n", + "NaN | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
4 | \n", + "3286 | \n", + "2704325 | \n", + "109896.0 | \n", + "1927 | \n", + "1 | \n", + "100 | \n", + "62.3 | \n", + "3.5 | \n", + "0.0 | \n", + "28.65 | \n", + "40.782421 | \n", + "-73.972622 | \n", + "7.0 | \n", + "165.0 | \n", + "6.123589 | \n", + "3.355153 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
5 rows × 64 columns
\n", + "