From d4d96ebd557df7efa458fe4c9fd3b204710b1121 Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Sat, 12 Dec 2020 17:35:09 +0800 Subject: [PATCH] =?UTF-8?q?Create=20=E7=96=BE=E7=97=85=E5=BC=95=E8=B5=B7?= =?UTF-8?q?=E5=8E=9F=E5=9B=A0.ipynb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../疾病引起原因.ipynb | 1407 +++++++++++++++++ 1 file changed, 1407 insertions(+) create mode 100644 机器学习竞赛实战_优胜解决方案/特征工程建模/疾病引起原因.ipynb diff --git a/机器学习竞赛实战_优胜解决方案/特征工程建模/疾病引起原因.ipynb b/机器学习竞赛实战_优胜解决方案/特征工程建模/疾病引起原因.ipynb new file mode 100644 index 0000000..304bcc0 --- /dev/null +++ b/机器学习竞赛实战_优胜解决方案/特征工程建模/疾病引起原因.ipynb @@ -0,0 +1,1407 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 疾病引起原因模型解释" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.tree import export_graphviz\n", + "from sklearn.metrics import roc_curve, auc, precision_score, recall_score,accuracy_score\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "import eli5\n", + "from eli5.sklearn import PermutationImportance\n", + "import shap\n", + "from pdpbox import pdp,info_plots\n", + "\n", + "plt.rcParams['font.sans-serif']=['SimHei'] # 让图形可以显示中文\n", + "plt.rcParams['axes.unicode_minus']=False" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(123)\n", + "\n", + "pd.options.mode.chained_assignment = None" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
\n", + "
" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target\n", + "0 63 1 3 145 233 1 0 150 0 2.3 0 0 1 1\n", + "1 37 1 2 130 250 0 1 187 0 3.5 0 0 2 1\n", + "2 41 0 1 130 204 0 0 172 0 1.4 2 0 2 1\n", + "3 56 1 1 120 236 0 1 178 0 0.8 2 0 2 1\n", + "4 57 0 0 120 354 0 1 163 1 0.6 2 0 2 1" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt = pd.read_csv(\"data/heart.csv\")\n", + "dt.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "