From c146541193af32d51c45cbf3dd97870a66994e8a Mon Sep 17 00:00:00 2001
From: benjas <909336740@qq.com>
Date: Wed, 18 Nov 2020 22:56:36 +0800
Subject: [PATCH] =?UTF-8?q?Add=20=E5=AE=9E=E6=93=8D=E5=81=87=E8=AE=BE?=
=?UTF-8?q?=E6=A3=80=E9=AA=8C=E5=AE=9E=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../假设检验实例-checkpoint.ipynb | 429 ++++++++++++++++++
.../假设检验章节/normtemp.dat.txt | 131 ++++++
.../假设检验实例.ipynb | 429 ++++++++++++++++++
3 files changed, 989 insertions(+)
create mode 100644 notebook_必备数学基础/假设检验章节/.ipynb_checkpoints/假设检验实例-checkpoint.ipynb
create mode 100644 notebook_必备数学基础/假设检验章节/normtemp.dat.txt
create mode 100644 notebook_必备数学基础/假设检验章节/假设检验实例.ipynb
diff --git a/notebook_必备数学基础/假设检验章节/.ipynb_checkpoints/假设检验实例-checkpoint.ipynb b/notebook_必备数学基础/假设检验章节/.ipynb_checkpoints/假设检验实例-checkpoint.ipynb
new file mode 100644
index 0000000..3cedcc9
--- /dev/null
+++ b/notebook_必备数学基础/假设检验章节/.ipynb_checkpoints/假设检验实例-checkpoint.ipynb
@@ -0,0 +1,429 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "数据集下载地址:https://ww2.amstat.org/publications/jse/jse_data_archive.htm\n",
+ "<br>数据集描述:https://ww2.amstat.org/publications/jse/datasets/normtemp.txt\n",
+ "<br>数据:normtemp.dat.txt "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import pylab\n",
+ "import math\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "from scipy.stats import norm\n",
+ "import scipy.stats\n",
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('normtemp.dat.txt', sep=r'\\s+',  # any run of spaces/tabs separates fields; the file has no header row\n",
+ "                 names=['Temperature','Gender','Heart Rate'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Temperature | \n",
+ " Gender | \n",
+ " Heart Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 130.000000 | \n",
+ " 130.000000 | \n",
+ " 130.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 98.249231 | \n",
+ " 1.500000 | \n",
+ " 73.761538 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.733183 | \n",
+ " 0.501934 | \n",
+ " 7.062077 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 96.300000 | \n",
+ " 1.000000 | \n",
+ " 57.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 97.800000 | \n",
+ " 1.000000 | \n",
+ " 69.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 98.300000 | \n",
+ " 1.500000 | \n",
+ " 74.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 98.700000 | \n",
+ " 2.000000 | \n",
+ " 79.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 100.800000 | \n",
+ " 2.000000 | \n",
+ " 89.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Temperature Gender Heart Rate\n",
+ "count 130.000000 130.000000 130.000000\n",
+ "mean 98.249231 1.500000 73.761538\n",
+ "std 0.733183 0.501934 7.062077\n",
+ "min 96.300000 1.000000 57.000000\n",
+ "25% 97.800000 1.000000 69.000000\n",
+ "50% 98.300000 1.500000 74.000000\n",
+ "75% 98.700000 2.000000 79.000000\n",
+ "max 100.800000 2.000000 89.000000"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()  # count, mean, std, min, quartiles and max for each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Temperature | \n",
+ " Gender | \n",
+ " Heart Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 96.3 | \n",
+ " 1 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 96.7 | \n",
+ " 1 | \n",
+ " 71 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 96.9 | \n",
+ " 1 | \n",
+ " 74 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 97.0 | \n",
+ " 1 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 97.1 | \n",
+ " 1 | \n",
+ " 73 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Temperature Gender Heart Rate\n",
+ "0 96.3 1 70\n",
+ "1 96.7 1 71\n",
+ "2 96.9 1 74\n",
+ "3 97.0 1 80\n",
+ "4 97.1 1 73"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head() # Temperature: body temperature, Gender: male/female, Heart Rate: heartbeat rate"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 体温的分布是正态的吗?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Average (Mu):98.24923076923076/Standard Deviation: 0.7303577789050376\n"
+ ]
+ }
+ ],
+ "source": [
+ "observed_temperatures = df['Temperature'].sort_values()\n",
+ "# np.arange leaves out its stop value, so pad the stop to keep the hottest reading inside a bin.\n",
+ "bin_val = np.arange(observed_temperatures.min(), observed_temperatures.max() + 0.1, 0.05)\n",
+ "# Mean and standard deviation of the readings (np.std defaults to ddof=0).\n",
+ "mu, std = np.mean(observed_temperatures), np.std(observed_temperatures)\n",
+ "\n",
+ "# Normal density with that mean and standard deviation, evaluated at each reading.\n",
+ "p = norm.pdf(observed_temperatures, mu, std)\n",
+ "# density=True replaces normed=True, which current matplotlib no longer accepts.\n",
+ "plt.hist(observed_temperatures, bins=bin_val, density=True)\n",
+ "plt.plot(observed_temperatures, p, color='red')\n",
+ "plt.xticks(np.arange(95.75,101.25,0.25),rotation=90)\n",
+ "plt.title('Human Body Temperature Distributions')\n",
+ "plt.xlabel('human body temperature')\n",
+ "plt.show()\n",
+ "\n",
+ "print('Average (Mu):'+str(mu)+'/' 'Standard Deviation: '+str(std))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 正态检验"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shapiro-Wilk Stat: 0.9865769743919373 Shapiro-Wilk p_Value: 0.2331680953502655\n",
+ "p: 0.2587479863488212\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "x = observed_temperatures\n",
+ "# Shapiro-Wilk test: the null hypothesis is that the sample is drawn from a normal distribution.\n",
+ "shapiro_test, shapiro_p = scipy.stats.shapiro(x)\n",
+ "print('Shapiro-Wilk Stat:', shapiro_test, 'Shapiro-Wilk p_Value:', shapiro_p)\n",
+ "# Second normality test; a p-value above 0.05 means normality is not rejected.\n",
+ "k2, p = scipy.stats.normaltest(x)\n",
+ "print('p:', p)\n",
+ "# Probability plot: the closer the points follow the straight line, the closer the data are to normal.\n",
+ "scipy.stats.probplot(x, dist='norm', plot=pylab)\n",
+ "pylab.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 第二种检验正态分布方法"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mean temperature : 98.24923076923076 with standard deviation of +/- 0.730357778905038\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def ecdf(data):\n",
+ "    \"\"\"Return (sorted values, cumulative fractions 1/n .. n/n) for a 1-D sample.\"\"\"\n",
+ "    values = np.sort(data)\n",
+ "    fractions = np.arange(1, len(data) + 1) / len(data)\n",
+ "    return values, fractions\n",
+ "\n",
+ "# Compute the empirical mean and standard deviation.\n",
+ "\n",
+ "# Number of observations\n",
+ "n = len(df['Temperature'])\n",
+ "# Mean of the observed temperatures\n",
+ "mu = np.mean(df['Temperature'])\n",
+ "# Population standard deviation (np.std uses ddof=0), not the ddof=1 sample estimate\n",
+ "std = np.std(df['Temperature'])\n",
+ "print('Mean temperature :', mu, 'with standard deviation of +/-',std)\n",
+ "\n",
+ "# Draw the normal reference sample from a seeded generator so every rerun gives the same curve.\n",
+ "normalized_sample = np.random.RandomState(42).normal(mu, std, size=10000)\n",
+ "x_temperature,y_temperature = ecdf(df['Temperature'])\n",
+ "normalized_x, normalized_y = ecdf(normalized_sample)\n",
+ "\n",
+ "# Plot both ECDFs: the simulated normal as a line, the observed data as points.\n",
+ "fig = plt.figure(figsize=(8,5))\n",
+ "plt.plot(normalized_x, normalized_y)\n",
+ "plt.plot(x_temperature, y_temperature,marker='.', linestyle='none')\n",
+ "plt.ylabel('ECDF')\n",
+ "plt.xlabel('Temperature')\n",
+ "plt.legend(('Normal Distribution', 'Sample data'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook_必备数学基础/假设检验章节/normtemp.dat.txt b/notebook_必备数学基础/假设检验章节/normtemp.dat.txt
new file mode 100644
index 0000000..8ee1898
--- /dev/null
+++ b/notebook_必备数学基础/假设检验章节/normtemp.dat.txt
@@ -0,0 +1,131 @@
+96.3 1 70
+96.7 1 71
+96.9 1 74
+97.0 1 80
+97.1 1 73
+97.1 1 75
+97.1 1 82
+97.2 1 64
+97.3 1 69
+97.4 1 70
+97.4 1 68
+97.4 1 72
+97.4 1 78
+97.5 1 70
+97.5 1 75
+97.6 1 74
+97.6 1 69
+97.6 1 73
+97.7 1 77
+97.8 1 58
+97.8 1 73
+97.8 1 65
+97.8 1 74
+97.9 1 76
+97.9 1 72
+98.0 1 78
+98.0 1 71
+98.0 1 74
+98.0 1 67
+98.0 1 64
+98.0 1 78
+98.1 1 73
+98.1 1 67
+98.2 1 66
+98.2 1 64
+98.2 1 71
+98.2 1 72
+98.3 1 86
+98.3 1 72
+98.4 1 68
+98.4 1 70
+98.4 1 82
+98.4 1 84
+98.5 1 68
+98.5 1 71
+98.6 1 77
+98.6 1 78
+98.6 1 83
+98.6 1 66
+98.6 1 70
+98.6 1 82
+98.7 1 73
+98.7 1 78
+98.8 1 78
+98.8 1 81
+98.8 1 78
+98.9 1 80
+99.0 1 75
+99.0 1 79
+99.0 1 81
+99.1 1 71
+99.2 1 83
+99.3 1 63
+99.4 1 70
+99.5 1 75
+96.4 2 69
+96.7 2 62
+96.8 2 75
+97.2 2 66
+97.2 2 68
+97.4 2 57
+97.6 2 61
+97.7 2 84
+97.7 2 61
+97.8 2 77
+97.8 2 62
+97.8 2 71
+97.9 2 68
+97.9 2 69
+97.9 2 79
+98.0 2 76
+98.0 2 87
+98.0 2 78
+98.0 2 73
+98.0 2 89
+98.1 2 81
+98.2 2 73
+98.2 2 64
+98.2 2 65
+98.2 2 73
+98.2 2 69
+98.2 2 57
+98.3 2 79
+98.3 2 78
+98.3 2 80
+98.4 2 79
+98.4 2 81
+98.4 2 73
+98.4 2 74
+98.4 2 84
+98.5 2 83
+98.6 2 82
+98.6 2 85
+98.6 2 86
+98.6 2 77
+98.7 2 72
+98.7 2 79
+98.7 2 59
+98.7 2 64
+98.7 2 65
+98.7 2 82
+98.8 2 64
+98.8 2 70
+98.8 2 83
+98.8 2 89
+98.8 2 69
+98.8 2 73
+98.8 2 84
+98.9 2 76
+99.0 2 79
+99.0 2 81
+99.1 2 80
+99.1 2 74
+99.2 2 77
+99.2 2 66
+99.3 2 68
+99.4 2 77
+99.9 2 79
+100.0 2 78
+100.8 2 77
+
diff --git a/notebook_必备数学基础/假设检验章节/假设检验实例.ipynb b/notebook_必备数学基础/假设检验章节/假设检验实例.ipynb
new file mode 100644
index 0000000..3cedcc9
--- /dev/null
+++ b/notebook_必备数学基础/假设检验章节/假设检验实例.ipynb
@@ -0,0 +1,429 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "数据集下载地址:https://ww2.amstat.org/publications/jse/jse_data_archive.htm\n",
+ "<br>数据集描述:https://ww2.amstat.org/publications/jse/datasets/normtemp.txt\n",
+ "<br>数据:normtemp.dat.txt "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import pylab\n",
+ "import math\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "from scipy.stats import norm\n",
+ "import scipy.stats\n",
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('normtemp.dat.txt', sep=r'\\s+',  # any run of spaces/tabs separates fields; the file has no header row\n",
+ "                 names=['Temperature','Gender','Heart Rate'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Temperature | \n",
+ " Gender | \n",
+ " Heart Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 130.000000 | \n",
+ " 130.000000 | \n",
+ " 130.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 98.249231 | \n",
+ " 1.500000 | \n",
+ " 73.761538 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.733183 | \n",
+ " 0.501934 | \n",
+ " 7.062077 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 96.300000 | \n",
+ " 1.000000 | \n",
+ " 57.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 97.800000 | \n",
+ " 1.000000 | \n",
+ " 69.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 98.300000 | \n",
+ " 1.500000 | \n",
+ " 74.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 98.700000 | \n",
+ " 2.000000 | \n",
+ " 79.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 100.800000 | \n",
+ " 2.000000 | \n",
+ " 89.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Temperature Gender Heart Rate\n",
+ "count 130.000000 130.000000 130.000000\n",
+ "mean 98.249231 1.500000 73.761538\n",
+ "std 0.733183 0.501934 7.062077\n",
+ "min 96.300000 1.000000 57.000000\n",
+ "25% 97.800000 1.000000 69.000000\n",
+ "50% 98.300000 1.500000 74.000000\n",
+ "75% 98.700000 2.000000 79.000000\n",
+ "max 100.800000 2.000000 89.000000"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()  # count, mean, std, min, quartiles and max for each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Temperature | \n",
+ " Gender | \n",
+ " Heart Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 96.3 | \n",
+ " 1 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 96.7 | \n",
+ " 1 | \n",
+ " 71 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 96.9 | \n",
+ " 1 | \n",
+ " 74 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 97.0 | \n",
+ " 1 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 97.1 | \n",
+ " 1 | \n",
+ " 73 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Temperature Gender Heart Rate\n",
+ "0 96.3 1 70\n",
+ "1 96.7 1 71\n",
+ "2 96.9 1 74\n",
+ "3 97.0 1 80\n",
+ "4 97.1 1 73"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head() # Temperature: body temperature, Gender: male/female, Heart Rate: heartbeat rate"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 体温的分布是正态的吗?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Average (Mu):98.24923076923076/Standard Deviation: 0.7303577789050376\n"
+ ]
+ }
+ ],
+ "source": [
+ "observed_temperatures = df['Temperature'].sort_values()\n",
+ "# np.arange leaves out its stop value, so pad the stop to keep the hottest reading inside a bin.\n",
+ "bin_val = np.arange(observed_temperatures.min(), observed_temperatures.max() + 0.1, 0.05)\n",
+ "# Mean and standard deviation of the readings (np.std defaults to ddof=0).\n",
+ "mu, std = np.mean(observed_temperatures), np.std(observed_temperatures)\n",
+ "\n",
+ "# Normal density with that mean and standard deviation, evaluated at each reading.\n",
+ "p = norm.pdf(observed_temperatures, mu, std)\n",
+ "# density=True replaces normed=True, which current matplotlib no longer accepts.\n",
+ "plt.hist(observed_temperatures, bins=bin_val, density=True)\n",
+ "plt.plot(observed_temperatures, p, color='red')\n",
+ "plt.xticks(np.arange(95.75,101.25,0.25),rotation=90)\n",
+ "plt.title('Human Body Temperature Distributions')\n",
+ "plt.xlabel('human body temperature')\n",
+ "plt.show()\n",
+ "\n",
+ "print('Average (Mu):'+str(mu)+'/' 'Standard Deviation: '+str(std))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 正态检验"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shapiro-Wilk Stat: 0.9865769743919373 Shapiro-Wilk p_Value: 0.2331680953502655\n",
+ "p: 0.2587479863488212\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "x = observed_temperatures\n",
+ "# Shapiro-Wilk test: the null hypothesis is that the sample is drawn from a normal distribution.\n",
+ "shapiro_test, shapiro_p = scipy.stats.shapiro(x)\n",
+ "print('Shapiro-Wilk Stat:', shapiro_test, 'Shapiro-Wilk p_Value:', shapiro_p)\n",
+ "# Second normality test; a p-value above 0.05 means normality is not rejected.\n",
+ "k2, p = scipy.stats.normaltest(x)\n",
+ "print('p:', p)\n",
+ "# Probability plot: the closer the points follow the straight line, the closer the data are to normal.\n",
+ "scipy.stats.probplot(x, dist='norm', plot=pylab)\n",
+ "pylab.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 第二种检验正态分布方法"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mean temperature : 98.24923076923076 with standard deviation of +/- 0.730357778905038\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def ecdf(data):\n",
+ "    \"\"\"Return (sorted values, cumulative fractions 1/n .. n/n) for a 1-D sample.\"\"\"\n",
+ "    values = np.sort(data)\n",
+ "    fractions = np.arange(1, len(data) + 1) / len(data)\n",
+ "    return values, fractions\n",
+ "\n",
+ "# Compute the empirical mean and standard deviation.\n",
+ "\n",
+ "# Number of observations\n",
+ "n = len(df['Temperature'])\n",
+ "# Mean of the observed temperatures\n",
+ "mu = np.mean(df['Temperature'])\n",
+ "# Population standard deviation (np.std uses ddof=0), not the ddof=1 sample estimate\n",
+ "std = np.std(df['Temperature'])\n",
+ "print('Mean temperature :', mu, 'with standard deviation of +/-',std)\n",
+ "\n",
+ "# Draw the normal reference sample from a seeded generator so every rerun gives the same curve.\n",
+ "normalized_sample = np.random.RandomState(42).normal(mu, std, size=10000)\n",
+ "x_temperature,y_temperature = ecdf(df['Temperature'])\n",
+ "normalized_x, normalized_y = ecdf(normalized_sample)\n",
+ "\n",
+ "# Plot both ECDFs: the simulated normal as a line, the observed data as points.\n",
+ "fig = plt.figure(figsize=(8,5))\n",
+ "plt.plot(normalized_x, normalized_y)\n",
+ "plt.plot(x_temperature, y_temperature,marker='.', linestyle='none')\n",
+ "plt.ylabel('ECDF')\n",
+ "plt.xlabel('Temperature')\n",
+ "plt.legend(('Normal Distribution', 'Sample data'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}