|
|
|
@ -0,0 +1,224 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 单因素方差分析"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"0.10150375939849626\n",
|
|
|
|
|
"0.9038208903685354\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# 呷哺呷哺3个城市不同用户评分\n",
|
|
|
|
|
"from scipy.stats import f_oneway\n",
|
|
|
|
|
"a = [10,9,9,8,8,7,7,8,8,9] # 3个城市每个城市10个人评价\n",
|
|
|
|
|
"b = [10,8,9,8,7,7,7,8,9,9]\n",
|
|
|
|
|
"c = [9,9,8,8,8,7,6,9,8,9]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"f,p = f_oneway(a,b,c)\n",
|
|
|
|
|
"print(f) # 统计量\n",
|
|
|
|
|
"print(p) # 概率值"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"不能认为所检验的因素对观测值有显著影响"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 多因素方差分析"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>E</th>\n",
|
|
|
|
|
" <th>I</th>\n",
|
|
|
|
|
" <th>S</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" E I S\n",
|
|
|
|
|
"0 5 5 5\n",
|
|
|
|
|
"1 5 4 5\n",
|
|
|
|
|
"2 5 3 4\n",
|
|
|
|
|
"3 5 2 3\n",
|
|
|
|
|
"4 5 1 2"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# 呷哺呷哺2因素:环境等级,食材等级\n",
|
|
|
|
|
"from scipy import stats\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"from statsmodels.formula.api import ols\n",
|
|
|
|
|
"from statsmodels.stats.anova import anova_lm\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"environmental = [5,5,5,5,5,4,4,4,4,4,3,3,3,3,3,2,2,2,2,2,1,1,1,1,1]\n",
|
|
|
|
|
"ingredients = [5,4,3,2,1,5,4,3,2,1,5,4,3,2,1,5,4,3,2,1,5,4,3,2,1]\n",
|
|
|
|
|
"score = [5,5,4,3,2,5,4,4,3,2,4,4,3,3,2,4,3,2,2,2,3,3,3,2,1]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"data = {'E':environmental, 'I':ingredients, 'S':score}\n",
|
|
|
|
|
"df = pd.DataFrame(data)\n",
|
|
|
|
|
"df.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"符号意义:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"(~)隔离因变量和自变量(左边因变量,右边自变量)\n",
|
|
|
|
|
"<br>(+)分隔各个自变量\n",
|
|
|
|
|
"<br>(:)表示两个自变量交互影响"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
" df sum_sq mean_sq F PR(>F)\n",
|
|
|
|
|
"E 1.0 7.22 7.220000 54.539568 2.896351e-07\n",
|
|
|
|
|
"I 1.0 18.00 18.000000 135.971223 1.233581e-10\n",
|
|
|
|
|
"E:I 1.0 0.64 0.640000 4.834532 3.924030e-02\n",
|
|
|
|
|
"Residual 21.0 2.78 0.132381 NaN NaN\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"formula = 'S~E+I+E:I' #指定公式\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"model = ols(formula, df).fit()\n",
|
|
|
|
|
"results = anova_lm(model)\n",
|
|
|
|
|
"print(results)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"P值很小,拒绝原假设,F值越大。\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"表示该因素对结果影响越大,分别是E和I\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"E:I行的P值表示交互情况,小于0.05,之间并无交互"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.7.3"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|