From b02e5562811ee3af6c1e53c139ab659ca421148b Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Wed, 28 Apr 2021 12:49:21 +0800 Subject: [PATCH] =?UTF-8?q?Create.=207.=E6=94=AF=E6=8C=81=E5=90=91?= =?UTF-8?q?=E9=87=8F=E6=9C=BA=E2=80=94=E2=80=94=E5=8D=87=E7=BB=B4=E6=89=93?= =?UTF-8?q?=E5=87=BB.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../第一章——线性回归原理.md | 157 ++++++++++++++++++ .../.idea/.gitignore | 3 + .../inspectionProfiles/profiles_settings.xml | 6 + .../.idea/misc.xml | 7 + .../.idea/modules.xml | 8 + .../.idea/vcs.xml | 6 + ...二章——手写线性回归算法.iml | 15 ++ .../MultivariateLinearRegression.py | 134 +++++++++++++++ .../UnivariateLinearRegression.py | 58 +++++++ .../linear_regression.cpython-37.pyc | Bin 0 -> 3637 bytes .../LinearRegression/linear_regression.py | 114 +++++++++++++ .../data/world-happiness-report-2017.csv | 156 +++++++++++++++++ .../util/__init__.py | 0 .../util/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 147 bytes .../util/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 245 bytes .../util/features/__init__.py | 0 .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 156 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 254 bytes .../generate_polynomials.cpython-36.pyc | Bin 0 -> 1226 bytes .../generate_polynomials.cpython-37.pyc | Bin 0 -> 1324 bytes .../generate_sinusoids.cpython-36.pyc | Bin 0 -> 538 bytes .../generate_sinusoids.cpython-37.pyc | Bin 0 -> 636 bytes .../__pycache__/normalize.cpython-36.pyc | Bin 0 -> 529 bytes .../__pycache__/normalize.cpython-37.pyc | Bin 0 -> 627 bytes .../prepare_for_training.cpython-36.pyc | Bin 0 -> 909 bytes .../prepare_for_training.cpython-37.pyc | Bin 0 -> 1007 bytes .../util/features/generate_polynomials.py | 44 +++++ .../util/features/generate_sinusoids.py | 16 ++ .../util/features/normalize.py | 24 +++ .../util/features/prepare_for_training.py | 41 +++++ .../util/hopythesis/__init__.py | 0 .../7.支持向量机——升维打击.md | 117 +++++++++++++ 32 
files changed, 906 insertions(+) create mode 100644 机器学习算法理论及应用/其它/第一章——线性回归原理.md create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/.gitignore create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/misc.xml create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/modules.xml create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/vcs.xml create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/第二章——手写线性回归算法.iml create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/__pycache__/linear_regression.cpython-37.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/linear_regression.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/data/world-happiness-report-2017.csv create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__init__.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__pycache__/__init__.cpython-36.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__pycache__/__init__.cpython-37.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__init__.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/__init__.cpython-36.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/__init__.cpython-37.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/generate_polynomials.cpython-36.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/generate_polynomials.cpython-37.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/generate_sinusoids.cpython-36.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/generate_sinusoids.cpython-37.pyc create mode 100644 
机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/normalize.cpython-36.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/normalize.cpython-37.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/prepare_for_training.cpython-36.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/prepare_for_training.cpython-37.pyc create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/generate_polynomials.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/generate_sinusoids.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/normalize.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/prepare_for_training.py create mode 100644 机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/hopythesis/__init__.py create mode 100644 机器学习算法理论及应用/李航——统计学习方法/7.支持向量机——升维打击.md diff --git a/机器学习算法理论及应用/其它/第一章——线性回归原理.md b/机器学习算法理论及应用/其它/第一章——线性回归原理.md new file mode 100644 index 0000000..abc40ec --- /dev/null +++ b/机器学习算法理论及应用/其它/第一章——线性回归原理.md @@ -0,0 +1,157 @@ +# 第一章——线性回归原理 + +### 线性回归概述 + +#### 例子: + +- 数据:工资和年龄(两个特征) +- 目标:预测银行会贷款给我多少钱(标签) +- 考虑:工资和年龄都会影响最终银行贷款的结果,那么它们各自有多大的影响呢?(参数) + +| 工资 X1 | 年龄 X2 | 额度 Y | +| ------- | ------- | ------ | +| 4000 | 25 | 20000 | +| 8000 | 30 | 70000 | +| 7500 | 33 | 50000 | + +其中工资、年龄是特征,用来预测额度,而我们不可能直接拿工资 × 年龄,因为明显工资更重要些,那么可能建成的方程是 Y = (X1 × θ1) + (X2 × θ2),其中θ就是各种特征的权重,那么最终我们要求解的就是各种的θ。 + +而线性回归就是得到每个数据最终的预测Y(具体的值),除了回归还有分类,分类是离散型的0/1等固定值的分类。 + +### 通俗理解 + +- X1,X2就是我们的两个特征工资和年龄,Y是银行最终会借给我们的额度 +- 找到最合适的一条线,来拟合我们的数据点 + +![1613962795506](assets/1613962795506.png) + +> 红色的点是数据,即前面的特征等 + +当前的数据并不是严格线性的,也就是数据点不可能全部落在同一个平面上。那么 Y = (X1 × θ1) + (X2 × θ2)就不能覆盖所有的点进行计算。怎么样解决这个问题,或者说如果我们能尽可能的满足绝大多数数据点,是否就可以了呢。 + + + +### 误差 + +#### 误差项公式 + +接着上面的问题,什么样的平面才是最合理最满足的呢 + +- 假设 θ1是工资的参数, θ2是年龄的参数 +- 拟合的平面:hθ(x) = θ0 + θ1X1 + θ2X2 + - θ0是偏置项,不管θ1和θ2等什么变化,θ0的变化会影响平面向上或者向下浮动,对结果做微调 + - 上面的方程可能无法形成矩阵相乘的形式,因为θ0没有X0,我们可以添加一个不影响整体的X0(令X0恒等于1),以达到矩阵相乘的效果 +- 
整合:![1613963456265](assets/1613963456265.png) + +- 真实值和预测值之间肯定要存在差异的(用ε来表示该误差) + +- 对于每个样本:![1613965126989](assets/1613965126989.png) + + > y表示真实值,![1613965189106](assets/1613965189106.png)(第二项)表示预测值,ε表示误差值,即预测值和真实值之间有一个误差项,其中上标 i 表示第 i 个样本,每个样本都有自己的真实值、预测值、误差项 + +误差项越小,代表预测得越准确。 + +#### 独立同分布的意义 + +- 误差 ε(i) 是独立且具有相同的分布,并服从均值为0方差为σ平方(σ²)的高斯分布 + + > 我们拆开上面的话 + + - 独立:小明和小红一起来贷款,他们没关系 + - 同分布:他们都是去同一个银行 + - 高斯分布:银行可能会多给,也可能会少给,但绝大多数情况下这个浮动不会太大,极少数情况下浮动会比较大,符合正常情况 + + ![1613977618784](assets/1613977618784.png) + + 现实中也很难有绝对的高斯分布,大多数是近似高斯分布,也就是我们算法推导的时候也很难得到一个完全正确的答案,只有最接近的答案,也就是存在误差。 + +#### 似然函数的作用 + +- 预测值与误差:![1613978116054](assets/1613978116054.png)(1) + + > y是真实值、θᵀx是预测值(x是输入特征)、ε是误差值,现在我们要求的就是θ,它应该怎么求解 + +- 由于误差服从高斯分布:![1613978136136](assets/1613978136136.png)(2) + + > 高斯分布的公式,这里我们要求的是θ,所以把θ移动到左边,变成y - θX = ε,即演变成 + +- 将(1)式代入(2)式:![1613978160407](assets/1613978160407.png)(3) + + > 这里我们希望左边的x和θ组合完后,和真实值y越接近越好,即成为y的可能性越大越好 + +- 似然函数:![1613978566334](assets/1613978566334.png) + + 解释:为什么引入,什么样的参数跟我们的数据组合后恰好是真实值 + +- 对数似然:![1613978607945](assets/1613978607945.png) + + 解释:乘法难解,加法就容易了,对数里乘法可以转换成加法 + + - 展开化简:![1613980700874](assets/1613980700874.png) + + - 目标:让似然函数(对数变换后也一样)越大越好 + + ![1613980821171](assets/1613980821171.png)(最小二乘法) + +#### 参数求解 + +- 目标函数:![1614044218548](assets/1614044218548.png) +- 求偏导:![1614044234709](assets/1614044234709.png) +- 偏导等于0的最优解:![1614044250549](assets/1614044250549.png) + + + +### 梯度下降 + +#### 通俗理解 + +- 引入:当我们得到了一个目标函数后,如何求解?(并不一定可解,线性回归可以当做是一个特例) +- 常规套路:机器学习的套路就是我们交给机器一堆数据,然后告诉它什么样的学习方式是对的(目标函数),然后让它朝着这个方向去做 +- 如何优化:一步步的完成迭代。![1614051473653](assets/1614051473653.png) + +#### 参数更新方法 + +- 目标函数:![1614053992861](assets/1614053992861.png) + + > θ0和θ1分别得出方向,最终找到综合的结果。 + +- 寻找山谷的最低点,也就是我们的目标函数终点 + +- 下山分多步走(更新参数) + + 1. 找到最合适的方向 + 2. 每次走一小步 + 3. 
按照方向和步伐更新参数 + +![1614054121928](assets/1614054121928.png) + +梯度下降,目标函数:![1614054304093](assets/1614054304093.png) + +- 批量梯度下降:![1614054320647](assets/1614054320647.png) + + (容易得到最优解,但由于每次考虑所有样本,速度很慢) + +- 随机梯度下降:![1614054359779](assets/1614054359779.png) + + (每次找到一个样本,迭代速度快,但不一定每次都朝着收敛的方向) + +- 小批量梯度下降发:![1614054402887](assets/1614054402887.png) + + > 简化成代码即 θ = θ - α×(1/n) × ( (残差×数据)矩阵 ) + > + > 残差=![1614131914417](assets/1614131914417.png) + + (每次更新选择一小部分数据来算) + + + +#### 学习率(步长) + +> 上面小批量梯度公式里的α + +- 学习率(步长):对结果会产生巨大的影响,一般小一些 +- 如何选择:从小的开始,知道不能再小 +- 批处理数:32、64、128都可以,很多时候还要考虑资源和时间 + +![1614062404175](assets/1614062404175.png) + diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/.gitignore b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/.gitignore new file mode 100644 index 0000000..0e40fe8 --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/.gitignore @@ -0,0 +1,3 @@ + +# Default ignored files +/workspace.xml \ No newline at end of file diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/inspectionProfiles/profiles_settings.xml b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/misc.xml b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/misc.xml new file mode 100644 index 0000000..ba24381 --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/modules.xml b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/modules.xml new file mode 100644 index 0000000..ea52c5e --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/vcs.xml b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/vcs.xml new file mode 100644 index 0000000..b2bdec2 --- 
/dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/第二章——手写线性回归算法.iml b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/第二章——手写线性回归算法.iml new file mode 100644 index 0000000..5c309c6 --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/.idea/第二章——手写线性回归算法.iml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py new file mode 100644 index 0000000..f491fd3 --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py @@ -0,0 +1,134 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import plotly +import plotly.graph_objs as go + +from linear_regression import LinearRegression +plotly.offline.init_notebook_mode() # 在线显示图标,更多功能 + +data = pd.read_csv('../data/world-happiness-report-2017.csv') + +train_data = data.sample(frac=0.8) +test_data = data.drop(train_data.index) + +# 与单特征模型相比,只是多了一个特征列 +input_param_name_1 = 'Economy..GDP.per.Capita.' +input_param_name_2 = 'Freedom' +output_param_name = 'Happiness.Score' +# 双特征的loss为:0.08517538069974877 +x_train = train_data[[input_param_name_1, input_param_name_2]].values +# 全特征的loss为:0.0019415807477718364 +# feat_list = list(train_data.columns.drop(['Happiness.Score','Country'])) +# x_train = train_data[feat_list].values +y_train = train_data[[output_param_name]].values + +x_test = test_data[[input_param_name_1, input_param_name_2]].values +y_test = test_data[[output_param_name]].values + +# Configure the plot with training dataset. 
+plot_training_trace = go.Scatter3d( + x=x_train[:, 0].flatten(), + y=x_train[:, 1].flatten(), + z=y_train.flatten(), + name='Training Set', + mode='markers', + marker={ + 'size': 10, + 'opacity': 1, + 'line': { + 'color': 'rgb(255, 255, 255)', + 'width': 1 + }, + } +) + +plot_test_trace = go.Scatter3d( + x=x_test[:, 0].flatten(), + y=x_test[:, 1].flatten(), + z=y_test.flatten(), + name='Test Set', + mode='markers', + marker={ + 'size': 10, + 'opacity': 1, + 'line': { + 'color': 'rgb(255, 255, 255)', + 'width': 1 + }, + } +) + +plot_layout = go.Layout( + title='Date Sets', + scene={ + 'xaxis': {'title': input_param_name_1}, + 'yaxis': {'title': input_param_name_2}, + 'zaxis': {'title': output_param_name} + }, + margin={'l': 0, 'r': 0, 'b': 0, 't': 0} +) + +plot_data = [plot_training_trace, plot_test_trace] + +plot_figure = go.Figure(data=plot_data, layout=plot_layout) + +plotly.offline.plot(plot_figure) + +num_iterations = 500 +learning_rate = 0.01 +polynomial_degree = 0 +sinusoid_degree = 0 + +linear_regression = LinearRegression(x_train, y_train, polynomial_degree, sinusoid_degree) + +(theta, cost_history) = linear_regression.train(learning_rate, num_iterations) + +print('开始时的损失:', cost_history[0]) +print('训练后的损失:', cost_history[-1]) + +plt.plot(range(num_iterations), cost_history) +plt.xlabel('Iterations') +plt.ylabel('Cost') +plt.title('Gradient Descent') +plt.show() + +predictions_num = 10 + +x_min = x_train[:, 0].min() +x_max = x_train[:, 0].max() + +y_min = x_train[:, 1].min() +y_max = x_train[:, 1].max() + +x_axis = np.linspace(x_min, x_max, predictions_num) +y_axis = np.linspace(y_min, y_max, predictions_num) + +x_predictions = np.zeros((predictions_num * predictions_num, 1)) +y_predictions = np.zeros((predictions_num * predictions_num, 1)) + +x_y_index = 0 +for x_index, x_value in enumerate(x_axis): + for y_index, y_value in enumerate(y_axis): + x_predictions[x_y_index] = x_value + y_predictions[x_y_index] = y_value + x_y_index += 1 + +z_predictions = 
linear_regression.predict(np.hstack((x_predictions, y_predictions))) + +plot_predictions_trace = go.Scatter3d( + x=x_predictions.flatten(), + y=y_predictions.flatten(), + z=z_predictions.flatten(), + name='Prediction Plane', + mode='markers', + marker={ + 'size': 1, + }, + opacity=0.8, + surfaceaxis=2, +) + +plot_data = [plot_training_trace, plot_test_trace, plot_predictions_trace] +plot_figure = go.Figure(data=plot_data, layout=plot_layout) +plotly.offline.plot(plot_figure) diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py new file mode 100644 index 0000000..8a42c5c --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py @@ -0,0 +1,58 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from linear_regression import LinearRegression +data = pd.read_csv('../data/world-happiness-report-2017.csv') # 导入数据 +# 得到训练和测试数据,以8:2切分 +train_data = data.sample(frac=0.8) +test_data = data.drop(train_data.index) + +input_param_name = 'Economy..GDP.per.Capita.' 
# 特征features +output_param_name = 'Happiness.Score' # 标签label + +x_train = train_data[[input_param_name]].values # 构建数据 +y_train = train_data[[output_param_name]].values + +x_test = test_data[[input_param_name]].values +y_test = test_data[[output_param_name]].values + +# 可视化展示 run, 可以看到训练数据和预测数据的分布 +plt.scatter(x_train, y_train, label='Train data') +plt.scatter(x_test, y_test, label='Test data') +plt.xlabel(input_param_name) +plt.ylabel(output_param_name) +plt.title('Happy') +plt.legend() +plt.show() + +# 训练线性回归模型 +num_iterations = 500 # 迭代次数 +learning_rate = 0.01 # 学习率 + +linear_regression = LinearRegression(x_train, y_train) # 初始化模型 +(theta, cost_history) = linear_regression.train(learning_rate, num_iterations) + +print('开始时的损失:', cost_history[0]) +print('训练后的损失:', cost_history[-1]) + +plt.plot(range(num_iterations), cost_history) +plt.xlabel('Iteration') +plt.ylabel('Cost') +plt.title('GD') +plt.show() + +# 测试线性回归模型 +predictions_num = 100 # 预测100个 +# 拿最大和最小值画一条线 +x_predictions = np.linspace(x_train.min(), x_train.max(), predictions_num).reshape(predictions_num, 1) +y_predictions = linear_regression.predict(x_predictions) + +plt.scatter(x_train, y_train, label='Train data') +plt.scatter(x_test, y_test, label='Test data') +plt.plot(x_predictions, y_predictions, 'r', label='Prediction') +plt.xlabel(input_param_name) +plt.ylabel(output_param_name) +plt.title('Happy') +plt.legend() +plt.show() diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/__pycache__/linear_regression.cpython-37.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/__pycache__/linear_regression.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ba02d412a1ba17ed00b447d5e0a2b1c852c5bed GIT binary patch literal 3637 zcmbtW-EZ6073ZZ$eOYpx)J>CSD3rFyCTunKf$c#kie)WO6f5%3de|T+V_99=k|v4j zB{kFr1-P{v$F*GtO`30~QWhj>mpB=+VzHAn|H1wMK5ix@J@sXK-#M4kRV}ALfl=Zi zFYo2$y}$GOopXMZOvWWR%tya`=as#Z^lxnRj}YAa8cyp77%EXxma+t{a#^l~vLPb9 
zF3}KG79^_7%UOkzx23e&g|X7HG;zmk%&6(i(56hL*-S51i`D5o_Vy1k|0Fyy_}p2U zKF#MO*$~}N!!)u$VCE=|El63F#_0%L!!$vYaE%yII!gB}Nb@8cqhoY0Jd4wP^eMQG z&~aGJGw#@%#j2sRw~T3KSXR-jG6El)%!hl6z}@5ht8iNHfU%{f2k-^GN1Z>EOf>mIgq^U4g(!!1k7Wzu06s6>#)QH)!X55KUG9|YN^o>v% z))cBAh9!LS@M1jGJAO9d-P!a%yX&uRbRJ*ov{t%T7rbi=?NcZG)2rK$E{lObpYT39 z>3x09J9*b%n)5F&wJ$ETmo~kxKiIziB}^a&{$j#Ax$fOq_Eye%Yac>S`|~fl=f8@J z0aUm3Oz=tZtY(&Hs%E9Amo_` zdez-0n$-AnQMbX%?%rLyarX?Z*B#xtB3HEvdd+aNG5wcK>kF4W)69WA~I>_z3gG+8iAHuKWaTCF?-?VNvk%e!#X zyM29ov)SG_=imDb2-Dfv^431~w$8NA+|0Fa-P+z-Y5%qPuemeu_m`Hv6BpWBkNvqD z-lZ$vqfdj^a>EkImAPg#CPXt)n{lI>2J5sn4FRzBgOQSPl90pXTR%BLW)BX}H_?By z`yX&>9%YY*(mT2Ia6TuHp4^ffAgdCHa0&%O>ZhPnu&%RlyZvC^zuEHE&wcZ7#b3Gs zit(@B@wTpS-(T)tSPcZiJ?G6m^gdnjS3mcz{SEHRrezH@pqFa}J(KDo_{pO90=led z8{iONl9fqy9^c-6@E8A9>)R4b%kJ1T(`nJD+8_Wc4_CGa7MfPr7+hSH&krQqF`Jvn zo0hE=z~SIcPKUre#oys^@-W<5h>jEiIQQ>@lMfOwA}Nt?9L5aXOiPW>`#-fMN1i9I zNylGoKy+#nhqMThoKO>DSQFuerzN!ok%>s}js6~bIMg4*1Of^AVOM2(b=q)~0&B}QYHnDs)r=~2lN`;ddNq%}8VEG4 zawo87U_0ypnuB0mq75O=kas_Z)8c6`rjSt*C(n}E!@E!y3+OpJkAVpf)PqPNe}D!6 z3+&}7K(=KLHa=50uMv9nfP_vdDGe2p2BESeACkr;JKT&oN(mB6NovWZ=(q&qa0Db8 zitbVB1(;r7xP$aO%=?%A?w`He*=V7%&u&6YK!x19=`XB7C=ZfA=kY1%{T`0wnUWz7 zkELS)vw~bE#I6uEZI?`@!vhz%JPaD|>y=vBu(EL|4QP?O z%vGsTw)J2R42}aeupdAZ32otLrEo$X1K)(@=gI7I-|ZcwhtLMP1vkL8@hiA-ND1l~ z$ju>}fD#}pmqIOoA01*VP0ZvERj5H~Gh7(iWZf>7C;Am>z$}j16J~d#F9#cV1vkpo5haPx{{UTGQRM&t literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/linear_regression.py b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/linear_regression.py new file mode 100644 index 0000000..3c31e09 --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/LinearRegression/linear_regression.py @@ -0,0 +1,114 @@ +import numpy as np +from util.features import prepare_for_training + + +class LinearRegression: + def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=True): + """: + 1.对数据进行预处理操作 + 2.先得到所有的特征个数 + 3.初始化参数矩阵 + + data:数据 + polynomial_degree: 是否做额外变换 + 
sinusoid_degree: 是否做额外变换 + normalize_data: 是否标准化数据 + """ + (data_processed, + features_mean, + features_deviation) = prepare_for_training.prepare_for_training(data, polynomial_degree, sinusoid_degree, + normalize_data) + + self.data = data_processed + self.labels = labels + self.features_mean = features_mean + self.features_deviation = features_deviation + self.polynomial_degree = polynomial_degree + self.sinusoid_degree = sinusoid_degree + self.normalize_data = normalize_data + + num_features = self.data.shape[1] + self.theta = np.zeros((num_features, 1)) + + def train(self, alpha, num_iterations=500): + """ + 训练模块,执行梯度下降得到theta值和损失值loss + + alpha: 学习率 + num_iterations: 迭代次数 + """ + cost_history = self.gradient_descent(alpha, num_iterations) + return self.theta, cost_history + + def gradient_descent(self, alpha, num_iterations): + """ + 实际迭代模块 + + alpha: 学习率 + num_iterations: 迭代次数 + + :return: 返回损失值 loss + """ + cost_history = [] # 收集每次的损失值 + for _ in range(num_iterations): # 开始迭代 + self.gradient_step(alpha) # 每次更新theta + cost_history.append(self.cost_function(self.data, self.labels)) + return cost_history + + def gradient_step(self, alpha): + """ + 梯度下降参数更新计算方法,注意是矩阵运算 + + alpha: 学习率 + """ + num_examples = self.data.shape[0] # 当前样本个数 + # 根据当前数据和θ获取预测值 + prediction = LinearRegression.hypothesis(self.data, self.theta) + delta = prediction - self.labels # 残差,即预测值减去真实值 + theta = self.theta + # 依照小批量梯度下降法,写代码表示 + theta = theta - alpha * (1/num_examples)*(np.dot(delta.T, self.data)).T + self.theta = theta # 计算完theta后更新当前theta + + def cost_function(self, data, labels): + """ + 损失计算方法,计算平均的损失而不是每个数据的损失值 + """ + num_examples = data.shape[0] + delta = LinearRegression.hypothesis(data, self.theta) - labels # 预测值-真实值 得到残差 + cost = np.dot(delta, delta.T) # 损失值 + return cost[0][0] + + @staticmethod + def hypothesis(data, theta): + """ + 获取预测值 + + :param data: 矩阵数据 + :param theta: 权重θ + :return: 返回预测值 + """ + predictions = np.dot(data, theta) + return predictions + + def 
get_cost(self, data, labels): + """ + 得到当前损失 + """ + data_processed = prepare_for_training.prepare_for_training(data, + self.polynomial_degree, + self.sinusoid_degree, + self.normalize_data)[0] + return self.cost_function(data_processed, labels) + + def predict(self, data): + """ + 用训练的参数模型,预测得到回归值的结果 + """ + data_processed = prepare_for_training.prepare_for_training(data, + self.polynomial_degree, + self.sinusoid_degree, + self.normalize_data)[0] + predictions = LinearRegression.hypothesis(data_processed, self.theta) + + return predictions diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/data/world-happiness-report-2017.csv b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/data/world-happiness-report-2017.csv new file mode 100644 index 0000000..efbb9ec --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/data/world-happiness-report-2017.csv @@ -0,0 +1,156 @@ +"Country","Happiness.Rank","Happiness.Score","Whisker.high","Whisker.low","Economy..GDP.per.Capita.","Family","Health..Life.Expectancy.","Freedom","Generosity","Trust..Government.Corruption.","Dystopia.Residual" +"Norway",1,7.53700017929077,7.59444482058287,7.47955553799868,1.61646318435669,1.53352355957031,0.796666502952576,0.635422587394714,0.36201223731041,0.315963834524155,2.27702665328979 +"Denmark",2,7.52199983596802,7.58172806486487,7.46227160707116,1.48238301277161,1.55112159252167,0.792565524578094,0.626006722450256,0.355280488729477,0.40077006816864,2.31370735168457 +"Iceland",3,7.50400018692017,7.62203047305346,7.38596990078688,1.480633020401,1.6105740070343,0.833552122116089,0.627162635326385,0.475540220737457,0.153526559472084,2.32271528244019 +"Switzerland",4,7.49399995803833,7.56177242040634,7.42622749567032,1.56497955322266,1.51691174507141,0.858131289482117,0.620070576667786,0.290549278259277,0.367007285356522,2.2767162322998 +"Finland",5,7.4689998626709,7.52754207581282,7.41045764952898,1.44357192516327,1.5402467250824,0.80915766954422,0.617950856685638,0.24548277258873,0.38261154294014,2.4301815032959 
+"Netherlands",6,7.3769998550415,7.42742584124207,7.32657386884093,1.50394463539124,1.42893922328949,0.810696125030518,0.585384488105774,0.470489829778671,0.282661825418472,2.29480409622192 +"Canada",7,7.31599998474121,7.38440283536911,7.24759713411331,1.47920441627502,1.48134899139404,0.83455765247345,0.611100912094116,0.435539722442627,0.287371516227722,2.18726444244385 +"New Zealand",8,7.31400012969971,7.3795104418695,7.24848981752992,1.40570604801178,1.54819512367249,0.816759705543518,0.614062130451202,0.500005125999451,0.382816702127457,2.0464563369751 +"Sweden",9,7.28399991989136,7.34409487739205,7.22390496239066,1.49438726902008,1.47816216945648,0.830875158309937,0.612924098968506,0.385399252176285,0.384398728609085,2.09753799438477 +"Australia",10,7.28399991989136,7.35665122494102,7.2113486148417,1.484414935112,1.51004195213318,0.84388679265976,0.601607382297516,0.477699249982834,0.301183730363846,2.06521081924438 +"Israel",11,7.21299982070923,7.27985325649381,7.14614638492465,1.37538242340088,1.37628996372223,0.83840399980545,0.405988603830338,0.330082654953003,0.0852421000599861,2.80175733566284 +"Costa Rica",12,7.0789999961853,7.16811166629195,6.98988832607865,1.10970628261566,1.41640365123749,0.759509265422821,0.580131649971008,0.214613229036331,0.100106589496136,2.89863920211792 +"Austria",13,7.00600004196167,7.07066981211305,6.94133027181029,1.48709726333618,1.4599449634552,0.815328419208527,0.567766189575195,0.316472321748734,0.221060365438461,2.1385064125061 +"United States",14,6.99300003051758,7.07465674757957,6.91134331345558,1.54625928401947,1.41992056369781,0.77428662776947,0.505740523338318,0.392578780651093,0.135638788342476,2.2181134223938 +"Ireland",15,6.97700023651123,7.04335166752338,6.91064880549908,1.53570663928986,1.55823111534119,0.80978262424469,0.573110342025757,0.42785832285881,0.29838815331459,1.77386903762817 
+"Germany",16,6.95100021362305,7.00538156926632,6.89661885797977,1.48792338371277,1.47252035140991,0.798950731754303,0.562511384487152,0.336269170045853,0.276731938123703,2.01576995849609 +"Belgium",17,6.89099979400635,6.95582075044513,6.82617883756757,1.46378076076508,1.46231269836426,0.818091869354248,0.539770722389221,0.231503337621689,0.251343131065369,2.12421035766602 +"Luxembourg",18,6.86299991607666,6.92368609987199,6.80231373228133,1.74194359779358,1.45758366584778,0.845089495182037,0.59662789106369,0.283180981874466,0.31883442401886,1.61951208114624 +"United Kingdom",19,6.71400022506714,6.78379176110029,6.64420868903399,1.44163393974304,1.49646008014679,0.805335938930511,0.508190035820007,0.492774158716202,0.265428066253662,1.70414352416992 +"Chile",20,6.65199995040894,6.73925056010485,6.56474934071302,1.25278460979462,1.28402495384216,0.819479703903198,0.376895278692245,0.326662421226501,0.0822879821062088,2.50958585739136 +"United Arab Emirates",21,6.64799976348877,6.72204730376601,6.57395222321153,1.62634336948395,1.26641023159027,0.726798236370087,0.60834527015686,0.3609419465065,0.324489563703537,1.734703540802 +"Brazil",22,6.63500022888184,6.72546950161457,6.5445309561491,1.10735321044922,1.43130600452423,0.616552352905273,0.437453746795654,0.16234989464283,0.111092761158943,2.76926708221436 +"Czech Republic",23,6.60900020599365,6.68386246263981,6.5341379493475,1.35268235206604,1.43388521671295,0.754444003105164,0.490946173667908,0.0881067588925362,0.0368729270994663,2.45186185836792 +"Argentina",24,6.59899997711182,6.69008508607745,6.50791486814618,1.18529546260834,1.44045114517212,0.695137083530426,0.494519203901291,0.109457060694695,0.059739887714386,2.61400532722473 +"Mexico",25,6.57800006866455,6.67114890769124,6.48485122963786,1.15318381786346,1.210862159729,0.709978997707367,0.412730008363724,0.120990432798862,0.132774114608765,2.83715486526489 
+"Singapore",26,6.57200002670288,6.63672306910157,6.50727698430419,1.69227766990662,1.35381436347961,0.949492394924164,0.549840569496155,0.345965981483459,0.46430778503418,1.21636199951172 +"Malta",27,6.52699995040894,6.59839677289128,6.45560312792659,1.34327983856201,1.48841166496277,0.821944236755371,0.588767051696777,0.574730575084686,0.153066068887711,1.55686283111572 +"Uruguay",28,6.4539999961853,6.54590621769428,6.36209377467632,1.21755969524384,1.41222786903381,0.719216823577881,0.57939225435257,0.175096929073334,0.178061872720718,2.17240953445435 +"Guatemala",29,6.4539999961853,6.56687397271395,6.34112601965666,0.872001945972443,1.25558519363403,0.540239989757538,0.531310617923737,0.283488392829895,0.0772232785820961,2.89389109611511 +"Panama",30,6.4520001411438,6.55713071614504,6.34686956614256,1.23374843597412,1.37319254875183,0.706156134605408,0.550026834011078,0.21055693924427,0.070983923971653,2.30719995498657 +"France",31,6.44199991226196,6.51576780244708,6.36823202207685,1.43092346191406,1.38777685165405,0.844465851783752,0.470222115516663,0.129762306809425,0.172502428293228,2.00595474243164 +"Thailand",32,6.42399978637695,6.50911685571074,6.33888271704316,1.12786877155304,1.42579245567322,0.647239029407501,0.580200731754303,0.572123110294342,0.0316127352416515,2.03950834274292 +"Taiwan Province of China",33,6.42199993133545,6.49459602192044,6.34940384075046,1.43362653255463,1.38456535339355,0.793984234333038,0.361466586589813,0.258360475301743,0.0638292357325554,2.1266074180603 +"Spain",34,6.40299987792969,6.4710548453033,6.33494491055608,1.38439786434174,1.53209090232849,0.888960599899292,0.408781230449677,0.190133571624756,0.0709140971302986,1.92775774002075 +"Qatar",35,6.375,6.56847681432962,6.18152318567038,1.87076568603516,1.27429687976837,0.710098087787628,0.604130983352661,0.330473870038986,0.439299255609512,1.1454644203186 
+"Colombia",36,6.35699987411499,6.45202005416155,6.26197969406843,1.07062232494354,1.4021829366684,0.595027923583984,0.477487415075302,0.149014472961426,0.0466687418520451,2.61606812477112 +"Saudi Arabia",37,6.3439998626709,6.44416661202908,6.24383311331272,1.53062355518341,1.28667759895325,0.590148329734802,0.449750572443008,0.147616013884544,0.27343225479126,2.0654296875 +"Trinidad and Tobago",38,6.16800022125244,6.38153389066458,5.95446655184031,1.36135590076447,1.3802285194397,0.519983291625977,0.518630743026733,0.325296461582184,0.00896481610834599,2.05324745178223 +"Kuwait",39,6.10500001907349,6.1919569888711,6.01804304927588,1.63295245170593,1.25969874858856,0.632105708122253,0.496337592601776,0.228289797902107,0.215159550309181,1.64042520523071 +"Slovakia",40,6.09800004959106,6.1773484121263,6.01865168705583,1.32539355754852,1.50505924224854,0.712732911109924,0.295817464590073,0.136544480919838,0.0242108516395092,2.09777665138245 +"Bahrain",41,6.08699989318848,6.17898906782269,5.99501071855426,1.48841226100922,1.32311046123505,0.653133034706116,0.536746919155121,0.172668486833572,0.257042169570923,1.65614938735962 +"Malaysia",42,6.08400011062622,6.17997963652015,5.98802058473229,1.29121541976929,1.28464603424072,0.618784427642822,0.402264982461929,0.416608929634094,0.0656007081270218,2.00444889068604 +"Nicaragua",43,6.07100009918213,6.18658360034227,5.95541659802198,0.737299203872681,1.28721570968628,0.653095960617065,0.447551846504211,0.301674216985703,0.130687981843948,2.51393055915833 +"Ecuador",44,6.00799989700317,6.10584767535329,5.91015211865306,1.00082039833069,1.28616881370544,0.685636222362518,0.4551981985569,0.150112465023994,0.140134647488594,2.29035258293152 +"El Salvador",45,6.00299978256226,6.108635122329,5.89736444279552,0.909784495830536,1.18212509155273,0.596018552780151,0.432452529668808,0.0782579854130745,0.0899809598922729,2.7145938873291 
+"Poland",46,5.97300004959106,6.05390834122896,5.89209175795317,1.29178786277771,1.44571197032928,0.699475347995758,0.520342111587524,0.158465966582298,0.0593078061938286,1.79772281646729 +"Uzbekistan",47,5.97100019454956,6.06553757295012,5.876462816149,0.786441087722778,1.54896914958954,0.498272627592087,0.658248662948608,0.415983647108078,0.246528223156929,1.81691360473633 +"Italy",48,5.96400022506714,6.04273690596223,5.88526354417205,1.39506661891937,1.44492328166962,0.853144347667694,0.256450712680817,0.17278964817524,0.0280280914157629,1.81331205368042 +"Russia",49,5.96299982070923,6.03027490749955,5.89572473391891,1.28177809715271,1.46928238868713,0.547349333763123,0.373783111572266,0.0522638224065304,0.0329628810286522,2.20560741424561 +"Belize",50,5.95599985122681,6.19724231779575,5.71475738465786,0.907975316047668,1.08141779899597,0.450191766023636,0.547509372234344,0.240015640854836,0.0965810716152191,2.63195562362671 +"Japan",51,5.92000007629395,5.99071944460273,5.84928070798516,1.41691517829895,1.43633782863617,0.913475871086121,0.505625545978546,0.12057276815176,0.163760736584663,1.36322355270386 +"Lithuania",52,5.90199995040894,5.98266964137554,5.82133025944233,1.31458234786987,1.47351610660553,0.62894994020462,0.234231784939766,0.010164656676352,0.0118656428530812,2.22844052314758 +"Algeria",53,5.87200021743774,5.97828643366694,5.76571400120854,1.09186446666718,1.1462174654007,0.617584645748138,0.233335807919502,0.0694366469979286,0.146096110343933,2.56760382652283 +"Latvia",54,5.84999990463257,5.92026353821158,5.77973627105355,1.26074862480164,1.40471494197845,0.638566970825195,0.325707912445068,0.153074786067009,0.0738427266478539,1.99365520477295 +"South Korea",55,5.83799982070923,5.92255902826786,5.7534406131506,1.40167844295502,1.12827444076538,0.900214076042175,0.257921665906906,0.206674367189407,0.0632826685905457,1.88037800788879 
+"Moldova",56,5.83799982070923,5.90837083846331,5.76762880295515,0.728870630264282,1.25182557106018,0.589465200901031,0.240729048848152,0.208779126405716,0.0100912861526012,2.80780839920044 +"Romania",57,5.82499980926514,5.91969415679574,5.73030546173453,1.21768391132355,1.15009129047394,0.685158312320709,0.457003742456436,0.133519917726517,0.00438790069893003,2.17683148384094 +"Bolivia",58,5.82299995422363,5.9039769025147,5.74202300593257,0.833756566047668,1.22761905193329,0.473630249500275,0.558732926845551,0.22556072473526,0.0604777261614799,2.44327902793884 +"Turkmenistan",59,5.82200002670288,5.88518087550998,5.75881917789578,1.13077676296234,1.49314916133881,0.437726080417633,0.41827192902565,0.24992498755455,0.259270340204239,1.83290982246399 +"Kazakhstan",60,5.81899976730347,5.90364177465439,5.73435775995255,1.28455626964569,1.38436901569366,0.606041550636292,0.437454283237457,0.201964423060417,0.119282886385918,1.78489255905151 +"North Cyprus",61,5.80999994277954,5.89736646488309,5.72263342067599,1.3469113111496,1.18630337715149,0.834647238254547,0.471203625202179,0.266845703125,0.155353352427483,1.54915761947632 +"Slovenia",62,5.75799989700317,5.84222516000271,5.67377463400364,1.3412059545517,1.45251882076263,0.790828227996826,0.572575807571411,0.242649093270302,0.0451289787888527,1.31331729888916 +"Peru",63,5.71500015258789,5.81194677859545,5.61805352658033,1.03522527217865,1.21877038478851,0.630166113376617,0.450002878904343,0.126819714903831,0.0470490865409374,2.20726943016052 +"Mauritius",64,5.62900018692017,5.72986219167709,5.52813818216324,1.18939554691315,1.20956099033356,0.638007462024689,0.491247326135635,0.360933750867844,0.0421815551817417,1.6975839138031 +"Cyprus",65,5.62099981307983,5.71469269931316,5.5273069268465,1.35593807697296,1.13136327266693,0.84471470117569,0.355111539363861,0.271254301071167,0.0412379764020443,1.62124919891357 
+"Estonia",66,5.61100006103516,5.68813987419009,5.53386024788022,1.32087934017181,1.47667109966278,0.695168316364288,0.479131430387497,0.0988908112049103,0.183248922228813,1.35750865936279 +"Belarus",67,5.56899976730347,5.64611424401402,5.49188529059291,1.15655755996704,1.44494521617889,0.637714266777039,0.295400261878967,0.15513750910759,0.156313821673393,1.72323298454285 +"Libya",68,5.52500009536743,5.67695380687714,5.37304638385773,1.10180306434631,1.35756433010101,0.520169019699097,0.465733230113983,0.152073666453362,0.0926102101802826,1.83501124382019 +"Turkey",69,5.5,5.59486496329308,5.40513503670692,1.19827437400818,1.33775317668915,0.637605607509613,0.300740599632263,0.0466930419206619,0.0996715798974037,1.87927794456482 +"Paraguay",70,5.49300003051758,5.57738126963377,5.40861879140139,0.932537317276001,1.50728487968445,0.579250693321228,0.473507791757584,0.224150657653809,0.091065913438797,1.6853334903717 +"Hong Kong S.A.R., China",71,5.47200012207031,5.54959417313337,5.39440607100725,1.55167484283447,1.26279091835022,0.943062424659729,0.490968644618988,0.374465793371201,0.293933749198914,0.554633140563965 +"Philippines",72,5.42999982833862,5.54533505424857,5.31466460242867,0.85769921541214,1.25391757488251,0.468009054660797,0.585214674472809,0.193513423204422,0.0993318930268288,1.97260475158691 +"Serbia",73,5.39499998092651,5.49156965613365,5.29843030571938,1.06931757926941,1.25818979740143,0.65078467130661,0.208715528249741,0.220125883817673,0.0409037806093693,1.94708442687988 +"Jordan",74,5.33599996566772,5.44841002240777,5.22358990892768,0.991012394428253,1.23908889293671,0.604590058326721,0.418421149253845,0.172170460224152,0.11980327218771,1.79117655754089 +"Hungary",75,5.32399988174438,5.40303970918059,5.24496005430818,1.2860119342804,1.34313309192657,0.687763452529907,0.175863519310951,0.0784016624093056,0.0366369374096394,1.71645927429199 
+"Jamaica",76,5.31099987030029,5.58139872848988,5.04060101211071,0.925579309463501,1.36821806430817,0.641022384166718,0.474307239055634,0.233818337321281,0.0552677810192108,1.61232566833496 +"Croatia",77,5.29300022125244,5.39177720457315,5.19422323793173,1.22255623340607,0.96798300743103,0.701288521289825,0.255772292613983,0.248002976179123,0.0431031100451946,1.85449242591858 +"Kosovo",78,5.27899980545044,5.36484799548984,5.19315161541104,0.951484382152557,1.13785350322723,0.541452050209045,0.260287940502167,0.319931447505951,0.0574716180562973,2.01054072380066 +"China",79,5.27299976348877,5.31927808977663,5.2267214372009,1.08116579055786,1.16083741188049,0.741415500640869,0.472787708044052,0.0288068410009146,0.0227942746132612,1.76493859291077 +"Pakistan",80,5.26900005340576,5.35998364135623,5.17801646545529,0.72688353061676,0.672690689563751,0.402047783136368,0.23521526157856,0.315446019172668,0.124348066747189,2.79248929023743 +"Indonesia",81,5.26200008392334,5.35288859814405,5.17111156970263,0.995538592338562,1.27444469928741,0.492345720529556,0.443323463201523,0.611704587936401,0.0153171354904771,1.42947697639465 +"Venezuela",82,5.25,5.3700319455564,5.1299680544436,1.12843120098114,1.43133759498596,0.617144227027893,0.153997123241425,0.0650196298956871,0.0644911229610443,1.78946375846863 +"Montenegro",83,5.23699998855591,5.34104444056749,5.13295553654432,1.12112903594971,1.23837649822235,0.667464673519135,0.194989055395126,0.197911024093628,0.0881741940975189,1.72919154167175 +"Morocco",84,5.2350001335144,5.31834096476436,5.15165930226445,0.878114581108093,0.774864435195923,0.59771066904068,0.408158332109451,0.0322099551558495,0.0877631828188896,2.45618939399719 +"Azerbaijan",85,5.23400020599365,5.29928653523326,5.16871387675405,1.15360176563263,1.15240025520325,0.540775775909424,0.398155838251114,0.0452693402767181,0.180987507104874,1.76248168945312 +"Dominican 
Republic",86,5.23000001907349,5.34906088516116,5.11093915298581,1.07937383651733,1.40241670608521,0.574873745441437,0.55258983373642,0.186967849731445,0.113945253193378,1.31946516036987 +"Greece",87,5.22700023651123,5.3252461694181,5.12875430360436,1.28948748111725,1.23941457271576,0.810198903083801,0.0957312509417534,0,0.04328977689147,1.74922156333923 +"Lebanon",88,5.22499990463257,5.31888228848577,5.13111752077937,1.07498753070831,1.12962424755096,0.735081076622009,0.288515985012054,0.264450758695602,0.037513829767704,1.69507384300232 +"Portugal",89,5.19500017166138,5.28504173308611,5.10495861023665,1.3151752948761,1.36704301834106,0.795843541622162,0.498465299606323,0.0951027125120163,0.0158694516867399,1.10768270492554 +"Bosnia and Herzegovina",90,5.18200016021729,5.27633568674326,5.08766463369131,0.982409417629242,1.0693359375,0.705186307430267,0.204403176903725,0.328867495059967,0,1.89217257499695 +"Honduras",91,5.18100023269653,5.30158279687166,5.0604176685214,0.730573117733002,1.14394497871399,0.582569479942322,0.348079860210419,0.236188873648643,0.0733454525470734,2.06581115722656 +"Macedonia",92,5.17500019073486,5.27217263966799,5.07782774180174,1.06457793712616,1.20789301395416,0.644948184490204,0.325905978679657,0.25376096367836,0.0602777935564518,1.6174693107605 +"Somalia",93,5.15100002288818,5.24248370990157,5.0595163358748,0.0226431842893362,0.721151351928711,0.113989137113094,0.602126955986023,0.291631311178207,0.282410323619843,3.11748456954956 +"Vietnam",94,5.07399988174438,5.14728076457977,5.000718998909,0.788547575473785,1.27749133110046,0.652168989181519,0.571055591106415,0.234968051314354,0.0876332372426987,1.46231865882874 +"Nigeria",95,5.07399988174438,5.20950013548136,4.93849962800741,0.783756256103516,1.21577048301697,0.0569157302379608,0.394952565431595,0.230947196483612,0.0261215660721064,2.36539053916931 
+"Tajikistan",96,5.04099988937378,5.11142559587956,4.970574182868,0.524713635444641,1.27146327495575,0.529235124588013,0.471566706895828,0.248997643589973,0.146377146244049,1.84904932975769 +"Bhutan",97,5.01100015640259,5.07933456212282,4.94266575068235,0.885416388511658,1.34012651443481,0.495879292488098,0.501537680625916,0.474054545164108,0.173380389809608,1.14018440246582 +"Kyrgyzstan",98,5.00400018692017,5.08991990312934,4.91808047071099,0.596220076084137,1.39423859119415,0.553457796573639,0.454943388700485,0.42858037352562,0.0394391790032387,1.53672313690186 +"Nepal",99,4.96199989318848,5.06735607936978,4.85664370700717,0.479820191860199,1.17928326129913,0.504130780696869,0.440305948257446,0.394096165895462,0.0729755461215973,1.8912410736084 +"Mongolia",100,4.95499992370605,5.0216795091331,4.88832033827901,1.02723586559296,1.4930112361908,0.557783484458923,0.394143968820572,0.338464230298996,0.0329022891819477,1.11129236221313 +"South Africa",101,4.8289999961853,4.92943518772721,4.72856480464339,1.05469870567322,1.38478863239288,0.187080070376396,0.479246735572815,0.139362379908562,0.0725094974040985,1.51090860366821 +"Tunisia",102,4.80499982833862,4.88436700701714,4.72563264966011,1.00726580619812,0.868351459503174,0.613212049007416,0.289680689573288,0.0496933571994305,0.0867231488227844,1.89025115966797 +"Palestinian Territories",103,4.77500009536743,4.88184834256768,4.66815184816718,0.716249227523804,1.15564715862274,0.565666973590851,0.25471106171608,0.114173173904419,0.0892826020717621,1.8788902759552 +"Egypt",104,4.7350001335144,4.82513378962874,4.64486647740006,0.989701807498932,0.997471392154694,0.520187258720398,0.282110154628754,0.128631442785263,0.114381365478039,1.70216107368469 +"Bulgaria",105,4.71400022506714,4.80369470641017,4.62430574372411,1.1614590883255,1.43437945842743,0.708217680454254,0.289231717586517,0.113177694380283,0.0110515309497714,0.996139287948608 +"Sierra 
Leone",106,4.70900011062622,4.85064333498478,4.56735688626766,0.36842092871666,0.984136044979095,0.00556475389748812,0.318697690963745,0.293040901422501,0.0710951760411263,2.66845989227295 +"Cameroon",107,4.69500017166138,4.79654085725546,4.5934594860673,0.564305365085602,0.946018218994141,0.132892116904259,0.430388748645782,0.236298456788063,0.0513066314160824,2.3336455821991 +"Iran",108,4.69199991226196,4.79822470769286,4.58577511683106,1.15687310695648,0.711551249027252,0.639333188533783,0.249322608113289,0.387242913246155,0.048761073499918,1.49873495101929 +"Albania",109,4.64400005340576,4.75246400639415,4.53553610041738,0.996192753314972,0.803685247898102,0.731159746646881,0.381498634815216,0.201312944293022,0.0398642159998417,1.49044156074524 +"Bangladesh",110,4.60799980163574,4.68982165828347,4.52617794498801,0.586682975292206,0.735131740570068,0.533241033554077,0.478356659412384,0.172255352139473,0.123717859387398,1.97873616218567 +"Namibia",111,4.57399988174438,4.77035474091768,4.37764502257109,0.964434325695038,1.0984708070755,0.33861181139946,0.520303547382355,0.0771337449550629,0.0931469723582268,1.4818902015686 +"Kenya",112,4.55299997329712,4.65569159060717,4.45030835598707,0.560479462146759,1.06795072555542,0.309988349676132,0.452763766050339,0.444860309362411,0.0646413192152977,1.6519021987915 +"Mozambique",113,4.55000019073486,4.77410232633352,4.3258980551362,0.234305649995804,0.870701014995575,0.106654435396194,0.480791091918945,0.322228103876114,0.179436385631561,2.35565090179443 +"Myanmar",114,4.54500007629395,4.61473994642496,4.47526020616293,0.367110550403595,1.12323594093323,0.397522568702698,0.514492034912109,0.838075160980225,0.188816204667091,1.11529040336609 +"Senegal",115,4.53499984741211,4.6016037812829,4.46839591354132,0.479309022426605,1.17969191074371,0.409362852573395,0.377922266721725,0.183468893170357,0.115460447967052,1.78964614868164 
+"Zambia",116,4.51399993896484,4.64410550147295,4.38389437645674,0.636406779289246,1.00318729877472,0.257835894823074,0.461603492498398,0.249580144882202,0.0782135501503944,1.82670545578003 +"Iraq",117,4.49700021743774,4.62259140968323,4.37140902519226,1.10271048545837,0.978613197803497,0.501180469989777,0.288555532693863,0.19963726401329,0.107215754687786,1.31890726089478 +"Gabon",118,4.46500015258789,4.5573617656529,4.37263853952289,1.1982102394104,1.1556202173233,0.356578588485718,0.312328577041626,0.0437853783369064,0.0760467872023582,1.32291626930237 +"Ethiopia",119,4.46000003814697,4.54272867664695,4.377271399647,0.339233845472336,0.86466920375824,0.353409707546234,0.408842742443085,0.312650740146637,0.165455713868141,2.01574373245239 +"Sri Lanka",120,4.44000005722046,4.55344719231129,4.32655292212963,1.00985014438629,1.25997638702393,0.625130832195282,0.561213254928589,0.490863561630249,0.0736539661884308,0.419389247894287 +"Armenia",121,4.37599992752075,4.46673461228609,4.28526524275541,0.900596737861633,1.00748372077942,0.637524425983429,0.198303267359734,0.0834880918264389,0.0266744215041399,1.5214991569519 +"India",122,4.31500005722046,4.37152201749384,4.25847809694707,0.792221248149872,0.754372596740723,0.455427616834641,0.469987004995346,0.231538489460945,0.0922268852591515,1.5191171169281 +"Mauritania",123,4.29199981689453,4.37716361626983,4.20683601751924,0.648457288742065,1.2720308303833,0.285349279642105,0.0960980430245399,0.201870024204254,0.136957004666328,1.65163731575012 +"Congo (Brazzaville)",124,4.29099988937378,4.41005350500345,4.17194627374411,0.808964252471924,0.832044363021851,0.28995743393898,0.435025870800018,0.120852127671242,0.0796181336045265,1.72413563728333 +"Georgia",125,4.28599977493286,4.37493396580219,4.19706558406353,0.950612664222717,0.57061493396759,0.649546980857849,0.309410035610199,0.0540088154375553,0.251666635274887,1.50013780593872 +"Congo 
(Kinshasa)",126,4.28000020980835,4.35781083270907,4.20218958690763,0.0921023488044739,1.22902345657349,0.191407024860382,0.235961347818375,0.246455833315849,0.0602413564920425,2.22495865821838 +"Mali",127,4.19000005722046,4.26967071101069,4.11032940343022,0.476180493831635,1.28147339820862,0.169365674257278,0.306613743305206,0.183354198932648,0.104970246553421,1.66819095611572 +"Ivory Coast",128,4.17999982833862,4.27518256321549,4.08481709346175,0.603048920631409,0.904780030250549,0.0486421696841717,0.447706192731857,0.201237469911575,0.130061775445938,1.84496426582336 +"Cambodia",129,4.16800022125244,4.27851781353354,4.05748262897134,0.601765096187592,1.00623834133148,0.429783403873444,0.633375823497772,0.385922968387604,0.0681059509515762,1.04294109344482 +"Sudan",130,4.13899993896484,4.34574716508389,3.9322527128458,0.65951669216156,1.21400856971741,0.290920823812485,0.0149958552792668,0.182317450642586,0.089847519993782,1.68706583976746 +"Ghana",131,4.11999988555908,4.22270720854402,4.01729256257415,0.667224824428558,0.873664736747742,0.295637726783752,0.423026293516159,0.256923943758011,0.0253363698720932,1.57786750793457 +"Ukraine",132,4.09600019454956,4.18541010454297,4.00659028455615,0.89465194940567,1.39453756809235,0.575903952121735,0.122974775731564,0.270061463117599,0.0230294708162546,0.814382314682007 +"Uganda",133,4.08099985122681,4.19579996705055,3.96619973540306,0.381430715322495,1.12982773780823,0.217632606625557,0.443185955286026,0.325766056776047,0.057069718837738,1.526362657547 +"Burkina Faso",134,4.03200006484985,4.12405906438828,3.93994106531143,0.3502277135849,1.04328000545502,0.215844258666039,0.324367851018906,0.250864684581757,0.120328105986118,1.72721290588379 +"Niger",135,4.02799987792969,4.11194681972265,3.94405293613672,0.161925330758095,0.993025004863739,0.26850500702858,0.36365869641304,0.228673845529556,0.138572946190834,1.87398338317871 
+"Malawi",136,3.97000002861023,4.07747881740332,3.86252123981714,0.233442038297653,0.512568831443787,0.315089583396912,0.466914653778076,0.287170469760895,0.0727116540074348,2.08178615570068 +"Chad",137,3.93600010871887,4.0347115239501,3.83728869348764,0.438012987375259,0.953855872154236,0.0411347150802612,0.16234202682972,0.216113850474358,0.0535818822681904,2.07123804092407 +"Zimbabwe",138,3.875,3.97869964271784,3.77130035728216,0.375846534967422,1.08309590816498,0.196763753890991,0.336384207010269,0.189143493771553,0.0953753814101219,1.59797024726868 +"Lesotho",139,3.80800008773804,4.04434397548437,3.5716561999917,0.521021246910095,1.19009518623352,0,0.390661299228668,0.157497271895409,0.119094640016556,1.42983531951904 +"Angola",140,3.79500007629395,3.95164193540812,3.63835821717978,0.858428180217743,1.10441195964813,0.0498686656355858,0,0.097926490008831,0.0697203353047371,1.61448240280151 +"Afghanistan",141,3.79399991035461,3.87366141527891,3.71433840543032,0.401477217674255,0.581543326377869,0.180746778845787,0.106179520487785,0.311870932579041,0.0611578300595284,2.15080118179321 +"Botswana",142,3.76600003242493,3.87412266626954,3.65787739858031,1.12209415435791,1.22155499458313,0.341755509376526,0.505196332931519,0.0993484482169151,0.0985831990838051,0.3779137134552 +"Benin",143,3.65700006484985,3.74578355133533,3.56821657836437,0.431085407733917,0.435299843549728,0.209930211305618,0.425962775945663,0.207948461174965,0.0609290152788162,1.88563096523285 +"Madagascar",144,3.64400005340576,3.71431910589337,3.57368100091815,0.305808693170547,0.913020372390747,0.375223308801651,0.189196765422821,0.208732530474663,0.0672319754958153,1.58461260795593 +"Haiti",145,3.6029999256134,3.73471479773521,3.47128505349159,0.368610262870789,0.640449821949005,0.277321130037308,0.0303698573261499,0.489203780889511,0.0998721495270729,1.69716763496399 
+"Yemen",146,3.59299993515015,3.69275031983852,3.49324955046177,0.591683447360992,0.93538224697113,0.310080915689468,0.249463722109795,0.104125209152699,0.0567674227058887,1.34560060501099 +"South Sudan",147,3.59100008010864,3.72553858578205,3.45646157443523,0.39724862575531,0.601323127746582,0.163486003875732,0.147062435746193,0.285670816898346,0.116793513298035,1.87956738471985 +"Liberia",148,3.53299999237061,3.65375626087189,3.41224372386932,0.119041793048382,0.872117936611176,0.229918196797371,0.332881182432175,0.26654988527298,0.0389482490718365,1.67328596115112 +"Guinea",149,3.50699996948242,3.58442812889814,3.4295718100667,0.244549930095673,0.791244685649872,0.194129139184952,0.348587512969971,0.264815092086792,0.110937617719173,1.55231189727783 +"Togo",150,3.49499988555908,3.59403811171651,3.39596165940166,0.305444717407227,0.431882530450821,0.247105568647385,0.38042613863945,0.196896150708199,0.0956650152802467,1.83722925186157 +"Rwanda",151,3.47099995613098,3.54303023353219,3.39896967872977,0.368745893239975,0.945707023143768,0.326424807310104,0.581843852996826,0.252756029367447,0.455220013856888,0.540061235427856 +"Syria",152,3.46199989318848,3.66366855680943,3.26033122956753,0.777153134346008,0.396102607250214,0.50053334236145,0.0815394446253777,0.493663728237152,0.151347130537033,1.06157350540161 +"Tanzania",153,3.34899997711182,3.46142975538969,3.23657019883394,0.511135876178741,1.04198980331421,0.364509284496307,0.390017777681351,0.354256361722946,0.0660351067781448,0.621130466461182 +"Burundi",154,2.90499997138977,3.07469033300877,2.73530960977077,0.091622568666935,0.629793584346771,0.151610791683197,0.0599007532000542,0.204435184597969,0.0841479450464249,1.68302416801453 +"Central African Republic",155,2.69300007820129,2.86488426923752,2.52111588716507,0,0,0.0187726859003305,0.270842045545578,0.280876487493515,0.0565650761127472,2.06600475311279 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__init__.py 
b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__pycache__/__init__.cpython-36.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..586b2ee1dfec28cc0720494f55461f8b66bc6aca GIT binary patch literal 147 zcmXr!<>k7@<{QfZ1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnF9-e5;?$yI{i6Kh z;*3=NfXd{I#G>4QqWrAXV6 literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__pycache__/__init__.cpython-37.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e601af02f28455b574e9e4ea4bcb31b79dc8356 GIT binary patch literal 245 zcmZ?b<>g`kg5%BUu^{>}h=2h`Aj1KOi&=m~3PUi1CZpdpTnr)!rz*}35Py6Mk0Pklb2?ZvuXPy4%`?way^%8HohYt}s3)$@Gy zf=3NgfaqCg_tUnS&v)&A*0B8P?0HZ3PJ(EQDJ{v&iHVN~x}qdLUaz3?7Kcr4eoARh MsvXGPpMjVG028fYm;e9( literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__init__.py b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/__init__.cpython-36.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3399488165f819934d23f246f5525a27c5e6d403 GIT binary patch literal 156 zcmXr!<>hia>l4cW1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnFHil@;?$yI{i6Kh z;*3=NfXd{I#G>4QqWrAX4% literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/__init__.cpython-37.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9862ab4cc99264261c5c248667e17e18c5d03c86 GIT binary patch literal 254 
zcmZ?b<>g`kg5%BUu^{>}h=2h`Aj1KOi&=m~3PUi1CZpdpTnr)!rz*}35Py6Mk0Pklb2?ZvuXPy4%`?way^%8HohYt}s3)$@Gy zf=3NgfaqCg_tUnS&v)&A*0B8P?0HZ3PJ(EQDJ{v&iAhUMEGaEYEslwg2Rf%DK3=b& V@)n0pZhlH>PO2Tq1D}DI0RXv4Wn2IN literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/generate_polynomials.cpython-36.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/generate_polynomials.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e9bfdf169be06165f063e239650570994f24e3f2 GIT binary patch literal 1226 zcmb6YJC74bcxLuxdu>C&9qQP-+%UVfY6`l z#ODEj3qW50#Ly5i%t(%hgn7)*y}}!M7_kNmS@abi`iy*z`pun7w;7`|nJg7v4-PQkK6XtZRgvb|4!<|5YFFPWfDqsgfW82Z22HW8J7M0Q zS9%tD`3RLhPy#ft{**i=WmrZwsJTYjw4rUh@YG%7v0xX!#x`R93!ZS)WBLdP7)bTW`p?s_fg~Ke_+g{= ze)-1}^z!#7$dYfcI&UHy@8bF;VjIIA8uQ!mVt;-8{r=vM```V#|I4@k{PshO!nfGC zOc$E~8(&s3E zN7Z;*XHpF2Cd&t_VuRH}lXV4sWwvy^|1(s7roIXY1*dTXH%JS&NJT*c8iLLTzK46H zOM3VWa37Npw~2ZmV*2=AKX6f!FgZ#R7r@Z7rK)n)o2h;3nNFZ{V0Dcfg=F)b->hy? LR{=g$Ym`EM{5Ng<^Tu=|ZDn$n4W+qwhI<`AAVUyKZ zs=Blik5wzQJPHUc9H5A{LgLg2#2?@n_`m_0ggtSAJDLuL4)bK6GhxffmMd&73nq6VF*>)40XJ}33M1b00)K;@ZD)69=LkRn zUEdjl$4cDO=?~t)Kujwq?6sOE8|}} zL!upB6{y}JU*g0Xyd(`u6m;<+v`LoH>t~Y6;IuSUCD&lkoJgulR~1!#7#%~(P}O6o zj*=R@x}Ypo>ktav-y(^1@z?X{;xFfsBG)m0Oq$Bt#CazyWx*VZ{U&s5eC?CT)>o5{ z?oNLA@XsGVE!%Jg3s2aCMSz7jGQ61Mg%f)!Aeqc$`&90{OCSr+5X?vS2EO3gur^*; z@B30*7o4X5BJ+6YhuxE}tgwg=Pd4K?7=r9fJ|B&5+#KKj z`rh50>79>ZeWus1{&8nCzV_~Tv_0Lv=}d3kx;J`%`rXcNm$u=Xyz}1p>W%5>=gFmS z#-DvL{^^6gFsCp5z?nV8nXQXEuV0*shiU!43?{rpJP8OH_u(Qgk}@unjDiHT06HD` zITAAV8MzNOoWwI z{ep?K;tJB_fhTa0M`nxxr{?^?NnaRxSn}R0*vLjKWzSnPc_TKed)p{0d%o=@+{lk` z{d$uyBk7YK#=s2CC*7-F&tz2mEjQ}N>38T=RQ)*`o^Vj%PP_DMTYEMS4-rxk64Mow z4uYnIsWC`b2aRRjtb_K(%`qrm4bSq*<)%T$$>b;2G={JVM?7q)D(uwa+#`m4*OpW4 zOj)mx!qeTD!r&iaGTODKGc-->k#4~sv6nwr*;hhOnXiZzm|d>(h4JO3ua=lMlzl23 z8h`RR>JSx`%i5+`WxHFlIXd)gZ@Lqpvw9s~{NGkAM*9q)hvHC<#i2Nse#GZg1Od4x?4l;7glZ2_lTdsenfd{A(6x4%7c-v|8fUKii=LR|`^h8fk9f87-c+*I<#ORP>$7f`q-CMee60O+veyw^I 
zLOA|X-&~olIhaQW1({gscHa?ZMH!-JhfEPaR0i`x87qU^ewQf?M%9US;J0nbd;o}# zQCr%ANwomYNCXaTQP$;*saZerTC8t-q059iUbcm*915DXvefD@TSlCTPlzvom%n46 zwTspb!MLy*)N_1CNN&r3TkA~Rl|g8A9odWSSPkbYv$^m1-Q|rQ3wXTT2tzl9+?u>i zvz^`S%ZL4MqkQ~&^7U2zV*B?v&7MEb(qTT_wer2a{q$-6dGvE@2+!ovlWcn@Pk&6d zK4x#;W#3;Oj#-g(T&whGb(zn=3u~43#Yz~flSj)soI=b3A)|`tNgdDQQ-s$bH#KFz i*9l`@T>G>%J@|jL2n{uJ$qQ)YvdhH{iQ;#e(f$DrRLpDu literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/normalize.cpython-36.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/normalize.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..671083b4dd9ee49446df56772ae205e7edac454b GIT binary patch literal 529 zcmYjOO-}+b5S_NWuo^&u7d@D8<>DUn;DHbmZ-x+}@j^74W?N)oztR>BWI5rV@h|mi zV!~hW%BC_j+V4lHgTT7!JjoY|A-&CQRUJVC6V~e{nV842S3Z^A I{u1-(KPRn=AOHXW literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/normalize.cpython-37.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/normalize.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9587ade8c08ff08b6f5f4c0319c9d87223ec629e GIT binary patch literal 627 zcmYjP&ubGw6rLZuO_ZjVdeMX6HHUgDh?HWi$C8VB2?OhznKWzm7c;YklnwMC1*@P) zM2aHGL9JMO@nk^!Kjx_2tT~H!-^M2RX6KvteUG>E-pt&pRL&!?$?vuMYXqU61(_Le zbQ7k!1i(=Pam3|!ya6<1rr)rk~p0vV{@#Ki(;oy|>H z6$8*{fKxQY1ET2=_b_lPh5rvpkAQEV`7}ij7dXC*IO$k&BgHAXj(Sij>lhpZ#e z=oQe4SV~(IDx!5+Y&#?ShAPdCt=euZs+K9w zI%r&+c}_TMkDJO|oaU~~DHl7QtGy`vh-3x!{qqiB^9H-8gjB4?126QHmadK@yDd~l zM={&>bUS&#YTlM`W$1<5t9RQXlHJu>9Q$41Z2V@F9Uf($-=BURnGF`-FA}@djr&Dqq9yJ}u*kKaHT}eTn#-=_02TKqz<@F%q Pi7z&ai%`=yK+E(m4I;?l literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/prepare_for_training.cpython-36.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/prepare_for_training.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..245b70a900b4ca174e625b7a9c89c2da3a419381 GIT binary 
patch literal 909 zcmZuvO>fjN5RDx>o6Yt^tO|wOa@-3!AaOz!5*$%gLCYZmMON(HO`F(}?M2_SX9v&;xP znJGQ7nw7GY^IK>WoZce9De^<9bj_;rM!@E1E~L=Rio8+PN~*eKm4Wzp8#kp~8dY+0 zUcemv8Uc3xTQKtzzy+5u!?U0z7F}W&tWirjY9p|LqiyU${mO=puF>bPjoS#KaZ4TM z=!)pf5*ItX#_bTQsiST)vR=hFA_|5!}@4pEGw=& zQBs)c$dit0|7gcs&c$2{;SV>IZ2|{d#`C^rPcG|RT(i2Vgz*PmEN`?bgfW8q{h45P z33}$WVA7AbTQ0tq%$7=KhkeQRRGtktEloDsUC8_j^kcaC-;4CZ2yIfWj9G~EY*j3n zuFtf(6oobE`%(&~Cm*`&8v_l}r7f#;(>QJJ9#{@2Y!&}TorllDR2aWbf?{^(rM63)7SNLJ%A_h X*T=7G#g~;h=}t2V0KqZZ>puJsNJ{#( literal 0 HcmV?d00001 diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/prepare_for_training.cpython-37.pyc b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/__pycache__/prepare_for_training.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4837a100481c269bf8951ae72c03fab7cd601cc4 GIT binary patch literal 1007 zcmZuw&1(}u6yKShO*TnCu+*rskey{rrCnwccq?iVu?Y05?lm4aU= zQjwxadeMrt2%Z`c|BpFp)94@I#glJ0P3pm!eLvp3d2eRk`@Ow7K3>#d+}^)%=MT}e zAL(K^bWpB|wD15~If`#; zmD1|D8=S>1XQC_{tV~_$3MR|7h?gaIz0eEmzu+2e;THm|AXNw>9=N`@%3v{FXCdRR zWKJCUtuP8a*B4-)8rg*xHbvx7u~>yXvQYxefy{%54nU$UK@MlNHj$`<6Kxx{4T{=1 zkbGja%|z!HrJfk;$ky9tI|o*?Z6uhYWx_2<5;MW;xLuHF$w-W(P{R~z@f3p(%6#t2 z9QYvcDIWP0kA2|IJcyYPifecqB}R%6Yb3cOU(=yVWPzvIa|{645qHHCm4Y%v!;P83 zVXR0sid#Hoh8qB?RE@$a)FlMrY2qr^5YnwKD-wlFR7#4ZrB)}7>*i2a=ZvYzVabMu ziBZL!EM_ch1`b 1: + features_normalized -= features_mean + + # 防止除以0 + features_deviation[features_deviation == 0] = 1 + features_normalized /= features_deviation + + return features_normalized, features_mean, features_deviation diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/prepare_for_training.py b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/prepare_for_training.py new file mode 100644 index 0000000..f836ad8 --- /dev/null +++ b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/features/prepare_for_training.py @@ -0,0 +1,41 @@ +"""Prepares the dataset for training""" + +import numpy as np +from .normalize import normalize +from .generate_polynomials import 
generate_polynomials +from .generate_sinusoids import generate_sinusoids + + +def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True): + # 计算样本总数 + num_examples = data.shape[0] + + data_processed = np.copy(data) + + # 预处理 + features_mean = 0 + features_deviation = 0 + data_normalized = data_processed + if normalize_data: + ( + data_normalized, + features_mean, + features_deviation + ) = normalize(data_processed) + + data_processed = data_normalized + + # 特征变换sinusoidal + if sinusoid_degree > 0: + sinusoids = generate_sinusoids(data_normalized, sinusoid_degree) + data_processed = np.concatenate((data_processed, sinusoids), axis=1) + + # 特征变换polynomial + if polynomial_degree > 0: + polynomials = generate_polynomials(data_normalized, polynomial_degree) + data_processed = np.concatenate((data_processed, polynomials), axis=1) + + # 加一列1 + data_processed = np.hstack((np.ones((num_examples, 1)), data_processed)) + + return data_processed, features_mean, features_deviation diff --git a/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/hopythesis/__init__.py b/机器学习算法理论及应用/其它/第二章——手写线性回归算法/util/hopythesis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/机器学习算法理论及应用/李航——统计学习方法/7.支持向量机——升维打击.md b/机器学习算法理论及应用/李航——统计学习方法/7.支持向量机——升维打击.md new file mode 100644 index 0000000..7c028ed --- /dev/null +++ b/机器学习算法理论及应用/李航——统计学习方法/7.支持向量机——升维打击.md @@ -0,0 +1,117 @@ +# 7.支持向量机——升维打击 + +Support vector machines + +### 知识树 + +Knowledge tree + +![1619229799424](assets/1619229799424.png) + +> 支持向量机也是李航的统计学习中效果最好的模型,面试中也是问的最多的 +> +> 苹果表示重点 +> +> 间隔:使用了几何间隔,保证w b的度量,感知机则是函数间隔 +> +> 间隔最大化思想:则是支持向量机的独有,这使得它找到最优超平面 +> +> 核函数:面试当中可能会问到是否能写出其中的一个核函数 + + + +### 红豆绿豆的前世今生 + +前面章节讲到划分超平面,来区分红豆和绿豆 + +![1619230292531](assets/1619230292531.png) + +> 从上面可以看到,能找到很多的超平面,黄色的线,那哪条黄色的线才是最好的呢?当然是对角的黄色线,因为这条可以让红豆绿豆区分的最开,也就是线和豆的距离最远,即使区分新的豆(预测集),也能最好的区分开,因为可能豆有接近的情况。 + +**如何找到最优的超平面** + +![1619230622465](assets/1619230622465.png) + +> 从上图可知,超平面A是最优的。因为它与两个类的距离都足够大。 + 
+结论:我们试图找到一个超平面,这个超平面可以使得它与最近的样本点的距离大于其他所有超平面划分时与最近的样本点的距离。 + +在SVM中,这叫间隔最大化。 + +> 即该超平面与最近的样本点的距离,大于其他所有超平面离最近样本点的距离 + +此时我们可以说,我们找到了最优的超平面,但随着时代的变迁,红豆绿豆也发生了变化,比如下图的 + +![1619230914515](assets/1619230914515.png) + +> 它不再是左右分开,而是混在一起 + +![1619230958940](assets/1619230958940.png) + +> 单纯用线性无法解决,如果是非线性呢? + +![1619230986759](assets/1619230986759.png) + +> 我们需要找到这么个圈的超平面,那么圈能是超平面吗? + +![1619231022688](assets/1619231022688.png) + +> 如上图,原本二维空间的样本,因为线性不可分,即需要投射到三维空间,那么在三维空间就能用超平面切分。 +> +> 再将三维空间的超平面投射到二维空间,那么超平面在二维空间上就是曲线的,即非线性。 + +那么接下来,我们要考虑的是,怎么进行低维和高维之间的转换。 + + + +### 升维可分问题 + +1. 当有人拿着棍子指着你时,你只能看到棍子的横截面,是一个点,它是一维的。我们无法将两个点区分开来。因为它们重叠了。 + + ![1619232957172](assets/1619232957172.png) + + > 右边红色线表示看的方向 + +2. 当有人拿着棍子指着其它地方,我们能看到整个棍子,这时候是二维的。我们可以用一把刀劈开,把红豆和绿豆区分开。所以,红豆和绿豆虽然在一维的时候不能分开,但在二维时线性可分了。 + + ![1619234003387](assets/1619234003387.png) + +3. 也可能二维不可分,如下图 + + ![1619234073290](assets/1619234073290.png) + +4. 这时候我们可以把棍子看作三维中的棍子,有体积的。如果把棍子立在地上,很有可能红豆靠南侧,绿豆靠北侧,我们像劈柴一样从上至下即可劈开(分开),也一样是线性可分。 + +5. 如果三维还不能线性可分,那就升到四维 + +**总会从某一个维度开始,它变成线性可分了,即只要不断地增加维度(特征)总能区分开来** + +同时,我们发现高维中的超平面,映射到低维空间中时,可能会变成曲线或其它的划分形式。 + +这也就是为什么,在SVM中我们同样使用超平面来划分,SVM可以划分非线性的数据集。 + +它本质上仍然是线性超平面,不过是高维中的线性超平面。 + + + +**那么升维一定会线性可分吗?** + +**会不会升到无穷维了仍然线性不可分?** + +答案是不会的,首先要明白,我们的数据集一定是基于真实的某种分布,分为A类的样本和B类的一定在本质上有区别。只要有区别,就一定可以区分开来,一定在某个高维度上线性可分。 + +**另外,总能上升到某个维度空间中线性可分,无限上升的同时有没有可能在N+1维度又不可分了?** + +不会,随着维度的上升,我们获得的信息越来越多。当第N维的数据已经足够划分时,更多的信息量并不会出现又不可分的情况。 + + + +### 总结 + +Summary + +1. SVM使用间隔最大化思想构造最优超平面。 +2. 构造出来的超平面使得其与最近的点的距离最大。 +3. SVM也可划分非线性数据集。 +4. 它通过高维中的线性超平面在低维中的投影来完成非线性的划分。因此从直观上来讲,我们的模型必定有一个升维的操作。 +5. 这是总体的概念。 \ No newline at end of file