From 824024810c9d9bdd0e5719e6ff360705ef6f33a8 Mon Sep 17 00:00:00 2001
From: benjas <909336740@qq.com>
Date: Wed, 24 Feb 2021 14:55:16 +0800
Subject: [PATCH] Add. MultivariateLinearRegression

---
 .../MultivariateLinearRegression.py           | 131 ++++++++++++++++++
 .../UnivariateLinearRegression.py             |  15 ++
 2 files changed, 146 insertions(+)
 create mode 100644 机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py

diff --git a/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py b/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py
new file mode 100644
index 0000000..1a0f16b
--- /dev/null
+++ b/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/MultivariateLinearRegression.py
@@ -0,0 +1,131 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import plotly
+import plotly.graph_objs as go
+
+from linear_regression import LinearRegression
+plotly.offline.init_notebook_mode()  # initialise plotly's offline notebook mode for chart display
+
+data = pd.read_csv('../data/world-happiness-report-2017.csv')
+
+train_data = data.sample(frac=0.8)  # random 80% of the rows for training
+test_data = data.drop(train_data.index)  # the remaining rows form the test set
+
+# Compared with the single-feature model, there is just one more feature column.
+input_param_name_1 = 'Economy..GDP.per.Capita.'
+input_param_name_2 = 'Freedom'
+output_param_name = 'Happiness.Score'
+
+x_train = train_data[[input_param_name_1, input_param_name_2]].values
+y_train = train_data[[output_param_name]].values
+
+x_test = test_data[[input_param_name_1, input_param_name_2]].values
+y_test = test_data[[output_param_name]].values
+
+# Configure the plot with training dataset.
+plot_training_trace = go.Scatter3d(
+    x=x_train[:, 0].flatten(),
+    y=x_train[:, 1].flatten(),
+    z=y_train.flatten(),
+    name='Training Set',
+    mode='markers',
+    marker={
+        'size': 10,
+        'opacity': 1,
+        'line': {
+            'color': 'rgb(255, 255, 255)',
+            'width': 1
+        },
+    }
+)
+
+plot_test_trace = go.Scatter3d(
+    x=x_test[:, 0].flatten(),
+    y=x_test[:, 1].flatten(),
+    z=y_test.flatten(),
+    name='Test Set',
+    mode='markers',
+    marker={
+        'size': 10,
+        'opacity': 1,
+        'line': {
+            'color': 'rgb(255, 255, 255)',
+            'width': 1
+        },
+    }
+)
+
+plot_layout = go.Layout(
+    title='Data Sets',
+    scene={
+        'xaxis': {'title': input_param_name_1},
+        'yaxis': {'title': input_param_name_2},
+        'zaxis': {'title': output_param_name}
+    },
+    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
+)
+
+plot_data = [plot_training_trace, plot_test_trace]
+
+plot_figure = go.Figure(data=plot_data, layout=plot_layout)
+
+plotly.offline.plot(plot_figure)
+
+num_iterations = 500
+learning_rate = 0.01
+polynomial_degree = 0  # 0 is passed through to LinearRegression unchanged
+sinusoid_degree = 0  # 0 is passed through to LinearRegression unchanged
+
+linear_regression = LinearRegression(x_train, y_train, polynomial_degree, sinusoid_degree)
+
+(theta, cost_history) = linear_regression.train(learning_rate, num_iterations)
+
+print('开始时的损失:', cost_history[0])  # label reads "loss at the start"
+print('训练后的损失:', cost_history[-1])  # label reads "loss after training"
+
+plt.plot(range(num_iterations), cost_history)
+plt.xlabel('Iterations')
+plt.ylabel('Cost')
+plt.title('Gradient Descent')
+plt.show()
+
+predictions_num = 10  # grid resolution per feature axis
+
+x_min = x_train[:, 0].min()
+x_max = x_train[:, 0].max()
+
+y_min = x_train[:, 1].min()
+y_max = x_train[:, 1].max()
+
+x_axis = np.linspace(x_min, x_max, predictions_num)
+y_axis = np.linspace(y_min, y_max, predictions_num)
+
+x_predictions = np.zeros((predictions_num * predictions_num, 1))
+y_predictions = np.zeros((predictions_num * predictions_num, 1))
+
+x_y_index = 0  # running row index into the flattened (x, y) grid
+for x_value in x_axis:
+    for y_value in y_axis:
+        x_predictions[x_y_index] = x_value
+        y_predictions[x_y_index] = y_value
+        x_y_index += 1
+
+z_predictions = linear_regression.predict(np.hstack((x_predictions, y_predictions)))
+
+plot_predictions_trace = go.Scatter3d(
+    x=x_predictions.flatten(),
+    y=y_predictions.flatten(),
+    z=z_predictions.flatten(),
+    name='Prediction Plane',
+    mode='markers',
+    marker={
+        'size': 1,
+    },
+    opacity=0.8,
+    surfaceaxis=2,  # presumably fills a surface about the z axis -- confirm in the plotly docs
+)
+
+plot_data = [plot_training_trace, plot_test_trace, plot_predictions_trace]
+plot_figure = go.Figure(data=plot_data, layout=plot_layout)
+plotly.offline.plot(plot_figure)
diff --git a/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py b/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py
index 6aa6db2..8a42c5c 100644
--- a/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py
+++ b/机器学习算法理论及应用/第二章——手写线性回归算法/LinearRegression/UnivariateLinearRegression.py
@@ -41,3 +41,18 @@ plt.xlabel('Iteration')
 plt.ylabel('Cost')
 plt.title('GD')
 plt.show()
+
+# Test the linear regression model.
+predictions_num = 100  # number of points to predict
+# Span the training inputs from their minimum to their maximum to draw one line.
+x_predictions = np.linspace(x_train.min(), x_train.max(), predictions_num).reshape(predictions_num, 1)
+y_predictions = linear_regression.predict(x_predictions)
+
+plt.scatter(x_train, y_train, label='Train data')
+plt.scatter(x_test, y_test, label='Test data')
+plt.plot(x_predictions, y_predictions, 'r', label='Prediction')
+plt.xlabel(input_param_name)
+plt.ylabel(output_param_name)
+plt.title('Happy')
+plt.legend()
+plt.show()