"
},
"metadata": {},
"execution_count": 107
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler()\n",
"\n",
"# X = df.loc[:, ('danceability','energy')]\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
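{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of how the `StandardScaler` created above could be applied. Scaling is optional here (the plotting cell below deliberately keeps the unscaled features), and the column names assume the same `df` used in that cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: standardize the two features to zero mean / unit variance.\n",
"# The plotting cell below keeps the unscaled values so the axes stay in the\n",
"# original feature units.\n",
"X_scaled = scaler.fit_transform(df[['danceability','acousticness']].values)\n",
"print(X_scaled.mean(axis=0).round(3), X_scaled.std(axis=0).round(3))"
]
},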
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "Unknown label type: 'continuous'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in \u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'"
]
}
],
"source": [
"from sklearn.svm import SVC\n",
"from sklearn.semi_supervised import LabelSpreading\n",
"from sklearn.semi_supervised import SelfTrainingClassifier\n",
"from sklearn import datasets\n",
"\n",
"X = df[['danceability','acousticness']].values\n",
"y = df['energy'].values\n",
"\n",
"# X = scaler.fit_transform(X)\n",
"\n",
"# step size in the mesh\n",
"h = .02\n",
"\n",
"rng = np.random.RandomState(0)\n",
"y_rand = rng.rand(y.shape[0])\n",
"y_30 = np.copy(y)\n",
"y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n",
"y_50 = np.copy(y)\n",
"y_50[y_rand < 0.5] = -1\n",
"# we create an instance of SVM and fit out data. We do not scale our\n",
"# data since we want to plot the support vectors\n",
"ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n",
"ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n",
"ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n",
"\n",
"# the base classifier for self-training is identical to the SVC\n",
"base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n",
"st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n",
" y_30, 'Self-training 30% data')\n",
"st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n",
" y_50, 'Self-training 50% data')\n",
"\n",
"rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n",
"\n",
"# create a mesh to plot in\n",
"x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
"y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
" np.arange(y_min, y_max, h))\n",
"\n",
"color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n",
"\n",
"classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n",
"for i, (clf, y_train, title) in enumerate(classifiers):\n",
" # Plot the decision boundary. For that, we will assign a color to each\n",
" # point in the mesh [x_min, x_max]x[y_min, y_max].\n",
" plt.subplot(3, 2, i + 1)\n",
" Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
"\n",
" # Put the result into a color plot\n",
" Z = Z.reshape(xx.shape)\n",
" plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n",
" plt.axis('off')\n",
"\n",
" # Plot also the training points\n",
" colors = [color_map[y] for y in y_train]\n",
" plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n",
"\n",
" plt.title(title)\n",
"\n",
"plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n",
"plt.show()"
]
},
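{
"cell_type": "markdown",
"metadata": {},
"source": [
"A rough sanity check (a sketch that assumes the cell above ran with the binned labels): compare the labels `LabelSpreading` inferred for the masked points, exposed via its `transduction_` attribute, against the true binned classes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# For each LabelSpreading run, look only at the points that were masked as\n",
"# unlabeled (-1) and measure how often the inferred label matches the true\n",
"# binned class.\n",
"for model, y_masked, title in (ls30, ls50):\n",
"    hidden = y_masked == -1\n",
"    acc = np.mean(model.transduction_[hidden] == y[hidden])\n",
"    print(f'{title}: {hidden.sum()} unlabeled points, accuracy {acc:.2f}')"
]
},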
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n---\n\n**Tuyên bố miễn trừ trách nhiệm**: \nTài liệu này đã được dịch bằng dịch vụ dịch thuật AI [Co-op Translator](https://github.com/Azure/co-op-translator). Mặc dù chúng tôi cố gắng đảm bảo độ chính xác, xin lưu ý rằng các bản dịch tự động có thể chứa lỗi hoặc không chính xác. Tài liệu gốc bằng ngôn ngữ bản địa nên được coi là nguồn tham khảo chính thức. Đối với các thông tin quan trọng, nên sử dụng dịch vụ dịch thuật chuyên nghiệp từ con người. Chúng tôi không chịu trách nhiệm cho bất kỳ sự hiểu lầm hoặc diễn giải sai nào phát sinh từ việc sử dụng bản dịch này.\n"
]
}
]
}