"
},
"metadata": {},
"execution_count": 107
}
],
"source": [
"df.head()"
]
},
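{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before modelling, it can help to glance at the value ranges of the audio features used in the next cells. This is a small optional check, and a sketch only: it assumes `df` contains the `danceability`, `acousticness`, and `energy` columns referenced later in the notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# summary statistics for the audio features used below\n",
"# (assumes df contains these Spotify audio-feature columns)\n",
"df[['danceability', 'acousticness', 'energy']].describe()"
]
},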
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler()\n",
"\n",
"# X = df.loc[:, ('danceability','energy')]\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
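{
"cell_type": "markdown",
"metadata": {},
"source": [
"The scaler above is only instantiated; the comparison below deliberately works on the unscaled features. If standardized features are wanted (for example for distance-based models), a minimal sketch looks like this, assuming the same two columns used later on."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: standardize the two features to zero mean and unit variance.\n",
"# The semi-supervised comparison below keeps the unscaled values instead.\n",
"X_scaled = scaler.fit_transform(df[['danceability', 'acousticness']].values)\n",
"print(X_scaled.mean(axis=0), X_scaled.std(axis=0))"
]
},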
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "Unknown label type: 'continuous'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in \u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'"
]
}
],
"source": [
"from sklearn.svm import SVC\n",
"from sklearn.semi_supervised import LabelSpreading\n",
"from sklearn.semi_supervised import SelfTrainingClassifier\n",
"from sklearn import datasets\n",
"\n",
"X = df[['danceability','acousticness']].values\n",
"y = df['energy'].values\n",
"\n",
"# X = scaler.fit_transform(X)\n",
"\n",
"# step size in the mesh\n",
"h = .02\n",
"\n",
"rng = np.random.RandomState(0)\n",
"y_rand = rng.rand(y.shape[0])\n",
"y_30 = np.copy(y)\n",
"y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n",
"y_50 = np.copy(y)\n",
"y_50[y_rand < 0.5] = -1\n",
"# we create an instance of SVM and fit out data. We do not scale our\n",
"# data since we want to plot the support vectors\n",
"ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n",
"ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n",
"ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n",
"\n",
"# the base classifier for self-training is identical to the SVC\n",
"base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n",
"st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n",
" y_30, 'Self-training 30% data')\n",
"st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n",
" y_50, 'Self-training 50% data')\n",
"\n",
"rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n",
"\n",
"# create a mesh to plot in\n",
"x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
"y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
" np.arange(y_min, y_max, h))\n",
"\n",
"color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n",
"\n",
"classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n",
"for i, (clf, y_train, title) in enumerate(classifiers):\n",
" # Plot the decision boundary. For that, we will assign a color to each\n",
" # point in the mesh [x_min, x_max]x[y_min, y_max].\n",
" plt.subplot(3, 2, i + 1)\n",
" Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
"\n",
" # Put the result into a color plot\n",
" Z = Z.reshape(xx.shape)\n",
" plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n",
" plt.axis('off')\n",
"\n",
" # Plot also the training points\n",
" colors = [color_map[y] for y in y_train]\n",
" plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n",
"\n",
" plt.title(title)\n",
"\n",
"plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n",
"plt.show()"
]
},
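{
"cell_type": "markdown",
"metadata": {},
"source": [
"A rough way to go beyond the visual comparison is to score each model on the points whose labels were hidden during training. This is a minimal sketch that reuses the `classifiers`, `X`, `y`, `y_30`, and `y_50` variables defined in the cell above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: accuracy of each model on the samples whose labels were hidden (-1).\n",
"# Reuses the variables defined in the previous cell.\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"for clf, y_train, title in classifiers:\n",
"    hidden = y_train == -1\n",
"    if hidden.any():\n",
"        acc = accuracy_score(y[hidden], clf.predict(X[hidden]))\n",
"        print(f'{title}: accuracy on hidden labels = {acc:.3f}')\n",
"    else:\n",
"        print(f'{title}: trained on all labels')"
]
},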
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n---\n\n**Avertissement** : \nCe document a été traduit à l'aide du service de traduction automatique [Co-op Translator](https://github.com/Azure/co-op-translator). Bien que nous nous efforcions d'assurer l'exactitude, veuillez noter que les traductions automatisées peuvent contenir des erreurs ou des inexactitudes. Le document original dans sa langue d'origine doit être considéré comme la source faisant autorité. Pour des informations critiques, il est recommandé de faire appel à une traduction humaine professionnelle. Nous déclinons toute responsabilité en cas de malentendus ou d'interprétations erronées résultant de l'utilisation de cette traduction.\n"
]
}
]
}