Create 游戏销售数据-常用特征构造方法-checkpoint.ipynb

pull/2/head
benjas 5 years ago
parent 5fd4f42640
commit 6b40284984

@ -0,0 +1,915 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 离散值处理"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Platform</th>\n",
" <th>Year</th>\n",
" <th>Genre</th>\n",
" <th>Publisher</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Super Mario Bros.</td>\n",
" <td>NES</td>\n",
" <td>1985.0</td>\n",
" <td>Platform</td>\n",
" <td>Nintendo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mario Kart Wii</td>\n",
" <td>Wii</td>\n",
" <td>2008.0</td>\n",
" <td>Racing</td>\n",
" <td>Nintendo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Wii Sports Resort</td>\n",
" <td>Wii</td>\n",
" <td>2009.0</td>\n",
" <td>Sports</td>\n",
" <td>Nintendo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pokemon Red/Pokemon Blue</td>\n",
" <td>GB</td>\n",
" <td>1996.0</td>\n",
" <td>Role-Playing</td>\n",
" <td>Nintendo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Tetris</td>\n",
" <td>GB</td>\n",
" <td>1989.0</td>\n",
" <td>Puzzle</td>\n",
" <td>Nintendo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>New Super Mario Bros.</td>\n",
" <td>DS</td>\n",
" <td>2006.0</td>\n",
" <td>Platform</td>\n",
" <td>Nintendo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Platform Year Genre Publisher\n",
"1 Super Mario Bros. NES 1985.0 Platform Nintendo\n",
"2 Mario Kart Wii Wii 2008.0 Racing Nintendo\n",
"3 Wii Sports Resort Wii 2009.0 Sports Nintendo\n",
"4 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing Nintendo\n",
"5 Tetris GB 1989.0 Puzzle Nintendo\n",
"6 New Super Mario Bros. DS 2006.0 Platform Nintendo"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw sales table; the CSV is decoded as ISO-8859-1.\n",
"vg_df = pd.read_csv('data/vgsales.csv', encoding='ISO-8859-1')\n",
"\n",
"# Preview rows 1-6 of the descriptive columns.\n",
"vg_df.iloc[1:7][['Name', 'Platform', 'Year', 'Genre', 'Publisher']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"机器学习模型无法直接处理字符串类型的数据,需要先将其转换为数值。"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Action', 'Adventure', 'Fighting', 'Misc', 'Platform', 'Puzzle',\n",
" 'Racing', 'Role-Playing', 'Shooter', 'Simulation', 'Sports',\n",
" 'Strategy'], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sorted distinct values of the Genre column.\n",
"genres = np.unique(vg_df['Genre'].values)\n",
"genres  # only a handful of distinct strings"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LabelEncoder"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: 'Action',\n",
" 1: 'Adventure',\n",
" 2: 'Fighting',\n",
" 3: 'Misc',\n",
" 4: 'Platform',\n",
" 5: 'Puzzle',\n",
" 6: 'Racing',\n",
" 7: 'Role-Playing',\n",
" 8: 'Shooter',\n",
" 9: 'Simulation',\n",
" 10: 'Sports',\n",
" 11: 'Strategy'}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the encoder on the Genre column and convert every row to its integer code in one step.\n",
"gle = LabelEncoder()\n",
"genre_labels = gle.fit_transform(vg_df['Genre'])\n",
"\n",
"# classes_ lists the labels in code order, so enumerate() yields the code -> genre lookup.\n",
"genre_mappings = dict(enumerate(gle.classes_))\n",
"genre_mappings"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Platform</th>\n",
" <th>Year</th>\n",
" <th>Genre</th>\n",
" <th>GenreLabel</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Super Mario Bros.</td>\n",
" <td>NES</td>\n",
" <td>1985.0</td>\n",
" <td>Platform</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mario Kart Wii</td>\n",
" <td>Wii</td>\n",
" <td>2008.0</td>\n",
" <td>Racing</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Wii Sports Resort</td>\n",
" <td>Wii</td>\n",
" <td>2009.0</td>\n",
" <td>Sports</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pokemon Red/Pokemon Blue</td>\n",
" <td>GB</td>\n",
" <td>1996.0</td>\n",
" <td>Role-Playing</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Tetris</td>\n",
" <td>GB</td>\n",
" <td>1989.0</td>\n",
" <td>Puzzle</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>New Super Mario Bros.</td>\n",
" <td>DS</td>\n",
" <td>2006.0</td>\n",
" <td>Platform</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Platform Year Genre GenreLabel\n",
"1 Super Mario Bros. NES 1985.0 Platform 4\n",
"2 Mario Kart Wii Wii 2008.0 Racing 6\n",
"3 Wii Sports Resort Wii 2009.0 Sports 10\n",
"4 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing 7\n",
"5 Tetris GB 1989.0 Puzzle 5\n",
"6 New Super Mario Bros. DS 2006.0 Platform 4"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Store the integer codes next to the original strings.\n",
"vg_df['GenreLabel'] = genre_labels\n",
"\n",
"# Rows 1-6: each Genre string beside its encoded value.\n",
"vg_df.iloc[1:7][['Name', 'Platform', 'Year', 'Genre', 'GenreLabel']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Map\n",
"手动构建一个“类别 → 数值”的字典,再用 `map` 完成转换"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Action': 0,\n",
" 'Adventure': 1,\n",
" 'Fighting': 2,\n",
" 'Misc': 3,\n",
" 'Platform': 4,\n",
" 'Puzzle': 5,\n",
" 'Racing': 6,\n",
" 'Role-Playing': 7,\n",
" 'Shooter': 8,\n",
" 'Simulation': 9,\n",
" 'Sports': 10,\n",
" 'Strategy': 11}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inverse lookup (genre string -> integer code), built from the fitted encoder's classes_.\n",
"gen_ord_map = dict(zip(gle.classes_, range(len(gle.classes_))))\n",
"gen_ord_map"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Genre</th>\n",
" <th>GenreLabel</th>\n",
" <th>GenreMap</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Super Mario Bros.</td>\n",
" <td>Platform</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mario Kart Wii</td>\n",
" <td>Racing</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Wii Sports Resort</td>\n",
" <td>Sports</td>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pokemon Red/Pokemon Blue</td>\n",
" <td>Role-Playing</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Tetris</td>\n",
" <td>Puzzle</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>New Super Mario Bros.</td>\n",
" <td>Platform</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Genre GenreLabel GenreMap\n",
"1 Super Mario Bros. Platform 4 4\n",
"2 Mario Kart Wii Racing 6 6\n",
"3 Wii Sports Resort Sports 10 10\n",
"4 Pokemon Red/Pokemon Blue Role-Playing 7 7\n",
"5 Tetris Puzzle 5 5\n",
"6 New Super Mario Bros. Platform 4 4"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode Genre through a plain dictionary lookup instead of LabelEncoder.\n",
"vg_df['GenreMap'] = vg_df['Genre'].map(gen_ord_map)\n",
"\n",
"# GenreMap follows the mapping defined in gen_ord_map.\n",
"vg_df.iloc[1:7][['Name', 'Genre', 'GenreLabel', 'GenreMap']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## One-Hot Encoder\n",
"对于离散型特征,基于树的方法(例如随机森林)不需要使用 one-hot 编码;基于距离的模型以及神经网络等通常需要使用 one-hot 编码。"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:415: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
"If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
"In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
" warnings.warn(msg, FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"array([[0., 0., 0., ..., 0., 1., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" ...,\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.]])"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One-hot encode the integer genre codes: one 0/1 column per category\n",
"# (1 where the row belongs to that category, 0 otherwise).\n",
"# categories='auto' derives the categories from the unique values present; this is the\n",
"# behaviour scikit-learn's FutureWarning announces for 0.22, and it silences that warning.\n",
"gen_ohe = OneHotEncoder(categories='auto')\n",
"gen_feature_arr = gen_ohe.fit_transform(vg_df[['GenreLabel']]).toarray()  # densify the encoder output\n",
"gen_feature_arr"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Action</th>\n",
" <th>Adventure</th>\n",
" <th>Fighting</th>\n",
" <th>Misc</th>\n",
" <th>Platform</th>\n",
" <th>Puzzle</th>\n",
" <th>Racing</th>\n",
" <th>Role-Playing</th>\n",
" <th>Shooter</th>\n",
" <th>Simulation</th>\n",
" <th>Sports</th>\n",
" <th>Strategy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Action Adventure Fighting Misc Platform Puzzle Racing Role-Playing \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n",
"\n",
" Shooter Simulation Sports Strategy \n",
"0 0.0 0.0 1.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 1.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct genre strings in sorted order; used as the one-hot column names.\n",
"genres = np.unique(vg_df['Genre'])\n",
"\n",
"# Reuse vg_df's index so a later column-wise concat lines up row-for-row\n",
"# even if vg_df does not carry the default 0..n-1 index.\n",
"gen_features = pd.DataFrame(gen_feature_arr, columns=genres, index=vg_df.index)\n",
"gen_features.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Genre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Wii Sports</td>\n",
" <td>Sports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Super Mario Bros.</td>\n",
" <td>Platform</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mario Kart Wii</td>\n",
" <td>Racing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Wii Sports Resort</td>\n",
" <td>Sports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pokemon Red/Pokemon Blue</td>\n",
" <td>Role-Playing</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Genre\n",
"0 Wii Sports Sports\n",
"1 Super Mario Bros. Platform\n",
"2 Mario Kart Wii Racing\n",
"3 Wii Sports Resort Sports\n",
"4 Pokemon Red/Pokemon Blue Role-Playing"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 拿出两列原本的数据,实际场景中是全部数据合并,这里是为了查看方便\n",
"vg_df_2 = vg_df[['Name', 'Genre']]\n",
"vg_df_2.head()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Genre</th>\n",
" <th>Action</th>\n",
" <th>Adventure</th>\n",
" <th>Fighting</th>\n",
" <th>Misc</th>\n",
" <th>Platform</th>\n",
" <th>Puzzle</th>\n",
" <th>Racing</th>\n",
" <th>Role-Playing</th>\n",
" <th>Shooter</th>\n",
" <th>Simulation</th>\n",
" <th>Sports</th>\n",
" <th>Strategy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Wii Sports</td>\n",
" <td>Sports</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Super Mario Bros.</td>\n",
" <td>Platform</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mario Kart Wii</td>\n",
" <td>Racing</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Wii Sports Resort</td>\n",
" <td>Sports</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pokemon Red/Pokemon Blue</td>\n",
" <td>Role-Playing</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Genre Action Adventure Fighting Misc \\\n",
"0 Wii Sports Sports 0.0 0.0 0.0 0.0 \n",
"1 Super Mario Bros. Platform 0.0 0.0 0.0 0.0 \n",
"2 Mario Kart Wii Racing 0.0 0.0 0.0 0.0 \n",
"3 Wii Sports Resort Sports 0.0 0.0 0.0 0.0 \n",
"4 Pokemon Red/Pokemon Blue Role-Playing 0.0 0.0 0.0 0.0 \n",
"\n",
" Platform Puzzle Racing Role-Playing Shooter Simulation Sports \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n",
"1 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n",
"4 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n",
"\n",
" Strategy \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 0.0 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach the one-hot columns to the right of the original columns.\n",
"vg_df_ohe = pd.concat([vg_df_2, gen_features], axis='columns')\n",
"\n",
"# e.g. the row at index 1 has Genre == 'Platform' and a 1.0 in the Platform column.\n",
"vg_df_ohe.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading…
Cancel
Save