|
|
@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"### 背景描述\n",
|
|
|
|
|
|
|
|
"当一个新用户进来时,系统不知道推荐什么,可以从用户看什么来进行相关性的推荐,比如靠近交通、景区等,又或者是含早餐、有电梯等特殊的,这里怎么基于不同酒店的相似度来进行推荐的。\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"#### 基于酒店的文本描述来推荐相似酒店"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
|
|
"D:\\Anaconda3\\lib\\importlib\\_bootstrap.py:219: RuntimeWarning:\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"D:\\Anaconda3\\lib\\importlib\\_bootstrap.py:219: RuntimeWarning:\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
|
|
|
|
|
|
|
|
"\n"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
|
|
"text/html": [
|
|
|
|
|
|
|
|
" <script type=\"text/javascript\">\n",
|
|
|
|
|
|
|
|
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
|
|
|
|
|
|
|
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
|
|
|
|
|
|
|
" if (typeof require !== 'undefined') {\n",
|
|
|
|
|
|
|
|
" require.undef(\"plotly\");\n",
|
|
|
|
|
|
|
|
" requirejs.config({\n",
|
|
|
|
|
|
|
|
" paths: {\n",
|
|
|
|
|
|
|
|
" 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
" });\n",
|
|
|
|
|
|
|
|
" require(['plotly'], function(Plotly) {\n",
|
|
|
|
|
|
|
|
" window._Plotly = Plotly;\n",
|
|
|
|
|
|
|
|
" });\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
" </script>\n",
|
|
|
|
|
|
|
|
" "
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
|
|
"from nltk.corpus import stopwords\n",
|
|
|
|
|
|
|
|
"from sklearn.metrics.pairwise import linear_kernel\n",
|
|
|
|
|
|
|
|
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
|
|
|
|
|
|
|
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
|
|
|
|
|
|
|
"import re\n",
|
|
|
|
|
|
|
|
"import random\n",
|
|
|
|
|
|
|
|
"import cufflinks # pip install cufflinks\n",
|
|
|
|
|
|
|
|
"from plotly.offline import iplot\n",
|
|
|
|
|
|
|
|
"cufflinks.go_offline()"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 4,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
|
|
"text/html": [
|
|
|
|
|
|
|
|
"<div>\n",
|
|
|
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
"</style>\n",
|
|
|
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
|
|
|
" <th>name</th>\n",
|
|
|
|
|
|
|
|
" <th>address</th>\n",
|
|
|
|
|
|
|
|
" <th>desc</th>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
|
|
|
" <td>Hilton Garden Seattle Downtown</td>\n",
|
|
|
|
|
|
|
|
" <td>1821 Boren Avenue, Seattle Washington 98101 USA</td>\n",
|
|
|
|
|
|
|
|
" <td>Located on the southern tip of Lake Union, the...</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
|
|
|
" <td>Sheraton Grand Seattle</td>\n",
|
|
|
|
|
|
|
|
" <td>1400 6th Avenue, Seattle, Washington 98101 USA</td>\n",
|
|
|
|
|
|
|
|
" <td>Located in the city's vibrant core, the Sherat...</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
|
|
|
" <td>Crowne Plaza Seattle Downtown</td>\n",
|
|
|
|
|
|
|
|
" <td>1113 6th Ave, Seattle, WA 98101</td>\n",
|
|
|
|
|
|
|
|
" <td>Located in the heart of downtown Seattle, the ...</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
|
|
|
" <td>Kimpton Hotel Monaco Seattle</td>\n",
|
|
|
|
|
|
|
|
" <td>1101 4th Ave, Seattle, WA98101</td>\n",
|
|
|
|
|
|
|
|
" <td>What?s near our hotel downtown Seattle locatio...</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
|
|
|
" <td>The Westin Seattle</td>\n",
|
|
|
|
|
|
|
|
" <td>1900 5th Avenue, Seattle, Washington 98101 USA</td>\n",
|
|
|
|
|
|
|
|
" <td>Situated amid incredible shopping and iconic a...</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
|
|
|
"</table>\n",
|
|
|
|
|
|
|
|
"</div>"
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
|
|
" name \\\n",
|
|
|
|
|
|
|
|
"0 Hilton Garden Seattle Downtown \n",
|
|
|
|
|
|
|
|
"1 Sheraton Grand Seattle \n",
|
|
|
|
|
|
|
|
"2 Crowne Plaza Seattle Downtown \n",
|
|
|
|
|
|
|
|
"3 Kimpton Hotel Monaco Seattle \n",
|
|
|
|
|
|
|
|
"4 The Westin Seattle \n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" address \\\n",
|
|
|
|
|
|
|
|
"0 1821 Boren Avenue, Seattle Washington 98101 USA \n",
|
|
|
|
|
|
|
|
"1 1400 6th Avenue, Seattle, Washington 98101 USA \n",
|
|
|
|
|
|
|
|
"2 1113 6th Ave, Seattle, WA 98101 \n",
|
|
|
|
|
|
|
|
"3 1101 4th Ave, Seattle, WA98101 \n",
|
|
|
|
|
|
|
|
"4 1900 5th Avenue, Seattle, Washington 98101 USA \n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" desc \n",
|
|
|
|
|
|
|
|
"0 Located on the southern tip of Lake Union, the... \n",
|
|
|
|
|
|
|
|
"1 Located in the city's vibrant core, the Sherat... \n",
|
|
|
|
|
|
|
|
"2 Located in the heart of downtown Seattle, the ... \n",
|
|
|
|
|
|
|
|
"3 What?s near our hotel downtown Seattle locatio... \n",
|
|
|
|
|
|
|
|
"4 Situated amid incredible shopping and iconic a... "
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"execution_count": 4,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"df = pd.read_csv(\"data/Seattle_Hotels.csv\", encoding=\"latin-1\")\n",
|
|
|
|
|
|
|
|
"df.head()"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"上面分别是酒店名字、地址及描述"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
|
|
"(152, 3)"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"df.shape"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
|
|
"\"Located on the southern tip of Lake Union, the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure. \\nThe neighborhood is home to numerous major international companies including Amazon, Google and the Bill & Melinda Gates Foundation. A wealth of eclectic restaurants and bars make this area of Seattle one of the most sought out by locals and visitors. Our proximity to Lake Union allows visitors to take in some of the Pacific Northwest's majestic scenery and enjoy outdoor activities like kayaking and sailing. over 2,000 sq. ft. of versatile space and a complimentary business center. State-of-the-art A/V technology and our helpful staff will guarantee your conference, cocktail reception or wedding is a success. Refresh in the sparkling saltwater pool, or energize with the latest equipment in the 24-hour fitness center. Tastefully decorated and flooded with natural light, our guest rooms and suites offer everything you need to relax and stay productive. Unwind in the bar, and enjoy American cuisine for breakfast, lunch and dinner in our restaurant. The 24-hour Pavilion Pantry? stocks a variety of snacks, drinks and sundries.\""
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"df['desc'][0] # 查看酒店描述的个例"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": []
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
|
|
|
"version": "3.7.3"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
|
|
|
}
|