You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3615 lines
130 KiB
3615 lines
130 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import scipy.stats as stats\n",
|
|
"from pathlib import Path\n",
|
|
"import glob\n",
|
|
"import pickle\n",
|
|
"\n",
|
|
"import random\n",
|
|
"import os\n",
|
|
"\n",
|
|
"from sklearn.model_selection import StratifiedKFold\n",
|
|
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
|
"from tqdm import tqdm\n",
|
|
"import tensorflow as tf\n",
|
|
"import tensorflow.keras.layers as L\n",
|
|
"import tensorflow.keras.models as M\n",
|
|
"import tensorflow.keras.backend as K\n",
|
|
"import tensorflow_addons as tfa\n",
|
|
"from tensorflow_addons.layers import WeightNormalization\n",
|
|
"from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping\n",
|
|
"pd.options.mode.chained_assignment = None\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from tensorflow.python.client import device_lib\n",
|
|
"def get_available_gpus():\n",
|
|
" local_device_protos = device_lib.list_local_devices()\n",
|
|
" return [x.name for x in local_device_protos if x.device_type == 'GPU']\n",
|
|
"print(get_available_gpus())\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>site_path_timestamp</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>ts_waypoint</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>5a0546857ecc773753327266_046cfa46be49fc1083481...</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>046cfa46be49fc10834815c6</td>\n",
|
|
" <td>9</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>5a0546857ecc773753327266_046cfa46be49fc1083481...</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>046cfa46be49fc10834815c6</td>\n",
|
|
" <td>9017</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>5a0546857ecc773753327266_046cfa46be49fc1083481...</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>046cfa46be49fc10834815c6</td>\n",
|
|
" <td>15326</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5a0546857ecc773753327266_046cfa46be49fc1083481...</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>046cfa46be49fc10834815c6</td>\n",
|
|
" <td>18763</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5a0546857ecc773753327266_046cfa46be49fc1083481...</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>046cfa46be49fc10834815c6</td>\n",
|
|
" <td>22328</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" site_path_timestamp \\\n",
|
|
"0 5a0546857ecc773753327266_046cfa46be49fc1083481... \n",
|
|
"1 5a0546857ecc773753327266_046cfa46be49fc1083481... \n",
|
|
"2 5a0546857ecc773753327266_046cfa46be49fc1083481... \n",
|
|
"3 5a0546857ecc773753327266_046cfa46be49fc1083481... \n",
|
|
"4 5a0546857ecc773753327266_046cfa46be49fc1083481... \n",
|
|
"\n",
|
|
" site path ts_waypoint \n",
|
|
"0 5a0546857ecc773753327266 046cfa46be49fc10834815c6 9 \n",
|
|
"1 5a0546857ecc773753327266 046cfa46be49fc10834815c6 9017 \n",
|
|
"2 5a0546857ecc773753327266 046cfa46be49fc10834815c6 15326 \n",
|
|
"3 5a0546857ecc773753327266 046cfa46be49fc10834815c6 18763 \n",
|
|
"4 5a0546857ecc773753327266 046cfa46be49fc10834815c6 22328 "
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"# PATH = '../input/indoor-location-navigation'\n",
|
|
"# test_files = glob.glob(f'{PATH}/test/*.txt')\n",
|
|
"# test_files_pd = [xx.split('/')[-1:][0].replace('.txt','') for xx in test_files]\n",
|
|
"# test_files_pd = pd.DataFrame(test_files_pd)\n",
|
|
"# test_files_pd.columns = ['path']\n",
|
|
"\n",
|
|
"sample_submission = pd.read_csv(\"../input/indoor-location-navigation/sample_submission.csv\")\n",
|
|
"sample_submission['site'] = [xx.split('_')[0] for xx in sample_submission.site_path_timestamp]\n",
|
|
"sample_submission['path'] = [xx.split('_')[1] for xx in sample_submission.site_path_timestamp]\n",
|
|
"sample_submission['ts_waypoint'] = [int(xx.split('_')[2]) for xx in sample_submission.site_path_timestamp]\n",
|
|
"del sample_submission['floor']\n",
|
|
"del sample_submission['x']\n",
|
|
"del sample_submission['y']\n",
|
|
"\n",
|
|
"path2site = dict(zip(sample_submission.path,sample_submission.site))\n",
|
|
"sample_submission.head()\n",
|
|
"# test_path_site = sample_submission[['site','path','timestamp','site_path_timestamp']]\n",
|
|
"# test_files_pd = pd.merge(test_files_pd,test_path_site,how='left',on='path')\n",
|
|
"# test_files_pd.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_wifi_files = glob.glob(f'../input/wifi_lbl_encode/test/*.txt')\n",
|
|
"\n",
|
|
"# train_files = glob.glob('../input/indoor-navigation-and-location-wifi-features-alldata/*train.csv') #if A \n",
|
|
"train_files = glob.glob('../input/data_abstract/*_train_waypoint_all.csv')#if B\n",
|
|
"\n",
|
|
" \n",
|
|
"train_wifi_files = glob.glob(f'../input/wifi_lbl_encode/train/*/*/*.txt')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['../input/data_abstract/5a0546857ecc773753327266_train_waypoint_all.csv',\n",
|
|
" '../input/data_abstract/5c3c44b80379370013e0fd2b_train_waypoint_all.csv']"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_files[:2]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"len train site list: 24\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# train_site_list = [xx.split('/')[-1].replace('_train.csv','') for xx in train_files] #if A \n",
|
|
"# train_site_list = [xx.split('/')[-1].replace('_train_waypoint_all.csv','') for xx in train_files] #if B 204\n",
|
|
"train_site_list = list(sample_submission.site.unique()) # if B 24\n",
|
|
"train_wifi_files = [xx for xx in train_wifi_files if xx.split('/')[-3] in train_site_list]\n",
|
|
"print('len train site list:',len(train_site_list))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"10877"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(train_wifi_files)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 11503/11503 [01:02<00:00, 184.06it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"ssidlist = set()\n",
|
|
"bssidlist = set()\n",
|
|
"for filename in tqdm(train_wifi_files+test_wifi_files):\n",
|
|
" tmp = pd.read_csv(filename)\n",
|
|
" ssidlist = ssidlist|set(tmp.ssid)\n",
|
|
" bssidlist = bssidlist|set(tmp.bssid)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(20044, 65952)"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(set(ssidlist)),len(set(bssidlist))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"seqlen = 100"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ssiddict = dict(zip(list(ssidlist)+['empty'],range(len(ssidlist)+1)))\n",
|
|
"bssiddict = dict(zip(list(bssidlist)+['empty'],range(len(bssidlist)+1)))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 10877/10877 [00:42<00:00, 254.02it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"train_wifi_pd_csv = []\n",
|
|
"for filename in tqdm(train_wifi_files):\n",
|
|
" tmp = pd.read_csv(filename)\n",
|
|
" tmp['path'] = filename.split('/')[-1].replace('.txt','')\n",
|
|
" tmp['floor'] = filename.split('/')[-2]\n",
|
|
" tmp['site'] = filename.split('/')[-3]\n",
|
|
" train_wifi_pd_csv.append(tmp)\n",
|
|
"train_wifi_pd_csv = pd.concat(train_wifi_pd_csv).reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"floor_map = {\"B3\":-3,\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
|
|
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}\n",
|
|
"train_wifi_pd_csv = train_wifi_pd_csv[train_wifi_pd_csv.floor.isin(floor_map)].reset_index(drop=True)\n",
|
|
"train_wifi_pd_csv['floorNo'] = train_wifi_pd_csv['floor'].apply(lambda x: floor_map[x])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>last_timestamp</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floor</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1578462618826</td>\n",
|
|
" <td>63159</td>\n",
|
|
" <td>162932</td>\n",
|
|
" <td>-46</td>\n",
|
|
" <td>1578462603277</td>\n",
|
|
" <td>5e15730aa280850006f3d005</td>\n",
|
|
" <td>B1</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>-1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1578462618826</td>\n",
|
|
" <td>32835</td>\n",
|
|
" <td>65513</td>\n",
|
|
" <td>-49</td>\n",
|
|
" <td>1578462618272</td>\n",
|
|
" <td>5e15730aa280850006f3d005</td>\n",
|
|
" <td>B1</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>-1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid bssid rssi last_timestamp \\\n",
|
|
"0 1578462618826 63159 162932 -46 1578462603277 \n",
|
|
"1 1578462618826 32835 65513 -49 1578462618272 \n",
|
|
"\n",
|
|
" path floor site floorNo \n",
|
|
"0 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1 \n",
|
|
"1 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1 "
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_wifi_pd_csv.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 626/626 [00:02<00:00, 208.96it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"test_wifi_pd_csv = []\n",
|
|
"for filename in tqdm(test_wifi_files):\n",
|
|
" tmp = pd.read_csv(filename)\n",
|
|
" tmp['path'] = filename.split('/')[-1].replace('.txt','')\n",
|
|
" test_wifi_pd_csv.append(tmp)\n",
|
|
"test_wifi_pd_csv = pd.concat(test_wifi_pd_csv).reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>last_timestamp</th>\n",
|
|
" <th>path</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1961</td>\n",
|
|
" <td>70537</td>\n",
|
|
" <td>28318</td>\n",
|
|
" <td>-34</td>\n",
|
|
" <td>1571828560156</td>\n",
|
|
" <td>14f45baa63b4d3a700126af6</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1961</td>\n",
|
|
" <td>43838</td>\n",
|
|
" <td>93116</td>\n",
|
|
" <td>-35</td>\n",
|
|
" <td>1571828560159</td>\n",
|
|
" <td>14f45baa63b4d3a700126af6</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid bssid rssi last_timestamp path\n",
|
|
"0 1961 70537 28318 -34 1571828560156 14f45baa63b4d3a700126af6\n",
|
|
"1 1961 43838 93116 -35 1571828560159 14f45baa63b4d3a700126af6"
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_wifi_pd_csv.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"submission = pd.read_csv('submission_floor.csv')\n",
|
|
"submission['path'] = [xx.split('_')[1] for xx in submission['site_path_timestamp']]\n",
|
|
"test_path_floor_dict = dict(zip(submission.path,submission.floor))\n",
|
|
"test_wifi_pd_csv['floorNo'] = [test_path_floor_dict[xx] for xx in test_wifi_pd_csv['path']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"ss = StandardScaler()\n",
|
|
"ss.fit(train_wifi_pd_csv.loc[:,['rssi','floorNo']])\n",
|
|
"train_wifi_pd_csv.loc[:,['rssi','floorNo']] = ss.transform(train_wifi_pd_csv.loc[:,['rssi','floorNo']])\n",
|
|
"test_wifi_pd_csv.loc[:,['rssi','floorNo']] = ss.transform(test_wifi_pd_csv.loc[:,['rssi','floorNo']])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>last_timestamp</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floor</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1578462618826</td>\n",
|
|
" <td>63159</td>\n",
|
|
" <td>162932</td>\n",
|
|
" <td>3.105926</td>\n",
|
|
" <td>1578462603277</td>\n",
|
|
" <td>5e15730aa280850006f3d005</td>\n",
|
|
" <td>B1</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>-1.340327</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1578462618826</td>\n",
|
|
" <td>32835</td>\n",
|
|
" <td>65513</td>\n",
|
|
" <td>2.810727</td>\n",
|
|
" <td>1578462618272</td>\n",
|
|
" <td>5e15730aa280850006f3d005</td>\n",
|
|
" <td>B1</td>\n",
|
|
" <td>5a0546857ecc773753327266</td>\n",
|
|
" <td>-1.340327</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid bssid rssi last_timestamp \\\n",
|
|
"0 1578462618826 63159 162932 3.105926 1578462603277 \n",
|
|
"1 1578462618826 32835 65513 2.810727 1578462618272 \n",
|
|
"\n",
|
|
" path floor site floorNo \n",
|
|
"0 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1.340327 \n",
|
|
"1 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1.340327 "
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_wifi_pd_csv.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 10877/10877 [02:51<00:00, 63.43it/s] \n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>floor</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1560500997770</td>\n",
|
|
" <td>[7702, 19396, 18304, 19396, 7702, 7702, 19396,...</td>\n",
|
|
" <td>[61027, 55262, 10121, 57287, 45809, 53865, 261...</td>\n",
|
|
" <td>[3.204325463643926, 3.1059258532748903, 2.9091...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.206</td>\n",
|
|
" <td>0.353603</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.088208</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1560500999681</td>\n",
|
|
" <td>[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...</td>\n",
|
|
" <td>[10121, 31140, 61027, 55262, 57287, 53865, 458...</td>\n",
|
|
" <td>[2.712327411798748, 2.712327411798748, 2.61392...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.220</td>\n",
|
|
" <td>0.299748</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.040317</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n",
|
|
"1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n",
|
|
"1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [3.204325463643926, 3.1059258532748903, 2.9091... \n",
|
|
"1 [2.712327411798748, 2.712327411798748, 2.61392... \n",
|
|
"\n",
|
|
" path floorNo floor site \\\n",
|
|
"0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"\n",
|
|
" wifi_len wifi_mean wifi_median wifi_std \n",
|
|
"0 0.206 0.353603 0.350737 1.088208 \n",
|
|
"1 0.220 0.299748 0.350737 1.040317 "
|
|
]
|
|
},
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_wifi_pd = []\n",
|
|
"for path,tmp in tqdm(train_wifi_pd_csv.groupby('path')):\n",
|
|
" #tmp = pd.read_csv(filename)\n",
|
|
" #tmp['rssi'] = tmp['rssi']/999\n",
|
|
" tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])\n",
|
|
" tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])\n",
|
|
" ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \\\n",
|
|
" list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) \n",
|
|
" ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \\\n",
|
|
" list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))\n",
|
|
" ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \\\n",
|
|
" list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))\n",
|
|
" \n",
|
|
" ss = pd.concat([ss1,ss2,ss3],axis=1)\n",
|
|
" ss['path'] = tmp.path.unique()[0]\n",
|
|
" ss['floorNo'] = tmp.floorNo.unique()[0]\n",
|
|
" ss['floor'] = tmp.floor.unique()[0]\n",
|
|
" ss['site'] = tmp.site.unique()[0]\n",
|
|
" ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500\n",
|
|
" ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()\n",
|
|
" ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()\n",
|
|
" ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()\n",
|
|
"\n",
|
|
" train_wifi_pd.append(ss)\n",
|
|
"train_wifi_pd = pd.concat(train_wifi_pd)\n",
|
|
"train_wifi_pd = train_wifi_pd.reset_index()\n",
|
|
"train_wifi_pd.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 626/626 [00:14<00:00, 41.79it/s]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>site</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1180</td>\n",
|
|
" <td>[7007, 9522, 15215, 18669, 15215, 19396, 4851,...</td>\n",
|
|
" <td>[35106, 10783, 39335, 4531, 48757, 19211, 1176...</td>\n",
|
|
" <td>[1.9251305288464635, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.038</td>\n",
|
|
" <td>0.024464</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>1.033093</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>3048</td>\n",
|
|
" <td>[18669, 9522, 7007, 19396, 15215, 15215, 1264,...</td>\n",
|
|
" <td>[4531, 10783, 35106, 19211, 39335, 48757, 6030...</td>\n",
|
|
" <td>[2.1219297495845346, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.040</td>\n",
|
|
" <td>0.075218</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>0.991529</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>4924</td>\n",
|
|
" <td>[9522, 18669, 7007, 19396, 15215, 4851, 15215,...</td>\n",
|
|
" <td>[10783, 4531, 35106, 19211, 48757, 11767, 3933...</td>\n",
|
|
" <td>[1.4331324770012857, 1.2363332562632146, 1.039...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.048</td>\n",
|
|
" <td>-0.149461</td>\n",
|
|
" <td>-0.436460</td>\n",
|
|
" <td>0.815521</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>6816</td>\n",
|
|
" <td>[18669, 4851, 15215, 7007, 9522, 19396, 19396,...</td>\n",
|
|
" <td>[4531, 11767, 39335, 35106, 10783, 19211, 5710...</td>\n",
|
|
" <td>[1.826730918477428, 1.1379336458941791, 1.0395...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.052</td>\n",
|
|
" <td>-0.118554</td>\n",
|
|
" <td>-0.534860</td>\n",
|
|
" <td>0.911802</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>8693</td>\n",
|
|
" <td>[18669, 15215, 7007, 4851, 9522, 19396, 15215,...</td>\n",
|
|
" <td>[4531, 48757, 35106, 11767, 10783, 19211, 3933...</td>\n",
|
|
" <td>[2.1219297495845346, 1.3347328666322502, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.062</td>\n",
|
|
" <td>-0.182526</td>\n",
|
|
" <td>-0.534860</td>\n",
|
|
" <td>0.905339</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1180 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n",
|
|
"1 3048 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n",
|
|
"2 4924 [9522, 18669, 7007, 19396, 15215, 4851, 15215,... \n",
|
|
"3 6816 [18669, 4851, 15215, 7007, 9522, 19396, 19396,... \n",
|
|
"4 8693 [18669, 15215, 7007, 4851, 9522, 19396, 15215,... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n",
|
|
"1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n",
|
|
"2 [10783, 4531, 35106, 19211, 48757, 11767, 3933... \n",
|
|
"3 [4531, 11767, 39335, 35106, 10783, 19211, 5710... \n",
|
|
"4 [4531, 48757, 35106, 11767, 10783, 19211, 3933... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [1.9251305288464635, 1.4331324770012857, 1.334... \n",
|
|
"1 [2.1219297495845346, 1.4331324770012857, 1.334... \n",
|
|
"2 [1.4331324770012857, 1.2363332562632146, 1.039... \n",
|
|
"3 [1.826730918477428, 1.1379336458941791, 1.0395... \n",
|
|
"4 [2.1219297495845346, 1.3347328666322502, 1.334... \n",
|
|
"\n",
|
|
" path floorNo wifi_len wifi_mean wifi_median \\\n",
|
|
"0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n",
|
|
"1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n",
|
|
"2 00ff0c9a71cc37a2ebdd0f05 0.845957 0.048 -0.149461 -0.436460 \n",
|
|
"3 00ff0c9a71cc37a2ebdd0f05 0.845957 0.052 -0.118554 -0.534860 \n",
|
|
"4 00ff0c9a71cc37a2ebdd0f05 0.845957 0.062 -0.182526 -0.534860 \n",
|
|
"\n",
|
|
" wifi_std site \n",
|
|
"0 1.033093 5da1389e4db8ce0c98bd0547 \n",
|
|
"1 0.991529 5da1389e4db8ce0c98bd0547 \n",
|
|
"2 0.815521 5da1389e4db8ce0c98bd0547 \n",
|
|
"3 0.911802 5da1389e4db8ce0c98bd0547 \n",
|
|
"4 0.905339 5da1389e4db8ce0c98bd0547 "
|
|
]
|
|
},
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_wifi_pd = []\n",
|
|
"# for filename in tqdm(test_wifi_files):\n",
|
|
"for path,tmp in tqdm(test_wifi_pd_csv.groupby('path')):\n",
|
|
" #tmp = pd.read_csv(filename)\n",
|
|
" #tmp['rssi'] = tmp['rssi']/999\n",
|
|
" tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])\n",
|
|
" tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])\n",
|
|
" ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \\\n",
|
|
" list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) \n",
|
|
" ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \\\n",
|
|
" list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))\n",
|
|
" ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \\\n",
|
|
" list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))\n",
|
|
" ss = pd.concat([ss1,ss2,ss3],axis=1)\n",
|
|
" #ss['path'] = filename.split('/')[-1].replace('.txt','')\n",
|
|
" ss['path'] = tmp.path.unique()[0]\n",
|
|
" ss['floorNo'] = tmp.floorNo.unique()[0]\n",
|
|
" ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500\n",
|
|
" ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()\n",
|
|
" ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()\n",
|
|
" ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()\n",
|
|
"\n",
|
|
" test_wifi_pd.append(ss)\n",
|
|
"test_wifi_pd = pd.concat(test_wifi_pd)\n",
|
|
"test_wifi_pd = test_wifi_pd.reset_index()\n",
|
|
"test_wifi_pd['site'] = [path2site[xx] for xx in test_wifi_pd.path]\n",
|
|
"test_wifi_pd.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 204/204 [00:00<00:00, 263.20it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# filename = train_files[0]\n",
|
|
"train_xy = []\n",
|
|
"for filename in tqdm(train_files):\n",
|
|
" tmp = pd.read_csv(filename,index_col=0)\n",
|
|
" ss = tmp[['path','site','floor','ts_waypoint','x','y']]\n",
|
|
" train_xy.append(ss)\n",
|
|
"train_xy = pd.concat(train_xy).reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(166681, 6)"
|
|
]
|
|
},
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_xy=train_xy.drop_duplicates()\n",
|
|
"train_xy.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# path = '5e15730aa280850006f3d005'\n",
|
|
"# train_wifi_pd_x = train_wifi_pd[train_wifi_pd.path==path]\n",
|
|
"# train_y = train_xy[train_xy.path==path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)\n",
|
|
"# if len(train_y)==0:\n",
|
|
"# print(path,'have no waypoint')\n",
|
|
"# if len(train_y)>0:\n",
|
|
"# ts_point_min = train_y.ts_waypoint.min()\n",
|
|
"# ts_point_max = train_y.ts_waypoint.max()\n",
|
|
"# tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()\n",
|
|
"# tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]\n",
|
|
"# if len(tmp2)>0:\n",
|
|
"# T_rel = train_y['ts_waypoint']\n",
|
|
"# T_ref = tmp2['timestamp']\n",
|
|
"# xy_hat = scipy.interpolate.interp1d(T_rel, train_y[['x','y']], axis=0)(T_ref)\n",
|
|
"# tmp2['x'] = xy_hat[:,0]\n",
|
|
"# tmp2['y'] = xy_hat[:,1]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 10877/10877 [03:16<00:00, 55.30it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import scipy.stats as stats\n",
|
|
"import scipy\n",
|
|
"# Explicit submodule import: on older SciPy releases `import scipy` alone\n",
|
|
"# does not load scipy.interpolate.\n",
|
|
"import scipy.interpolate\n",
|
|
"\n",
|
|
"# Label every WiFi scan with an (x, y) position by linearly interpolating the\n",
|
|
"# waypoints of its path at the scan timestamp. Scans outside the waypoint time\n",
|
|
"# span, and paths without any waypoint, receive no x/y from this cell.\n",
|
|
"WAYPOINT_COLS = ['path','ts_waypoint','x','y']\n",
|
|
"\n",
|
|
"# Split the waypoint table once instead of re-filtering it for every path.\n",
|
|
"waypoints_by_path = {\n",
|
|
"    wp_path: wp.reset_index(drop=True)\n",
|
|
"    for wp_path, wp in train_xy[WAYPOINT_COLS].drop_duplicates().groupby('path')\n",
|
|
"}\n",
|
|
"\n",
|
|
"labelled_frames = []\n",
|
|
"for path, path_wifi in tqdm(train_wifi_pd.groupby('path')):\n",
|
|
"    # assign() returns a new frame, so the groupby slice is never written to.\n",
|
|
"    path_wifi = path_wifi.assign(ts_waypoint=0)\n",
|
|
"    train_y = waypoints_by_path.get(path)\n",
|
|
"    if train_y is None:\n",
|
|
"        print(path,'have no waypoint')\n",
|
|
"    else:\n",
|
|
"        # Only timestamps inside [first waypoint, last waypoint] can be interpolated.\n",
|
|
"        scans = path_wifi[['timestamp']].drop_duplicates()\n",
|
|
"        in_span = scans['timestamp'].between(train_y['ts_waypoint'].min(), train_y['ts_waypoint'].max())\n",
|
|
"        scans = scans[in_span].copy()\n",
|
|
"        if len(scans)>0:\n",
|
|
"            if len(train_y)==1:\n",
|
|
"                # interp1d needs at least two points; a lone waypoint can only\n",
|
|
"                # match scans taken at exactly its timestamp, so reuse its position.\n",
|
|
"                xy_hat = np.repeat(train_y[['x','y']].to_numpy(), len(scans), axis=0)\n",
|
|
"            else:\n",
|
|
"                xy_hat = scipy.interpolate.interp1d(train_y['ts_waypoint'], train_y[['x','y']], axis=0)(scans['timestamp'])\n",
|
|
"            scans['x'] = xy_hat[:,0]\n",
|
|
"            scans['y'] = xy_hat[:,1]\n",
|
|
"            scans['path'] = path\n",
|
|
"            path_wifi = pd.merge(path_wifi, scans, how='left', on=['path','timestamp'])\n",
|
|
"    labelled_frames.append(path_wifi)\n",
|
|
"\n",
|
|
"train_all = pd.concat(labelled_frames).reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(258097, 15)"
|
|
]
|
|
},
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_all.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# ###use nearest location\n",
|
|
"# train_all = []\n",
|
|
"\n",
|
|
"# for path,train_wifi_pd_x in tqdm(train_wifi_pd.groupby('path')):\n",
|
|
"# # path = '5e15730aa280850006f3d005'\n",
|
|
"# train_y = train_xy[train_xy.path==path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)\n",
|
|
"# train_wifi_pd_x['ts_waypoint'] = 0\n",
|
|
"# if len(train_y)==0:\n",
|
|
"# print(path,'have no waypoint')\n",
|
|
"# if len(train_y)>0:\n",
|
|
"# timestamplist = np.array(train_y.ts_waypoint)\n",
|
|
"# for ii in train_wifi_pd_x.index:\n",
|
|
"# distlist = np.abs(timestamplist-train_wifi_pd_x.loc[ii,'timestamp'])\n",
|
|
"# nearest_wp_index = np.argmin(distlist)\n",
|
|
"# train_wifi_pd_x.loc[ii,'ts_waypoint'] = int(timestamplist[nearest_wp_index])\n",
|
|
"# train_wifi_pd_x = pd.merge(train_wifi_pd_x,train_y,how='left',on=['path','ts_waypoint'])\n",
|
|
"# train_all.append(train_wifi_pd_x)\n",
|
|
" \n",
|
|
"# train_all = pd.concat(train_all).reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"((11756, 15), (11756, 15))"
|
|
]
|
|
},
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_all[train_all.x.isna()].shape,train_all[train_all.y.isna()].shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_all = train_all[~train_all.x.isna()].reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>floor</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>ts_waypoint</th>\n",
|
|
" <th>x</th>\n",
|
|
" <th>y</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1560500997770</td>\n",
|
|
" <td>[7702, 19396, 18304, 19396, 7702, 7702, 19396,...</td>\n",
|
|
" <td>[61027, 55262, 10121, 57287, 45809, 53865, 261...</td>\n",
|
|
" <td>[3.204325463643926, 3.1059258532748903, 2.9091...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.206</td>\n",
|
|
" <td>0.353603</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.088208</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>195.790623</td>\n",
|
|
" <td>93.465301</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1560500999681</td>\n",
|
|
" <td>[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...</td>\n",
|
|
" <td>[10121, 31140, 61027, 55262, 57287, 53865, 458...</td>\n",
|
|
" <td>[2.712327411798748, 2.712327411798748, 2.61392...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.220</td>\n",
|
|
" <td>0.299748</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.040317</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>193.591333</td>\n",
|
|
" <td>92.973266</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1560501001590</td>\n",
|
|
" <td>[18304, 19396, 7702, 7702, 19396, 7702, 12721,...</td>\n",
|
|
" <td>[10121, 57287, 31140, 61027, 55262, 22353, 603...</td>\n",
|
|
" <td>[3.1059258532748903, 3.1059258532748903, 2.810...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.238</td>\n",
|
|
" <td>0.268875</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.046341</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>191.394344</td>\n",
|
|
" <td>92.481745</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1560501003516</td>\n",
|
|
" <td>[19396, 7702, 19396, 18304, 7702, 7702, 7702, ...</td>\n",
|
|
" <td>[57287, 31140, 55262, 10121, 22353, 53865, 432...</td>\n",
|
|
" <td>[3.1059258532748903, 2.8107270221677836, 2.613...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.258</td>\n",
|
|
" <td>0.230216</td>\n",
|
|
" <td>0.252337</td>\n",
|
|
" <td>0.995631</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>189.177791</td>\n",
|
|
" <td>91.985848</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1560501005442</td>\n",
|
|
" <td>[7702, 18304, 19396, 19396, 7702, 7702, 7702, ...</td>\n",
|
|
" <td>[31140, 10121, 55262, 57287, 43265, 61027, 612...</td>\n",
|
|
" <td>[2.8107270221677836, 2.6139278014297127, 2.613...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.282</td>\n",
|
|
" <td>0.210465</td>\n",
|
|
" <td>0.252337</td>\n",
|
|
" <td>0.963630</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>186.961238</td>\n",
|
|
" <td>91.489950</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n",
|
|
"1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n",
|
|
"2 1560501001590 [18304, 19396, 7702, 7702, 19396, 7702, 12721,... \n",
|
|
"3 1560501003516 [19396, 7702, 19396, 18304, 7702, 7702, 7702, ... \n",
|
|
"4 1560501005442 [7702, 18304, 19396, 19396, 7702, 7702, 7702, ... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n",
|
|
"1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n",
|
|
"2 [10121, 57287, 31140, 61027, 55262, 22353, 603... \n",
|
|
"3 [57287, 31140, 55262, 10121, 22353, 53865, 432... \n",
|
|
"4 [31140, 10121, 55262, 57287, 43265, 61027, 612... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [3.204325463643926, 3.1059258532748903, 2.9091... \n",
|
|
"1 [2.712327411798748, 2.712327411798748, 2.61392... \n",
|
|
"2 [3.1059258532748903, 3.1059258532748903, 2.810... \n",
|
|
"3 [3.1059258532748903, 2.8107270221677836, 2.613... \n",
|
|
"4 [2.8107270221677836, 2.6139278014297127, 2.613... \n",
|
|
"\n",
|
|
" path floorNo floor site \\\n",
|
|
"0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"2 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"3 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"4 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"\n",
|
|
" wifi_len wifi_mean wifi_median wifi_std ts_waypoint x \\\n",
|
|
"0 0.206 0.353603 0.350737 1.088208 0 195.790623 \n",
|
|
"1 0.220 0.299748 0.350737 1.040317 0 193.591333 \n",
|
|
"2 0.238 0.268875 0.350737 1.046341 0 191.394344 \n",
|
|
"3 0.258 0.230216 0.252337 0.995631 0 189.177791 \n",
|
|
"4 0.282 0.210465 0.252337 0.963630 0 186.961238 \n",
|
|
"\n",
|
|
" y \n",
|
|
"0 93.465301 \n",
|
|
"1 92.973266 \n",
|
|
"2 92.481745 \n",
|
|
"3 91.985848 \n",
|
|
"4 91.489950 "
|
|
]
|
|
},
|
|
"execution_count": 30,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_all.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(246341, 15)"
|
|
]
|
|
},
|
|
"execution_count": 31,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_all.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# from sklearn.model_selection import StratifiedKFold\n",
|
|
"# from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
|
"# N_SPLITS = 10\n",
|
|
"# SEED = 42\n",
|
|
"# for fold, (trn_idx, val_idx) in enumerate(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED).split(train_all['site'], train_all['site'])):\n",
|
|
"# train_all.loc[val_idx, 'fold'] = fold\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import KFold\n",
|
|
"N_SPLITS = 10\n",
|
|
"\n",
|
|
"# Split by path rather than by row: every scan of a path lands in the same\n",
|
|
"# validation fold.\n",
|
|
"path_list = train_all['path'].unique()\n",
|
|
"folds = KFold(n_splits=N_SPLITS, shuffle=True, random_state=1024)\n",
|
|
"fold_of_path = {}\n",
|
|
"for n_fold, (train_idx, valid_idx) in enumerate(folds.split(path_list)):\n",
|
|
"    fold_of_path.update(dict.fromkeys(path_list[valid_idx], n_fold))\n",
|
|
"# Stored as float64, the dtype the column had when it was filled through .loc.\n",
|
|
"train_all['fold'] = train_all['path'].map(fold_of_path).astype('float64')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# train_all[train_all.path=='5dd3824044333f00067aa2c4'].fold.value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# train_all[train_all.site=='5c3c44b80379370013e0fd2b'].fold.value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>floor</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>ts_waypoint</th>\n",
|
|
" <th>x</th>\n",
|
|
" <th>y</th>\n",
|
|
" <th>fold</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1560500997770</td>\n",
|
|
" <td>[7702, 19396, 18304, 19396, 7702, 7702, 19396,...</td>\n",
|
|
" <td>[61027, 55262, 10121, 57287, 45809, 53865, 261...</td>\n",
|
|
" <td>[3.204325463643926, 3.1059258532748903, 2.9091...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.206</td>\n",
|
|
" <td>0.353603</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.088208</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>195.790623</td>\n",
|
|
" <td>93.465301</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1560500999681</td>\n",
|
|
" <td>[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...</td>\n",
|
|
" <td>[10121, 31140, 61027, 55262, 57287, 53865, 458...</td>\n",
|
|
" <td>[2.712327411798748, 2.712327411798748, 2.61392...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.220</td>\n",
|
|
" <td>0.299748</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.040317</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>193.591333</td>\n",
|
|
" <td>92.973266</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n",
|
|
"1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n",
|
|
"1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [3.204325463643926, 3.1059258532748903, 2.9091... \n",
|
|
"1 [2.712327411798748, 2.712327411798748, 2.61392... \n",
|
|
"\n",
|
|
" path floorNo floor site \\\n",
|
|
"0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"\n",
|
|
" wifi_len wifi_mean wifi_median wifi_std ts_waypoint x \\\n",
|
|
"0 0.206 0.353603 0.350737 1.088208 0 195.790623 \n",
|
|
"1 0.220 0.299748 0.350737 1.040317 0 193.591333 \n",
|
|
"\n",
|
|
" y fold \n",
|
|
"0 93.465301 6.0 \n",
|
|
"1 92.973266 6.0 "
|
|
]
|
|
},
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_all.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# train_all['length'] = [len(xx) for xx in train_all['bssid']]\n",
|
|
"# del train_all['length']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# tmp1 = train_all[['x','y']].values\n",
|
|
"# tmp1 = pd.DataFrame(list(zip(tmp1)),columns = ['xy'])\n",
|
|
"# train_all = pd.concat([train_all,tmp1],axis=1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# train_all_timestamp_min = train_all.timestamp.min()\n",
|
|
"# train_all_timestamp_max = train_all.timestamp.max()\n",
|
|
"# train_all['timestamp'] = (train_all['timestamp']-train_all_timestamp_min)/(train_all_timestamp_max-train_all_timestamp_min)\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
|
|
"# \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}\n",
|
|
"# train_all['floor'] = train_all['floor'].apply(lambda x: floor_map[x])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>floor</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>ts_waypoint</th>\n",
|
|
" <th>x</th>\n",
|
|
" <th>y</th>\n",
|
|
" <th>fold</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1560500997770</td>\n",
|
|
" <td>[7702, 19396, 18304, 19396, 7702, 7702, 19396,...</td>\n",
|
|
" <td>[61027, 55262, 10121, 57287, 45809, 53865, 261...</td>\n",
|
|
" <td>[3.204325463643926, 3.1059258532748903, 2.9091...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.206</td>\n",
|
|
" <td>0.353603</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.088208</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>195.790623</td>\n",
|
|
" <td>93.465301</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1560500999681</td>\n",
|
|
" <td>[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...</td>\n",
|
|
" <td>[10121, 31140, 61027, 55262, 57287, 53865, 458...</td>\n",
|
|
" <td>[2.712327411798748, 2.712327411798748, 2.61392...</td>\n",
|
|
" <td>5d073b814a19c000086c558b</td>\n",
|
|
" <td>0.299386</td>\n",
|
|
" <td>F3</td>\n",
|
|
" <td>5c3c44b80379370013e0fd2b</td>\n",
|
|
" <td>0.220</td>\n",
|
|
" <td>0.299748</td>\n",
|
|
" <td>0.350737</td>\n",
|
|
" <td>1.040317</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>193.591333</td>\n",
|
|
" <td>92.973266</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n",
|
|
"1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n",
|
|
"1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [3.204325463643926, 3.1059258532748903, 2.9091... \n",
|
|
"1 [2.712327411798748, 2.712327411798748, 2.61392... \n",
|
|
"\n",
|
|
" path floorNo floor site \\\n",
|
|
"0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n",
|
|
"\n",
|
|
" wifi_len wifi_mean wifi_median wifi_std ts_waypoint x \\\n",
|
|
"0 0.206 0.353603 0.350737 1.088208 0 195.790623 \n",
|
|
"1 0.220 0.299748 0.350737 1.040317 0 193.591333 \n",
|
|
"\n",
|
|
" y fold \n",
|
|
"0 93.465301 6.0 \n",
|
|
"1 92.973266 6.0 "
|
|
]
|
|
},
|
|
"execution_count": 41,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"train_all.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>site</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1180</td>\n",
|
|
" <td>[7007, 9522, 15215, 18669, 15215, 19396, 4851,...</td>\n",
|
|
" <td>[35106, 10783, 39335, 4531, 48757, 19211, 1176...</td>\n",
|
|
" <td>[1.9251305288464635, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.038</td>\n",
|
|
" <td>0.024464</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>1.033093</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>3048</td>\n",
|
|
" <td>[18669, 9522, 7007, 19396, 15215, 15215, 1264,...</td>\n",
|
|
" <td>[4531, 10783, 35106, 19211, 39335, 48757, 6030...</td>\n",
|
|
" <td>[2.1219297495845346, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.040</td>\n",
|
|
" <td>0.075218</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>0.991529</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1180 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n",
|
|
"1 3048 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n",
|
|
"1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [1.9251305288464635, 1.4331324770012857, 1.334... \n",
|
|
"1 [2.1219297495845346, 1.4331324770012857, 1.334... \n",
|
|
"\n",
|
|
" path floorNo wifi_len wifi_mean wifi_median \\\n",
|
|
"0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n",
|
|
"1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n",
|
|
"\n",
|
|
" wifi_std site \n",
|
|
"0 1.033093 5da1389e4db8ce0c98bd0547 \n",
|
|
"1 0.991529 5da1389e4db8ce0c98bd0547 "
|
|
]
|
|
},
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_wifi_pd.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(37678, 11)"
|
|
]
|
|
},
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_all = test_wifi_pd.copy()\n",
|
|
"test_all.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 45,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"from tqdm import tqdm\n",
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"from dask.distributed import wait\n",
|
|
"\n",
|
|
"SENSORS = ['acce','acce_uncali','gyro',\n",
|
|
" 'gyro_uncali','magn','magn_uncali','ahrs']\n",
|
|
"\n",
|
|
"NFEAS = {\n",
|
|
" 'acce': 3,\n",
|
|
" 'acce_uncali': 3,\n",
|
|
" 'gyro': 3,\n",
|
|
" 'gyro_uncali': 3,\n",
|
|
" 'magn': 3,\n",
|
|
" 'magn_uncali': 3,\n",
|
|
" 'ahrs': 3,\n",
|
|
" 'wifi': 1,\n",
|
|
" 'ibeacon': 1,\n",
|
|
" 'waypoint': 3\n",
|
|
"}\n",
|
|
"\n",
|
|
"ACOLS = ['timestamp','x','y','z']\n",
|
|
" \n",
|
|
"FIELDS = {\n",
|
|
" 'acce': ACOLS,\n",
|
|
" 'acce_uncali': ACOLS,\n",
|
|
" 'gyro': ACOLS,\n",
|
|
" 'gyro_uncali': ACOLS,\n",
|
|
" 'magn': ACOLS,\n",
|
|
" 'magn_uncali': ACOLS,\n",
|
|
" 'ahrs': ACOLS,\n",
|
|
" 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n",
|
|
" 'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n",
|
|
" 'waypoint': ['timestamp','x','y']\n",
|
|
"}\n",
|
|
"\n",
|
|
"def to_frame(data, col):\n",
|
|
"    '''Turn one raw stream array into a DataFrame with the columns FIELDS[col].\n",
|
|
"\n",
|
|
"    An array with zero rows is replaced by the one-row placeholder from\n",
|
|
"    create_dummy_df. Every column whose name contains 'timestamp' is cast to\n",
|
|
"    int64.\n",
|
|
"\n",
|
|
"    Returns (df, is_dummy), where is_dummy is True when the placeholder was used.\n",
|
|
"    '''\n",
|
|
"    field_names = FIELDS[col]\n",
|
|
"    is_dummy = data.shape[0] == 0\n",
|
|
"    if is_dummy:\n",
|
|
"        df = create_dummy_df(field_names)\n",
|
|
"    else:\n",
|
|
"        df = pd.DataFrame(data, columns=field_names)\n",
|
|
"    for name in df.columns:\n",
|
|
"        if 'timestamp' in name:\n",
|
|
"            df[name] = df[name].astype('int64')\n",
|
|
"    return df, is_dummy\n",
|
|
"\n",
|
|
"def create_dummy_df(cols):\n",
|
|
"    '''Build a one-row placeholder frame with the given columns.\n",
|
|
"\n",
|
|
"    Every cell holds the integer 0, except the 'ssid' and 'bssid' columns,\n",
|
|
"    which hold the string '0'.\n",
|
|
"    '''\n",
|
|
"    text_columns = ('ssid', 'bssid')\n",
|
|
"    placeholder = {name: ['0' if name in text_columns else 0] for name in cols}\n",
|
|
"    return pd.DataFrame(placeholder)\n",
|
|
"\n",
|
|
"from dataclasses import dataclass\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"\n",
|
|
"# Container for the streams that read_data_file extracts from one log file.\n",
|
|
"# Each field is whatever np.array() makes of the collected rows, so a stream\n",
|
|
"# with no records is an empty array.\n",
|
|
"@dataclass\n",
|
|
"class ReadData:\n",
|
|
"    # Three-axis sensor streams: rows of [timestamp, value, value, value].\n",
|
|
"    acce: np.ndarray\n",
|
|
"    acce_uncali: np.ndarray\n",
|
|
"    gyro: np.ndarray\n",
|
|
"    gyro_uncali: np.ndarray\n",
|
|
"    magn: np.ndarray\n",
|
|
"    magn_uncali: np.ndarray\n",
|
|
"    ahrs: np.ndarray\n",
|
|
"    # Rows of [timestamp, ssid, bssid, rssi, last-seen timestamp], unparsed text.\n",
|
|
"    wifi: np.ndarray\n",
|
|
"    # Rows of [timestamp, 'uuid_major_minor', rssi, last field of the record], unparsed text.\n",
|
|
"    ibeacon: np.ndarray\n",
|
|
"    # Rows of [timestamp, x, y].\n",
|
|
"    waypoint: np.ndarray\n",
|
|
"\n",
|
|
"\n",
|
|
"def read_data_file(data_filename):\n",
|
|
"    '''Parse one tab-separated sensor log into a ReadData bundle of numpy arrays.\n",
|
|
"\n",
|
|
"    Blank lines and lines starting with '#' are ignored. Field 0 of a record is\n",
|
|
"    its timestamp and field 1 its TYPE_* tag; records with an unknown tag are\n",
|
|
"    dropped. WiFi and beacon rows keep their fields as text, the other streams\n",
|
|
"    are converted with int()/float().\n",
|
|
"    '''\n",
|
|
"    # TYPE_* tag -> name of the three-axis stream it feeds.\n",
|
|
"    axis_streams = {\n",
|
|
"        'TYPE_ACCELEROMETER': 'acce',\n",
|
|
"        'TYPE_ACCELEROMETER_UNCALIBRATED': 'acce_uncali',\n",
|
|
"        'TYPE_GYROSCOPE': 'gyro',\n",
|
|
"        'TYPE_GYROSCOPE_UNCALIBRATED': 'gyro_uncali',\n",
|
|
"        'TYPE_MAGNETIC_FIELD': 'magn',\n",
|
|
"        'TYPE_MAGNETIC_FIELD_UNCALIBRATED': 'magn_uncali',\n",
|
|
"        'TYPE_ROTATION_VECTOR': 'ahrs',\n",
|
|
"    }\n",
|
|
"    stream_order = ('acce','acce_uncali','gyro','gyro_uncali','magn','magn_uncali','ahrs','wifi','ibeacon','waypoint')\n",
|
|
"    rows = {stream: [] for stream in stream_order}\n",
|
|
"\n",
|
|
"    with open(data_filename, 'r', encoding='utf-8') as file:\n",
|
|
"        raw_lines = file.readlines()\n",
|
|
"\n",
|
|
"    for raw in raw_lines:\n",
|
|
"        raw = raw.strip()\n",
|
|
"        if not raw or raw[0] == '#':\n",
|
|
"            continue\n",
|
|
"\n",
|
|
"        fields = raw.split('\\t')\n",
|
|
"        tag = fields[1]\n",
|
|
"\n",
|
|
"        if tag in axis_streams:\n",
|
|
"            # Only rotation-vector records are length-checked; short ones are skipped.\n",
|
|
"            if tag != 'TYPE_ROTATION_VECTOR' or len(fields)>=5:\n",
|
|
"                rows[axis_streams[tag]].append([int(fields[0]), float(fields[2]), float(fields[3]), float(fields[4])])\n",
|
|
"        elif tag == 'TYPE_WIFI':\n",
|
|
"            # [scan timestamp, ssid, bssid, rssi, last-seen timestamp]\n",
|
|
"            rows['wifi'].append([fields[0], fields[2], fields[3], fields[4], fields[6]])\n",
|
|
"        elif tag == 'TYPE_BEACON':\n",
|
|
"            # uuid, major and minor are fused into one beacon code\n",
|
|
"            beacon_code = '_'.join([fields[2], fields[3], fields[4]])\n",
|
|
"            rows['ibeacon'].append([fields[0], beacon_code, fields[6], fields[-1]])\n",
|
|
"        elif tag == 'TYPE_WAYPOINT':\n",
|
|
"            rows['waypoint'].append([int(fields[0]), float(fields[2]), float(fields[3])])\n",
|
|
"\n",
|
|
"    return ReadData(*[np.array(rows[stream]) for stream in stream_order])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 46,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_test_dfs(PATH, test_files):\n",
|
|
"    '''Return the sample-submission rows of every test file, keyed by file name.\n",
|
|
"\n",
|
|
"    PATH is the directory handed to get_test_df; test_files is an iterable of\n",
|
|
"    file names whose base name up to the first '.' is the path id. Each value\n",
|
|
"    is a fresh DataFrame (index reset) with the columns timestamp, x, y, floor,\n",
|
|
"    building and site_path_timestamp; it is empty when the id is not in the\n",
|
|
"    submission file.\n",
|
|
"    '''\n",
|
|
"    import os\n",
|
|
"\n",
|
|
"    dtest = get_test_df(PATH)\n",
|
|
"    wanted = ['timestamp','x','y','floor','building','site_path_timestamp']\n",
|
|
"    dws = {}\n",
|
|
"    for fname in tqdm(test_files):\n",
|
|
"        # basename() copes with the separator of the running OS, not only '/'.\n",
|
|
"        path = os.path.basename(fname).split('.')[0]\n",
|
|
"        dws[fname] = dtest.loc[dtest['path'] == path, wanted].copy().reset_index(drop=True)\n",
|
|
"    return dws\n",
|
|
"\n",
|
|
"def get_test_df(PATH):\n",
|
|
"    '''Load PATH/sample_submission.csv and unpack its composite row id.\n",
|
|
"\n",
|
|
"    site_path_timestamp is split on '_' into building, path and timestamp\n",
|
|
"    (cast to int64). The result is sorted by path and timestamp and has a\n",
|
|
"    fresh 0..n-1 index.\n",
|
|
"    '''\n",
|
|
"    dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n",
|
|
"    row_id = dtest['site_path_timestamp']\n",
|
|
"    for position, column in enumerate(['building', 'path', 'timestamp']):\n",
|
|
"        dtest[column] = row_id.apply(lambda value, i=position: value.split('_')[i])\n",
|
|
"    dtest['timestamp'] = dtest['timestamp'].astype('int64')\n",
|
|
"    ordered = dtest.sort_values(['path','timestamp'])\n",
|
|
"    return ordered.reset_index(drop=True)\n",
|
|
"\n",
|
|
"def get_time_gap(name):\n",
|
|
"    '''Estimate the offset between a log's own timestamps and its last-seen clock.\n",
|
|
"\n",
|
|
"    Returns (gap, no_ibeacon).\n",
|
|
"\n",
|
|
"    With beacon records, gap is last_timestamp - timestamp, which must be the\n",
|
|
"    same for every beacon row (asserted), and no_ibeacon is False. Without\n",
|
|
"    them, gap is estimated from the WiFi rows as the largest\n",
|
|
"    (last-seen timestamp - scan timestamp), and no_ibeacon is True.\n",
|
|
"\n",
|
|
"    Raises ValueError when the file has neither beacon nor WiFi records.\n",
|
|
"    '''\n",
|
|
"    data = read_data_file(name)\n",
|
|
"    db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n",
|
|
"\n",
|
|
"    if not no_ibeacon:\n",
|
|
"        gap = db['last_timestamp'] - db['timestamp']\n",
|
|
"        assert gap.unique().shape[0]==1\n",
|
|
"        return gap.values[0],no_ibeacon\n",
|
|
"\n",
|
|
"    if data.wifi.shape[0] == 0:\n",
|
|
"        raise ValueError(f'{name}: no ibeacon and no wifi records, cannot estimate the time gap')\n",
|
|
"\n",
|
|
"    wifi = pd.DataFrame(data.wifi)\n",
|
|
"    # The raw columns are text. Cast to int64 before comparing: max() on strings\n",
|
|
"    # is lexicographic, and plain `int` can be 32-bit, too small for ms epochs.\n",
|
|
"    scan_ts = wifi[0].astype('int64')\n",
|
|
"    last_seen = wifi[4].astype('int64')\n",
|
|
"    # The scan timestamp is constant within a scan, so the row-wise maximum\n",
|
|
"    # equals the maximum over scans of (latest last-seen - scan timestamp).\n",
|
|
"    est_ts = (last_seen - scan_ts).max()\n",
|
|
"    return est_ts,no_ibeacon\n",
|
|
"\n",
|
|
" \n",
|
|
"\n",
|
|
"def fix_timestamp_test(df, gap):\n",
|
|
"    '''Add a real_timestamp column equal to timestamp shifted by gap.\n",
|
|
"\n",
|
|
"    The frame passed in is modified in place and is also the return value.\n",
|
|
"    '''\n",
|
|
"    shifted = df['timestamp'].add(gap)\n",
|
|
"    df['real_timestamp'] = shifted\n",
|
|
"    return df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 47,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n",
|
|
" '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n",
|
|
" '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n",
|
|
" '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']"
|
|
]
|
|
},
|
|
"execution_count": 47,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n",
|
|
"test_files_ori[:4]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 48,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<table style=\"border: 2px solid white;\">\n",
|
|
"<tr>\n",
|
|
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
|
|
"<h3 style=\"text-align: left;\">Client</h3>\n",
|
|
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
|
|
" <li><b>Scheduler: </b>tcp://127.0.0.1:36641</li>\n",
|
|
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n",
|
|
"</ul>\n",
|
|
"</td>\n",
|
|
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
|
|
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
|
|
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
|
|
" <li><b>Workers: </b>8</li>\n",
|
|
" <li><b>Cores: </b>8</li>\n",
|
|
" <li><b>Memory: </b>66.71 GB</li>\n",
|
|
"</ul>\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"</table>"
|
|
],
|
|
"text/plain": [
|
|
"<Client: 'tcp://127.0.0.1:36641' processes=8 threads=8, memory=66.71 GB>"
|
|
]
|
|
},
|
|
"execution_count": 48,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import dask\n",
|
|
"from dask.distributed import Client, wait, LocalCluster\n",
|
|
"\n",
|
|
"# set n_workers to number of cores\n",
|
|
"client = Client(n_workers=8, \n",
|
|
" threads_per_worker=1)\n",
|
|
"client"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 49,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 626/626 [00:00<00:00, 10654.38it/s]\n",
|
|
"100%|██████████| 626/626 [00:17<00:00, 34.85it/s]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: user 2.87 s, sys: 169 ms, total: 3.04 s\n",
|
|
"Wall time: 18 s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# Fan out: one dask task per test trace file.\n",
|
|
"futures = [\n",
|
|
"    client.submit(get_time_gap, trace_file)\n",
|
|
"    for trace_file in tqdm(test_files_ori, total=len(test_files_ori))\n",
|
|
"]\n",
|
|
"\n",
|
|
"# Gather: key each result by the bare file name (directory and '.txt' removed).\n",
|
|
"testpath2gap = {}\n",
|
|
"for f, fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n",
|
|
"    path_key = fname.split('/')[-1].replace('.txt','')\n",
|
|
"    testpath2gap[path_key] = f.result()\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 50,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_all['timestamp'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['timestamp'],test_all['path'])]\n",
|
|
"# test_all['ts_waypoint'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['ts_waypoint'],test_all['path'])]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 51,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# test_all['timestamp'] = (test_all['timestamp']-train_all_timestamp_min)/(train_all_timestamp_max-train_all_timestamp_min)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 52,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>site</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1573190312033</td>\n",
|
|
" <td>[7007, 9522, 15215, 18669, 15215, 19396, 4851,...</td>\n",
|
|
" <td>[35106, 10783, 39335, 4531, 48757, 19211, 1176...</td>\n",
|
|
" <td>[1.9251305288464635, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.038</td>\n",
|
|
" <td>0.024464</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>1.033093</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1573190313901</td>\n",
|
|
" <td>[18669, 9522, 7007, 19396, 15215, 15215, 1264,...</td>\n",
|
|
" <td>[4531, 10783, 35106, 19211, 39335, 48757, 6030...</td>\n",
|
|
" <td>[2.1219297495845346, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.040</td>\n",
|
|
" <td>0.075218</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>0.991529</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 1573190312033 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n",
|
|
"1 1573190313901 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n",
|
|
"1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [1.9251305288464635, 1.4331324770012857, 1.334... \n",
|
|
"1 [2.1219297495845346, 1.4331324770012857, 1.334... \n",
|
|
"\n",
|
|
" path floorNo wifi_len wifi_mean wifi_median \\\n",
|
|
"0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n",
|
|
"1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n",
|
|
"\n",
|
|
" wifi_std site \n",
|
|
"0 1.033093 5da1389e4db8ce0c98bd0547 \n",
|
|
"1 0.991529 5da1389e4db8ce0c98bd0547 "
|
|
]
|
|
},
|
|
"execution_count": 52,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_all.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 53,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"ss2 = StandardScaler()\n",
|
|
"ss2.fit(train_all.loc[:,['timestamp']])\n",
|
|
"train_all.loc[:,['timestamp']] = ss2.transform(train_all.loc[:,['timestamp']])\n",
|
|
"test_all.loc[:,['timestamp']] = ss2.transform(test_all.loc[:,['timestamp']])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 54,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# train_all_floor_min = train_all.floor.min()\n",
|
|
"# train_all_floor_max = train_all.floor.max()\n",
|
|
"# train_all['floor'] = (train_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)\n",
|
|
"# test_all['floor'] = (test_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 55,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Integer id per site, numbered in sorted order of the sites seen in train_all.\n",
|
|
"sitelist = sorted(set(train_all.site))\n",
|
|
"sitedict = {site: idx for idx, site in enumerate(sitelist)}\n",
|
|
"# Plain dict lookup: a site missing from sitedict raises KeyError.\n",
|
|
"train_all['site_id'] = train_all['site'].apply(lambda site: sitedict[site])\n",
|
|
"test_all['site_id'] = test_all['site'].apply(lambda site: sitedict[site])\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 56,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def MCRMSE(y_true, y_pred):\n",
|
|
"    \"\"\"Mean column-wise root-mean-squared error.\n",
|
|
"\n",
|
|
"    Squared errors are averaged over axis 1, square-rooted, and the result\n",
|
|
"    is averaged over axis 1 once more.\n",
|
|
"    \"\"\"\n",
|
|
"    # assumes rank-3 tensors, e.g. (batch, sequence, targets) -- TODO confirm;\n",
|
|
"    # with rank-2 inputs there is no axis 1 left for the second reduction.\n",
|
|
"    squared_error = tf.square(y_true - y_pred)\n",
|
|
"    rmse_per_column = tf.sqrt(tf.reduce_mean(squared_error, axis=1))\n",
|
|
"    return tf.reduce_mean(rmse_per_column, axis=1)\n",
|
|
"\n",
|
|
"def gru_layer(hidden_dim, dropout):\n",
|
|
"    \"\"\"Return a bidirectional GRU layer that emits the full output sequence.\n",
|
|
"\n",
|
|
"    :param hidden_dim: number of units in the wrapped GRU\n",
|
|
"    :param dropout: dropout rate passed to the GRU\n",
|
|
"    \"\"\"\n",
|
|
"    recurrent = L.GRU(\n",
|
|
"        hidden_dim,\n",
|
|
"        dropout=dropout,\n",
|
|
"        return_sequences=True,\n",
|
|
"        kernel_initializer='orthogonal',\n",
|
|
"    )\n",
|
|
"    return L.Bidirectional(recurrent)\n",
|
|
"\n",
|
|
"def pandas_list_to_array(df):\n",
|
|
"    \"\"\"\n",
|
|
"    Stack a frame whose cells are equal-length lists into one 3-D array.\n",
|
|
"\n",
|
|
"    Input: dataframe of shape (x, y), every cell holding a list of length l\n",
|
|
"    Return: np.array of shape (x, l, y)\n",
|
|
"    \"\"\"\n",
|
|
"    # tolist() yields rows -> columns -> list items, i.e. shape (x, y, l);\n",
|
|
"    # swapping the last two axes gives (x, l, y).\n",
|
|
"    stacked = np.array(df.values.tolist())\n",
|
|
"    return stacked.transpose(0, 2, 1)\n",
|
|
"\n",
|
|
"def preprocess_inputs(df, cols=['ssid','bssid', 'rssi']):\n",
|
|
"    \"\"\"Select the list-valued columns `cols` and stack them with pandas_list_to_array.\n",
|
|
"\n",
|
|
"    :param df: DataFrame whose `cols` cells each hold a list\n",
|
|
"    :param cols: column names to stack, in the order they should appear on the last axis\n",
|
|
"    :return: np.array of shape (rows, list length, len(cols))\n",
|
|
"    \"\"\"\n",
|
|
"    selected = df[cols]\n",
|
|
"    return pandas_list_to_array(selected)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 57,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def build_model_time(embed_size, seq_len=100, pred_len=2, dropout=0.5, \n",
|
|
"                     sp_dropout=0.2, embed_dim=200, hidden_dim=256, n_layers=2):\n",
|
|
"    \"\"\"Build and compile a stacked bidirectional-GRU regressor with one extra scalar input.\n",
|
|
"\n",
|
|
"    The sequence input has shape (seq_len, 2): channel 0 is looked up in an\n",
|
|
"    embedding table with embed_size rows, the remaining channel is fed in as a\n",
|
|
"    numerical feature. The second input has shape (1,).\n",
|
|
"\n",
|
|
"    :return: tf.keras.Model with a 2-unit linear output, compiled with Adam and MSE loss\n",
|
|
"    \"\"\"\n",
|
|
"    seq_in = L.Input(shape=(seq_len, 2))\n",
|
|
"    time_in = L.Input(shape=(1,))\n",
|
|
"\n",
|
|
"    ids = seq_in[:, :, :1]\n",
|
|
"    values = seq_in[:, :, 1:]\n",
|
|
"\n",
|
|
"    # The embedding adds a trailing axis; fold it back to (batch, seq_len, features).\n",
|
|
"    embedded = L.Embedding(input_dim=embed_size, output_dim=embed_dim)(ids)\n",
|
|
"    flat_shape = (-1, embedded.shape[1], embedded.shape[2] * embedded.shape[3])\n",
|
|
"    embedded = tf.reshape(embedded, shape=flat_shape)\n",
|
|
"    embedded = L.SpatialDropout1D(sp_dropout)(embedded)\n",
|
|
"\n",
|
|
"    hidden = L.concatenate([embedded, values], axis=2)\n",
|
|
"    for _ in range(n_layers):\n",
|
|
"        hidden = gru_layer(hidden_dim, dropout)(hidden)\n",
|
|
"\n",
|
|
"    # Only the first pred_len steps of the recurrent output feed the regression head.\n",
|
|
"    head = hidden[:, :pred_len]\n",
|
|
"    head = L.Flatten()(head)\n",
|
|
"    head = L.concatenate([head, time_in], axis=1)\n",
|
|
"\n",
|
|
"    out = L.Dense(2, activation='linear')(head)\n",
|
|
"\n",
|
|
"    model = tf.keras.Model(inputs=[seq_in, time_in], outputs=out)\n",
|
|
"    model.compile(tf.optimizers.Adam(), loss='mse')\n",
|
|
"    return model\n",
|
|
"\n",
|
|
"def get_embed_size(n_cat):\n",
|
|
"    \"\"\"Return round(1.6 * n_cat ** 0.56), capped at 600.\"\"\"\n",
|
|
"    suggested = round(1.6 * n_cat ** .56)\n",
|
|
"    return min(600, suggested)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 58,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def build_model_mix(sid_size,bssid_size,site_size, seq_len=100, pred_len=2, dropout=0.2, \n",
|
|
"                    sp_dropout=0.1, embed_dim=64, hidden_dim=128, n_layers=3,lr=0.001):\n",
|
|
"    \"\"\"Build and compile the bidirectional-GRU regressor with two embedded id channels.\n",
|
|
"\n",
|
|
"    Inputs, in order:\n",
|
|
"      * sequence of shape (seq_len, 3): channel 0 is embedded with a table of\n",
|
|
"        sid_size rows, channel 1 with a table of bssid_size rows, channel 2 is\n",
|
|
"        used as a numerical feature;\n",
|
|
"      * dense vector of shape (4,);\n",
|
|
"      * site id of shape (1,), embedded into a single value per site.\n",
|
|
"\n",
|
|
"    :return: tf.keras.Model with a 2-unit linear output, compiled with Adam(lr) and MSE loss\n",
|
|
"    \"\"\"\n",
|
|
"    seq_in = L.Input(shape=(seq_len, 3))\n",
|
|
"    dense_in = L.Input(shape=(4,))\n",
|
|
"    site_in = L.Input(shape=(1,))\n",
|
|
"\n",
|
|
"    ids_first = seq_in[:, :, :1]\n",
|
|
"    ids_second = seq_in[:, :, 1:2]\n",
|
|
"    values = seq_in[:, :, 2:]\n",
|
|
"\n",
|
|
"    # Each embedding adds a trailing axis; fold it back to (batch, seq_len, features).\n",
|
|
"    emb_first = L.Embedding(input_dim=sid_size, output_dim=embed_dim)(ids_first)\n",
|
|
"    shape_first = (-1, emb_first.shape[1], emb_first.shape[2] * emb_first.shape[3])\n",
|
|
"    emb_first = tf.reshape(emb_first, shape=shape_first)\n",
|
|
"    emb_first = L.SpatialDropout1D(sp_dropout)(emb_first)\n",
|
|
"\n",
|
|
"    emb_second = L.Embedding(input_dim=bssid_size, output_dim=embed_dim)(ids_second)\n",
|
|
"    shape_second = (-1, emb_second.shape[1], emb_second.shape[2] * emb_second.shape[3])\n",
|
|
"    emb_second = tf.reshape(emb_second, shape=shape_second)\n",
|
|
"    emb_second = L.SpatialDropout1D(sp_dropout)(emb_second)\n",
|
|
"\n",
|
|
"    hidden = L.concatenate([emb_first, emb_second, values], axis=2)\n",
|
|
"    for _ in range(n_layers):\n",
|
|
"        hidden = gru_layer(hidden_dim, dropout)(hidden)\n",
|
|
"\n",
|
|
"    # Only the first pred_len steps of the recurrent output feed the regression head.\n",
|
|
"    head = hidden[:, :pred_len]\n",
|
|
"    head = L.Flatten()(head)\n",
|
|
"\n",
|
|
"    site_vec = L.Embedding(input_dim=site_size, output_dim=1)(site_in)\n",
|
|
"    site_vec = L.Flatten()(site_vec)\n",
|
|
"\n",
|
|
"    head = L.concatenate([head, dense_in, site_vec], axis=1)\n",
|
|
"    out = L.Dense(2, activation='linear')(head)\n",
|
|
"\n",
|
|
"    model = tf.keras.Model(inputs=[seq_in, dense_in, site_in], outputs=out)\n",
|
|
"    model.compile(tf.optimizers.Adam(lr), loss='mse')\n",
|
|
"    return model\n",
|
|
"\n",
|
|
"# NOTE: an identical get_embed_size is also defined in the cell that defines build_model_time.\n",
|
|
"def get_embed_size(n_cat):\n",
|
|
"    \"\"\"Return round(1.6 * n_cat ** 0.56), capped at 600.\"\"\"\n",
|
|
"    suggested = round(1.6 * n_cat ** .56)\n",
|
|
"    return min(600, suggested)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 59,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"\n",
|
|
"# def build_model_time_floors_site(ssid_size,bssid_size,site_size,seq_len=100,dropout=0.5, \n",
|
|
"# sp_dropout=0.2, embed_dim=64, hidden_dim=256, n_layers=2):\n",
|
|
"# inputs = L.Input(shape=(seq_len, 2))\n",
|
|
"# input_time = L.Input(shape = (2,)) ##time and floor\n",
|
|
"# input_site = L.Input(shape = (1,)) \n",
|
|
"\n",
|
|
"# # ssid_fea = inputs[:, :, :1]\n",
|
|
"# bssid_fea = inputs[:,:,:1]\n",
|
|
"# rssi_fea = inputs[:,:,1:]\n",
|
|
"\n",
|
|
"# # embed_ssid = L.Embedding(input_dim=ssid_size, output_dim=32)(ssid_fea)\n",
|
|
"# embed_bssid = L.Embedding(input_dim=bssid_size, output_dim=64)(bssid_fea)\n",
|
|
"# embed_site = L.Embedding(input_dim=site_size, output_dim=3)(input_site)\n",
|
|
"\n",
|
|
"# # embed_ssid = L.Flatten()(embed_ssid)\n",
|
|
"# embed_bssid = L.Flatten()(embed_bssid)\n",
|
|
"# embed_site = L.Flatten()(embed_site)\n",
|
|
"# rssi_fea = L.Flatten()(rssi_fea)\n",
|
|
"\n",
|
|
"# #reshaped = tf.reshape(embed, shape=(-1, embed.shape[1], embed.shape[2] * embed.shape[3]))\n",
|
|
"# #reshaped = L.SpatialDropout1D(sp_dropout)(reshaped)\n",
|
|
" \n",
|
|
" \n",
|
|
"# hidden = L.concatenate([input_time,embed_bssid,rssi_fea], axis=1)\n",
|
|
"# hidden = L.Dropout(0.2)(hidden)\n",
|
|
"# print(hidden.shape)\n",
|
|
"# x = L.Reshape((1, -1))(hidden)\n",
|
|
" \n",
|
|
"# x = L.BatchNormalization()(x)\n",
|
|
"# x = L.LSTM(128, dropout=0.3, recurrent_dropout=0.3, return_sequences=True, activation='relu')(x)\n",
|
|
"# x = L.LSTM(16, dropout=0.1, return_sequences=False, activation='relu')(x)\n",
|
|
"\n",
|
|
"# out = L.Dense(2, activation='linear')(x)\n",
|
|
"\n",
|
|
" \n",
|
|
"# model = tf.keras.Model(inputs=[inputs,input_time,input_site], outputs=out)\n",
|
|
"# model.compile(tf.optimizers.Adam(), loss='mse')\n",
|
|
" \n",
|
|
"# return model\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 60,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# import pickle\n",
|
|
"# with open('train_all.pickle','wb') as fw:\n",
|
|
"# pickle.dump(train_all,fw)\n",
|
|
"# with open('test_all.pickle','wb') as fw:\n",
|
|
"# pickle.dump(test_all,fw)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 61,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"begin fold: 0\n",
|
|
"fold 0 7.73294929426513\n",
|
|
"150.92601263675743\n",
|
|
"elasped time: 84.61294651031494\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import os\n",
|
|
"import time\n",
|
|
"t1 = time.time()\n",
|
|
"\n",
|
|
"# Number of CV folds actually trained. 1 keeps the previous behaviour of\n",
|
|
"# stopping after fold 0; set it to N_SPLITS to train every fold.\n",
|
|
"FOLDS_TO_RUN = 1\n",
|
|
"CHECKPOINT_DIR = 'rnn_model_wifi'\n",
|
|
"os.makedirs(CHECKPOINT_DIR, exist_ok=True)  # make sure the checkpoint folder exists\n",
|
|
"\n",
|
|
"pred_cols = ['x','y']\n",
|
|
"dense_cols = ['timestamp','floorNo','wifi_len','wifi_mean']\n",
|
|
"train_inputs = preprocess_inputs(train_all,cols=['ssid', 'bssid', 'rssi'])\n",
|
|
"train_inputs_time = train_all[dense_cols].values\n",
|
|
"train_inputs_site = train_all['site_id'].values\n",
|
|
"train_labels = train_all[pred_cols].values\n",
|
|
"test_inputs = preprocess_inputs(test_all,cols=['ssid','bssid', 'rssi'])\n",
|
|
"test_inputs_time = test_all[dense_cols].values\n",
|
|
"test_inputs_site = test_all['site_id'].values\n",
|
|
"\n",
|
|
"x_test = test_inputs\n",
|
|
"x_test_time = test_inputs_time\n",
|
|
"x_test_site = test_inputs_site\n",
|
|
"\n",
|
|
"fold_of_row = train_all['fold'].values\n",
|
|
"oof_xy = np.zeros(train_labels.shape)\n",
|
|
"# Rows that received an out-of-fold prediction; the rest of oof_xy stays at zero.\n",
|
|
"oof_filled = np.zeros(len(train_labels), dtype=bool)\n",
|
|
"y_test_pred = 0\n",
|
|
"for fold_id in range(min(N_SPLITS, FOLDS_TO_RUN)):\n",
|
|
"    # Use row positions, not DataFrame index labels: the feature arrays are plain\n",
|
|
"    # numpy arrays, so labels only line up when the index happens to be 0..n-1.\n",
|
|
"    trn_idx = np.flatnonzero(fold_of_row != fold_id)\n",
|
|
"    val_idx = np.flatnonzero(fold_of_row == fold_id)\n",
|
|
"    print('begin fold:',fold_id)\n",
|
|
"    x_train, x_val = train_inputs[trn_idx],train_inputs[val_idx]\n",
|
|
"    x_train_time, x_val_time = train_inputs_time[trn_idx],train_inputs_time[val_idx]\n",
|
|
"    x_train_site, x_val_site = train_inputs_site[trn_idx],train_inputs_site[val_idx]\n",
|
|
"    y_train, y_val = train_labels[trn_idx],train_labels[val_idx]\n",
|
|
"\n",
|
|
"    model = build_model_mix(len(ssiddict),len(bssiddict),len(sitedict),seqlen,lr=0.001)\n",
|
|
"    history = model.fit(\n",
|
|
"        [x_train,x_train_time,x_train_site], y_train,\n",
|
|
"        validation_data=([x_val,x_val_time,x_val_site], y_val),\n",
|
|
"        batch_size=128,\n",
|
|
"        epochs=100,\n",
|
|
"        verbose=1,\n",
|
|
"        callbacks=[\n",
|
|
"            # NOTE(review): same patience as EarlyStopping below, so the learning rate\n",
|
|
"            # appears to be lowered only on the epoch training stops -- confirm intent.\n",
|
|
"            tf.keras.callbacks.ReduceLROnPlateau(patience=5),\n",
|
|
"            # save_best_only keeps the file on disk in step with the best-val_loss\n",
|
|
"            # weights that EarlyStopping restores in memory (the default overwrites\n",
|
|
"            # the file after every epoch, leaving the last epoch on disk).\n",
|
|
"            tf.keras.callbacks.ModelCheckpoint(\n",
|
|
"                os.path.join(CHECKPOINT_DIR, 'model_fold{}.h5'.format(fold_id)),\n",
|
|
"                monitor='val_loss', save_best_only=True),\n",
|
|
"            tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4,\n",
|
|
"                                             patience=5, mode='min', restore_best_weights=True)\n",
|
|
"        ]\n",
|
|
"    )\n",
|
|
"    y_val_pred = model.predict([x_val,x_val_time,x_val_site])\n",
|
|
"    y_test_pred += model.predict([x_test,x_test_time,x_test_site])\n",
|
|
"    oof_xy[val_idx] = y_val_pred\n",
|
|
"    oof_filled[val_idx] = True\n",
|
|
"    print('fold',fold_id, np.mean(np.sqrt(np.sum((y_val-y_val_pred)**2,axis=1))))\n",
|
|
"# Average the test predictions over the folds that were trained.\n",
|
|
"y_test_pred = y_test_pred/(fold_id + 1)\n",
|
|
"train_labels_inv = (pd.DataFrame(train_labels[:,:],columns = ['x','y']))\n",
|
|
"oof_xy_pred_inv = (pd.DataFrame(oof_xy[:,:],columns = ['x','y']))\n",
|
|
"y_test_pred_inv = (pd.DataFrame(y_test_pred[:,:],columns = ['x','y']))\n",
|
|
"# Score only rows that were actually validated; scoring the untouched zero rows\n",
|
|
"# would inflate the mean position error.\n",
|
|
"oof_error = (train_labels_inv-oof_xy_pred_inv)[oof_filled]\n",
|
|
"print(np.mean(np.sqrt(np.sum(oof_error**2,axis=1))))\n",
|
|
"\n",
|
|
"t2 = time.time()\n",
|
|
"print('elasped time:', t2 - t1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 62,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_all[['x','y']] = y_test_pred_inv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>ssid</th>\n",
|
|
" <th>bssid</th>\n",
|
|
" <th>rssi</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>floorNo</th>\n",
|
|
" <th>wifi_len</th>\n",
|
|
" <th>wifi_mean</th>\n",
|
|
" <th>wifi_median</th>\n",
|
|
" <th>wifi_std</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>site_id</th>\n",
|
|
" <th>x</th>\n",
|
|
" <th>y</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>0.345764</td>\n",
|
|
" <td>[7007, 9522, 15215, 18669, 15215, 19396, 4851,...</td>\n",
|
|
" <td>[35106, 10783, 39335, 4531, 48757, 19211, 1176...</td>\n",
|
|
" <td>[1.9251305288464635, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.038</td>\n",
|
|
" <td>0.024464</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>1.033093</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>19</td>\n",
|
|
" <td>49.430897</td>\n",
|
|
" <td>89.246811</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>0.345765</td>\n",
|
|
" <td>[18669, 9522, 7007, 19396, 15215, 15215, 1264,...</td>\n",
|
|
" <td>[4531, 10783, 35106, 19211, 39335, 48757, 6030...</td>\n",
|
|
" <td>[2.1219297495845346, 1.4331324770012857, 1.334...</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>0.845957</td>\n",
|
|
" <td>0.040</td>\n",
|
|
" <td>0.075218</td>\n",
|
|
" <td>-0.338061</td>\n",
|
|
" <td>0.991529</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>19</td>\n",
|
|
" <td>71.179886</td>\n",
|
|
" <td>87.176270</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp ssid \\\n",
|
|
"0 0.345764 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n",
|
|
"1 0.345765 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n",
|
|
"\n",
|
|
" bssid \\\n",
|
|
"0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n",
|
|
"1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n",
|
|
"\n",
|
|
" rssi \\\n",
|
|
"0 [1.9251305288464635, 1.4331324770012857, 1.334... \n",
|
|
"1 [2.1219297495845346, 1.4331324770012857, 1.334... \n",
|
|
"\n",
|
|
" path floorNo wifi_len wifi_mean wifi_median \\\n",
|
|
"0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n",
|
|
"1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n",
|
|
"\n",
|
|
" wifi_std site site_id x y \n",
|
|
"0 1.033093 5da1389e4db8ce0c98bd0547 19 49.430897 89.246811 \n",
|
|
"1 0.991529 5da1389e4db8ce0c98bd0547 19 71.179886 87.176270 "
|
|
]
|
|
},
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_all.head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 64,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>x</th>\n",
|
|
" <th>y</th>\n",
|
|
" <th>t1_wifi</th>\n",
|
|
" <th>path_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>0.345764</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>49.430897</td>\n",
|
|
" <td>89.246811</td>\n",
|
|
" <td>1180.0</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>0.345765</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.179886</td>\n",
|
|
" <td>87.176270</td>\n",
|
|
" <td>3048.0</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>0.345766</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.408737</td>\n",
|
|
" <td>86.979248</td>\n",
|
|
" <td>4924.0</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>0.345766</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.819069</td>\n",
|
|
" <td>83.849525</td>\n",
|
|
" <td>6816.0</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>0.345767</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.560272</td>\n",
|
|
" <td>86.284660</td>\n",
|
|
" <td>8693.0</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp path site x \\\n",
|
|
"0 0.345764 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 49.430897 \n",
|
|
"1 0.345765 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.179886 \n",
|
|
"2 0.345766 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.408737 \n",
|
|
"3 0.345766 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.819069 \n",
|
|
"4 0.345767 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.560272 \n",
|
|
"\n",
|
|
" y t1_wifi path_id \n",
|
|
"0 89.246811 1180.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"1 87.176270 3048.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"2 86.979248 4924.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"3 83.849525 6816.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"4 86.284660 8693.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 "
|
|
]
|
|
},
|
|
"execution_count": 64,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Work on a copy so the new columns are not written into a slice of test_all.\n",
|
|
"result = test_all[['timestamp','path','site','x','y']].copy()\n",
|
|
"# Undo the StandardScaler that was applied to 'timestamp'. The scaler works on 2-D\n",
|
|
"# (n_samples, n_features) input, so pass a one-column frame and flatten the result.\n",
|
|
"result['t1_wifi'] = ss2.inverse_transform(result[['timestamp']]).ravel()\n",
|
|
"\n",
|
|
"# Remove the per-path gap that was added to the test timestamps earlier.\n",
|
|
"result['t1_wifi'] = [xx-testpath2gap[yy][0] for (xx,yy) in zip(result['t1_wifi'],result['path'])]\n",
|
|
"result['path_id'] = result['site']+'_'+result['path']\n",
|
|
"result.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 65,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" <th>path</th>\n",
|
|
" <th>site</th>\n",
|
|
" <th>x</th>\n",
|
|
" <th>y</th>\n",
|
|
" <th>t1_wifi</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>path_id</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</th>\n",
|
|
" <td>0.345764</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>49.430897</td>\n",
|
|
" <td>89.246811</td>\n",
|
|
" <td>1180.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</th>\n",
|
|
" <td>0.345765</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.179886</td>\n",
|
|
" <td>87.176270</td>\n",
|
|
" <td>3048.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</th>\n",
|
|
" <td>0.345766</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.408737</td>\n",
|
|
" <td>86.979248</td>\n",
|
|
" <td>4924.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</th>\n",
|
|
" <td>0.345766</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.819069</td>\n",
|
|
" <td>83.849525</td>\n",
|
|
" <td>6816.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05</th>\n",
|
|
" <td>0.345767</td>\n",
|
|
" <td>00ff0c9a71cc37a2ebdd0f05</td>\n",
|
|
" <td>5da1389e4db8ce0c98bd0547</td>\n",
|
|
" <td>71.560272</td>\n",
|
|
" <td>86.284660</td>\n",
|
|
" <td>8693.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp \\\n",
|
|
"path_id \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345764 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345765 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345766 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345766 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345767 \n",
|
|
"\n",
|
|
" path \\\n",
|
|
"path_id \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n",
|
|
"\n",
|
|
" site \\\n",
|
|
"path_id \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n",
|
|
"\n",
|
|
" x y \\\n",
|
|
"path_id \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 49.430897 89.246811 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.179886 87.176270 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.408737 86.979248 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.819069 83.849525 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.560272 86.284660 \n",
|
|
"\n",
|
|
" t1_wifi \n",
|
|
"path_id \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 1180.0 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 3048.0 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 4924.0 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 6816.0 \n",
|
|
"5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 8693.0 "
|
|
]
|
|
},
|
|
"execution_count": 65,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# result['path_id'] = ['_'.join(xx.split('_')[:2]) for xx in result.site_path_timestamp]\n",
|
|
"# result['t1_wifi'] = [int(xx.split('_')[2]) for xx in result.site_path_timestamp]\n",
|
|
"# del result['site_path_timestamp']\n",
|
|
"result.set_index('path_id', inplace=True)\n",
|
|
"result.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 66,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from scipy.spatial.transform import Rotation as R\n",
|
|
"from PIL import Image\n",
|
|
"from mpl_toolkits.mplot3d import Axes3D\n",
|
|
"import plotly.graph_objs as go\n",
|
|
"from pathlib import Path\n",
|
|
"import scipy.signal as signal\n",
|
|
"import json\n",
|
|
"import seaborn as sns # visualization\n",
|
|
"from dataclasses import dataclass\n",
|
|
"\n",
|
|
"import matplotlib.pyplot as plt # visualization\n",
|
|
"import numpy as np # linear algebra\n",
|
|
"import random\n",
|
|
"import pandas as pd\n",
|
|
"from collections import Counter, defaultdict\n",
|
|
"\n",
|
|
"plt.rcParams.update({'font.size': 14})\n",
|
|
"\n",
|
|
"def split_ts_seq(ts_seq, sep_ts):\n",
|
|
"    \"\"\"\n",
|
|
"    Cut a time-ordered 2-D array into consecutive chunks at the given separator values.\n",
|
|
"\n",
|
|
"    :param ts_seq: 2-D array whose column 0 is compared against the separators\n",
|
|
"    :param sep_ts: separator values; duplicates are ignored\n",
|
|
"    :return: list of array copies; a chunk ends with the last row whose column 0 is\n",
|
|
"             not greater than its separator, and rows after the last separator form a final chunk\n",
|
|
"    \"\"\"\n",
|
|
"    stamps = ts_seq[:, 0].astype(float)\n",
|
|
"    chunks = []\n",
|
|
"    lo = 0\n",
|
|
"    for boundary in np.unique(sep_ts):\n",
|
|
"        hi = np.searchsorted(stamps, boundary, side='right')\n",
|
|
"        if hi != lo:\n",
|
|
"            chunks.append(ts_seq[lo:hi, :].copy())\n",
|
|
"            lo = hi\n",
|
|
"\n",
|
|
"    # rows left over after the last separator\n",
|
|
"    if ts_seq.shape[0] > lo:\n",
|
|
"        chunks.append(ts_seq[lo:, :].copy())\n",
|
|
"\n",
|
|
"    return chunks\n",
|
|
"\n",
|
|
"\n",
|
|
"def correct_trajectory(original_xys, end_xy):\n",
|
|
"    \"\"\"\n",
|
|
"    Rotate and scale a trajectory about its first point so its last point lands on end_xy.\n",
|
|
"\n",
|
|
"    :param original_xys: numpy ndarray, shape(N, 2)\n",
|
|
"    :param end_xy: target end point, indexable as end_xy[0] (x) and end_xy[1] (y)\n",
|
|
"    :return: numpy ndarray, shape(N, 2); the first row is always the original first point.\n",
|
|
"             If the trajectory ends exactly where it starts, no rotation/scale is defined\n",
|
|
"             and the points are returned unchanged instead of NaN/inf values.\n",
|
|
"    \"\"\"\n",
|
|
"    points = np.asarray(original_xys, dtype=float)\n",
|
|
"\n",
|
|
"    A = points[0, :]\n",
|
|
"    B = end_xy\n",
|
|
"    Bp = points[-1, :]\n",
|
|
"\n",
|
|
"    AB = np.sqrt(np.sum((B - A) ** 2))\n",
|
|
"    ABp = np.sqrt(np.sum((Bp - A) ** 2))\n",
|
|
"    if ABp == 0:\n",
|
|
"        # Degenerate case: dividing by ABp below would produce NaN/inf coordinates.\n",
|
|
"        return points.copy()\n",
|
|
"\n",
|
|
"    # Angle from the desired direction A->B to the observed direction A->Bp.\n",
|
|
"    angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])\n",
|
|
"    angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])\n",
|
|
"    angle_BpAB = angle_BpAX - angle_BAX\n",
|
|
"\n",
|
|
"    # All remaining points at once: same polar angle/length maths as a per-point loop,\n",
|
|
"    # without re-allocating the output array for every point.\n",
|
|
"    offsets = points[1:, :] - A\n",
|
|
"    angles = np.arctan2(offsets[:, 1], offsets[:, 0]) - angle_BpAB\n",
|
|
"    lengths = np.sqrt(np.sum(offsets ** 2, axis=1)) * AB / ABp\n",
|
|
"    moved = A + np.column_stack((lengths * np.cos(angles), lengths * np.sin(angles)))\n",
|
|
"\n",
|
|
"    return np.vstack((A[np.newaxis, :], moved))\n",
|
|
"\n",
|
|
"\n",
|
"def correct_positions(rel_positions, reference_positions):\n",
"    \"\"\"\n",
"    Anchor relative step displacements to absolute reference positions.\n",
"\n",
"    The steps are split at the reference timestamps; every chunk is accumulated from its start\n",
"    reference and then rotated/scaled by correct_trajectory so that it ends on the next reference.\n",
"\n",
"    :param rel_positions: array, rows are [timestamp, dx, dy] (one displacement per step)\n",
"    :param reference_positions: array, rows are [timestamp, x, y]\n",
"    :return: array, shape(M, 3), rows are [timestamp, x, y]\n",
"    :raises AssertionError: when the steps cannot be matched to the reference intervals\n",
"    \"\"\"\n",
"    rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])\n",
"    expected_segments = reference_positions.shape[0] - 1\n",
"    if rel_positions_list and len(rel_positions_list) != expected_segments:\n",
"        # One chunk too many: discard the last one (presumably the steps after the final\n",
"        # reference timestamp, which have no end point to be corrected towards).\n",
"        del rel_positions_list[-1]\n",
"    assert len(rel_positions_list) == expected_segments, (\n",
"        f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')\n",
"\n",
"    segments = []\n",
"    for i, rel_ps in enumerate(rel_positions_list):\n",
"        start_position = reference_positions[i]\n",
"        end_position = reference_positions[i + 1]\n",
"\n",
"        # running sum of the displacements, starting from the segment's reference position\n",
"        displacements = np.array(rel_ps[:, 1:3], dtype=float)\n",
"        displacements[0] += start_position[1:3]\n",
"        abs_ps = np.zeros(rel_ps.shape)\n",
"        abs_ps[:, 0] = rel_ps[:, 0]\n",
"        abs_ps[:, 1:3] = np.cumsum(displacements, axis=0)\n",
"        abs_ps = np.insert(abs_ps, 0, start_position, axis=0)\n",
"\n",
"        corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])\n",
"        corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))\n",
"        # after the first segment, row 0 repeats the end point of the previous segment\n",
"        segments.append(corrected_ps if i == 0 else corrected_ps[1:])\n",
"\n",
"    if not segments:\n",
"        return np.zeros((0, 3))\n",
"\n",
"    return np.vstack(segments)\n",
|
|
"\n",
|
|
"\n",
|
"def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):\n",
"    \"\"\"\n",
"    Design the low-pass filter and pre-load its internal state with warm-up samples.\n",
"\n",
"    :param sample_freq: sampling frequency, in the same unit as cut_off_freq\n",
"    :param warmup_data: 1-D samples that are run through the filter twice to settle its state\n",
"    :param cut_off_freq: cut-off frequency of the 4th-order Butterworth low-pass\n",
"    :return: (b, a, zf) - filter coefficients and the filter state after the warm-up\n",
"    \"\"\"\n",
"    butter_order = 4\n",
"    nyquist_freq = sample_freq / 2\n",
"    coeff_b, coeff_a = signal.butter(butter_order, cut_off_freq / nyquist_freq, btype='low', analog=False)\n",
"\n",
"    # start from the step-response steady state, then feed the warm-up samples through twice\n",
"    state = signal.lfilter_zi(coeff_b, coeff_a)\n",
"    for _warmup_pass in range(2):\n",
"        _, state = signal.lfilter(coeff_b, coeff_a, warmup_data, zi=state)\n",
"\n",
"    return coeff_b, coeff_a, state\n",
|
|
"\n",
|
|
"\n",
|
"def get_rotation_matrix_from_vector(rotation_vector):\n",
"    \"\"\"\n",
"    Convert a rotation vector (quaternion components) into a 3x3 rotation matrix.\n",
"\n",
"    The former 4x4 branch was removed: the output buffer was always created with 9 elements,\n",
"    so that branch could never run.\n",
"\n",
"    :param rotation_vector: (x, y, z) or (x, y, z, w) quaternion components, array or sequence;\n",
"        a missing w is reconstructed as sqrt(1 - x^2 - y^2 - z^2), or 0 if that is not positive\n",
"    :return: numpy ndarray, shape(3, 3)\n",
"    \"\"\"\n",
"    rotation_vector = np.asarray(rotation_vector, dtype=float)\n",
"    q1 = rotation_vector[0]\n",
"    q2 = rotation_vector[1]\n",
"    q3 = rotation_vector[2]\n",
"\n",
"    if rotation_vector.size >= 4:\n",
"        q0 = rotation_vector[3]\n",
"    else:\n",
"        q0 = 1 - q1*q1 - q2*q2 - q3*q3\n",
"        q0 = np.sqrt(q0) if q0 > 0 else 0\n",
"\n",
"    sq_q1 = 2 * q1 * q1\n",
"    sq_q2 = 2 * q2 * q2\n",
"    sq_q3 = 2 * q3 * q3\n",
"    q1_q2 = 2 * q1 * q2\n",
"    q3_q0 = 2 * q3 * q0\n",
"    q1_q3 = 2 * q1 * q3\n",
"    q2_q0 = 2 * q2 * q0\n",
"    q2_q3 = 2 * q2 * q3\n",
"    q1_q0 = 2 * q1 * q0\n",
"\n",
"    return np.array([\n",
"        [1 - sq_q2 - sq_q3, q1_q2 - q3_q0, q1_q3 + q2_q0],\n",
"        [q1_q2 + q3_q0, 1 - sq_q1 - sq_q3, q2_q3 - q1_q0],\n",
"        [q1_q3 - q2_q0, q2_q3 + q1_q0, 1 - sq_q1 - sq_q2],\n",
"    ])\n",
|
|
"\n",
|
|
"\n",
|
"def get_orientation(R):\n",
"    \"\"\"\n",
"    Extract three orientation angles (used by the caller as azimuth, pitch, roll) from a rotation matrix.\n",
"\n",
"    :param R: rotation matrix as ndarray; 9 elements are read as 3x3, anything else as 4x4\n",
"    :return: numpy ndarray, shape(3,), angles in radians\n",
"    \"\"\"\n",
"    flat_R = R.flatten()\n",
"    # positions of the needed matrix entries in the flattened 3x3 / 4x4 layout\n",
"    if np.size(flat_R) == 9:\n",
"        r01, r11, r21, r20, r22 = 1, 4, 7, 6, 8\n",
"    else:\n",
"        r01, r11, r21, r20, r22 = 1, 5, 9, 8, 10\n",
"\n",
"    return np.array([\n",
"        np.arctan2(flat_R[r01], flat_R[r11]),\n",
"        np.arcsin(-flat_R[r21]),\n",
"        np.arctan2(-flat_R[r20], flat_R[r22]),\n",
"    ])\n",
|
|
"\n",
|
|
"\n",
|
"def compute_steps(acce_datas):\n",
"    \"\"\"\n",
"    Detect steps by peak/valley analysis of the low-pass filtered acceleration magnitude.\n",
"\n",
"    :param acce_datas: 2-D array, one sample per row; column 0 is the timestamp, the remaining\n",
"        columns are the acceleration components\n",
"    :return: (step_timestamps, step_indexs, step_acce_max_mins); step_indexs are row numbers into\n",
"        acce_datas and step_acce_max_mins rows are [timestamp, max, min, variance]\n",
"    \"\"\"\n",
"    sample_freq = 50\n",
"    window_size = 22\n",
"    low_acce_mag = 0.6\n",
"    step_criterion = 1\n",
"    interval_threshold = 250\n",
"\n",
"    step_timestamps = np.array([])\n",
"    step_indexs = np.array([], dtype=int)\n",
"    step_acce_max_mins = np.zeros((0, 4))\n",
"\n",
"    # [timestamp, filtered magnitude] of the latest accepted peak / valley\n",
"    peak = np.zeros((2,))\n",
"    valley = np.zeros((2,))\n",
"    # sliding history of sample labels: 1 = peak, -1 = valley, 0 = in between\n",
"    labels = np.zeros((window_size,), dtype=int)\n",
"    prev_detrended = 0\n",
"    state_flag = 0\n",
"\n",
"    filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, np.ones((window_size,)) * 9.81)\n",
"    mag_window = np.zeros((window_size,))\n",
"\n",
"    # detect steps according to acceleration magnitudes\n",
"    for i in np.arange(0, np.size(acce_datas, 0)):\n",
"        sample = acce_datas[i, :]\n",
"        timestamp = sample[0]\n",
"        magnitude = np.sqrt(np.sum(sample[1:] ** 2))\n",
"\n",
"        filtered, filter_zf = signal.lfilter(filter_b, filter_a, [magnitude], zi=filter_zf)\n",
"        mag_filt = filtered[0]\n",
"\n",
"        # slide the window: drop the oldest value, add the newest\n",
"        mag_window = np.append(mag_window[1:], [mag_filt])\n",
"        acce_std = np.std(mag_window)\n",
"        mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])\n",
"\n",
"        # detect valid peak or valley of acceleration magnitudes\n",
"        detrended = mag_filt - np.mean(mag_window)\n",
"        if detrended > np.max([prev_detrended, mag_threshold]):\n",
"            label = 1  # peak\n",
"        elif detrended < np.min([prev_detrended, -mag_threshold]):\n",
"            label = -1  # valley\n",
"        else:\n",
"            label = 0  # between peak and valley\n",
"        labels = np.append(labels[1:], [label])\n",
"        current, previous = labels[-1], labels[-2]\n",
"\n",
"        # a peak has just ended\n",
"        if current == 0 and previous == 1:\n",
"            if state_flag == 0:\n",
"                peak[:] = timestamp, mag_filt\n",
"                state_flag = 1\n",
"            elif state_flag == 1 and (timestamp - peak[0]) <= interval_threshold and mag_filt > peak[1]:\n",
"                peak[:] = timestamp, mag_filt\n",
"            elif state_flag == 2 and (timestamp - peak[0]) > interval_threshold:\n",
"                peak[:] = timestamp, mag_filt\n",
"                state_flag = 1\n",
"\n",
"        # choose reasonable step criterion and check if there is a valid step\n",
"        step_flag = False\n",
"        if step_criterion == 2:\n",
"            if current == -1 and (previous == 1 or previous == 0):\n",
"                step_flag = True\n",
"        elif step_criterion == 3:\n",
"            if current == -1 and previous == 0 and np.sum(labels[:-2]) > 1:\n",
"                step_flag = True\n",
"        elif current == 0 and previous == -1:\n",
"            # a valley has just ended\n",
"            if state_flag == 1 and (timestamp - valley[0]) > interval_threshold:\n",
"                valley[:] = timestamp, mag_filt\n",
"                state_flag = 2\n",
"                step_flag = True\n",
"            elif state_flag == 2 and (timestamp - valley[0]) <= interval_threshold and mag_filt < valley[1]:\n",
"                valley[:] = timestamp, mag_filt\n",
"\n",
"        # save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]\n",
"        if step_flag:\n",
"            step_timestamps = np.append(step_timestamps, timestamp)\n",
"            step_indexs = np.append(step_indexs, [i])\n",
"            step_acce_max_mins = np.append(step_acce_max_mins,\n",
"                                           [[timestamp, peak[1], valley[1], acce_std ** 2]], axis=0)\n",
"        prev_detrended = detrended\n",
"\n",
"    return step_timestamps, step_indexs, step_acce_max_mins\n",
|
|
"\n",
|
|
"\n",
|
"def compute_stride_length(step_acce_max_mins):\n",
"    \"\"\"\n",
"    Estimate the stride length of every detected step.\n",
"\n",
"    :param step_acce_max_mins: array, shape(N, 4), rows are [timestamp, max, min, variance];\n",
"        timestamp differences are divided by 1000 to obtain the step period in seconds\n",
"    :return: array, shape(N, 2), rows are [timestamp, stride length]; shape(0, 2) when no step\n",
"        was detected (this case used to fail with a negative array dimension)\n",
"    \"\"\"\n",
"    K = 0.4\n",
"    K_max = 0.8\n",
"    K_min = 0.4\n",
"    para_a0 = 0.21468084\n",
"    para_a1 = 0.09154517\n",
"    para_a2 = 0.02301998\n",
"\n",
"    step_acce_max_mins = np.asarray(step_acce_max_mins, dtype=float)\n",
"    step_count = step_acce_max_mins.shape[0]\n",
"    stride_lengths = np.zeros((step_count, 2))\n",
"    if step_count == 0:\n",
"        return stride_lengths\n",
"    stride_lengths[:, 0] = step_acce_max_mins[:, 0]\n",
"\n",
"    # calculate every step period - unit: second - averaged over the current and previous period\n",
"    raw_periods = np.diff(step_acce_max_mins[:, 0]) / 1000\n",
"    step_timeperiod = raw_periods.copy()\n",
"    step_timeperiod[1:] = (raw_periods[:-1] + raw_periods[1:]) / 2\n",
"\n",
"    # calculate parameters by step period and acceleration magnitude variance;\n",
"    # the first step has no period yet and keeps the default K\n",
"    k_real = np.full((step_count,), K)\n",
"    k_raw = para_a0 + para_a1 / step_timeperiod + para_a2 * step_acce_max_mins[:-1, 3]\n",
"    k_real[1:] = np.minimum(np.maximum(k_raw, K_min), K_max) * (K / K_min)\n",
"\n",
"    # calculate every stride length by parameters and max and min data of acceleration magnitude\n",
"    swing = step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]\n",
"    stride_lengths[:, 1] = np.maximum(swing, 1.0) ** (1 / 4) * k_real\n",
"\n",
"    return stride_lengths\n",
|
|
"\n",
|
|
"\n",
|
"def compute_headings(ahrs_datas):\n",
"    \"\"\"\n",
"    Turn every rotation-vector sample into a heading angle.\n",
"\n",
"    :param ahrs_datas: 2-D array, one sample per row: [timestamp, rotation vector components...]\n",
"    :return: array, shape(N, 2), rows are [timestamp, heading]; the heading is the negated\n",
"        azimuth wrapped into [0, 2*pi)\n",
"    \"\"\"\n",
"    sample_count = np.size(ahrs_datas, 0)\n",
"    headings = np.zeros((sample_count, 2))\n",
"    full_turn = 2 * np.pi\n",
"    for row in range(sample_count):\n",
"        sample = ahrs_datas[row, :]\n",
"        # only the azimuth is needed; pitch and roll are ignored\n",
"        azimuth, _pitch, _roll = get_orientation(get_rotation_matrix_from_vector(sample[1:]))\n",
"        headings[row, 0] = sample[0]\n",
"        headings[row, 1] = (-azimuth) % full_turn\n",
"    return headings\n",
|
|
"\n",
|
|
"\n",
|
"def compute_step_heading(step_timestamps, headings):\n",
"    \"\"\"\n",
"    Pick the heading sample that carries exactly the timestamp of each step.\n",
"\n",
"    Both inputs are walked once in order, so a step is only matched by a heading row that comes\n",
"    after the row matched by the previous step.\n",
"\n",
"    :param step_timestamps: timestamps of the detected steps\n",
"    :param headings: array, rows are [timestamp, heading]\n",
"    :return: array, shape(len(step_timestamps), 2), rows are [timestamp, heading]\n",
"    :raises AssertionError: when a step timestamp has no matching heading row\n",
"    \"\"\"\n",
"    total_steps = len(step_timestamps)\n",
"    step_headings = np.zeros((total_steps, 2))\n",
"    matched = 0\n",
"    for row in range(len(headings)):\n",
"        if matched >= total_steps:\n",
"            break\n",
"        if headings[row, 0] == step_timestamps[matched]:\n",
"            step_headings[matched, :] = headings[row, :]\n",
"            matched += 1\n",
"    assert matched == total_steps\n",
"\n",
"    return step_headings\n",
|
|
"\n",
|
|
"\n",
|
"def compute_rel_positions(stride_lengths, step_headings):\n",
"    \"\"\"\n",
"    Convert stride length and heading of every step into a displacement vector.\n",
"\n",
"    :param stride_lengths: array, shape(N, 2), rows are [timestamp, stride length]\n",
"    :param step_headings: array with at least N rows, rows are [timestamp, heading in radians]\n",
"    :return: array, shape(N, 3), rows are [timestamp, -length * sin(heading), length * cos(heading)]\n",
"    :raises IndexError: when step_headings has fewer rows than stride_lengths\n",
"    \"\"\"\n",
"    step_count = stride_lengths.shape[0]\n",
"    heading_angles = step_headings[:step_count, 1]\n",
"    if heading_angles.shape[0] != step_count:\n",
"        raise IndexError(f'{step_count} strides but only {heading_angles.shape[0]} step headings')\n",
"\n",
"    rel_positions = np.zeros((step_count, 3))\n",
"    rel_positions[:, 0] = stride_lengths[:, 0]\n",
"    rel_positions[:, 1] = -stride_lengths[:, 1] * np.sin(heading_angles)\n",
"    rel_positions[:, 2] = stride_lengths[:, 1] * np.cos(heading_angles)\n",
"\n",
"    return rel_positions\n",
|
|
"\n",
|
|
"\n",
|
"def compute_step_positions(acce_datas, ahrs_datas, posi_datas):\n",
"    \"\"\"\n",
"    Run the whole pipeline: detect steps, estimate stride and heading per step, then anchor the\n",
"    resulting relative track to the reference positions.\n",
"\n",
"    :param acce_datas: accelerometer samples, passed to compute_steps\n",
"    :param ahrs_datas: rotation-vector samples, passed to compute_headings\n",
"    :param posi_datas: reference positions, passed to correct_positions\n",
"    :return: corrected step positions as returned by correct_positions\n",
"    \"\"\"\n",
"    step_timestamps, _step_indexs, step_acce_max_mins = compute_steps(acce_datas)\n",
"    stride_lengths = compute_stride_length(step_acce_max_mins)\n",
"\n",
"    headings = compute_headings(ahrs_datas)\n",
"    step_headings = compute_step_heading(step_timestamps, headings)\n",
"\n",
"    rel_positions = compute_rel_positions(stride_lengths, step_headings)\n",
"    return correct_positions(rel_positions, posi_datas)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 67,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sample_submission = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 68,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th>timestamp</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>building</th>\n",
|
|
" <th>path_id</th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th rowspan=\"5\" valign=\"top\">5a0546857ecc773753327266</th>\n",
|
|
" <th>046cfa46be49fc10834815c6</th>\n",
|
|
" <td>[0000000000009, 0000000009017, 0000000015326, ...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>05d052dde78384b0c543d89c</th>\n",
|
|
" <td>[0000000000012, 0000000005748, 0000000014654, ...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0c06cc9f21d172618d74c6c8</th>\n",
|
|
" <td>[0000000000011, 0000000011818, 0000000019825, ...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>146035943a1482883ed98570</th>\n",
|
|
" <td>[0000000000011, 0000000004535, 0000000011498, ...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1ef2771dfea25d508142ba06</th>\n",
|
|
" <td>[0000000000009, 0000000012833, 0000000021759, ...</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" timestamp\n",
|
|
"building path_id \n",
|
|
"5a0546857ecc773753327266 046cfa46be49fc10834815c6 [0000000000009, 0000000009017, 0000000015326, ...\n",
|
|
" 05d052dde78384b0c543d89c [0000000000012, 0000000005748, 0000000014654, ...\n",
|
|
" 0c06cc9f21d172618d74c6c8 [0000000000011, 0000000011818, 0000000019825, ...\n",
|
|
" 146035943a1482883ed98570 [0000000000011, 0000000004535, 0000000011498, ...\n",
|
|
" 1ef2771dfea25d508142ba06 [0000000000009, 0000000012833, 0000000021759, ..."
|
|
]
|
|
},
|
|
"execution_count": 68,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# site_path_timestamp has the form '<building>_<path_id>_<timestamp>': split once, fan out the parts\n",
"key_parts = sample_submission['site_path_timestamp'].str.split('_', expand=True)\n",
"sample_submission['building'] = key_parts[0]\n",
"sample_submission['path_id'] = key_parts[1]\n",
"sample_submission['timestamp'] = key_parts[2]\n",
"# one row per (building, path_id) holding the list of requested timestamps\n",
"samples = sample_submission.groupby(['building','path_id'])['timestamp'].apply(list).to_frame()\n",
"buildings = np.unique([key[0] for key in samples.index])\n",
"samples.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 70,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"5a0546857ecc773753327266\n",
|
|
"5c3c44b80379370013e0fd2b\n",
|
|
"5d27075f03f801723c2e360f\n",
|
|
"5d27096c03f801723c31e5e0\n",
|
|
"5d27097f03f801723c320d97\n",
|
|
"5d27099f03f801723c32511d\n",
|
|
"5d2709a003f801723c3251bf\n",
|
|
"5d2709b303f801723c327472\n",
|
|
"5d2709bb03f801723c32852c\n",
|
|
"5d2709c303f801723c3299ee\n",
|
|
"5d2709d403f801723c32bd39\n",
|
|
"5d2709e003f801723c32d896\n",
|
|
"5da138274db8ce0c98bbd3d2\n",
|
|
"5da1382d4db8ce0c98bbe92e\n",
|
|
"5da138314db8ce0c98bbf3a0\n",
|
|
"5da138364db8ce0c98bc00f1\n",
|
|
"5da1383b4db8ce0c98bc11ab\n",
|
|
"5da138754db8ce0c98bca82f\n",
|
|
"5da138764db8ce0c98bcaa46\n",
|
|
"5da1389e4db8ce0c98bd0547\n",
|
|
"5da138b74db8ce0c98bd4774\n",
|
|
"5da958dd46f8266d0737457b\n",
|
|
"5dbc1d84c1eb61796cf7c010\n",
|
|
"5dc8cea7659e181adb076a3f\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"from scipy.interpolate import interp1d\n",
"# scipy.ndimage.filters is a deprecated namespace; the function lives in scipy.ndimage\n",
"from scipy.ndimage import uniform_filter1d\n",
"\n",
"colacce = ['xyz_time','x_acce','y_acce','z_acce']\n",
"colahrs = ['xyz_time','x_ahrs','y_ahrs','z_ahrs']\n",
"\n",
"for building in buildings:\n",
"    print(building)\n",
"    paths = samples.loc[building].index\n",
"    # Acceleration info:\n",
"    tfm = pd.read_csv(f'indoor_testing_accel/{building}.txt',index_col=0)\n",
"    for path_id in paths:\n",
"        # Original predicted values:\n",
"        xy = result.loc[building+'_'+path_id]\n",
"        tfmi = tfm.loc[path_id]\n",
"        # dtype=float replaces np.float, an alias of the builtin that was removed in NumPy 1.24\n",
"        acce_datas = np.array(tfmi[colacce],dtype=float)\n",
"        ahrs_datas = np.array(tfmi[colahrs],dtype=float)\n",
"        posi_datas = np.array(xy[['t1_wifi','x','y']],dtype=float)\n",
"        # Outlier removal: keep predictions whose distance to the 3-point running mean\n",
"        # is below 3 standard deviations of that distance, in both x and y\n",
"        xyout = uniform_filter1d(posi_datas,size=3,axis=0,mode='reflect')\n",
"        xydiff = np.abs(posi_datas-xyout)\n",
"        xystd = np.std(xydiff,axis=0)*3\n",
"        posi_datas = posi_datas[(xydiff[:,1]<xystd[1])&(xydiff[:,2]<xystd[2])]\n",
"        # Step detection:\n",
"        step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)\n",
"        stride_lengths = compute_stride_length(step_acce_max_mins)\n",
"        # Orientation detection:\n",
"        headings = compute_headings(ahrs_datas)\n",
"        step_headings = compute_step_heading(step_timestamps, headings)\n",
"        rel_positions = compute_rel_positions(stride_lengths, step_headings)\n",
"        # Running average, keeping every 3rd smoothed prediction:\n",
"        posi_datas = uniform_filter1d(posi_datas,size=3,axis=0,mode='reflect')[0::3,:]\n",
"        # The 1st prediction timepoint should be earlier than the 1st step timepoint.\n",
"        rel_positions = rel_positions[rel_positions[:,0]>posi_datas[0,0],:]\n",
"        # If two consecutive predictions are in-between two step datapoints,\n",
"        # the last one is removed, causing error (in the \"split_ts_seq\" function).\n",
"        # Predictions that fall into the same step interval are therefore averaged into one.\n",
"        posi_index = [np.searchsorted(rel_positions[:,0], x, side='right') for x in posi_datas[:,0]]\n",
"        _, group_ids = np.unique(posi_index, return_inverse=True)\n",
"        posi_datas = np.vstack([np.mean(posi_datas[group_ids==g],axis=0) for g in np.unique(group_ids)])\n",
"        # Position correction:\n",
"        step_positions = correct_positions(rel_positions, posi_datas)\n",
"        # Interpolate for timestamps in the testing set;\n",
"        # timestamps outside the corrected track get the first / last position:\n",
"        t = step_positions[:,0]\n",
"        x = step_positions[:,1]\n",
"        y = step_positions[:,2]\n",
"        fx = interp1d(t, x, kind='linear', fill_value=(x[0],x[-1]), bounds_error=False) #fill_value=\"extrapolate\"\n",
"        fy = interp1d(t, y, kind='linear', fill_value=(y[0],y[-1]), bounds_error=False)\n",
"        # Output result:\n",
"        t0 = np.array(samples.loc[(building,path_id),'timestamp'],dtype=np.float64)\n",
"        path_rows = (sample_submission.building==building)&(sample_submission.path_id==path_id)\n",
"        sample_submission.loc[path_rows,'x'] = fx(t0)\n",
"        sample_submission.loc[path_rows,'y'] = fy(t0)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 71,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"subold = pd.read_csv('submission_floor.csv')\n",
"# The floor column is copied row by row (both frames carry the default index), so a shorter\n",
"# floor file would silently leave NaN floors in the submission.\n",
"# NOTE(review): this also assumes both files list the rows in the same order - confirm.\n",
"assert len(subold) == len(sample_submission), 'submission_floor.csv and sample_submission differ in row count'\n",
"sample_submission['floor'] = subold['floor']\n",
"sample_submission[['site_path_timestamp','floor','x','y']].to_csv('submission_wifi.csv',index=False)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|