{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import scipy.stats as stats\n", "from pathlib import Path\n", "import glob\n", "import pickle\n", "\n", "import random\n", "import os\n", "\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.preprocessing import StandardScaler, LabelEncoder\n", "from tqdm import tqdm\n", "import tensorflow as tf\n", "import tensorflow.keras.layers as L\n", "import tensorflow.keras.models as M\n", "import tensorflow.keras.backend as K\n", "import tensorflow_addons as tfa\n", "from tensorflow_addons.layers import WeightNormalization\n", "from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping\n", "pd.options.mode.chained_assignment = None\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n" ] } ], "source": [ "from tensorflow.python.client import device_lib\n", "def get_available_gpus():\n", " local_device_protos = device_lib.list_local_devices()\n", " return [x.name for x in local_device_protos if x.device_type == 'GPU']\n", "print(get_available_gpus())\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
site_path_timestampsitepathts_waypoint
05a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c69
15a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c69017
25a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c615326
35a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c618763
45a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c622328
\n", "
" ], "text/plain": [ " site_path_timestamp \\\n", "0 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "1 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "2 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "3 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "4 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "\n", " site path ts_waypoint \n", "0 5a0546857ecc773753327266 046cfa46be49fc10834815c6 9 \n", "1 5a0546857ecc773753327266 046cfa46be49fc10834815c6 9017 \n", "2 5a0546857ecc773753327266 046cfa46be49fc10834815c6 15326 \n", "3 5a0546857ecc773753327266 046cfa46be49fc10834815c6 18763 \n", "4 5a0546857ecc773753327266 046cfa46be49fc10834815c6 22328 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "# PATH = '../input/indoor-location-navigation'\n", "# test_files = glob.glob(f'{PATH}/test/*.txt')\n", "# test_files_pd = [xx.split('/')[-1:][0].replace('.txt','') for xx in test_files]\n", "# test_files_pd = pd.DataFrame(test_files_pd)\n", "# test_files_pd.columns = ['path']\n", "\n", "sample_submission = pd.read_csv(\"../input/indoor-location-navigation/sample_submission.csv\")\n", "sample_submission['site'] = [xx.split('_')[0] for xx in sample_submission.site_path_timestamp]\n", "sample_submission['path'] = [xx.split('_')[1] for xx in sample_submission.site_path_timestamp]\n", "sample_submission['ts_waypoint'] = [int(xx.split('_')[2]) for xx in sample_submission.site_path_timestamp]\n", "del sample_submission['floor']\n", "del sample_submission['x']\n", "del sample_submission['y']\n", "\n", "path2site = dict(zip(sample_submission.path,sample_submission.site))\n", "sample_submission.head()\n", "# test_path_site = sample_submission[['site','path','timestamp','site_path_timestamp']]\n", "# test_files_pd = pd.merge(test_files_pd,test_path_site,how='left',on='path')\n", "# test_files_pd.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "test_wifi_files = glob.glob(f'../input/wifi_lbl_encode/test/*.txt')\n", "\n", "# train_files = glob.glob('../input/indoor-navigation-and-location-wifi-features-alldata/*train.csv') #if A \n", "train_files = glob.glob('../input/data_abstract/*_train_waypoint_all.csv')#if B\n", "\n", " \n", "train_wifi_files = glob.glob(f'../input/wifi_lbl_encode/train/*/*/*.txt')\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../input/data_abstract/5a0546857ecc773753327266_train_waypoint_all.csv',\n", " '../input/data_abstract/5c3c44b80379370013e0fd2b_train_waypoint_all.csv']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_files[:2]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "len train site list: 24\n" ] } ], "source": [ "# train_site_list = [xx.split('/')[-1].replace('_train.csv','') for xx in train_files] #if A \n", "# train_site_list = [xx.split('/')[-1].replace('_train_waypoint_all.csv','') for xx in train_files] #if B 204\n", "train_site_list = list(sample_submission.site.unique()) # if B 24\n", "train_wifi_files = [xx for xx in train_wifi_files if xx.split('/')[-3] in train_site_list]\n", "print('len train site list:',len(train_site_list))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10877" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"len(train_wifi_files)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 11503/11503 [01:02<00:00, 184.06it/s]\n" ] } ], "source": [ "ssidlist = set()\n", "bssidlist = set()\n", "for filename in tqdm(train_wifi_files+test_wifi_files):\n", " tmp = pd.read_csv(filename)\n", " ssidlist = ssidlist|set(tmp.ssid)\n", " bssidlist = bssidlist|set(tmp.bssid)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20044, 65952)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(set(ssidlist)),len(set(bssidlist))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "seqlen = 100" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "ssiddict = dict(zip(list(ssidlist)+['empty'],range(len(ssidlist)+1)))\n", "bssiddict = dict(zip(list(bssidlist)+['empty'],range(len(bssidlist)+1)))\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10877/10877 [00:42<00:00, 254.02it/s]\n" ] } ], "source": [ "train_wifi_pd_csv = []\n", "for filename in tqdm(train_wifi_files):\n", " tmp = pd.read_csv(filename)\n", " tmp['path'] = filename.split('/')[-1].replace('.txt','')\n", " tmp['floor'] = filename.split('/')[-2]\n", " tmp['site'] = filename.split('/')[-3]\n", " train_wifi_pd_csv.append(tmp)\n", "train_wifi_pd_csv = pd.concat(train_wifi_pd_csv).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "floor_map = {\"B3\":-3,\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n", " \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}\n", "train_wifi_pd_csv = train_wifi_pd_csv[train_wifi_pd_csv.floor.isin(floor_map)].reset_index(drop=True)\n", "train_wifi_pd_csv['floorNo'] = train_wifi_pd_csv['floor'].apply(lambda x: floor_map[x])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamppathfloorsitefloorNo
0157846261882663159162932-4615784626032775e15730aa280850006f3d005B15a0546857ecc773753327266-1
115784626188263283565513-4915784626182725e15730aa280850006f3d005B15a0546857ecc773753327266-1
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp \\\n", "0 1578462618826 63159 162932 -46 1578462603277 \n", "1 1578462618826 32835 65513 -49 1578462618272 \n", "\n", " path floor site floorNo \n", "0 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1 \n", "1 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_wifi_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:02<00:00, 208.96it/s]\n" ] } ], "source": [ "test_wifi_pd_csv = []\n", "for filename in tqdm(test_wifi_files):\n", " tmp = pd.read_csv(filename)\n", " tmp['path'] = filename.split('/')[-1].replace('.txt','')\n", " test_wifi_pd_csv.append(tmp)\n", "test_wifi_pd_csv = pd.concat(test_wifi_pd_csv).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamppath
019617053728318-34157182856015614f45baa63b4d3a700126af6
119614383893116-35157182856015914f45baa63b4d3a700126af6
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp path\n", "0 1961 70537 28318 -34 1571828560156 14f45baa63b4d3a700126af6\n", "1 1961 43838 93116 -35 1571828560159 14f45baa63b4d3a700126af6" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "submission = pd.read_csv('submission_floor.csv')\n", "submission['path'] = [xx.split('_')[1] for xx in submission['site_path_timestamp']]\n", "test_path_floor_dict = dict(zip(submission.path,submission.floor))\n", "test_wifi_pd_csv['floorNo'] = [test_path_floor_dict[xx] for xx in test_wifi_pd_csv['path']]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "\n", "ss = StandardScaler()\n", "ss.fit(train_wifi_pd_csv.loc[:,['rssi','floorNo']])\n", "train_wifi_pd_csv.loc[:,['rssi','floorNo']] = ss.transform(train_wifi_pd_csv.loc[:,['rssi','floorNo']])\n", "test_wifi_pd_csv.loc[:,['rssi','floorNo']] = ss.transform(test_wifi_pd_csv.loc[:,['rssi','floorNo']])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamppathfloorsitefloorNo
01578462618826631591629323.10592615784626032775e15730aa280850006f3d005B15a0546857ecc773753327266-1.340327
1157846261882632835655132.81072715784626182725e15730aa280850006f3d005B15a0546857ecc773753327266-1.340327
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp \\\n", "0 1578462618826 63159 162932 3.105926 1578462603277 \n", "1 1578462618826 32835 65513 2.810727 1578462618272 \n", "\n", " path floor site floorNo \n", "0 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1.340327 \n", "1 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1.340327 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_wifi_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10877/10877 [02:51<00:00, 63.43it/s] \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_meanwifi_medianwifi_std
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.3536030.3507371.088208
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.2997480.3507371.040317
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean wifi_median wifi_std \n", "0 0.206 0.353603 0.350737 1.088208 \n", "1 0.220 0.299748 0.350737 1.040317 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_wifi_pd = []\n", "for path,tmp in tqdm(train_wifi_pd_csv.groupby('path')):\n", " #tmp = pd.read_csv(filename)\n", " #tmp['rssi'] = tmp['rssi']/999\n", " tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])\n", " tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])\n", " ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) \n", " ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))\n", " ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))\n", " \n", " ss = pd.concat([ss1,ss2,ss3],axis=1)\n", " ss['path'] = tmp.path.unique()[0]\n", " ss['floorNo'] = tmp.floorNo.unique()[0]\n", " ss['floor'] = tmp.floor.unique()[0]\n", " ss['site'] = tmp.site.unique()[0]\n", " ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500\n", " ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()\n", " ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()\n", " ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()\n", "\n", " train_wifi_pd.append(ss)\n", "train_wifi_pd = pd.concat(train_wifi_pd)\n", "train_wifi_pd = train_wifi_pd.reset_index()\n", "train_wifi_pd.head(2)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:14<00:00, 41.79it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_stdsite
01180[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.0330935da1389e4db8ce0c98bd0547
13048[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.9915295da1389e4db8ce0c98bd0547
24924[9522, 18669, 7007, 19396, 15215, 4851, 15215,...[10783, 4531, 35106, 19211, 48757, 11767, 3933...[1.4331324770012857, 1.2363332562632146, 1.039...00ff0c9a71cc37a2ebdd0f050.8459570.048-0.149461-0.4364600.8155215da1389e4db8ce0c98bd0547
36816[18669, 4851, 15215, 7007, 9522, 19396, 19396,...[4531, 11767, 39335, 35106, 10783, 19211, 5710...[1.826730918477428, 1.1379336458941791, 1.0395...00ff0c9a71cc37a2ebdd0f050.8459570.052-0.118554-0.5348600.9118025da1389e4db8ce0c98bd0547
48693[18669, 15215, 7007, 4851, 9522, 19396, 15215,...[4531, 48757, 35106, 11767, 10783, 19211, 3933...[2.1219297495845346, 1.3347328666322502, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.062-0.182526-0.5348600.9053395da1389e4db8ce0c98bd0547
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1180 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 3048 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "2 4924 [9522, 18669, 7007, 19396, 15215, 4851, 15215,... \n", "3 6816 [18669, 4851, 15215, 7007, 9522, 19396, 19396,... \n", "4 8693 [18669, 15215, 7007, 4851, 9522, 19396, 15215,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "2 [10783, 4531, 35106, 19211, 48757, 11767, 3933... \n", "3 [4531, 11767, 39335, 35106, 10783, 19211, 5710... \n", "4 [4531, 48757, 35106, 11767, 10783, 19211, 3933... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "2 [1.4331324770012857, 1.2363332562632146, 1.039... \n", "3 [1.826730918477428, 1.1379336458941791, 1.0395... \n", "4 [2.1219297495845346, 1.3347328666322502, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "2 00ff0c9a71cc37a2ebdd0f05 0.845957 0.048 -0.149461 -0.436460 \n", "3 00ff0c9a71cc37a2ebdd0f05 0.845957 0.052 -0.118554 -0.534860 \n", "4 00ff0c9a71cc37a2ebdd0f05 0.845957 0.062 -0.182526 -0.534860 \n", "\n", " wifi_std site \n", "0 1.033093 5da1389e4db8ce0c98bd0547 \n", "1 0.991529 5da1389e4db8ce0c98bd0547 \n", "2 0.815521 5da1389e4db8ce0c98bd0547 \n", "3 0.911802 5da1389e4db8ce0c98bd0547 \n", "4 0.905339 5da1389e4db8ce0c98bd0547 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd = []\n", "# for filename in tqdm(test_wifi_files):\n", "for path,tmp in tqdm(test_wifi_pd_csv.groupby('path')):\n", " #tmp = pd.read_csv(filename)\n", " #tmp['rssi'] = tmp['rssi']/999\n", " tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])\n", " tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])\n", " ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) \n", " ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))\n", " ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))\n", " ss = pd.concat([ss1,ss2,ss3],axis=1)\n", " #ss['path'] = filename.split('/')[-1].replace('.txt','')\n", " ss['path'] = tmp.path.unique()[0]\n", " ss['floorNo'] = tmp.floorNo.unique()[0]\n", " ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500\n", " ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()\n", " ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()\n", " ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()\n", "\n", " test_wifi_pd.append(ss)\n", "test_wifi_pd = pd.concat(test_wifi_pd)\n", "test_wifi_pd = test_wifi_pd.reset_index()\n", "test_wifi_pd['site'] = [path2site[xx] for xx in test_wifi_pd.path]\n", "test_wifi_pd.head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 204/204 [00:00<00:00, 263.20it/s]\n" ] } ], "source": [ "# filename = train_files[0]\n", "train_xy = []\n", "for filename in tqdm(train_files):\n", " tmp = pd.read_csv(filename,index_col=0)\n", " ss = 
tmp[['path','site','floor','ts_waypoint','x','y']]\n", " train_xy.append(ss)\n", "train_xy = pd.concat(train_xy).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(166681, 6)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_xy=train_xy.drop_duplicates()\n", "train_xy.shape" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# path = '5e15730aa280850006f3d005'\n", "# train_wifi_pd_x = train_wifi_pd[train_wifi_pd.path==path]\n", "# train_y = train_xy[train_xy.path==path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)\n", "# if len(train_y)==0:\n", "# print(path,'have no waypoint')\n", "# if len(train_y)>0:\n", "# ts_point_min = train_y.ts_waypoint.min()\n", "# ts_point_max = train_y.ts_waypoint.max()\n", "# tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()\n", "# tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]\n", "# if len(tmp2)>0:\n", "# T_rel = train_y['ts_waypoint']\n", "# T_ref = tmp2['timestamp']\n", "# xy_hat = scipy.interpolate.interp1d(T_rel, train_y[['x','y']], axis=0)(T_ref)\n", "# tmp2['x'] = xy_hat[:,0]\n", "# tmp2['y'] = xy_hat[:,1]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10877/10877 [03:16<00:00, 55.30it/s]\n" ] } ], "source": [ "import scipy.stats as stats\n", "import scipy\n", "train_all = []\n", "\n", "for path,train_wifi_pd_x in tqdm(train_wifi_pd.groupby('path')):\n", " # path = '5e15730aa280850006f3d005'\n", " train_y = train_xy[train_xy.path==path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)\n", " train_wifi_pd_x['ts_waypoint'] = 0\n", " if len(train_y)==0:\n", " print(path,'have no waypoint')\n", " if len(train_y)>0:\n", " ts_point_min = train_y.ts_waypoint.min()\n", " ts_point_max = train_y.ts_waypoint.max()\n", " tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()\n", " tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]\n", " if len(tmp2)>0:\n", " T_rel = train_y['ts_waypoint']\n", " T_ref = tmp2['timestamp']\n", " xy_hat = scipy.interpolate.interp1d(T_rel, train_y[['x','y']], axis=0)(T_ref)\n", " tmp2['x'] = xy_hat[:,0]\n", " tmp2['y'] = xy_hat[:,1]\n", " tmp2['path'] = path\n", " train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp2,how='left',on=['path','timestamp'])\n", " train_all.append(train_wifi_pd_x)\n", " \n", "train_all = pd.concat(train_all).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(258097, 15)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.shape" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# ###use nearest location\n", "# train_all = []\n", "\n", "# for path,train_wifi_pd_x in tqdm(train_wifi_pd.groupby('path')):\n", "# # path = '5e15730aa280850006f3d005'\n", "# train_y = train_xy[train_xy.path==path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)\n", "# train_wifi_pd_x['ts_waypoint'] = 0\n", "# if len(train_y)==0:\n", "# print(path,'have no waypoint')\n", "# if len(train_y)>0:\n", "# timestamplist = np.array(train_y.ts_waypoint)\n", "# for ii in train_wifi_pd_x.index:\n", "# distlist = 
np.abs(timestamplist-train_wifi_pd_x.loc[ii,'timestamp'])\n", "# nearest_wp_index = np.argmin(distlist)\n", "# train_wifi_pd_x.loc[ii,'ts_waypoint'] = int(timestamplist[nearest_wp_index])\n", "# train_wifi_pd_x = pd.merge(train_wifi_pd_x,train_y,how='left',on=['path','ts_waypoint'])\n", "# train_all.append(train_wifi_pd_x)\n", " \n", "# train_all = pd.concat(train_all).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((11756, 15), (11756, 15))" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all[train_all.x.isna()].shape,train_all[train_all.y.isna()].shape" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "train_all = train_all[~train_all.x.isna()].reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_meanwifi_medianwifi_stdts_waypointxy
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.3536030.3507371.0882080195.79062393.465301
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.2997480.3507371.0403170193.59133392.973266
21560501001590[18304, 19396, 7702, 7702, 19396, 7702, 12721,...[10121, 57287, 31140, 61027, 55262, 22353, 603...[3.1059258532748903, 3.1059258532748903, 2.810...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2380.2688750.3507371.0463410191.39434492.481745
31560501003516[19396, 7702, 19396, 18304, 7702, 7702, 7702, ...[57287, 31140, 55262, 10121, 22353, 53865, 432...[3.1059258532748903, 2.8107270221677836, 2.613...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2580.2302160.2523370.9956310189.17779191.985848
41560501005442[7702, 18304, 19396, 19396, 7702, 7702, 7702, ...[31140, 10121, 55262, 57287, 43265, 61027, 612...[2.8107270221677836, 2.6139278014297127, 2.613...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2820.2104650.2523370.9636300186.96123891.489950
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "2 1560501001590 [18304, 19396, 7702, 7702, 19396, 7702, 12721,... \n", "3 1560501003516 [19396, 7702, 19396, 18304, 7702, 7702, 7702, ... \n", "4 1560501005442 [7702, 18304, 19396, 19396, 7702, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "2 [10121, 57287, 31140, 61027, 55262, 22353, 603... \n", "3 [57287, 31140, 55262, 10121, 22353, 53865, 432... \n", "4 [31140, 10121, 55262, 57287, 43265, 61027, 612... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "2 [3.1059258532748903, 3.1059258532748903, 2.810... \n", "3 [3.1059258532748903, 2.8107270221677836, 2.613... \n", "4 [2.8107270221677836, 2.6139278014297127, 2.613... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "2 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "3 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "4 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean wifi_median wifi_std ts_waypoint x \\\n", "0 0.206 0.353603 0.350737 1.088208 0 195.790623 \n", "1 0.220 0.299748 0.350737 1.040317 0 193.591333 \n", "2 0.238 0.268875 0.350737 1.046341 0 191.394344 \n", "3 0.258 0.230216 0.252337 0.995631 0 189.177791 \n", "4 0.282 0.210465 0.252337 0.963630 0 186.961238 \n", "\n", " y \n", "0 93.465301 \n", "1 92.973266 \n", "2 92.481745 \n", "3 91.985848 \n", "4 91.489950 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.head()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(246341, 15)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.shape" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# from sklearn.model_selection import StratifiedKFold\n", "# from sklearn.preprocessing import StandardScaler, LabelEncoder\n", "# N_SPLITS = 10\n", "# SEED = 42\n", "# for fold, (trn_idx, val_idx) in enumerate(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED).split(train_all['site'], train_all['site'])):\n", "# train_all.loc[val_idx, 'fold'] = fold\n", " " ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import KFold\n", "N_SPLITS = 10\n", "\n", "path_list = train_all['path'].unique()\n", "folds = KFold(n_splits=N_SPLITS, shuffle=True, random_state=1024) \n", "for n_fold, (train_idx, valid_idx) in enumerate(folds.split(path_list), start=0):\n", " train_all.loc[train_all['path'].isin(path_list[valid_idx]), 'fold'] = n_fold" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "# train_all[train_all.path=='5dd3824044333f00067aa2c4'].fold.value_counts()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# train_all[train_all.site=='5c3c44b80379370013e0fd2b'].fold.value_counts()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { 
"data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_meanwifi_medianwifi_stdts_waypointxyfold
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.3536030.3507371.0882080195.79062393.4653016.0
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.2997480.3507371.0403170193.59133392.9732666.0
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean wifi_median wifi_std ts_waypoint x \\\n", "0 0.206 0.353603 0.350737 1.088208 0 195.790623 \n", "1 0.220 0.299748 0.350737 1.040317 0 193.591333 \n", "\n", " y fold \n", "0 93.465301 6.0 \n", "1 92.973266 6.0 " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.head(2)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# train_all['length'] = [len(xx) for xx in train_all['bssid']]\n", "# del train_all['length']" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# tmp1 = train_all[['x','y']].values\n", "# tmp1 = pd.DataFrame(list(zip(tmp1)),columns = ['xy'])\n", "# train_all = pd.concat([train_all,tmp1],axis=1)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# train_all_timestamp_min = train_all.timestamp.min()\n", "# train_all_timestamp_max = train_all.timestamp.max()\n", "# train_all['timestamp'] = (train_all['timestamp']-train_all_timestamp_min)/(train_all_timestamp_max-train_all_timestamp_min)\n", "\n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "# floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n", "# \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}\n", "# train_all['floor'] = train_all['floor'].apply(lambda x: floor_map[x])" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_meanwifi_medianwifi_stdts_waypointxyfold
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.3536030.3507371.0882080195.79062393.4653016.0
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.2997480.3507371.0403170193.59133392.9732666.0
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean wifi_median wifi_std ts_waypoint x \\\n", "0 0.206 0.353603 0.350737 1.088208 0 195.790623 \n", "1 0.220 0.299748 0.350737 1.040317 0 193.591333 \n", "\n", " y fold \n", "0 93.465301 6.0 \n", "1 92.973266 6.0 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.head(2)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_stdsite
01180[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.0330935da1389e4db8ce0c98bd0547
13048[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.9915295da1389e4db8ce0c98bd0547
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1180 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 3048 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "\n", " wifi_std site \n", "0 1.033093 5da1389e4db8ce0c98bd0547 \n", "1 0.991529 5da1389e4db8ce0c98bd0547 " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd.head(2)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(37678, 11)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_all = test_wifi_pd.copy()\n", "test_all.shape" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from tqdm import tqdm\n", "from sklearn.preprocessing import LabelEncoder\n", "from dask.distributed import wait\n", "\n", "SENSORS = ['acce','acce_uncali','gyro',\n", " 'gyro_uncali','magn','magn_uncali','ahrs']\n", "\n", "NFEAS = {\n", " 'acce': 3,\n", " 'acce_uncali': 3,\n", " 'gyro': 3,\n", " 'gyro_uncali': 3,\n", " 'magn': 3,\n", " 'magn_uncali': 3,\n", " 'ahrs': 3,\n", " 'wifi': 1,\n", " 'ibeacon': 1,\n", " 'waypoint': 3\n", "}\n", "\n", "ACOLS = ['timestamp','x','y','z']\n", " \n", "FIELDS = {\n", " 'acce': ACOLS,\n", " 'acce_uncali': ACOLS,\n", " 'gyro': ACOLS,\n", " 'gyro_uncali': ACOLS,\n", " 'magn': ACOLS,\n", " 'magn_uncali': ACOLS,\n", " 'ahrs': ACOLS,\n", " 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n", " 'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n", " 'waypoint': ['timestamp','x','y']\n", "}\n", "\n", "def to_frame(data, col):\n", " cols = FIELDS[col]\n", " is_dummy = False\n", " if data.shape[0]>0:\n", " df = pd.DataFrame(data, columns=cols)\n", " else:\n", " df = create_dummy_df(cols)\n", " is_dummy = True\n", " for col in df.columns:\n", " if 'timestamp' in col:\n", " df[col] = df[col].astype('int64')\n", " return df, is_dummy\n", "\n", "def create_dummy_df(cols):\n", " df = pd.DataFrame()\n", " for col in cols:\n", " df[col] = [0]\n", " if col in ['ssid','bssid']:\n", " df[col] = df[col].map(str)\n", " return df\n", "\n", "from dataclasses import dataclass\n", "\n", "import numpy as np\n", "\n", "\n", "@dataclass\n", "class ReadData:\n", " acce: np.ndarray\n", " acce_uncali: np.ndarray\n", " gyro: np.ndarray\n", " gyro_uncali: np.ndarray\n", " magn: np.ndarray\n", " magn_uncali: np.ndarray\n", " ahrs: np.ndarray\n", " wifi: np.ndarray\n", " ibeacon: np.ndarray\n", " waypoint: np.ndarray\n", "\n", "\n", "def read_data_file(data_filename):\n", " acce = []\n", " acce_uncali = []\n", " gyro = []\n", " gyro_uncali = []\n", " magn = []\n", " magn_uncali = []\n", " ahrs = []\n", " wifi = []\n", " ibeacon = []\n", " waypoint = []\n", "\n", " with open(data_filename, 'r', encoding='utf-8') as file:\n", " lines = file.readlines()\n", "\n", " for line_data in lines:\n", " line_data = line_data.strip()\n", " if not line_data or line_data[0] == '#':\n", " continue\n", "\n", " line_data = 
line_data.split('\\t')\n", "\n", " if line_data[1] == 'TYPE_ACCELEROMETER':\n", " acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n", " acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_GYROSCOPE':\n", " gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n", " gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n", " magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n", " magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_ROTATION_VECTOR':\n", " if len(line_data)>=5:\n", " ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_WIFI':\n", " sys_ts = line_data[0]\n", " ssid = line_data[2]\n", " bssid = line_data[3]\n", " rssi = line_data[4]\n", " lastseen_ts = line_data[6]\n", " wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n", " wifi.append(wifi_data)\n", " continue\n", "\n", " if line_data[1] == 'TYPE_BEACON':\n", " ts = line_data[0]\n", " uuid = line_data[2]\n", " major = line_data[3]\n", " minor = line_data[4]\n", " rssi = line_data[6]\n", " lastts = line_data[-1]\n", " ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]\n", " ibeacon.append(ibeacon_data)\n", " continue\n", "\n", " if line_data[1] == 'TYPE_WAYPOINT':\n", " waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n", "\n", " acce = np.array(acce)\n", " acce_uncali = np.array(acce_uncali)\n", " gyro = np.array(gyro)\n", " gyro_uncali = np.array(gyro_uncali)\n", " magn = np.array(magn)\n", " magn_uncali = np.array(magn_uncali)\n", " ahrs = np.array(ahrs)\n", " wifi = np.array(wifi)\n", " ibeacon = np.array(ibeacon)\n", " waypoint = np.array(waypoint)\n", "\n", " return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "def get_test_dfs(PATH, test_files):\n", " dtest = get_test_df(PATH)\n", " buildings = set(dtest['building'].values.tolist())\n", " dws = {}\n", " ntest_files = []\n", " for fname in tqdm(test_files):\n", " path = fname.split('/')[-1].split('.')[0]\n", " mask = dtest['path'] == path\n", " dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)\n", " ntest_files.append(fname)\n", " return dws\n", "\n", "def get_test_df(PATH):\n", " dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n", " dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])\n", " dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])\n", " dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])\n", " dtest['timestamp'] = dtest['timestamp'].astype('int64')\n", " dtest = 
dtest.sort_values(['path','timestamp']).reset_index(drop=True)\n", " return dtest\n", "\n", "def get_time_gap(name):\n", " data = read_data_file(name)\n", " db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n", "# print(db,no_ibeacon)\n", " \n", " if no_ibeacon==0:\n", " gap = db['last_timestamp'] - db['timestamp']\n", " assert gap.unique().shape[0]==1\n", " return gap.values[0],no_ibeacon\n", " \n", " if no_ibeacon==1:\n", " # Group wifis by timestamp\n", " wifi_groups = pd.DataFrame(data.wifi).groupby(0) \n", " # Find which one is the most recent of all time points.\n", " est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max() \n", " return est_ts,no_ibeacon\n", "\n", " \n", "\n", "def fix_timestamp_test(df, gap):\n", " df['real_timestamp'] = df['timestamp'] + gap\n", " return df" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n", " '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n", " '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n", " '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n", "test_files_ori[:4]" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "

Client

\n", "\n", "
\n", "

Cluster

\n", "
    \n", "
  • Workers: 8
  • \n", "
  • Cores: 8
  • \n", "
  • Memory: 66.71 GB
  • \n", "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import dask\n", "from dask.distributed import Client, wait, LocalCluster\n", "\n", "# set n_workers to number of cores\n", "client = Client(n_workers=8, \n", " threads_per_worker=1)\n", "client" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:00<00:00, 10654.38it/s]\n", "100%|██████████| 626/626 [00:17<00:00, 34.85it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.87 s, sys: 169 ms, total: 3.04 s\n", "Wall time: 18 s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "%%time\n", "futures = []\n", "for fname in tqdm(test_files_ori, total=len(test_files_ori)):\n", " f = client.submit(get_time_gap,fname)\n", " futures.append(f)\n", " \n", "testpath2gap = {}\n", "for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n", " testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()\n", " " ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "test_all['timestamp'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['timestamp'],test_all['path'])]\n", "# test_all['ts_waypoint'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['ts_waypoint'],test_all['path'])]" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "# test_all['timestamp'] = (test_all['timestamp']-train_all_timestamp_min)/(train_all_timestamp_max-train_all_timestamp_min)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_stdsite
01573190312033[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.0330935da1389e4db8ce0c98bd0547
11573190313901[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.9915295da1389e4db8ce0c98bd0547
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1573190312033 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 1573190313901 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "\n", " wifi_std site \n", "0 1.033093 5da1389e4db8ce0c98bd0547 \n", "1 0.991529 5da1389e4db8ce0c98bd0547 " ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_all.head(2)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "\n", "ss2 = StandardScaler()\n", "ss2.fit(train_all.loc[:,['timestamp']])\n", "train_all.loc[:,['timestamp']] = ss2.transform(train_all.loc[:,['timestamp']])\n", "test_all.loc[:,['timestamp']] = ss2.transform(test_all.loc[:,['timestamp']])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "# train_all_floor_min = train_all.floor.min()\n", "# train_all_floor_max = train_all.floor.max()\n", "# train_all['floor'] = (train_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)\n", "# test_all['floor'] = (test_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "sitelist = list(sorted(set(train_all.site)))\n", "sitedict = dict(zip(sitelist,range(len(sitelist))))\n", "train_all['site_id'] = train_all['site'].apply(lambda x: sitedict[x])\n", "test_all['site_id'] = test_all['site'].apply(lambda x: sitedict[x])\n" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "def MCRMSE(y_true, y_pred):\n", " colwise_mse = tf.reduce_mean(tf.square(y_true - y_pred), axis=1)\n", " return tf.reduce_mean(tf.sqrt(colwise_mse), axis=1)\n", "\n", "def gru_layer(hidden_dim, dropout):\n", " return L.Bidirectional(L.GRU(\n", " hidden_dim, dropout=dropout, return_sequences=True, kernel_initializer='orthogonal'))\n", "\n", "def pandas_list_to_array(df):\n", " \"\"\"\n", " Input: dataframe of shape (x, y), containing list of length l\n", " Return: np.array of shape (x, l, y)\n", " \"\"\"\n", " \n", " return np.transpose(\n", " np.array(df.values.tolist()),\n", " (0, 2, 1)\n", " )\n", "\n", "def preprocess_inputs(df, cols=['ssid','bssid', 'rssi']):\n", " return pandas_list_to_array(\n", " df[cols]\n", " )" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "def build_model_time(embed_size, seq_len=100, pred_len=2, dropout=0.5, \n", " sp_dropout=0.2, embed_dim=200, hidden_dim=256, n_layers=2):\n", " inputs = L.Input(shape=(seq_len, 2))\n", " input_time = L.Input(shape = (1,))\n", " \n", "\n", " categorical_fea = inputs[:, :, :1]\n", " numerical_fea = inputs[:, :, 1:]\n", "\n", " embed = L.Embedding(input_dim=embed_size, output_dim=embed_dim)(categorical_fea)\n", " reshaped = tf.reshape(embed, shape=(-1, embed.shape[1], embed.shape[2] * embed.shape[3]))\n", " reshaped = L.SpatialDropout1D(sp_dropout)(reshaped)\n", " \n", " \n", " hidden = L.concatenate([reshaped, numerical_fea], axis=2)\n", " \n", " for x in 
range(n_layers):\n", " hidden = gru_layer(hidden_dim, dropout)(hidden)\n", " \n", " # Since we are only making predictions on the first part of each sequence, \n", " # we have to truncate it\n", " truncated = hidden[:, :pred_len]\n", " truncated = L.Flatten()(truncated)\n", " truncated = L.concatenate([truncated, input_time], axis=1)\n", "\n", " out = L.Dense(2, activation='linear')(truncated)\n", "\n", " \n", " model = tf.keras.Model(inputs=[inputs,input_time], outputs=out)\n", " model.compile(tf.optimizers.Adam(), loss='mse')\n", " \n", " return model\n", "\n", "def get_embed_size(n_cat):\n", " return min(600, round(1.6 * n_cat ** .56))\n" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "def build_model_mix(sid_size,bssid_size,site_size, seq_len=100, pred_len=2, dropout=0.2, \n", " sp_dropout=0.1, embed_dim=64, hidden_dim=128, n_layers=3,lr=0.001):\n", " inputs = L.Input(shape=(seq_len, 3))\n", " input_time = L.Input(shape = (4,))\n", " input_site = L.Input(shape = (1,))\n", " \n", " categorical_fea1 = inputs[:, :, :1]\n", " categorical_fea2 = inputs[:, :, 1:2]\n", " numerical_fea = inputs[:, :, 2:]\n", " \n", "\n", " embed = L.Embedding(input_dim=sid_size, output_dim=embed_dim)(categorical_fea1)\n", " reshaped = tf.reshape(embed, shape=(-1, embed.shape[1], embed.shape[2] * embed.shape[3]))\n", " reshaped = L.SpatialDropout1D(sp_dropout)(reshaped)\n", " \n", " embed2 = L.Embedding(input_dim=bssid_size, output_dim=embed_dim)(categorical_fea2)\n", " reshaped2 = tf.reshape(embed2, shape=(-1, embed2.shape[1], embed2.shape[2] * embed2.shape[3]))\n", " reshaped2 = L.SpatialDropout1D(sp_dropout)(reshaped2)\n", " \n", " \n", " hidden = L.concatenate([reshaped, reshaped2, numerical_fea], axis=2)\n", " \n", " for x in range(n_layers):\n", " hidden = gru_layer(hidden_dim, dropout)(hidden)\n", " \n", " # Since we are only making predictions on the first part of each sequence, \n", " # we have to truncate it\n", " truncated = hidden[:, :pred_len]\n", " truncated = L.Flatten()(truncated)\n", " \n", " embed_site = L.Embedding(input_dim=site_size, output_dim=1)(input_site)\n", " embed_site = L.Flatten()(embed_site)\n", " \n", " truncated = L.concatenate([truncated, input_time,embed_site], axis=1)\n", " \n", " #out = L.Dense(32, activation='linear')(truncated)\n", "\n", " out = L.Dense(2, activation='linear')(truncated)\n", " \n", " model = tf.keras.Model(inputs=[inputs,input_time,input_site], outputs=out)\n", " model.compile(tf.optimizers.Adam(lr), loss='mse')\n", " \n", " return model\n", "\n", "def get_embed_size(n_cat):\n", " return min(600, round(1.6 * n_cat ** .56))\n" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "# def build_model_time_floors_site(ssid_size,bssid_size,site_size,seq_len=100,dropout=0.5, \n", "# sp_dropout=0.2, embed_dim=64, hidden_dim=256, n_layers=2):\n", "# inputs = L.Input(shape=(seq_len, 2))\n", "# input_time = L.Input(shape = (2,)) ##time and floor\n", "# input_site = L.Input(shape = (1,)) \n", "\n", "# # ssid_fea = inputs[:, :, :1]\n", "# bssid_fea = inputs[:,:,:1]\n", "# rssi_fea = inputs[:,:,1:]\n", "\n", "# # embed_ssid = L.Embedding(input_dim=ssid_size, output_dim=32)(ssid_fea)\n", "# embed_bssid = L.Embedding(input_dim=bssid_size, output_dim=64)(bssid_fea)\n", "# embed_site = L.Embedding(input_dim=site_size, output_dim=3)(input_site)\n", "\n", "# # embed_ssid = L.Flatten()(embed_ssid)\n", "# embed_bssid = L.Flatten()(embed_bssid)\n", "# embed_site = 
L.Flatten()(embed_site)\n", "# rssi_fea = L.Flatten()(rssi_fea)\n", "\n", "# #reshaped = tf.reshape(embed, shape=(-1, embed.shape[1], embed.shape[2] * embed.shape[3]))\n", "# #reshaped = L.SpatialDropout1D(sp_dropout)(reshaped)\n", " \n", " \n", "# hidden = L.concatenate([input_time,embed_bssid,rssi_fea], axis=1)\n", "# hidden = L.Dropout(0.2)(hidden)\n", "# print(hidden.shape)\n", "# x = L.Reshape((1, -1))(hidden)\n", " \n", "# x = L.BatchNormalization()(x)\n", "# x = L.LSTM(128, dropout=0.3, recurrent_dropout=0.3, return_sequences=True, activation='relu')(x)\n", "# x = L.LSTM(16, dropout=0.1, return_sequences=False, activation='relu')(x)\n", "\n", "# out = L.Dense(2, activation='linear')(x)\n", "\n", " \n", "# model = tf.keras.Model(inputs=[inputs,input_time,input_site], outputs=out)\n", "# model.compile(tf.optimizers.Adam(), loss='mse')\n", " \n", "# return model\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "# import pickle\n", "# with open('train_all.pickle','wb') as fw:\n", "# pickle.dump(train_all,fw)\n", "# with open('test_all.pickle','wb') as fw:\n", "# pickle.dump(test_all,fw)" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "begin fold: 0\n", "fold 0 7.73294929426513\n", "150.92601263675743\n", "elasped time: 84.61294651031494\n" ] } ], "source": [ "import time\n", "t1 = time.time()\n", "pred_cols = ['x','y']\n", "train_inputs = preprocess_inputs(train_all,cols=['ssid', 'bssid', 'rssi'])\n", "train_inputs_time = train_all[['timestamp','floorNo','wifi_len','wifi_mean']].values\n", "train_inputs_site = train_all['site_id'].values\n", "train_labels = train_all[pred_cols].values\n", "test_inputs = preprocess_inputs(test_all,cols=['ssid','bssid', 'rssi'])\n", "test_inputs_time = test_all[['timestamp','floorNo','wifi_len','wifi_mean']].values\n", "test_inputs_site = test_all['site_id'].values\n", "\n", "\n", " \n", " \n", "x_test = test_inputs\n", "x_test_time = test_inputs_time\n", "x_test_site = test_inputs_site\n", "\n", "oof_xy = np.zeros(train_labels.shape)\n", "y_test_pred = 0\n", "for fold_id in range(N_SPLITS):\n", " trn_idx = train_all[train_all.fold!=fold_id].index.tolist()\n", " val_idx = train_all[train_all.fold==fold_id].index.tolist()\n", " print('begin fold:',fold_id)\n", " x_train, x_val = train_inputs[trn_idx],train_inputs[val_idx]\n", " x_train_time, x_val_time = train_inputs_time[trn_idx],train_inputs_time[val_idx]\n", " x_train_site, x_val_site = train_inputs_site[trn_idx],train_inputs_site[val_idx]\n", " y_train, y_val = train_labels[trn_idx],train_labels[val_idx]\n", " \n", " model = build_model_mix(len(ssiddict),len(bssiddict),len(sitedict),seqlen,lr=0.001)\n", "# model.load_weights('rnn_model_v4/model_allsite_fold{}_times2.h5'.format(fold_id))\n", " history = model.fit(\n", " [x_train,x_train_time,x_train_site], y_train,\n", " validation_data=([x_val,x_val_time,x_val_site], y_val),\n", " batch_size=128,\n", " epochs=100,\n", " verbose=1,\n", " callbacks=[\n", " tf.keras.callbacks.ReduceLROnPlateau(patience=5),\n", " tf.keras.callbacks.ModelCheckpoint('rnn_model_wifi/model_fold{}.h5'.format(fold_id)),\n", " tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4,\n", " patience=5, mode='min', restore_best_weights=True)\n", " ]\n", " )\n", "# model.load_weights('rnn_model_wifi/model_fold{}.h5')\n", " y_val_pred = model.predict([x_val,x_val_time,x_val_site])\n", " y_test_pred += 
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "begin fold: 0\n",
      "fold 0 7.73294929426513\n",
      "150.92601263675743\n",
      "elapsed time: 84.61294651031494\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "t1 = time.time()\n",
    "pred_cols = ['x','y']\n",
    "train_inputs = preprocess_inputs(train_all,cols=['ssid', 'bssid', 'rssi'])\n",
    "train_inputs_time = train_all[['timestamp','floorNo','wifi_len','wifi_mean']].values\n",
    "train_inputs_site = train_all['site_id'].values\n",
    "train_labels = train_all[pred_cols].values\n",
    "test_inputs = preprocess_inputs(test_all,cols=['ssid','bssid', 'rssi'])\n",
    "test_inputs_time = test_all[['timestamp','floorNo','wifi_len','wifi_mean']].values\n",
    "test_inputs_site = test_all['site_id'].values\n",
    "\n",
    "x_test = test_inputs\n",
    "x_test_time = test_inputs_time\n",
    "x_test_site = test_inputs_site\n",
    "\n",
    "oof_xy = np.zeros(train_labels.shape)\n",
    "y_test_pred = 0\n",
    "for fold_id in range(N_SPLITS):\n",
    "    trn_idx = train_all[train_all.fold!=fold_id].index.tolist()\n",
    "    val_idx = train_all[train_all.fold==fold_id].index.tolist()\n",
    "    print('begin fold:',fold_id)\n",
    "    x_train, x_val = train_inputs[trn_idx],train_inputs[val_idx]\n",
    "    x_train_time, x_val_time = train_inputs_time[trn_idx],train_inputs_time[val_idx]\n",
    "    x_train_site, x_val_site = train_inputs_site[trn_idx],train_inputs_site[val_idx]\n",
    "    y_train, y_val = train_labels[trn_idx],train_labels[val_idx]\n",
    "\n",
    "    model = build_model_mix(len(ssiddict),len(bssiddict),len(sitedict),seqlen,lr=0.001)\n",
    "#     model.load_weights('rnn_model_v4/model_allsite_fold{}_times2.h5'.format(fold_id))\n",
    "    history = model.fit(\n",
    "        [x_train,x_train_time,x_train_site], y_train,\n",
    "        validation_data=([x_val,x_val_time,x_val_site], y_val),\n",
    "        batch_size=128,\n",
    "        epochs=100,\n",
    "        verbose=1,\n",
    "        callbacks=[\n",
    "            tf.keras.callbacks.ReduceLROnPlateau(patience=5),\n",
    "            tf.keras.callbacks.ModelCheckpoint('rnn_model_wifi/model_fold{}.h5'.format(fold_id)),\n",
    "            tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4,\n",
    "                        patience=5, mode='min', restore_best_weights=True)\n",
    "        ]\n",
    "    )\n",
    "#     model.load_weights('rnn_model_wifi/model_fold{}.h5'.format(fold_id))\n",
    "    y_val_pred = model.predict([x_val,x_val_time,x_val_site])\n",
    "    y_test_pred += model.predict([x_test,x_test_time,x_test_site])\n",
    "    oof_xy[val_idx] = y_val_pred\n",
    "    print('fold',fold_id, np.mean(np.sqrt(np.sum((y_val-y_val_pred)**2,axis=1))))\n",
    "    # Only the first fold is trained here to save time; the averaging below\n",
    "    # divides by the number of folds actually run.\n",
    "    break\n",
    "y_test_pred = y_test_pred/(fold_id + 1)\n",
    "train_labels_inv = pd.DataFrame(train_labels[:,:],columns = ['x','y'])\n",
    "oof_xy_pred_inv = pd.DataFrame(oof_xy[:,:],columns = ['x','y'])\n",
    "y_test_pred_inv = pd.DataFrame(y_test_pred[:,:],columns = ['x','y'])\n",
    "print(np.mean(np.sqrt(np.sum((train_labels_inv-oof_xy_pred_inv)**2,axis=1))))\n",
    "\n",
    "t2 = time.time()\n",
    "print('elapsed time:', t2 - t1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_all[['x','y']] = y_test_pred_inv"
   ]
  },
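  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The per-fold score printed above is the mean Euclidean distance between\n",
    "# true and predicted (x, y); here it is as a small self-contained helper.\n",
    "# (The full competition metric additionally penalises wrong floors, which\n",
    "# are handled by a separate model merged in at the end of this notebook.)\n",
    "def mean_position_error(xy_true, xy_pred):\n",
    "    return np.mean(np.sqrt(np.sum((np.asarray(xy_true) - np.asarray(xy_pred)) ** 2, axis=1)))\n",
    "\n",
    "# e.g. mean_position_error(train_labels, oof_xy) reproduces the OOF score above\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "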
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_stdsitesite_idxy
00.345764[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.0330935da1389e4db8ce0c98bd05471949.43089789.246811
10.345765[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.9915295da1389e4db8ce0c98bd05471971.17988687.176270
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 0.345764 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 0.345765 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "\n", " wifi_std site site_id x y \n", "0 1.033093 5da1389e4db8ce0c98bd0547 19 49.430897 89.246811 \n", "1 0.991529 5da1389e4db8ce0c98bd0547 19 71.179886 87.176270 " ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_all.head(2)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppathsitexyt1_wifipath_id
00.34576400ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054749.43089789.2468111180.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
10.34576500ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.17988687.1762703048.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
20.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.40873786.9792484924.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
30.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.81906983.8495256816.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
40.34576700ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.56027286.2846608693.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
\n", "
" ], "text/plain": [ " timestamp path site x \\\n", "0 0.345764 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 49.430897 \n", "1 0.345765 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.179886 \n", "2 0.345766 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.408737 \n", "3 0.345766 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.819069 \n", "4 0.345767 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 71.560272 \n", "\n", " y t1_wifi path_id \n", "0 89.246811 1180.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "1 87.176270 3048.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "2 86.979248 4924.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "3 83.849525 6816.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "4 86.284660 8693.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 " ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result = test_all[['timestamp','path','site','x','y']]\n", "result['t1_wifi'] = ss2.inverse_transform(result['timestamp'])\n", "\n", "result['t1_wifi'] = [xx-testpath2gap[yy][0] for (xx,yy) in zip(result['t1_wifi'],result['path'])]\n", "result['path_id'] = result['site']+'_'+result['path']\n", "result.head()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppathsitexyt1_wifi
path_id
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576400ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054749.43089789.2468111180.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576500ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.17988687.1762703048.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.40873786.9792484924.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.81906983.8495256816.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576700ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054771.56027286.2846608693.0
\n", "
" ], "text/plain": [ " timestamp \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345764 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345765 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345766 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345766 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345767 \n", "\n", " path \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "\n", " site \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "\n", " x y \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 49.430897 89.246811 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.179886 87.176270 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.408737 86.979248 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.819069 83.849525 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 71.560272 86.284660 \n", "\n", " t1_wifi \n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 1180.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 3048.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 4924.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 6816.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 8693.0 " ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# result['path_id'] = ['_'.join(xx.split('_')[:2]) for xx in result.site_path_timestamp]\n", "# result['t1_wifi'] = [int(xx.split('_')[2]) for xx in result.site_path_timestamp]\n", "# del result['site_path_timestamp']\n", "result.set_index('path_id', inplace=True)\n", "result.head()" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "from scipy.spatial.transform import Rotation as R\n", "from PIL import Image\n", "from mpl_toolkits.mplot3d import Axes3D\n", "import plotly.graph_objs as go\n", "from pathlib import Path\n", "import scipy.signal as signal\n", "import json\n", "import seaborn as sns # visualization\n", "from dataclasses import dataclass\n", "\n", "import matplotlib.pyplot as plt # visualization\n", "import numpy as np # linear algebra\n", "import random\n", "import pandas as pd\n", "from collections import Counter, defaultdict\n", "\n", "plt.rcParams.update({'font.size': 14})\n", "\n", "def split_ts_seq(ts_seq, sep_ts):\n", " \"\"\"\n", "\n", " :param ts_seq:\n", " :param sep_ts:\n", " :return:\n", " \"\"\"\n", " tss = ts_seq[:, 0].astype(float)\n", " unique_sep_ts = np.unique(sep_ts)\n", " ts_seqs = []\n", " start_index = 0\n", " for i in range(0, unique_sep_ts.shape[0]):\n", " end_index = np.searchsorted(tss, unique_sep_ts[i], side='right')\n", " if start_index == end_index:\n", " continue\n", " ts_seqs.append(ts_seq[start_index:end_index, :].copy())\n", " 
start_index = end_index\n",
    "\n",
    "    # tail data\n",
    "    if start_index < ts_seq.shape[0]:\n",
    "        ts_seqs.append(ts_seq[start_index:, :].copy())\n",
    "\n",
    "    return ts_seqs\n",
    "\n",
    "\n",
    "def correct_trajectory(original_xys, end_xy):\n",
    "    \"\"\"Rotate and rescale a trajectory about its start point so that its\n",
    "    endpoint lands exactly on the known end waypoint.\n",
    "\n",
    "    :param original_xys: numpy ndarray, shape(N, 2)\n",
    "    :param end_xy: numpy ndarray, shape(1, 2)\n",
    "    :return: numpy ndarray, shape(N, 2)\n",
    "    \"\"\"\n",
    "    corrected_xys = np.zeros((0, 2))\n",
    "\n",
    "    A = original_xys[0, :]\n",
    "    B = end_xy\n",
    "    Bp = original_xys[-1, :]\n",
    "\n",
    "    angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])\n",
    "    angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])\n",
    "    angle_BpAB = angle_BpAX - angle_BAX\n",
    "    AB = np.sqrt(np.sum((B - A) ** 2))\n",
    "    ABp = np.sqrt(np.sum((Bp - A) ** 2))\n",
    "\n",
    "    corrected_xys = np.append(corrected_xys, [A], 0)\n",
    "    for i in np.arange(1, np.size(original_xys, 0)):\n",
    "        angle_CpAX = np.arctan2(original_xys[i, 1] - A[1], original_xys[i, 0] - A[0])\n",
    "\n",
    "        angle_CAX = angle_CpAX - angle_BpAB\n",
    "\n",
    "        ACp = np.sqrt(np.sum((original_xys[i, :] - A) ** 2))\n",
    "\n",
    "        AC = ACp * AB / ABp\n",
    "\n",
    "        delta_C = np.array([AC * np.cos(angle_CAX), AC * np.sin(angle_CAX)])\n",
    "\n",
    "        C = delta_C + A\n",
    "\n",
    "        corrected_xys = np.append(corrected_xys, [C], 0)\n",
    "\n",
    "    return corrected_xys\n",
    "\n",
    "\n",
    "def correct_positions(rel_positions, reference_positions):\n",
    "    \"\"\"Anchor dead-reckoned relative displacements to the known reference\n",
    "    waypoints, correcting each segment with correct_trajectory.\n",
    "\n",
    "    :param rel_positions: numpy ndarray, rows of [timestamp, dx, dy]\n",
    "    :param reference_positions: numpy ndarray, rows of [timestamp, x, y]\n",
    "    :return: numpy ndarray, rows of [timestamp, x, y]\n",
    "    \"\"\"\n",
    "    rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])\n",
    "    if len(rel_positions_list) != reference_positions.shape[0] - 1:\n",
    "        # print(f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')\n",
    "        del rel_positions_list[-1]\n",
    "    assert len(rel_positions_list) == reference_positions.shape[0] - 1\n",
    "\n",
    "    corrected_positions = np.zeros((0, 3))\n",
    "    for i, rel_ps in enumerate(rel_positions_list):\n",
    "        start_position = reference_positions[i]\n",
    "        end_position = reference_positions[i + 1]\n",
    "        abs_ps = np.zeros(rel_ps.shape)\n",
    "        abs_ps[:, 0] = rel_ps[:, 0]\n",
    "        # abs_ps[:, 1:3] = rel_ps[:, 1:3] + start_position[1:3]\n",
    "        abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]\n",
    "        for j in range(1, rel_ps.shape[0]):\n",
    "            abs_ps[j, 1:3] = abs_ps[j-1, 1:3] + rel_ps[j, 1:3]\n",
    "        abs_ps = np.insert(abs_ps, 0, start_position, axis=0)\n",
    "        corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])\n",
    "        corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))\n",
    "        if i == 0:\n",
    "            corrected_positions = np.append(corrected_positions, corrected_ps, axis=0)\n",
    "        else:\n",
    "            corrected_positions = np.append(corrected_positions, corrected_ps[1:], axis=0)\n",
    "\n",
    "    corrected_positions = np.array(corrected_positions)\n",
    "\n",
    "    return corrected_positions\n",
    "\n",
    "\n",
    "def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):\n",
    "    # Low-pass Butterworth filter, warmed up on synthetic gravity data.\n",
    "    order = 4\n",
    "    filter_b, filter_a = signal.butter(order, cut_off_freq / (sample_freq / 2), 'low', False)\n",
    "    zf = signal.lfilter_zi(filter_b, filter_a)\n",
    "    _, zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)\n",
    "    _, filter_zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)\n",
    "\n",
    "    return filter_b, filter_a, filter_zf\n",
    "\n",
    "\n",
    "def get_rotation_matrix_from_vector(rotation_vector):\n",
    "    q1 = rotation_vector[0]\n",
    "    q2 = rotation_vector[1]\n",
    "    q3 = rotation_vector[2]\n",
    "\n",
    "    if rotation_vector.size >= 4:\n",
    "        q0 = rotation_vector[3]\n",
    "    else:\n",
    "        q0 = 1 - 
q1*q1 - q2*q2 - q3*q3\n", " if q0 > 0:\n", " q0 = np.sqrt(q0)\n", " else:\n", " q0 = 0\n", "\n", " sq_q1 = 2 * q1 * q1\n", " sq_q2 = 2 * q2 * q2\n", " sq_q3 = 2 * q3 * q3\n", " q1_q2 = 2 * q1 * q2\n", " q3_q0 = 2 * q3 * q0\n", " q1_q3 = 2 * q1 * q3\n", " q2_q0 = 2 * q2 * q0\n", " q2_q3 = 2 * q2 * q3\n", " q1_q0 = 2 * q1 * q0\n", "\n", " R = np.zeros((9,))\n", " if R.size == 9:\n", " R[0] = 1 - sq_q2 - sq_q3\n", " R[1] = q1_q2 - q3_q0\n", " R[2] = q1_q3 + q2_q0\n", "\n", " R[3] = q1_q2 + q3_q0\n", " R[4] = 1 - sq_q1 - sq_q3\n", " R[5] = q2_q3 - q1_q0\n", "\n", " R[6] = q1_q3 - q2_q0\n", " R[7] = q2_q3 + q1_q0\n", " R[8] = 1 - sq_q1 - sq_q2\n", "\n", " R = np.reshape(R, (3, 3))\n", " elif R.size == 16:\n", " R[0] = 1 - sq_q2 - sq_q3\n", " R[1] = q1_q2 - q3_q0\n", " R[2] = q1_q3 + q2_q0\n", " R[3] = 0.0\n", "\n", " R[4] = q1_q2 + q3_q0\n", " R[5] = 1 - sq_q1 - sq_q3\n", " R[6] = q2_q3 - q1_q0\n", " R[7] = 0.0\n", "\n", " R[8] = q1_q3 - q2_q0\n", " R[9] = q2_q3 + q1_q0\n", " R[10] = 1 - sq_q1 - sq_q2\n", " R[11] = 0.0\n", "\n", " R[12] = R[13] = R[14] = 0.0\n", " R[15] = 1.0\n", "\n", " R = np.reshape(R, (4, 4))\n", "\n", " return R\n", "\n", "\n", "def get_orientation(R):\n", " flat_R = R.flatten()\n", " values = np.zeros((3,))\n", " if np.size(flat_R) == 9:\n", " values[0] = np.arctan2(flat_R[1], flat_R[4])\n", " values[1] = np.arcsin(-flat_R[7])\n", " values[2] = np.arctan2(-flat_R[6], flat_R[8])\n", " else:\n", " values[0] = np.arctan2(flat_R[1], flat_R[5])\n", " values[1] = np.arcsin(-flat_R[9])\n", " values[2] = np.arctan2(-flat_R[8], flat_R[10])\n", "\n", " return values\n", "\n", "\n", "def compute_steps(acce_datas):\n", " step_timestamps = np.array([])\n", " step_indexs = np.array([], dtype=int)\n", " step_acce_max_mins = np.zeros((0, 4))\n", " sample_freq = 50\n", " window_size = 22\n", " low_acce_mag = 0.6\n", " step_criterion = 1\n", " interval_threshold = 250\n", "\n", " acce_max = np.zeros((2,))\n", " acce_min = np.zeros((2,))\n", " acce_binarys = np.zeros((window_size,), dtype=int)\n", " acce_mag_pre = 0\n", " state_flag = 0\n", "\n", " warmup_data = np.ones((window_size,)) * 9.81\n", " filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)\n", " acce_mag_window = np.zeros((window_size, 1))\n", "\n", " # detect steps according to acceleration magnitudes\n", " for i in np.arange(0, np.size(acce_datas, 0)):\n", " acce_data = acce_datas[i, :]\n", " acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))\n", "\n", " acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)\n", " acce_mag_filt = acce_mag_filt[0]\n", "\n", " acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])\n", " acce_mag_window = np.delete(acce_mag_window, 0)\n", " mean_gravity = np.mean(acce_mag_window)\n", " acce_std = np.std(acce_mag_window)\n", " mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])\n", "\n", " # detect valid peak or valley of acceleration magnitudes\n", " acce_mag_filt_detrend = acce_mag_filt - mean_gravity\n", " if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):\n", " # peak\n", " acce_binarys = np.append(acce_binarys, [1])\n", " acce_binarys = np.delete(acce_binarys, 0)\n", " elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):\n", " # valley\n", " acce_binarys = np.append(acce_binarys, [-1])\n", " acce_binarys = np.delete(acce_binarys, 0)\n", " else:\n", " # between peak and valley\n", " acce_binarys = np.append(acce_binarys, [0])\n", " acce_binarys = np.delete(acce_binarys, 0)\n", "\n", " if 
(acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):\n",
    "            if state_flag == 0:\n",
    "                acce_max[:] = acce_data[0], acce_mag_filt\n",
    "                state_flag = 1\n",
    "            elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (\n",
    "                    acce_mag_filt > acce_max[1]):\n",
    "                acce_max[:] = acce_data[0], acce_mag_filt\n",
    "            elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):\n",
    "                acce_max[:] = acce_data[0], acce_mag_filt\n",
    "                state_flag = 1\n",
    "\n",
    "        # choose reasonable step criterion and check if there is a valid step\n",
    "        # (criterion 1, the default here, counts a step when the filtered\n",
    "        # magnitude climbs back out of a valley)\n",
    "        # save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]\n",
    "        step_flag = False\n",
    "        if step_criterion == 2:\n",
    "            if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):\n",
    "                step_flag = True\n",
    "        elif step_criterion == 3:\n",
    "            if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):\n",
    "                step_flag = True\n",
    "        else:\n",
    "            if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:\n",
    "                if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):\n",
    "                    acce_min[:] = acce_data[0], acce_mag_filt\n",
    "                    state_flag = 2\n",
    "                    step_flag = True\n",
    "                elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (\n",
    "                        acce_mag_filt < acce_min[1]):\n",
    "                    acce_min[:] = acce_data[0], acce_mag_filt\n",
    "        if step_flag:\n",
    "            step_timestamps = np.append(step_timestamps, acce_data[0])\n",
    "            step_indexs = np.append(step_indexs, [i])\n",
    "            step_acce_max_mins = np.append(step_acce_max_mins,\n",
    "                                           [[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)\n",
    "        acce_mag_pre = acce_mag_filt_detrend\n",
    "\n",
    "    return step_timestamps, step_indexs, step_acce_max_mins\n",
    "\n",
    "\n",
    "def compute_stride_length(step_acce_max_mins):\n",
    "    K = 0.4\n",
    "    K_max = 0.8\n",
    "    K_min = 0.4\n",
    "    para_a0 = 0.21468084\n",
    "    para_a1 = 0.09154517\n",
    "    para_a2 = 0.02301998\n",
    "\n",
    "    stride_lengths = np.zeros((step_acce_max_mins.shape[0], 2))\n",
    "    k_real = np.zeros((step_acce_max_mins.shape[0], 2))\n",
    "    step_timeperiod = np.zeros((step_acce_max_mins.shape[0] - 1, ))\n",
    "    stride_lengths[:, 0] = step_acce_max_mins[:, 0]\n",
    "    window_size = 2\n",
    "    step_timeperiod_temp = np.zeros((0, ))\n",
    "\n",
    "    # calculate every step period - step_timeperiod unit: second\n",
    "    for i in range(0, step_timeperiod.shape[0]):\n",
    "        step_timeperiod_data = (step_acce_max_mins[i + 1, 0] - step_acce_max_mins[i, 0]) / 1000\n",
    "        step_timeperiod_temp = np.append(step_timeperiod_temp, [step_timeperiod_data])\n",
    "        if step_timeperiod_temp.shape[0] > window_size:\n",
    "            step_timeperiod_temp = np.delete(step_timeperiod_temp, [0])\n",
    "        step_timeperiod[i] = np.sum(step_timeperiod_temp) / step_timeperiod_temp.shape[0]\n",
    "\n",
    "    # calculate parameters by step period and acceleration magnitude variance\n",
    "    k_real[:, 0] = step_acce_max_mins[:, 0]\n",
    "    k_real[0, 1] = K\n",
    "    for i in range(0, step_timeperiod.shape[0]):\n",
    "        k_real[i + 1, 1] = np.max([(para_a0 + para_a1 / step_timeperiod[i] + para_a2 * step_acce_max_mins[i, 3]), K_min])\n",
    "        k_real[i + 1, 1] = np.min([k_real[i + 1, 1], K_max]) * (K / K_min)\n",
    "\n",
    "    # calculate every stride length by parameters and max and min data of acceleration magnitude\n",
    "    stride_lengths[:, 1] = np.max([(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]),\n",
    "                                   np.ones((step_acce_max_mins.shape[0], ))], axis=0)**(1 / 4) * k_real[:, 1]\n",
    "\n",
    "    return stride_lengths\n",
    "\n",
    "\n",
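    "# compute_stride_length above implements a Weinberg-style stride model:\n",
    "#   stride = k * (acce_max - acce_min) ** (1/4)\n",
    "# where k is adapted per step from the smoothed step period and the\n",
    "# acceleration-magnitude variance, then clipped between K_min and K_max.\n",
    "\n",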
    "def compute_headings(ahrs_datas):\n",
    "    # Convert rotation-vector samples to a heading (rotation about z).\n",
    "    headings = np.zeros((np.size(ahrs_datas, 0), 2))\n",
    "    for i in np.arange(0, np.size(ahrs_datas, 0)):\n",
    "        ahrs_data = ahrs_datas[i, :]\n",
    "        rot_mat = get_rotation_matrix_from_vector(ahrs_data[1:])\n",
    "        azimuth, pitch, roll = get_orientation(rot_mat)\n",
    "        around_z = (-azimuth) % (2 * np.pi)\n",
    "        headings[i, :] = ahrs_data[0], around_z\n",
    "    return headings\n",
    "\n",
    "\n",
    "def compute_step_heading(step_timestamps, headings):\n",
    "    # Pick, for every detected step, the heading sample with the same timestamp.\n",
    "    step_headings = np.zeros((len(step_timestamps), 2))\n",
    "    step_timestamps_index = 0\n",
    "    for i in range(0, len(headings)):\n",
    "        if step_timestamps_index < len(step_timestamps):\n",
    "            if headings[i, 0] == step_timestamps[step_timestamps_index]:\n",
    "                step_headings[step_timestamps_index, :] = headings[i, :]\n",
    "                step_timestamps_index += 1\n",
    "        else:\n",
    "            break\n",
    "    assert step_timestamps_index == len(step_timestamps)\n",
    "\n",
    "    return step_headings\n",
    "\n",
    "\n",
    "def compute_rel_positions(stride_lengths, step_headings):\n",
    "    # Turn (stride, heading) pairs into per-step displacements:\n",
    "    # dx = -stride*sin(heading), dy = stride*cos(heading).\n",
    "    rel_positions = np.zeros((stride_lengths.shape[0], 3))\n",
    "    for i in range(0, stride_lengths.shape[0]):\n",
    "        rel_positions[i, 0] = stride_lengths[i, 0]\n",
    "        rel_positions[i, 1] = -stride_lengths[i, 1] * np.sin(step_headings[i, 1])\n",
    "        rel_positions[i, 2] = stride_lengths[i, 1] * np.cos(step_headings[i, 1])\n",
    "\n",
    "    return rel_positions\n",
    "\n",
    "\n",
    "def compute_step_positions(acce_datas, ahrs_datas, posi_datas):\n",
    "    step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)\n",
    "    headings = compute_headings(ahrs_datas)\n",
    "    stride_lengths = compute_stride_length(step_acce_max_mins)\n",
    "    step_headings = compute_step_heading(step_timestamps, headings)\n",
    "    rel_positions = compute_rel_positions(stride_lengths, step_headings)\n",
    "    step_positions = correct_positions(rel_positions, posi_datas)\n",
    "\n",
    "    return step_positions\n"
   ]
  },
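  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A tiny self-contained check of the dead-reckoning convention above,\n",
    "# using made-up values: one 0.7 m stride at heading 0 should displace by\n",
    "# roughly (0, +0.7).\n",
    "_strides = np.array([[1000.0, 0.7]])  # [timestamp_ms, stride_m]\n",
    "_heads = np.array([[1000.0, 0.0]])    # [timestamp_ms, heading_rad]\n",
    "print(compute_rel_positions(_strides, _heads))  # ~ [[1000., 0., 0.7]]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_submission = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "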
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamp
buildingpath_id
5a0546857ecc773753327266046cfa46be49fc10834815c6[0000000000009, 0000000009017, 0000000015326, ...
05d052dde78384b0c543d89c[0000000000012, 0000000005748, 0000000014654, ...
0c06cc9f21d172618d74c6c8[0000000000011, 0000000011818, 0000000019825, ...
146035943a1482883ed98570[0000000000011, 0000000004535, 0000000011498, ...
1ef2771dfea25d508142ba06[0000000000009, 0000000012833, 0000000021759, ...
\n", "
" ], "text/plain": [ " timestamp\n", "building path_id \n", "5a0546857ecc773753327266 046cfa46be49fc10834815c6 [0000000000009, 0000000009017, 0000000015326, ...\n", " 05d052dde78384b0c543d89c [0000000000012, 0000000005748, 0000000014654, ...\n", " 0c06cc9f21d172618d74c6c8 [0000000000011, 0000000011818, 0000000019825, ...\n", " 146035943a1482883ed98570 [0000000000011, 0000000004535, 0000000011498, ...\n", " 1ef2771dfea25d508142ba06 [0000000000009, 0000000012833, 0000000021759, ..." ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_submission['building'] = [x.split('_')[0] for x in sample_submission['site_path_timestamp']]\n", "sample_submission['path_id'] = [x.split('_')[1] for x in sample_submission['site_path_timestamp']]\n", "sample_submission['timestamp'] = [x.split('_')[2] for x in sample_submission['site_path_timestamp']]\n", "samples = pd.DataFrame(sample_submission.groupby(['building','path_id'])['timestamp'].apply(lambda x: list(x)))\n", "buildings = np.unique([x[0] for x in samples.index])\n", "samples.head()" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5a0546857ecc773753327266\n", "5c3c44b80379370013e0fd2b\n", "5d27075f03f801723c2e360f\n", "5d27096c03f801723c31e5e0\n", "5d27097f03f801723c320d97\n", "5d27099f03f801723c32511d\n", "5d2709a003f801723c3251bf\n", "5d2709b303f801723c327472\n", "5d2709bb03f801723c32852c\n", "5d2709c303f801723c3299ee\n", "5d2709d403f801723c32bd39\n", "5d2709e003f801723c32d896\n", "5da138274db8ce0c98bbd3d2\n", "5da1382d4db8ce0c98bbe92e\n", "5da138314db8ce0c98bbf3a0\n", "5da138364db8ce0c98bc00f1\n", "5da1383b4db8ce0c98bc11ab\n", "5da138754db8ce0c98bca82f\n", "5da138764db8ce0c98bcaa46\n", "5da1389e4db8ce0c98bd0547\n", "5da138b74db8ce0c98bd4774\n", "5da958dd46f8266d0737457b\n", "5dbc1d84c1eb61796cf7c010\n", "5dc8cea7659e181adb076a3f\n" ] } ], "source": [ "from scipy.interpolate import interp1d\n", "from scipy.ndimage.filters import uniform_filter1d\n", "\n", "colacce = ['xyz_time','x_acce','y_acce','z_acce']\n", "colahrs = ['xyz_time','x_ahrs','y_ahrs','z_ahrs']\n", "\n", "for building in buildings:\n", " print(building)\n", " paths = samples.loc[building].index\n", " # Acceleration info:\n", " tfm = pd.read_csv(f'indoor_testing_accel/{building}.txt',index_col=0)\n", " for path_id in paths:\n", " # Original predicted values:\n", " xy = result.loc[building+'_'+path_id]\n", " tfmi = tfm.loc[path_id]\n", " acce_datas = np.array(tfmi[colacce],dtype=np.float)\n", " ahrs_datas = np.array(tfmi[colahrs],dtype=np.float)\n", " posi_datas = np.array(xy[['t1_wifi','x','y']],dtype=np.float)\n", " # Outlier removal:\n", " xyout = uniform_filter1d(posi_datas,size=3,axis=0,mode='reflect')\n", " xydiff = np.abs(posi_datas-xyout)\n", " xystd = np.std(xydiff,axis=0)*3\n", " posi_datas = posi_datas[(xydiff[:,1]posi_datas[0,0],:]\n", " # If two consecutive predictions are in-between two step datapoints,\n", " # the last one is removed, causing error (in the \"split_ts_seq\" function).\n", " posi_index = [np.searchsorted(rel_positions[:,0], x, side='right') for x in posi_datas[:,0]]\n", " u, i1, i2 = np.unique(posi_index, return_index=True, return_inverse=True)\n", " posi_datas = np.vstack([np.mean(posi_datas[i2==i],axis=0) for i in np.unique(i2)])\n", " # Position correction:\n", " step_positions = correct_positions(rel_positions, posi_datas)\n", " # Interpolate for timestamps in the testing set:\n", "\n", " t = step_positions[:,0]\n", " x 
    "        x = step_positions[:,1]\n",
    "        y = step_positions[:,2]\n",
    "        # Clamp to the first/last corrected position outside the observed\n",
    "        # time range instead of extrapolating:\n",
    "        fx = interp1d(t, x, kind='linear', fill_value=(x[0],x[-1]), bounds_error=False)\n",
    "        fy = interp1d(t, y, kind='linear', fill_value=(y[0],y[-1]), bounds_error=False)\n",
    "        # Output result:\n",
    "        t0 = np.array(samples.loc[(building,path_id),'timestamp'],dtype=np.float64)\n",
    "        sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'x'] = fx(t0)\n",
    "        sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'y'] = fy(t0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Floor predictions come from a separate model; merge them in and write\n",
    "# the final submission.\n",
    "subold = pd.read_csv('submission_floor.csv')\n",
    "sample_submission['floor'] = subold['floor']\n",
    "sample_submission[['site_path_timestamp','floor','x','y']].to_csv('submission_wifi.csv',index=False)\n"
   ]
  },
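  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Why fill_value=(x[0], x[-1]) in the interpolation above: submission\n",
    "# timestamps can fall outside the corrected trajectory's time range, and\n",
    "# clamping to the first/last corrected position is safer than linear\n",
    "# extrapolation. A tiny illustration with made-up numbers:\n",
    "_t = np.array([0.0, 1.0, 2.0])\n",
    "_v = np.array([10.0, 11.0, 14.0])\n",
    "_f = interp1d(_t, _v, kind='linear', fill_value=(_v[0], _v[-1]), bounds_error=False)\n",
    "print(_f([-5.0, 0.5, 10.0]))  # -> [10.  10.5 14. ]\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}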