{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import scipy.stats as stats\n", "from pathlib import Path\n", "import glob\n", "import pickle\n", "\n", "import random\n", "import os\n", "\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.preprocessing import StandardScaler, LabelEncoder\n", "from tqdm import tqdm\n", "import tensorflow as tf\n", "import tensorflow.keras.layers as L\n", "import tensorflow.keras.models as M\n", "import tensorflow.keras.backend as K\n", "import tensorflow_addons as tfa\n", "from tensorflow_addons.layers import WeightNormalization\n", "from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping\n", "pd.options.mode.chained_assignment = None\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n" ] } ], "source": [ "from tensorflow.python.client import device_lib\n", "def get_available_gpus():\n", " local_device_protos = device_lib.list_local_devices()\n", " return [x.name for x in local_device_protos if x.device_type == 'GPU']\n", "print(get_available_gpus())\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
site_path_timestampsitepathts_waypoint
05a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c69
15a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c69017
25a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c615326
35a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c618763
45a0546857ecc773753327266_046cfa46be49fc1083481...5a0546857ecc773753327266046cfa46be49fc10834815c622328
\n", "
" ], "text/plain": [ " site_path_timestamp \\\n", "0 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "1 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "2 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "3 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "4 5a0546857ecc773753327266_046cfa46be49fc1083481... \n", "\n", " site path ts_waypoint \n", "0 5a0546857ecc773753327266 046cfa46be49fc10834815c6 9 \n", "1 5a0546857ecc773753327266 046cfa46be49fc10834815c6 9017 \n", "2 5a0546857ecc773753327266 046cfa46be49fc10834815c6 15326 \n", "3 5a0546857ecc773753327266 046cfa46be49fc10834815c6 18763 \n", "4 5a0546857ecc773753327266 046cfa46be49fc10834815c6 22328 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "# PATH = '../input/indoor-location-navigation'\n", "# test_files = glob.glob(f'{PATH}/test/*.txt')\n", "# test_files_pd = [xx.split('/')[-1:][0].replace('.txt','') for xx in test_files]\n", "# test_files_pd = pd.DataFrame(test_files_pd)\n", "# test_files_pd.columns = ['path']\n", "\n", "sample_submission = pd.read_csv(\"../input/indoor-location-navigation/sample_submission.csv\")\n", "sample_submission['site'] = [xx.split('_')[0] for xx in sample_submission.site_path_timestamp]\n", "sample_submission['path'] = [xx.split('_')[1] for xx in sample_submission.site_path_timestamp]\n", "sample_submission['ts_waypoint'] = [int(xx.split('_')[2]) for xx in sample_submission.site_path_timestamp]\n", "del sample_submission['floor']\n", "del sample_submission['x']\n", "del sample_submission['y']\n", "\n", "path2site = dict(zip(sample_submission.path,sample_submission.site))\n", "sample_submission.head()\n", "# test_path_site = sample_submission[['site','path','timestamp','site_path_timestamp']]\n", "# test_files_pd = pd.merge(test_files_pd,test_path_site,how='left',on='path')\n", "# test_files_pd.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "test_wifi_files = glob.glob(f'../input/wifi_lbl_encode/test/*.txt')\n", "test_sensor_files = glob.glob('../input/data_abstract/*_test_sensor.csv')\n", "\n", "# train_files = glob.glob('../input/indoor-navigation-and-location-wifi-features-alldata/*train.csv') #if A \n", "train_files = glob.glob('../input/data_abstract/*_train_waypoint_all.csv')#if B\n", "\n", " \n", "train_wifi_files = glob.glob(f'../input/wifi_lbl_encode/train/*/*/*.txt')\n", "train_sensor_files = glob.glob('../input/data_abstract/*_train_sensor.csv')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../input/data_abstract/5a0546857ecc773753327266_train_waypoint_all.csv',\n", " '../input/data_abstract/5c3c44b80379370013e0fd2b_train_waypoint_all.csv']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_files[:2]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "len train site list: 24\n" ] } ], "source": [ "# train_site_list = [xx.split('/')[-1].replace('_train.csv','') for xx in train_files] #if A \n", "# train_site_list = [xx.split('/')[-1].replace('_train_waypoint_all.csv','') for xx in train_files] #if B 204\n", "train_site_list = list(sample_submission.site.unique()) # if B 24\n", "train_wifi_files = [xx for xx in train_wifi_files if xx.split('/')[-3] in train_site_list]\n", "print('len train site list:',len(train_site_list))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10877" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_wifi_files)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 11503/11503 [01:01<00:00, 187.96it/s]\n" ] } ], "source": [ "ssidlist = set()\n", "bssidlist = set()\n", "for filename in tqdm(train_wifi_files+test_wifi_files):\n", " tmp = pd.read_csv(filename)\n", " ssidlist = ssidlist|set(tmp.ssid)\n", " bssidlist = bssidlist|set(tmp.bssid)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20044, 65952)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(set(ssidlist)),len(set(bssidlist))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "seqlen = 100" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "ssiddict = dict(zip(list(ssidlist)+['empty'],range(len(ssidlist)+1)))\n", "bssiddict = dict(zip(list(bssidlist)+['empty'],range(len(bssidlist)+1)))\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10877/10877 [00:43<00:00, 252.76it/s]\n" ] } ], "source": [ "train_wifi_pd_csv = []\n", "for filename in tqdm(train_wifi_files):\n", " tmp = pd.read_csv(filename)\n", " tmp['path'] = filename.split('/')[-1].replace('.txt','')\n", " tmp['floor'] = filename.split('/')[-2]\n", " tmp['site'] = filename.split('/')[-3]\n", " train_wifi_pd_csv.append(tmp)\n", "train_wifi_pd_csv = pd.concat(train_wifi_pd_csv).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/24 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ts_sensorx_accey_accez_accex_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatepathsitefloorfloor_orimagne
01.578463e+120.0236974.4509439.055649-0.0375370.0752560.030579-13.3911139.959412-30.3054810.227164-0.058094-0.2687735e15730aa280850006f3d0055a0546857ecc773753327266-1B10.008008
11.578463e+120.0506294.5521099.074799-0.043411-0.0057220.009796-12.0025639.959412-28.9550780.225032-0.059640-0.2672385e15730aa280850006f3d0055a0546857ecc773753327266-1B10.002013
\n", "" ], "text/plain": [ " ts_sensor x_acce y_acce z_acce x_magne y_magne z_magne \\\n", "0 1.578463e+12 0.023697 4.450943 9.055649 -0.037537 0.075256 0.030579 \n", "1 1.578463e+12 0.050629 4.552109 9.074799 -0.043411 -0.005722 0.009796 \n", "\n", " x_gyros y_gyros z_gyros x_rotate y_rotate z_rotate \\\n", "0 -13.391113 9.959412 -30.305481 0.227164 -0.058094 -0.268773 \n", "1 -12.002563 9.959412 -28.955078 0.225032 -0.059640 -0.267238 \n", "\n", " path site floor floor_ori \\\n", "0 5e15730aa280850006f3d005 5a0546857ecc773753327266 -1 B1 \n", "1 5e15730aa280850006f3d005 5a0546857ecc773753327266 -1 B1 \n", "\n", " magne \n", "0 0.008008 \n", "1 0.002013 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_sensor_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "floor_map = {\"B3\":-3,\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n", " \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}\n", "train_wifi_pd_csv = train_wifi_pd_csv[train_wifi_pd_csv.floor.isin(floor_map)].reset_index(drop=True)\n", "train_wifi_pd_csv['floorNo'] = train_wifi_pd_csv['floor'].apply(lambda x: floor_map[x])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamppathfloorsitefloorNo
0157846261882663159162932-4615784626032775e15730aa280850006f3d005B15a0546857ecc773753327266-1
115784626188263283565513-4915784626182725e15730aa280850006f3d005B15a0546857ecc773753327266-1
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp \\\n", "0 1578462618826 63159 162932 -46 1578462603277 \n", "1 1578462618826 32835 65513 -49 1578462618272 \n", "\n", " path floor site floorNo \n", "0 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1 \n", "1 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_wifi_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:02<00:00, 208.79it/s]\n" ] } ], "source": [ "test_wifi_pd_csv = []\n", "for filename in tqdm(test_wifi_files):\n", " tmp = pd.read_csv(filename)\n", " tmp['path'] = filename.split('/')[-1].replace('.txt','')\n", " test_wifi_pd_csv.append(tmp)\n", "test_wifi_pd_csv = pd.concat(test_wifi_pd_csv).reset_index(drop=True)\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 24/24 [00:07<00:00, 3.04it/s]\n" ] } ], "source": [ "test_sensor_pd_csv = []\n", "for filename in tqdm(test_sensor_files):\n", " tmp = pd.read_csv(filename)\n", " test_sensor_pd_csv.append(tmp)\n", "test_sensor_pd_csv = pd.concat(test_sensor_pd_csv).reset_index(drop=True)\n", "\n", "test_sensor_pd_csv['magne'] = test_sensor_pd_csv['x_magne']**2+\\\n", " test_sensor_pd_csv['y_magne']**2+test_sensor_pd_csv['z_magne']**2" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "standcols_sensor = ['x_acce', 'y_acce', 'z_acce', 'x_magne', 'y_magne',\n", " 'z_magne', 'x_gyros', 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate',\n", " 'z_rotate','magne']\n", "\n", "ss_sensor = StandardScaler() \n", "ss_sensor.fit(train_sensor_pd_csv.loc[:,standcols_sensor])\n", "train_sensor_pd_csv.loc[:,standcols_sensor] = ss_sensor.transform(train_sensor_pd_csv.loc[:,standcols_sensor])\n", "test_sensor_pd_csv.loc[:,standcols_sensor] = ss_sensor.transform(test_sensor_pd_csv.loc[:,standcols_sensor]) " ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamppath
019617053728318-34157182856015614f45baa63b4d3a700126af6
119614383893116-35157182856015914f45baa63b4d3a700126af6
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp path\n", "0 1961 70537 28318 -34 1571828560156 14f45baa63b4d3a700126af6\n", "1 1961 43838 93116 -35 1571828560159 14f45baa63b4d3a700126af6" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "submission = pd.read_csv('submission_floor.csv')\n", "submission['path'] = [xx.split('_')[1] for xx in submission['site_path_timestamp']]\n", "test_path_floor_dict = dict(zip(submission.path,submission.floor))\n", "test_wifi_pd_csv['floorNo'] = [test_path_floor_dict[xx] for xx in test_wifi_pd_csv['path']]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "standcols = ['rssi','floorNo']\n", "ss = StandardScaler()\n", "ss.fit(train_wifi_pd_csv.loc[:,standcols])\n", "train_wifi_pd_csv.loc[:,standcols] = ss.transform(train_wifi_pd_csv.loc[:,standcols])\n", "test_wifi_pd_csv.loc[:,standcols] = ss.transform(test_wifi_pd_csv.loc[:,standcols])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamppathfloorsitefloorNo
01578462618826631591629323.10592615784626032775e15730aa280850006f3d005B15a0546857ecc773753327266-1.340327
1157846261882632835655132.81072715784626182725e15730aa280850006f3d005B15a0546857ecc773753327266-1.340327
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp \\\n", "0 1578462618826 63159 162932 3.105926 1578462603277 \n", "1 1578462618826 32835 65513 2.810727 1578462618272 \n", "\n", " path floor site floorNo \n", "0 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1.340327 \n", "1 5e15730aa280850006f3d005 B1 5a0546857ecc773753327266 -1.340327 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_wifi_pd_csv.head(2)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10877/10877 [02:51<00:00, 63.29it/s] \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_meanwifi_medianwifi_std
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.3536030.3507371.088208
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.2997480.3507371.040317
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean wifi_median wifi_std \n", "0 0.206 0.353603 0.350737 1.088208 \n", "1 0.220 0.299748 0.350737 1.040317 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_wifi_pd = []\n", "for path,tmp in tqdm(train_wifi_pd_csv.groupby('path')):\n", " tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])\n", " tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])\n", " ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) \n", " ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))\n", " ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))\n", " \n", " ss = pd.concat([ss1,ss2,ss3],axis=1)\n", " ss['path'] = tmp.path.unique()[0]\n", " ss['floorNo'] = tmp.floorNo.unique()[0]\n", " ss['floor'] = tmp.floor.unique()[0]\n", " ss['site'] = tmp.site.unique()[0]\n", " ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500\n", " ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()\n", " ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()\n", " ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()\n", "\n", " train_wifi_pd.append(ss)\n", "train_wifi_pd = pd.concat(train_wifi_pd)\n", "train_wifi_pd = train_wifi_pd.reset_index()\n", "train_wifi_pd.head(2)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:15<00:00, 41.61it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_stdsite
01180[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.0330935da1389e4db8ce0c98bd0547
13048[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.9915295da1389e4db8ce0c98bd0547
24924[9522, 18669, 7007, 19396, 15215, 4851, 15215,...[10783, 4531, 35106, 19211, 48757, 11767, 3933...[1.4331324770012857, 1.2363332562632146, 1.039...00ff0c9a71cc37a2ebdd0f050.8459570.048-0.149461-0.4364600.8155215da1389e4db8ce0c98bd0547
36816[18669, 4851, 15215, 7007, 9522, 19396, 19396,...[4531, 11767, 39335, 35106, 10783, 19211, 5710...[1.826730918477428, 1.1379336458941791, 1.0395...00ff0c9a71cc37a2ebdd0f050.8459570.052-0.118554-0.5348600.9118025da1389e4db8ce0c98bd0547
48693[18669, 15215, 7007, 4851, 9522, 19396, 15215,...[4531, 48757, 35106, 11767, 10783, 19211, 3933...[2.1219297495845346, 1.3347328666322502, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.062-0.182526-0.5348600.9053395da1389e4db8ce0c98bd0547
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1180 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 3048 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "2 4924 [9522, 18669, 7007, 19396, 15215, 4851, 15215,... \n", "3 6816 [18669, 4851, 15215, 7007, 9522, 19396, 19396,... \n", "4 8693 [18669, 15215, 7007, 4851, 9522, 19396, 15215,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "2 [10783, 4531, 35106, 19211, 48757, 11767, 3933... \n", "3 [4531, 11767, 39335, 35106, 10783, 19211, 5710... \n", "4 [4531, 48757, 35106, 11767, 10783, 19211, 3933... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "2 [1.4331324770012857, 1.2363332562632146, 1.039... \n", "3 [1.826730918477428, 1.1379336458941791, 1.0395... \n", "4 [2.1219297495845346, 1.3347328666322502, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "2 00ff0c9a71cc37a2ebdd0f05 0.845957 0.048 -0.149461 -0.436460 \n", "3 00ff0c9a71cc37a2ebdd0f05 0.845957 0.052 -0.118554 -0.534860 \n", "4 00ff0c9a71cc37a2ebdd0f05 0.845957 0.062 -0.182526 -0.534860 \n", "\n", " wifi_std site \n", "0 1.033093 5da1389e4db8ce0c98bd0547 \n", "1 0.991529 5da1389e4db8ce0c98bd0547 \n", "2 0.815521 5da1389e4db8ce0c98bd0547 \n", "3 0.911802 5da1389e4db8ce0c98bd0547 \n", "4 0.905339 5da1389e4db8ce0c98bd0547 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd = []\n", "# for filename in tqdm(test_wifi_files):\n", "for path,tmp in tqdm(test_wifi_pd_csv.groupby('path')):\n", " #tmp = pd.read_csv(filename)\n", " #tmp['rssi'] = tmp['rssi']/999\n", " tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])\n", " tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])\n", " ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) \n", " ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))\n", " ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \\\n", " list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))\n", " ss = pd.concat([ss1,ss2,ss3],axis=1)\n", " #ss['path'] = filename.split('/')[-1].replace('.txt','')\n", " ss['path'] = tmp.path.unique()[0]\n", " ss['floorNo'] = tmp.floorNo.unique()[0]\n", " ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500\n", " ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()\n", " ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()\n", " ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()\n", "\n", " test_wifi_pd.append(ss)\n", "test_wifi_pd = pd.concat(test_wifi_pd)\n", "test_wifi_pd = test_wifi_pd.reset_index()\n", "test_wifi_pd['site'] = [path2site[xx] for xx in test_wifi_pd.path]\n", "test_wifi_pd.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(37678, 11)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd.shape" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 204/204 [00:00<00:00, 266.90it/s]\n" ] } ], "source": [ "# filename = train_files[0]\n", "train_xy = []\n", "for filename in tqdm(train_files):\n", " tmp = pd.read_csv(filename,index_col=0)\n", " ss = tmp[['path','site','floor','ts_waypoint','x','y']]\n", " train_xy.append(ss)\n", "train_xy = pd.concat(train_xy).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(166681, 6)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_xy=train_xy.drop_duplicates()\n", "train_xy.shape" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ts_sensorx_accey_accez_accex_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatepathsitefloorfloor_orimagne
01.578463e+120.5446942.170451-0.215846-0.0614730.2691890.06898-0.6244340.608398-0.3271023.204804-1.107252-0.3625155e15730aa280850006f3d0055a0546857ecc773753327266-1B1-0.296067
11.578463e+120.5704602.231734-0.209542-0.070594-0.0246650.02118-0.5614600.608398-0.1709873.168680-1.130263-0.3603215e15730aa280850006f3d0055a0546857ecc773753327266-1B1-0.298708
\n", "
" ], "text/plain": [ " ts_sensor x_acce y_acce z_acce x_magne y_magne z_magne \\\n", "0 1.578463e+12 0.544694 2.170451 -0.215846 -0.061473 0.269189 0.06898 \n", "1 1.578463e+12 0.570460 2.231734 -0.209542 -0.070594 -0.024665 0.02118 \n", "\n", " x_gyros y_gyros z_gyros x_rotate y_rotate z_rotate \\\n", "0 -0.624434 0.608398 -0.327102 3.204804 -1.107252 -0.362515 \n", "1 -0.561460 0.608398 -0.170987 3.168680 -1.130263 -0.360321 \n", "\n", " path site floor floor_ori \\\n", "0 5e15730aa280850006f3d005 5a0546857ecc773753327266 -1 B1 \n", "1 5e15730aa280850006f3d005 5a0546857ecc773753327266 -1 B1 \n", "\n", " magne \n", "0 -0.296067 \n", "1 -0.298708 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_sensor_pd_csv.head(2)\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['ts_sensor', 'x_acce', 'y_acce', 'z_acce', 'x_magne', 'y_magne',\n", " 'z_magne', 'x_gyros', 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate',\n", " 'z_rotate', 'path', 'site', 'floor', 'floor_ori', 'magne'],\n", " dtype='object')" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_sensor_pd_csv.columns" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "train_sensor_pd_csv_group = dict(list(train_sensor_pd_csv.groupby('path',as_index=False)))\n" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "train_xy_group = dict(list(train_xy.groupby('path',as_index=False)))" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ts_sensorx_accey_accez_accex_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatepathsitefloorfloor_orimagne
17347831.560501e+12-0.994056-0.2483050.177490-0.3703490.0495320.3325080.238923-0.3983482.083426-1.5888270.4983321.4166405d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.265570
17347841.560501e+12-0.377300-0.0369210.058472-1.068294-3.4624300.1745100.238923-0.3983482.083426-1.5014650.3815651.4182805d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F30.310682
17347851.560501e+120.072799-0.1747010.075813-0.658117-2.5191850.3239450.145778-0.4312562.314335-1.3063270.3019931.4187075d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.000635
17347861.560501e+12-1.0885370.0341500.154237-0.209088-1.0444880.3165750.269925-0.4640912.468334-1.3094130.3131581.4193935d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.247187
17347871.560501e+12-0.625255-0.065917-0.0635040.042315-0.2964810.6976020.176849-0.4312562.160513-1.3458930.3498691.4207495d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.255654
.........................................................
17356791.560501e+120.5624170.747711-0.869028-0.123092-0.3640890.1940930.642367-2.3707234.084173-0.7059091.2521991.4233965d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.289443
17356801.560501e+120.6002120.790128-0.761443-0.447272-0.1727820.2761460.642367-2.4364664.238171-0.7024711.2111021.4241295d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.256173
17356811.560501e+120.6798030.779619-0.564002-0.684603-0.2577770.3092400.611365-2.4364664.161084-0.6994891.1488881.4250785d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.204516
17356821.560501e+120.6683580.855755-0.537991-0.694506-0.3563930.2675820.580293-2.3049794.084173-0.6922031.0847121.4259735d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.202040
17356831.560501e+120.8143710.855755-0.516908-0.625874-0.2153070.2541060.611365-2.3707233.930350-0.6898971.0280671.4267645d073b814a19c000086c558b5c3c44b80379370013e0fd2b2F3-0.221778
\n", "

901 rows × 18 columns

\n", "
" ], "text/plain": [ " ts_sensor x_acce y_acce z_acce x_magne y_magne \\\n", "1734783 1.560501e+12 -0.994056 -0.248305 0.177490 -0.370349 0.049532 \n", "1734784 1.560501e+12 -0.377300 -0.036921 0.058472 -1.068294 -3.462430 \n", "1734785 1.560501e+12 0.072799 -0.174701 0.075813 -0.658117 -2.519185 \n", "1734786 1.560501e+12 -1.088537 0.034150 0.154237 -0.209088 -1.044488 \n", "1734787 1.560501e+12 -0.625255 -0.065917 -0.063504 0.042315 -0.296481 \n", "... ... ... ... ... ... ... \n", "1735679 1.560501e+12 0.562417 0.747711 -0.869028 -0.123092 -0.364089 \n", "1735680 1.560501e+12 0.600212 0.790128 -0.761443 -0.447272 -0.172782 \n", "1735681 1.560501e+12 0.679803 0.779619 -0.564002 -0.684603 -0.257777 \n", "1735682 1.560501e+12 0.668358 0.855755 -0.537991 -0.694506 -0.356393 \n", "1735683 1.560501e+12 0.814371 0.855755 -0.516908 -0.625874 -0.215307 \n", "\n", " z_magne x_gyros y_gyros z_gyros x_rotate y_rotate z_rotate \\\n", "1734783 0.332508 0.238923 -0.398348 2.083426 -1.588827 0.498332 1.416640 \n", "1734784 0.174510 0.238923 -0.398348 2.083426 -1.501465 0.381565 1.418280 \n", "1734785 0.323945 0.145778 -0.431256 2.314335 -1.306327 0.301993 1.418707 \n", "1734786 0.316575 0.269925 -0.464091 2.468334 -1.309413 0.313158 1.419393 \n", "1734787 0.697602 0.176849 -0.431256 2.160513 -1.345893 0.349869 1.420749 \n", "... ... ... ... ... ... ... ... \n", "1735679 0.194093 0.642367 -2.370723 4.084173 -0.705909 1.252199 1.423396 \n", "1735680 0.276146 0.642367 -2.436466 4.238171 -0.702471 1.211102 1.424129 \n", "1735681 0.309240 0.611365 -2.436466 4.161084 -0.699489 1.148888 1.425078 \n", "1735682 0.267582 0.580293 -2.304979 4.084173 -0.692203 1.084712 1.425973 \n", "1735683 0.254106 0.611365 -2.370723 3.930350 -0.689897 1.028067 1.426764 \n", "\n", " path site floor floor_ori \\\n", "1734783 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1734784 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1734785 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1734786 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1734787 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "... ... ... ... ... \n", "1735679 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1735680 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1735681 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1735682 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "1735683 5d073b814a19c000086c558b 5c3c44b80379370013e0fd2b 2 F3 \n", "\n", " magne \n", "1734783 -0.265570 \n", "1734784 0.310682 \n", "1734785 -0.000635 \n", "1734786 -0.247187 \n", "1734787 -0.255654 \n", "... ... \n", "1735679 -0.289443 \n", "1735680 -0.256173 \n", "1735681 -0.204516 \n", "1735682 -0.202040 \n", "1735683 -0.221778 \n", "\n", "[901 rows x 18 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_sensor_pd_csv_group['5d073b814a19c000086c558b']" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10877/10877 [04:10<00:00, 43.34it/s]\n" ] } ], "source": [ "import scipy.stats as stats\n", "import scipy\n", "train_all = []\n", "\n", "for path,train_wifi_pd_x in tqdm(train_wifi_pd.groupby('path')):\n", " # path = '5e15730aa280850006f3d005'\n", " train_y = train_xy_group[path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)\n", " train_sensor = train_sensor_pd_csv_group[path][['ts_sensor', 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate', 'path']].reset_index(drop=True)\n", "\n", " train_wifi_pd_x['ts_waypoint'] = 0\n", " if len(train_y)==0:\n", " print(path,'have no waypoint')\n", " if len(train_y)>0:\n", " ts_point_min = train_y.ts_waypoint.min()\n", " ts_point_max = train_y.ts_waypoint.max()\n", " tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()\n", " tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]\n", " \n", " ts_sensor_min = train_sensor.ts_sensor.min()\n", " ts_sensor_max = train_sensor.ts_sensor.max()\n", " tmp3 = train_wifi_pd_x[['timestamp']].drop_duplicates()\n", " tmp3 = tmp3[(tmp3.timestamp<=ts_sensor_max)&(tmp3.timestamp>=ts_sensor_min)]\n", " \n", " if len(tmp2)>0:\n", " T_rel = train_y['ts_waypoint']\n", " T_rel2 = train_sensor['ts_sensor']\n", " T_ref = tmp2['timestamp']\n", " T_ref2 = tmp3['timestamp']\n", " xy_hat = scipy.interpolate.interp1d(T_rel, train_y[['x','y']], axis=0)(T_ref)\n", " sensor_hat = scipy.interpolate.interp1d(T_rel2, train_sensor[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']], axis=0)(T_ref2)\n", " tmp2[['x','y']] = xy_hat\n", " tmp3[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = sensor_hat\n", " tmp2['path'] = path\n", " tmp3['path'] = path\n", " train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp2,how='left',on=['path','timestamp'])\n", " train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp3,how='left',on=['path','timestamp'])\n", " train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='ffill')\n", " train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='bfill')\n", " train_all.append(train_wifi_pd_x)\n", " \n", "train_all = pd.concat(train_all).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(258097, 27)" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.shape" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((11756, 27), (11756, 27))" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all[train_all.x.isna()].shape,train_all[train_all.y.isna()].shape" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "train_all = train_all[~train_all.x.isna()].reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_mean...z_accex_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotate
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.353603...0.4618060.6341990.116776-0.1101900.983807-0.5955780.852319-0.6305920.8507561.353243
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.299748...-0.2464821.202698-0.395429-0.1675471.256958-0.4969991.898839-0.9770610.8192551.276234
21560501001590[18304, 19396, 7702, 7702, 19396, 7702, 12721,...[10121, 57287, 31140, 61027, 55262, 22353, 603...[3.1059258532748903, 3.1059258532748903, 2.810...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2380.268875...-0.704095-0.4600870.171733-0.2424351.697688-0.0367961.083052-0.4923611.0595351.177969
31560501003516[19396, 7702, 19396, 18304, 7702, 7702, 7702, ...[57287, 31140, 55262, 10121, 22353, 53865, 432...[3.1059258532748903, 2.8107270221677836, 2.613...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2580.230216...0.6980480.366486-0.610369-0.0435911.1079540.1933430.313588-0.5720310.8608521.132166
41560501005442[7702, 18304, 19396, 19396, 7702, 7702, 7702, ...[31140, 10121, 55262, 57287, 43265, 61027, 612...[2.8107270221677836, 2.6139278014297127, 2.613...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2820.210465...0.136020-0.585768-0.7076500.8534781.480396-0.2011180.544498-0.4707530.6578641.078007
\n", "

5 rows × 27 columns

\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "2 1560501001590 [18304, 19396, 7702, 7702, 19396, 7702, 12721,... \n", "3 1560501003516 [19396, 7702, 19396, 18304, 7702, 7702, 7702, ... \n", "4 1560501005442 [7702, 18304, 19396, 19396, 7702, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "2 [10121, 57287, 31140, 61027, 55262, 22353, 603... \n", "3 [57287, 31140, 55262, 10121, 22353, 53865, 432... \n", "4 [31140, 10121, 55262, 57287, 43265, 61027, 612... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "2 [3.1059258532748903, 3.1059258532748903, 2.810... \n", "3 [3.1059258532748903, 2.8107270221677836, 2.613... \n", "4 [2.8107270221677836, 2.6139278014297127, 2.613... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "2 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "3 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "4 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean ... z_acce x_magne y_magne z_magne x_gyros \\\n", "0 0.206 0.353603 ... 0.461806 0.634199 0.116776 -0.110190 0.983807 \n", "1 0.220 0.299748 ... -0.246482 1.202698 -0.395429 -0.167547 1.256958 \n", "2 0.238 0.268875 ... -0.704095 -0.460087 0.171733 -0.242435 1.697688 \n", "3 0.258 0.230216 ... 0.698048 0.366486 -0.610369 -0.043591 1.107954 \n", "4 0.282 0.210465 ... 0.136020 -0.585768 -0.707650 0.853478 1.480396 \n", "\n", " y_gyros z_gyros x_rotate y_rotate z_rotate \n", "0 -0.595578 0.852319 -0.630592 0.850756 1.353243 \n", "1 -0.496999 1.898839 -0.977061 0.819255 1.276234 \n", "2 -0.036796 1.083052 -0.492361 1.059535 1.177969 \n", "3 0.193343 0.313588 -0.572031 0.860852 1.132166 \n", "4 -0.201118 0.544498 -0.470753 0.657864 1.078007 \n", "\n", "[5 rows x 27 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.head()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(246341, 27)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.shape" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.preprocessing import StandardScaler, LabelEncoder\n", "N_SPLITS = 10\n", "SEED = 0#42\n", "for fold, (trn_idx, val_idx) in enumerate(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED).split(train_all['site'], train_all['site'])):\n", " train_all.loc[val_idx, 'fold'] = fold\n", " " ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "# train_all[train_all.path=='5dd3824044333f00067aa2c4'].fold.value_counts()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# train_all[train_all.site=='5c3c44b80379370013e0fd2b'].fold.value_counts()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_mean...x_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatefold
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.353603...0.6341990.116776-0.1101900.983807-0.5955780.852319-0.6305920.8507561.3532436.0
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.299748...1.202698-0.395429-0.1675471.256958-0.4969991.898839-0.9770610.8192551.2762348.0
\n", "

2 rows × 28 columns

\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean ... x_magne y_magne z_magne x_gyros y_gyros \\\n", "0 0.206 0.353603 ... 0.634199 0.116776 -0.110190 0.983807 -0.595578 \n", "1 0.220 0.299748 ... 1.202698 -0.395429 -0.167547 1.256958 -0.496999 \n", "\n", " z_gyros x_rotate y_rotate z_rotate fold \n", "0 0.852319 -0.630592 0.850756 1.353243 6.0 \n", "1 1.898839 -0.977061 0.819255 1.276234 8.0 \n", "\n", "[2 rows x 28 columns]" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.head(2)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_mean...x_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatefold
01560500997770[7702, 19396, 18304, 19396, 7702, 7702, 19396,...[61027, 55262, 10121, 57287, 45809, 53865, 261...[3.204325463643926, 3.1059258532748903, 2.9091...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2060.353603...0.6341990.116776-0.1101900.983807-0.5955780.852319-0.6305920.8507561.3532436.0
11560500999681[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...[10121, 31140, 61027, 55262, 57287, 53865, 458...[2.712327411798748, 2.712327411798748, 2.61392...5d073b814a19c000086c558b0.299386F35c3c44b80379370013e0fd2b0.2200.299748...1.202698-0.395429-0.1675471.256958-0.4969991.898839-0.9770610.8192551.2762348.0
\n", "

2 rows × 28 columns

\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1560500997770 [7702, 19396, 18304, 19396, 7702, 7702, 19396,... \n", "1 1560500999681 [18304, 7702, 7702, 19396, 19396, 7702, 7702, ... \n", "\n", " bssid \\\n", "0 [61027, 55262, 10121, 57287, 45809, 53865, 261... \n", "1 [10121, 31140, 61027, 55262, 57287, 53865, 458... \n", "\n", " rssi \\\n", "0 [3.204325463643926, 3.1059258532748903, 2.9091... \n", "1 [2.712327411798748, 2.712327411798748, 2.61392... \n", "\n", " path floorNo floor site \\\n", "0 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "1 5d073b814a19c000086c558b 0.299386 F3 5c3c44b80379370013e0fd2b \n", "\n", " wifi_len wifi_mean ... x_magne y_magne z_magne x_gyros y_gyros \\\n", "0 0.206 0.353603 ... 0.634199 0.116776 -0.110190 0.983807 -0.595578 \n", "1 0.220 0.299748 ... 1.202698 -0.395429 -0.167547 1.256958 -0.496999 \n", "\n", " z_gyros x_rotate y_rotate z_rotate fold \n", "0 0.852319 -0.630592 0.850756 1.353243 6.0 \n", "1 1.898839 -0.977061 0.819255 1.276234 8.0 \n", "\n", "[2 rows x 28 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all.head(2)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_stdsite
01180[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.0330935da1389e4db8ce0c98bd0547
13048[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.9915295da1389e4db8ce0c98bd0547
\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1180 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 3048 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "\n", " wifi_std site \n", "0 1.033093 5da1389e4db8ce0c98bd0547 \n", "1 0.991529 5da1389e4db8ce0c98bd0547 " ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_wifi_pd.head(2)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "test_sensor_pd_csv_group = dict(list(test_sensor_pd_csv.groupby('path',as_index=False)))\n" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:09<00:00, 64.84it/s]\n" ] } ], "source": [ "import scipy.stats as stats\n", "import scipy\n", "test_all = []\n", "\n", "for path,train_wifi_pd_x in tqdm(test_wifi_pd.groupby('path')):\n", " # path = '5e15730aa280850006f3d005'\n", " train_sensor = test_sensor_pd_csv_group[path][['ts_sensor', 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate', 'path']].reset_index(drop=True)\n", "\n", " train_wifi_pd_x['ts_waypoint'] = 0\n", "\n", " ts_point_min = train_sensor.ts_sensor.min()\n", " ts_point_max = train_sensor.ts_sensor.max()\n", " tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()\n", " tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]\n", " if len(tmp2)>0:\n", " T_rel2 = train_sensor['ts_sensor']\n", " T_ref = tmp2['timestamp']\n", " sensor_hat = scipy.interpolate.interp1d(T_rel2, train_sensor[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']], axis=0)(T_ref)\n", " tmp2[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = sensor_hat\n", " tmp2['path'] = path\n", " train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp2,how='left',on=['path','timestamp'])\n", " train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='ffill')\n", " train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', \n", " 'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', \n", " 'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='bfill')\n", " \n", " test_all.append(train_wifi_pd_x)\n", " \n", "test_all = pd.concat(test_all).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from tqdm import tqdm\n", "from sklearn.preprocessing import LabelEncoder\n", "from dask.distributed import wait\n", "\n", "SENSORS = ['acce','acce_uncali','gyro',\n", " 'gyro_uncali','magn','magn_uncali','ahrs']\n", "\n", "NFEAS = {\n", " 'acce': 3,\n", " 'acce_uncali': 3,\n", " 'gyro': 3,\n", " 'gyro_uncali': 3,\n", " 'magn': 3,\n", " 'magn_uncali': 3,\n", " 'ahrs': 3,\n", " 'wifi': 1,\n", " 'ibeacon': 1,\n", " 'waypoint': 3\n", "}\n", "\n", "ACOLS = ['timestamp','x','y','z']\n", " \n", "FIELDS = {\n", " 'acce': ACOLS,\n", " 'acce_uncali': ACOLS,\n", " 'gyro': ACOLS,\n", " 'gyro_uncali': ACOLS,\n", " 'magn': ACOLS,\n", " 'magn_uncali': ACOLS,\n", " 'ahrs': ACOLS,\n", " 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n", " 'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n", " 'waypoint': ['timestamp','x','y']\n", "}\n", "\n", "def to_frame(data, col):\n", " cols = FIELDS[col]\n", " is_dummy = False\n", " if data.shape[0]>0:\n", " df = pd.DataFrame(data, columns=cols)\n", " else:\n", " df = create_dummy_df(cols)\n", " is_dummy = True\n", " for col in df.columns:\n", " if 'timestamp' in col:\n", " df[col] = df[col].astype('int64')\n", " return df, is_dummy\n", "\n", "def create_dummy_df(cols):\n", " df = pd.DataFrame()\n", " for col in cols:\n", " df[col] = [0]\n", " if col in ['ssid','bssid']:\n", " df[col] = df[col].map(str)\n", " return df\n", "\n", "from dataclasses import dataclass\n", "\n", "import numpy as np\n", "\n", "\n", "@dataclass\n", "class ReadData:\n", " acce: np.ndarray\n", " acce_uncali: np.ndarray\n", " gyro: np.ndarray\n", " gyro_uncali: np.ndarray\n", " magn: np.ndarray\n", " magn_uncali: np.ndarray\n", " ahrs: np.ndarray\n", " wifi: np.ndarray\n", " ibeacon: np.ndarray\n", " waypoint: np.ndarray\n", "\n", "\n", "def read_data_file(data_filename):\n", " acce = []\n", " acce_uncali = []\n", " gyro = []\n", " gyro_uncali = []\n", " magn = []\n", " magn_uncali = []\n", " ahrs = []\n", " wifi = []\n", " ibeacon = []\n", " waypoint = []\n", "\n", " with open(data_filename, 'r', encoding='utf-8') as file:\n", " lines = file.readlines()\n", "\n", " for line_data in lines:\n", " line_data = line_data.strip()\n", " if not line_data or line_data[0] == '#':\n", " continue\n", "\n", " line_data = line_data.split('\\t')\n", "\n", " if line_data[1] == 'TYPE_ACCELEROMETER':\n", " acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n", " acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_GYROSCOPE':\n", " gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n", " gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n", " magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n", " magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_ROTATION_VECTOR':\n", " if len(line_data)>=5:\n", " ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_WIFI':\n", " sys_ts = line_data[0]\n", " ssid = line_data[2]\n", " bssid = line_data[3]\n", " rssi = line_data[4]\n", " lastseen_ts = line_data[6]\n", " wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n", " wifi.append(wifi_data)\n", " continue\n", "\n", " if line_data[1] == 'TYPE_BEACON':\n", " ts = line_data[0]\n", " uuid = line_data[2]\n", " major = line_data[3]\n", " minor = line_data[4]\n", " rssi = line_data[6]\n", " lastts = line_data[-1]\n", " ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]\n", " ibeacon.append(ibeacon_data)\n", " continue\n", "\n", " if line_data[1] == 'TYPE_WAYPOINT':\n", " waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n", "\n", " acce = np.array(acce)\n", " acce_uncali = np.array(acce_uncali)\n", " gyro = np.array(gyro)\n", " gyro_uncali = np.array(gyro_uncali)\n", " magn = np.array(magn)\n", " magn_uncali = np.array(magn_uncali)\n", " ahrs = np.array(ahrs)\n", " wifi = np.array(wifi)\n", " ibeacon = np.array(ibeacon)\n", " waypoint = np.array(waypoint)\n", "\n", " return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "def get_test_dfs(PATH, test_files):\n", " dtest = get_test_df(PATH)\n", " buildings = set(dtest['building'].values.tolist())\n", " dws = {}\n", " ntest_files = []\n", " for fname in tqdm(test_files):\n", " path = fname.split('/')[-1].split('.')[0]\n", " mask = dtest['path'] == path\n", " dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)\n", " ntest_files.append(fname)\n", " return dws\n", "\n", "def get_test_df(PATH):\n", " dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n", " dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])\n", " dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])\n", " dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])\n", " dtest['timestamp'] = dtest['timestamp'].astype('int64')\n", " dtest = dtest.sort_values(['path','timestamp']).reset_index(drop=True)\n", " return dtest\n", "\n", "def get_time_gap(name):\n", " data = read_data_file(name)\n", " db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n", "# print(db,no_ibeacon)\n", " \n", " if no_ibeacon==0:\n", " gap = db['last_timestamp'] - db['timestamp']\n", " assert gap.unique().shape[0]==1\n", " return gap.values[0],no_ibeacon\n", " \n", " if no_ibeacon==1:\n", " # Group wifis by timestamp\n", " wifi_groups = pd.DataFrame(data.wifi).groupby(0) \n", " # Find which one is the most recent of all time points.\n", " est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max() \n", " return est_ts,no_ibeacon\n", "\n", " \n", "\n", "def fix_timestamp_test(df, gap):\n", " df['real_timestamp'] = df['timestamp'] + gap\n", " return df" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n", " '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n", " '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n", " '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n", "test_files_ori[:4]" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ec2-user/anaconda3/lib/python3.7/site-packages/distributed/dashboard/core.py:79: UserWarning: \n", "Port 8787 is already in use. \n", "Perhaps you already have a cluster running?\n", "Hosting the diagnostics dashboard on a random port instead.\n", " warnings.warn(\"\\n\" + msg)\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "

Client

\n", "\n", "
\n", "

Cluster

\n", "
    \n", "
  • Workers: 8
  • \n", "
  • Cores: 8
  • \n", "
  • Memory: 66.71 GB
  • \n", "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import dask\n", "from dask.distributed import Client, wait, LocalCluster\n", "\n", "# set n_workers to number of cores\n", "client = Client(n_workers=8, \n", " threads_per_worker=1)\n", "client" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 626/626 [00:00<00:00, 9976.27it/s]\n", "100%|██████████| 626/626 [00:18<00:00, 33.98it/s] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.8 s, sys: 194 ms, total: 3 s\n", "Wall time: 18.5 s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "%%time\n", "futures = []\n", "for fname in tqdm(test_files_ori, total=len(test_files_ori)):\n", " f = client.submit(get_time_gap,fname)\n", " futures.append(f)\n", " \n", "testpath2gap = {}\n", "for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n", " testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()\n", " " ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "test_all['timestamp'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['timestamp'],test_all['path'])]\n", "# test_all['ts_waypoint'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['ts_waypoint'],test_all['path'])]" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "# test_all['timestamp'] = (test_all['timestamp']-train_all_timestamp_min)/(train_all_timestamp_max-train_all_timestamp_min)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_std...z_accex_magney_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotate
01573190312033[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.033093...0.109450-0.465064-0.3721430.3039761.5887860.5581750.870030.0736501.1989000.804293
11573190313901[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.991529...-0.080417-0.1533591.2926010.5009781.5887930.2487230.800810.2037681.5160820.877682
\n", "

2 rows × 24 columns

\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 1573190312033 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 1573190313901 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "\n", " wifi_std ... z_acce x_magne y_magne z_magne x_gyros y_gyros \\\n", "0 1.033093 ... 0.109450 -0.465064 -0.372143 0.303976 1.588786 0.558175 \n", "1 0.991529 ... -0.080417 -0.153359 1.292601 0.500978 1.588793 0.248723 \n", "\n", " z_gyros x_rotate y_rotate z_rotate \n", "0 0.87003 0.073650 1.198900 0.804293 \n", "1 0.80081 0.203768 1.516082 0.877682 \n", "\n", "[2 rows x 24 columns]" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_all.head(2)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "\n", "ss2 = StandardScaler()\n", "ss2.fit(train_all.loc[:,['timestamp']])\n", "train_all.loc[:,['timestamp']] = ss2.transform(train_all.loc[:,['timestamp']])\n", "test_all.loc[:,['timestamp']] = ss2.transform(test_all.loc[:,['timestamp']])" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "# train_all_floor_min = train_all.floor.min()\n", "# train_all_floor_max = train_all.floor.max()\n", "# train_all['floor'] = (train_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)\n", "# test_all['floor'] = (test_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "sitelist = list(sorted(set(train_all.site)))\n", "sitedict = dict(zip(sitelist,range(len(sitelist))))\n", "train_all['site_id'] = train_all['site'].apply(lambda x: sitedict[x])\n", "test_all['site_id'] = test_all['site'].apply(lambda x: sitedict[x])\n" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "def MCRMSE(y_true, y_pred):\n", " colwise_mse = tf.reduce_mean(tf.square(y_true - y_pred), axis=1)\n", " return tf.reduce_mean(tf.sqrt(colwise_mse), axis=1)\n", "\n", "def gru_layer(hidden_dim, dropout):\n", " return L.Bidirectional(L.GRU(\n", " hidden_dim, dropout=dropout, return_sequences=True, kernel_initializer='orthogonal'))\n", "\n", "def pandas_list_to_array(df):\n", " \"\"\"\n", " Input: dataframe of shape (x, y), containing list of length l\n", " Return: np.array of shape (x, l, y)\n", " \"\"\"\n", " \n", " return np.transpose(\n", " np.array(df.values.tolist()),\n", " (0, 2, 1)\n", " )\n", "\n", "def preprocess_inputs(df, cols=['ssid','bssid', 'rssi']):\n", " return pandas_list_to_array(\n", " df[cols]\n", " )" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def build_model_mix(sid_size,bssid_size,site_size, seq_len=100, pred_len=2, dropout=0.2, \n", " sp_dropout=0.1, embed_dim=64, hidden_dim=128, n_layers=3,lr=0.001):\n", " inputs = L.Input(shape=(seq_len, 3))\n", " input_time = L.Input(shape = (4+12,))\n", " input_site = L.Input(shape = (1,))\n", " \n", " categorical_fea1 = inputs[:, :, :1]\n", " categorical_fea2 = inputs[:, :, 1:2]\n", " numerical_fea = inputs[:, :, 2:]\n", " \n", "\n", " embed = L.Embedding(input_dim=sid_size, output_dim=embed_dim)(categorical_fea1)\n", " reshaped = tf.reshape(embed, shape=(-1, embed.shape[1], embed.shape[2] * embed.shape[3]))\n", " reshaped = L.SpatialDropout1D(sp_dropout)(reshaped)\n", " \n", " embed2 = L.Embedding(input_dim=bssid_size, output_dim=embed_dim)(categorical_fea2)\n", " reshaped2 = tf.reshape(embed2, shape=(-1, embed2.shape[1], embed2.shape[2] * embed2.shape[3]))\n", " reshaped2 = L.SpatialDropout1D(sp_dropout)(reshaped2)\n", " \n", " \n", " hidden = L.concatenate([reshaped, reshaped2, numerical_fea], axis=2)\n", " \n", " for x in range(n_layers):\n", " hidden = gru_layer(hidden_dim, dropout)(hidden)\n", " \n", " # Since we are only making predictions on the first part of each sequence, \n", " # we have to truncate it\n", " truncated = hidden[:, :pred_len]\n", " truncated = L.Flatten()(truncated)\n", " \n", " embed_site = L.Embedding(input_dim=site_size, output_dim=1)(input_site)\n", " embed_site = L.Flatten()(embed_site)\n", " \n", " truncated = L.concatenate([truncated, input_time,embed_site], axis=1)\n", " \n", " #out = L.Dense(32, activation='linear')(truncated)\n", " \n", " out = L.Dense(2, activation='linear')(truncated)\n", " \n", " model = tf.keras.Model(inputs=[inputs,input_time,input_site], outputs=out)\n", " model.compile(tf.optimizers.Adam(lr), loss='mse')\n", " \n", " return model\n", "\n", "def get_embed_size(n_cat):\n", " return min(600, round(1.6 * n_cat ** .56))\n" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "# import pickle\n", "# with open('train_all.pickle','wb') as fw:\n", "# pickle.dump(train_all,fw)\n", "# with open('test_all.pickle','wb') as fw:\n", "# pickle.dump(test_all,fw)" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['timestamp', 'ssid', 'bssid', 'rssi', 'path', 'floorNo', 'wifi_len',\n", " 'wifi_mean', 'wifi_median', 'wifi_std', 'site', 'ts_waypoint', 'x_acce',\n", " 'y_acce', 'z_acce', 'x_magne', 'y_magne', 'z_magne', 'x_gyros',\n", " 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate', 'z_rotate', 'site_id'],\n", " dtype='object')" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_all.columns" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "ss = train_all[train_all.path=='5dd4b80627889b0006b7772d']\n", "ss.fillna(method='bfill',inplace=True)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNofloorsitewifi_lenwifi_mean...y_magnez_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatefoldsite_id
2246530.707866[15433, 15433, 13482, 19396, 13482, 19396, 154...[63855, 31672, 22276, 58558, 30991, 19176, 331...[2.4171285806916414, 1.826730918477428, 1.8267...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.262-0.325291...0.7136410.3640871.094598-1.1364770.824800-1.0214300.6949931.3546200.022
2246540.707867[15433, 15433, 13482, 19396, 11875, 15433, 976...[31672, 63855, 30991, 19176, 31883, 33131, 450...[2.318728970322606, 1.6299316977393568, 1.3347...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.260-0.363039...0.7136410.3640871.094598-1.1364770.824800-1.0214300.6949931.3546206.022
2246550.707867[13482, 15433, 19396, 11875, 9761, 15433, 1187...[30991, 63855, 19176, 31883, 45009, 31672, 389...[1.3347328666322502, 1.3347328666322502, 1.236...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.260-0.392559...1.2509621.4430870.997709-1.1897750.049868-1.275551-0.0500021.3694768.022
2246560.707868[15433, 11875, 9761, 11875, 13482, 19396, 1503...[63855, 31883, 45009, 38916, 42558, 59902, 172...[1.3347328666322502, 0.8427348147870724, 0.645...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.258-0.459344...1.017085-1.3343310.750805-0.969883-0.647448-1.030683-0.4851441.3811954.022
2246570.707869[15433, 11875, 9761, 11875, 13482, 19396, 1348...[63855, 31883, 45009, 38916, 42558, 59902, 265...[1.3347328666322502, 0.8427348147870724, 0.645...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.256-0.468747...-0.2460601.2027601.125877-1.2030990.514863-0.9301760.5463331.3651658.022
2246580.707869[15433, 11875, 9761, 13482, 19396, 13482, 1939...[63855, 31883, 45009, 26529, 62405, 15051, 153...[1.3347328666322502, 0.8427348147870724, 0.645...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.234-0.422163...0.6572070.4253351.844603-1.069854-0.492567-1.2350800.2916541.3673612.022
2246590.707870[15433, 11875, 13482, 19396, 9761, 13482, 1939...[63855, 31883, 26529, 62405, 45009, 15051, 153...[1.3347328666322502, 0.8427348147870724, 0.842...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.238-0.380232...-0.6346990.0244861.094598-1.536361-0.260070-1.412949-0.1778001.3559657.022
2246600.707871[13482, 19396, 19396, 15433, 14772, 11875, 193...[15051, 12689, 15371, 63855, 31025, 31883, 126...[1.5315320873703213, 1.5315320873703213, 1.531...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.246-0.278861...0.4128770.2545550.625828-1.5363610.359982-1.1253390.7281691.3774264.022
2246610.707871[19396, 19396, 13482, 13482, 19396, 11875, 147...[12690, 36864, 37924, 26529, 62405, 31883, 310...[1.4331324770012857, 1.4331324770012857, 1.433...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.258-0.282377...-0.5080253.236964-0.084674-1.5731460.058029-1.4863320.1972141.4486791.022
2246620.707872[19396, 19396, 13482, 13482, 19396, 19396, 134...[12690, 36864, 37924, 26529, 62405, 15371, 150...[1.6299316977393568, 1.6299316977393568, 1.629...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.258-0.258731...0.5691950.178858-1.374161-0.536651-0.5700070.243419-0.569259-1.1889665.022
2246630.707873[13482, 19396, 19396, 19396, 13482, 19396, 118...[26529, 62405, 12690, 36864, 37924, 12689, 318...[1.9251305288464635, 1.9251305288464635, 1.728...5dd4b80627889b0006b7772d1.939098F65dbc1d84c1eb61796cf7c0100.256-0.281942...-0.559328-4.1362661.438321-0.8032160.514863-1.1370830.3319221.2317251.022
\n", "

11 rows × 29 columns

\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "224653 0.707866 [15433, 15433, 13482, 19396, 13482, 19396, 154... \n", "224654 0.707867 [15433, 15433, 13482, 19396, 11875, 15433, 976... \n", "224655 0.707867 [13482, 15433, 19396, 11875, 9761, 15433, 1187... \n", "224656 0.707868 [15433, 11875, 9761, 11875, 13482, 19396, 1503... \n", "224657 0.707869 [15433, 11875, 9761, 11875, 13482, 19396, 1348... \n", "224658 0.707869 [15433, 11875, 9761, 13482, 19396, 13482, 1939... \n", "224659 0.707870 [15433, 11875, 13482, 19396, 9761, 13482, 1939... \n", "224660 0.707871 [13482, 19396, 19396, 15433, 14772, 11875, 193... \n", "224661 0.707871 [19396, 19396, 13482, 13482, 19396, 11875, 147... \n", "224662 0.707872 [19396, 19396, 13482, 13482, 19396, 19396, 134... \n", "224663 0.707873 [13482, 19396, 19396, 19396, 13482, 19396, 118... \n", "\n", " bssid \\\n", "224653 [63855, 31672, 22276, 58558, 30991, 19176, 331... \n", "224654 [31672, 63855, 30991, 19176, 31883, 33131, 450... \n", "224655 [30991, 63855, 19176, 31883, 45009, 31672, 389... \n", "224656 [63855, 31883, 45009, 38916, 42558, 59902, 172... \n", "224657 [63855, 31883, 45009, 38916, 42558, 59902, 265... \n", "224658 [63855, 31883, 45009, 26529, 62405, 15051, 153... \n", "224659 [63855, 31883, 26529, 62405, 45009, 15051, 153... \n", "224660 [15051, 12689, 15371, 63855, 31025, 31883, 126... \n", "224661 [12690, 36864, 37924, 26529, 62405, 31883, 310... \n", "224662 [12690, 36864, 37924, 26529, 62405, 15371, 150... \n", "224663 [26529, 62405, 12690, 36864, 37924, 12689, 318... \n", "\n", " rssi \\\n", "224653 [2.4171285806916414, 1.826730918477428, 1.8267... \n", "224654 [2.318728970322606, 1.6299316977393568, 1.3347... \n", "224655 [1.3347328666322502, 1.3347328666322502, 1.236... \n", "224656 [1.3347328666322502, 0.8427348147870724, 0.645... \n", "224657 [1.3347328666322502, 0.8427348147870724, 0.645... \n", "224658 [1.3347328666322502, 0.8427348147870724, 0.645... \n", "224659 [1.3347328666322502, 0.8427348147870724, 0.842... \n", "224660 [1.5315320873703213, 1.5315320873703213, 1.531... \n", "224661 [1.4331324770012857, 1.4331324770012857, 1.433... \n", "224662 [1.6299316977393568, 1.6299316977393568, 1.629... \n", "224663 [1.9251305288464635, 1.9251305288464635, 1.728... \n", "\n", " path floorNo floor site \\\n", "224653 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224654 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224655 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224656 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224657 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224658 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224659 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224660 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224661 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224662 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "224663 5dd4b80627889b0006b7772d 1.939098 F6 5dbc1d84c1eb61796cf7c010 \n", "\n", " wifi_len wifi_mean ... y_magne z_magne x_gyros y_gyros \\\n", "224653 0.262 -0.325291 ... 0.713641 0.364087 1.094598 -1.136477 \n", "224654 0.260 -0.363039 ... 0.713641 0.364087 1.094598 -1.136477 \n", "224655 0.260 -0.392559 ... 1.250962 1.443087 0.997709 -1.189775 \n", "224656 0.258 -0.459344 ... 1.017085 -1.334331 0.750805 -0.969883 \n", "224657 0.256 -0.468747 ... -0.246060 1.202760 1.125877 -1.203099 \n", "224658 0.234 -0.422163 ... 0.657207 0.425335 1.844603 -1.069854 \n", "224659 0.238 -0.380232 ... -0.634699 0.024486 1.094598 -1.536361 \n", "224660 0.246 -0.278861 ... 0.412877 0.254555 0.625828 -1.536361 \n", "224661 0.258 -0.282377 ... -0.508025 3.236964 -0.084674 -1.573146 \n", "224662 0.258 -0.258731 ... 0.569195 0.178858 -1.374161 -0.536651 \n", "224663 0.256 -0.281942 ... -0.559328 -4.136266 1.438321 -0.803216 \n", "\n", " z_gyros x_rotate y_rotate z_rotate fold site_id \n", "224653 0.824800 -1.021430 0.694993 1.354620 0.0 22 \n", "224654 0.824800 -1.021430 0.694993 1.354620 6.0 22 \n", "224655 0.049868 -1.275551 -0.050002 1.369476 8.0 22 \n", "224656 -0.647448 -1.030683 -0.485144 1.381195 4.0 22 \n", "224657 0.514863 -0.930176 0.546333 1.365165 8.0 22 \n", "224658 -0.492567 -1.235080 0.291654 1.367361 2.0 22 \n", "224659 -0.260070 -1.412949 -0.177800 1.355965 7.0 22 \n", "224660 0.359982 -1.125339 0.728169 1.377426 4.0 22 \n", "224661 0.058029 -1.486332 0.197214 1.448679 1.0 22 \n", "224662 -0.570007 0.243419 -0.569259 -1.188966 5.0 22 \n", "224663 0.514863 -1.137083 0.331922 1.231725 1.0 22 \n", "\n", "[11 rows x 29 columns]" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ss" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Series([], Name: path, dtype: int64)" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_all[train_all.x_acce.isna()].path.value_counts()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "begin fold: 0\n", "fold 0 1.5483097549014528\n", "151.3674636340884\n", "elasped time: 85.34241080284119\n" ] } ], "source": [ "import time\n", "t1 = time.time()\n", "pred_cols = ['x','y']\n", "train_inputs = preprocess_inputs(train_all,cols=['ssid', 'bssid', 'rssi'])\n", "train_inputs_time = train_all[['timestamp','floorNo','wifi_len','wifi_mean', 'x_acce',\n", " 'y_acce', 'z_acce', 'x_magne', 'y_magne', 'z_magne', 'x_gyros',\n", " 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate', 'z_rotate']].values\n", "train_inputs_site = train_all['site_id'].values\n", "train_labels = train_all[pred_cols].values\n", "test_inputs = preprocess_inputs(test_all,cols=['ssid','bssid', 'rssi'])\n", "test_inputs_time = test_all[['timestamp','floorNo','wifi_len','wifi_mean', 'x_acce',\n", " 'y_acce', 'z_acce', 'x_magne', 'y_magne', 'z_magne', 'x_gyros',\n", " 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate', 'z_rotate']].values\n", "test_inputs_site = test_all['site_id'].values\n", "\n", "\n", " \n", " \n", "x_test = test_inputs\n", "x_test_time = test_inputs_time\n", "x_test_site = test_inputs_site\n", "\n", "oof_xy = np.zeros(train_labels.shape)\n", "y_test_pred = 0\n", "for fold_id in range(N_SPLITS):\n", " trn_idx = train_all[train_all.fold!=fold_id].index.tolist()\n", " val_idx = train_all[train_all.fold==fold_id].index.tolist()\n", " print('begin fold:',fold_id)\n", " x_train, x_val = train_inputs[trn_idx],train_inputs[val_idx]\n", " x_train_time, x_val_time = train_inputs_time[trn_idx],train_inputs_time[val_idx]\n", " x_train_site, x_val_site = train_inputs_site[trn_idx],train_inputs_site[val_idx]\n", " y_train, y_val = train_labels[trn_idx],train_labels[val_idx]\n", " \n", " model = build_model_mix(len(ssiddict),len(bssiddict),len(sitedict),seqlen,lr=0.001)\n", "\n", " history = model.fit(\n", " [x_train,x_train_time,x_train_site], y_train,\n", " validation_data=([x_val,x_val_time,x_val_site], y_val),\n", " batch_size=128,\n", " epochs=100,\n", " verbose=1,\n", " callbacks=[\n", " tf.keras.callbacks.ReduceLROnPlateau(patience=5),\n", " tf.keras.callbacks.ModelCheckpoint('rnn_model_wifisensor/model_fold_{}.h5'.format(fold_id)),\n", " tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4,\n", " patience=5, mode='min', restore_best_weights=True)\n", " ]\n", " )\n", "\n", "# model.load_weights('rnn_model_wifisensor/model_fold_{}.h5'.format(fold_id))\n", "\n", "\n", " y_val_pred = model.predict([x_val,x_val_time,x_val_site])\n", " y_test_pred += model.predict([x_test,x_test_time,x_test_site])\n", " oof_xy[val_idx] = y_val_pred\n", " print('fold',fold_id, np.mean(np.sqrt(np.sum((y_val-y_val_pred)**2,axis=1))))\n", " break\n", "y_test_pred = y_test_pred/(fold_id + 1) \n", "train_labels_inv = (pd.DataFrame(train_labels[:,:],columns = ['x','y']))\n", "oof_xy_pred_inv = (pd.DataFrame(oof_xy[:,:],columns = ['x','y']))\n", "y_test_pred_inv = (pd.DataFrame(y_test_pred[:,:],columns = ['x','y'])) \n", "print(np.mean(np.sqrt(np.sum((train_labels_inv-oof_xy_pred_inv)**2,axis=1))))\n", "\n", "t2 = time.time()\n", "print('elasped time:', t2 - t1)" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "fold 0 1.5483097549014528\n" ] } ], "source": [ "print('fold',fold_id, np.mean(np.sqrt(np.sum((y_val-y_val_pred)**2,axis=1))))\n" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "test_all[['x','y']] = y_test_pred_inv" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssipathfloorNowifi_lenwifi_meanwifi_medianwifi_std...z_magnex_gyrosy_gyrosz_gyrosx_rotatey_rotatez_rotatesite_idxy
00.345764[7007, 9522, 15215, 18669, 15215, 19396, 4851,...[35106, 10783, 39335, 4531, 48757, 19211, 1176...[1.9251305288464635, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0380.024464-0.3380611.033093...0.3039761.5887860.5581750.870030.0736501.1989000.8042931973.38291988.722977
10.345765[18669, 9522, 7007, 19396, 15215, 15215, 1264,...[4531, 10783, 35106, 19211, 39335, 48757, 6030...[2.1219297495845346, 1.4331324770012857, 1.334...00ff0c9a71cc37a2ebdd0f050.8459570.0400.075218-0.3380610.991529...0.5009781.5887930.2487230.800810.2037681.5160820.8776821973.45672687.362114
\n", "

2 rows × 27 columns

\n", "
" ], "text/plain": [ " timestamp ssid \\\n", "0 0.345764 [7007, 9522, 15215, 18669, 15215, 19396, 4851,... \n", "1 0.345765 [18669, 9522, 7007, 19396, 15215, 15215, 1264,... \n", "\n", " bssid \\\n", "0 [35106, 10783, 39335, 4531, 48757, 19211, 1176... \n", "1 [4531, 10783, 35106, 19211, 39335, 48757, 6030... \n", "\n", " rssi \\\n", "0 [1.9251305288464635, 1.4331324770012857, 1.334... \n", "1 [2.1219297495845346, 1.4331324770012857, 1.334... \n", "\n", " path floorNo wifi_len wifi_mean wifi_median \\\n", "0 00ff0c9a71cc37a2ebdd0f05 0.845957 0.038 0.024464 -0.338061 \n", "1 00ff0c9a71cc37a2ebdd0f05 0.845957 0.040 0.075218 -0.338061 \n", "\n", " wifi_std ... z_magne x_gyros y_gyros z_gyros x_rotate y_rotate \\\n", "0 1.033093 ... 0.303976 1.588786 0.558175 0.87003 0.073650 1.198900 \n", "1 0.991529 ... 0.500978 1.588793 0.248723 0.80081 0.203768 1.516082 \n", "\n", " z_rotate site_id x y \n", "0 0.804293 19 73.382919 88.722977 \n", "1 0.877682 19 73.456726 87.362114 \n", "\n", "[2 rows x 27 columns]" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_all.head(2)" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppathsitexyt1_wifipath_id
00.34576400ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054773.38291988.7229771180.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
10.34576500ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054773.45672687.3621143048.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
20.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054772.72747885.7215584924.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
30.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054772.81537683.1486056816.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
40.34576700ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054773.72325187.7733928693.05da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
\n", "
" ], "text/plain": [ " timestamp path site x \\\n", "0 0.345764 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 73.382919 \n", "1 0.345765 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 73.456726 \n", "2 0.345766 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 72.727478 \n", "3 0.345766 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 72.815376 \n", "4 0.345767 00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 73.723251 \n", "\n", " y t1_wifi path_id \n", "0 88.722977 1180.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "1 87.362114 3048.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "2 85.721558 4924.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "3 83.148605 6816.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 \n", "4 87.773392 8693.0 5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 " ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result = test_all[['timestamp','path','site','x','y']]\n", "result['t1_wifi'] = ss2.inverse_transform(result['timestamp'])\n", "\n", "result['t1_wifi'] = [xx-testpath2gap[yy][0] for (xx,yy) in zip(result['t1_wifi'],result['path'])]\n", "result['path_id'] = result['site']+'_'+result['path']\n", "result.head()" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamppathsitexyt1_wifi
path_id
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576400ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054773.38291988.7229771180.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576500ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054773.45672687.3621143048.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054772.72747885.7215584924.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576600ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054772.81537683.1486056816.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f050.34576700ff0c9a71cc37a2ebdd0f055da1389e4db8ce0c98bd054773.72325187.7733928693.0
\n", "
" ], "text/plain": [ " timestamp \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345764 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345765 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345766 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345766 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 0.345767 \n", "\n", " path \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 00ff0c9a71cc37a2ebdd0f05 \n", "\n", " site \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 5da1389e4db8ce0c98bd0547 \n", "\n", " x y \\\n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 73.382919 88.722977 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 73.456726 87.362114 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 72.727478 85.721558 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 72.815376 83.148605 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 73.723251 87.773392 \n", "\n", " t1_wifi \n", "path_id \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 1180.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 3048.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 4924.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 6816.0 \n", "5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05 8693.0 " ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.set_index('path_id', inplace=True)\n", "result.head()" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "from scipy.spatial.transform import Rotation as R\n", "from PIL import Image\n", "from mpl_toolkits.mplot3d import Axes3D\n", "import plotly.graph_objs as go\n", "from pathlib import Path\n", "import scipy.signal as signal\n", "import json\n", "import seaborn as sns # visualization\n", "from dataclasses import dataclass\n", "\n", "import matplotlib.pyplot as plt # visualization\n", "import numpy as np # linear algebra\n", "import random\n", "import pandas as pd\n", "from collections import Counter, defaultdict\n", "\n", "plt.rcParams.update({'font.size': 14})\n", "\n", "def split_ts_seq(ts_seq, sep_ts):\n", " \"\"\"\n", "\n", " :param ts_seq:\n", " :param sep_ts:\n", " :return:\n", " \"\"\"\n", " tss = ts_seq[:, 0].astype(float)\n", " unique_sep_ts = np.unique(sep_ts)\n", " ts_seqs = []\n", " start_index = 0\n", " for i in range(0, unique_sep_ts.shape[0]):\n", " end_index = np.searchsorted(tss, unique_sep_ts[i], side='right')\n", " if start_index == end_index:\n", " continue\n", " ts_seqs.append(ts_seq[start_index:end_index, :].copy())\n", " start_index = end_index\n", "\n", " # tail data\n", " if start_index < ts_seq.shape[0]:\n", " ts_seqs.append(ts_seq[start_index:, :].copy())\n", "\n", " return ts_seqs\n", "\n", "\n", "def correct_trajectory(original_xys, end_xy):\n", " \"\"\"\n", "\n", " :param original_xys: numpy ndarray, shape(N, 2)\n", " :param end_xy: numpy ndarray, shape(1, 2)\n", " :return:\n", " \"\"\"\n", " corrected_xys = np.zeros((0, 2))\n", "\n", " A = original_xys[0, :]\n", " B = end_xy\n", " Bp = original_xys[-1, :]\n", "\n", " angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])\n", " angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])\n", " angle_BpAB = angle_BpAX - angle_BAX\n", " AB = np.sqrt(np.sum((B - A) ** 2))\n", " ABp = np.sqrt(np.sum((Bp - A) ** 2))\n", "\n", " corrected_xys = np.append(corrected_xys, [A], 0)\n", " for i in np.arange(1, np.size(original_xys, 0)):\n", " angle_CpAX = np.arctan2(original_xys[i, 1] - A[1], original_xys[i, 0] - A[0])\n", "\n", " angle_CAX = angle_CpAX - angle_BpAB\n", "\n", " ACp = np.sqrt(np.sum((original_xys[i, :] - A) ** 2))\n", "\n", " AC = ACp * AB / ABp\n", "\n", " delta_C = np.array([AC * np.cos(angle_CAX), AC * np.sin(angle_CAX)])\n", "\n", " C = delta_C + A\n", "\n", " corrected_xys = np.append(corrected_xys, [C], 0)\n", "\n", " return corrected_xys\n", "\n", "\n", "def correct_positions(rel_positions, reference_positions):\n", " \"\"\"\n", "\n", " :param rel_positions:\n", " :param reference_positions:\n", " :return:\n", " \"\"\"\n", " rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])\n", " if len(rel_positions_list) != reference_positions.shape[0] - 1:\n", " # print(f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')\n", " del rel_positions_list[-1]\n", " assert len(rel_positions_list) == reference_positions.shape[0] - 1\n", "\n", " corrected_positions = np.zeros((0, 3))\n", " for i, rel_ps in enumerate(rel_positions_list):\n", " start_position = reference_positions[i]\n", " end_position = reference_positions[i + 1]\n", " abs_ps = np.zeros(rel_ps.shape)\n", " abs_ps[:, 0] = rel_ps[:, 0]\n", " # abs_ps[:, 1:3] = rel_ps[:, 1:3] + start_position[1:3]\n", " abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]\n", " for j in range(1, rel_ps.shape[0]):\n", " abs_ps[j, 1:3] = abs_ps[j-1, 1:3] + rel_ps[j, 1:3]\n", " abs_ps = np.insert(abs_ps, 0, start_position, axis=0)\n", " corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])\n", " corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))\n", " if i == 0:\n", " corrected_positions = np.append(corrected_positions, corrected_ps, axis=0)\n", " else:\n", " corrected_positions = np.append(corrected_positions, corrected_ps[1:], axis=0)\n", "\n", " corrected_positions = np.array(corrected_positions)\n", "\n", " return corrected_positions\n", "\n", "\n", "def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):\n", " order = 4\n", " filter_b, filter_a = signal.butter(order, cut_off_freq / (sample_freq / 2), 'low', False)\n", " zf = signal.lfilter_zi(filter_b, filter_a)\n", " _, zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)\n", " _, filter_zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)\n", "\n", " return filter_b, filter_a, filter_zf\n", "\n", "\n", "def get_rotation_matrix_from_vector(rotation_vector):\n", " q1 = rotation_vector[0]\n", " q2 = rotation_vector[1]\n", " q3 = rotation_vector[2]\n", "\n", " if rotation_vector.size >= 4:\n", " q0 = rotation_vector[3]\n", " else:\n", " q0 = 1 - q1*q1 - q2*q2 - q3*q3\n", " if q0 > 0:\n", " q0 = np.sqrt(q0)\n", " else:\n", " q0 = 0\n", "\n", " sq_q1 = 2 * q1 * q1\n", " sq_q2 = 2 * q2 * q2\n", " sq_q3 = 2 * q3 * q3\n", " q1_q2 = 2 * q1 * q2\n", " q3_q0 = 2 * q3 * q0\n", " q1_q3 = 2 * q1 * q3\n", " q2_q0 = 2 * q2 * q0\n", " q2_q3 = 2 * q2 * q3\n", " q1_q0 = 2 * q1 * q0\n", "\n", " R = np.zeros((9,))\n", " if R.size == 9:\n", " R[0] = 1 - sq_q2 - sq_q3\n", " R[1] = q1_q2 - q3_q0\n", " R[2] = q1_q3 + q2_q0\n", "\n", " R[3] = q1_q2 + q3_q0\n", " R[4] = 1 - sq_q1 - sq_q3\n", " R[5] = q2_q3 - q1_q0\n", "\n", " R[6] = q1_q3 - q2_q0\n", " R[7] = q2_q3 + q1_q0\n", " R[8] = 1 - sq_q1 - sq_q2\n", "\n", " R = np.reshape(R, (3, 3))\n", " elif R.size == 16:\n", " R[0] = 1 - sq_q2 - sq_q3\n", " R[1] = q1_q2 - q3_q0\n", " R[2] = q1_q3 + q2_q0\n", " R[3] = 0.0\n", "\n", " R[4] = q1_q2 + q3_q0\n", " R[5] = 1 - sq_q1 - sq_q3\n", " R[6] = q2_q3 - q1_q0\n", " R[7] = 0.0\n", "\n", " R[8] = q1_q3 - q2_q0\n", " R[9] = q2_q3 + q1_q0\n", " R[10] = 1 - sq_q1 - sq_q2\n", " R[11] = 0.0\n", "\n", " R[12] = R[13] = R[14] = 0.0\n", " R[15] = 1.0\n", "\n", " R = np.reshape(R, (4, 4))\n", "\n", " return R\n", "\n", "\n", "def get_orientation(R):\n", " flat_R = R.flatten()\n", " values = np.zeros((3,))\n", " if np.size(flat_R) == 9:\n", " values[0] = np.arctan2(flat_R[1], flat_R[4])\n", " values[1] = np.arcsin(-flat_R[7])\n", " values[2] = np.arctan2(-flat_R[6], flat_R[8])\n", " else:\n", " values[0] = np.arctan2(flat_R[1], flat_R[5])\n", " values[1] = np.arcsin(-flat_R[9])\n", " values[2] = np.arctan2(-flat_R[8], flat_R[10])\n", "\n", " return values\n", "\n", "\n", "def compute_steps(acce_datas):\n", " step_timestamps = np.array([])\n", " step_indexs = np.array([], dtype=int)\n", " step_acce_max_mins = np.zeros((0, 4))\n", " sample_freq = 50\n", " window_size = 22\n", " low_acce_mag = 0.6\n", " step_criterion = 1\n", " interval_threshold = 250\n", "\n", " acce_max = np.zeros((2,))\n", " acce_min = np.zeros((2,))\n", " acce_binarys = np.zeros((window_size,), dtype=int)\n", " acce_mag_pre = 0\n", " state_flag = 0\n", "\n", " warmup_data = np.ones((window_size,)) * 9.81\n", " filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)\n", " acce_mag_window = np.zeros((window_size, 1))\n", "\n", " # detect steps according to acceleration magnitudes\n", " for i in np.arange(0, np.size(acce_datas, 0)):\n", " acce_data = acce_datas[i, :]\n", " acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))\n", "\n", " acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)\n", " acce_mag_filt = acce_mag_filt[0]\n", "\n", " acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])\n", " acce_mag_window = np.delete(acce_mag_window, 0)\n", " mean_gravity = np.mean(acce_mag_window)\n", " acce_std = np.std(acce_mag_window)\n", " mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])\n", "\n", " # detect valid peak or valley of acceleration magnitudes\n", " acce_mag_filt_detrend = acce_mag_filt - mean_gravity\n", " if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):\n", " # peak\n", " acce_binarys = np.append(acce_binarys, [1])\n", " acce_binarys = np.delete(acce_binarys, 0)\n", " elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):\n", " # valley\n", " acce_binarys = np.append(acce_binarys, [-1])\n", " acce_binarys = np.delete(acce_binarys, 0)\n", " else:\n", " # between peak and valley\n", " acce_binarys = np.append(acce_binarys, [0])\n", " acce_binarys = np.delete(acce_binarys, 0)\n", "\n", " if (acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):\n", " if state_flag == 0:\n", " acce_max[:] = acce_data[0], acce_mag_filt\n", " state_flag = 1\n", " elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (\n", " acce_mag_filt > acce_max[1]):\n", " acce_max[:] = acce_data[0], acce_mag_filt\n", " elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):\n", " acce_max[:] = acce_data[0], acce_mag_filt\n", " state_flag = 1\n", "\n", " # choose reasonable step criterion and check if there is a valid step\n", " # save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]\n", " step_flag = False\n", " if step_criterion == 2:\n", " if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):\n", " step_flag = True\n", " elif step_criterion == 3:\n", " if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):\n", " step_flag = True\n", " else:\n", " if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:\n", " if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):\n", " acce_min[:] = acce_data[0], acce_mag_filt\n", " state_flag = 2\n", " step_flag = True\n", " elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (\n", " acce_mag_filt < acce_min[1]):\n", " acce_min[:] = acce_data[0], acce_mag_filt\n", " if step_flag:\n", " step_timestamps = np.append(step_timestamps, acce_data[0])\n", " step_indexs = np.append(step_indexs, [i])\n", " step_acce_max_mins = np.append(step_acce_max_mins,\n", " [[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)\n", " acce_mag_pre = acce_mag_filt_detrend\n", "\n", " return step_timestamps, step_indexs, step_acce_max_mins\n", "\n", "\n", "def compute_stride_length(step_acce_max_mins):\n", " K = 0.4\n", " K_max = 0.8\n", " K_min = 0.4\n", " para_a0 = 0.21468084\n", " para_a1 = 0.09154517\n", " para_a2 = 0.02301998\n", "\n", " stride_lengths = np.zeros((step_acce_max_mins.shape[0], 2))\n", " k_real = np.zeros((step_acce_max_mins.shape[0], 2))\n", " step_timeperiod = np.zeros((step_acce_max_mins.shape[0] - 1, ))\n", " stride_lengths[:, 0] = step_acce_max_mins[:, 0]\n", " window_size = 2\n", " step_timeperiod_temp = np.zeros((0, ))\n", "\n", " # calculate every step period - step_timeperiod unit: second\n", " for i in range(0, step_timeperiod.shape[0]):\n", " step_timeperiod_data = (step_acce_max_mins[i + 1, 0] - step_acce_max_mins[i, 0]) / 1000\n", " step_timeperiod_temp = np.append(step_timeperiod_temp, [step_timeperiod_data])\n", " if step_timeperiod_temp.shape[0] > window_size:\n", " step_timeperiod_temp = np.delete(step_timeperiod_temp, [0])\n", " step_timeperiod[i] = np.sum(step_timeperiod_temp) / step_timeperiod_temp.shape[0]\n", "\n", " # calculate parameters by step period and acceleration magnitude variance\n", " k_real[:, 0] = step_acce_max_mins[:, 0]\n", " k_real[0, 1] = K\n", " for i in range(0, step_timeperiod.shape[0]):\n", " k_real[i + 1, 1] = np.max([(para_a0 + para_a1 / step_timeperiod[i] + para_a2 * step_acce_max_mins[i, 3]), K_min])\n", " k_real[i + 1, 1] = np.min([k_real[i + 1, 1], K_max]) * (K / K_min)\n", "\n", " # calculate every stride length by parameters and max and min data of acceleration magnitude\n", " stride_lengths[:, 1] = np.max([(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]),\n", " np.ones((step_acce_max_mins.shape[0], ))], axis=0)**(1 / 4) * k_real[:, 1]\n", "\n", " return stride_lengths\n", "\n", "\n", "def compute_headings(ahrs_datas):\n", " headings = np.zeros((np.size(ahrs_datas, 0), 2))\n", " for i in np.arange(0, np.size(ahrs_datas, 0)):\n", " ahrs_data = ahrs_datas[i, :]\n", " rot_mat = get_rotation_matrix_from_vector(ahrs_data[1:])\n", " azimuth, pitch, roll = get_orientation(rot_mat)\n", " around_z = (-azimuth) % (2 * np.pi)\n", " headings[i, :] = ahrs_data[0], around_z\n", " return headings\n", "\n", "\n", "def compute_step_heading(step_timestamps, headings):\n", " step_headings = np.zeros((len(step_timestamps), 2))\n", " step_timestamps_index = 0\n", " for i in range(0, len(headings)):\n", " if step_timestamps_index < len(step_timestamps):\n", " if headings[i, 0] == step_timestamps[step_timestamps_index]:\n", " step_headings[step_timestamps_index, :] = headings[i, :]\n", " step_timestamps_index += 1\n", " else:\n", " break\n", " assert step_timestamps_index == len(step_timestamps)\n", "\n", " return step_headings\n", "\n", "\n", "def compute_rel_positions(stride_lengths, step_headings):\n", " rel_positions = np.zeros((stride_lengths.shape[0], 3))\n", " for i in range(0, stride_lengths.shape[0]):\n", " rel_positions[i, 0] = stride_lengths[i, 0]\n", " rel_positions[i, 1] = -stride_lengths[i, 1] * np.sin(step_headings[i, 1])\n", " rel_positions[i, 2] = stride_lengths[i, 1] * np.cos(step_headings[i, 1])\n", "\n", " return rel_positions\n", "\n", "\n", "def compute_step_positions(acce_datas, ahrs_datas, posi_datas):\n", " step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)\n", " headings = compute_headings(ahrs_datas)\n", " stride_lengths = compute_stride_length(step_acce_max_mins)\n", " step_headings = compute_step_heading(step_timestamps, headings)\n", " rel_positions = compute_rel_positions(stride_lengths, step_headings)\n", " step_positions = correct_positions(rel_positions, posi_datas)\n", "\n", " return step_positions\n" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [ "sample_submission = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamp
buildingpath_id
5a0546857ecc773753327266046cfa46be49fc10834815c6[0000000000009, 0000000009017, 0000000015326, ...
05d052dde78384b0c543d89c[0000000000012, 0000000005748, 0000000014654, ...
0c06cc9f21d172618d74c6c8[0000000000011, 0000000011818, 0000000019825, ...
146035943a1482883ed98570[0000000000011, 0000000004535, 0000000011498, ...
1ef2771dfea25d508142ba06[0000000000009, 0000000012833, 0000000021759, ...
\n", "
" ], "text/plain": [ " timestamp\n", "building path_id \n", "5a0546857ecc773753327266 046cfa46be49fc10834815c6 [0000000000009, 0000000009017, 0000000015326, ...\n", " 05d052dde78384b0c543d89c [0000000000012, 0000000005748, 0000000014654, ...\n", " 0c06cc9f21d172618d74c6c8 [0000000000011, 0000000011818, 0000000019825, ...\n", " 146035943a1482883ed98570 [0000000000011, 0000000004535, 0000000011498, ...\n", " 1ef2771dfea25d508142ba06 [0000000000009, 0000000012833, 0000000021759, ..." ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_submission['building'] = [x.split('_')[0] for x in sample_submission['site_path_timestamp']]\n", "sample_submission['path_id'] = [x.split('_')[1] for x in sample_submission['site_path_timestamp']]\n", "sample_submission['timestamp'] = [x.split('_')[2] for x in sample_submission['site_path_timestamp']]\n", "samples = pd.DataFrame(sample_submission.groupby(['building','path_id'])['timestamp'].apply(lambda x: list(x)))\n", "buildings = np.unique([x[0] for x in samples.index])\n", "samples.head()" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5a0546857ecc773753327266\n", "5c3c44b80379370013e0fd2b\n", "5d27075f03f801723c2e360f\n", "5d27096c03f801723c31e5e0\n", "5d27097f03f801723c320d97\n", "5d27099f03f801723c32511d\n", "5d2709a003f801723c3251bf\n", "5d2709b303f801723c327472\n", "5d2709bb03f801723c32852c\n", "5d2709c303f801723c3299ee\n", "5d2709d403f801723c32bd39\n", "5d2709e003f801723c32d896\n", "5da138274db8ce0c98bbd3d2\n", "5da1382d4db8ce0c98bbe92e\n", "5da138314db8ce0c98bbf3a0\n", "5da138364db8ce0c98bc00f1\n", "5da1383b4db8ce0c98bc11ab\n", "5da138754db8ce0c98bca82f\n", "5da138764db8ce0c98bcaa46\n", "5da1389e4db8ce0c98bd0547\n", "5da138b74db8ce0c98bd4774\n", "5da958dd46f8266d0737457b\n", "5dbc1d84c1eb61796cf7c010\n", "5dc8cea7659e181adb076a3f\n" ] } ], "source": [ "from scipy.interpolate import interp1d\n", "from scipy.ndimage.filters import uniform_filter1d\n", "\n", "colacce = ['xyz_time','x_acce','y_acce','z_acce']\n", "colahrs = ['xyz_time','x_ahrs','y_ahrs','z_ahrs']\n", "\n", "for building in buildings:\n", " print(building)\n", " paths = samples.loc[building].index\n", " # Acceleration info:\n", " tfm = pd.read_csv(f'indoor_testing_accel/{building}.txt',index_col=0)\n", " for path_id in paths:\n", " # Original predicted values:\n", " xy = result.loc[building+'_'+path_id]\n", " tfmi = tfm.loc[path_id]\n", " acce_datas = np.array(tfmi[colacce],dtype=np.float)\n", " ahrs_datas = np.array(tfmi[colahrs],dtype=np.float)\n", " posi_datas = np.array(xy[['t1_wifi','x','y']],dtype=np.float)\n", " # Outlier removal:\n", " xyout = uniform_filter1d(posi_datas,size=3,axis=0,mode='reflect')\n", " xydiff = np.abs(posi_datas-xyout)\n", " xystd = np.std(xydiff,axis=0)*3\n", " posi_datas = posi_datas[(xydiff[:,1]posi_datas[0,0],:]\n", " # If two consecutive predictions are in-between two step datapoints,\n", " # the last one is removed, causing error (in the \"split_ts_seq\" function).\n", " posi_index = [np.searchsorted(rel_positions[:,0], x, side='right') for x in posi_datas[:,0]]\n", " u, i1, i2 = np.unique(posi_index, return_index=True, return_inverse=True)\n", " posi_datas = np.vstack([np.mean(posi_datas[i2==i],axis=0) for i in np.unique(i2)])\n", " # Position correction:\n", " step_positions = correct_positions(rel_positions, posi_datas)\n", " # Interpolate for timestamps in the testing set:\n", "\n", " t = step_positions[:,0]\n", " x = step_positions[:,1]\n", " y = step_positions[:,2]\n", " fx = interp1d(t, x, kind='linear', fill_value=(x[0],x[-1]), bounds_error=False) #fill_value=\"extrapolate\"\n", " fy = interp1d(t, y, kind='linear', fill_value=(y[0],y[-1]), bounds_error=False)\n", " # Output result:\n", " t0 = np.array(samples.loc[(building,path_id),'timestamp'],dtype=np.float64)\n", " sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'x'] = fx(t0)\n", " sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'y'] = fy(t0)\n", " \n", " #sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'floor'] = floors.loc[building+'_'+path_id,'floor']\n", "# break\n", "# break\n", "\n", "# sample_submission[['site_path_timestamp','floor','x','y']].to_csv('submission_mix_v3.3_del_outlier.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "subold = pd.read_csv('submission_floor.csv')\n", "\n", "sample_submission['floor']=subold['floor']\n", "sample_submission[['site_path_timestamp','floor','x','y']].to_csv('submission_wifi_sensor.csv',index=False)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }