In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from pathlib import Path
import glob
import pickle

import random
import os

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tqdm import tqdm
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.backend as K
import tensorflow_addons as tfa
from tensorflow_addons.layers import WeightNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
pd.options.mode.chained_assignment = None




In [2]:
from tensorflow.python.client import device_lib
def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
print(get_available_gpus())


[]


In [3]:

# PATH = '../input/indoor-location-navigation'
# test_files = glob.glob(f'{PATH}/test/*.txt')
# test_files_pd = [xx.split('/')[-1:][0].replace('.txt','') for xx in test_files]
# test_files_pd = pd.DataFrame(test_files_pd)
# test_files_pd.columns = ['path']

sample_submission = pd.read_csv("../input/indoor-location-navigation/sample_submission.csv")
sample_submission['site'] = [xx.split('_')[0] for xx in sample_submission.site_path_timestamp]
sample_submission['path'] = [xx.split('_')[1] for xx in sample_submission.site_path_timestamp]
sample_submission['ts_waypoint'] = [int(xx.split('_')[2]) for xx in sample_submission.site_path_timestamp]
del sample_submission['floor']
del sample_submission['x']
del sample_submission['y']

path2site = dict(zip(sample_submission.path,sample_submission.site))
sample_submission.head()
# test_path_site = sample_submission[['site','path','timestamp','site_path_timestamp']]
# test_files_pd = pd.merge(test_files_pd,test_path_site,how='left',on='path')
# test_files_pd.head()

Unnamed: 0,site_path_timestamp,site,path,ts_waypoint
0,5a0546857ecc773753327266_046cfa46be49fc1083481...,5a0546857ecc773753327266,046cfa46be49fc10834815c6,9
1,5a0546857ecc773753327266_046cfa46be49fc1083481...,5a0546857ecc773753327266,046cfa46be49fc10834815c6,9017
2,5a0546857ecc773753327266_046cfa46be49fc1083481...,5a0546857ecc773753327266,046cfa46be49fc10834815c6,15326
3,5a0546857ecc773753327266_046cfa46be49fc1083481...,5a0546857ecc773753327266,046cfa46be49fc10834815c6,18763
4,5a0546857ecc773753327266_046cfa46be49fc1083481...,5a0546857ecc773753327266,046cfa46be49fc10834815c6,22328


In [4]:
test_wifi_files = glob.glob(f'../input/wifi_lbl_encode/test/*.txt')
test_sensor_files = glob.glob('../input/data_abstract/*_test_sensor.csv')

# train_files = glob.glob('../input/indoor-navigation-and-location-wifi-features-alldata/*train.csv') #if A 
train_files = glob.glob('../input/data_abstract/*_train_waypoint_all.csv')#if B

    
train_wifi_files = glob.glob(f'../input/wifi_lbl_encode/train/*/*/*.txt')
train_sensor_files = glob.glob('../input/data_abstract/*_train_sensor.csv')

In [5]:
train_files[:2]

['../input/data_abstract/5a0546857ecc773753327266_train_waypoint_all.csv',
 '../input/data_abstract/5c3c44b80379370013e0fd2b_train_waypoint_all.csv']

In [6]:
# train_site_list = [xx.split('/')[-1].replace('_train.csv','') for xx in train_files] #if A 
# train_site_list = [xx.split('/')[-1].replace('_train_waypoint_all.csv','') for xx in train_files] #if B 204
train_site_list = list(sample_submission.site.unique()) # if B 24
train_wifi_files = [xx for xx in train_wifi_files if xx.split('/')[-3] in train_site_list]
print('len train site list:',len(train_site_list))

len train site list: 24


In [7]:
len(train_wifi_files)

10877

In [8]:
ssidlist = set()
bssidlist = set()
for filename in tqdm(train_wifi_files+test_wifi_files):
    tmp = pd.read_csv(filename)
    ssidlist = ssidlist|set(tmp.ssid)
    bssidlist = bssidlist|set(tmp.bssid)

100%|██████████| 11503/11503 [01:01<00:00, 187.96it/s]


In [9]:
len(set(ssidlist)),len(set(bssidlist))

(20044, 65952)

In [10]:
seqlen = 100

In [11]:
ssiddict = dict(zip(list(ssidlist)+['empty'],range(len(ssidlist)+1)))
bssiddict = dict(zip(list(bssidlist)+['empty'],range(len(bssidlist)+1)))


In [12]:
train_wifi_pd_csv = []
for filename in tqdm(train_wifi_files):
    tmp = pd.read_csv(filename)
    tmp['path'] = filename.split('/')[-1].replace('.txt','')
    tmp['floor'] = filename.split('/')[-2]
    tmp['site'] = filename.split('/')[-3]
    train_wifi_pd_csv.append(tmp)
train_wifi_pd_csv = pd.concat(train_wifi_pd_csv).reset_index(drop=True)

100%|██████████| 10877/10877 [00:43<00:00, 252.76it/s]


In [13]:
train_sensor_pd_csv = []
for filename in tqdm(train_sensor_files):
    tmp = pd.read_csv(filename,index_col=0)
    train_sensor_pd_csv.append(tmp)
train_sensor_pd_csv = pd.concat(train_sensor_pd_csv).reset_index(drop=True)
train_sensor_pd_csv['magne'] = train_sensor_pd_csv['x_magne']**2+\
                                train_sensor_pd_csv['y_magne']**2+train_sensor_pd_csv['z_magne']**2

  mask |= (ar1 == a)
100%|██████████| 24/24 [01:02<00:00,  2.59s/it]


In [14]:
train_sensor_pd_csv.head(2)

Unnamed: 0,ts_sensor,x_acce,y_acce,z_acce,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,path,site,floor,floor_ori,magne
0,1578463000000.0,0.023697,4.450943,9.055649,-0.037537,0.075256,0.030579,-13.391113,9.959412,-30.305481,0.227164,-0.058094,-0.268773,5e15730aa280850006f3d005,5a0546857ecc773753327266,-1,B1,0.008008
1,1578463000000.0,0.050629,4.552109,9.074799,-0.043411,-0.005722,0.009796,-12.002563,9.959412,-28.955078,0.225032,-0.05964,-0.267238,5e15730aa280850006f3d005,5a0546857ecc773753327266,-1,B1,0.002013


In [15]:
floor_map = {"B3":-3,"B2":-2, "B1":-1, "F1":0, "F2": 1, "F3":2, "F4":3, "F5":4, "F6":5, "F7":6,"F8":7, "F9":8,
             "1F":0, "2F":1, "3F":2, "4F":3, "5F":4, "6F":5, "7F":6, "8F": 7, "9F":8}
train_wifi_pd_csv = train_wifi_pd_csv[train_wifi_pd_csv.floor.isin(floor_map)].reset_index(drop=True)
train_wifi_pd_csv['floorNo'] = train_wifi_pd_csv['floor'].apply(lambda x: floor_map[x])

In [16]:
train_wifi_pd_csv.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,last_timestamp,path,floor,site,floorNo
0,1578462618826,63159,162932,-46,1578462603277,5e15730aa280850006f3d005,B1,5a0546857ecc773753327266,-1
1,1578462618826,32835,65513,-49,1578462618272,5e15730aa280850006f3d005,B1,5a0546857ecc773753327266,-1


In [17]:
test_wifi_pd_csv = []
for filename in tqdm(test_wifi_files):
    tmp = pd.read_csv(filename)
    tmp['path'] = filename.split('/')[-1].replace('.txt','')
    test_wifi_pd_csv.append(tmp)
test_wifi_pd_csv = pd.concat(test_wifi_pd_csv).reset_index(drop=True)


100%|██████████| 626/626 [00:02<00:00, 208.79it/s]


In [18]:
test_sensor_pd_csv = []
for filename in tqdm(test_sensor_files):
    tmp = pd.read_csv(filename)
    test_sensor_pd_csv.append(tmp)
test_sensor_pd_csv = pd.concat(test_sensor_pd_csv).reset_index(drop=True)

test_sensor_pd_csv['magne'] = test_sensor_pd_csv['x_magne']**2+\
                                test_sensor_pd_csv['y_magne']**2+test_sensor_pd_csv['z_magne']**2

100%|██████████| 24/24 [00:07<00:00,  3.04it/s]


In [19]:


standcols_sensor = ['x_acce', 'y_acce', 'z_acce', 'x_magne', 'y_magne',
       'z_magne', 'x_gyros', 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate',
       'z_rotate','magne']

ss_sensor = StandardScaler() 
ss_sensor.fit(train_sensor_pd_csv.loc[:,standcols_sensor])
train_sensor_pd_csv.loc[:,standcols_sensor] = ss_sensor.transform(train_sensor_pd_csv.loc[:,standcols_sensor])
test_sensor_pd_csv.loc[:,standcols_sensor] = ss_sensor.transform(test_sensor_pd_csv.loc[:,standcols_sensor])        

In [20]:
test_wifi_pd_csv.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,last_timestamp,path
0,1961,70537,28318,-34,1571828560156,14f45baa63b4d3a700126af6
1,1961,43838,93116,-35,1571828560159,14f45baa63b4d3a700126af6


In [21]:
submission = pd.read_csv('submission_floor.csv')
submission['path'] = [xx.split('_')[1] for xx in submission['site_path_timestamp']]
test_path_floor_dict = dict(zip(submission.path,submission.floor))
test_wifi_pd_csv['floorNo'] = [test_path_floor_dict[xx] for xx in test_wifi_pd_csv['path']]

In [22]:
standcols = ['rssi','floorNo']
ss = StandardScaler()
ss.fit(train_wifi_pd_csv.loc[:,standcols])
train_wifi_pd_csv.loc[:,standcols] = ss.transform(train_wifi_pd_csv.loc[:,standcols])
test_wifi_pd_csv.loc[:,standcols] = ss.transform(test_wifi_pd_csv.loc[:,standcols])

In [23]:
train_wifi_pd_csv.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,last_timestamp,path,floor,site,floorNo
0,1578462618826,63159,162932,3.105926,1578462603277,5e15730aa280850006f3d005,B1,5a0546857ecc773753327266,-1.340327
1,1578462618826,32835,65513,2.810727,1578462618272,5e15730aa280850006f3d005,B1,5a0546857ecc773753327266,-1.340327


In [24]:
train_wifi_pd = []
for path,tmp in tqdm(train_wifi_pd_csv.groupby('path')):
    tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])
    tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])
    ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \
                                list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) 
    ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \
                                list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))
    ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \
                                list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))
    
    ss = pd.concat([ss1,ss2,ss3],axis=1)
    ss['path'] = tmp.path.unique()[0]
    ss['floorNo'] = tmp.floorNo.unique()[0]
    ss['floor'] = tmp.floor.unique()[0]
    ss['site'] = tmp.site.unique()[0]
    ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500
    ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()
    ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()
    ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()

    train_wifi_pd.append(ss)
train_wifi_pd = pd.concat(train_wifi_pd)
train_wifi_pd = train_wifi_pd.reset_index()
train_wifi_pd.head(2)

100%|██████████| 10877/10877 [02:51<00:00, 63.29it/s] 


Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,floor,site,wifi_len,wifi_mean,wifi_median,wifi_std
0,1560500997770,"[7702, 19396, 18304, 19396, 7702, 7702, 19396,...","[61027, 55262, 10121, 57287, 45809, 53865, 261...","[3.204325463643926, 3.1059258532748903, 2.9091...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.206,0.353603,0.350737,1.088208
1,1560500999681,"[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...","[10121, 31140, 61027, 55262, 57287, 53865, 458...","[2.712327411798748, 2.712327411798748, 2.61392...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.22,0.299748,0.350737,1.040317


In [25]:
test_wifi_pd = []
# for filename in tqdm(test_wifi_files):
for path,tmp in tqdm(test_wifi_pd_csv.groupby('path')):
    #tmp = pd.read_csv(filename)
    #tmp['rssi'] = tmp['rssi']/999
    tmp['ssid'] = tmp['ssid'].apply(lambda x: ssiddict[x])
    tmp['bssid'] = tmp['bssid'].apply(lambda x: bssiddict[x])
    ss1 = tmp.groupby('timestamp')['ssid'].apply(lambda x: \
                                list(x)[:seqlen] if len(x)>seqlen else list(x)+[ssiddict['empty']]*(seqlen-len(x))) 
    ss2 = tmp.groupby('timestamp')['bssid'].apply(lambda x: \
                                list(x)[:seqlen] if len(x)>seqlen else list(x)+[bssiddict['empty']]*(seqlen-len(x)))
    ss3 = tmp.groupby('timestamp')['rssi'].apply(lambda x: \
                                list(x)[:seqlen] if len(x)>seqlen else list(x)+[-10]*(seqlen-len(x)))
    ss = pd.concat([ss1,ss2,ss3],axis=1)
    #ss['path'] = filename.split('/')[-1].replace('.txt','')
    ss['path'] = tmp.path.unique()[0]
    ss['floorNo'] = tmp.floorNo.unique()[0]
    ss['wifi_len'] = tmp.groupby('timestamp')['rssi'].count()/500
    ss['wifi_mean'] = tmp.groupby('timestamp')['rssi'].mean()
    ss['wifi_median'] = tmp.groupby('timestamp')['rssi'].median()
    ss['wifi_std'] = tmp.groupby('timestamp')['rssi'].std()

    test_wifi_pd.append(ss)
test_wifi_pd = pd.concat(test_wifi_pd)
test_wifi_pd = test_wifi_pd.reset_index()
test_wifi_pd['site'] = [path2site[xx] for xx in test_wifi_pd.path]
test_wifi_pd.head()

100%|██████████| 626/626 [00:15<00:00, 41.61it/s]


Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,wifi_len,wifi_mean,wifi_median,wifi_std,site
0,1180,"[7007, 9522, 15215, 18669, 15215, 19396, 4851,...","[35106, 10783, 39335, 4531, 48757, 19211, 1176...","[1.9251305288464635, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.038,0.024464,-0.338061,1.033093,5da1389e4db8ce0c98bd0547
1,3048,"[18669, 9522, 7007, 19396, 15215, 15215, 1264,...","[4531, 10783, 35106, 19211, 39335, 48757, 6030...","[2.1219297495845346, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.04,0.075218,-0.338061,0.991529,5da1389e4db8ce0c98bd0547
2,4924,"[9522, 18669, 7007, 19396, 15215, 4851, 15215,...","[10783, 4531, 35106, 19211, 48757, 11767, 3933...","[1.4331324770012857, 1.2363332562632146, 1.039...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.048,-0.149461,-0.43646,0.815521,5da1389e4db8ce0c98bd0547
3,6816,"[18669, 4851, 15215, 7007, 9522, 19396, 19396,...","[4531, 11767, 39335, 35106, 10783, 19211, 5710...","[1.826730918477428, 1.1379336458941791, 1.0395...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.052,-0.118554,-0.53486,0.911802,5da1389e4db8ce0c98bd0547
4,8693,"[18669, 15215, 7007, 4851, 9522, 19396, 15215,...","[4531, 48757, 35106, 11767, 10783, 19211, 3933...","[2.1219297495845346, 1.3347328666322502, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.062,-0.182526,-0.53486,0.905339,5da1389e4db8ce0c98bd0547


In [26]:
test_wifi_pd.shape

(37678, 11)

In [27]:
# filename = train_files[0]
train_xy = []
for filename in tqdm(train_files):
    tmp = pd.read_csv(filename,index_col=0)
    ss = tmp[['path','site','floor','ts_waypoint','x','y']]
    train_xy.append(ss)
train_xy = pd.concat(train_xy).reset_index(drop=True)

100%|██████████| 204/204 [00:00<00:00, 266.90it/s]


In [28]:
train_xy=train_xy.drop_duplicates()
train_xy.shape

(166681, 6)

In [30]:
train_sensor_pd_csv.head(2)


Unnamed: 0,ts_sensor,x_acce,y_acce,z_acce,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,path,site,floor,floor_ori,magne
0,1578463000000.0,0.544694,2.170451,-0.215846,-0.061473,0.269189,0.06898,-0.624434,0.608398,-0.327102,3.204804,-1.107252,-0.362515,5e15730aa280850006f3d005,5a0546857ecc773753327266,-1,B1,-0.296067
1,1578463000000.0,0.57046,2.231734,-0.209542,-0.070594,-0.024665,0.02118,-0.56146,0.608398,-0.170987,3.16868,-1.130263,-0.360321,5e15730aa280850006f3d005,5a0546857ecc773753327266,-1,B1,-0.298708


In [31]:
train_sensor_pd_csv.columns

Index(['ts_sensor', 'x_acce', 'y_acce', 'z_acce', 'x_magne', 'y_magne',
       'z_magne', 'x_gyros', 'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate',
       'z_rotate', 'path', 'site', 'floor', 'floor_ori', 'magne'],
      dtype='object')

In [33]:
train_sensor_pd_csv_group = dict(list(train_sensor_pd_csv.groupby('path',as_index=False)))


In [34]:
train_xy_group = dict(list(train_xy.groupby('path',as_index=False)))

In [35]:
train_sensor_pd_csv_group['5d073b814a19c000086c558b']

Unnamed: 0,ts_sensor,x_acce,y_acce,z_acce,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,path,site,floor,floor_ori,magne
1734783,1.560501e+12,-0.994056,-0.248305,0.177490,-0.370349,0.049532,0.332508,0.238923,-0.398348,2.083426,-1.588827,0.498332,1.416640,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.265570
1734784,1.560501e+12,-0.377300,-0.036921,0.058472,-1.068294,-3.462430,0.174510,0.238923,-0.398348,2.083426,-1.501465,0.381565,1.418280,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,0.310682
1734785,1.560501e+12,0.072799,-0.174701,0.075813,-0.658117,-2.519185,0.323945,0.145778,-0.431256,2.314335,-1.306327,0.301993,1.418707,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.000635
1734786,1.560501e+12,-1.088537,0.034150,0.154237,-0.209088,-1.044488,0.316575,0.269925,-0.464091,2.468334,-1.309413,0.313158,1.419393,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.247187
1734787,1.560501e+12,-0.625255,-0.065917,-0.063504,0.042315,-0.296481,0.697602,0.176849,-0.431256,2.160513,-1.345893,0.349869,1.420749,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.255654
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1735679,1.560501e+12,0.562417,0.747711,-0.869028,-0.123092,-0.364089,0.194093,0.642367,-2.370723,4.084173,-0.705909,1.252199,1.423396,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.289443
1735680,1.560501e+12,0.600212,0.790128,-0.761443,-0.447272,-0.172782,0.276146,0.642367,-2.436466,4.238171,-0.702471,1.211102,1.424129,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.256173
1735681,1.560501e+12,0.679803,0.779619,-0.564002,-0.684603,-0.257777,0.309240,0.611365,-2.436466,4.161084,-0.699489,1.148888,1.425078,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.204516
1735682,1.560501e+12,0.668358,0.855755,-0.537991,-0.694506,-0.356393,0.267582,0.580293,-2.304979,4.084173,-0.692203,1.084712,1.425973,5d073b814a19c000086c558b,5c3c44b80379370013e0fd2b,2,F3,-0.202040


In [37]:
import scipy.stats as stats
import scipy
train_all = []

for path,train_wifi_pd_x in tqdm(train_wifi_pd.groupby('path')):
    # path = '5e15730aa280850006f3d005'
    train_y = train_xy_group[path][['path','ts_waypoint','x','y']].drop_duplicates().reset_index(drop=True)
    train_sensor = train_sensor_pd_csv_group[path][['ts_sensor', 'x_acce', 'y_acce', 'z_acce', 
       'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
       'z_gyros','x_rotate', 'y_rotate', 'z_rotate', 'path']].reset_index(drop=True)

    train_wifi_pd_x['ts_waypoint'] = 0
    if len(train_y)==0:
        print(path,'have no waypoint')
    if len(train_y)>0:
        ts_point_min = train_y.ts_waypoint.min()
        ts_point_max = train_y.ts_waypoint.max()
        tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()
        tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]
        
        ts_sensor_min = train_sensor.ts_sensor.min()
        ts_sensor_max = train_sensor.ts_sensor.max()
        tmp3 = train_wifi_pd_x[['timestamp']].drop_duplicates()
        tmp3 = tmp3[(tmp3.timestamp<=ts_sensor_max)&(tmp3.timestamp>=ts_sensor_min)]
        
        if len(tmp2)>0:
            T_rel = train_y['ts_waypoint']
            T_rel2 = train_sensor['ts_sensor']
            T_ref = tmp2['timestamp']
            T_ref2 = tmp3['timestamp']
            xy_hat = scipy.interpolate.interp1d(T_rel, train_y[['x','y']], axis=0)(T_ref)
            sensor_hat = scipy.interpolate.interp1d(T_rel2, train_sensor[[ 'x_acce', 'y_acce', 'z_acce', 
                                                   'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
                                                   'z_gyros','x_rotate', 'y_rotate', 'z_rotate']], axis=0)(T_ref2)
            tmp2[['x','y']] = xy_hat
            tmp3[[ 'x_acce', 'y_acce', 'z_acce', 
               'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
               'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = sensor_hat
            tmp2['path'] = path
            tmp3['path'] = path
            train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp2,how='left',on=['path','timestamp'])
            train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp3,how='left',on=['path','timestamp'])
            train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
               'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
               'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
               'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
               'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='ffill')
            train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
               'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
               'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
               'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
               'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='bfill')
            train_all.append(train_wifi_pd_x)
    
train_all = pd.concat(train_all).reset_index(drop=True)

100%|██████████| 10877/10877 [04:10<00:00, 43.34it/s]


In [38]:
train_all.shape

(258097, 27)

In [40]:
train_all[train_all.x.isna()].shape,train_all[train_all.y.isna()].shape

((11756, 27), (11756, 27))

In [41]:
train_all = train_all[~train_all.x.isna()].reset_index(drop=True)

In [42]:
train_all.head()

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,floor,site,wifi_len,wifi_mean,...,z_acce,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate
0,1560500997770,"[7702, 19396, 18304, 19396, 7702, 7702, 19396,...","[61027, 55262, 10121, 57287, 45809, 53865, 261...","[3.204325463643926, 3.1059258532748903, 2.9091...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.206,0.353603,...,0.461806,0.634199,0.116776,-0.11019,0.983807,-0.595578,0.852319,-0.630592,0.850756,1.353243
1,1560500999681,"[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...","[10121, 31140, 61027, 55262, 57287, 53865, 458...","[2.712327411798748, 2.712327411798748, 2.61392...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.22,0.299748,...,-0.246482,1.202698,-0.395429,-0.167547,1.256958,-0.496999,1.898839,-0.977061,0.819255,1.276234
2,1560501001590,"[18304, 19396, 7702, 7702, 19396, 7702, 12721,...","[10121, 57287, 31140, 61027, 55262, 22353, 603...","[3.1059258532748903, 3.1059258532748903, 2.810...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.238,0.268875,...,-0.704095,-0.460087,0.171733,-0.242435,1.697688,-0.036796,1.083052,-0.492361,1.059535,1.177969
3,1560501003516,"[19396, 7702, 19396, 18304, 7702, 7702, 7702, ...","[57287, 31140, 55262, 10121, 22353, 53865, 432...","[3.1059258532748903, 2.8107270221677836, 2.613...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.258,0.230216,...,0.698048,0.366486,-0.610369,-0.043591,1.107954,0.193343,0.313588,-0.572031,0.860852,1.132166
4,1560501005442,"[7702, 18304, 19396, 19396, 7702, 7702, 7702, ...","[31140, 10121, 55262, 57287, 43265, 61027, 612...","[2.8107270221677836, 2.6139278014297127, 2.613...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.282,0.210465,...,0.13602,-0.585768,-0.70765,0.853478,1.480396,-0.201118,0.544498,-0.470753,0.657864,1.078007


In [43]:
train_all.shape

(246341, 27)

In [44]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
N_SPLITS = 10
SEED = 0#42
for fold, (trn_idx, val_idx) in enumerate(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED).split(train_all['site'], train_all['site'])):
    train_all.loc[val_idx, 'fold'] = fold
    

In [45]:
# train_all[train_all.path=='5dd3824044333f00067aa2c4'].fold.value_counts()

In [46]:
# train_all[train_all.site=='5c3c44b80379370013e0fd2b'].fold.value_counts()

In [47]:
train_all.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,floor,site,wifi_len,wifi_mean,...,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,fold
0,1560500997770,"[7702, 19396, 18304, 19396, 7702, 7702, 19396,...","[61027, 55262, 10121, 57287, 45809, 53865, 261...","[3.204325463643926, 3.1059258532748903, 2.9091...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.206,0.353603,...,0.634199,0.116776,-0.11019,0.983807,-0.595578,0.852319,-0.630592,0.850756,1.353243,6.0
1,1560500999681,"[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...","[10121, 31140, 61027, 55262, 57287, 53865, 458...","[2.712327411798748, 2.712327411798748, 2.61392...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.22,0.299748,...,1.202698,-0.395429,-0.167547,1.256958,-0.496999,1.898839,-0.977061,0.819255,1.276234,8.0


In [52]:
train_all.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,floor,site,wifi_len,wifi_mean,...,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,fold
0,1560500997770,"[7702, 19396, 18304, 19396, 7702, 7702, 19396,...","[61027, 55262, 10121, 57287, 45809, 53865, 261...","[3.204325463643926, 3.1059258532748903, 2.9091...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.206,0.353603,...,0.634199,0.116776,-0.11019,0.983807,-0.595578,0.852319,-0.630592,0.850756,1.353243,6.0
1,1560500999681,"[18304, 7702, 7702, 19396, 19396, 7702, 7702, ...","[10121, 31140, 61027, 55262, 57287, 53865, 458...","[2.712327411798748, 2.712327411798748, 2.61392...",5d073b814a19c000086c558b,0.299386,F3,5c3c44b80379370013e0fd2b,0.22,0.299748,...,1.202698,-0.395429,-0.167547,1.256958,-0.496999,1.898839,-0.977061,0.819255,1.276234,8.0


In [53]:
test_wifi_pd.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,wifi_len,wifi_mean,wifi_median,wifi_std,site
0,1180,"[7007, 9522, 15215, 18669, 15215, 19396, 4851,...","[35106, 10783, 39335, 4531, 48757, 19211, 1176...","[1.9251305288464635, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.038,0.024464,-0.338061,1.033093,5da1389e4db8ce0c98bd0547
1,3048,"[18669, 9522, 7007, 19396, 15215, 15215, 1264,...","[4531, 10783, 35106, 19211, 39335, 48757, 6030...","[2.1219297495845346, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.04,0.075218,-0.338061,0.991529,5da1389e4db8ce0c98bd0547


In [54]:
test_sensor_pd_csv_group = dict(list(test_sensor_pd_csv.groupby('path',as_index=False)))


In [55]:
import scipy.stats as stats
import scipy
test_all = []

for path,train_wifi_pd_x in tqdm(test_wifi_pd.groupby('path')):
    # path = '5e15730aa280850006f3d005'
    train_sensor = test_sensor_pd_csv_group[path][['ts_sensor', 'x_acce', 'y_acce', 'z_acce', 
       'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
       'z_gyros','x_rotate', 'y_rotate', 'z_rotate', 'path']].reset_index(drop=True)

    train_wifi_pd_x['ts_waypoint'] = 0

    ts_point_min = train_sensor.ts_sensor.min()
    ts_point_max = train_sensor.ts_sensor.max()
    tmp2 = train_wifi_pd_x[['timestamp']].drop_duplicates()
    tmp2 = tmp2[(tmp2.timestamp<=ts_point_max)&(tmp2.timestamp>=ts_point_min)]
    if len(tmp2)>0:
        T_rel2 = train_sensor['ts_sensor']
        T_ref = tmp2['timestamp']
        sensor_hat = scipy.interpolate.interp1d(T_rel2, train_sensor[[ 'x_acce', 'y_acce', 'z_acce', 
                                               'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
                                               'z_gyros','x_rotate', 'y_rotate', 'z_rotate']], axis=0)(T_ref)
        tmp2[[ 'x_acce', 'y_acce', 'z_acce', 
           'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
           'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = sensor_hat
        tmp2['path'] = path
        train_wifi_pd_x = pd.merge(train_wifi_pd_x,tmp2,how='left',on=['path','timestamp'])
        train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
           'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
           'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
           'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
           'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='ffill')
        train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
           'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
           'z_gyros','x_rotate', 'y_rotate', 'z_rotate']] = train_wifi_pd_x[[ 'x_acce', 'y_acce', 'z_acce', 
           'x_magne','y_magne', 'z_magne', 'x_gyros', 'y_gyros', 
           'z_gyros','x_rotate', 'y_rotate', 'z_rotate']].fillna(method='bfill')
            
        test_all.append(train_wifi_pd_x)
    
test_all = pd.concat(test_all).reset_index(drop=True)

100%|██████████| 626/626 [00:09<00:00, 64.84it/s]


In [58]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from dask.distributed import wait

SENSORS = ['acce','acce_uncali','gyro',
           'gyro_uncali','magn','magn_uncali','ahrs']

NFEAS = {
    'acce': 3,
    'acce_uncali': 3,
    'gyro': 3,
    'gyro_uncali': 3,
    'magn': 3,
    'magn_uncali': 3,
    'ahrs': 3,
    'wifi': 1,
    'ibeacon': 1,
    'waypoint': 3
}

ACOLS = ['timestamp','x','y','z']
        
FIELDS = {
    'acce': ACOLS,
    'acce_uncali': ACOLS,
    'gyro': ACOLS,
    'gyro_uncali': ACOLS,
    'magn': ACOLS,
    'magn_uncali': ACOLS,
    'ahrs': ACOLS,
    'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],
    'ibeacon': ['timestamp','code','rssi','last_timestamp'],
    'waypoint': ['timestamp','x','y']
}

def to_frame(data, col):
    cols = FIELDS[col]
    is_dummy = False
    if data.shape[0]>0:
        df = pd.DataFrame(data, columns=cols)
    else:
        df = create_dummy_df(cols)
        is_dummy = True
    for col in df.columns:
        if 'timestamp' in col:
            df[col] = df[col].astype('int64')
    return df, is_dummy

def create_dummy_df(cols):
    df = pd.DataFrame()
    for col in cols:
        df[col] = [0]
        if col in ['ssid','bssid']:
            df[col] = df[col].map(str)
    return df

from dataclasses import dataclass

import numpy as np


@dataclass
class ReadData:
    acce: np.ndarray
    acce_uncali: np.ndarray
    gyro: np.ndarray
    gyro_uncali: np.ndarray
    magn: np.ndarray
    magn_uncali: np.ndarray
    ahrs: np.ndarray
    wifi: np.ndarray
    ibeacon: np.ndarray
    waypoint: np.ndarray


def read_data_file(data_filename):
    acce = []
    acce_uncali = []
    gyro = []
    gyro_uncali = []
    magn = []
    magn_uncali = []
    ahrs = []
    wifi = []
    ibeacon = []
    waypoint = []

    with open(data_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for line_data in lines:
        line_data = line_data.strip()
        if not line_data or line_data[0] == '#':
            continue

        line_data = line_data.split('\t')

        if line_data[1] == 'TYPE_ACCELEROMETER':
            acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
            acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE':
            gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
            gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD':
            magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
            magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ROTATION_VECTOR':
            if len(line_data)>=5:
                ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_WIFI':
            sys_ts = line_data[0]
            ssid = line_data[2]
            bssid = line_data[3]
            rssi = line_data[4]
            lastseen_ts = line_data[6]
            wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]
            wifi.append(wifi_data)
            continue

        if line_data[1] == 'TYPE_BEACON':
            ts = line_data[0]
            uuid = line_data[2]
            major = line_data[3]
            minor = line_data[4]
            rssi = line_data[6]
            lastts = line_data[-1]
            ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]
            ibeacon.append(ibeacon_data)
            continue

        if line_data[1] == 'TYPE_WAYPOINT':
            waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])

    acce = np.array(acce)
    acce_uncali = np.array(acce_uncali)
    gyro = np.array(gyro)
    gyro_uncali = np.array(gyro_uncali)
    magn = np.array(magn)
    magn_uncali = np.array(magn_uncali)
    ahrs = np.array(ahrs)
    wifi = np.array(wifi)
    ibeacon = np.array(ibeacon)
    waypoint = np.array(waypoint)

    return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)

In [59]:
def get_test_dfs(PATH, test_files):
    dtest = get_test_df(PATH)
    buildings = set(dtest['building'].values.tolist())
    dws = {}
    ntest_files = []
    for fname in tqdm(test_files):
        path = fname.split('/')[-1].split('.')[0]
        mask = dtest['path'] == path
        dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)
        ntest_files.append(fname)
    return dws

def get_test_df(PATH):
    dtest = pd.read_csv(f'{PATH}/sample_submission.csv')
    dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])
    dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])
    dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])
    dtest['timestamp'] = dtest['timestamp'].astype('int64')
    dtest = dtest.sort_values(['path','timestamp']).reset_index(drop=True)
    return dtest

def get_time_gap(name):
    data = read_data_file(name)
    db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')
#     print(db,no_ibeacon)
    
    if no_ibeacon==0:
        gap = db['last_timestamp'] - db['timestamp']
        assert gap.unique().shape[0]==1
        return gap.values[0],no_ibeacon
    
    if no_ibeacon==1:
        # Group wifis by timestamp
        wifi_groups = pd.DataFrame(data.wifi).groupby(0)   
        # Find which one is the most recent of all time points.
        est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max() 
        return est_ts,no_ibeacon

    

def fix_timestamp_test(df, gap):
    df['real_timestamp'] = df['timestamp'] + gap
    return df

In [60]:
test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')
test_files_ori[:4]

['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',
 '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',
 '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',
 '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']

In [61]:
import dask
from dask.distributed import Client, wait, LocalCluster

# set n_workers to number of cores
client = Client(n_workers=8, 
                threads_per_worker=1)
client

Port 8787 is already in use. 
Perhaps you already have a cluster running?
Hosting the diagnostics dashboard on a random port instead.


0,1
Client  Scheduler: tcp://127.0.0.1:46533  Dashboard: http://127.0.0.1:39225/status,Cluster  Workers: 8  Cores: 8  Memory: 66.71 GB


In [62]:
%%time
futures = []
for fname in tqdm(test_files_ori, total=len(test_files_ori)):
    f = client.submit(get_time_gap,fname)
    futures.append(f)
    
testpath2gap = {}
for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):
    testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()
    

100%|██████████| 626/626 [00:00<00:00, 9976.27it/s]
100%|██████████| 626/626 [00:18<00:00, 33.98it/s] 

CPU times: user 2.8 s, sys: 194 ms, total: 3 s
Wall time: 18.5 s





In [63]:
test_all['timestamp'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['timestamp'],test_all['path'])]
# test_all['ts_waypoint'] = [xx+testpath2gap[yy][0] for (xx,yy) in zip(test_all['ts_waypoint'],test_all['path'])]

In [64]:
# test_all['timestamp'] = (test_all['timestamp']-train_all_timestamp_min)/(train_all_timestamp_max-train_all_timestamp_min)

In [65]:
test_all.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,wifi_len,wifi_mean,wifi_median,wifi_std,...,z_acce,x_magne,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate
0,1573190312033,"[7007, 9522, 15215, 18669, 15215, 19396, 4851,...","[35106, 10783, 39335, 4531, 48757, 19211, 1176...","[1.9251305288464635, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.038,0.024464,-0.338061,1.033093,...,0.10945,-0.465064,-0.372143,0.303976,1.588786,0.558175,0.87003,0.07365,1.1989,0.804293
1,1573190313901,"[18669, 9522, 7007, 19396, 15215, 15215, 1264,...","[4531, 10783, 35106, 19211, 39335, 48757, 6030...","[2.1219297495845346, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.04,0.075218,-0.338061,0.991529,...,-0.080417,-0.153359,1.292601,0.500978,1.588793,0.248723,0.80081,0.203768,1.516082,0.877682


In [66]:

ss2 = StandardScaler()
ss2.fit(train_all.loc[:,['timestamp']])
train_all.loc[:,['timestamp']] = ss2.transform(train_all.loc[:,['timestamp']])
test_all.loc[:,['timestamp']] = ss2.transform(test_all.loc[:,['timestamp']])

In [67]:
# train_all_floor_min = train_all.floor.min()
# train_all_floor_max = train_all.floor.max()
# train_all['floor'] = (train_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)
# test_all['floor'] = (test_all['floor']-train_all_floor_min)/(train_all_floor_max-train_all_floor_min)

In [68]:
sitelist = list(sorted(set(train_all.site)))
sitedict = dict(zip(sitelist,range(len(sitelist))))
train_all['site_id'] = train_all['site'].apply(lambda x: sitedict[x])
test_all['site_id'] = test_all['site'].apply(lambda x: sitedict[x])


In [69]:
def MCRMSE(y_true, y_pred):
    colwise_mse = tf.reduce_mean(tf.square(y_true - y_pred), axis=1)
    return tf.reduce_mean(tf.sqrt(colwise_mse), axis=1)

def gru_layer(hidden_dim, dropout):
    return L.Bidirectional(L.GRU(
        hidden_dim, dropout=dropout, return_sequences=True, kernel_initializer='orthogonal'))

def pandas_list_to_array(df):
    """
    Input: dataframe of shape (x, y), containing list of length l
    Return: np.array of shape (x, l, y)
    """
    
    return np.transpose(
        np.array(df.values.tolist()),
        (0, 2, 1)
    )

def preprocess_inputs(df, cols=['ssid','bssid', 'rssi']):
    return pandas_list_to_array(
        df[cols]
    )

In [3]:
def build_model_mix(sid_size,bssid_size,site_size, seq_len=100, pred_len=2, dropout=0.2, 
                sp_dropout=0.1, embed_dim=64, hidden_dim=128, n_layers=3,lr=0.001):
    inputs = L.Input(shape=(seq_len, 3))
    input_time = L.Input(shape = (4+12,))
    input_site = L.Input(shape = (1,))
        
    categorical_fea1 = inputs[:, :, :1]
    categorical_fea2 = inputs[:, :, 1:2]
    numerical_fea = inputs[:, :, 2:]
    

    embed = L.Embedding(input_dim=sid_size, output_dim=embed_dim)(categorical_fea1)
    reshaped = tf.reshape(embed, shape=(-1, embed.shape[1],  embed.shape[2] * embed.shape[3]))
    reshaped = L.SpatialDropout1D(sp_dropout)(reshaped)
    
    embed2 = L.Embedding(input_dim=bssid_size, output_dim=embed_dim)(categorical_fea2)
    reshaped2 = tf.reshape(embed2, shape=(-1, embed2.shape[1],  embed2.shape[2] * embed2.shape[3]))
    reshaped2 = L.SpatialDropout1D(sp_dropout)(reshaped2)
    
    
    hidden = L.concatenate([reshaped, reshaped2, numerical_fea], axis=2)
    
    for x in range(n_layers):
        hidden = gru_layer(hidden_dim, dropout)(hidden)
    
    # Since we are only making predictions on the first part of each sequence, 
    # we have to truncate it
    truncated = hidden[:, :pred_len]
    truncated = L.Flatten()(truncated)
    
    embed_site = L.Embedding(input_dim=site_size, output_dim=1)(input_site)
    embed_site = L.Flatten()(embed_site)
        
    truncated = L.concatenate([truncated, input_time,embed_site], axis=1)
    
    #out = L.Dense(32, activation='linear')(truncated)
    
    out = L.Dense(2, activation='linear')(truncated)
        
    model = tf.keras.Model(inputs=[inputs,input_time,input_site], outputs=out)
    model.compile(tf.optimizers.Adam(lr), loss='mse')
    
    return model

def get_embed_size(n_cat):
    return min(600, round(1.6 * n_cat ** .56))


In [71]:
# import pickle
# with open('train_all.pickle','wb') as fw:
#     pickle.dump(train_all,fw)
# with open('test_all.pickle','wb') as fw:
#     pickle.dump(test_all,fw)

In [72]:
test_all.columns

Index(['timestamp', 'ssid', 'bssid', 'rssi', 'path', 'floorNo', 'wifi_len',
       'wifi_mean', 'wifi_median', 'wifi_std', 'site', 'ts_waypoint', 'x_acce',
       'y_acce', 'z_acce', 'x_magne', 'y_magne', 'z_magne', 'x_gyros',
       'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate', 'z_rotate', 'site_id'],
      dtype='object')

In [73]:
ss = train_all[train_all.path=='5dd4b80627889b0006b7772d']
ss.fillna(method='bfill',inplace=True)

In [74]:
ss

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,floor,site,wifi_len,wifi_mean,...,y_magne,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,fold,site_id
224653,0.707866,"[15433, 15433, 13482, 19396, 13482, 19396, 154...","[63855, 31672, 22276, 58558, 30991, 19176, 331...","[2.4171285806916414, 1.826730918477428, 1.8267...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.262,-0.325291,...,0.713641,0.364087,1.094598,-1.136477,0.8248,-1.02143,0.694993,1.35462,0.0,22
224654,0.707867,"[15433, 15433, 13482, 19396, 11875, 15433, 976...","[31672, 63855, 30991, 19176, 31883, 33131, 450...","[2.318728970322606, 1.6299316977393568, 1.3347...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.26,-0.363039,...,0.713641,0.364087,1.094598,-1.136477,0.8248,-1.02143,0.694993,1.35462,6.0,22
224655,0.707867,"[13482, 15433, 19396, 11875, 9761, 15433, 1187...","[30991, 63855, 19176, 31883, 45009, 31672, 389...","[1.3347328666322502, 1.3347328666322502, 1.236...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.26,-0.392559,...,1.250962,1.443087,0.997709,-1.189775,0.049868,-1.275551,-0.050002,1.369476,8.0,22
224656,0.707868,"[15433, 11875, 9761, 11875, 13482, 19396, 1503...","[63855, 31883, 45009, 38916, 42558, 59902, 172...","[1.3347328666322502, 0.8427348147870724, 0.645...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.258,-0.459344,...,1.017085,-1.334331,0.750805,-0.969883,-0.647448,-1.030683,-0.485144,1.381195,4.0,22
224657,0.707869,"[15433, 11875, 9761, 11875, 13482, 19396, 1348...","[63855, 31883, 45009, 38916, 42558, 59902, 265...","[1.3347328666322502, 0.8427348147870724, 0.645...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.256,-0.468747,...,-0.24606,1.20276,1.125877,-1.203099,0.514863,-0.930176,0.546333,1.365165,8.0,22
224658,0.707869,"[15433, 11875, 9761, 13482, 19396, 13482, 1939...","[63855, 31883, 45009, 26529, 62405, 15051, 153...","[1.3347328666322502, 0.8427348147870724, 0.645...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.234,-0.422163,...,0.657207,0.425335,1.844603,-1.069854,-0.492567,-1.23508,0.291654,1.367361,2.0,22
224659,0.70787,"[15433, 11875, 13482, 19396, 9761, 13482, 1939...","[63855, 31883, 26529, 62405, 45009, 15051, 153...","[1.3347328666322502, 0.8427348147870724, 0.842...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.238,-0.380232,...,-0.634699,0.024486,1.094598,-1.536361,-0.26007,-1.412949,-0.1778,1.355965,7.0,22
224660,0.707871,"[13482, 19396, 19396, 15433, 14772, 11875, 193...","[15051, 12689, 15371, 63855, 31025, 31883, 126...","[1.5315320873703213, 1.5315320873703213, 1.531...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.246,-0.278861,...,0.412877,0.254555,0.625828,-1.536361,0.359982,-1.125339,0.728169,1.377426,4.0,22
224661,0.707871,"[19396, 19396, 13482, 13482, 19396, 11875, 147...","[12690, 36864, 37924, 26529, 62405, 31883, 310...","[1.4331324770012857, 1.4331324770012857, 1.433...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.258,-0.282377,...,-0.508025,3.236964,-0.084674,-1.573146,0.058029,-1.486332,0.197214,1.448679,1.0,22
224662,0.707872,"[19396, 19396, 13482, 13482, 19396, 19396, 134...","[12690, 36864, 37924, 26529, 62405, 15371, 150...","[1.6299316977393568, 1.6299316977393568, 1.629...",5dd4b80627889b0006b7772d,1.939098,F6,5dbc1d84c1eb61796cf7c010,0.258,-0.258731,...,0.569195,0.178858,-1.374161,-0.536651,-0.570007,0.243419,-0.569259,-1.188966,5.0,22


In [75]:
train_all[train_all.x_acce.isna()].path.value_counts()

Series([], Name: path, dtype: int64)

In [76]:
import time
t1 = time.time()
pred_cols = ['x','y']
train_inputs = preprocess_inputs(train_all,cols=['ssid', 'bssid', 'rssi'])
train_inputs_time = train_all[['timestamp','floorNo','wifi_len','wifi_mean', 'x_acce',
       'y_acce', 'z_acce', 'x_magne', 'y_magne', 'z_magne', 'x_gyros',
       'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate', 'z_rotate']].values
train_inputs_site = train_all['site_id'].values
train_labels = train_all[pred_cols].values
test_inputs = preprocess_inputs(test_all,cols=['ssid','bssid', 'rssi'])
test_inputs_time = test_all[['timestamp','floorNo','wifi_len','wifi_mean', 'x_acce',
       'y_acce', 'z_acce', 'x_magne', 'y_magne', 'z_magne', 'x_gyros',
       'y_gyros', 'z_gyros', 'x_rotate', 'y_rotate', 'z_rotate']].values
test_inputs_site = test_all['site_id'].values


    
    
x_test = test_inputs
x_test_time = test_inputs_time
x_test_site = test_inputs_site

oof_xy = np.zeros(train_labels.shape)
y_test_pred = 0
for fold_id in range(N_SPLITS):
    trn_idx = train_all[train_all.fold!=fold_id].index.tolist()
    val_idx = train_all[train_all.fold==fold_id].index.tolist()
    print('begin fold:',fold_id)
    x_train, x_val = train_inputs[trn_idx],train_inputs[val_idx]
    x_train_time, x_val_time = train_inputs_time[trn_idx],train_inputs_time[val_idx]
    x_train_site, x_val_site = train_inputs_site[trn_idx],train_inputs_site[val_idx]
    y_train, y_val = train_labels[trn_idx],train_labels[val_idx]
    
    model = build_model_mix(len(ssiddict),len(bssiddict),len(sitedict),seqlen,lr=0.001)

    history = model.fit(
            [x_train,x_train_time,x_train_site], y_train,
            validation_data=([x_val,x_val_time,x_val_site], y_val),
            batch_size=128,
            epochs=100,
            verbose=1,
            callbacks=[
                tf.keras.callbacks.ReduceLROnPlateau(patience=5),
                tf.keras.callbacks.ModelCheckpoint('rnn_model_wifisensor/model_fold_{}.h5'.format(fold_id)),
                tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4,
                                  patience=5, mode='min', restore_best_weights=True)
            ]
        )

#     model.load_weights('rnn_model_wifisensor/model_fold_{}.h5'.format(fold_id))


    y_val_pred = model.predict([x_val,x_val_time,x_val_site])
    y_test_pred += model.predict([x_test,x_test_time,x_test_site])
    oof_xy[val_idx] = y_val_pred
    print('fold',fold_id, np.mean(np.sqrt(np.sum((y_val-y_val_pred)**2,axis=1))))
    break
y_test_pred = y_test_pred/(fold_id + 1)    
train_labels_inv = (pd.DataFrame(train_labels[:,:],columns = ['x','y']))
oof_xy_pred_inv = (pd.DataFrame(oof_xy[:,:],columns = ['x','y']))
y_test_pred_inv = (pd.DataFrame(y_test_pred[:,:],columns = ['x','y']))  
print(np.mean(np.sqrt(np.sum((train_labels_inv-oof_xy_pred_inv)**2,axis=1))))

t2 = time.time()
print('elasped time:', t2 - t1)

begin fold: 0
fold 0 1.5483097549014528
151.3674636340884
elasped time: 85.34241080284119


In [77]:
print('fold',fold_id, np.mean(np.sqrt(np.sum((y_val-y_val_pred)**2,axis=1))))


fold 0 1.5483097549014528


In [78]:
test_all[['x','y']] = y_test_pred_inv

In [79]:
test_all.head(2)

Unnamed: 0,timestamp,ssid,bssid,rssi,path,floorNo,wifi_len,wifi_mean,wifi_median,wifi_std,...,z_magne,x_gyros,y_gyros,z_gyros,x_rotate,y_rotate,z_rotate,site_id,x,y
0,0.345764,"[7007, 9522, 15215, 18669, 15215, 19396, 4851,...","[35106, 10783, 39335, 4531, 48757, 19211, 1176...","[1.9251305288464635, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.038,0.024464,-0.338061,1.033093,...,0.303976,1.588786,0.558175,0.87003,0.07365,1.1989,0.804293,19,73.382919,88.722977
1,0.345765,"[18669, 9522, 7007, 19396, 15215, 15215, 1264,...","[4531, 10783, 35106, 19211, 39335, 48757, 6030...","[2.1219297495845346, 1.4331324770012857, 1.334...",00ff0c9a71cc37a2ebdd0f05,0.845957,0.04,0.075218,-0.338061,0.991529,...,0.500978,1.588793,0.248723,0.80081,0.203768,1.516082,0.877682,19,73.456726,87.362114


In [80]:
result = test_all[['timestamp','path','site','x','y']]
result['t1_wifi'] = ss2.inverse_transform(result['timestamp'])

result['t1_wifi'] = [xx-testpath2gap[yy][0] for (xx,yy) in zip(result['t1_wifi'],result['path'])]
result['path_id'] = result['site']+'_'+result['path']
result.head()

Unnamed: 0,timestamp,path,site,x,y,t1_wifi,path_id
0,0.345764,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,73.382919,88.722977,1180.0,5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
1,0.345765,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,73.456726,87.362114,3048.0,5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
2,0.345766,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,72.727478,85.721558,4924.0,5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
3,0.345766,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,72.815376,83.148605,6816.0,5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05
4,0.345767,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,73.723251,87.773392,8693.0,5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05


In [81]:
result.set_index('path_id', inplace=True)
result.head()

Unnamed: 0_level_0,timestamp,path,site,x,y,t1_wifi
path_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05,0.345764,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,73.382919,88.722977,1180.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05,0.345765,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,73.456726,87.362114,3048.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05,0.345766,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,72.727478,85.721558,4924.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05,0.345766,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,72.815376,83.148605,6816.0
5da1389e4db8ce0c98bd0547_00ff0c9a71cc37a2ebdd0f05,0.345767,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547,73.723251,87.773392,8693.0


In [82]:
from scipy.spatial.transform import Rotation as R
from PIL import Image
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objs as go
from pathlib import Path
import scipy.signal as signal
import json
import seaborn as sns # visualization
from dataclasses import dataclass

import matplotlib.pyplot as plt  # visualization
import numpy as np  # linear algebra
import random
import pandas as pd
from collections import Counter, defaultdict

plt.rcParams.update({'font.size': 14})

def split_ts_seq(ts_seq, sep_ts):
    """

    :param ts_seq:
    :param sep_ts:
    :return:
    """
    tss = ts_seq[:, 0].astype(float)
    unique_sep_ts = np.unique(sep_ts)
    ts_seqs = []
    start_index = 0
    for i in range(0, unique_sep_ts.shape[0]):
        end_index = np.searchsorted(tss, unique_sep_ts[i], side='right')
        if start_index == end_index:
            continue
        ts_seqs.append(ts_seq[start_index:end_index, :].copy())
        start_index = end_index

    # tail data
    if start_index < ts_seq.shape[0]:
        ts_seqs.append(ts_seq[start_index:, :].copy())

    return ts_seqs


def correct_trajectory(original_xys, end_xy):
    """

    :param original_xys: numpy ndarray, shape(N, 2)
    :param end_xy: numpy ndarray, shape(1, 2)
    :return:
    """
    corrected_xys = np.zeros((0, 2))

    A = original_xys[0, :]
    B = end_xy
    Bp = original_xys[-1, :]

    angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])
    angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])
    angle_BpAB = angle_BpAX - angle_BAX
    AB = np.sqrt(np.sum((B - A) ** 2))
    ABp = np.sqrt(np.sum((Bp - A) ** 2))

    corrected_xys = np.append(corrected_xys, [A], 0)
    for i in np.arange(1, np.size(original_xys, 0)):
        angle_CpAX = np.arctan2(original_xys[i, 1] - A[1], original_xys[i, 0] - A[0])

        angle_CAX = angle_CpAX - angle_BpAB

        ACp = np.sqrt(np.sum((original_xys[i, :] - A) ** 2))

        AC = ACp * AB / ABp

        delta_C = np.array([AC * np.cos(angle_CAX), AC * np.sin(angle_CAX)])

        C = delta_C + A

        corrected_xys = np.append(corrected_xys, [C], 0)

    return corrected_xys


def correct_positions(rel_positions, reference_positions):
    """

    :param rel_positions:
    :param reference_positions:
    :return:
    """
    rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])
    if len(rel_positions_list) != reference_positions.shape[0] - 1:
        # print(f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')
        del rel_positions_list[-1]
    assert len(rel_positions_list) == reference_positions.shape[0] - 1

    corrected_positions = np.zeros((0, 3))
    for i, rel_ps in enumerate(rel_positions_list):
        start_position = reference_positions[i]
        end_position = reference_positions[i + 1]
        abs_ps = np.zeros(rel_ps.shape)
        abs_ps[:, 0] = rel_ps[:, 0]
        # abs_ps[:, 1:3] = rel_ps[:, 1:3] + start_position[1:3]
        abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]
        for j in range(1, rel_ps.shape[0]):
            abs_ps[j, 1:3] = abs_ps[j-1, 1:3] + rel_ps[j, 1:3]
        abs_ps = np.insert(abs_ps, 0, start_position, axis=0)
        corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])
        corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))
        if i == 0:
            corrected_positions = np.append(corrected_positions, corrected_ps, axis=0)
        else:
            corrected_positions = np.append(corrected_positions, corrected_ps[1:], axis=0)

    corrected_positions = np.array(corrected_positions)

    return corrected_positions


def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):
    order = 4
    filter_b, filter_a = signal.butter(order, cut_off_freq / (sample_freq / 2), 'low', False)
    zf = signal.lfilter_zi(filter_b, filter_a)
    _, zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)
    _, filter_zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)

    return filter_b, filter_a, filter_zf


def get_rotation_matrix_from_vector(rotation_vector):
    q1 = rotation_vector[0]
    q2 = rotation_vector[1]
    q3 = rotation_vector[2]

    if rotation_vector.size >= 4:
        q0 = rotation_vector[3]
    else:
        q0 = 1 - q1*q1 - q2*q2 - q3*q3
        if q0 > 0:
            q0 = np.sqrt(q0)
        else:
            q0 = 0

    sq_q1 = 2 * q1 * q1
    sq_q2 = 2 * q2 * q2
    sq_q3 = 2 * q3 * q3
    q1_q2 = 2 * q1 * q2
    q3_q0 = 2 * q3 * q0
    q1_q3 = 2 * q1 * q3
    q2_q0 = 2 * q2 * q0
    q2_q3 = 2 * q2 * q3
    q1_q0 = 2 * q1 * q0

    R = np.zeros((9,))
    if R.size == 9:
        R[0] = 1 - sq_q2 - sq_q3
        R[1] = q1_q2 - q3_q0
        R[2] = q1_q3 + q2_q0

        R[3] = q1_q2 + q3_q0
        R[4] = 1 - sq_q1 - sq_q3
        R[5] = q2_q3 - q1_q0

        R[6] = q1_q3 - q2_q0
        R[7] = q2_q3 + q1_q0
        R[8] = 1 - sq_q1 - sq_q2

        R = np.reshape(R, (3, 3))
    elif R.size == 16:
        R[0] = 1 - sq_q2 - sq_q3
        R[1] = q1_q2 - q3_q0
        R[2] = q1_q3 + q2_q0
        R[3] = 0.0

        R[4] = q1_q2 + q3_q0
        R[5] = 1 - sq_q1 - sq_q3
        R[6] = q2_q3 - q1_q0
        R[7] = 0.0

        R[8] = q1_q3 - q2_q0
        R[9] = q2_q3 + q1_q0
        R[10] = 1 - sq_q1 - sq_q2
        R[11] = 0.0

        R[12] = R[13] = R[14] = 0.0
        R[15] = 1.0

        R = np.reshape(R, (4, 4))

    return R


def get_orientation(R):
    flat_R = R.flatten()
    values = np.zeros((3,))
    if np.size(flat_R) == 9:
        values[0] = np.arctan2(flat_R[1], flat_R[4])
        values[1] = np.arcsin(-flat_R[7])
        values[2] = np.arctan2(-flat_R[6], flat_R[8])
    else:
        values[0] = np.arctan2(flat_R[1], flat_R[5])
        values[1] = np.arcsin(-flat_R[9])
        values[2] = np.arctan2(-flat_R[8], flat_R[10])

    return values


def compute_steps(acce_datas):
    step_timestamps = np.array([])
    step_indexs = np.array([], dtype=int)
    step_acce_max_mins = np.zeros((0, 4))
    sample_freq = 50
    window_size = 22
    low_acce_mag = 0.6
    step_criterion = 1
    interval_threshold = 250

    acce_max = np.zeros((2,))
    acce_min = np.zeros((2,))
    acce_binarys = np.zeros((window_size,), dtype=int)
    acce_mag_pre = 0
    state_flag = 0

    warmup_data = np.ones((window_size,)) * 9.81
    filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)
    acce_mag_window = np.zeros((window_size, 1))

    # detect steps according to acceleration magnitudes
    for i in np.arange(0, np.size(acce_datas, 0)):
        acce_data = acce_datas[i, :]
        acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))

        acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)
        acce_mag_filt = acce_mag_filt[0]

        acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])
        acce_mag_window = np.delete(acce_mag_window, 0)
        mean_gravity = np.mean(acce_mag_window)
        acce_std = np.std(acce_mag_window)
        mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])

        # detect valid peak or valley of acceleration magnitudes
        acce_mag_filt_detrend = acce_mag_filt - mean_gravity
        if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):
            # peak
            acce_binarys = np.append(acce_binarys, [1])
            acce_binarys = np.delete(acce_binarys, 0)
        elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):
            # valley
            acce_binarys = np.append(acce_binarys, [-1])
            acce_binarys = np.delete(acce_binarys, 0)
        else:
            # between peak and valley
            acce_binarys = np.append(acce_binarys, [0])
            acce_binarys = np.delete(acce_binarys, 0)

        if (acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):
            if state_flag == 0:
                acce_max[:] = acce_data[0], acce_mag_filt
                state_flag = 1
            elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (
                    acce_mag_filt > acce_max[1]):
                acce_max[:] = acce_data[0], acce_mag_filt
            elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):
                acce_max[:] = acce_data[0], acce_mag_filt
                state_flag = 1

        # choose reasonable step criterion and check if there is a valid step
        # save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]
        step_flag = False
        if step_criterion == 2:
            if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):
                step_flag = True
        elif step_criterion == 3:
            if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):
                step_flag = True
        else:
            if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:
                if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):
                    acce_min[:] = acce_data[0], acce_mag_filt
                    state_flag = 2
                    step_flag = True
                elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (
                        acce_mag_filt < acce_min[1]):
                    acce_min[:] = acce_data[0], acce_mag_filt
        if step_flag:
            step_timestamps = np.append(step_timestamps, acce_data[0])
            step_indexs = np.append(step_indexs, [i])
            step_acce_max_mins = np.append(step_acce_max_mins,
                                           [[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)
        acce_mag_pre = acce_mag_filt_detrend

    return step_timestamps, step_indexs, step_acce_max_mins


def compute_stride_length(step_acce_max_mins):
    K = 0.4
    K_max = 0.8
    K_min = 0.4
    para_a0 = 0.21468084
    para_a1 = 0.09154517
    para_a2 = 0.02301998

    stride_lengths = np.zeros((step_acce_max_mins.shape[0], 2))
    k_real = np.zeros((step_acce_max_mins.shape[0], 2))
    step_timeperiod = np.zeros((step_acce_max_mins.shape[0] - 1, ))
    stride_lengths[:, 0] = step_acce_max_mins[:, 0]
    window_size = 2
    step_timeperiod_temp = np.zeros((0, ))

    # calculate every step period - step_timeperiod unit: second
    for i in range(0, step_timeperiod.shape[0]):
        step_timeperiod_data = (step_acce_max_mins[i + 1, 0] - step_acce_max_mins[i, 0]) / 1000
        step_timeperiod_temp = np.append(step_timeperiod_temp, [step_timeperiod_data])
        if step_timeperiod_temp.shape[0] > window_size:
            step_timeperiod_temp = np.delete(step_timeperiod_temp, [0])
        step_timeperiod[i] = np.sum(step_timeperiod_temp) / step_timeperiod_temp.shape[0]

    # calculate parameters by step period and acceleration magnitude variance
    k_real[:, 0] = step_acce_max_mins[:, 0]
    k_real[0, 1] = K
    for i in range(0, step_timeperiod.shape[0]):
        k_real[i + 1, 1] = np.max([(para_a0 + para_a1 / step_timeperiod[i] + para_a2 * step_acce_max_mins[i, 3]), K_min])
        k_real[i + 1, 1] = np.min([k_real[i + 1, 1], K_max]) * (K / K_min)

    # calculate every stride length by parameters and max and min data of acceleration magnitude
    stride_lengths[:, 1] = np.max([(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]),
                                   np.ones((step_acce_max_mins.shape[0], ))], axis=0)**(1 / 4) * k_real[:, 1]

    return stride_lengths


def compute_headings(ahrs_datas):
    headings = np.zeros((np.size(ahrs_datas, 0), 2))
    for i in np.arange(0, np.size(ahrs_datas, 0)):
        ahrs_data = ahrs_datas[i, :]
        rot_mat = get_rotation_matrix_from_vector(ahrs_data[1:])
        azimuth, pitch, roll = get_orientation(rot_mat)
        around_z = (-azimuth) % (2 * np.pi)
        headings[i, :] = ahrs_data[0], around_z
    return headings


def compute_step_heading(step_timestamps, headings):
    step_headings = np.zeros((len(step_timestamps), 2))
    step_timestamps_index = 0
    for i in range(0, len(headings)):
        if step_timestamps_index < len(step_timestamps):
            if headings[i, 0] == step_timestamps[step_timestamps_index]:
                step_headings[step_timestamps_index, :] = headings[i, :]
                step_timestamps_index += 1
        else:
            break
    assert step_timestamps_index == len(step_timestamps)

    return step_headings


def compute_rel_positions(stride_lengths, step_headings):
    rel_positions = np.zeros((stride_lengths.shape[0], 3))
    for i in range(0, stride_lengths.shape[0]):
        rel_positions[i, 0] = stride_lengths[i, 0]
        rel_positions[i, 1] = -stride_lengths[i, 1] * np.sin(step_headings[i, 1])
        rel_positions[i, 2] = stride_lengths[i, 1] * np.cos(step_headings[i, 1])

    return rel_positions


def compute_step_positions(acce_datas, ahrs_datas, posi_datas):
    step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)
    headings = compute_headings(ahrs_datas)
    stride_lengths = compute_stride_length(step_acce_max_mins)
    step_headings = compute_step_heading(step_timestamps, headings)
    rel_positions = compute_rel_positions(stride_lengths, step_headings)
    step_positions = correct_positions(rel_positions, posi_datas)

    return step_positions


In [83]:
sample_submission = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')


In [84]:
sample_submission['building'] = [x.split('_')[0] for x in sample_submission['site_path_timestamp']]
sample_submission['path_id'] = [x.split('_')[1] for x in sample_submission['site_path_timestamp']]
sample_submission['timestamp'] = [x.split('_')[2] for x in sample_submission['site_path_timestamp']]
samples = pd.DataFrame(sample_submission.groupby(['building','path_id'])['timestamp'].apply(lambda x: list(x)))
buildings = np.unique([x[0] for x in samples.index])
samples.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,timestamp
building,path_id,Unnamed: 2_level_1
5a0546857ecc773753327266,046cfa46be49fc10834815c6,"[0000000000009, 0000000009017, 0000000015326, ..."
5a0546857ecc773753327266,05d052dde78384b0c543d89c,"[0000000000012, 0000000005748, 0000000014654, ..."
5a0546857ecc773753327266,0c06cc9f21d172618d74c6c8,"[0000000000011, 0000000011818, 0000000019825, ..."
5a0546857ecc773753327266,146035943a1482883ed98570,"[0000000000011, 0000000004535, 0000000011498, ..."
5a0546857ecc773753327266,1ef2771dfea25d508142ba06,"[0000000000009, 0000000012833, 0000000021759, ..."


In [85]:
from scipy.interpolate import interp1d
from scipy.ndimage.filters import uniform_filter1d

colacce = ['xyz_time','x_acce','y_acce','z_acce']
colahrs = ['xyz_time','x_ahrs','y_ahrs','z_ahrs']

for building in buildings:
    print(building)
    paths = samples.loc[building].index
    # Acceleration info:
    tfm = pd.read_csv(f'indoor_testing_accel/{building}.txt',index_col=0)
    for path_id in paths:
        # Original predicted values:
        xy = result.loc[building+'_'+path_id]
        tfmi = tfm.loc[path_id]
        acce_datas = np.array(tfmi[colacce],dtype=np.float)
        ahrs_datas = np.array(tfmi[colahrs],dtype=np.float)
        posi_datas = np.array(xy[['t1_wifi','x','y']],dtype=np.float)
        # Outlier removal:
        xyout = uniform_filter1d(posi_datas,size=3,axis=0,mode='reflect')
        xydiff = np.abs(posi_datas-xyout)
        xystd = np.std(xydiff,axis=0)*3
        posi_datas = posi_datas[(xydiff[:,1]<xystd[1])&(xydiff[:,2]<xystd[2])]
        # Step detection:
        step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)
        stride_lengths = compute_stride_length(step_acce_max_mins)
        # Orientation detection:
        headings = compute_headings(ahrs_datas)
        step_headings = compute_step_heading(step_timestamps, headings)
        rel_positions = compute_rel_positions(stride_lengths, step_headings)
        # Running average:
        posi_datas = uniform_filter1d(posi_datas,size=3,axis=0,mode='reflect')[0::3,:]
        # The 1st prediction timepoint should be earlier than the 1st step timepoint.
        rel_positions = rel_positions[rel_positions[:,0]>posi_datas[0,0],:]
        # If two consecutive predictions are in-between two step datapoints,
        # the last one is removed, causing error (in the "split_ts_seq" function).
        posi_index = [np.searchsorted(rel_positions[:,0], x, side='right') for x in posi_datas[:,0]]
        u, i1, i2 = np.unique(posi_index, return_index=True, return_inverse=True)
        posi_datas = np.vstack([np.mean(posi_datas[i2==i],axis=0) for i in np.unique(i2)])
        # Position correction:
        step_positions = correct_positions(rel_positions, posi_datas)
        # Interpolate for timestamps in the testing set:

        t = step_positions[:,0]
        x = step_positions[:,1]
        y = step_positions[:,2]
        fx = interp1d(t, x, kind='linear', fill_value=(x[0],x[-1]), bounds_error=False) #fill_value="extrapolate"
        fy = interp1d(t, y, kind='linear', fill_value=(y[0],y[-1]), bounds_error=False)
        # Output result:
        t0 = np.array(samples.loc[(building,path_id),'timestamp'],dtype=np.float64)
        sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'x'] = fx(t0)
        sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'y'] = fy(t0)
            
        #sample_submission.loc[(sample_submission.building==building)&(sample_submission.path_id==path_id),'floor'] = floors.loc[building+'_'+path_id,'floor']
#         break
#     break

# sample_submission[['site_path_timestamp','floor','x','y']].to_csv('submission_mix_v3.3_del_outlier.csv',index=False)

5a0546857ecc773753327266
5c3c44b80379370013e0fd2b
5d27075f03f801723c2e360f
5d27096c03f801723c31e5e0
5d27097f03f801723c320d97
5d27099f03f801723c32511d
5d2709a003f801723c3251bf
5d2709b303f801723c327472
5d2709bb03f801723c32852c
5d2709c303f801723c3299ee
5d2709d403f801723c32bd39
5d2709e003f801723c32d896
5da138274db8ce0c98bbd3d2
5da1382d4db8ce0c98bbe92e
5da138314db8ce0c98bbf3a0
5da138364db8ce0c98bc00f1
5da1383b4db8ce0c98bc11ab
5da138754db8ce0c98bca82f
5da138764db8ce0c98bcaa46
5da1389e4db8ce0c98bd0547
5da138b74db8ce0c98bd4774
5da958dd46f8266d0737457b
5dbc1d84c1eb61796cf7c010
5dc8cea7659e181adb076a3f


In [86]:
subold = pd.read_csv('submission_floor.csv')

sample_submission['floor']=subold['floor']
sample_submission[['site_path_timestamp','floor','x','y']].to_csv('submission_wifi_sensor.csv',index=False)

