{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 27 sec\n",
"2000 57 sec\n",
"3000 85 sec\n",
"4000 115 sec\n",
"5000 144 sec\n",
"6000 176 sec\n",
"7000 213 sec\n",
"8000 232 sec\n",
"9000 263 sec\n",
"10000 297 sec\n",
"read train data 322 sec\n",
"read test data 383 sec\n",
"steps per minute: 73\n",
"process acceleration data to get steps and speed 386 sec\n",
"got projected position 388 sec\n",
"prepared train coordinates 401 sec\n",
"0 5a0546857ecc773753327266 site 403 sec\n",
"  0 (1054, 1111) 406 sec\n",
"  840 (1054, 1111) 455 sec\n",
"1 5c3c44b80379370013e0fd2b site 467 sec\n",
"  0 (98, 187) 468 sec\n",
"2 5d27075f03f801723c2e360f site 469 sec\n",
"  0 (201, 226) 471 sec\n",
"3 5d27096c03f801723c31e5e0 site 474 sec\n",
"  0 (3274, 696) 476 sec\n",
"  1300 (3274, 696) 523 sec\n",
"  2600 (3274, 696) 570 sec\n",
"4 5d27097f03f801723c320d97 site 595 sec\n",
"  0 (805, 580) 597 sec\n",
"5 5d27099f03f801723c32511d site 624 sec\n",
"  0 (228, 207) 626 sec\n",
"6 5d2709a003f801723c3251bf site 627 sec\n",
"  0 (1003, 503) 629 sec\n",
"7 5d2709b303f801723c327472 site 641 sec\n",
"  0 (2625, 829) 644 sec\n",
"  660 (2625, 829) 692 sec\n",
"  1320 (2625, 829) 740 sec\n",
"  1980 (2625, 829) 789 sec\n",
"8 5d2709bb03f801723c32852c site 837 sec\n",
"  0 (3104, 1121) 841 sec\n",
"  440 (3104, 1121) 889 sec\n",
"  880 (3104, 1121) 937 sec\n",
"  1320 (3104, 1121) 985 sec\n",
"  1760 (3104, 1121) 1033 sec\n",
"  2200 (3104, 1121) 1081 sec\n",
"  2640 (3104, 1121) 1129 sec\n",
"  3080 (3104, 1121) 1177 sec\n",
"9 5d2709c303f801723c3299ee site 1179 sec\n",
"  0 (1330, 2043) 1183 sec\n",
"  480 (1330, 2043) 1229 sec\n",
"  960 (1330, 2043) 1275 sec\n",
"10 5d2709d403f801723c32bd39 site 1311 sec\n",
"  0 (4656, 914) 1314 sec\n",
"  900 (4656, 914) 1362 sec\n",
"  1800 (4656, 914) 1410 sec\n",
"  2700 (4656, 914) 1458 sec\n",
"  3600 (4656, 914) 1506 sec\n",
"  4500 (4656, 914) 1555 sec\n",
"11 5d2709e003f801723c32d896 site 1563 sec\n",
"  0 (2143, 641) 1566 sec\n",
"  1160 (2143, 641) 1613 sec\n",
"12 5da138274db8ce0c98bbd3d2 site 1653 sec\n",
"  0 (387, 202) 1655 sec\n",
"13 5da1382d4db8ce0c98bbe92e site 1656 sec\n",
"  0 (1148, 970) 1659 sec\n",
"  1020 (1148, 970) 1707 sec\n",
"14 5da138314db8ce0c98bbf3a0 site 1713 sec\n",
"  0 (963, 651) 1715 sec\n",
"15 5da138364db8ce0c98bc00f1 site 1748 sec\n",
"  0 (668, 300) 1750 sec\n",
"16 5da1383b4db8ce0c98bc11ab site 1753 sec\n",
"  0 (1466, 676) 1756 sec\n",
"  920 (1466, 676) 1804 sec\n",
"17 5da138754db8ce0c98bca82f site 1833 sec\n",
"  0 (1950, 590) 1835 sec\n",
"18 5da138764db8ce0c98bcaa46 site 1886 sec\n",
"  0 (1729, 868) 1889 sec\n",
"  1020 (1729, 868) 1937 sec\n",
"19 5da1389e4db8ce0c98bd0547 site 1970 sec\n",
"  0 (515, 294) 1971 sec\n",
"20 5da138b74db8ce0c98bd4774 site 1977 sec\n",
"  0 (1241, 1172) 1981 sec\n",
"  420 (1241, 1172) 2028 sec\n",
"  840 (1241, 1172) 2075 sec\n",
"21 5da958dd46f8266d0737457b site 2120 sec\n",
"  0 (3307, 1843) 2125 sec\n",
"  280 (3307, 1843) 2169 sec\n",
"  560 (3307, 1843) 2213 sec\n",
"  840 (3307, 1843) 2258 sec\n",
"  1120 (3307, 1843) 2302 sec\n",
"  1400 (3307, 1843) 2346 sec\n",
"  1680 (3307, 1843) 2391 sec\n",
"  1960 (3307, 1843) 2435 sec\n",
"  2240 (3307, 1843) 2479 sec\n",
"  2520 (3307, 1843) 2522 sec\n",
"  2800 (3307, 1843) 2566 sec\n",
"  3080 (3307, 1843) 2609 sec\n",
"22 5dbc1d84c1eb61796cf7c010 site 2645 sec\n",
"  0 (2102, 2418) 2651 sec\n",
"  200 (2102, 2418) 2694 sec\n",
"  400 (2102, 2418) 2738 sec\n",
"  600 (2102, 2418) 2781 sec\n",
"  800 (2102, 2418) 2825 sec\n",
"  1000 (2102, 2418) 2868 sec\n",
"  1200 (2102, 2418) 2912 sec\n",
"  1400 (2102, 2418) 2956 sec\n",
"  1600 (2102, 2418) 2999 sec\n",
"  1800 (2102, 2418) 3043 sec\n",
"  2000 (2102, 2418) 3087 sec\n",
"23 5dc8cea7659e181adb076a3f site 3109 sec\n",
"  0 (1681, 1326) 3113 sec\n",
"  400 (1681, 1326) 3159 sec\n",
"  800 (1681, 1326) 3205 sec\n",
"  1200 (1681, 1326) 3252 sec\n",
"  1600 (1681, 1326) 3298 sec\n",
"finish main fingerprinting loop (39075, 4) 3307 sec\n",
"prepared df_xy_pred 3315 sec\n",
"put predictions into df_dr - start 3328 sec\n",
"put predictions into df_dr - end 3348 sec\n",
"Finished 3422 sec\n"
]
}
],
"source": [
"import os\n",
"import glob\n",
"import pandas as pd\n",
"import numpy as np\n",
"from dataclasses import dataclass\n",
"from pathlib import Path\n",
"import time\n",
"from scipy.signal import find_peaks, savgol_filter\n",
"from numba import njit\n",
"from scipy.spatial.distance import cdist\n",
"import gc\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"######################################################################################\n",
"# part 1 - read all data #############################################################\n",
"######################################################################################\n",
"\n",
"# init timer\n",
"start_time = time.time()\n",
"\n",
"# data structure\n",
"@dataclass\n",
"class ReadData:\n",
"    acce: np.ndarray\n",
"    ahrs: np.ndarray\n",
"    wifi: np.ndarray\n",
"    waypoint: np.ndarray\n",
"    SiteID: str\n",
"    FileName: str\n",
"    FloorNum: int\n",
"\n",
"# site decode dictionary\n",
"site_di = {'5a0546857ecc773753327266':0,'5c3c44b80379370013e0fd2b':1,'5d27075f03f801723c2e360f':2,'5d27096c03f801723c31e5e0':3,\n",
"           '5d27097f03f801723c320d97':4,'5d27099f03f801723c32511d':5,'5d2709a003f801723c3251bf':6,'5d2709b303f801723c327472':7,\n",
"           '5d2709bb03f801723c32852c':8,'5d2709c303f801723c3299ee':9,'5d2709d403f801723c32bd39':10,'5d2709e003f801723c32d896':11,\n",
"           '5da138274db8ce0c98bbd3d2':12,'5da1382d4db8ce0c98bbe92e':13,'5da138314db8ce0c98bbf3a0':14,'5da138364db8ce0c98bc00f1':15,\n",
"           '5da1383b4db8ce0c98bc11ab':16,'5da138754db8ce0c98bca82f':17,'5da138764db8ce0c98bcaa46':18,'5da1389e4db8ce0c98bd0547':19,\n",
"           '5da138b74db8ce0c98bd4774':20,'5da958dd46f8266d0737457b':21,'5dbc1d84c1eb61796cf7c010':22,'5dc8cea7659e181adb076a3f':23}\n",
"\n",
"# the 24 sites that appear in the test set\n",
"test_bldg = list(site_di.keys())\n",
"\n",
"# floor decode dictionary\n",
"fl_di = {'F1':0, 'F2':1, 'F3':2, 'F4':3, 'F5':4, 'F6':5, 'F7':6, 'F8':7, '1F':0, '2F':1, '3F':2,\n",
"         '4F':3, '5F':4, '6F':5, '7F':6, '8F':7, '9F':8, 'B1':-1, 'B2':-2}\n",
"\n",
"# BSSID decode dictionary - construct it as data is read\n",
"BSSID_di = {}\n",
"\n",
"# this function reads one data file at a time\n",
"def read_data_file(data_filename, call_type):  # call_type: 0=train, 1=test\n",
"    acce = []\n",
"    ahrs = []\n",
"    wifi = []\n",
"    waypoint = []\n",
"    FloorNum = -99\n",
"    ts = 0\n",
"    wifi_c = 0\n",
"\n",
"    with open(data_filename, 'r', encoding='utf-8') as file:\n",
"        lines = file.readlines()\n",
"\n",
"    # assign values from the filename\n",
"    data_filename = str(data_filename).split('/')\n",
"    FileName = data_filename[-1].split('.')[0]\n",
"\n",
"    if call_type == 0:  # train data: infer site/floor from the path\n",
"        SiteID = data_filename[-3]\n",
"        FloorNum = fl_di[data_filename[-2]]\n",
"\n",
"    for line_data in lines:\n",
"        line_data = line_data.strip()\n",
"        if not line_data or line_data[0] == '#':\n",
"            # read metadata\n",
"            if 'startTime' in line_data:\n",
"                ld2 = line_data[10 + line_data.find('startTime'):]\n",
"                ld2 = ld2.split('\\t')\n",
"                ld2 = ld2[0].split(':')\n",
"                startTime = int(ld2[0])\n",
"            if 'SiteID' in line_data:\n",
"                ld2 = line_data.split(':')\n",
"                ld2 = ld2[1].split('\\t')\n",
"                SiteID = ld2[0]\n",
"            if 'FloorName' in line_data:\n",
"                ld2 = line_data[line_data.find('FloorName'):]\n",
"                ld2 = ld2.split(':')\n",
"                if FloorNum == -99 and ld2[1] != '':\n",
"                    FloorNum = fl_di[ld2[1]]\n",
"            continue\n",
"\n",
"        line_data = line_data.split('\\t')\n",
"\n",
"        if len(line_data) < 5:  # pad short lines (data error)\n",
"            line_data.append(0)\n",
"\n",
"        if call_type > 0 and line_data[1] == 'TYPE_ACCELEROMETER':  # only needed for test data; the total acceleration magnitude is all we need\n",
"            a = np.sqrt(float(line_data[2])**2 + float(line_data[3])**2 + float(line_data[4])**2)\n",
"            acce.append([int(line_data[0])-startTime, a])\n",
"            continue\n",
"\n",
"        if call_type > 0 and line_data[1] == 'TYPE_ROTATION_VECTOR':  # only needed for test data\n",
"            ahrs.append([int(line_data[0])-startTime, float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
"            continue\n",
"\n",
"        if line_data[1] == 'TYPE_WIFI':\n",
"            sys_ts = int(line_data[0])-startTime\n",
"            bssid_t = line_data[3]\n",
"            rssi = line_data[4]\n",
"\n",
"            # skip wifi records beyond 20 per timestamp\n",
"            if sys_ts == ts:\n",
"                wifi_c += 1\n",
"            else:\n",
"                wifi_c = 0\n",
"                ts = sys_ts\n",
"            if wifi_c > 20:\n",
"                continue\n",
"\n",
"            bssid = (BSSID_di.get(bssid_t) or -1)\n",
"            if bssid == -1:  # add each new bssid to the dictionary\n",
"                BSSID_di[bssid_t] = 1 + len(BSSID_di)\n",
"                bssid = BSSID_di[bssid_t]\n",
"\n",
"            wifi_data = [int(sys_ts), bssid, int(rssi)]\n",
"            wifi.append(wifi_data)\n",
"            continue\n",
"\n",
"        if line_data[1] == 'TYPE_WAYPOINT':\n",
"            waypoint.append([int(line_data[0])-startTime, float(line_data[2]), float(line_data[3])])\n",
"\n",
"    acce = np.array(acce, dtype=np.float32)\n",
"    ahrs = np.array(ahrs, dtype=np.float32)\n",
"    wifi = np.array(wifi, dtype=np.int32)\n",
"    waypoint = np.array(waypoint, dtype=np.float32)\n",
"    return ReadData(acce, ahrs, wifi, waypoint, SiteID, FileName, FloorNum)\n",
"\n",
"\n",
"\n",
"# read train data - prepare data objects\n",
"misc_tr = pd.DataFrame()\n",
"waypoint_tr = np.zeros([75278, 5], dtype=np.float32)\n",
"wifi_tr = np.zeros([5385467, 6], dtype=np.int32)\n",
"train_waypoints = pd.DataFrame()\n",
"misc_tr = pd.DataFrame({'waypoint_s':np.zeros(10877, dtype=np.int32)})\n",
"misc_tr['wifi_s'] = 0\n",
"misc_tr['ahrs_s'] = 0\n",
"misc_tr['Floor'] = 0\n",
"misc_tr['Site'] = ''\n",
"misc_tr['PathName'] = ''\n",
"misc_tr['path'] = 0\n",
"wifi_s = i = waypoint_s = 0\n",
"\n",
"# read train data\n",
"data_path = Path('../input/indoor-location-navigation/train')\n",
"floorplans = []\n",
"\n",
"# select only buildings that appear in the test set\n",
"for f in sorted(glob.glob(f'{data_path}/*/*')):\n",
"    if f.split('/')[-2] in test_bldg:\n",
"        floorplans.append(f)\n",
"paths = {fp:glob.glob(f'{fp}/*.txt') for fp in floorplans}\n",
"\n",
"# loop over all sites\n",
"for p in paths:\n",
"    for f in os.listdir(p):\n",
"        data = read_data_file(os.path.join(p, f), 0)\n",
"\n",
"        if data.waypoint.shape[0] > 0:\n",
"            df = pd.DataFrame({'x':data.waypoint[:,1], 'y':data.waypoint[:,2], 'site':data.SiteID, 'floor':data.FloorNum, 'path':i, 'pathName':data.FileName})\n",
"            train_waypoints = train_waypoints.append(df)\n",
"\n",
"            waypoint_tr[waypoint_s:waypoint_s + data.waypoint.shape[0], 0:3] = data.waypoint\n",
"            waypoint_tr[waypoint_s:waypoint_s + data.waypoint.shape[0], 3] = i\n",
"            waypoint_tr[waypoint_s:waypoint_s + data.waypoint.shape[0], 4] = data.FloorNum\n",
"            waypoint_s += data.waypoint.shape[0]\n",
"\n",
"        if data.wifi.shape[0] > 0:\n",
"            wifi_tr[wifi_s:wifi_s + data.wifi.shape[0], 0] = data.wifi[:,0]\n",
"            wifi_tr[wifi_s:wifi_s + data.wifi.shape[0], 2] = data.wifi[:,1]\n",
"            wifi_tr[wifi_s:wifi_s + data.wifi.shape[0], 3] = data.wifi[:,2]\n",
"            wifi_tr[wifi_s:wifi_s + data.wifi.shape[0], 4] = i\n",
"            wifi_tr[wifi_s:wifi_s + data.wifi.shape[0], 5] = data.FloorNum\n",
"            wifi_s += data.wifi.shape[0]\n",
"\n",
"        misc_tr['wifi_s'].iat[i] = wifi_s\n",
"        misc_tr['waypoint_s'].iat[i] = waypoint_s\n",
"        misc_tr['Floor'].iat[i] = data.FloorNum\n",
"        misc_tr['Site'].iat[i] = data.SiteID\n",
"        misc_tr['PathName'].iat[i] = data.FileName\n",
"        misc_tr['path'].iat[i] = i\n",
"\n",
"        if i > 0 and i%1000 == 0:\n",
"            print(i, int(time.time() - start_time), 'sec')\n",
"        i += 1\n",
"print('read train data', int(time.time() - start_time), 'sec')\n",
"\n",
"\n",
"\n",
"# read test data - prepare data objects\n",
"misc_te = pd.DataFrame()\n",
"ahrs = np.zeros([3819802, 9], dtype=np.float32)\n",
"acce = np.zeros([3819802, 2], dtype=np.float32)\n",
"wifi_te = np.zeros([790894, 6], dtype=np.int32)\n",
"misc_te = pd.DataFrame({'waypoint_s':np.zeros(626, dtype=np.int32)})\n",
"misc_te['wifi_s'] = 0\n",
"misc_te['ahrs_s'] = 0\n",
"misc_te['Floor'] = 0\n",
"misc_te['Site'] = ''\n",
"misc_te['PathName'] = ''\n",
"misc_te['path'] = 0\n",
"path_di = {}\n",
"wifi_s = i = ahrs_s = 0\n",
"\n",
"# read test data\n",
"data_path = Path('../input/indoor-location-navigation/test')\n",
"for f in os.listdir(data_path):\n",
"    data = read_data_file(os.path.join(data_path, f), 1)\n",
"    path_di[f[:-4]] = i  # needed for encoding the final submission\n",
"\n",
"    if data.ahrs.shape[0] > 0:\n",
"        ahrs[ahrs_s:ahrs_s + data.ahrs.shape[0], 8] = site_di[data.SiteID]\n",
"        ahrs[ahrs_s:ahrs_s + data.ahrs.shape[0], 0:4] = data.ahrs\n",
"        ahrs[ahrs_s:ahrs_s + data.ahrs.shape[0], 4] = i\n",
"        acce[ahrs_s:ahrs_s + data.ahrs.shape[0], :] = data.acce\n",
"        ahrs_s += data.ahrs.shape[0]\n",
"\n",
"    if data.wifi.shape[0] > 0:\n",
"        wifi_te[wifi_s:wifi_s + data.wifi.shape[0], 0] = data.wifi[:,0]\n",
"        wifi_te[wifi_s:wifi_s + data.wifi.shape[0], 2] = data.wifi[:,1]\n",
"        wifi_te[wifi_s:wifi_s + data.wifi.shape[0], 3] = data.wifi[:,2]\n",
"        wifi_te[wifi_s:wifi_s + data.wifi.shape[0], 4] = i + 100000  # to separate test from train\n",
"        wifi_s += data.wifi.shape[0]\n",
"\n",
"    misc_te['wifi_s'].iat[i] = wifi_s\n",
"    misc_te['ahrs_s'].iat[i] = ahrs_s\n",
"    misc_te['Site'].iat[i] = data.SiteID\n",
"    misc_te['PathName'].iat[i] = data.FileName\n",
"    misc_te['path'].iat[i] = i + 100000  # to make the path unique\n",
"    i += 1\n",
"print('read test data', int(time.time() - start_time), 'sec')\n",
"\n",
"\n",
"\n",
"# read sample submission\n",
"sub = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"tss = sub['site_path_timestamp'].str.split('_')\n",
"sub['path'] = tss.apply(lambda x: x[1]).map(path_di).astype('int32')\n",
"sub['ts'] = tss.apply(lambda x: x[2]).astype('int32')\n",
"sub = sub.sort_values(by=['path', 'ts']).reset_index(drop=True)\n",
"misc_te['waypoint_s'] = sub.groupby('path').size().reset_index()[0].cumsum()\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"######################################################################################\n",
"# part 2 - make relative prediction (dead reckoning) for test paths ##################\n",
"######################################################################################\n",
"\n",
"# dead reckoning parameters\n",
"ang_lim = 19.5\n",
"h = 10.59\n",
"min_dist = 22\n",
"step_length = 0.6717\n",
"v_min = 0.02666536\n",
"window = 33\n",
"v_max = 1.4798\n",
"p1 = 0.116315\n",
"p2 = 0.03715\n",
"\n",
"# process acceleration data to get steps and speed\n",
"acce[:,1] = savgol_filter(acce[:,1], 15, 1)\n",
"peak_times, _ = find_peaks(acce[:,1], height=h, distance=min_dist)\n",
"peak_times = np.round(peak_times, 0).astype(np.int32)\n",
"print('steps per minute:', int(.5 + 60 * peak_times.shape[0] * 50 / acce.shape[0]))\n",
"\n",
"# set speed\n",
"v = np.zeros(ahrs.shape[0], dtype=np.float32)\n",
"i = v0 = 0\n",
"for j in range(peak_times.shape[0] - 1):\n",
"    v[i:peak_times[j]] = v0\n",
"    i = peak_times[j]\n",
"    f = acce[peak_times[j]:peak_times[j+1],1]\n",
"    f = f.std()\n",
"    v0 = 50 * (p1 * f + step_length - p2 * np.sqrt(peak_times[j+1] - peak_times[j])) / (peak_times[j+1] - peak_times[j])\n",
"v[i:] = v0\n",
"v = savgol_filter(v, window, 1)  # smooth speed\n",
"v = np.minimum(v_max, np.maximum(v_min, v))  # cap/floor\n",
"print('process acceleration data to get steps and speed', int(time.time() - start_time), 'sec')\n",
"\n",
"# process ahrs data\n",
"cos = np.sqrt(1 - np.minimum(0.9999999, (ahrs[:,1:4] * ahrs[:,1:4]).sum(axis=1)))\n",
"x = 2 * (ahrs[:,1] * ahrs[:,2] - ahrs[:,3] * cos)\n",
"y = 1 - 2 * (ahrs[:,1] * ahrs[:,1] + ahrs[:,3] * ahrs[:,3])\n",
"norm = np.sqrt(x * x + y * y)\n",
"x = x / norm\n",
"y = y / norm\n",
"\n",
"# heading angle\n",
"ang = np.arctan2(x,y) * 180 / 3.14159  # degrees\n",
"\n",
"# per-site rotation that places most headings near the cardinal directions\n",
"ang_rot_site = [ 15, -33, -5, -33, -25, 6, 11, 3, -17, 1, 11, -2, -39, -1, 0, -44, 8, 1, 2, 0, -14, 5, 40, -27]\n",
"for i in range(24):  # loop over sites\n",
"    ang_rot = ang_rot_site[i]\n",
"    idx2 = (i == ahrs[:,8])\n",
"    # if close to a cardinal direction, assume it is equal to that direction\n",
"    # north\n",
"    idx = idx2 & (np.abs(ang-ang_rot) < ang_lim)\n",
"    ang[idx] = 0 + ang_rot\n",
"    # south\n",
"    idx = idx2 & (np.abs(np.abs(ang-ang_rot) - 180) < ang_lim)\n",
"    ang[idx] = 180 + ang_rot\n",
"    # east\n",
"    idx = idx2 & (np.abs(ang-ang_rot - 90) < ang_lim)\n",
"    ang[idx] = 90 + ang_rot\n",
"    # west\n",
"    idx = idx2 & (np.abs(ang-ang_rot + 90) < ang_lim)\n",
"    ang[idx] = -90 + ang_rot\n",
"ang_inc_site = [-2.0, 10.0, -1.5, -7.0, -4.5, -7.5, -2.5, -9.0, -10.0, -3.0, -6.5, -9.5, -7.0, -0.5, -5.0, -6.0, -8.0, 0.5, -5.0, -1.5, -1.0, -10.0, 0.0, -0.5]\n",
"for i in range(24):  # loop over sites\n",
"    idx = (i == ahrs[:,8])\n",
"    ang[idx] = ang[idx] + ang_inc_site[i]\n",
"\n",
"# restate x/y using rotated coords\n",
"x = np.sin(ang / 180 * 3.14159)\n",
"y = np.cos(ang / 180 * 3.14159)\n",
"\n",
"# get projected position\n",
"ahrs[:,5] = (v * np.append(0, x[:-1] * (ahrs[1:,0] - ahrs[:-1,0]) / 1000)).cumsum()\n",
"ahrs[:,6] = (v * np.append(0, y[:-1] * (ahrs[1:,0] - ahrs[:-1,0]) / 1000)).cumsum()\n",
"print('got projected position', int(time.time() - start_time), 'sec')\n",
"\n",
"\n",
"\n",
"# indices of waypoints - only use them for finding intersecting paths\n",
"path1 = np.array(sub['path'].astype('int64'))\n",
"ts1 = np.array(sub['ts'].astype('int64'))\n",
"i1 = path1 * 10000000 + ts1\n",
"path2 = ahrs[:,4].astype(np.int64)\n",
"ts2 = ahrs[:,0].astype(np.int64)\n",
"i2 = path2 * 10000000 + ts2\n",
"indices = []\n",
"m0 = 0\n",
"for i in range(sub.shape[0]):\n",
"    m = m0 + (i2[m0:m0 + 20000] >= i1[i]).argmax()\n",
"    if np.abs(i1[i] - i2[m]) > 100000:  # use the last point from the correct path\n",
"        m -= 1\n",
"    indices.append(m)\n",
"    m0 = m\n",
"\n",
"# select waypoints only, and get the closest position to each one\n",
"subl = sub.copy()\n",
"subl['x'] = ahrs[indices,5]  # projected position for waypoints\n",
"subl['y'] = ahrs[indices,6]\n",
"\n",
"# find intersecting paths\n",
"misc_te2 = misc_te.copy()\n",
"misc_te2['path'] = misc_te['path'] - 100000  # turn it into a normal path, for merging\n",
"subl = subl.merge(misc_te2[['path','waypoint_s','ahrs_s']], how='left', on='path')\n",
"res = []\n",
"for i1 in range(subl.shape[0] - 2):\n",
"    for j1 in range(i1+2, subl.shape[0]):\n",
"        if subl['path'].iat[i1] != subl['path'].iat[j1]:\n",
"            break\n",
"        dt = subl['ts'].iat[j1] - subl['ts'].iat[i1]\n",
"        if dt > 3700:\n",
"            dt = max(1, dt) / 1000\n",
"            d = np.sqrt((subl['x'].iat[i1] - subl['x'].iat[j1])**2 + (subl['y'].iat[i1] - subl['y'].iat[j1])**2)\n",
"            if d < 6.54 and d / dt < 0.064:\n",
"                res.append([i1, j1, subl['path'].iat[i1], subl['waypoint_s'].iat[i1], indices[i1], indices[j1], subl['ahrs_s'].iat[i1]])\n",
"                break  # no triples - move on to the next i1\n",
"res = pd.DataFrame(res)\n",
"res.columns = ['i', 'j', 'path', 'waypoint_s', 'i2', 'j2', 'ahrs_s']\n",
"\n",
"# correct intersecting paths\n",
"for k1 in range(res.shape[0]):\n",
"    i, j, path, waypoint_s, i2, j2, ahrs_s = res.iloc[k1]\n",
"    ts = np.array(subl['ts'].iloc[i:j+1])\n",
"    ts = ts - ts[0]\n",
"    ts = ts / ts[-1]\n",
"    mult = np.append(ts, np.ones(waypoint_s - 1 - j))\n",
"    subl['x'].iloc[i:waypoint_s] += (subl['x'].iloc[i] - subl['x'].iloc[j]) * mult\n",
"    subl['y'].iloc[i:waypoint_s] += (subl['y'].iloc[i] - subl['y'].iloc[j]) * mult\n",
"\n",
"    ts = np.array(ahrs[i2:j2+1, 0])\n",
"    ts = ts - ts[0]\n",
"    ts = ts / ts[-1]\n",
"    mult = np.append(ts, np.ones(ahrs_s - 1 - j2))\n",
"    ahrs[i2:ahrs_s, 5] += (ahrs[i2, 5] - ahrs[j2, 5]) * mult\n",
"    ahrs[i2:ahrs_s, 6] += (ahrs[i2, 6] - ahrs[j2, 6]) * mult\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"######################################################################################\n",
"# part 3 - fingerprinting ############################################################\n",
"######################################################################################\n",
"\n",
"# assign coordinates to each train wifi point (interpolate between waypoints)\n",
"wifi_s = waypoint_s = 0\n",
"wifi_xy = np.zeros([wifi_tr.shape[0], 2], dtype=np.float32)\n",
"for i in range(misc_tr.shape[0]):\n",
"    wifi = misc_tr['wifi_s'].iat[i] - wifi_s\n",
"    waypoint = misc_tr['waypoint_s'].iat[i] - waypoint_s\n",
"    waypoints = waypoint_tr[waypoint_s:waypoint_s + waypoint, :]\n",
"    waypoints_t = waypoints[:,0].astype(np.int32)\n",
"    # here each t is repeated many times - loop over distinct t values\n",
"    values, counts = np.unique(wifi_tr[wifi_s:wifi_s+wifi,0], return_counts=True)\n",
"    j = 0\n",
"    for c in range(values.shape[0]):\n",
"        t = values[c]\n",
"        if t <= waypoints_t[0]:\n",
"            k1 = 0\n",
"            k2 = k1\n",
"            w1 = 1\n",
"        elif t >= waypoints_t[-1]:\n",
"            k1 = waypoints_t.shape[0] - 1\n",
"            k2 = k1\n",
"            w1 = 1\n",
"        else:\n",
"            k2 = ((waypoints_t - t) > 0).argmax()\n",
"            k1 = k2 - 1\n",
"            w1 = (waypoints_t[k2] - t) / (waypoints_t[k2] - waypoints_t[k1])\n",
"        wifi_xy[wifi_s:wifi_s+counts[c], 0] = waypoint_tr[waypoint_s + k1, 1] * w1 + waypoint_tr[waypoint_s + k2, 1] * (1 - w1)\n",
"        wifi_xy[wifi_s:wifi_s+counts[c], 1] = waypoint_tr[waypoint_s + k1, 2] * w1 + waypoint_tr[waypoint_s + k2, 2] * (1 - w1)\n",
"        j += counts[c]\n",
"        wifi_s += counts[c]\n",
"    waypoint_s += waypoint\n",
"print('prepared train coordinates', int(time.time() - start_time), 'sec')\n",
"\n",
"# function for data formatting: construct a unique index\n",
"@njit\n",
"def f2id(ts, path):  # count unique combos of ts/path\n",
"    j = 0\n",
"    index = np.zeros(ts.shape[0], dtype=np.int32)\n",
"    for i in range(1, ts.shape[0]):\n",
"        if ts[i] != ts[i-1] or path[i] != path[i-1]:\n",
"            j = j + 1\n",
"        index[i] = j\n",
"    return index\n",
"\n",
"# put id in wifi data\n",
"wifi_tr[:,1] = f2id(wifi_tr[:,0], wifi_tr[:,4])\n",
"wifi_te[:,1] = 1000000 + f2id(wifi_te[:,0], wifi_te[:,4])  # make test separable from train by adding 1M\n",
"\n",
"# only keep bssids that are in the train data\n",
"bssids = set(wifi_tr[:,2])\n",
"rows = [i for i in range(wifi_te.shape[0]) if wifi_te[i,2] in bssids]\n",
"wifi_te = wifi_te[rows,:]\n",
"\n",
"# combine train and test data\n",
"wifi_xy = np.append(wifi_xy, np.zeros([wifi_tr.shape[0], wifi_xy.shape[1]], dtype=np.float32)).reshape(-1, wifi_xy.shape[1])\n",
"wifi_tr = np.append(wifi_tr, wifi_te).reshape(-1, wifi_tr.shape[1])\n",
"misc_tr = misc_tr.append(misc_te)\n",
"\n",
"# save\n",
"wifi_tr0 = wifi_tr.copy()\n",
"wifi_xy0 = wifi_xy.copy()\n",
"\n",
"# loop over all sites *****************************************************************\n",
"df1_tot = pd.DataFrame()\n",
"site_id = 0\n",
"for site in misc_tr['Site'].unique():\n",
"    if site == '':\n",
"        break\n",
"    print(site_id, site, 'site', int(time.time() - start_time), 'sec')\n",
"    site_id += 1\n",
"\n",
"    # select the current site only\n",
"    paths = set(misc_tr['path'].loc[misc_tr['Site'] == site])\n",
"    rows = [i for i in range(wifi_tr0.shape[0]) if wifi_tr0[i,4] in paths]\n",
"    wifi_tr = wifi_tr0[rows,:].copy()\n",
"    wifi_xy = wifi_xy0[rows,:].copy()\n",
"\n",
"    # only keep bssids that are present in both train and val\n",
"    bssids = set(wifi_tr[wifi_tr[:,1] >= 1000000,2])\n",
"    bssids2 = set(wifi_tr[wifi_tr[:,1] < 1000000,2])\n",
"    bssids = bssids.intersection(bssids2)\n",
"    rows = [i for i in range(wifi_tr.shape[0]) if wifi_tr[i,2] in bssids]\n",
"    wifi_tr = wifi_tr[rows,:]\n",
"    wifi_xy = wifi_xy[rows,:]\n",
"\n",
"    # renumber bssids\n",
"    bssids = pd.DataFrame({'bssid':wifi_tr[:,2]})\n",
"    wifi_tr[:,2] = np.array(bssids['bssid'].astype('category').cat.codes)\n",
"\n",
"    # format data\n",
"    df = pd.DataFrame(wifi_tr[:,[0, 1, 2, 3, 4, 5]])\n",
"    df.columns = ['ts', 'id', 'bssid','rssi','path','f']\n",
"    df['x'] = wifi_xy[:,0]\n",
"    df['y'] = wifi_xy[:,1]\n",
"    x = pd.pivot_table(df, values='rssi', index='id', columns='bssid', aggfunc=np.sum, fill_value=-10000).reset_index()\n",
"\n",
"    # split into train/valid\n",
"    x_tr = np.array(x.loc[x['id'] < 1000000], dtype=np.int32)\n",
"    x_val = np.array(x.loc[x['id'] >= 1000000], dtype=np.int32)\n",
"\n",
"    # process all val points in 1 pass\n",
"    x_val2 = x_val.reshape(-1)\n",
"    x_val2[x_val2 == -10000] = 10000\n",
"    x_val = x_val2.reshape(x_val.shape)\n",
"\n",
"    # process in chunks\n",
"    x_val0 = x_val.copy()  # save\n",
"    chunk_size = int(5.e9 / 3. / 4. / x_tr.shape[0] / x_tr.shape[1])  # keep the work arrays within ~5 GB in total\n",
"    id1 = x_tr[:,0]  # ids of train points\n",
"    x_tr = x_tr[:,1:]  # drop id\n",
"    x_tr = x_tr.reshape(x_tr.shape[0], 1, x_tr.shape[1])\n",
"    for i in range(1 + x_val.shape[0]//chunk_size):  # loop over chunks\n",
"        if i%20 == 0:\n",
"            print(' ', i * chunk_size, x_val0.shape, int(time.time() - start_time), 'sec')\n",
"\n",
"        x_val = x_val0[i*chunk_size:(i+1)*chunk_size,:].copy()\n",
"\n",
"        id0 = x_val[:,0]  # ids of val points\n",
"        x_val = x_val[:,1:]  # drop id\n",
"        x_val = x_val.reshape(1, x_val.shape[0], x_val.shape[1])\n",
"\n",
"        # find the closest match of each val record in x_tr\n",
"        x1 = np.abs(x_tr - x_val)\n",
"        x1a = x1 < 200\n",
"        # count of bssid matches\n",
"        x2 = x1a.sum(axis=-1)\n",
"        # diff for matched bssids\n",
"        x3 = (x1a * x1).sum(axis=-1)\n",
"\n",
"        # turn results into a dataframe\n",
"        df1 = pd.DataFrame({'id0':np.tile(id0, id1.shape[0]), 'cc':x2.ravel(), 'id':np.repeat(id1, id0.shape[0]), 'diff':x3.ravel()})\n",
"\n",
"        # select the closest matches for each match count\n",
"        df1['m'] = 28 * df1['cc'] - df1['diff']\n",
"        df2 = df1.groupby(['id0'])['m'].max().reset_index()\n",
"        df2.columns = ['id0','m2']\n",
"        df1 = df1.merge(df2, how='left', on='id0')\n",
"        df1 = df1.loc[df1['m'] >= df1['m2']].reset_index(drop=True)\n",
"        df1.drop(['m2', 'm'], axis=1, inplace=True)\n",
"\n",
"        # append to total\n",
"        df1_tot = df1_tot.append(df1)\n",
"print('finish main fingerprinting loop', df1_tot.shape, int(time.time() - start_time), 'sec')\n",
"del x3, x2, x1a, x1, x_val, x_tr, x_val0\n",
"gc.collect()\n",
"\n",
"# bring in coordinates\n",
"df = pd.DataFrame(wifi_tr0[:,[0, 1, 2, 3, 4, 5]])\n",
"df.columns = ['ts', 'id', 'bssid','rssi', 'path','f']\n",
"df['x'] = wifi_xy0[:df.shape[0],0]\n",
"df['y'] = wifi_xy0[:df.shape[0],1]\n",
"df_xy = df.groupby('id')[['x','y','f']].mean().reset_index()\n",
"df1_tot = df1_tot.merge(df_xy, how='left', on='id')\n",
"\n",
"# weight parameters\n",
"cc_di = {}  # multiplier per match count, tabulated\n",
"cc_l = [1,1,1,1,1,1,1,1,1,1,1.2,37,60,60,230,260,260,273,440,440,720,720]\n",
"for i in range(22):\n",
"    cc_di[i] = cc_l[i]\n",
"diff_mult = 23.9\n",
"\n",
"# make the predicted floor the same for all points on the same path\n",
"def f_pred_path(dft):  # replaces f1 with the average floor per path\n",
"    dft1 = pd.DataFrame(wifi_tr0[:,[1, 4]])\n",
"    dft1.columns = ['id0', 'path']\n",
"    dft1 = dft1.loc[dft1['path'] >= 100000]  # select test from total\n",
"    dft2 = dft1.groupby('id0').mean().reset_index()\n",
"    dft3 = dft[['id0','f1']].merge(dft2, how='left', on='id0')\n",
"    dft4 = dft3.groupby('path')['f1'].mean().reset_index()\n",
"    dft4['f1'] = np.round(dft4['f1'], 0).astype('int32')  # round to nearest. path, f1 - no dups.\n",
"    dft.drop('f1', axis=1, inplace=True)\n",
"    dft5 = dft2.merge(dft4, how='inner', on='path')  # id0, path, f1\n",
"    dft = dft.merge(dft5[['id0','f1']], how='left', on='id0')\n",
"    return dft\n",
"\n",
"# bring the relative prediction into df_xy_pred: id, x, y\n",
"dft = pd.DataFrame(wifi_tr0[:,[0, 1, 4]])\n",
"dft.columns = ['ts', 'id', 'path']\n",
"df_xy_pred = dft.groupby('id').mean().reset_index()\n",
"dtypes = {'ts':'int32', 'x_p':'float32', 'y_p':'float32', 'path':'int32'}\n",
"df_xy_pred = df_xy_pred.loc[df_xy_pred['path'] >= 100000].reset_index(drop=True)  # select test from total\n",
"df_dr = pd.DataFrame(ahrs[:,[0, 5, 6, 4]])  # relative prediction *********************************\n",
"paths = np.array(df_xy_pred['path'], dtype=np.int32) - 100000\n",
"tss = np.array(df_xy_pred['ts'], dtype=np.int32)\n",
"df_xy_pred.drop(['ts', 'path'], axis=1, inplace=True)\n",
"y_te = np.zeros([tss.shape[0], 2])\n",
"\n",
"# now only select data for wifi points (the relative prediction was for sensor timestamps)\n",
"path0 = -1\n",
"df3a_np = np.array(df_dr)\n",
"for i in range(y_te.shape[0]):\n",
"    path = paths[i]\n",
"    ts = tss[i]\n",
"    if path != path0:\n",
"        d = df3a_np[df3a_np[:,3] == path,:]\n",
"        offset = (df3a_np[:,3] == path).argmax()\n",
"        path0 = path\n",
"    if ts <= d[0,0]:\n",
"        y_te[i,0] = d[0, 1]\n",
"        y_te[i,1] = d[0, 2]\n",
"    elif ts >= d[-1,0]:\n",
"        y_te[i,0] = d[-1, 1]\n",
"        y_te[i,1] = d[-1, 2]\n",
"    else:  # interpolate between the 2 surrounding points\n",
"        k2 = ((d[:,0] - ts) > 0).argmax()\n",
"        k1 = k2 - 1\n",
"        w1 = (d[k2,0] - ts) / (d[k2,0] - d[k1,0])\n",
"        y_te[i,0] = d[k1, 1] * w1 + d[k2, 1] * (1 - w1)\n",
"        y_te[i,1] = d[k1, 2] * w1 + d[k2, 2] * (1 - w1)\n",
"print('prepared df_xy_pred', int(time.time() - start_time), 'sec')\n",
"del df3a_np\n",
"gc.collect()\n",
"df_xy_pred['x'] = y_te[:,0]\n",
"df_xy_pred['y'] = y_te[:,1]\n",
"df_xy_pred.columns = ['id0', 'x', 'y']  # use id0 here for an easier merge\n",
"\n",
"\n",
"\n",
"# predict in batches based on DR with offset; use x0/y0 as val DR.\n",
"# add adjacent points to form a batch, with an offset added to them.\n",
"\n",
"# bring in pred coordinates - need them for the offset\n",
"df1_tot = df1_tot.merge(df_xy_pred, how='left', on='id0')\n",
"df1_tot.columns = ['id0', 'cc', 'id', 'diff', 'x', 'y', 'f', 'x0', 'y0']\n",
"\n",
"# bring in the path for each id\n",
"df_p = df.groupby('id')['path'].mean().reset_index()\n",
"paths = np.array(df_p['path'])\n",
"def in_1(x):\n",
"    return x in ids2\n",
"\n",
"df1_tot0 = df1_tot.copy()  # save\n",
"max_offset = 90  # only add points within this distance of the current one\n",
"outlier = 18\n",
"for shift in range(1, 43):  # only add up to 42 points from before/after (up to 85 total)\n",
"    # next point on the same path\n",
"    ids = np.array(df_p['id'].iloc[shift:])  # skip the first - it is never next\n",
"    ids2 = set(ids[paths[shift:] == paths[:-shift]])  # ids that can be reduced by shift and still be on the same path\n",
"    df1_tot_m = df1_tot0.loc[df1_tot0['id0'].map(in_1)].copy()\n",
"    df1_tot_m['id0'] -= shift  # make it the same as the base\n",
"    # get the offset for it\n",
"    df1_tot_m = df1_tot_m.merge(df_xy_pred, how='left', on='id0')\n",
"    df1_tot_m.columns = ['id0', 'cc', 'id', 'diff', 'x', 'y', 'f', 'x0', 'y0', 'x0a', 'y0a']\n",
"    # add the offset\n",
"    df1_tot_m['x'] -= df1_tot_m['x0'] - df1_tot_m['x0a']\n",
"    df1_tot_m['y'] -= df1_tot_m['y0'] - df1_tot_m['y0a']\n",
"    # only keep if offset < max_offset\n",
"    idx = ((df1_tot_m['x0'] - df1_tot_m['x0a'])**2 + (df1_tot_m['y0'] - df1_tot_m['y0a'])**2) < max_offset**2\n",
"    df1_tot_m = df1_tot_m.loc[idx].reset_index(drop=True)\n",
"    # append the next point\n",
"    df1_tot_m.drop(['x0a', 'y0a'], axis=1, inplace=True)\n",
"    df1_tot = df1_tot.append(df1_tot_m).reset_index(drop=True)\n",
"\n",
"    # prev point on the same path\n",
"    ids = np.array(df_p['id'].iloc[:-shift])  # skip the last - it is never previous\n",
"    ids2 = set(ids[paths[shift:] == paths[:-shift]])  # ids that can be increased by shift and still be on the same path\n",
"    df1_tot_p = df1_tot0.loc[df1_tot0['id0'].map(in_1)].copy()\n",
"    df1_tot_p['id0'] += shift  # make it the same as the base\n",
"    # get the offset for it\n",
"    df1_tot_p = df1_tot_p.merge(df_xy_pred, how='left', on='id0')\n",
"    df1_tot_p.columns = ['id0', 'cc', 'id', 'diff', 'x', 'y', 'f', 'x0', 'y0', 'x0a', 'y0a']\n",
"    # add the offset\n",
"    df1_tot_p['x'] -= df1_tot_p['x0'] - df1_tot_p['x0a']\n",
"    df1_tot_p['y'] -= df1_tot_p['y0'] - df1_tot_p['y0a']\n",
"    # only keep if offset < max_offset\n",
"    idx = ((df1_tot_p['x0'] - df1_tot_p['x0a'])**2 + (df1_tot_p['y0'] - df1_tot_p['y0a'])**2) < max_offset**2\n",
"    df1_tot_p = df1_tot_p.loc[idx].reset_index(drop=True)\n",
"    # append the prev point\n",
"    df1_tot_p.drop(['x0a', 'y0a'], axis=1, inplace=True)\n",
"    df1_tot = df1_tot.append(df1_tot_p).reset_index(drop=True)\n",
"\n",
"\n",
"# calc score - raw\n",
"# weight of each point\n",
"df1_tot['w'] = (np.exp(- df1_tot['diff']/diff_mult) * df1_tot['cc'].map(cc_di)).astype('float32')\n",
"df1_tot['x1'] = (df1_tot['w'] * df1_tot['x']).astype('float32')\n",
"df1_tot['y1'] = (df1_tot['w'] * df1_tot['y']).astype('float32')\n",
"df1_tot['f1'] = (df1_tot['w'] * df1_tot['f']).astype('float32')\n",
"df2 = df1_tot.groupby('id0')[['w', 'x1', 'y1', 'f1']].sum().reset_index()\n",
"df1_tot.drop(['x1', 'y1', 'f1'], axis=1, inplace=True)\n",
"df2['x1'] = df2['x1'] / df2['w']\n",
"df2['y1'] = df2['y1'] / df2['w']\n",
"df2['f1'] = df2['f1'] / df2['w']\n",
"\n",
"# calc score - drop outliers\n",
"df1_tot = df1_tot.merge(df2[['id0', 'x1', 'y1', 'f1']], how='left', on='id0')  # adds x1, y1\n",
"dist = np.sqrt((df1_tot['x'] - df1_tot['x1'])**2 + (df1_tot['y'] - df1_tot['y1'])**2)\n",
"df1_tot['x1'] = (df1_tot['w'] * df1_tot['x']).astype('float32')\n",
"df1_tot['y1'] = (df1_tot['w'] * df1_tot['y']).astype('float32')\n",
"df1_tot['f1'] = (df1_tot['w'] * df1_tot['f']).astype('float32')\n",
"df2 = df1_tot.loc[dist < outlier].groupby('id0')[['w', 'x1', 'y1', 'f1']].sum().reset_index()  # drop outliers here\n",
"df1_tot.drop(['w', 'x1', 'y1', 'f1'], axis=1, inplace=True)\n",
"df2['x1'] = df2['x1'] / df2['w']\n",
"df2['y1'] = df2['y1'] / df2['w']\n",
"df2['f1'] = df2['f1'] / df2['w']\n",
"df2 = f_pred_path(df2)  # make the predicted floor the same for all points on the same path\n",
"\n",
"\n",
"\n",
"# put predictions into df_dr\n",
"print('put predictions into df_dr - start', int(time.time() - start_time), 'sec')\n",
"df_tp = df.groupby('id')[['ts','path']].mean().reset_index()\n",
"df2 = df2.merge(df_tp, how='left', left_on='id0', right_on='id')\n",
"x_p = np.array(df_dr[1])\n",
"y_p = np.array(df_dr[2])\n",
"df_dr[3] += 100000\n",
"for p in df2['path'].unique():\n",
"    d = df2.loc[df2['path'] == p].reset_index(drop=True)\n",
"    o1 = (df_dr[3] == p).argmax()\n",
"    o2 = (df_dr[3] == p).sum() + o1\n",
"    # start\n",
"    n1 = (df_dr[0].iloc[o1:o2] < d['ts'].iat[0]).sum()\n",
"    x_p[o1:o1+n1] += d['x1'].iat[0] - x_p[o1+n1]\n",
"    y_p[o1:o1+n1] += d['y1'].iat[0] - y_p[o1+n1]\n",
"    for i in range(1, d.shape[0]):  # i is the end of the range\n",
"        n2 = (df_dr[0].iloc[o1:o2] < d['ts'].iat[i]).sum()\n",
"        t = np.array(df_dr[0].iloc[o1+n1:o1+n2])\n",
"        t = (t - t[0]) / (t[-1] - t[0])  # 0 to 1\n",
"        x_p[o1+n1:o1+n2] += (d['x1'].iat[i-1] - x_p[o1+n1]) + t * ((d['x1'].iat[i] - x_p[o1+n2-1]) - (d['x1'].iat[i-1] - x_p[o1+n1]))\n",
"        y_p[o1+n1:o1+n2] += (d['y1'].iat[i-1] - y_p[o1+n1]) + t * ((d['y1'].iat[i] - y_p[o1+n2-1]) - (d['y1'].iat[i-1] - y_p[o1+n1]))\n",
"        n1 = n2\n",
"    # end\n",
"    x_p[o1+n1:o2] += d['x1'].iat[i] - x_p[o1+n1]\n",
"    y_p[o1+n1:o2] += d['y1'].iat[i] - y_p[o1+n1]\n",
"df_dr[1] = x_p\n",
"df_dr[2] = y_p\n",
"df_dr.columns = ['ts','x_p','y_p','path']\n",
"df2a = df2.groupby('path')['f1'].mean().reset_index()\n",
"df_dr = df_dr.merge(df2a[['path','f1']], how='left', on='path')\n",
"print('put predictions into df_dr - end', int(time.time() - start_time), 'sec')\n",
"\n",
"\n",
"# now only select data for waypoints\n",
"df3a = df_dr[['ts','path','x_p','y_p','f1']]\n",
"df3a.columns = ['ts', 'path', 'x_p', 'y_p', 'f_p']\n",
"path0 = -1\n",
"df3a_np = np.array(df3a[['ts', 'x_p', 'y_p', 'f_p','path']], dtype=np.float32)\n",
"for i in range(sub.shape[0]):\n",
"    path = sub['path'].iat[i]\n",
"    ts = sub['ts'].iat[i]\n",
"\n",
"    if path != path0:\n",
"        d = df3a_np[df3a_np[:,4] - 100000 == path,:]\n",
"        path0 = path\n",
"    sub['floor'].iat[i] = d[0,3]\n",
"\n",
"    if ts <= d[0,0]:\n",
"        sub['x'].iat[i] = d[0, 1]\n",
"        sub['y'].iat[i] = d[0, 2]\n",
"    elif ts >= d[-1,0]:\n",
"        sub['x'].iat[i] = d[-1, 1]\n",
"        sub['y'].iat[i] = d[-1, 2]\n",
"    else:  # interpolate between the 2 surrounding wifi points\n",
"        k2 = ((d[:,0] - ts) > 0).argmax()\n",
"        k1 = k2 - 1\n",
"        w1 = (d[k2,0] - ts) / (d[k2,0] - d[k1,0])\n",
"        sub['x'].iat[i] = d[k1, 1] * w1 + d[k2, 1] * (1 - w1)\n",
"        sub['y'].iat[i] = d[k1, 2] * w1 + d[k2, 2] * (1 - w1)\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"######################################################################################\n",
"# part 4 - post-processing ###########################################################\n",
"######################################################################################\n",
"\n",
"# post-processing parameters\n",
"threshold = 5  # snap to grid if the dist to a grid point is < this\n",
"step_mult = 0.6  # snap the next point on the path if dist to grid is < step_mult * dist to the current path point\n",
"\n",
"# save the starting prediction\n",
"sub['x1'] = sub['x']\n",
"sub['y1'] = sub['y']\n",
"\n",
"# drop duplicate waypoints\n",
"train_waypoints = train_waypoints.sort_values(by=['site','floor','x','y'])\n",
"train_waypoints = train_waypoints.drop_duplicates(subset=['site','floor','x','y'], ignore_index=True)\n",
"\n",
"def add_xy(df):  # add an (x, y) tuple column\n",
"    df['xy'] = [(x, y) for x,y in zip(df['x'], df['y'])]\n",
"    return df\n",
"\n",
"train_waypoints = add_xy(train_waypoints)\n",
"\n",
"def closest_point(point, points):  # find the closest point from a list of points\n",
"    return points[cdist([point], points).argmin()]\n",
"\n",
"\n",
"# snap to grid\n",
"sub.drop('path', axis=1, inplace=True)\n",
"sub = pd.concat([sub['site_path_timestamp'].str.split('_', expand=True).rename(columns={0:'site',1:'path',2:'timestamp'}), sub], axis=1).copy()\n",
"for N in range(20):  # iterate until the snapping settles\n",
"    ds = []\n",
"    sub = add_xy(sub)\n",
"    for (site, myfloor), d in sub.groupby(['site','floor']):\n",
"        idx = (train_waypoints['floor'] == myfloor) & (train_waypoints['site'] == site)\n",
"        true_floor_locs = train_waypoints.loc[idx].reset_index(drop=True)\n",
"        d['matched_point'] = [closest_point(x, list(true_floor_locs['xy'])) for x in d['xy']]\n",
"        d['x_'] = d['matched_point'].apply(lambda x: x[0])\n",
"        d['y_'] = d['matched_point'].apply(lambda x: x[1])\n",
"        ds.append(d)\n",
"    sub = pd.concat(ds)\n",
"    sub['dist'] = np.sqrt( (sub.x-sub.x_)**2 + (sub.y-sub.y_)**2 )\n",
"\n",
"    # snap to grid if within the threshold\n",
"    sub['_x_'] = sub['x']\n",
"    sub['_y_'] = sub['y']\n",
"    idx = sub['dist'] < threshold\n",
"    sub.loc[idx, '_x_'] = sub.loc[idx]['x_']\n",
"    sub.loc[idx, '_y_'] = sub.loc[idx]['y_']\n",
"\n",
"    # shift each path by its mean shift, then snap again\n",
"    dft = sub.groupby('path')[['x','_x_','y','_y_']].mean().reset_index()\n",
"    dft['dx'] = dft['_x_'] - dft['x']\n",
"    dft['dy'] = dft['_y_'] - dft['y']\n",
"    sub = sub.merge(dft[['path','dx','dy']], how='left', on='path')\n",
"    sub['x'] = sub['x'] + sub['dx']\n",
"    sub['y'] = sub['y'] + sub['dy']\n",
"    sub = add_xy(sub)\n",
"    sub.drop(['dx','dy'], axis=1, inplace=True)\n",
"\n",
"\n",
"# proceed 1 step at a time\n",
"for N in range(5):  # iterate until it converges\n",
"    # pass forward\n",
"    sub['x2'] = sub['_x_']  # init to the best prediction\n",
"    sub['y2'] = sub['_y_']\n",
"    sub['t'] = 0\n",
"    for i in range(0, sub.shape[0]):\n",
"        if i == 0 or sub['path'].iat[i] != sub['path'].iat[i-1]:  # process a new path\n",
"            site = sub['site'].iat[i]\n",
"            myfloor = sub['floor'].iat[i]\n",
"            idx = (train_waypoints['floor'] == myfloor) & (train_waypoints['site'] == site)\n",
"            true_floor_locs = train_waypoints.loc[idx].reset_index(drop=True)\n",
"            points = list(true_floor_locs['xy'])\n",
"            x = sub['x2'].iat[i]\n",
"            y = sub['y2'].iat[i]\n",
"            d0 = np.sqrt((sub['x1'].iat[i] - sub['x1'].iat[i+1])**2 + (sub['y1'].iat[i] - sub['y1'].iat[i+1])**2)\n",
"        else:  # get the 1-step predicted current point: last point + dPDR\n",
"            x = sub['x2'].iat[i-1] + sub['x1'].iat[i] - sub['x1'].iat[i-1]\n",
"            y = sub['y2'].iat[i-1] + sub['y1'].iat[i] - sub['y1'].iat[i-1]\n",
"            d0 = np.sqrt((sub['x1'].iat[i] - sub['x1'].iat[i-1])**2 + (sub['y1'].iat[i] - sub['y1'].iat[i-1])**2)\n",
"        # find the closest grid point to it\n",
"        dists = cdist([(x,y)], points)\n",
"        ii = dists.argmin()\n",
"        p = points[ii]\n",
"        dist = dists.min()\n",
"        if dist < d0 * step_mult:  # if the grid point is close, snap to it\n",
"            sub['x2'].iat[i] = p[0]\n",
"            sub['y2'].iat[i] = p[1]\n",
"            sub['t'].iat[i] = 1\n",
"    sub['_x_'] = sub['x2']  # put this in the final sub\n",
"    sub['_y_'] = sub['y2']\n",
"\n",
"    # pass backward\n",
"    sub['x3'] = sub['_x_']  # init to the best prediction\n",
"    sub['y3'] = sub['_y_']\n",
"    sub['t'] = 0\n",
"    for i in range(sub.shape[0] - 1, 0, -1):\n",
"        if i == sub.shape[0] - 1 or sub['path'].iat[i] != sub['path'].iat[i+1]:  # process a new path\n",
"            site = sub['site'].iat[i]\n",
"            myfloor = sub['floor'].iat[i]\n",
"            idx = (train_waypoints['floor'] == myfloor) & (train_waypoints['site'] == site)\n",
"            true_floor_locs = train_waypoints.loc[idx].reset_index(drop=True)\n",
"            points = list(true_floor_locs['xy'])\n",
"            x = sub['x3'].iat[i]\n",
"            y = sub['y3'].iat[i]\n",
"            d0 = np.sqrt((sub['x1'].iat[i] - sub['x1'].iat[i-1])**2 + (sub['y1'].iat[i] - sub['y1'].iat[i-1])**2)\n",
"        else:  # get the 1-step predicted current point: last point + dPDR\n",
"            x = sub['x3'].iat[i+1] + sub['x1'].iat[i] - sub['x1'].iat[i+1]\n",
"            y = sub['y3'].iat[i+1] + sub['y1'].iat[i] - sub['y1'].iat[i+1]\n",
"            d0 = np.sqrt((sub['x1'].iat[i] - sub['x1'].iat[i+1])**2 + (sub['y1'].iat[i] - sub['y1'].iat[i+1])**2)\n",
"        # find the closest grid point to it\n",
"        dists = cdist([(x,y)], points)\n",
"        ii = dists.argmin()\n",
"        p = points[ii]\n",
"        dist = dists.min()\n",
"        if dist < d0 * step_mult:  # if the grid point is close, snap to it\n",
"            sub['x3'].iat[i] = p[0]\n",
"            sub['y3'].iat[i] = p[1]\n",
"            sub['t'].iat[i] = 1\n",
"    sub['_x_'] = sub['x3']  # put this in the final sub\n",
"    sub['_y_'] = sub['y3']\n",
"# blend forward/backward 50/50\n",
"sub['_x_'] = (sub['x3'] + sub['x2']) / 2\n",
"sub['_y_'] = (sub['y3'] + sub['y2']) / 2\n",
"\n",
"\n",
"\n",
"# save submission\n",
"sub.drop(['x','y'], axis=1, inplace=True)\n",
"sub = sub.rename(columns={'_x_':'x', '_y_':'y'})\n",
"sub[['site_path_timestamp','floor','x','y']].to_csv('submission_ym.csv', index=False)\n",
"print('Finished', int(time.time() - start_time), 'sec')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}