Create solution

pull/2/head
benjas 4 years ago
parent 1f5a7e770c
commit caa5ff1701

@@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"\n",
"ss1 = pd.read_csv('submission_wifi.csv')\n",
"ss2 = pd.read_csv('submission_wifi_sensor.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.99816888],\n",
" [0.99816888, 1. ]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.corrcoef([ss1.y,ss2.y])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.99853603],\n",
" [0.99853603, 1. ]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.corrcoef([ss1.x,ss2.x])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"ss = ss1.copy()\n",
"ss['x'] = ss1['x']*0.5+ss2['x']*0.5\n",
"ss['y'] = ss1['y']*0.5+ss2['y']*0.5"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"ss.to_csv('sub_wifi_sensor.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.99717624],\n",
" [0.99717624, 1. ]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ss1 = pd.read_csv('sub_wifi_sensor_post.csv').sort_values(by='site_path_timestamp').reset_index(drop=True)\n",
"ss2 = pd.read_csv('submission_ym.csv').sort_values(by='site_path_timestamp').reset_index(drop=True)\n",
"np.corrcoef([ss1.x,ss2.x])\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"ss = ss1.copy()\n",
"ss['x'] = ss1['x']*0.5+ss2['x']*0.5\n",
"ss['y'] = ss1['y']*0.5+ss2['y']*0.5"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"ss.to_csv('final.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,361 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.007463,
"end_time": "2021-02-03T20:30:06.571139",
"exception": false,
"start_time": "2021-02-03T20:30:06.563676",
"status": "completed"
},
"tags": []
},
"source": [
"### Wifi features\n",
"\n",
"This this is the code to generate the wifi features available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). Using these features can get a score below 14. For an example notebook using them see [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter). They only uses waypoints, wifi and timestamp data to generate solution. See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for an outline of this solution method, and methods of improvement.\n",
"\n",
"There are `break`'s inserted into loops which need to be removed to get this to run. Right now data is written to current working directory. This takes 2-4 hours to run depending on hard drive etc. There is a lot of room for improvement speeding up feature generation. \n",
"\n",
"**Update:** I added one line that creates a column for the path filename, this allows for a groupkfold crossvalidation. \n"
]
},
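{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the GroupKFold split the `path` column enables (the feature file name, target choice, and fold count here are assumptions, not part of the original pipeline):\n",
"\n",
"```python\n",
"import pandas as pd\n",
"from sklearn.model_selection import GroupKFold\n",
"\n",
"train = pd.read_csv('5a0546857ecc773753327266_train.csv')  # one building's generated features\n",
"X = train.drop(columns=['x', 'y', 'f', 'path'])\n",
"for tr_idx, va_idx in GroupKFold(n_splits=5).split(X, train['x'], groups=train['path']):\n",
"    pass  # fit and evaluate a model per fold; paths never span folds\n",
"```"
]
},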
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.590945Z",
"iopub.status.busy": "2021-02-03T20:30:06.589984Z",
"iopub.status.idle": "2021-02-03T20:30:06.593594Z",
"shell.execute_reply": "2021-02-03T20:30:06.592887Z"
},
"papermill": {
"duration": 0.01623,
"end_time": "2021-02-03T20:30:06.593847",
"exception": false,
"start_time": "2021-02-03T20:30:06.577617",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"import gc\n",
"import json "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.614521Z",
"iopub.status.busy": "2021-02-03T20:30:06.613572Z",
"iopub.status.idle": "2021-02-03T20:30:06.616669Z",
"shell.execute_reply": "2021-02-03T20:30:06.616121Z"
},
"papermill": {
"duration": 0.015585,
"end_time": "2021-02-03T20:30:06.616837",
"exception": false,
"start_time": "2021-02-03T20:30:06.601252",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"base_path = '../input/indoor-location-navigation/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.639011Z",
"iopub.status.busy": "2021-02-03T20:30:06.638118Z",
"iopub.status.idle": "2021-02-03T20:30:09.333807Z",
"shell.execute_reply": "2021-02-03T20:30:09.334360Z"
},
"papermill": {
"duration": 2.711076,
"end_time": "2021-02-03T20:30:09.334617",
"exception": false,
"start_time": "2021-02-03T20:30:06.623541",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# pull out all the buildings actually used in the test set, given current method we don't need the other ones\n",
"ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"\n",
"# only 24 of the total buildings are used in the test set, \n",
"# this allows us to greatly reduce the intial size of the dataset\n",
"\n",
"ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
"used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
"\n",
"# dictionary used to map the floor codes to the values used in the submission file. \n",
"floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.359905Z",
"iopub.status.busy": "2021-02-03T20:30:09.359123Z",
"iopub.status.idle": "2021-02-03T20:30:09.362909Z",
"shell.execute_reply": "2021-02-03T20:30:09.362224Z"
},
"papermill": {
"duration": 0.021272,
"end_time": "2021-02-03T20:30:09.363069",
"exception": false,
"start_time": "2021-02-03T20:30:09.341797",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# get only the wifi bssid that occur over 1000 times(this number can be experimented with)\n",
"# these will be the only ones used when constructing features\n",
"bssid = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
" print(building)\n",
" wifi = list()\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" for file in files:\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for e, line in enumerate(txt):\n",
" tmp = line.strip().split()\n",
" if tmp[1] == \"TYPE_WIFI\":\n",
" wifi.append(tmp)\n",
" df = pd.DataFrame(wifi)\n",
" #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
" value_counts = df[3].value_counts()\n",
" top_bssid = value_counts[value_counts > 0].index.tolist()\n",
" print(len(top_bssid))\n",
" bssid[building] = top_bssid\n",
" del df\n",
" del wifi\n",
" gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.383252Z",
"iopub.status.busy": "2021-02-03T20:30:09.382581Z",
"iopub.status.idle": "2021-02-03T20:30:09.386704Z",
"shell.execute_reply": "2021-02-03T20:30:09.385809Z"
},
"papermill": {
"duration": 0.016635,
"end_time": "2021-02-03T20:30:09.386885",
"exception": false,
"start_time": "2021-02-03T20:30:09.370250",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"with open(\"bssid_1000.json\", \"w\") as f:\n",
" json.dump(bssid, f)\n",
"\n",
"with open(\"bssid_1000.json\") as f:\n",
" bssid = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.418284Z",
"iopub.status.busy": "2021-02-03T20:30:09.417119Z",
"iopub.status.idle": "2021-02-03T20:30:09.420513Z",
"shell.execute_reply": "2021-02-03T20:30:09.419767Z"
},
"papermill": {
"duration": 0.026514,
"end_time": "2021-02-03T20:30:09.420694",
"exception": false,
"start_time": "2021-02-03T20:30:09.394180",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# generate all the training data \n",
"building_dfs = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
" dfs = list()\n",
" index = sorted(bssid[building])\n",
" print(building)\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" print(floor)\n",
" for file in files:\n",
" wifi = list()\n",
" waypoint = list()\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WAYPOINT\":\n",
" waypoint.append(line)\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" df = pd.DataFrame(np.array(wifi)) \n",
"\n",
" # generate a feature, and label for each wifi block\n",
" for gid, g in df.groupby(0):\n",
" dists = list()\n",
" for e, k in enumerate(waypoint):\n",
" dist = abs(int(gid) - int(k[0]))\n",
" dists.append(dist)\n",
" nearest_wp_index = np.argmin(dists)\n",
" \n",
" g = g.drop_duplicates(subset=3)\n",
" tmp = g.iloc[:,3:5]\n",
" feat = tmp.set_index(3).reindex(index).replace(np.nan, -999).T\n",
" feat[\"x\"] = float(waypoint[nearest_wp_index][2])\n",
" feat[\"y\"] = float(waypoint[nearest_wp_index][3])\n",
" feat[\"f\"] = floor\n",
" feat[\"path\"] = file.split('/')[-1].split('.')[0] # useful for crossvalidation\n",
" dfs.append(feat)\n",
" \n",
" building_df = pd.concat(dfs)\n",
" building_dfs[building] = df\n",
" building_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+building+\"_train.csv\")"
]
},
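{
"cell_type": "markdown",
"metadata": {},
"source": [
"The core of the feature construction above is the `set_index(...).reindex(...)` step: each wifi block becomes a fixed-length RSSI vector over the building's sorted bssid list, with unseen bssid filled by -999. A toy sketch with made-up values:\n",
"\n",
"```python\n",
"import pandas as pd\n",
"index = ['aa', 'bb', 'cc']  # the building's sorted bssid list\n",
"block = pd.DataFrame({3: ['bb', 'aa'], 4: [-50, -61]})  # column 3 = bssid, column 4 = rssi\n",
"feat = block.set_index(3)[4].reindex(index).fillna(-999)\n",
"# feat -> aa: -61, bb: -50, cc: -999\n",
"```"
]
},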
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.454304Z",
"iopub.status.busy": "2021-02-03T20:30:09.451093Z",
"iopub.status.idle": "2021-02-03T20:30:09.464308Z",
"shell.execute_reply": "2021-02-03T20:30:09.464854Z"
},
"papermill": {
"duration": 0.036471,
"end_time": "2021-02-03T20:30:09.465079",
"exception": false,
"start_time": "2021-02-03T20:30:09.428608",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Generate the features for the test set\n",
"\n",
"ssubm_building_g = ssubm_df.groupby(0)\n",
"feature_dict = dict()\n",
"\n",
"for gid0, g0 in ssubm_building_g:\n",
" break\n",
" index = sorted(bssid[g0.iloc[0,0]])\n",
" feats = list()\n",
" print(gid0)\n",
" for gid,g in g0.groupby(1):\n",
"\n",
" # get all wifi time locations, \n",
" with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
" txt = f.readlines()\n",
"\n",
" wifi = list()\n",
"\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" wifi_df = pd.DataFrame(wifi)\n",
" wifi_points = pd.DataFrame(wifi_df.groupby(0).count().index.tolist())\n",
" \n",
" for timepoint in g.iloc[:,2].tolist():\n",
"\n",
" deltas = (wifi_points.astype(int) - int(timepoint)).abs()\n",
" min_delta_idx = deltas.values.argmin()\n",
" wifi_block_timestamp = wifi_points.iloc[min_delta_idx].values[0]\n",
" \n",
" wifi_block = wifi_df[wifi_df[0] == wifi_block_timestamp].drop_duplicates(subset=3)\n",
" feat = wifi_block.set_index(3)[4].reindex(index).fillna(-999)\n",
"\n",
" feat['site_path_timestamp'] = g.iloc[0,0] + \"_\" + g.iloc[0,1] + \"_\" + timepoint\n",
" feats.append(feat)\n",
" feature_df = pd.concat(feats, axis=1).T\n",
" feature_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+gid0+\"_test.csv\")\n",
" feature_dict[gid0] = feature_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"papermill": {
"default_parameters": {},
"duration": 9.894085,
"end_time": "2021-02-03T20:30:10.083699",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2021-02-03T20:30:00.189614",
"version": "2.2.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,656 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"import gc\n",
"import json \n",
"base_path = '../input/indoor-location-navigation/'\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# pull out all the buildings actually used in the test set, given current method we don't need the other ones\n",
"ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"\n",
"# only 24 of the total buildings are used in the test set, \n",
"# this allows us to greatly reduce the intial size of the dataset\n",
"\n",
"ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
"used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
"\n",
"# dictionary used to map the floor codes to the values used in the submission file. \n",
"floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # get only the wifi bssid that occur over 1000 times(this number can be experimented with)\n",
"# # these will be the only ones used when constructing features\n",
"# bssid = dict()\n",
"\n",
"# for building in used_buildings:\n",
"# # break\n",
"# folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
"# print(building)\n",
"# wifi = list()\n",
"# for folder in folders:\n",
"# floor = floor_map[folder.split('/')[-1]]\n",
"# files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
"# for file in files:\n",
"# with open(file) as f:\n",
"# txt = f.readlines()\n",
"# for e, line in enumerate(txt):\n",
"# tmp = line.strip().split()\n",
"# if tmp[1] == \"TYPE_WIFI\":\n",
"# wifi.append(tmp)\n",
"# df = pd.DataFrame(wifi)\n",
"# #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
"# value_counts = df[3].value_counts()\n",
"# top_bssid = value_counts[value_counts >= 0].index.tolist()\n",
"# print(len(top_bssid))\n",
"# bssid[building] = top_bssid\n",
"# del df\n",
"# del wifi\n",
"# gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# with open(\"bssid.json\", \"w\") as f:\n",
"# json.dump(bssid, f)\n",
"\n",
"with open(\"bssid.json\") as f:\n",
" bssid = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"def multi_line_spliter(s):\n",
" matches = re.finditer(\"TYPE_\", s)\n",
" matches_positions = [match.start() for match in matches]\n",
" split_idx = [0] + [matches_positions[i]-14 for i in range(1, len(matches_positions))] + [len(s)]\n",
" return [s[split_idx[i]:split_idx[i+1]] for i in range(len(split_idx)-1)]\n",
" \n",
" \n",
"def load_df(file):\n",
" #path = str(Path(self.input_path)/f\"train/{self.site_id}/{self.floor}/{self.path_id}.txt\")\n",
" with open(file) as f:\n",
" data = f.readlines()\n",
"\n",
"# modified_data = []\n",
"# for s in data:\n",
"# if s.count(\"TYPE_\")>1:\n",
"# lines = multi_line_spliter(s)\n",
"# modified_data.extend(lines)\n",
"# else:\n",
"# modified_data.append(s)\n",
"# del data\n",
"# return modified_data\n",
" return data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from dataclasses import dataclass\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
"@dataclass\n",
"class ReadData:\n",
" acce: np.ndarray\n",
" acce_uncali: np.ndarray\n",
" gyro: np.ndarray\n",
" gyro_uncali: np.ndarray\n",
" magn: np.ndarray\n",
" magn_uncali: np.ndarray\n",
" ahrs: np.ndarray\n",
" wifi: np.ndarray\n",
" ibeacon: np.ndarray\n",
" waypoint: np.ndarray\n",
"\n",
"\n",
"def read_data_file(data_filename):\n",
" acce = []\n",
" acce_uncali = []\n",
" gyro = []\n",
" gyro_uncali = []\n",
" magn = []\n",
" magn_uncali = []\n",
" ahrs = []\n",
" wifi = []\n",
" ibeacon = []\n",
" waypoint = []\n",
"\n",
" with open(data_filename, 'r', encoding='utf-8') as file:\n",
" lines = file.readlines()\n",
"\n",
" for line_data in lines:\n",
" line_data = line_data.strip()\n",
" if not line_data or line_data[0] == '#':\n",
" continue\n",
"\n",
" line_data = line_data.split('\\t')\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER':\n",
" acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n",
" acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE':\n",
" gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n",
" gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n",
" magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n",
" magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ROTATION_VECTOR':\n",
" ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WIFI':\n",
" sys_ts = line_data[0]\n",
" ssid = line_data[2]\n",
" bssid = line_data[3]\n",
" rssi = line_data[4]\n",
" lastseen_ts = line_data[6]\n",
" wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n",
" wifi.append(wifi_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_BEACON':\n",
" ts = line_data[0]\n",
" uuid = line_data[2]\n",
" major = line_data[3]\n",
" minor = line_data[4]\n",
" rssi = line_data[6]\n",
" ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]\n",
" ibeacon.append(ibeacon_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WAYPOINT':\n",
" waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n",
"\n",
" acce = np.array(acce)\n",
" acce_uncali = np.array(acce_uncali)\n",
" gyro = np.array(gyro)\n",
" gyro_uncali = np.array(gyro_uncali)\n",
" magn = np.array(magn)\n",
" magn_uncali = np.array(magn_uncali)\n",
" ahrs = np.array(ahrs)\n",
" wifi = np.array(wifi)\n",
" ibeacon = np.array(ibeacon)\n",
" waypoint = np.array(waypoint)\n",
"\n",
" return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)\n"
]
},
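{
"cell_type": "markdown",
"metadata": {},
"source": [
"`read_data_file` parses one trace into typed numpy arrays. A quick usage sketch (the path here is hypothetical):\n",
"\n",
"```python\n",
"data = read_data_file(base_path + 'train/5a0546857ecc773753327266/F1/example_path.txt')\n",
"print(data.wifi.shape, data.waypoint.shape, data.acce.shape)\n",
"```"
]
},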
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5a0546857ecc773753327266\n",
"-1\n",
"0\n",
"1\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-74e73dc9d7ca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfloor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfile\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_data_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwifi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mwifi_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwifi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-5-26abf575fd1a>\u001b[0m in \u001b[0;36mread_data_file\u001b[0;34m(data_filename)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'TYPE_GYROSCOPE_UNCALIBRATED'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 55\u001b[0;31m \u001b[0mgyro_uncali\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 56\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# generate all the training data \n",
"# used_buildings[:1]\n",
"for building in used_buildings:\n",
" #break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
" dfs = list()\n",
" index = sorted(bssid[building])\n",
" print(building)\n",
" building_df_wifi = []\n",
" building_df_waypoint = []\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" print(floor)\n",
" for file in files:\n",
" data = read_data_file(file)\n",
" if len(data.wifi)>0:\n",
" wifi_data = pd.DataFrame(data.wifi)\n",
" wifi_data.columns = ['ts_wifi','ssid','bssid','rssi','ts_wifi_ls']\n",
" wifi_data['path'] = file.split('/')[-1].split('.')[0]\n",
" wifi_data['site'] = file.split('/')[-3]\n",
" wifi_data['floor'] = floor\n",
" wifi_data['floor_ori'] = folder.split('/')[-1]\n",
" building_df_wifi.append(wifi_data) \n",
" if len(data.waypoint)>0:\n",
" waypoint_data = pd.DataFrame(data.waypoint)\n",
" waypoint_data.columns = ['ts_waypoint','x','y']\n",
" waypoint_data['path'] = file.split('/')[-1].split('.')[0]\n",
" waypoint_data['site'] = file.split('/')[-3]\n",
" waypoint_data['floor'] = floor\n",
" waypoint_data['floor_ori'] = folder.split('/')[-1]\n",
" building_df_waypoint.append(waypoint_data) \n",
" building_df_wifi = pd.concat(building_df_wifi).reset_index(drop=True)\n",
" building_df_waypoint = pd.concat(building_df_waypoint).reset_index(drop=True)\n",
" building_df_wifi.to_csv('../input/data_abstract/'+building+\"_train_wifi.csv\")\n",
" building_df_waypoint.to_csv('../input/data_abstract/'+building+\"_train_waypoint.csv\")\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5a0546857ecc773753327266</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>0000000000009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5a0546857ecc773753327266</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>0000000009017</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"0 5a0546857ecc773753327266 046cfa46be49fc10834815c6 0000000000009\n",
"1 5a0546857ecc773753327266 046cfa46be49fc10834815c6 0000000009017"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ssubm_df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5a0546857ecc773753327266\n",
"5c3c44b80379370013e0fd2b\n",
"5d27075f03f801723c2e360f\n",
"5d27096c03f801723c31e5e0\n",
"5d27097f03f801723c320d97\n",
"5d27099f03f801723c32511d\n",
"5d2709a003f801723c3251bf\n",
"5d2709b303f801723c327472\n",
"5d2709bb03f801723c32852c\n",
"5d2709c303f801723c3299ee\n",
"5d2709d403f801723c32bd39\n",
"5d2709e003f801723c32d896\n",
"5da138274db8ce0c98bbd3d2\n",
"5da1382d4db8ce0c98bbe92e\n",
"5da138314db8ce0c98bbf3a0\n",
"5da138364db8ce0c98bc00f1\n",
"5da1383b4db8ce0c98bc11ab\n",
"5da138754db8ce0c98bca82f\n",
"5da138764db8ce0c98bcaa46\n",
"5da1389e4db8ce0c98bd0547\n",
"5da138b74db8ce0c98bd4774\n",
"5da958dd46f8266d0737457b\n",
"5dbc1d84c1eb61796cf7c010\n",
"5dc8cea7659e181adb076a3f\n"
]
}
],
"source": [
"ssubm_building_g = ssubm_df.groupby(0)\n",
"feature_dict = dict()\n",
"\n",
"for gid0, g0 in ssubm_building_g:\n",
" index = sorted(bssid[g0.iloc[0,0]])\n",
" feats = list()\n",
" print(gid0)\n",
" building_df_wifi = []\n",
" for gid,g in g0.groupby(1):\n",
"\n",
" # get all wifi time locations\n",
" #with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
" #txt = f.readlines()\n",
" data = read_data_file(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt'))\n",
" if len(data.wifi)>0:\n",
" wifi_data = pd.DataFrame(data.wifi)\n",
" wifi_data.columns = ['ts_wifi','ssid','bssid','rssi','ts_wifi_ls']\n",
" wifi_data['path'] = g.iloc[0,1]\n",
" wifi_data['site'] = gid0\n",
" building_df_wifi.append(wifi_data) \n",
" building_df_wifi = pd.concat(building_df_wifi).reset_index(drop=True)\n",
" building_df_wifi.to_csv('../input/data_abstract/'+gid0+\"_test_wifi.csv\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ts_wifi</th>\n",
" <th>ssid</th>\n",
" <th>bssid</th>\n",
" <th>rssi</th>\n",
" <th>ts_wifi_ls</th>\n",
" <th>path</th>\n",
" <th>site</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000000002340</td>\n",
" <td>da39a3ee5e6b4b0d3255bfef95601890afd80709</td>\n",
" <td>eebf5db207eec2f3e041f92153d789270f346821</td>\n",
" <td>-45</td>\n",
" <td>1578474544726</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000000002340</td>\n",
" <td>b9f0208be00bd8b337be7f12e02e3a3ce846e22b</td>\n",
" <td>7805f319f3f591986effe78c5b41143180278f2d</td>\n",
" <td>-46</td>\n",
" <td>1578474565732</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000000002340</td>\n",
" <td>ab150ecf6d972b476aeab16317bed6189d9f7cce</td>\n",
" <td>323607d8444900d64151ee06d164738ac727bbce</td>\n",
" <td>-46</td>\n",
" <td>1578474564279</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000000002340</td>\n",
" <td>b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7</td>\n",
" <td>b26914599f6d9ba16b43975394e1eeb9d82f4bab</td>\n",
" <td>-47</td>\n",
" <td>1578474565725</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000000002340</td>\n",
" <td>da39a3ee5e6b4b0d3255bfef95601890afd80709</td>\n",
" <td>02a1be3a5dab38320f879489d8a1e0f2a72768b3</td>\n",
" <td>-47</td>\n",
" <td>1578474547962</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338901</th>\n",
" <td>0000000067545</td>\n",
" <td>b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7</td>\n",
" <td>f2fd7c8b3ae74a54ebcd5498b81b513b7c5e564a</td>\n",
" <td>-90</td>\n",
" <td>1578465380606</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338902</th>\n",
" <td>0000000067545</td>\n",
" <td>b9f0208be00bd8b337be7f12e02e3a3ce846e22b</td>\n",
" <td>94887049b5d6072ffd22a5e7de70523931861c2b</td>\n",
" <td>-91</td>\n",
" <td>1578465380654</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338903</th>\n",
" <td>0000000067545</td>\n",
" <td>b7e6027447eb1f81327d66cfd3adbe557aabf26c</td>\n",
" <td>e9f5c01efe9058d460ed3830b2a23b729dea930a</td>\n",
" <td>-92</td>\n",
" <td>1578465380607</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338904</th>\n",
" <td>0000000067545</td>\n",
" <td>02eb66d35bce69814f108c2f876e600a78ace137</td>\n",
" <td>0f5daed11a61e0d6941a1a42ff428ca216d61003</td>\n",
" <td>-93</td>\n",
" <td>1578465370203</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338905</th>\n",
" <td>0000000067545</td>\n",
" <td>d4f84491d3a4cd7fbd6f2e34e35fc3cf2f9c5c56</td>\n",
" <td>bfaebb72653fac35c19b00e7ce484dc2897f18bd</td>\n",
" <td>-93</td>\n",
" <td>1578465377777</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>338906 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" ts_wifi ssid \\\n",
"0 0000000002340 da39a3ee5e6b4b0d3255bfef95601890afd80709 \n",
"1 0000000002340 b9f0208be00bd8b337be7f12e02e3a3ce846e22b \n",
"2 0000000002340 ab150ecf6d972b476aeab16317bed6189d9f7cce \n",
"3 0000000002340 b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7 \n",
"4 0000000002340 da39a3ee5e6b4b0d3255bfef95601890afd80709 \n",
"... ... ... \n",
"338901 0000000067545 b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7 \n",
"338902 0000000067545 b9f0208be00bd8b337be7f12e02e3a3ce846e22b \n",
"338903 0000000067545 b7e6027447eb1f81327d66cfd3adbe557aabf26c \n",
"338904 0000000067545 02eb66d35bce69814f108c2f876e600a78ace137 \n",
"338905 0000000067545 d4f84491d3a4cd7fbd6f2e34e35fc3cf2f9c5c56 \n",
"\n",
" bssid rssi ts_wifi_ls \\\n",
"0 eebf5db207eec2f3e041f92153d789270f346821 -45 1578474544726 \n",
"1 7805f319f3f591986effe78c5b41143180278f2d -46 1578474565732 \n",
"2 323607d8444900d64151ee06d164738ac727bbce -46 1578474564279 \n",
"3 b26914599f6d9ba16b43975394e1eeb9d82f4bab -47 1578474565725 \n",
"4 02a1be3a5dab38320f879489d8a1e0f2a72768b3 -47 1578474547962 \n",
"... ... ... ... \n",
"338901 f2fd7c8b3ae74a54ebcd5498b81b513b7c5e564a -90 1578465380606 \n",
"338902 94887049b5d6072ffd22a5e7de70523931861c2b -91 1578465380654 \n",
"338903 e9f5c01efe9058d460ed3830b2a23b729dea930a -92 1578465380607 \n",
"338904 0f5daed11a61e0d6941a1a42ff428ca216d61003 -93 1578465370203 \n",
"338905 bfaebb72653fac35c19b00e7ce484dc2897f18bd -93 1578465377777 \n",
"\n",
" path site \n",
"0 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"1 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"2 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"3 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"4 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"... ... ... \n",
"338901 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338902 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338903 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338904 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338905 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"\n",
"[338906 rows x 7 columns]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"building_df_wifi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,198 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 24 buildings in the testing set.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>file</th>\n",
" <th>building</th>\n",
" <th>site</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>../input/indoor-location-navigation/test//00ff...</td>\n",
" <td>5da1389e4db8ce0c98bd0547</td>\n",
" <td>SiteName:和达城商场</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>../input/indoor-location-navigation/test//01c4...</td>\n",
" <td>5da138b74db8ce0c98bd4774</td>\n",
" <td>SiteName:万象城</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>../input/indoor-location-navigation/test//030b...</td>\n",
" <td>5da138764db8ce0c98bcaa46</td>\n",
" <td>SiteName:银泰百货</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>../input/indoor-location-navigation/test//0389...</td>\n",
" <td>5dbc1d84c1eb61796cf7c010</td>\n",
" <td>SiteName:杭州大悦城</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>../input/indoor-location-navigation/test//0402...</td>\n",
" <td>5da1383b4db8ce0c98bc11ab</td>\n",
" <td>SiteName:永旺梦乐城</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" file \\\n",
"0 ../input/indoor-location-navigation/test//00ff... \n",
"1 ../input/indoor-location-navigation/test//01c4... \n",
"2 ../input/indoor-location-navigation/test//030b... \n",
"3 ../input/indoor-location-navigation/test//0389... \n",
"4 ../input/indoor-location-navigation/test//0402... \n",
"\n",
" building site \n",
"0 5da1389e4db8ce0c98bd0547 SiteName:和达城商场 \n",
"1 5da138b74db8ce0c98bd4774 SiteName:万象城 \n",
"2 5da138764db8ce0c98bcaa46 SiteName:银泰百货 \n",
"3 5dbc1d84c1eb61796cf7c010 SiteName:杭州大悦城 \n",
"4 5da1383b4db8ce0c98bc11ab SiteName:永旺梦乐城 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"# Prepare paths:\n",
"import glob\n",
"from pathlib import Path\n",
"inpath = '../input/indoor-location-navigation/'\n",
"metapath = inpath + 'metadata/'\n",
"trainpath = inpath + 'train/'\n",
"testpath = inpath + 'test/'\n",
"\n",
"# Extract testing files, buildings and sites:\n",
"os.system(f'grep SiteID {testpath}/* > test_buildings.txt' )\n",
"test_buildings = pd.read_csv('test_buildings.txt',sep='\\t',header=None,names=['file','building','site'])\n",
"test_buildings['file'] = test_buildings['file'].apply(lambda x: x[:-2])\n",
"test_buildings['building'] = test_buildings['building'].apply(lambda x: x[7:])\n",
"\n",
"# How many buildings in the testing set?\n",
"buildings = np.unique(test_buildings['building'])\n",
"print('There are',len(buildings),'buildings in the testing set.')\n",
"\n",
"test_buildings.head()\n"
]
},
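{
"cell_type": "markdown",
"metadata": {},
"source": [
"For environments without a shell, a pure-Python sketch of the `grep SiteID` step above (it assumes the SiteID/SiteName fields sit on one tab-separated header line, which is what the parsing above implies):\n",
"\n",
"```python\n",
"rows = []\n",
"for fn in glob.glob(testpath + '/*.txt'):\n",
"    with open(fn) as f:\n",
"        for line in f:\n",
"            if 'SiteID' in line:\n",
"                fields = line.strip().split('\\t')\n",
"                rows.append([fn, fields[1][7:], fields[2]])  # drop the 'SiteID:' prefix\n",
"                break\n",
"test_buildings = pd.DataFrame(rows, columns=['file', 'building', 'site'])\n",
"```"
]
},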
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Compile C++ pre-processing code:\n",
"er=os.system(\"g++ ../input/indoor-cpp/1_preprocess.cpp -std=c++11 -o preprocess\")\n",
"if(er): print(\"Error\")\n",
"\n",
"# Reformat the testing set:\n",
"os.system('mkdir test')\n",
"for i,(path_filename,building) in enumerate(zip(test_buildings['file'],test_buildings['building'])):\n",
" er=os.system(f'./preprocess {path_filename} test {building} {0}') #since we do not know the floor, I put 0.\n",
" if(er): print(\"Error:\",path_filename)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Acceleration, magnetic and orientation testing data:\n",
"os.system('mkdir indoor_testing_accel')\n",
"os.system(\"g++ ../input/indoor-cpp/2_preprocess_accel.cpp -std=c++11 -o preprocess_accel\")\n",
"for building in buildings:\n",
" os.system(f'./preprocess_accel {building}')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Wifi testing data:\n",
"os.system('mkdir test_wifi')\n",
"os.system(\"g++ /kaggle/input/indoor-cpp/2_preprocess_wifi.cpp -std=c++11 -o preprocess_wifi\")\n",
"for building in buildings:\n",
" os.system(f'./preprocess_wifi {building}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017-2020 XYZ10, Inc. https://dangwu.com/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,141 @@
# Indoor Location Competition 2.0 (Sample Data and Code)
This repository contains sample data and code for [Indoor Location Competition 2.0](https://aka.ms/location20), a continuation of the Microsoft Indoor Location Competition. The competition this year will be completely virtual and evaluated on large-scale real indoor location datasets. The dataset to be released consists of dense indoor signatures of WiFi, geomagnetic field, iBeacons, etc., as well as ground truth collected from hundreds of buildings in Chinese cities.
## Webinar Video
We held a webinar in July; the video is [here](https://www.youtube.com/watch?v=xt3OzMC-XMU).
## Sample Data
The `data` folder contains indoor traces from two sites. Each trace (`*.txt`) corresponds to an indoor path between position p<sub>1</sub> and p<sub>2</sub> walked by a site-surveyor. During the walk, the site-surveyor holds an Android smartphone flat in front of their body, while a sensor data recording app running on the device collects IMU (accelerometer, gyroscope) and geomagnetic field (magnetometer) readings, as well as WiFi and Bluetooth iBeacon scanning results. A detailed description of the trace file format is given below. In addition to raw traces, floor plan metadata (e.g., raster image, size, GeoJSON) are also included for each floor.
### Trace File Format (*.txt)
| Time | Data Type | Value | | | | | | | |
|----------------------|-----------------------------------------------------|------------------------------------------|-------------------|--------------|------------------|---------------------|-------------------|-------------------|----------------------------------------|
| 1574659531598 | TYPE\_WAYPOINT | 196\.41757 | 117\.84907 | | | | | | |
| | Location surveyor labeled on the map | Coordinate x (meter) | Coordinate y (meter) | | | | | | |
| | | | | | | | | | |
| 1574659531695 | TYPE\_ACCELEROMETER | \-1\.7085724 | \-0\.274765 | 16\.657166 | 2 | | | | |
| | Android Sensor\.TYPE\_ACCELEROMETER | X axis | Y axis | Z axis | accuracy | | | | |
| 1574659531695 | TYPE\_GYROSCOPE | \-0\.3021698 | 0\.2773285 | 0\.107543945 | 3 | | | | |
| | Android Sensor\.TYPE\_GYROSCOPE | X axis | Y axis | Z axis | accuracy | | | | |
| 1574659531695 | TYPE\_MAGNETIC\_FIELD | 20\.181274 | 16\.209412 | \-32\.22046 | 3 | | | | |
| | Android Sensor\.TYPE\_MAGNETIC\_FIELD | X axis | Y axis | Z axis | accuracy | | | | |
| 1574659531695 | TYPE\_ROTATION\_VECTOR | \-0\.00855688 | 0\.051367603 | 0\.362504 | 3 | | | | |
| | Android Sensor\.TYPE\_ROTATION\_VECTOR | X axis | Y axis | Z axis | accuracy | | | | |
| | | | | | | | | | |
| 1574659531695 | TYPE\_ACCELEROMETER\_UNCALIBRATED | \-1\.7085724 | \-0\.274765 | 16\.657166 | 0\.0 | 0\.0 | 0\.0 | 3 | |
| | Android Sensor\.TYPE\_ACCELEROMETER\_UNCALIBRATED | X axis | Y axis | Z axis | X axis | Y axis | Z axis | accuracy | |
| 1574659531695 | TYPE\_GYROSCOPE\_UNCALIBRATED | \-0\.42333984 | 0\.20202637 | 0\.09623718 | \-7\.9345703E\-4 | 3\.2043457E\-4 | 4\.119873E\-4 | 3 | |
| | Android Sensor\.TYPE\_GYROSCOPE\_UNCALIBRATED | X axis | Y axis | Z axis | X axis | Y axis | Z axis | accuracy | |
| 1574659531695 | TYPE\_MAGNETIC\_FIELD\_UNCALIBRATED | \-29\.830933 | \-26\.36261 | \-300\.3006 | \-50\.012207 | \-42\.57202 | \-268\.08014 | 3 | |
| | Android Sensor\.TYPE\_MAGNETIC\_FIELD\_UNCALIBRATED | X axis | Y axis | Z axis | X axis | Y axis | Z axis | accuracy | |
| | | | | | | | | | |
| 1574659533190 | TYPE\_WIFI | intime\_free | 0e:74:9c:a7:b2:e4 | \-43 | 5805 | 1574659532305 | | | |
| | Wi\-Fi data | ssid | bssid | RSSI | frequency | last seen timestamp | | | |
| | | | | | | | | | |
| 1574659532751 | TYPE\_BEACON | FDA50693\-A4E2\-4FB1\-AFCF\-C6EB07647825 | 10073 | 61418 | \-65 | \-82 | 5\.50634293288929 | 6B:11:4C:D1:29:F2 | 1574659532751 |
| | iBeacon data | UUID | MajorID | MinorID | Tx Power | RSSI | Distance | MAC Address | same with Unix time, padding data |
The first column is Unix time in milliseconds. Specifically, we use SensorEvent.timestamp for sensor data and system time for WiFi and Bluetooth scans.
The second column is the data type (ten in total).
* TYPE_ACCELEROMETER
* TYPE_MAGNETIC_FIELD
* TYPE_GYROSCOPE
* TYPE_ROTATION_VECTOR
* TYPE_MAGNETIC_FIELD_UNCALIBRATED
* TYPE_GYROSCOPE_UNCALIBRATED
* TYPE_ACCELEROMETER_UNCALIBRATED
* TYPE_WIFI
* TYPE_BEACON
* TYPE_WAYPOINT: ground truth location labeled by the surveyor
Data values start from the third column.
Columns 3-5 of TYPE_ACCELEROMETER, TYPE_MAGNETIC_FIELD, TYPE_GYROSCOPE and TYPE_ROTATION_VECTOR are SensorEvent.values[0-2] from the callback function onSensorChanged(). Column 6 is SensorEvent.accuracy.
Columns 3-8 of TYPE_ACCELEROMETER_UNCALIBRATED, TYPE_GYROSCOPE_UNCALIBRATED and TYPE_MAGNETIC_FIELD_UNCALIBRATED are SensorEvent.values[0-5] from the callback function onSensorChanged(). Column 9 is SensorEvent.accuracy.
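As a quick illustration, a minimal Python sketch for dispatching on the data type column (the sample line is the TYPE_ACCELEROMETER row from the table above):
```python
# parse one tab-separated trace line
line = '1574659531695\tTYPE_ACCELEROMETER\t-1.7085724\t-0.274765\t16.657166\t2'
ts, data_type, *values = line.split('\t')
if data_type == 'TYPE_ACCELEROMETER':
    x, y, z = map(float, values[:3])  # values[3] is SensorEvent.accuracy
elif data_type == 'TYPE_WIFI':
    ssid, bssid, rssi, frequency, last_seen = values[:5]
```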
Values of TYPE_BEACON are obtained from ScanRecord.getBytes(). The results are decoded based on the iBeacon protocol using the code below.
```kotlin
val major = ((scanRecord[startByte + 20].toInt() and 0xff) * 0x100 + (scanRecord[startByte + 21].toInt() and 0xff))
val minor = ((scanRecord[startByte + 22].toInt() and 0xff) * 0x100 + (scanRecord[startByte + 23].toInt() and 0xff))
val txPower = scanRecord[startByte + 24]
```
Distance in column 8 is calculated as
```java
private static double calculateDistance(int txPower, double rssi) {
if (rssi == 0) {
return -1.0; // if we cannot determine distance, return -1.
}
double ratio = rssi*1.0/txPower;
if (ratio < 1.0) {
return Math.pow(ratio,10);
}
else {
double accuracy = (0.89976)*Math.pow(ratio,7.7095) + 0.111;
return accuracy;
}
}
```
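As a sanity check, plugging the sample TYPE_BEACON row above (Tx Power -65, RSSI -82) into this formula reproduces its Distance column:
```python
tx_power, rssi = -65, -82
ratio = rssi / tx_power  # ~1.2615, so the ratio >= 1.0 branch applies
distance = 0.89976 * ratio ** 7.7095 + 0.111
print(round(distance, 3))  # ~5.506, matching the sample row
```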
### References:
https://developer.android.com/guide/topics/sensors
https://developer.android.com/reference/android/net/wifi/ScanResult.html
https://developer.android.com/reference/android/bluetooth/le/ScanRecord
## Sample Code
Along with sample data from two sites, this repo also provides several scripts for parsing and analyzing indoor traces. All scripts are tested with Python 3.6.9 on both Windows 10 and Mac OS 15.
### How to run the code
`python main.py`
#### Main functions
| Functions | Output |
|-----------------------------------------------|---------------------------------------------|
| Ground truth location visualization | output/site1/F1/path_images |
| Sample step detection and visualization | output/site1/F1/step_position.html |
| Geo-magnetic field intensity visualization | output/site1/F1/magnetic_strength.html |
| WiFi RSSI heatmap generation | output/site1/F1/wifi_images |
| iBeacon RSSI heatmap generation | output/site1/F1/ibeacon_images |
| WiFi SSID counts visualization | output/site1/F1/wifi_count.html |
## Contents
```
indoor-location-competition-20
│ README.md
│ main.py //main function of the sample code
| compute_f.py //data processing functions
| io_f.py //data preprocessing functions
| visualize_f.py //visualization function
└───data //raw data from two sites
└───site1
| └───B1 //traces from one floor
| | └───path_data_files
| | | └───5dda14a2c5b77e0006b17533.txt //trace file
| | | | ...
| | |
| | | floor_image.png //raster floor plan
| | | floor_info.json //floor size info
| | | geojson_map.json //floor plan in vector format (GeoJSON)
| |
| └───F1
| │ ...
|
└───site2
│ ...
```
## License
This repository is licensed with the [MIT license](./LICENSE).

@@ -0,0 +1,361 @@
import numpy as np
import scipy.signal as signal
def split_ts_seq(ts_seq, sep_ts):
"""
:param ts_seq:
:param sep_ts:
:return:
"""
tss = ts_seq[:, 0].astype(float)
unique_sep_ts = np.unique(sep_ts)
ts_seqs = []
start_index = 0
for i in range(0, unique_sep_ts.shape[0]):
end_index = np.searchsorted(tss, unique_sep_ts[i], side='right')
if start_index == end_index:
continue
ts_seqs.append(ts_seq[start_index:end_index, :].copy())
start_index = end_index
# tail data
if start_index < ts_seq.shape[0]:
ts_seqs.append(ts_seq[start_index:, :].copy())
return ts_seqs
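# Example for split_ts_seq (hypothetical values): with ts_seq timestamps
# [1, 2, 3, 4, 5] and sep_ts = [2, 4], the chunks have timestamps [1, 2],
# [3, 4] and the tail [5]; rows with ts <= a separator go to the preceding chunk.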
def correct_trajectory(original_xys, end_xy):
"""
:param original_xys: numpy ndarray, shape(N, 2)
:param end_xy: numpy ndarray, shape(1, 2)
:return:
"""
corrected_xys = np.zeros((0, 2))
A = original_xys[0, :]
B = end_xy
Bp = original_xys[-1, :]
angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])
angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])
angle_BpAB = angle_BpAX - angle_BAX
AB = np.sqrt(np.sum((B - A) ** 2))
ABp = np.sqrt(np.sum((Bp - A) ** 2))
corrected_xys = np.append(corrected_xys, [A], 0)
for i in np.arange(1, np.size(original_xys, 0)):
angle_CpAX = np.arctan2(original_xys[i, 1] - A[1], original_xys[i, 0] - A[0])
angle_CAX = angle_CpAX - angle_BpAB
ACp = np.sqrt(np.sum((original_xys[i, :] - A) ** 2))
AC = ACp * AB / ABp
delta_C = np.array([AC * np.cos(angle_CAX), AC * np.sin(angle_CAX)])
C = delta_C + A
corrected_xys = np.append(corrected_xys, [C], 0)
return corrected_xys
def correct_positions(rel_positions, reference_positions):
"""
:param rel_positions:
:param reference_positions:
:return:
"""
rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])
if len(rel_positions_list) != reference_positions.shape[0] - 1:
# print(f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')
del rel_positions_list[-1]
assert len(rel_positions_list) == reference_positions.shape[0] - 1
corrected_positions = np.zeros((0, 3))
for i, rel_ps in enumerate(rel_positions_list):
start_position = reference_positions[i]
end_position = reference_positions[i + 1]
abs_ps = np.zeros(rel_ps.shape)
abs_ps[:, 0] = rel_ps[:, 0]
# abs_ps[:, 1:3] = rel_ps[:, 1:3] + start_position[1:3]
abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]
for j in range(1, rel_ps.shape[0]):
abs_ps[j, 1:3] = abs_ps[j-1, 1:3] + rel_ps[j, 1:3]
abs_ps = np.insert(abs_ps, 0, start_position, axis=0)
corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])
corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))
if i == 0:
corrected_positions = np.append(corrected_positions, corrected_ps, axis=0)
else:
corrected_positions = np.append(corrected_positions, corrected_ps[1:], axis=0)
corrected_positions = np.array(corrected_positions)
return corrected_positions
def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):
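    # 4th-order low-pass Butterworth filter; lfilter is run twice over
    # warmup_data so the returned state (filter_zf) is free of start-up
    # transients before real samples are filtered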
order = 4
filter_b, filter_a = signal.butter(order, cut_off_freq / (sample_freq / 2), 'low', False)
zf = signal.lfilter_zi(filter_b, filter_a)
_, zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)
_, filter_zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)
return filter_b, filter_a, filter_zf
def get_rotation_matrix_from_vector(rotation_vector):
q1 = rotation_vector[0]
q2 = rotation_vector[1]
q3 = rotation_vector[2]
if rotation_vector.size >= 4:
q0 = rotation_vector[3]
else:
q0 = 1 - q1*q1 - q2*q2 - q3*q3
if q0 > 0:
q0 = np.sqrt(q0)
else:
q0 = 0
sq_q1 = 2 * q1 * q1
sq_q2 = 2 * q2 * q2
sq_q3 = 2 * q3 * q3
q1_q2 = 2 * q1 * q2
q3_q0 = 2 * q3 * q0
q1_q3 = 2 * q1 * q3
q2_q0 = 2 * q2 * q0
q2_q3 = 2 * q2 * q3
q1_q0 = 2 * q1 * q0
R = np.zeros((9,))
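    # mirrors Android's SensorManager.getRotationMatrixFromVector, which fills
    # either a 3x3 (size 9) or 4x4 (size 16) matrix; with R fixed to size 9
    # here, the size-16 branch is kept only for parity and is never taken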
if R.size == 9:
R[0] = 1 - sq_q2 - sq_q3
R[1] = q1_q2 - q3_q0
R[2] = q1_q3 + q2_q0
R[3] = q1_q2 + q3_q0
R[4] = 1 - sq_q1 - sq_q3
R[5] = q2_q3 - q1_q0
R[6] = q1_q3 - q2_q0
R[7] = q2_q3 + q1_q0
R[8] = 1 - sq_q1 - sq_q2
R = np.reshape(R, (3, 3))
elif R.size == 16:
R[0] = 1 - sq_q2 - sq_q3
R[1] = q1_q2 - q3_q0
R[2] = q1_q3 + q2_q0
R[3] = 0.0
R[4] = q1_q2 + q3_q0
R[5] = 1 - sq_q1 - sq_q3
R[6] = q2_q3 - q1_q0
R[7] = 0.0
R[8] = q1_q3 - q2_q0
R[9] = q2_q3 + q1_q0
R[10] = 1 - sq_q1 - sq_q2
R[11] = 0.0
R[12] = R[13] = R[14] = 0.0
R[15] = 1.0
R = np.reshape(R, (4, 4))
return R
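# Sanity check (identity rotation): for the unit quaternion rotation_vector
# np.array([0., 0., 0., 1.]) (q1 = q2 = q3 = 0, q0 = 1) every cross term above
# vanishes and the function returns the 3x3 identity matrix.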
def get_orientation(R):
flat_R = R.flatten()
values = np.zeros((3,))
if np.size(flat_R) == 9:
values[0] = np.arctan2(flat_R[1], flat_R[4])
values[1] = np.arcsin(-flat_R[7])
values[2] = np.arctan2(-flat_R[6], flat_R[8])
else:
values[0] = np.arctan2(flat_R[1], flat_R[5])
values[1] = np.arcsin(-flat_R[9])
values[2] = np.arctan2(-flat_R[8], flat_R[10])
return values
def compute_steps(acce_datas):
step_timestamps = np.array([])
step_indexs = np.array([], dtype=int)
step_acce_max_mins = np.zeros((0, 4))
sample_freq = 50
window_size = 22
low_acce_mag = 0.6
step_criterion = 1
interval_threshold = 250
acce_max = np.zeros((2,))
acce_min = np.zeros((2,))
acce_binarys = np.zeros((window_size,), dtype=int)
acce_mag_pre = 0
state_flag = 0
warmup_data = np.ones((window_size,)) * 9.81
filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)
acce_mag_window = np.zeros((window_size, 1))
# detect steps according to acceleration magnitudes
for i in np.arange(0, np.size(acce_datas, 0)):
acce_data = acce_datas[i, :]
acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))
acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)
acce_mag_filt = acce_mag_filt[0]
acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])
acce_mag_window = np.delete(acce_mag_window, 0)
mean_gravity = np.mean(acce_mag_window)
acce_std = np.std(acce_mag_window)
mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])
# detect valid peak or valley of acceleration magnitudes
acce_mag_filt_detrend = acce_mag_filt - mean_gravity
if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):
# peak
acce_binarys = np.append(acce_binarys, [1])
acce_binarys = np.delete(acce_binarys, 0)
elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):
# valley
acce_binarys = np.append(acce_binarys, [-1])
acce_binarys = np.delete(acce_binarys, 0)
else:
# between peak and valley
acce_binarys = np.append(acce_binarys, [0])
acce_binarys = np.delete(acce_binarys, 0)
if (acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):
if state_flag == 0:
acce_max[:] = acce_data[0], acce_mag_filt
state_flag = 1
elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (
acce_mag_filt > acce_max[1]):
acce_max[:] = acce_data[0], acce_mag_filt
elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):
acce_max[:] = acce_data[0], acce_mag_filt
state_flag = 1
# choose reasonable step criterion and check if there is a valid step
# save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]
step_flag = False
if step_criterion == 2:
if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):
step_flag = True
elif step_criterion == 3:
if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):
step_flag = True
else:
if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:
if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):
acce_min[:] = acce_data[0], acce_mag_filt
state_flag = 2
step_flag = True
elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (
acce_mag_filt < acce_min[1]):
acce_min[:] = acce_data[0], acce_mag_filt
if step_flag:
step_timestamps = np.append(step_timestamps, acce_data[0])
step_indexs = np.append(step_indexs, [i])
step_acce_max_mins = np.append(step_acce_max_mins,
[[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)
acce_mag_pre = acce_mag_filt_detrend
return step_timestamps, step_indexs, step_acce_max_mins
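# compute_steps returns three aligned arrays: the step timestamps, their indices
# into acce_datas, and per-step [timestamp, max, min, variance] of the filtered
# acceleration magnitude, which compute_stride_length consumes below.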
def compute_stride_length(step_acce_max_mins):
K = 0.4
K_max = 0.8
K_min = 0.4
para_a0 = 0.21468084
para_a1 = 0.09154517
para_a2 = 0.02301998
stride_lengths = np.zeros((step_acce_max_mins.shape[0], 2))
k_real = np.zeros((step_acce_max_mins.shape[0], 2))
step_timeperiod = np.zeros((step_acce_max_mins.shape[0] - 1, ))
stride_lengths[:, 0] = step_acce_max_mins[:, 0]
window_size = 2
step_timeperiod_temp = np.zeros((0, ))
# calculate every step period - step_timeperiod unit: second
for i in range(0, step_timeperiod.shape[0]):
step_timeperiod_data = (step_acce_max_mins[i + 1, 0] - step_acce_max_mins[i, 0]) / 1000
step_timeperiod_temp = np.append(step_timeperiod_temp, [step_timeperiod_data])
if step_timeperiod_temp.shape[0] > window_size:
step_timeperiod_temp = np.delete(step_timeperiod_temp, [0])
step_timeperiod[i] = np.sum(step_timeperiod_temp) / step_timeperiod_temp.shape[0]
# calculate parameters by step period and acceleration magnitude variance
k_real[:, 0] = step_acce_max_mins[:, 0]
k_real[0, 1] = K
for i in range(0, step_timeperiod.shape[0]):
k_real[i + 1, 1] = np.max([(para_a0 + para_a1 / step_timeperiod[i] + para_a2 * step_acce_max_mins[i, 3]), K_min])
k_real[i + 1, 1] = np.min([k_real[i + 1, 1], K_max]) * (K / K_min)
# calculate every stride length by parameters and max and min data of acceleration magnitude
stride_lengths[:, 1] = np.max([(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]),
np.ones((step_acce_max_mins.shape[0], ))], axis=0)**(1 / 4) * k_real[:, 1]
return stride_lengths
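# Example with the constants above (hypothetical amplitude): for a step whose
# filtered acceleration-magnitude swing is max - min = 6.25 m/s^2 and k = K = 0.4,
# the stride length is 0.4 * 6.25 ** 0.25, roughly 0.63 m.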
def compute_headings(ahrs_datas):
headings = np.zeros((np.size(ahrs_datas, 0), 2))
for i in np.arange(0, np.size(ahrs_datas, 0)):
ahrs_data = ahrs_datas[i, :]
rot_mat = get_rotation_matrix_from_vector(ahrs_data[1:])
azimuth, pitch, roll = get_orientation(rot_mat)
around_z = (-azimuth) % (2 * np.pi)
headings[i, :] = ahrs_data[0], around_z
return headings
def compute_step_heading(step_timestamps, headings):
step_headings = np.zeros((len(step_timestamps), 2))
step_timestamps_index = 0
for i in range(0, len(headings)):
if step_timestamps_index < len(step_timestamps):
if headings[i, 0] == step_timestamps[step_timestamps_index]:
step_headings[step_timestamps_index, :] = headings[i, :]
step_timestamps_index += 1
else:
break
assert step_timestamps_index == len(step_timestamps)
return step_headings
def compute_rel_positions(stride_lengths, step_headings):
rel_positions = np.zeros((stride_lengths.shape[0], 3))
for i in range(0, stride_lengths.shape[0]):
rel_positions[i, 0] = stride_lengths[i, 0]
rel_positions[i, 1] = -stride_lengths[i, 1] * np.sin(step_headings[i, 1])
rel_positions[i, 2] = stride_lengths[i, 1] * np.cos(step_headings[i, 1])
return rel_positions
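# Heading convention: around_z = 0 means a step along +y, so a 0.6 m stride at
# heading 0 yields (dx, dy) = (0.0, 0.6), and at heading pi/2 yields (-0.6, 0.0).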
def compute_step_positions(acce_datas, ahrs_datas, posi_datas):
step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)
headings = compute_headings(ahrs_datas)
stride_lengths = compute_stride_length(step_acce_max_mins)
step_headings = compute_step_heading(step_timestamps, headings)
rel_positions = compute_rel_positions(stride_lengths, step_headings)
step_positions = correct_positions(rel_positions, posi_datas)
return step_positions
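# Minimal usage sketch (assuming a reader like io_f.read_data_file below;
# the filename is hypothetical):
#
#   data = read_data_file('path_file.txt')
#   step_positions = compute_step_positions(data.acce, data.ahrs, data.waypoint)
#   # -> [timestamp, x, y] per detected step, warped through the waypoints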

@ -0,0 +1,104 @@
from dataclasses import dataclass
import numpy as np
@dataclass
class ReadData:
acce: np.ndarray
acce_uncali: np.ndarray
gyro: np.ndarray
gyro_uncali: np.ndarray
magn: np.ndarray
magn_uncali: np.ndarray
ahrs: np.ndarray
wifi: np.ndarray
ibeacon: np.ndarray
waypoint: np.ndarray
def read_data_file(data_filename):
acce = []
acce_uncali = []
gyro = []
gyro_uncali = []
magn = []
magn_uncali = []
ahrs = []
wifi = []
ibeacon = []
waypoint = []
with open(data_filename, 'r', encoding='utf-8') as file:
lines = file.readlines()
for line_data in lines:
line_data = line_data.strip()
if not line_data or line_data[0] == '#':
continue
line_data = line_data.split('\t')
if line_data[1] == 'TYPE_ACCELEROMETER':
acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_GYROSCOPE':
gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_MAGNETIC_FIELD':
magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_ROTATION_VECTOR':
ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_WIFI':
sys_ts = line_data[0]
ssid = line_data[2]
bssid = line_data[3]
rssi = line_data[4]
lastseen_ts = line_data[6]
wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]
wifi.append(wifi_data)
continue
if line_data[1] == 'TYPE_BEACON':
ts = line_data[0]
uuid = line_data[2]
major = line_data[3]
minor = line_data[4]
rssi = line_data[6]
ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]
ibeacon.append(ibeacon_data)
continue
if line_data[1] == 'TYPE_WAYPOINT':
waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])
acce = np.array(acce)
acce_uncali = np.array(acce_uncali)
gyro = np.array(gyro)
gyro_uncali = np.array(gyro_uncali)
magn = np.array(magn)
magn_uncali = np.array(magn_uncali)
ahrs = np.array(ahrs)
wifi = np.array(wifi)
ibeacon = np.array(ibeacon)
waypoint = np.array(waypoint)
return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)
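# Usage sketch (hypothetical filename; shapes assume the file contains these
# record types):
#
#   data = read_data_file('./data/site1/F1/path_data_files/example.txt')
#   data.acce.shape      # (N, 4): timestamp, x, y, z
#   data.wifi.shape      # (M, 5): timestamp, ssid, bssid, rssi, last_seen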

@ -0,0 +1,258 @@
import json
import os
from pathlib import Path
import numpy as np
from compute_f import split_ts_seq, compute_step_positions
from io_f import read_data_file
from visualize_f import visualize_trajectory, visualize_heatmap, save_figure_to_html
floor_data_dir = './data/site1/F1'
path_data_dir = floor_data_dir + '/path_data_files'
floor_plan_filename = floor_data_dir + '/floor_image.png'
floor_info_filename = floor_data_dir + '/floor_info.json'
save_dir = './output/site1/F1'
path_image_save_dir = save_dir + '/path_images'
step_position_image_save_dir = save_dir
magn_image_save_dir = save_dir
wifi_image_save_dir = save_dir + '/wifi_images'
ibeacon_image_save_dir = save_dir + '/ibeacon_images'
wifi_count_image_save_dir = save_dir
def calibrate_magnetic_wifi_ibeacon_to_position(path_file_list):
mwi_datas = {}
for path_filename in path_file_list:
print(f'Processing {path_filename}...')
path_datas = read_data_file(path_filename)
acce_datas = path_datas.acce
magn_datas = path_datas.magn
ahrs_datas = path_datas.ahrs
wifi_datas = path_datas.wifi
ibeacon_datas = path_datas.ibeacon
posi_datas = path_datas.waypoint
step_positions = compute_step_positions(acce_datas, ahrs_datas, posi_datas)
# visualize_trajectory(posi_datas[:, 1:3], floor_plan_filename, width_meter, height_meter, title='Ground Truth', show=True)
# visualize_trajectory(step_positions[:, 1:3], floor_plan_filename, width_meter, height_meter, title='Step Position', show=True)
if wifi_datas.size != 0:
sep_tss = np.unique(wifi_datas[:, 0].astype(float))
wifi_datas_list = split_ts_seq(wifi_datas, sep_tss)
for wifi_ds in wifi_datas_list:
diff = np.abs(step_positions[:, 0] - float(wifi_ds[0, 0]))
index = np.argmin(diff)
target_xy_key = tuple(step_positions[index, 1:3])
if target_xy_key in mwi_datas:
mwi_datas[target_xy_key]['wifi'] = np.append(mwi_datas[target_xy_key]['wifi'], wifi_ds, axis=0)
else:
mwi_datas[target_xy_key] = {
'magnetic': np.zeros((0, 4)),
'wifi': wifi_ds,
'ibeacon': np.zeros((0, 3))
}
if ibeacon_datas.size != 0:
sep_tss = np.unique(ibeacon_datas[:, 0].astype(float))
ibeacon_datas_list = split_ts_seq(ibeacon_datas, sep_tss)
for ibeacon_ds in ibeacon_datas_list:
diff = np.abs(step_positions[:, 0] - float(ibeacon_ds[0, 0]))
index = np.argmin(diff)
target_xy_key = tuple(step_positions[index, 1:3])
if target_xy_key in mwi_datas:
mwi_datas[target_xy_key]['ibeacon'] = np.append(mwi_datas[target_xy_key]['ibeacon'], ibeacon_ds, axis=0)
else:
mwi_datas[target_xy_key] = {
'magnetic': np.zeros((0, 4)),
'wifi': np.zeros((0, 5)),
'ibeacon': ibeacon_ds
}
sep_tss = np.unique(magn_datas[:, 0].astype(float))
magn_datas_list = split_ts_seq(magn_datas, sep_tss)
for magn_ds in magn_datas_list:
diff = np.abs(step_positions[:, 0] - float(magn_ds[0, 0]))
index = np.argmin(diff)
target_xy_key = tuple(step_positions[index, 1:3])
if target_xy_key in mwi_datas:
mwi_datas[target_xy_key]['magnetic'] = np.append(mwi_datas[target_xy_key]['magnetic'], magn_ds, axis=0)
else:
mwi_datas[target_xy_key] = {
'magnetic': magn_ds,
'wifi': np.zeros((0, 5)),
'ibeacon': np.zeros((0, 3))
}
return mwi_datas
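# Structure of the returned mapping (illustrative):
#   mwi_datas[(x, y)] = {
#       'magnetic': ndarray of shape (N, 4), rows [timestamp, x, y, z],
#       'wifi':     ndarray of shape (M, 5), rows [timestamp, ssid, bssid, rssi, last_seen],
#       'ibeacon':  ndarray of shape (K, 3), rows [timestamp, uuid_major_minor, rssi],
#   }
# keyed by the step position closest in time to each sensor block.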
def extract_magnetic_strength(mwi_datas):
magnetic_strength = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
magnetic_data = mwi_datas[position_key]['magnetic']
magnetic_s = np.mean(np.sqrt(np.sum(magnetic_data[:, 1:4] ** 2, axis=1)))
magnetic_strength[position_key] = magnetic_s
return magnetic_strength
def extract_wifi_rssi(mwi_datas):
wifi_rssi = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
wifi_data = mwi_datas[position_key]['wifi']
for wifi_d in wifi_data:
bssid = wifi_d[2]
rssi = int(wifi_d[3])
if bssid in wifi_rssi:
position_rssi = wifi_rssi[bssid]
if position_key in position_rssi:
old_rssi = position_rssi[position_key][0]
old_count = position_rssi[position_key][1]
position_rssi[position_key][0] = (old_rssi * old_count + rssi) / (old_count + 1)
position_rssi[position_key][1] = old_count + 1
else:
position_rssi[position_key] = np.array([rssi, 1])
else:
position_rssi = {}
position_rssi[position_key] = np.array([rssi, 1])
wifi_rssi[bssid] = position_rssi
return wifi_rssi
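# The update above is the standard incremental mean: with mean m over n readings
# and a new rssi r, the new mean is (m * n + r) / (n + 1). E.g. a mean of -60 dBm
# over 2 readings plus a new -66 dBm reading gives -62 dBm.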
def extract_ibeacon_rssi(mwi_datas):
ibeacon_rssi = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
ibeacon_data = mwi_datas[position_key]['ibeacon']
for ibeacon_d in ibeacon_data:
ummid = ibeacon_d[1]
rssi = int(ibeacon_d[2])
if ummid in ibeacon_rssi:
position_rssi = ibeacon_rssi[ummid]
if position_key in position_rssi:
old_rssi = position_rssi[position_key][0]
old_count = position_rssi[position_key][1]
position_rssi[position_key][0] = (old_rssi * old_count + rssi) / (old_count + 1)
position_rssi[position_key][1] = old_count + 1
else:
position_rssi[position_key] = np.array([rssi, 1])
else:
position_rssi = {}
position_rssi[position_key] = np.array([rssi, 1])
ibeacon_rssi[ummid] = position_rssi
return ibeacon_rssi
def extract_wifi_count(mwi_datas):
wifi_counts = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
wifi_data = mwi_datas[position_key]['wifi']
count = np.unique(wifi_data[:, 2]).shape[0]
wifi_counts[position_key] = count
return wifi_counts
if __name__ == "__main__":
Path(path_image_save_dir).mkdir(parents=True, exist_ok=True)
Path(magn_image_save_dir).mkdir(parents=True, exist_ok=True)
Path(wifi_image_save_dir).mkdir(parents=True, exist_ok=True)
Path(ibeacon_image_save_dir).mkdir(parents=True, exist_ok=True)
with open(floor_info_filename) as f:
floor_info = json.load(f)
width_meter = floor_info["map_info"]["width"]
height_meter = floor_info["map_info"]["height"]
path_filenames = list(Path(path_data_dir).resolve().glob("*.txt"))
# 1. visualize ground truth positions
print('Visualizing ground truth positions...')
for path_filename in path_filenames:
print(f'Processing file: {path_filename}...')
path_data = read_data_file(path_filename)
path_id = path_filename.name.split(".")[0]
fig = visualize_trajectory(path_data.waypoint[:, 1:3], floor_plan_filename, width_meter, height_meter, title=path_id, show=False)
html_filename = f'{path_image_save_dir}/{path_id}.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
# 2. visualize step position, magnetic, wifi, ibeacon
print('Visualizing more information...')
mwi_datas = calibrate_magnetic_wifi_ibeacon_to_position(path_filenames)
step_positions = np.array(list(mwi_datas.keys()))
fig = visualize_trajectory(step_positions, floor_plan_filename, width_meter, height_meter, mode='markers', title='Step Position', show=True)
html_filename = f'{step_position_image_save_dir}/step_position.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
magnetic_strength = extract_magnetic_strength(mwi_datas)
heat_positions = np.array(list(magnetic_strength.keys()))
heat_values = np.array(list(magnetic_strength.values()))
fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='mu tesla', title='Magnetic Strength', show=True)
html_filename = f'{magn_image_save_dir}/magnetic_strength.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
wifi_rssi = extract_wifi_rssi(mwi_datas)
print(f'This floor has {len(wifi_rssi.keys())} wifi aps')
ten_wifi_bssids = list(wifi_rssi.keys())[0:10]
print('Example 10 wifi ap bssids:\n')
for bssid in ten_wifi_bssids:
print(bssid)
target_wifi = input(f"Please input target wifi ap bssid:\n")
# target_wifi = '1e:74:9c:a7:b2:e4'
heat_positions = np.array(list(wifi_rssi[target_wifi].keys()))
heat_values = np.array(list(wifi_rssi[target_wifi].values()))[:, 0]
fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='dBm', title=f'Wifi: {target_wifi} RSSI', show=True)
html_filename = f'{wifi_image_save_dir}/{target_wifi.replace(":", "-")}.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
ibeacon_rssi = extract_ibeacon_rssi(mwi_datas)
print(f'This floor has {len(ibeacon_rssi.keys())} ibeacons')
ten_ibeacon_ummids = list(ibeacon_rssi.keys())[0:10]
print('Example 10 ibeacon UUID_MajorID_MinorIDs:\n')
for ummid in ten_ibeacon_ummids:
print(ummid)
target_ibeacon = input(f"Please input target ibeacon UUID_MajorID_MinorID:\n")
# target_ibeacon = 'FDA50693-A4E2-4FB1-AFCF-C6EB07647825_10073_61418'
heat_positions = np.array(list(ibeacon_rssi[target_ibeacon].keys()))
heat_values = np.array(list(ibeacon_rssi[target_ibeacon].values()))[:, 0]
fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='dBm', title=f'iBeacon: {target_ibeacon} RSSI', show=True)
html_filename = f'{ibeacon_image_save_dir}/{target_ibeacon}.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
wifi_counts = extract_wifi_count(mwi_datas)
heat_positions = np.array(list(wifi_counts.keys()))
heat_values = np.array(list(wifi_counts.values()))
    # filter out positions where no wifi was detected
    mask = heat_values != 0
    heat_positions = heat_positions[mask]
    heat_values = heat_values[mask]
    fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='number', title='Wifi Count', show=True)
html_filename = f'{wifi_count_image_save_dir}/wifi_count.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
    print('Done')

@ -0,0 +1,132 @@
import plotly.graph_objs as go
from PIL import Image
def save_figure_to_html(fig, filename):
fig.write_html(filename)
def visualize_trajectory(trajectory, floor_plan_filename, width_meter, height_meter, title=None, mode='lines + markers + text', show=False):
fig = go.Figure()
# add trajectory
size_list = [6] * trajectory.shape[0]
size_list[0] = 10
size_list[-1] = 10
color_list = ['rgba(4, 174, 4, 0.5)'] * trajectory.shape[0]
color_list[0] = 'rgba(12, 5, 235, 1)'
color_list[-1] = 'rgba(235, 5, 5, 1)'
position_count = {}
text_list = []
for i in range(trajectory.shape[0]):
if str(trajectory[i]) in position_count:
position_count[str(trajectory[i])] += 1
else:
position_count[str(trajectory[i])] = 0
text_list.append(' ' * position_count[str(trajectory[i])] + f'{i}')
text_list[0] = 'Start Point: 0'
text_list[-1] = f'End Point: {trajectory.shape[0] - 1}'
fig.add_trace(
go.Scattergl(
x=trajectory[:, 0],
y=trajectory[:, 1],
mode=mode,
marker=dict(size=size_list, color=color_list),
line=dict(shape='linear', color='rgb(100, 10, 100)', width=2, dash='dot'),
text=text_list,
textposition="top center",
name='trajectory',
))
# add floor plan
floor_plan = Image.open(floor_plan_filename)
fig.update_layout(images=[
go.layout.Image(
source=floor_plan,
xref="x",
yref="y",
x=0,
y=height_meter,
sizex=width_meter,
sizey=height_meter,
sizing="contain",
opacity=1,
layer="below",
)
])
# configure
fig.update_xaxes(autorange=False, range=[0, width_meter])
fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
fig.update_layout(
title=go.layout.Title(
text=title or "No title.",
xref="paper",
x=0,
),
autosize=True,
width=900,
height=200 + 900 * height_meter / width_meter,
template="plotly_white",
)
if show:
fig.show()
return fig
def visualize_heatmap(position, value, floor_plan_filename, width_meter, height_meter, colorbar_title="colorbar", title=None, show=False):
fig = go.Figure()
# add heat map
fig.add_trace(
go.Scatter(x=position[:, 0],
y=position[:, 1],
mode='markers',
marker=dict(size=7,
color=value,
colorbar=dict(title=colorbar_title),
colorscale="Rainbow"),
text=value,
name=title))
# add floor plan
floor_plan = Image.open(floor_plan_filename)
fig.update_layout(images=[
go.layout.Image(
source=floor_plan,
xref="x",
yref="y",
x=0,
y=height_meter,
sizex=width_meter,
sizey=height_meter,
sizing="contain",
opacity=1,
layer="below",
)
])
# configure
fig.update_xaxes(autorange=False, range=[0, width_meter])
fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
fig.update_layout(
title=go.layout.Title(
text=title or "No title.",
xref="paper",
x=0,
),
autosize=True,
width=900,
height=200 + 900 * height_meter / width_meter,
template="plotly_white",
)
if show:
fig.show()
return fig
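# Usage sketch (hypothetical values): both helpers return a plotly Figure, e.g.
#
#   fig = visualize_trajectory(xy, 'floor_image.png', 100.0, 50.0, show=False)
#   save_figure_to_html(fig, 'trajectory.html')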

@ -0,0 +1,375 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from dask.distributed import wait\n",
"import glob\n",
"\n",
"SENSORS = ['acce','acce_uncali','gyro',\n",
" 'gyro_uncali','magn','magn_uncali','ahrs']\n",
"\n",
"NFEAS = {\n",
" 'acce': 3,\n",
" 'acce_uncali': 3,\n",
" 'gyro': 3,\n",
" 'gyro_uncali': 3,\n",
" 'magn': 3,\n",
" 'magn_uncali': 3,\n",
" 'ahrs': 3,\n",
" 'wifi': 1,\n",
" 'ibeacon': 1,\n",
" 'waypoint': 3\n",
"}\n",
"\n",
"ACOLS = ['timestamp','x','y','z']\n",
" \n",
"FIELDS = {\n",
" 'acce': ACOLS,\n",
" 'acce_uncali': ACOLS,\n",
" 'gyro': ACOLS,\n",
" 'gyro_uncali': ACOLS,\n",
" 'magn': ACOLS,\n",
" 'magn_uncali': ACOLS,\n",
" 'ahrs': ACOLS,\n",
" 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n",
" 'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n",
" 'waypoint': ['timestamp','x','y']\n",
"}\n",
"\n",
"def to_frame(data, col):\n",
" cols = FIELDS[col]\n",
" is_dummy = False\n",
" if data.shape[0]>0:\n",
" df = pd.DataFrame(data, columns=cols)\n",
" else:\n",
" df = create_dummy_df(cols)\n",
" is_dummy = True\n",
" for col in df.columns:\n",
" if 'timestamp' in col:\n",
" df[col] = df[col].astype('int64')\n",
" return df, is_dummy\n",
"\n",
"def create_dummy_df(cols):\n",
" df = pd.DataFrame()\n",
" for col in cols:\n",
" df[col] = [0]\n",
" if col in ['ssid','bssid']:\n",
" df[col] = df[col].map(str)\n",
" return df\n",
"\n",
"from dataclasses import dataclass\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
"@dataclass\n",
"class ReadData:\n",
" acce: np.ndarray\n",
" acce_uncali: np.ndarray\n",
" gyro: np.ndarray\n",
" gyro_uncali: np.ndarray\n",
" magn: np.ndarray\n",
" magn_uncali: np.ndarray\n",
" ahrs: np.ndarray\n",
" wifi: np.ndarray\n",
" ibeacon: np.ndarray\n",
" waypoint: np.ndarray\n",
"\n",
"\n",
"def read_data_file(data_filename):\n",
" acce = []\n",
" acce_uncali = []\n",
" gyro = []\n",
" gyro_uncali = []\n",
" magn = []\n",
" magn_uncali = []\n",
" ahrs = []\n",
" wifi = []\n",
" ibeacon = []\n",
" waypoint = []\n",
"\n",
" with open(data_filename, 'r', encoding='utf-8') as file:\n",
" lines = file.readlines()\n",
"\n",
" for line_data in lines:\n",
" line_data = line_data.strip()\n",
" if not line_data or line_data[0] == '#':\n",
" continue\n",
"\n",
" line_data = line_data.split('\\t')\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER':\n",
" acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n",
" acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE':\n",
" gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n",
" gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n",
" magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n",
" magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ROTATION_VECTOR':\n",
" if len(line_data)>=5:\n",
" ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WIFI':\n",
" sys_ts = line_data[0]\n",
" ssid = line_data[2]\n",
" bssid = line_data[3]\n",
" rssi = line_data[4]\n",
" lastseen_ts = line_data[6]\n",
" wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n",
" wifi.append(wifi_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_BEACON':\n",
" ts = line_data[0]\n",
" uuid = line_data[2]\n",
" major = line_data[3]\n",
" minor = line_data[4]\n",
" rssi = line_data[6]\n",
" lastts = line_data[-1]\n",
" ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]\n",
" ibeacon.append(ibeacon_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WAYPOINT':\n",
" waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n",
"\n",
" acce = np.array(acce)\n",
" acce_uncali = np.array(acce_uncali)\n",
" gyro = np.array(gyro)\n",
" gyro_uncali = np.array(gyro_uncali)\n",
" magn = np.array(magn)\n",
" magn_uncali = np.array(magn_uncali)\n",
" ahrs = np.array(ahrs)\n",
" wifi = np.array(wifi)\n",
" ibeacon = np.array(ibeacon)\n",
" waypoint = np.array(waypoint)\n",
"\n",
" return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def get_test_dfs(PATH, test_files):\n",
" dtest = get_test_df(PATH)\n",
" buildings = set(dtest['building'].values.tolist())\n",
" dws = {}\n",
" ntest_files = []\n",
" for fname in tqdm(test_files):\n",
" path = fname.split('/')[-1].split('.')[0]\n",
" mask = dtest['path'] == path\n",
" dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)\n",
" ntest_files.append(fname)\n",
" return dws\n",
"\n",
"def get_test_df(PATH):\n",
" dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n",
" dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])\n",
" dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])\n",
" dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])\n",
" dtest['timestamp'] = dtest['timestamp'].astype('int64')\n",
" dtest = dtest.sort_values(['path','timestamp']).reset_index(drop=True)\n",
" return dtest\n",
"\n",
"def get_time_gap(name):\n",
" data = read_data_file(name)\n",
" db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n",
"# print(db,no_ibeacon)\n",
" \n",
" if no_ibeacon==0:\n",
" gap = db['last_timestamp'] - db['timestamp']\n",
" assert gap.unique().shape[0]==1\n",
" return gap.values[0],no_ibeacon\n",
" \n",
" if no_ibeacon==1:\n",
" # Group wifis by timestamp\n",
" wifi_groups = pd.DataFrame(data.wifi).groupby(0) \n",
" # Find which one is the most recent of all time points.\n",
" est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max() \n",
" return est_ts,no_ibeacon\n",
"\n",
" \n",
"\n",
"def fix_timestamp_test(df, gap):\n",
" df['real_timestamp'] = df['timestamp'] + gap\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n",
" '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n",
" '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n",
" '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n",
"test_files_ori[:4]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ec2-user/anaconda3/lib/python3.7/site-packages/distributed/dashboard/core.py:79: UserWarning: \n",
"Port 8787 is already in use. \n",
"Perhaps you already have a cluster running?\n",
"Hosting the diagnostics dashboard on a random port instead.\n",
" warnings.warn(\"\\n\" + msg)\n"
]
},
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:42097</li>\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:39155/status' target='_blank'>http://127.0.0.1:39155/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>8</li>\n",
" <li><b>Cores: </b>8</li>\n",
" <li><b>Memory: </b>32.89 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://127.0.0.1:42097' processes=8 threads=8, memory=32.89 GB>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import dask\n",
"from dask.distributed import Client, wait, LocalCluster\n",
"\n",
"# set n_workers to number of cores\n",
"client = Client(n_workers=8, \n",
" threads_per_worker=1)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 626/626 [00:00<00:00, 4552.03it/s]\n",
"100%|██████████| 626/626 [00:16<00:00, 37.39it/s] \n"
]
}
],
"source": [
"futures = []\n",
"for fname in tqdm(test_files_ori, total=len(test_files_ori)):\n",
" f = client.submit(get_time_gap,fname)\n",
" futures.append(f)\n",
" \n",
"testpath2gap = {}\n",
"for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n",
" testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('testpath2gap.pkl','wb') as f:\n",
" pickle.dump(testpath2gap,f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,361 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.007463,
"end_time": "2021-02-03T20:30:06.571139",
"exception": false,
"start_time": "2021-02-03T20:30:06.563676",
"status": "completed"
},
"tags": []
},
"source": [
"### Wifi features\n",
"\n",
"This this is the code to generate the wifi features available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). Using these features can get a score below 14. For an example notebook using them see [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter). They only uses waypoints, wifi and timestamp data to generate solution. See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for an outline of this solution method, and methods of improvement.\n",
"\n",
"There are `break`'s inserted into loops which need to be removed to get this to run. Right now data is written to current working directory. This takes 2-4 hours to run depending on hard drive etc. There is a lot of room for improvement speeding up feature generation. \n",
"\n",
"**Update:** I added one line that creates a column for the path filename, this allows for a groupkfold crossvalidation. \n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.590945Z",
"iopub.status.busy": "2021-02-03T20:30:06.589984Z",
"iopub.status.idle": "2021-02-03T20:30:06.593594Z",
"shell.execute_reply": "2021-02-03T20:30:06.592887Z"
},
"papermill": {
"duration": 0.01623,
"end_time": "2021-02-03T20:30:06.593847",
"exception": false,
"start_time": "2021-02-03T20:30:06.577617",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"import gc\n",
"import json "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.614521Z",
"iopub.status.busy": "2021-02-03T20:30:06.613572Z",
"iopub.status.idle": "2021-02-03T20:30:06.616669Z",
"shell.execute_reply": "2021-02-03T20:30:06.616121Z"
},
"papermill": {
"duration": 0.015585,
"end_time": "2021-02-03T20:30:06.616837",
"exception": false,
"start_time": "2021-02-03T20:30:06.601252",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"base_path = '../input/indoor-location-navigation/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.639011Z",
"iopub.status.busy": "2021-02-03T20:30:06.638118Z",
"iopub.status.idle": "2021-02-03T20:30:09.333807Z",
"shell.execute_reply": "2021-02-03T20:30:09.334360Z"
},
"papermill": {
"duration": 2.711076,
"end_time": "2021-02-03T20:30:09.334617",
"exception": false,
"start_time": "2021-02-03T20:30:06.623541",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# pull out all the buildings actually used in the test set, given current method we don't need the other ones\n",
"ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"\n",
"# only 24 of the total buildings are used in the test set, \n",
"# this allows us to greatly reduce the intial size of the dataset\n",
"\n",
"ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
"used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
"\n",
"# dictionary used to map the floor codes to the values used in the submission file. \n",
"floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.359905Z",
"iopub.status.busy": "2021-02-03T20:30:09.359123Z",
"iopub.status.idle": "2021-02-03T20:30:09.362909Z",
"shell.execute_reply": "2021-02-03T20:30:09.362224Z"
},
"papermill": {
"duration": 0.021272,
"end_time": "2021-02-03T20:30:09.363069",
"exception": false,
"start_time": "2021-02-03T20:30:09.341797",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# get only the wifi bssid that occur over 1000 times(this number can be experimented with)\n",
"# these will be the only ones used when constructing features\n",
"bssid = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
" print(building)\n",
" wifi = list()\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" for file in files:\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for e, line in enumerate(txt):\n",
" tmp = line.strip().split()\n",
" if tmp[1] == \"TYPE_WIFI\":\n",
" wifi.append(tmp)\n",
" df = pd.DataFrame(wifi)\n",
" #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
" value_counts = df[3].value_counts()\n",
" top_bssid = value_counts[value_counts > 0].index.tolist()\n",
" print(len(top_bssid))\n",
" bssid[building] = top_bssid\n",
" del df\n",
" del wifi\n",
" gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.383252Z",
"iopub.status.busy": "2021-02-03T20:30:09.382581Z",
"iopub.status.idle": "2021-02-03T20:30:09.386704Z",
"shell.execute_reply": "2021-02-03T20:30:09.385809Z"
},
"papermill": {
"duration": 0.016635,
"end_time": "2021-02-03T20:30:09.386885",
"exception": false,
"start_time": "2021-02-03T20:30:09.370250",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"with open(\"bssid_1000.json\", \"w\") as f:\n",
" json.dump(bssid, f)\n",
"\n",
"with open(\"bssid_1000.json\") as f:\n",
" bssid = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.418284Z",
"iopub.status.busy": "2021-02-03T20:30:09.417119Z",
"iopub.status.idle": "2021-02-03T20:30:09.420513Z",
"shell.execute_reply": "2021-02-03T20:30:09.419767Z"
},
"papermill": {
"duration": 0.026514,
"end_time": "2021-02-03T20:30:09.420694",
"exception": false,
"start_time": "2021-02-03T20:30:09.394180",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# generate all the training data \n",
"building_dfs = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
" dfs = list()\n",
" index = sorted(bssid[building])\n",
" print(building)\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" print(floor)\n",
" for file in files:\n",
" wifi = list()\n",
" waypoint = list()\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WAYPOINT\":\n",
" waypoint.append(line)\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" df = pd.DataFrame(np.array(wifi)) \n",
"\n",
" # generate a feature, and label for each wifi block\n",
" for gid, g in df.groupby(0):\n",
" dists = list()\n",
" for e, k in enumerate(waypoint):\n",
" dist = abs(int(gid) - int(k[0]))\n",
" dists.append(dist)\n",
" nearest_wp_index = np.argmin(dists)\n",
" \n",
" g = g.drop_duplicates(subset=3)\n",
" tmp = g.iloc[:,3:5]\n",
" feat = tmp.set_index(3).reindex(index).replace(np.nan, -999).T\n",
" feat[\"x\"] = float(waypoint[nearest_wp_index][2])\n",
" feat[\"y\"] = float(waypoint[nearest_wp_index][3])\n",
" feat[\"f\"] = floor\n",
" feat[\"path\"] = file.split('/')[-1].split('.')[0] # useful for crossvalidation\n",
" dfs.append(feat)\n",
" \n",
" building_df = pd.concat(dfs)\n",
" building_dfs[building] = df\n",
" building_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+building+\"_train.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.454304Z",
"iopub.status.busy": "2021-02-03T20:30:09.451093Z",
"iopub.status.idle": "2021-02-03T20:30:09.464308Z",
"shell.execute_reply": "2021-02-03T20:30:09.464854Z"
},
"papermill": {
"duration": 0.036471,
"end_time": "2021-02-03T20:30:09.465079",
"exception": false,
"start_time": "2021-02-03T20:30:09.428608",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Generate the features for the test set\n",
"\n",
"ssubm_building_g = ssubm_df.groupby(0)\n",
"feature_dict = dict()\n",
"\n",
"for gid0, g0 in ssubm_building_g:\n",
" break\n",
" index = sorted(bssid[g0.iloc[0,0]])\n",
" feats = list()\n",
" print(gid0)\n",
" for gid,g in g0.groupby(1):\n",
"\n",
" # get all wifi time locations, \n",
" with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
" txt = f.readlines()\n",
"\n",
" wifi = list()\n",
"\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" wifi_df = pd.DataFrame(wifi)\n",
" wifi_points = pd.DataFrame(wifi_df.groupby(0).count().index.tolist())\n",
" \n",
" for timepoint in g.iloc[:,2].tolist():\n",
"\n",
" deltas = (wifi_points.astype(int) - int(timepoint)).abs()\n",
" min_delta_idx = deltas.values.argmin()\n",
" wifi_block_timestamp = wifi_points.iloc[min_delta_idx].values[0]\n",
" \n",
" wifi_block = wifi_df[wifi_df[0] == wifi_block_timestamp].drop_duplicates(subset=3)\n",
" feat = wifi_block.set_index(3)[4].reindex(index).fillna(-999)\n",
"\n",
" feat['site_path_timestamp'] = g.iloc[0,0] + \"_\" + g.iloc[0,1] + \"_\" + timepoint\n",
" feats.append(feat)\n",
" feature_df = pd.concat(feats, axis=1).T\n",
" feature_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+gid0+\"_test.csv\")\n",
" feature_dict[gid0] = feature_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"papermill": {
"default_parameters": {},
"duration": 9.894085,
"end_time": "2021-02-03T20:30:10.083699",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2021-02-03T20:30:00.189614",
"version": "2.2.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}