parent 1f5a7e770c
commit caa5ff1701
File diff suppressed because it is too large
@ -0,0 +1,122 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "ss1 = pd.read_csv('submission_wifi.csv')\n",
    "ss2 = pd.read_csv('submission_wifi_sensor.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.99816888],\n",
       "       [0.99816888, 1.        ]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.corrcoef([ss1.y,ss2.y])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.99853603],\n",
       "       [0.99853603, 1.        ]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.corrcoef([ss1.x,ss2.x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss = ss1.copy()\n",
    "ss['x'] = ss1['x']*0.5+ss2['x']*0.5\n",
    "ss['y'] = ss1['y']*0.5+ss2['y']*0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss.to_csv('sub_wifi_sensor.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@ -0,0 +1,85 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.99717624],\n",
       "       [0.99717624, 1.        ]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ss1 = pd.read_csv('sub_wifi_sensor_post.csv').sort_values(by='site_path_timestamp').reset_index(drop=True)\n",
    "ss2 = pd.read_csv('submission_ym.csv').sort_values(by='site_path_timestamp').reset_index(drop=True)\n",
    "np.corrcoef([ss1.x,ss2.x])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss = ss1.copy()\n",
    "ss['x'] = ss1['x']*0.5+ss2['x']*0.5\n",
    "ss['y'] = ss1['y']*0.5+ss2['y']*0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss.to_csv('final.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
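The two notebooks above both blend a pair of submissions with fixed 50/50 weights after checking that the predictions are highly correlated. A minimal sketch generalizing that step to an arbitrary weight `w` (the function name is new here; `w = 0.5` reproduces the notebooks' behavior, and the commented call mirrors the first notebook's file names):

```python
import pandas as pd

def blend_submissions(path_a, path_b, w=0.5):
    """Weighted blend of two submission files, aligned on site_path_timestamp."""
    sub_a = pd.read_csv(path_a).sort_values(by='site_path_timestamp').reset_index(drop=True)
    sub_b = pd.read_csv(path_b).sort_values(by='site_path_timestamp').reset_index(drop=True)
    out = sub_a.copy()
    out['x'] = w * sub_a['x'] + (1 - w) * sub_b['x']
    out['y'] = w * sub_a['y'] + (1 - w) * sub_b['y']
    return out

# blend_submissions('submission_wifi.csv', 'submission_wifi_sensor.csv').to_csv('sub_wifi_sensor.csv', index=False)
```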
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@ -0,0 +1,361 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.007463,
     "end_time": "2021-02-03T20:30:06.571139",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.563676",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### Wifi features\n",
    "\n",
    "This is the code to generate the wifi features available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). Using these features it is possible to get a score below 14. For an example notebook that uses them, see [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter). It uses only waypoint, wifi and timestamp data to generate a solution. See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for an outline of this solution method and ways to improve it.\n",
    "\n",
    "There are `break` statements inserted into the loops which need to be removed to get this to run. Right now data is written to the current working directory. This takes 2-4 hours to run depending on the hard drive etc. There is a lot of room for improvement in speeding up feature generation.\n",
    "\n",
    "**Update:** I added one line that creates a column for the path filename; this allows for a GroupKFold cross-validation (a minimal sketch follows this notebook).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:06.590945Z",
     "iopub.status.busy": "2021-02-03T20:30:06.589984Z",
     "iopub.status.idle": "2021-02-03T20:30:06.593594Z",
     "shell.execute_reply": "2021-02-03T20:30:06.592887Z"
    },
    "papermill": {
     "duration": 0.01623,
     "end_time": "2021-02-03T20:30:06.593847",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.577617",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import glob\n",
    "import os\n",
    "import gc\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:06.614521Z",
     "iopub.status.busy": "2021-02-03T20:30:06.613572Z",
     "iopub.status.idle": "2021-02-03T20:30:06.616669Z",
     "shell.execute_reply": "2021-02-03T20:30:06.616121Z"
    },
    "papermill": {
     "duration": 0.015585,
     "end_time": "2021-02-03T20:30:06.616837",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.601252",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "base_path = '../input/indoor-location-navigation/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:06.639011Z",
     "iopub.status.busy": "2021-02-03T20:30:06.638118Z",
     "iopub.status.idle": "2021-02-03T20:30:09.333807Z",
     "shell.execute_reply": "2021-02-03T20:30:09.334360Z"
    },
    "papermill": {
     "duration": 2.711076,
     "end_time": "2021-02-03T20:30:09.334617",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.623541",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# pull out all the buildings actually used in the test set; given the current method we don't need the other ones\n",
    "ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
    "\n",
    "# only 24 of the total buildings are used in the test set,\n",
    "# this allows us to greatly reduce the initial size of the dataset\n",
    "\n",
    "ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
    "used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
    "\n",
    "# dictionary used to map the floor codes to the values used in the submission file\n",
    "floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
    "             \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.359905Z",
     "iopub.status.busy": "2021-02-03T20:30:09.359123Z",
     "iopub.status.idle": "2021-02-03T20:30:09.362909Z",
     "shell.execute_reply": "2021-02-03T20:30:09.362224Z"
    },
    "papermill": {
     "duration": 0.021272,
     "end_time": "2021-02-03T20:30:09.363069",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.341797",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# get only the wifi bssids that occur over 1000 times (this number can be experimented with);\n",
    "# these will be the only ones used when constructing features\n",
    "bssid = dict()\n",
    "\n",
    "for building in used_buildings:\n",
    "    break\n",
    "    folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
    "    print(building)\n",
    "    wifi = list()\n",
    "    for folder in folders:\n",
    "        floor = floor_map[folder.split('/')[-1]]\n",
    "        files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
    "        for file in files:\n",
    "            with open(file) as f:\n",
    "                txt = f.readlines()\n",
    "            for e, line in enumerate(txt):\n",
    "                tmp = line.strip().split()\n",
    "                if tmp[1] == \"TYPE_WIFI\":\n",
    "                    wifi.append(tmp)\n",
    "    df = pd.DataFrame(wifi)\n",
    "    #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
    "    value_counts = df[3].value_counts()\n",
    "    top_bssid = value_counts[value_counts > 0].index.tolist()\n",
    "    print(len(top_bssid))\n",
    "    bssid[building] = top_bssid\n",
    "    del df\n",
    "    del wifi\n",
    "    gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.383252Z",
     "iopub.status.busy": "2021-02-03T20:30:09.382581Z",
     "iopub.status.idle": "2021-02-03T20:30:09.386704Z",
     "shell.execute_reply": "2021-02-03T20:30:09.385809Z"
    },
    "papermill": {
     "duration": 0.016635,
     "end_time": "2021-02-03T20:30:09.386885",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.370250",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "with open(\"bssid_1000.json\", \"w\") as f:\n",
    "    json.dump(bssid, f)\n",
    "\n",
    "with open(\"bssid_1000.json\") as f:\n",
    "    bssid = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.418284Z",
     "iopub.status.busy": "2021-02-03T20:30:09.417119Z",
     "iopub.status.idle": "2021-02-03T20:30:09.420513Z",
     "shell.execute_reply": "2021-02-03T20:30:09.419767Z"
    },
    "papermill": {
     "duration": 0.026514,
     "end_time": "2021-02-03T20:30:09.420694",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.394180",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# generate all the training data\n",
    "building_dfs = dict()\n",
    "\n",
    "for building in used_buildings:\n",
    "    break\n",
    "    folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
    "    dfs = list()\n",
    "    index = sorted(bssid[building])\n",
    "    print(building)\n",
    "    for folder in folders:\n",
    "        floor = floor_map[folder.split('/')[-1]]\n",
    "        files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
    "        print(floor)\n",
    "        for file in files:\n",
    "            wifi = list()\n",
    "            waypoint = list()\n",
    "            with open(file) as f:\n",
    "                txt = f.readlines()\n",
    "            for line in txt:\n",
    "                line = line.strip().split()\n",
    "                if line[1] == \"TYPE_WAYPOINT\":\n",
    "                    waypoint.append(line)\n",
    "                if line[1] == \"TYPE_WIFI\":\n",
    "                    wifi.append(line)\n",
    "\n",
    "            df = pd.DataFrame(np.array(wifi))\n",
    "\n",
    "            # generate a feature and a label for each wifi block\n",
    "            for gid, g in df.groupby(0):\n",
    "                dists = list()\n",
    "                for e, k in enumerate(waypoint):\n",
    "                    dist = abs(int(gid) - int(k[0]))\n",
    "                    dists.append(dist)\n",
    "                nearest_wp_index = np.argmin(dists)\n",
    "\n",
    "                g = g.drop_duplicates(subset=3)\n",
    "                tmp = g.iloc[:,3:5]\n",
    "                feat = tmp.set_index(3).reindex(index).replace(np.nan, -999).T\n",
    "                feat[\"x\"] = float(waypoint[nearest_wp_index][2])\n",
    "                feat[\"y\"] = float(waypoint[nearest_wp_index][3])\n",
    "                feat[\"f\"] = floor\n",
    "                feat[\"path\"] = file.split('/')[-1].split('.')[0] # useful for crossvalidation\n",
    "                dfs.append(feat)\n",
    "\n",
    "    building_df = pd.concat(dfs)\n",
    "    building_dfs[building] = building_df\n",
    "    building_df.to_csv(building + \"_train.csv\")  # written to the current working directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.454304Z",
     "iopub.status.busy": "2021-02-03T20:30:09.451093Z",
     "iopub.status.idle": "2021-02-03T20:30:09.464308Z",
     "shell.execute_reply": "2021-02-03T20:30:09.464854Z"
    },
    "papermill": {
     "duration": 0.036471,
     "end_time": "2021-02-03T20:30:09.465079",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.428608",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# generate the features for the test set\n",
    "\n",
    "ssubm_building_g = ssubm_df.groupby(0)\n",
    "feature_dict = dict()\n",
    "\n",
    "for gid0, g0 in ssubm_building_g:\n",
    "    break\n",
    "    index = sorted(bssid[g0.iloc[0,0]])\n",
    "    feats = list()\n",
    "    print(gid0)\n",
    "    for gid, g in g0.groupby(1):\n",
    "\n",
    "        # get all wifi time locations\n",
    "        with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
    "            txt = f.readlines()\n",
    "\n",
    "        wifi = list()\n",
    "\n",
    "        for line in txt:\n",
    "            line = line.strip().split()\n",
    "            if line[1] == \"TYPE_WIFI\":\n",
    "                wifi.append(line)\n",
    "\n",
    "        wifi_df = pd.DataFrame(wifi)\n",
    "        wifi_points = pd.DataFrame(wifi_df.groupby(0).count().index.tolist())\n",
    "\n",
    "        for timepoint in g.iloc[:,2].tolist():\n",
    "\n",
    "            deltas = (wifi_points.astype(int) - int(timepoint)).abs()\n",
    "            min_delta_idx = deltas.values.argmin()\n",
    "            wifi_block_timestamp = wifi_points.iloc[min_delta_idx].values[0]\n",
    "\n",
    "            wifi_block = wifi_df[wifi_df[0] == wifi_block_timestamp].drop_duplicates(subset=3)\n",
    "            feat = wifi_block.set_index(3)[4].reindex(index).fillna(-999)\n",
    "\n",
    "            feat['site_path_timestamp'] = g.iloc[0,0] + \"_\" + g.iloc[0,1] + \"_\" + timepoint\n",
    "            feats.append(feat)\n",
    "    feature_df = pd.concat(feats, axis=1).T\n",
    "    feature_df.to_csv(gid0 + \"_test.csv\")  # written to the current working directory\n",
    "    feature_dict[gid0] = feature_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 9.894085,
   "end_time": "2021-02-03T20:30:10.083699",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-02-03T20:30:00.189614",
   "version": "2.2.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
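A minimal sketch of the GroupKFold cross-validation that the `path` column enables, assuming a per-building feature file written by the notebook above (the building id in the file name is illustrative):

```python
import pandas as pd
from sklearn.model_selection import GroupKFold

train = pd.read_csv('5a0546857ecc773753327266_train.csv')  # hypothetical building file
X = train.drop(columns=['x', 'y', 'f', 'path'])
y = train['x']               # fit a second model for 'y' the same way
groups = train['path']       # one group per recorded path

# a path never straddles the train/validation split, so CV mimics the test setting
gkf = GroupKFold(n_splits=5)
for fold, (tr_idx, va_idx) in enumerate(gkf.split(X, y, groups=groups)):
    X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
    y_tr, y_va = y.iloc[tr_idx], y.iloc[va_idx]
    # fit a regressor per fold here, e.g. LightGBM as in the linked starter notebook
```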
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
@ -0,0 +1,198 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 24 buildings in the testing set.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file</th>\n",
       "      <th>building</th>\n",
       "      <th>site</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>../input/indoor-location-navigation/test//00ff...</td>\n",
       "      <td>5da1389e4db8ce0c98bd0547</td>\n",
       "      <td>SiteName:和达城商场</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>../input/indoor-location-navigation/test//01c4...</td>\n",
       "      <td>5da138b74db8ce0c98bd4774</td>\n",
       "      <td>SiteName:万象城</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>../input/indoor-location-navigation/test//030b...</td>\n",
       "      <td>5da138764db8ce0c98bcaa46</td>\n",
       "      <td>SiteName:银泰百货</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>../input/indoor-location-navigation/test//0389...</td>\n",
       "      <td>5dbc1d84c1eb61796cf7c010</td>\n",
       "      <td>SiteName:杭州大悦城</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>../input/indoor-location-navigation/test//0402...</td>\n",
       "      <td>5da1383b4db8ce0c98bc11ab</td>\n",
       "      <td>SiteName:永旺梦乐城</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                file  \\\n",
       "0  ../input/indoor-location-navigation/test//00ff...   \n",
       "1  ../input/indoor-location-navigation/test//01c4...   \n",
       "2  ../input/indoor-location-navigation/test//030b...   \n",
       "3  ../input/indoor-location-navigation/test//0389...   \n",
       "4  ../input/indoor-location-navigation/test//0402...   \n",
       "\n",
       "                   building            site  \n",
       "0  5da1389e4db8ce0c98bd0547  SiteName:和达城商场  \n",
       "1  5da138b74db8ce0c98bd4774    SiteName:万象城  \n",
       "2  5da138764db8ce0c98bcaa46   SiteName:银泰百货  \n",
       "3  5dbc1d84c1eb61796cf7c010  SiteName:杭州大悦城  \n",
       "4  5da1383b4db8ce0c98bc11ab  SiteName:永旺梦乐城  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "\n",
    "# Input data files are available in the read-only \"../input/\" directory\n",
    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
    "\n",
    "import os\n",
    "# Prepare paths:\n",
    "import glob\n",
    "from pathlib import Path\n",
    "inpath = '../input/indoor-location-navigation/'\n",
    "metapath = inpath + 'metadata/'\n",
    "trainpath = inpath + 'train/'\n",
    "testpath = inpath + 'test/'\n",
    "\n",
    "# Extract testing files, buildings and sites:\n",
    "os.system(f'grep SiteID {testpath}/* > test_buildings.txt')\n",
    "test_buildings = pd.read_csv('test_buildings.txt',sep='\\t',header=None,names=['file','building','site'])\n",
    "test_buildings['file'] = test_buildings['file'].apply(lambda x: x[:-2])\n",
    "test_buildings['building'] = test_buildings['building'].apply(lambda x: x[7:])\n",
    "\n",
    "# How many buildings in the testing set?\n",
    "buildings = np.unique(test_buildings['building'])\n",
    "print('There are',len(buildings),'buildings in the testing set.')\n",
    "\n",
    "test_buildings.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compile C++ pre-processing code:\n",
    "er = os.system(\"g++ ../input/indoor-cpp/1_preprocess.cpp -std=c++11 -o preprocess\")\n",
    "if er: print(\"Error\")\n",
    "\n",
    "# Reformat the testing set:\n",
    "os.system('mkdir test')\n",
    "for i,(path_filename,building) in enumerate(zip(test_buildings['file'],test_buildings['building'])):\n",
    "    er = os.system(f'./preprocess {path_filename} test {building} {0}') # since we do not know the floor, I put 0\n",
    "    if er: print(\"Error:\",path_filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Acceleration, magnetic and orientation testing data:\n",
    "os.system('mkdir indoor_testing_accel')\n",
    "os.system(\"g++ ../input/indoor-cpp/2_preprocess_accel.cpp -std=c++11 -o preprocess_accel\")\n",
    "for building in buildings:\n",
    "    os.system(f'./preprocess_accel {building}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wifi testing data:\n",
    "os.system('mkdir test_wifi')\n",
    "os.system(\"g++ /kaggle/input/indoor-cpp/2_preprocess_wifi.cpp -std=c++11 -o preprocess_wifi\")\n",
    "for building in buildings:\n",
    "    os.system(f'./preprocess_wifi {building}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@ -0,0 +1,21 @@
MIT License

Copyright (c) 2017-2020 XYZ10, Inc. https://dangwu.com/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@ -0,0 +1,361 @@
import numpy as np
import scipy.signal as signal


def split_ts_seq(ts_seq, sep_ts):
    """
    Split a timestamped sequence into sub-sequences at the given separator timestamps.

    :param ts_seq: numpy ndarray whose first column is a timestamp
    :param sep_ts: separator timestamps at which to split
    :return: list of sub-arrays
    """
    tss = ts_seq[:, 0].astype(float)
    unique_sep_ts = np.unique(sep_ts)
    ts_seqs = []
    start_index = 0
    for i in range(0, unique_sep_ts.shape[0]):
        end_index = np.searchsorted(tss, unique_sep_ts[i], side='right')
        if start_index == end_index:
            continue
        ts_seqs.append(ts_seq[start_index:end_index, :].copy())
        start_index = end_index

    # tail data
    if start_index < ts_seq.shape[0]:
        ts_seqs.append(ts_seq[start_index:, :].copy())

    return ts_seqs


def correct_trajectory(original_xys, end_xy):
    """
    Rotate and scale a trajectory about its start point so that its end point
    coincides with the known end position.

    :param original_xys: numpy ndarray, shape(N, 2)
    :param end_xy: numpy ndarray, shape(1, 2)
    :return: corrected numpy ndarray, shape(N, 2)
    """
    corrected_xys = np.zeros((0, 2))

    A = original_xys[0, :]
    B = end_xy
    Bp = original_xys[-1, :]

    angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])
    angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])
    angle_BpAB = angle_BpAX - angle_BAX
    AB = np.sqrt(np.sum((B - A) ** 2))
    ABp = np.sqrt(np.sum((Bp - A) ** 2))

    corrected_xys = np.append(corrected_xys, [A], 0)
    for i in np.arange(1, np.size(original_xys, 0)):
        angle_CpAX = np.arctan2(original_xys[i, 1] - A[1], original_xys[i, 0] - A[0])

        angle_CAX = angle_CpAX - angle_BpAB

        ACp = np.sqrt(np.sum((original_xys[i, :] - A) ** 2))

        AC = ACp * AB / ABp

        delta_C = np.array([AC * np.cos(angle_CAX), AC * np.sin(angle_CAX)])

        C = delta_C + A

        corrected_xys = np.append(corrected_xys, [C], 0)

    return corrected_xys


def correct_positions(rel_positions, reference_positions):
    """
    Integrate relative step positions between consecutive reference waypoints and
    correct each segment so that it ends at the next waypoint.

    :param rel_positions: numpy ndarray, rows of [timestamp, dx, dy]
    :param reference_positions: numpy ndarray, rows of [timestamp, x, y]
    :return: numpy ndarray, rows of [timestamp, x, y]
    """
    rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])
    if len(rel_positions_list) != reference_positions.shape[0] - 1:
        # print(f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')
        del rel_positions_list[-1]
    assert len(rel_positions_list) == reference_positions.shape[0] - 1

    corrected_positions = np.zeros((0, 3))
    for i, rel_ps in enumerate(rel_positions_list):
        start_position = reference_positions[i]
        end_position = reference_positions[i + 1]
        abs_ps = np.zeros(rel_ps.shape)
        abs_ps[:, 0] = rel_ps[:, 0]
        # abs_ps[:, 1:3] = rel_ps[:, 1:3] + start_position[1:3]
        abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]
        for j in range(1, rel_ps.shape[0]):
            abs_ps[j, 1:3] = abs_ps[j-1, 1:3] + rel_ps[j, 1:3]
        abs_ps = np.insert(abs_ps, 0, start_position, axis=0)
        corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])
        corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))
        if i == 0:
            corrected_positions = np.append(corrected_positions, corrected_ps, axis=0)
        else:
            corrected_positions = np.append(corrected_positions, corrected_ps[1:], axis=0)

    corrected_positions = np.array(corrected_positions)

    return corrected_positions


def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):
    order = 4
    filter_b, filter_a = signal.butter(order, cut_off_freq / (sample_freq / 2), 'low', False)
    zf = signal.lfilter_zi(filter_b, filter_a)
    _, zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)
    _, filter_zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)

    return filter_b, filter_a, filter_zf


def get_rotation_matrix_from_vector(rotation_vector):
    q1 = rotation_vector[0]
    q2 = rotation_vector[1]
    q3 = rotation_vector[2]

    if rotation_vector.size >= 4:
        q0 = rotation_vector[3]
    else:
        q0 = 1 - q1*q1 - q2*q2 - q3*q3
        if q0 > 0:
            q0 = np.sqrt(q0)
        else:
            q0 = 0

    sq_q1 = 2 * q1 * q1
    sq_q2 = 2 * q2 * q2
    sq_q3 = 2 * q3 * q3
    q1_q2 = 2 * q1 * q2
    q3_q0 = 2 * q3 * q0
    q1_q3 = 2 * q1 * q3
    q2_q0 = 2 * q2 * q0
    q2_q3 = 2 * q2 * q3
    q1_q0 = 2 * q1 * q0

    # R is allocated as a 9-element (3x3) matrix here, so the 4x4 branch below is
    # not reached as written
    R = np.zeros((9,))
    if R.size == 9:
        R[0] = 1 - sq_q2 - sq_q3
        R[1] = q1_q2 - q3_q0
        R[2] = q1_q3 + q2_q0

        R[3] = q1_q2 + q3_q0
        R[4] = 1 - sq_q1 - sq_q3
        R[5] = q2_q3 - q1_q0

        R[6] = q1_q3 - q2_q0
        R[7] = q2_q3 + q1_q0
        R[8] = 1 - sq_q1 - sq_q2

        R = np.reshape(R, (3, 3))
    elif R.size == 16:
        R[0] = 1 - sq_q2 - sq_q3
        R[1] = q1_q2 - q3_q0
        R[2] = q1_q3 + q2_q0
        R[3] = 0.0

        R[4] = q1_q2 + q3_q0
        R[5] = 1 - sq_q1 - sq_q3
        R[6] = q2_q3 - q1_q0
        R[7] = 0.0

        R[8] = q1_q3 - q2_q0
        R[9] = q2_q3 + q1_q0
        R[10] = 1 - sq_q1 - sq_q2
        R[11] = 0.0

        R[12] = R[13] = R[14] = 0.0
        R[15] = 1.0

        R = np.reshape(R, (4, 4))

    return R


def get_orientation(R):
    flat_R = R.flatten()
    values = np.zeros((3,))
    if np.size(flat_R) == 9:
        values[0] = np.arctan2(flat_R[1], flat_R[4])
        values[1] = np.arcsin(-flat_R[7])
        values[2] = np.arctan2(-flat_R[6], flat_R[8])
    else:
        values[0] = np.arctan2(flat_R[1], flat_R[5])
        values[1] = np.arcsin(-flat_R[9])
        values[2] = np.arctan2(-flat_R[8], flat_R[10])

    return values


def compute_steps(acce_datas):
    step_timestamps = np.array([])
    step_indexs = np.array([], dtype=int)
    step_acce_max_mins = np.zeros((0, 4))

    sample_freq = 50
    window_size = 22
    low_acce_mag = 0.6
    step_criterion = 1
    interval_threshold = 250

    acce_max = np.zeros((2,))
    acce_min = np.zeros((2,))
    acce_binarys = np.zeros((window_size,), dtype=int)
    acce_mag_pre = 0
    state_flag = 0

    warmup_data = np.ones((window_size,)) * 9.81
    filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)
    acce_mag_window = np.zeros((window_size, 1))

    # detect steps according to acceleration magnitudes
    for i in np.arange(0, np.size(acce_datas, 0)):
        acce_data = acce_datas[i, :]
        acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))

        acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)
        acce_mag_filt = acce_mag_filt[0]

        acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])
        acce_mag_window = np.delete(acce_mag_window, 0)
        mean_gravity = np.mean(acce_mag_window)
        acce_std = np.std(acce_mag_window)
        mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])

        # detect a valid peak or valley of the acceleration magnitude
        acce_mag_filt_detrend = acce_mag_filt - mean_gravity
        if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):
            # peak
            acce_binarys = np.append(acce_binarys, [1])
            acce_binarys = np.delete(acce_binarys, 0)
        elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):
            # valley
            acce_binarys = np.append(acce_binarys, [-1])
            acce_binarys = np.delete(acce_binarys, 0)
        else:
            # between peak and valley
            acce_binarys = np.append(acce_binarys, [0])
            acce_binarys = np.delete(acce_binarys, 0)

        if (acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):
            if state_flag == 0:
                acce_max[:] = acce_data[0], acce_mag_filt
                state_flag = 1
            elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (
                    acce_mag_filt > acce_max[1]):
                acce_max[:] = acce_data[0], acce_mag_filt
            elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):
                acce_max[:] = acce_data[0], acce_mag_filt
                state_flag = 1

        # choose a reasonable step criterion and check if there is a valid step
        # save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]
        step_flag = False
        if step_criterion == 2:
            if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):
                step_flag = True
        elif step_criterion == 3:
            if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):
                step_flag = True
        else:
            if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:
                if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):
                    acce_min[:] = acce_data[0], acce_mag_filt
                    state_flag = 2
                    step_flag = True
                elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (
                        acce_mag_filt < acce_min[1]):
                    acce_min[:] = acce_data[0], acce_mag_filt
        if step_flag:
            step_timestamps = np.append(step_timestamps, acce_data[0])
            step_indexs = np.append(step_indexs, [i])
            step_acce_max_mins = np.append(step_acce_max_mins,
                                           [[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)
        acce_mag_pre = acce_mag_filt_detrend

    return step_timestamps, step_indexs, step_acce_max_mins


def compute_stride_length(step_acce_max_mins):
    K = 0.4
    K_max = 0.8
    K_min = 0.4
    para_a0 = 0.21468084
    para_a1 = 0.09154517
    para_a2 = 0.02301998

    stride_lengths = np.zeros((step_acce_max_mins.shape[0], 2))
    k_real = np.zeros((step_acce_max_mins.shape[0], 2))
    step_timeperiod = np.zeros((step_acce_max_mins.shape[0] - 1, ))
    stride_lengths[:, 0] = step_acce_max_mins[:, 0]
    window_size = 2
    step_timeperiod_temp = np.zeros((0, ))

    # calculate every step period - step_timeperiod unit: second
    for i in range(0, step_timeperiod.shape[0]):
        step_timeperiod_data = (step_acce_max_mins[i + 1, 0] - step_acce_max_mins[i, 0]) / 1000
        step_timeperiod_temp = np.append(step_timeperiod_temp, [step_timeperiod_data])
        if step_timeperiod_temp.shape[0] > window_size:
            step_timeperiod_temp = np.delete(step_timeperiod_temp, [0])
        step_timeperiod[i] = np.sum(step_timeperiod_temp) / step_timeperiod_temp.shape[0]

    # calculate parameters by step period and acceleration magnitude variance
    k_real[:, 0] = step_acce_max_mins[:, 0]
    k_real[0, 1] = K
    for i in range(0, step_timeperiod.shape[0]):
        k_real[i + 1, 1] = np.max([(para_a0 + para_a1 / step_timeperiod[i] + para_a2 * step_acce_max_mins[i, 3]), K_min])
        k_real[i + 1, 1] = np.min([k_real[i + 1, 1], K_max]) * (K / K_min)

    # calculate every stride length from the parameters and the max and min of the acceleration magnitude
    stride_lengths[:, 1] = np.max([(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]),
                                   np.ones((step_acce_max_mins.shape[0], ))], axis=0) ** (1 / 4) * k_real[:, 1]

    return stride_lengths


def compute_headings(ahrs_datas):
    headings = np.zeros((np.size(ahrs_datas, 0), 2))
    for i in np.arange(0, np.size(ahrs_datas, 0)):
        ahrs_data = ahrs_datas[i, :]
        rot_mat = get_rotation_matrix_from_vector(ahrs_data[1:])
        azimuth, pitch, roll = get_orientation(rot_mat)
        around_z = (-azimuth) % (2 * np.pi)
        headings[i, :] = ahrs_data[0], around_z
    return headings


def compute_step_heading(step_timestamps, headings):
    step_headings = np.zeros((len(step_timestamps), 2))
    step_timestamps_index = 0
    for i in range(0, len(headings)):
        if step_timestamps_index < len(step_timestamps):
            if headings[i, 0] == step_timestamps[step_timestamps_index]:
                step_headings[step_timestamps_index, :] = headings[i, :]
                step_timestamps_index += 1
        else:
            break
    assert step_timestamps_index == len(step_timestamps)

    return step_headings


def compute_rel_positions(stride_lengths, step_headings):
    rel_positions = np.zeros((stride_lengths.shape[0], 3))
    for i in range(0, stride_lengths.shape[0]):
        rel_positions[i, 0] = stride_lengths[i, 0]
        rel_positions[i, 1] = -stride_lengths[i, 1] * np.sin(step_headings[i, 1])
        rel_positions[i, 2] = stride_lengths[i, 1] * np.cos(step_headings[i, 1])

    return rel_positions


def compute_step_positions(acce_datas, ahrs_datas, posi_datas):
    step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)
    headings = compute_headings(ahrs_datas)
    stride_lengths = compute_stride_length(step_acce_max_mins)
    step_headings = compute_step_heading(step_timestamps, headings)
    rel_positions = compute_rel_positions(stride_lengths, step_headings)
    step_positions = correct_positions(rel_positions, posi_datas)

    return step_positions
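A small usage sketch for `split_ts_seq` with synthetic data, to make the splitting convention concrete (rows are `[timestamp, value]`; separators are applied with `side='right'`, so a row whose timestamp equals a separator stays in the earlier chunk):

```python
import numpy as np
# from compute_f import split_ts_seq  # assuming this file is importable as compute_f

ts_seq = np.array([[100., 1.], [200., 2.], [300., 3.], [400., 4.]])
chunks = split_ts_seq(ts_seq, np.array([200., 350.]))
# chunks[0] -> timestamps 100 and 200, chunks[1] -> 300, chunks[2] -> 400 (tail data)
```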
@ -0,0 +1,104 @@
from dataclasses import dataclass

import numpy as np


@dataclass
class ReadData:
    acce: np.ndarray
    acce_uncali: np.ndarray
    gyro: np.ndarray
    gyro_uncali: np.ndarray
    magn: np.ndarray
    magn_uncali: np.ndarray
    ahrs: np.ndarray
    wifi: np.ndarray
    ibeacon: np.ndarray
    waypoint: np.ndarray


def read_data_file(data_filename):
    acce = []
    acce_uncali = []
    gyro = []
    gyro_uncali = []
    magn = []
    magn_uncali = []
    ahrs = []
    wifi = []
    ibeacon = []
    waypoint = []

    with open(data_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for line_data in lines:
        line_data = line_data.strip()
        if not line_data or line_data[0] == '#':
            continue

        line_data = line_data.split('\t')

        if line_data[1] == 'TYPE_ACCELEROMETER':
            acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
            acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE':
            gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
            gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD':
            magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
            magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ROTATION_VECTOR':
            ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_WIFI':
            sys_ts = line_data[0]
            ssid = line_data[2]
            bssid = line_data[3]
            rssi = line_data[4]
            lastseen_ts = line_data[6]
            wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]
            wifi.append(wifi_data)
            continue

        if line_data[1] == 'TYPE_BEACON':
            ts = line_data[0]
            uuid = line_data[2]
            major = line_data[3]
            minor = line_data[4]
            rssi = line_data[6]
            ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]
            ibeacon.append(ibeacon_data)
            continue

        if line_data[1] == 'TYPE_WAYPOINT':
            waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])

    acce = np.array(acce)
    acce_uncali = np.array(acce_uncali)
    gyro = np.array(gyro)
    gyro_uncali = np.array(gyro_uncali)
    magn = np.array(magn)
    magn_uncali = np.array(magn_uncali)
    ahrs = np.array(ahrs)
    wifi = np.array(wifi)
    ibeacon = np.array(ibeacon)
    waypoint = np.array(waypoint)

    return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)
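A short usage sketch tying `read_data_file` to `compute_f.compute_step_positions`, mirroring what `main.py` below does per path file; the file path here is illustrative:

```python
from io_f import read_data_file
from compute_f import compute_step_positions

# any recorded path file from the dataset works here; this path is hypothetical
data = read_data_file('./data/site1/F1/path_data_files/example_path.txt')

# estimate a position for every detected step, anchored to the recorded waypoints
step_positions = compute_step_positions(data.acce, data.ahrs, data.waypoint)
# step_positions rows: [timestamp, x, y]
```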
@ -0,0 +1,258 @@
import json
import os
from pathlib import Path

import numpy as np

from compute_f import split_ts_seq, compute_step_positions
from io_f import read_data_file
from visualize_f import visualize_trajectory, visualize_heatmap, save_figure_to_html

floor_data_dir = './data/site1/F1'
path_data_dir = floor_data_dir + '/path_data_files'
floor_plan_filename = floor_data_dir + '/floor_image.png'
floor_info_filename = floor_data_dir + '/floor_info.json'

save_dir = './output/site1/F1'
path_image_save_dir = save_dir + '/path_images'
step_position_image_save_dir = save_dir
magn_image_save_dir = save_dir
wifi_image_save_dir = save_dir + '/wifi_images'
ibeacon_image_save_dir = save_dir + '/ibeacon_images'
wifi_count_image_save_dir = save_dir


def calibrate_magnetic_wifi_ibeacon_to_position(path_file_list):
    mwi_datas = {}
    for path_filename in path_file_list:
        print(f'Processing {path_filename}...')

        path_datas = read_data_file(path_filename)
        acce_datas = path_datas.acce
        magn_datas = path_datas.magn
        ahrs_datas = path_datas.ahrs
        wifi_datas = path_datas.wifi
        ibeacon_datas = path_datas.ibeacon
        posi_datas = path_datas.waypoint

        step_positions = compute_step_positions(acce_datas, ahrs_datas, posi_datas)
        # visualize_trajectory(posi_datas[:, 1:3], floor_plan_filename, width_meter, height_meter, title='Ground Truth', show=True)
        # visualize_trajectory(step_positions[:, 1:3], floor_plan_filename, width_meter, height_meter, title='Step Position', show=True)

        if wifi_datas.size != 0:
            sep_tss = np.unique(wifi_datas[:, 0].astype(float))
            wifi_datas_list = split_ts_seq(wifi_datas, sep_tss)
            for wifi_ds in wifi_datas_list:
                diff = np.abs(step_positions[:, 0] - float(wifi_ds[0, 0]))
                index = np.argmin(diff)
                target_xy_key = tuple(step_positions[index, 1:3])
                if target_xy_key in mwi_datas:
                    mwi_datas[target_xy_key]['wifi'] = np.append(mwi_datas[target_xy_key]['wifi'], wifi_ds, axis=0)
                else:
                    mwi_datas[target_xy_key] = {
                        'magnetic': np.zeros((0, 4)),
                        'wifi': wifi_ds,
                        'ibeacon': np.zeros((0, 3))
                    }

        if ibeacon_datas.size != 0:
            sep_tss = np.unique(ibeacon_datas[:, 0].astype(float))
            ibeacon_datas_list = split_ts_seq(ibeacon_datas, sep_tss)
            for ibeacon_ds in ibeacon_datas_list:
                diff = np.abs(step_positions[:, 0] - float(ibeacon_ds[0, 0]))
                index = np.argmin(diff)
                target_xy_key = tuple(step_positions[index, 1:3])
                if target_xy_key in mwi_datas:
                    mwi_datas[target_xy_key]['ibeacon'] = np.append(mwi_datas[target_xy_key]['ibeacon'], ibeacon_ds, axis=0)
                else:
                    mwi_datas[target_xy_key] = {
                        'magnetic': np.zeros((0, 4)),
                        'wifi': np.zeros((0, 5)),
                        'ibeacon': ibeacon_ds
                    }

        sep_tss = np.unique(magn_datas[:, 0].astype(float))
        magn_datas_list = split_ts_seq(magn_datas, sep_tss)
        for magn_ds in magn_datas_list:
            diff = np.abs(step_positions[:, 0] - float(magn_ds[0, 0]))
            index = np.argmin(diff)
            target_xy_key = tuple(step_positions[index, 1:3])
            if target_xy_key in mwi_datas:
                mwi_datas[target_xy_key]['magnetic'] = np.append(mwi_datas[target_xy_key]['magnetic'], magn_ds, axis=0)
            else:
                mwi_datas[target_xy_key] = {
                    'magnetic': magn_ds,
                    'wifi': np.zeros((0, 5)),
                    'ibeacon': np.zeros((0, 3))
                }

    return mwi_datas


def extract_magnetic_strength(mwi_datas):
    magnetic_strength = {}
    for position_key in mwi_datas:
        # print(f'Position: {position_key}')

        magnetic_data = mwi_datas[position_key]['magnetic']
        magnetic_s = np.mean(np.sqrt(np.sum(magnetic_data[:, 1:4] ** 2, axis=1)))
        magnetic_strength[position_key] = magnetic_s

    return magnetic_strength


def extract_wifi_rssi(mwi_datas):
    wifi_rssi = {}
    for position_key in mwi_datas:
        # print(f'Position: {position_key}')

        wifi_data = mwi_datas[position_key]['wifi']
        for wifi_d in wifi_data:
            bssid = wifi_d[2]
            rssi = int(wifi_d[3])

            if bssid in wifi_rssi:
                position_rssi = wifi_rssi[bssid]
                if position_key in position_rssi:
                    old_rssi = position_rssi[position_key][0]
                    old_count = position_rssi[position_key][1]
                    position_rssi[position_key][0] = (old_rssi * old_count + rssi) / (old_count + 1)
                    position_rssi[position_key][1] = old_count + 1
                else:
                    position_rssi[position_key] = np.array([rssi, 1])
            else:
                position_rssi = {}
                position_rssi[position_key] = np.array([rssi, 1])

            wifi_rssi[bssid] = position_rssi

    return wifi_rssi


def extract_ibeacon_rssi(mwi_datas):
    ibeacon_rssi = {}
    for position_key in mwi_datas:
        # print(f'Position: {position_key}')

        ibeacon_data = mwi_datas[position_key]['ibeacon']
        for ibeacon_d in ibeacon_data:
            ummid = ibeacon_d[1]
            rssi = int(ibeacon_d[2])

            if ummid in ibeacon_rssi:
                position_rssi = ibeacon_rssi[ummid]
                if position_key in position_rssi:
                    old_rssi = position_rssi[position_key][0]
                    old_count = position_rssi[position_key][1]
                    position_rssi[position_key][0] = (old_rssi * old_count + rssi) / (old_count + 1)
                    position_rssi[position_key][1] = old_count + 1
                else:
                    position_rssi[position_key] = np.array([rssi, 1])
            else:
                position_rssi = {}
                position_rssi[position_key] = np.array([rssi, 1])

            ibeacon_rssi[ummid] = position_rssi

    return ibeacon_rssi


def extract_wifi_count(mwi_datas):
    wifi_counts = {}
    for position_key in mwi_datas:
        # print(f'Position: {position_key}')

        wifi_data = mwi_datas[position_key]['wifi']
        count = np.unique(wifi_data[:, 2]).shape[0]
        wifi_counts[position_key] = count

    return wifi_counts


if __name__ == "__main__":
    Path(path_image_save_dir).mkdir(parents=True, exist_ok=True)
    Path(magn_image_save_dir).mkdir(parents=True, exist_ok=True)
    Path(wifi_image_save_dir).mkdir(parents=True, exist_ok=True)
    Path(ibeacon_image_save_dir).mkdir(parents=True, exist_ok=True)

    with open(floor_info_filename) as f:
        floor_info = json.load(f)
    width_meter = floor_info["map_info"]["width"]
    height_meter = floor_info["map_info"]["height"]

    path_filenames = list(Path(path_data_dir).resolve().glob("*.txt"))

    # 1. visualize ground truth positions
    print('Visualizing ground truth positions...')
    for path_filename in path_filenames:
        print(f'Processing file: {path_filename}...')

        path_data = read_data_file(path_filename)
        path_id = path_filename.name.split(".")[0]
        fig = visualize_trajectory(path_data.waypoint[:, 1:3], floor_plan_filename, width_meter, height_meter, title=path_id, show=False)
        html_filename = f'{path_image_save_dir}/{path_id}.html'
        html_filename = str(Path(html_filename).resolve())
        save_figure_to_html(fig, html_filename)

    # 2. visualize step positions, magnetic, wifi, ibeacon
    print('Visualizing more information...')
    mwi_datas = calibrate_magnetic_wifi_ibeacon_to_position(path_filenames)

    step_positions = np.array(list(mwi_datas.keys()))
    fig = visualize_trajectory(step_positions, floor_plan_filename, width_meter, height_meter, mode='markers', title='Step Position', show=True)
    html_filename = f'{step_position_image_save_dir}/step_position.html'
    html_filename = str(Path(html_filename).resolve())
    save_figure_to_html(fig, html_filename)

    magnetic_strength = extract_magnetic_strength(mwi_datas)
    heat_positions = np.array(list(magnetic_strength.keys()))
    heat_values = np.array(list(magnetic_strength.values()))
    fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='mu tesla', title='Magnetic Strength', show=True)
    html_filename = f'{magn_image_save_dir}/magnetic_strength.html'
    html_filename = str(Path(html_filename).resolve())
    save_figure_to_html(fig, html_filename)

    wifi_rssi = extract_wifi_rssi(mwi_datas)
    print(f'This floor has {len(wifi_rssi.keys())} wifi aps')
    ten_wifi_bssids = list(wifi_rssi.keys())[0:10]
    print('Example 10 wifi ap bssids:\n')
    for bssid in ten_wifi_bssids:
        print(bssid)
    target_wifi = input("Please input target wifi ap bssid:\n")
    # target_wifi = '1e:74:9c:a7:b2:e4'
    heat_positions = np.array(list(wifi_rssi[target_wifi].keys()))
    heat_values = np.array(list(wifi_rssi[target_wifi].values()))[:, 0]
    fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='dBm', title=f'Wifi: {target_wifi} RSSI', show=True)
    html_filename = f'{wifi_image_save_dir}/{target_wifi.replace(":", "-")}.html'
    html_filename = str(Path(html_filename).resolve())
    save_figure_to_html(fig, html_filename)

    ibeacon_rssi = extract_ibeacon_rssi(mwi_datas)
    print(f'This floor has {len(ibeacon_rssi.keys())} ibeacons')
    ten_ibeacon_ummids = list(ibeacon_rssi.keys())[0:10]
    print('Example 10 ibeacon UUID_MajorID_MinorIDs:\n')
    for ummid in ten_ibeacon_ummids:
        print(ummid)
    target_ibeacon = input("Please input target ibeacon UUID_MajorID_MinorID:\n")
    # target_ibeacon = 'FDA50693-A4E2-4FB1-AFCF-C6EB07647825_10073_61418'
    heat_positions = np.array(list(ibeacon_rssi[target_ibeacon].keys()))
    heat_values = np.array(list(ibeacon_rssi[target_ibeacon].values()))[:, 0]
    fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='dBm', title=f'iBeacon: {target_ibeacon} RSSI', show=True)
    html_filename = f'{ibeacon_image_save_dir}/{target_ibeacon}.html'
    html_filename = str(Path(html_filename).resolve())
    save_figure_to_html(fig, html_filename)

    wifi_counts = extract_wifi_count(mwi_datas)
    heat_positions = np.array(list(wifi_counts.keys()))
    heat_values = np.array(list(wifi_counts.values()))
    # filter out positions where no wifi was detected
    mask = heat_values != 0
    heat_positions = heat_positions[mask]
    heat_values = heat_values[mask]
    fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='number', title='Wifi Count', show=True)
    html_filename = f'{wifi_count_image_save_dir}/wifi_count.html'
    html_filename = str(Path(html_filename).resolve())
    save_figure_to_html(fig, html_filename)

    print('fff')
@ -0,0 +1,132 @@
import plotly.graph_objs as go
from PIL import Image


def save_figure_to_html(fig, filename):
    fig.write_html(filename)


def visualize_trajectory(trajectory, floor_plan_filename, width_meter, height_meter, title=None, mode='lines + markers + text', show=False):
    fig = go.Figure()

    # add trajectory
    size_list = [6] * trajectory.shape[0]
    size_list[0] = 10
    size_list[-1] = 10

    color_list = ['rgba(4, 174, 4, 0.5)'] * trajectory.shape[0]
    color_list[0] = 'rgba(12, 5, 235, 1)'
    color_list[-1] = 'rgba(235, 5, 5, 1)'

    position_count = {}
    text_list = []
    for i in range(trajectory.shape[0]):
        if str(trajectory[i]) in position_count:
            position_count[str(trajectory[i])] += 1
        else:
            position_count[str(trajectory[i])] = 0
        # pad repeated positions with spaces so their index labels don't overlap
        text_list.append(' ' * position_count[str(trajectory[i])] + f'{i}')
    text_list[0] = 'Start Point: 0'
    text_list[-1] = f'End Point: {trajectory.shape[0] - 1}'

    fig.add_trace(
        go.Scattergl(
            x=trajectory[:, 0],
            y=trajectory[:, 1],
            mode=mode,
            marker=dict(size=size_list, color=color_list),
            line=dict(shape='linear', color='rgb(100, 10, 100)', width=2, dash='dot'),
            text=text_list,
            textposition="top center",
            name='trajectory',
        ))

    # add floor plan
    floor_plan = Image.open(floor_plan_filename)
    fig.update_layout(images=[
        go.layout.Image(
            source=floor_plan,
            xref="x",
            yref="y",
            x=0,
            y=height_meter,
            sizex=width_meter,
            sizey=height_meter,
            sizing="contain",
            opacity=1,
            layer="below",
        )
    ])

    # configure
    fig.update_xaxes(autorange=False, range=[0, width_meter])
    fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
    fig.update_layout(
        title=go.layout.Title(
            text=title or "No title.",
            xref="paper",
            x=0,
        ),
        autosize=True,
        width=900,
        height=200 + 900 * height_meter / width_meter,
        template="plotly_white",
    )

    if show:
        fig.show()

    return fig


def visualize_heatmap(position, value, floor_plan_filename, width_meter, height_meter, colorbar_title="colorbar", title=None, show=False):
    fig = go.Figure()

    # add heat map
    fig.add_trace(
        go.Scatter(x=position[:, 0],
                   y=position[:, 1],
                   mode='markers',
                   marker=dict(size=7,
                               color=value,
                               colorbar=dict(title=colorbar_title),
                               colorscale="Rainbow"),
                   text=value,
                   name=title))

    # add floor plan
    floor_plan = Image.open(floor_plan_filename)
    fig.update_layout(images=[
        go.layout.Image(
            source=floor_plan,
            xref="x",
            yref="y",
            x=0,
            y=height_meter,
            sizex=width_meter,
            sizey=height_meter,
            sizing="contain",
            opacity=1,
            layer="below",
        )
    ])

    # configure
    fig.update_xaxes(autorange=False, range=[0, width_meter])
    fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
    fig.update_layout(
        title=go.layout.Title(
            text=title or "No title.",
            xref="paper",
            x=0,
        ),
        autosize=True,
        width=900,
        height=200 + 900 * height_meter / width_meter,
        template="plotly_white",
    )

    if show:
        fig.show()

    return fig
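
# Example usage (editor's sketch; file names and numbers are placeholders):
# import numpy as np
# positions = np.array([[10.0, 20.0], [12.0, 22.0]])
# fig = visualize_heatmap(positions, np.array([-45, -60]), 'floor_image.png',
#                         width_meter=80, height_meter=60,
#                         colorbar_title='dBm', title='Example heatmap', show=False)
# save_figure_to_html(fig, 'example_heatmap.html')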
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
@ -0,0 +1,375 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from dask.distributed import wait\n",
    "import glob\n",
    "\n",
    "SENSORS = ['acce','acce_uncali','gyro',\n",
    "           'gyro_uncali','magn','magn_uncali','ahrs']\n",
    "\n",
    "NFEAS = {\n",
    "    'acce': 3,\n",
    "    'acce_uncali': 3,\n",
    "    'gyro': 3,\n",
    "    'gyro_uncali': 3,\n",
    "    'magn': 3,\n",
    "    'magn_uncali': 3,\n",
    "    'ahrs': 3,\n",
    "    'wifi': 1,\n",
    "    'ibeacon': 1,\n",
    "    'waypoint': 3\n",
    "}\n",
    "\n",
    "ACOLS = ['timestamp','x','y','z']\n",
    "\n",
    "FIELDS = {\n",
    "    'acce': ACOLS,\n",
    "    'acce_uncali': ACOLS,\n",
    "    'gyro': ACOLS,\n",
    "    'gyro_uncali': ACOLS,\n",
    "    'magn': ACOLS,\n",
    "    'magn_uncali': ACOLS,\n",
    "    'ahrs': ACOLS,\n",
    "    'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n",
    "    'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n",
    "    'waypoint': ['timestamp','x','y']\n",
    "}\n",
    "\n",
    "def to_frame(data, col):\n",
    "    cols = FIELDS[col]\n",
    "    is_dummy = False\n",
    "    if data.shape[0]>0:\n",
    "        df = pd.DataFrame(data, columns=cols)\n",
    "    else:\n",
    "        df = create_dummy_df(cols)\n",
    "        is_dummy = True\n",
    "    for col in df.columns:\n",
    "        if 'timestamp' in col:\n",
    "            df[col] = df[col].astype('int64')\n",
    "    return df, is_dummy\n",
    "\n",
    "def create_dummy_df(cols):\n",
    "    df = pd.DataFrame()\n",
    "    for col in cols:\n",
    "        df[col] = [0]\n",
    "        if col in ['ssid','bssid']:\n",
    "            df[col] = df[col].map(str)\n",
    "    return df\n",
    "\n",
    "from dataclasses import dataclass\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "@dataclass\n",
    "class ReadData:\n",
    "    acce: np.ndarray\n",
    "    acce_uncali: np.ndarray\n",
    "    gyro: np.ndarray\n",
    "    gyro_uncali: np.ndarray\n",
    "    magn: np.ndarray\n",
    "    magn_uncali: np.ndarray\n",
    "    ahrs: np.ndarray\n",
    "    wifi: np.ndarray\n",
    "    ibeacon: np.ndarray\n",
    "    waypoint: np.ndarray\n",
    "\n",
    "\n",
    "def read_data_file(data_filename):\n",
    "    acce = []\n",
    "    acce_uncali = []\n",
    "    gyro = []\n",
    "    gyro_uncali = []\n",
    "    magn = []\n",
    "    magn_uncali = []\n",
    "    ahrs = []\n",
    "    wifi = []\n",
    "    ibeacon = []\n",
    "    waypoint = []\n",
    "\n",
    "    with open(data_filename, 'r', encoding='utf-8') as file:\n",
    "        lines = file.readlines()\n",
    "\n",
    "    for line_data in lines:\n",
    "        line_data = line_data.strip()\n",
    "        if not line_data or line_data[0] == '#':\n",
    "            continue\n",
    "\n",
    "        line_data = line_data.split('\\t')\n",
    "\n",
    "        if line_data[1] == 'TYPE_ACCELEROMETER':\n",
    "            acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n",
    "            acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_GYROSCOPE':\n",
    "            gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n",
    "            gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n",
    "            magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n",
    "            magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_ROTATION_VECTOR':\n",
    "            if len(line_data)>=5:\n",
    "                ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_WIFI':\n",
    "            sys_ts = line_data[0]\n",
    "            ssid = line_data[2]\n",
    "            bssid = line_data[3]\n",
    "            rssi = line_data[4]\n",
    "            lastseen_ts = line_data[6]\n",
    "            wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n",
    "            wifi.append(wifi_data)\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_BEACON':\n",
    "            ts = line_data[0]\n",
    "            uuid = line_data[2]\n",
    "            major = line_data[3]\n",
    "            minor = line_data[4]\n",
    "            rssi = line_data[6]\n",
    "            lastts = line_data[-1]\n",
    "            ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]\n",
    "            ibeacon.append(ibeacon_data)\n",
    "            continue\n",
    "\n",
    "        if line_data[1] == 'TYPE_WAYPOINT':\n",
    "            waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n",
    "\n",
    "    acce = np.array(acce)\n",
    "    acce_uncali = np.array(acce_uncali)\n",
    "    gyro = np.array(gyro)\n",
    "    gyro_uncali = np.array(gyro_uncali)\n",
    "    magn = np.array(magn)\n",
    "    magn_uncali = np.array(magn_uncali)\n",
    "    ahrs = np.array(ahrs)\n",
    "    wifi = np.array(wifi)\n",
    "    ibeacon = np.array(ibeacon)\n",
    "    waypoint = np.array(waypoint)\n",
    "\n",
    "    return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_test_dfs(PATH, test_files):\n",
    "    dtest = get_test_df(PATH)\n",
    "    buildings = set(dtest['building'].values.tolist())\n",
    "    dws = {}\n",
    "    ntest_files = []\n",
    "    for fname in tqdm(test_files):\n",
    "        path = fname.split('/')[-1].split('.')[0]\n",
    "        mask = dtest['path'] == path\n",
    "        dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)\n",
    "        ntest_files.append(fname)\n",
    "    return dws\n",
    "\n",
    "def get_test_df(PATH):\n",
    "    dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n",
    "    dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])\n",
    "    dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])\n",
    "    dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])\n",
    "    dtest['timestamp'] = dtest['timestamp'].astype('int64')\n",
    "    dtest = dtest.sort_values(['path','timestamp']).reset_index(drop=True)\n",
    "    return dtest\n",
    "\n",
    "def get_time_gap(name):\n",
    "    data = read_data_file(name)\n",
    "    db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n",
    "# print(db,no_ibeacon)\n",
    "\n",
    "    if no_ibeacon==0:\n",
    "        gap = db['last_timestamp'] - db['timestamp']\n",
    "        assert gap.unique().shape[0]==1\n",
    "        return gap.values[0],no_ibeacon\n",
    "\n",
    "    if no_ibeacon==1:\n",
    "        # Group wifis by timestamp\n",
    "        wifi_groups = pd.DataFrame(data.wifi).groupby(0)\n",
    "        # Take the largest (lastseen - scan timestamp) difference across the scan blocks.\n",
    "        est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max()\n",
    "        return est_ts,no_ibeacon\n",
    "\n",
    "\n",
    "def fix_timestamp_test(df, gap):\n",
    "    df['real_timestamp'] = df['timestamp'] + gap\n",
    "    return df"
   ]
  },
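  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Editor's sketch (not part of the original notebook): how the helpers above\n",
    "# might be combined for a single test file; the path is one of those listed below.\n",
    "# fname = '../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt'\n",
    "# gap, no_ibeacon = get_time_gap(fname)\n",
    "# dws = get_test_dfs('../input/indoor-location-navigation', [fname])\n",
    "# dws[fname] = fix_timestamp_test(dws[fname], gap)  # adds 'real_timestamp'\n"
   ]
  },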
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n",
       " '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n",
       " '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n",
       " '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n",
    "test_files_ori[:4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ec2-user/anaconda3/lib/python3.7/site-packages/distributed/dashboard/core.py:79: UserWarning: \n",
      "Port 8787 is already in use. \n",
      "Perhaps you already have a cluster running?\n",
      "Hosting the diagnostics dashboard on a random port instead.\n",
      "  warnings.warn(\"\\n\" + msg)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table style=\"border: 2px solid white;\">\n",
       "<tr>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3 style=\"text-align: left;\">Client</h3>\n",
       "<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
       "  <li><b>Scheduler: </b>tcp://127.0.0.1:42097</li>\n",
       "  <li><b>Dashboard: </b><a href='http://127.0.0.1:39155/status' target='_blank'>http://127.0.0.1:39155/status</a>\n",
       "</ul>\n",
       "</td>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3 style=\"text-align: left;\">Cluster</h3>\n",
       "<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
       "  <li><b>Workers: </b>8</li>\n",
       "  <li><b>Cores: </b>8</li>\n",
       "  <li><b>Memory: </b>32.89 GB</li>\n",
       "</ul>\n",
       "</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<Client: 'tcp://127.0.0.1:42097' processes=8 threads=8, memory=32.89 GB>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import dask\n",
    "from dask.distributed import Client, wait, LocalCluster\n",
    "\n",
    "# set n_workers to number of cores\n",
    "client = Client(n_workers=8,\n",
    "                threads_per_worker=1)\n",
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 626/626 [00:00<00:00, 4552.03it/s]\n",
      "100%|██████████| 626/626 [00:16<00:00, 37.39it/s]  \n"
     ]
    }
   ],
   "source": [
    "futures = []\n",
    "for fname in tqdm(test_files_ori, total=len(test_files_ori)):\n",
    "    f = client.submit(get_time_gap,fname)\n",
    "    futures.append(f)\n",
    "\n",
    "testpath2gap = {}\n",
    "for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n",
    "    testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "with open('testpath2gap.pkl','wb') as f:\n",
    "    pickle.dump(testpath2gap,f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@ -0,0 +1,361 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.007463,
     "end_time": "2021-02-03T20:30:06.571139",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.563676",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### Wifi features\n",
    "\n",
    "This is the code to generate the wifi features available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). Using these features you can get a score below 14. For an example notebook that uses them, see [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter). The solution uses only waypoint, wifi, and timestamp data. See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for an outline of the solution method and ways to improve it.\n",
    "\n",
    "There are `break` statements inserted into the loops that need to be removed to get this to run. Right now data is written to the current working directory. The run takes 2-4 hours depending on the hard drive etc. There is a lot of room to speed up feature generation.\n",
    "\n",
    "**Update:** I added one line that creates a column for the path filename; this allows for a GroupKFold cross-validation.\n"
   ]
  },
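  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Editor's sketch (hypothetical toy data, not part of the original pipeline):\n",
    "# the core feature construction used below. Each wifi block becomes one row of\n",
    "# bssid -> rssi values, reindexed against a fixed bssid list; missing APs = -999.\n",
    "# import pandas as pd\n",
    "# vocab = ['aa:aa', 'bb:bb', 'cc:cc']                            # hypothetical bssids\n",
    "# block = pd.DataFrame({3: ['aa:aa', 'cc:cc'], 4: [-45, -70]})   # bssid, rssi columns\n",
    "# block.set_index(3)[4].reindex(vocab).fillna(-999)              # -> [-45, -999, -70]\n"
   ]
  },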
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:06.590945Z",
     "iopub.status.busy": "2021-02-03T20:30:06.589984Z",
     "iopub.status.idle": "2021-02-03T20:30:06.593594Z",
     "shell.execute_reply": "2021-02-03T20:30:06.592887Z"
    },
    "papermill": {
     "duration": 0.01623,
     "end_time": "2021-02-03T20:30:06.593847",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.577617",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import glob\n",
    "import os\n",
    "import gc\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:06.614521Z",
     "iopub.status.busy": "2021-02-03T20:30:06.613572Z",
     "iopub.status.idle": "2021-02-03T20:30:06.616669Z",
     "shell.execute_reply": "2021-02-03T20:30:06.616121Z"
    },
    "papermill": {
     "duration": 0.015585,
     "end_time": "2021-02-03T20:30:06.616837",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.601252",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "base_path = '../input/indoor-location-navigation/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:06.639011Z",
     "iopub.status.busy": "2021-02-03T20:30:06.638118Z",
     "iopub.status.idle": "2021-02-03T20:30:09.333807Z",
     "shell.execute_reply": "2021-02-03T20:30:09.334360Z"
    },
    "papermill": {
     "duration": 2.711076,
     "end_time": "2021-02-03T20:30:09.334617",
     "exception": false,
     "start_time": "2021-02-03T20:30:06.623541",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# pull out all the buildings actually used in the test set; given the current method we don't need the other ones\n",
    "ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
    "\n",
    "# only 24 of the total buildings are used in the test set,\n",
    "# this allows us to greatly reduce the initial size of the dataset\n",
    "\n",
    "ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
    "used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
    "\n",
    "# dictionary used to map the floor codes to the values used in the submission file.\n",
    "floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
    "             \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.359905Z",
     "iopub.status.busy": "2021-02-03T20:30:09.359123Z",
     "iopub.status.idle": "2021-02-03T20:30:09.362909Z",
     "shell.execute_reply": "2021-02-03T20:30:09.362224Z"
    },
    "papermill": {
     "duration": 0.021272,
     "end_time": "2021-02-03T20:30:09.363069",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.341797",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# get only the wifi bssid that occur over 1000 times (this number can be experimented with)\n",
    "# these will be the only ones used when constructing features\n",
    "bssid = dict()\n",
    "\n",
    "for building in used_buildings:\n",
    "    break\n",
    "    folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
    "    print(building)\n",
    "    wifi = list()\n",
    "    for folder in folders:\n",
    "        floor = floor_map[folder.split('/')[-1]]\n",
    "        files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
    "        for file in files:\n",
    "            with open(file) as f:\n",
    "                txt = f.readlines()\n",
    "            for e, line in enumerate(txt):\n",
    "                tmp = line.strip().split()\n",
    "                if tmp[1] == \"TYPE_WIFI\":\n",
    "                    wifi.append(tmp)\n",
    "    df = pd.DataFrame(wifi)\n",
    "    #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
    "    value_counts = df[3].value_counts()\n",
    "    top_bssid = value_counts[value_counts > 0].index.tolist()\n",
    "    print(len(top_bssid))\n",
    "    bssid[building] = top_bssid\n",
    "    del df\n",
    "    del wifi\n",
    "    gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.383252Z",
     "iopub.status.busy": "2021-02-03T20:30:09.382581Z",
     "iopub.status.idle": "2021-02-03T20:30:09.386704Z",
     "shell.execute_reply": "2021-02-03T20:30:09.385809Z"
    },
    "papermill": {
     "duration": 0.016635,
     "end_time": "2021-02-03T20:30:09.386885",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.370250",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "with open(\"bssid_1000.json\", \"w\") as f:\n",
    "    json.dump(bssid, f)\n",
    "\n",
    "with open(\"bssid_1000.json\") as f:\n",
    "    bssid = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.418284Z",
     "iopub.status.busy": "2021-02-03T20:30:09.417119Z",
     "iopub.status.idle": "2021-02-03T20:30:09.420513Z",
     "shell.execute_reply": "2021-02-03T20:30:09.419767Z"
    },
    "papermill": {
     "duration": 0.026514,
     "end_time": "2021-02-03T20:30:09.420694",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.394180",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# generate all the training data\n",
    "building_dfs = dict()\n",
    "\n",
    "for building in used_buildings:\n",
    "    break\n",
    "    folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
    "    dfs = list()\n",
    "    index = sorted(bssid[building])\n",
    "    print(building)\n",
    "    for folder in folders:\n",
    "        floor = floor_map[folder.split('/')[-1]]\n",
    "        files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
    "        print(floor)\n",
    "        for file in files:\n",
    "            wifi = list()\n",
    "            waypoint = list()\n",
    "            with open(file) as f:\n",
    "                txt = f.readlines()\n",
    "            for line in txt:\n",
    "                line = line.strip().split()\n",
    "                if line[1] == \"TYPE_WAYPOINT\":\n",
    "                    waypoint.append(line)\n",
    "                if line[1] == \"TYPE_WIFI\":\n",
    "                    wifi.append(line)\n",
    "\n",
    "            df = pd.DataFrame(np.array(wifi))\n",
    "\n",
    "            # generate a feature, and label for each wifi block\n",
    "            for gid, g in df.groupby(0):\n",
    "                dists = list()\n",
    "                for e, k in enumerate(waypoint):\n",
    "                    dist = abs(int(gid) - int(k[0]))\n",
    "                    dists.append(dist)\n",
    "                nearest_wp_index = np.argmin(dists)\n",
    "\n",
    "                g = g.drop_duplicates(subset=3)\n",
    "                tmp = g.iloc[:,3:5]\n",
    "                feat = tmp.set_index(3).reindex(index).replace(np.nan, -999).T\n",
    "                feat[\"x\"] = float(waypoint[nearest_wp_index][2])\n",
    "                feat[\"y\"] = float(waypoint[nearest_wp_index][3])\n",
    "                feat[\"f\"] = floor\n",
    "                feat[\"path\"] = file.split('/')[-1].split('.')[0] # useful for crossvalidation\n",
    "                dfs.append(feat)\n",
    "\n",
    "    building_df = pd.concat(dfs)\n",
    "    building_dfs[building] = df\n",
    "    building_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+building+\"_train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-02-03T20:30:09.454304Z",
     "iopub.status.busy": "2021-02-03T20:30:09.451093Z",
     "iopub.status.idle": "2021-02-03T20:30:09.464308Z",
     "shell.execute_reply": "2021-02-03T20:30:09.464854Z"
    },
    "papermill": {
     "duration": 0.036471,
     "end_time": "2021-02-03T20:30:09.465079",
     "exception": false,
     "start_time": "2021-02-03T20:30:09.428608",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Generate the features for the test set\n",
    "\n",
    "ssubm_building_g = ssubm_df.groupby(0)\n",
    "feature_dict = dict()\n",
    "\n",
    "for gid0, g0 in ssubm_building_g:\n",
    "    break\n",
    "    index = sorted(bssid[g0.iloc[0,0]])\n",
    "    feats = list()\n",
    "    print(gid0)\n",
    "    for gid,g in g0.groupby(1):\n",
    "\n",
    "        # get all wifi time locations\n",
    "        with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
    "            txt = f.readlines()\n",
    "\n",
    "        wifi = list()\n",
    "\n",
    "        for line in txt:\n",
    "            line = line.strip().split()\n",
    "            if line[1] == \"TYPE_WIFI\":\n",
    "                wifi.append(line)\n",
    "\n",
    "        wifi_df = pd.DataFrame(wifi)\n",
    "        wifi_points = pd.DataFrame(wifi_df.groupby(0).count().index.tolist())\n",
    "\n",
    "        for timepoint in g.iloc[:,2].tolist():\n",
    "\n",
    "            deltas = (wifi_points.astype(int) - int(timepoint)).abs()\n",
    "            min_delta_idx = deltas.values.argmin()\n",
    "            wifi_block_timestamp = wifi_points.iloc[min_delta_idx].values[0]\n",
    "\n",
    "            wifi_block = wifi_df[wifi_df[0] == wifi_block_timestamp].drop_duplicates(subset=3)\n",
    "            feat = wifi_block.set_index(3)[4].reindex(index).fillna(-999)\n",
    "\n",
    "            feat['site_path_timestamp'] = g.iloc[0,0] + \"_\" + g.iloc[0,1] + \"_\" + timepoint\n",
    "            feats.append(feat)\n",
    "    feature_df = pd.concat(feats, axis=1).T\n",
    "    feature_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+gid0+\"_test.csv\")\n",
    "    feature_dict[gid0] = feature_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 9.894085,
   "end_time": "2021-02-03T20:30:10.083699",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-02-03T20:30:00.189614",
   "version": "2.2.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
File diff suppressed because it is too large
File diff suppressed because it is too large