Create solution

pull/2/head
benjas 4 years ago
parent 1f5a7e770c
commit caa5ff1701

@@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"\n",
"ss1 = pd.read_csv('submission_wifi.csv')\n",
"ss2 = pd.read_csv('submission_wifi_sensor.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.99816888],\n",
" [0.99816888, 1. ]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.corrcoef([ss1.y,ss2.y])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.99853603],\n",
" [0.99853603, 1. ]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.corrcoef([ss1.x,ss2.x])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"ss = ss1.copy()\n",
"ss['x'] = ss1['x']*0.5+ss2['x']*0.5\n",
"ss['y'] = ss1['y']*0.5+ss2['y']*0.5"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"ss.to_csv('sub_wifi_sensor.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.99717624],\n",
" [0.99717624, 1. ]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ss1 = pd.read_csv('sub_wifi_sensor_post.csv').sort_values(by='site_path_timestamp').reset_index(drop=True)\n",
"ss2 = pd.read_csv('submission_ym.csv').sort_values(by='site_path_timestamp').reset_index(drop=True)\n",
"np.corrcoef([ss1.x,ss2.x])\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"ss = ss1.copy()\n",
"ss['x'] = ss1['x']*0.5+ss2['x']*0.5\n",
"ss['y'] = ss1['y']*0.5+ss2['y']*0.5"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"ss.to_csv('final.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,361 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.007463,
"end_time": "2021-02-03T20:30:06.571139",
"exception": false,
"start_time": "2021-02-03T20:30:06.563676",
"status": "completed"
},
"tags": []
},
"source": [
"### Wifi features\n",
"\n",
"This this is the code to generate the wifi features available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). Using these features can get a score below 14. For an example notebook using them see [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter). They only uses waypoints, wifi and timestamp data to generate solution. See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for an outline of this solution method, and methods of improvement.\n",
"\n",
"There are `break`'s inserted into loops which need to be removed to get this to run. Right now data is written to current working directory. This takes 2-4 hours to run depending on hard drive etc. There is a lot of room for improvement speeding up feature generation. \n",
"\n",
"**Update:** I added one line that creates a column for the path filename, this allows for a groupkfold crossvalidation. \n"
]
},
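{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the GroupKFold split the `path` column enables (the feature file name, target choice, and fold count here are assumptions, not part of the original pipeline):\n",
"\n",
"```python\n",
"import pandas as pd\n",
"from sklearn.model_selection import GroupKFold\n",
"\n",
"train = pd.read_csv('5a0546857ecc773753327266_train.csv')  # one building's generated features\n",
"X = train.drop(columns=['x', 'y', 'f', 'path'])\n",
"for tr_idx, va_idx in GroupKFold(n_splits=5).split(X, train['x'], groups=train['path']):\n",
"    pass  # fit and evaluate a model per fold; paths never span folds\n",
"```"
]
},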
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.590945Z",
"iopub.status.busy": "2021-02-03T20:30:06.589984Z",
"iopub.status.idle": "2021-02-03T20:30:06.593594Z",
"shell.execute_reply": "2021-02-03T20:30:06.592887Z"
},
"papermill": {
"duration": 0.01623,
"end_time": "2021-02-03T20:30:06.593847",
"exception": false,
"start_time": "2021-02-03T20:30:06.577617",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"import gc\n",
"import json "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.614521Z",
"iopub.status.busy": "2021-02-03T20:30:06.613572Z",
"iopub.status.idle": "2021-02-03T20:30:06.616669Z",
"shell.execute_reply": "2021-02-03T20:30:06.616121Z"
},
"papermill": {
"duration": 0.015585,
"end_time": "2021-02-03T20:30:06.616837",
"exception": false,
"start_time": "2021-02-03T20:30:06.601252",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"base_path = '../input/indoor-location-navigation/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.639011Z",
"iopub.status.busy": "2021-02-03T20:30:06.638118Z",
"iopub.status.idle": "2021-02-03T20:30:09.333807Z",
"shell.execute_reply": "2021-02-03T20:30:09.334360Z"
},
"papermill": {
"duration": 2.711076,
"end_time": "2021-02-03T20:30:09.334617",
"exception": false,
"start_time": "2021-02-03T20:30:06.623541",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# pull out all the buildings actually used in the test set, given current method we don't need the other ones\n",
"ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"\n",
"# only 24 of the total buildings are used in the test set, \n",
"# this allows us to greatly reduce the intial size of the dataset\n",
"\n",
"ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
"used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
"\n",
"# dictionary used to map the floor codes to the values used in the submission file. \n",
"floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.359905Z",
"iopub.status.busy": "2021-02-03T20:30:09.359123Z",
"iopub.status.idle": "2021-02-03T20:30:09.362909Z",
"shell.execute_reply": "2021-02-03T20:30:09.362224Z"
},
"papermill": {
"duration": 0.021272,
"end_time": "2021-02-03T20:30:09.363069",
"exception": false,
"start_time": "2021-02-03T20:30:09.341797",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# get only the wifi bssid that occur over 1000 times(this number can be experimented with)\n",
"# these will be the only ones used when constructing features\n",
"bssid = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
" print(building)\n",
" wifi = list()\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" for file in files:\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for e, line in enumerate(txt):\n",
" tmp = line.strip().split()\n",
" if tmp[1] == \"TYPE_WIFI\":\n",
" wifi.append(tmp)\n",
" df = pd.DataFrame(wifi)\n",
" #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
" value_counts = df[3].value_counts()\n",
" top_bssid = value_counts[value_counts > 0].index.tolist()\n",
" print(len(top_bssid))\n",
" bssid[building] = top_bssid\n",
" del df\n",
" del wifi\n",
" gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.383252Z",
"iopub.status.busy": "2021-02-03T20:30:09.382581Z",
"iopub.status.idle": "2021-02-03T20:30:09.386704Z",
"shell.execute_reply": "2021-02-03T20:30:09.385809Z"
},
"papermill": {
"duration": 0.016635,
"end_time": "2021-02-03T20:30:09.386885",
"exception": false,
"start_time": "2021-02-03T20:30:09.370250",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"with open(\"bssid_1000.json\", \"w\") as f:\n",
" json.dump(bssid, f)\n",
"\n",
"with open(\"bssid_1000.json\") as f:\n",
" bssid = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.418284Z",
"iopub.status.busy": "2021-02-03T20:30:09.417119Z",
"iopub.status.idle": "2021-02-03T20:30:09.420513Z",
"shell.execute_reply": "2021-02-03T20:30:09.419767Z"
},
"papermill": {
"duration": 0.026514,
"end_time": "2021-02-03T20:30:09.420694",
"exception": false,
"start_time": "2021-02-03T20:30:09.394180",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# generate all the training data \n",
"building_dfs = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
" dfs = list()\n",
" index = sorted(bssid[building])\n",
" print(building)\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" print(floor)\n",
" for file in files:\n",
" wifi = list()\n",
" waypoint = list()\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WAYPOINT\":\n",
" waypoint.append(line)\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" df = pd.DataFrame(np.array(wifi)) \n",
"\n",
" # generate a feature, and label for each wifi block\n",
" for gid, g in df.groupby(0):\n",
" dists = list()\n",
" for e, k in enumerate(waypoint):\n",
" dist = abs(int(gid) - int(k[0]))\n",
" dists.append(dist)\n",
" nearest_wp_index = np.argmin(dists)\n",
" \n",
" g = g.drop_duplicates(subset=3)\n",
" tmp = g.iloc[:,3:5]\n",
" feat = tmp.set_index(3).reindex(index).replace(np.nan, -999).T\n",
" feat[\"x\"] = float(waypoint[nearest_wp_index][2])\n",
" feat[\"y\"] = float(waypoint[nearest_wp_index][3])\n",
" feat[\"f\"] = floor\n",
" feat[\"path\"] = file.split('/')[-1].split('.')[0] # useful for crossvalidation\n",
" dfs.append(feat)\n",
" \n",
" building_df = pd.concat(dfs)\n",
" building_dfs[building] = df\n",
" building_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+building+\"_train.csv\")"
]
},
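{
"cell_type": "markdown",
"metadata": {},
"source": [
"The core of the feature construction above is the `set_index(...).reindex(...)` step: each wifi block becomes a fixed-length RSSI vector over the building's sorted bssid list, with unseen bssid filled by -999. A toy sketch with made-up values:\n",
"\n",
"```python\n",
"import pandas as pd\n",
"index = ['aa', 'bb', 'cc']  # the building's sorted bssid list\n",
"block = pd.DataFrame({3: ['bb', 'aa'], 4: [-50, -61]})  # column 3 = bssid, column 4 = rssi\n",
"feat = block.set_index(3)[4].reindex(index).fillna(-999)\n",
"# feat -> aa: -61, bb: -50, cc: -999\n",
"```"
]
},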
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.454304Z",
"iopub.status.busy": "2021-02-03T20:30:09.451093Z",
"iopub.status.idle": "2021-02-03T20:30:09.464308Z",
"shell.execute_reply": "2021-02-03T20:30:09.464854Z"
},
"papermill": {
"duration": 0.036471,
"end_time": "2021-02-03T20:30:09.465079",
"exception": false,
"start_time": "2021-02-03T20:30:09.428608",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Generate the features for the test set\n",
"\n",
"ssubm_building_g = ssubm_df.groupby(0)\n",
"feature_dict = dict()\n",
"\n",
"for gid0, g0 in ssubm_building_g:\n",
" break\n",
" index = sorted(bssid[g0.iloc[0,0]])\n",
" feats = list()\n",
" print(gid0)\n",
" for gid,g in g0.groupby(1):\n",
"\n",
" # get all wifi time locations, \n",
" with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
" txt = f.readlines()\n",
"\n",
" wifi = list()\n",
"\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" wifi_df = pd.DataFrame(wifi)\n",
" wifi_points = pd.DataFrame(wifi_df.groupby(0).count().index.tolist())\n",
" \n",
" for timepoint in g.iloc[:,2].tolist():\n",
"\n",
" deltas = (wifi_points.astype(int) - int(timepoint)).abs()\n",
" min_delta_idx = deltas.values.argmin()\n",
" wifi_block_timestamp = wifi_points.iloc[min_delta_idx].values[0]\n",
" \n",
" wifi_block = wifi_df[wifi_df[0] == wifi_block_timestamp].drop_duplicates(subset=3)\n",
" feat = wifi_block.set_index(3)[4].reindex(index).fillna(-999)\n",
"\n",
" feat['site_path_timestamp'] = g.iloc[0,0] + \"_\" + g.iloc[0,1] + \"_\" + timepoint\n",
" feats.append(feat)\n",
" feature_df = pd.concat(feats, axis=1).T\n",
" feature_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+gid0+\"_test.csv\")\n",
" feature_dict[gid0] = feature_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"papermill": {
"default_parameters": {},
"duration": 9.894085,
"end_time": "2021-02-03T20:30:10.083699",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2021-02-03T20:30:00.189614",
"version": "2.2.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,656 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"import gc\n",
"import json \n",
"base_path = '../input/indoor-location-navigation/'\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# pull out all the buildings actually used in the test set, given current method we don't need the other ones\n",
"ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"\n",
"# only 24 of the total buildings are used in the test set, \n",
"# this allows us to greatly reduce the intial size of the dataset\n",
"\n",
"ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
"used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
"\n",
"# dictionary used to map the floor codes to the values used in the submission file. \n",
"floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # get only the wifi bssid that occur over 1000 times(this number can be experimented with)\n",
"# # these will be the only ones used when constructing features\n",
"# bssid = dict()\n",
"\n",
"# for building in used_buildings:\n",
"# # break\n",
"# folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
"# print(building)\n",
"# wifi = list()\n",
"# for folder in folders:\n",
"# floor = floor_map[folder.split('/')[-1]]\n",
"# files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
"# for file in files:\n",
"# with open(file) as f:\n",
"# txt = f.readlines()\n",
"# for e, line in enumerate(txt):\n",
"# tmp = line.strip().split()\n",
"# if tmp[1] == \"TYPE_WIFI\":\n",
"# wifi.append(tmp)\n",
"# df = pd.DataFrame(wifi)\n",
"# #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
"# value_counts = df[3].value_counts()\n",
"# top_bssid = value_counts[value_counts >= 0].index.tolist()\n",
"# print(len(top_bssid))\n",
"# bssid[building] = top_bssid\n",
"# del df\n",
"# del wifi\n",
"# gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# with open(\"bssid.json\", \"w\") as f:\n",
"# json.dump(bssid, f)\n",
"\n",
"with open(\"bssid.json\") as f:\n",
" bssid = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"def multi_line_spliter(s):\n",
" matches = re.finditer(\"TYPE_\", s)\n",
" matches_positions = [match.start() for match in matches]\n",
" split_idx = [0] + [matches_positions[i]-14 for i in range(1, len(matches_positions))] + [len(s)]\n",
" return [s[split_idx[i]:split_idx[i+1]] for i in range(len(split_idx)-1)]\n",
" \n",
" \n",
"def load_df(file):\n",
" #path = str(Path(self.input_path)/f\"train/{self.site_id}/{self.floor}/{self.path_id}.txt\")\n",
" with open(file) as f:\n",
" data = f.readlines()\n",
"\n",
"# modified_data = []\n",
"# for s in data:\n",
"# if s.count(\"TYPE_\")>1:\n",
"# lines = multi_line_spliter(s)\n",
"# modified_data.extend(lines)\n",
"# else:\n",
"# modified_data.append(s)\n",
"# del data\n",
"# return modified_data\n",
" return data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from dataclasses import dataclass\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
"@dataclass\n",
"class ReadData:\n",
" acce: np.ndarray\n",
" acce_uncali: np.ndarray\n",
" gyro: np.ndarray\n",
" gyro_uncali: np.ndarray\n",
" magn: np.ndarray\n",
" magn_uncali: np.ndarray\n",
" ahrs: np.ndarray\n",
" wifi: np.ndarray\n",
" ibeacon: np.ndarray\n",
" waypoint: np.ndarray\n",
"\n",
"\n",
"def read_data_file(data_filename):\n",
" acce = []\n",
" acce_uncali = []\n",
" gyro = []\n",
" gyro_uncali = []\n",
" magn = []\n",
" magn_uncali = []\n",
" ahrs = []\n",
" wifi = []\n",
" ibeacon = []\n",
" waypoint = []\n",
"\n",
" with open(data_filename, 'r', encoding='utf-8') as file:\n",
" lines = file.readlines()\n",
"\n",
" for line_data in lines:\n",
" line_data = line_data.strip()\n",
" if not line_data or line_data[0] == '#':\n",
" continue\n",
"\n",
" line_data = line_data.split('\\t')\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER':\n",
" acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n",
" acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE':\n",
" gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n",
" gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n",
" magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n",
" magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ROTATION_VECTOR':\n",
" ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WIFI':\n",
" sys_ts = line_data[0]\n",
" ssid = line_data[2]\n",
" bssid = line_data[3]\n",
" rssi = line_data[4]\n",
" lastseen_ts = line_data[6]\n",
" wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n",
" wifi.append(wifi_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_BEACON':\n",
" ts = line_data[0]\n",
" uuid = line_data[2]\n",
" major = line_data[3]\n",
" minor = line_data[4]\n",
" rssi = line_data[6]\n",
" ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]\n",
" ibeacon.append(ibeacon_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WAYPOINT':\n",
" waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n",
"\n",
" acce = np.array(acce)\n",
" acce_uncali = np.array(acce_uncali)\n",
" gyro = np.array(gyro)\n",
" gyro_uncali = np.array(gyro_uncali)\n",
" magn = np.array(magn)\n",
" magn_uncali = np.array(magn_uncali)\n",
" ahrs = np.array(ahrs)\n",
" wifi = np.array(wifi)\n",
" ibeacon = np.array(ibeacon)\n",
" waypoint = np.array(waypoint)\n",
"\n",
" return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)\n"
]
},
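{
"cell_type": "markdown",
"metadata": {},
"source": [
"`read_data_file` parses one trace into typed numpy arrays. A quick usage sketch (the path here is hypothetical):\n",
"\n",
"```python\n",
"data = read_data_file(base_path + 'train/5a0546857ecc773753327266/F1/example_path.txt')\n",
"print(data.wifi.shape, data.waypoint.shape, data.acce.shape)\n",
"```"
]
},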
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5a0546857ecc773753327266\n",
"-1\n",
"0\n",
"1\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-74e73dc9d7ca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfloor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfile\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_data_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwifi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mwifi_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwifi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-5-26abf575fd1a>\u001b[0m in \u001b[0;36mread_data_file\u001b[0;34m(data_filename)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'TYPE_GYROSCOPE_UNCALIBRATED'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 55\u001b[0;31m \u001b[0mgyro_uncali\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 56\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# generate all the training data \n",
"# used_buildings[:1]\n",
"for building in used_buildings:\n",
" #break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
" dfs = list()\n",
" index = sorted(bssid[building])\n",
" print(building)\n",
" building_df_wifi = []\n",
" building_df_waypoint = []\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" print(floor)\n",
" for file in files:\n",
" data = read_data_file(file)\n",
" if len(data.wifi)>0:\n",
" wifi_data = pd.DataFrame(data.wifi)\n",
" wifi_data.columns = ['ts_wifi','ssid','bssid','rssi','ts_wifi_ls']\n",
" wifi_data['path'] = file.split('/')[-1].split('.')[0]\n",
" wifi_data['site'] = file.split('/')[-3]\n",
" wifi_data['floor'] = floor\n",
" wifi_data['floor_ori'] = folder.split('/')[-1]\n",
" building_df_wifi.append(wifi_data) \n",
" if len(data.waypoint)>0:\n",
" waypoint_data = pd.DataFrame(data.waypoint)\n",
" waypoint_data.columns = ['ts_waypoint','x','y']\n",
" waypoint_data['path'] = file.split('/')[-1].split('.')[0]\n",
" waypoint_data['site'] = file.split('/')[-3]\n",
" waypoint_data['floor'] = floor\n",
" waypoint_data['floor_ori'] = folder.split('/')[-1]\n",
" building_df_waypoint.append(waypoint_data) \n",
" building_df_wifi = pd.concat(building_df_wifi).reset_index(drop=True)\n",
" building_df_waypoint = pd.concat(building_df_waypoint).reset_index(drop=True)\n",
" building_df_wifi.to_csv('../input/data_abstract/'+building+\"_train_wifi.csv\")\n",
" building_df_waypoint.to_csv('../input/data_abstract/'+building+\"_train_waypoint.csv\")\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5a0546857ecc773753327266</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>0000000000009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5a0546857ecc773753327266</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>0000000009017</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"0 5a0546857ecc773753327266 046cfa46be49fc10834815c6 0000000000009\n",
"1 5a0546857ecc773753327266 046cfa46be49fc10834815c6 0000000009017"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ssubm_df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5a0546857ecc773753327266\n",
"5c3c44b80379370013e0fd2b\n",
"5d27075f03f801723c2e360f\n",
"5d27096c03f801723c31e5e0\n",
"5d27097f03f801723c320d97\n",
"5d27099f03f801723c32511d\n",
"5d2709a003f801723c3251bf\n",
"5d2709b303f801723c327472\n",
"5d2709bb03f801723c32852c\n",
"5d2709c303f801723c3299ee\n",
"5d2709d403f801723c32bd39\n",
"5d2709e003f801723c32d896\n",
"5da138274db8ce0c98bbd3d2\n",
"5da1382d4db8ce0c98bbe92e\n",
"5da138314db8ce0c98bbf3a0\n",
"5da138364db8ce0c98bc00f1\n",
"5da1383b4db8ce0c98bc11ab\n",
"5da138754db8ce0c98bca82f\n",
"5da138764db8ce0c98bcaa46\n",
"5da1389e4db8ce0c98bd0547\n",
"5da138b74db8ce0c98bd4774\n",
"5da958dd46f8266d0737457b\n",
"5dbc1d84c1eb61796cf7c010\n",
"5dc8cea7659e181adb076a3f\n"
]
}
],
"source": [
"ssubm_building_g = ssubm_df.groupby(0)\n",
"feature_dict = dict()\n",
"\n",
"for gid0, g0 in ssubm_building_g:\n",
" index = sorted(bssid[g0.iloc[0,0]])\n",
" feats = list()\n",
" print(gid0)\n",
" building_df_wifi = []\n",
" for gid,g in g0.groupby(1):\n",
"\n",
" # get all wifi time locations\n",
" #with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
" #txt = f.readlines()\n",
" data = read_data_file(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt'))\n",
" if len(data.wifi)>0:\n",
" wifi_data = pd.DataFrame(data.wifi)\n",
" wifi_data.columns = ['ts_wifi','ssid','bssid','rssi','ts_wifi_ls']\n",
" wifi_data['path'] = g.iloc[0,1]\n",
" wifi_data['site'] = gid0\n",
" building_df_wifi.append(wifi_data) \n",
" building_df_wifi = pd.concat(building_df_wifi).reset_index(drop=True)\n",
" building_df_wifi.to_csv('../input/data_abstract/'+gid0+\"_test_wifi.csv\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ts_wifi</th>\n",
" <th>ssid</th>\n",
" <th>bssid</th>\n",
" <th>rssi</th>\n",
" <th>ts_wifi_ls</th>\n",
" <th>path</th>\n",
" <th>site</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000000002340</td>\n",
" <td>da39a3ee5e6b4b0d3255bfef95601890afd80709</td>\n",
" <td>eebf5db207eec2f3e041f92153d789270f346821</td>\n",
" <td>-45</td>\n",
" <td>1578474544726</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000000002340</td>\n",
" <td>b9f0208be00bd8b337be7f12e02e3a3ce846e22b</td>\n",
" <td>7805f319f3f591986effe78c5b41143180278f2d</td>\n",
" <td>-46</td>\n",
" <td>1578474565732</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000000002340</td>\n",
" <td>ab150ecf6d972b476aeab16317bed6189d9f7cce</td>\n",
" <td>323607d8444900d64151ee06d164738ac727bbce</td>\n",
" <td>-46</td>\n",
" <td>1578474564279</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000000002340</td>\n",
" <td>b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7</td>\n",
" <td>b26914599f6d9ba16b43975394e1eeb9d82f4bab</td>\n",
" <td>-47</td>\n",
" <td>1578474565725</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000000002340</td>\n",
" <td>da39a3ee5e6b4b0d3255bfef95601890afd80709</td>\n",
" <td>02a1be3a5dab38320f879489d8a1e0f2a72768b3</td>\n",
" <td>-47</td>\n",
" <td>1578474547962</td>\n",
" <td>046cfa46be49fc10834815c6</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338901</th>\n",
" <td>0000000067545</td>\n",
" <td>b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7</td>\n",
" <td>f2fd7c8b3ae74a54ebcd5498b81b513b7c5e564a</td>\n",
" <td>-90</td>\n",
" <td>1578465380606</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338902</th>\n",
" <td>0000000067545</td>\n",
" <td>b9f0208be00bd8b337be7f12e02e3a3ce846e22b</td>\n",
" <td>94887049b5d6072ffd22a5e7de70523931861c2b</td>\n",
" <td>-91</td>\n",
" <td>1578465380654</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338903</th>\n",
" <td>0000000067545</td>\n",
" <td>b7e6027447eb1f81327d66cfd3adbe557aabf26c</td>\n",
" <td>e9f5c01efe9058d460ed3830b2a23b729dea930a</td>\n",
" <td>-92</td>\n",
" <td>1578465380607</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338904</th>\n",
" <td>0000000067545</td>\n",
" <td>02eb66d35bce69814f108c2f876e600a78ace137</td>\n",
" <td>0f5daed11a61e0d6941a1a42ff428ca216d61003</td>\n",
" <td>-93</td>\n",
" <td>1578465370203</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338905</th>\n",
" <td>0000000067545</td>\n",
" <td>d4f84491d3a4cd7fbd6f2e34e35fc3cf2f9c5c56</td>\n",
" <td>bfaebb72653fac35c19b00e7ce484dc2897f18bd</td>\n",
" <td>-93</td>\n",
" <td>1578465377777</td>\n",
" <td>ffcd9524c80c0fa5bb859eaf</td>\n",
" <td>5a0546857ecc773753327266</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>338906 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" ts_wifi ssid \\\n",
"0 0000000002340 da39a3ee5e6b4b0d3255bfef95601890afd80709 \n",
"1 0000000002340 b9f0208be00bd8b337be7f12e02e3a3ce846e22b \n",
"2 0000000002340 ab150ecf6d972b476aeab16317bed6189d9f7cce \n",
"3 0000000002340 b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7 \n",
"4 0000000002340 da39a3ee5e6b4b0d3255bfef95601890afd80709 \n",
"... ... ... \n",
"338901 0000000067545 b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7 \n",
"338902 0000000067545 b9f0208be00bd8b337be7f12e02e3a3ce846e22b \n",
"338903 0000000067545 b7e6027447eb1f81327d66cfd3adbe557aabf26c \n",
"338904 0000000067545 02eb66d35bce69814f108c2f876e600a78ace137 \n",
"338905 0000000067545 d4f84491d3a4cd7fbd6f2e34e35fc3cf2f9c5c56 \n",
"\n",
" bssid rssi ts_wifi_ls \\\n",
"0 eebf5db207eec2f3e041f92153d789270f346821 -45 1578474544726 \n",
"1 7805f319f3f591986effe78c5b41143180278f2d -46 1578474565732 \n",
"2 323607d8444900d64151ee06d164738ac727bbce -46 1578474564279 \n",
"3 b26914599f6d9ba16b43975394e1eeb9d82f4bab -47 1578474565725 \n",
"4 02a1be3a5dab38320f879489d8a1e0f2a72768b3 -47 1578474547962 \n",
"... ... ... ... \n",
"338901 f2fd7c8b3ae74a54ebcd5498b81b513b7c5e564a -90 1578465380606 \n",
"338902 94887049b5d6072ffd22a5e7de70523931861c2b -91 1578465380654 \n",
"338903 e9f5c01efe9058d460ed3830b2a23b729dea930a -92 1578465380607 \n",
"338904 0f5daed11a61e0d6941a1a42ff428ca216d61003 -93 1578465370203 \n",
"338905 bfaebb72653fac35c19b00e7ce484dc2897f18bd -93 1578465377777 \n",
"\n",
" path site \n",
"0 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"1 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"2 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"3 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"4 046cfa46be49fc10834815c6 5a0546857ecc773753327266 \n",
"... ... ... \n",
"338901 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338902 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338903 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338904 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"338905 ffcd9524c80c0fa5bb859eaf 5a0546857ecc773753327266 \n",
"\n",
"[338906 rows x 7 columns]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"building_df_wifi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,198 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 24 buildings in the testing set.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>file</th>\n",
" <th>building</th>\n",
" <th>site</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>../input/indoor-location-navigation/test//00ff...</td>\n",
" <td>5da1389e4db8ce0c98bd0547</td>\n",
" <td>SiteName:和达城商场</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>../input/indoor-location-navigation/test//01c4...</td>\n",
" <td>5da138b74db8ce0c98bd4774</td>\n",
" <td>SiteName:万象城</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>../input/indoor-location-navigation/test//030b...</td>\n",
" <td>5da138764db8ce0c98bcaa46</td>\n",
" <td>SiteName:银泰百货</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>../input/indoor-location-navigation/test//0389...</td>\n",
" <td>5dbc1d84c1eb61796cf7c010</td>\n",
" <td>SiteName:杭州大悦城</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>../input/indoor-location-navigation/test//0402...</td>\n",
" <td>5da1383b4db8ce0c98bc11ab</td>\n",
" <td>SiteName:永旺梦乐城</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" file \\\n",
"0 ../input/indoor-location-navigation/test//00ff... \n",
"1 ../input/indoor-location-navigation/test//01c4... \n",
"2 ../input/indoor-location-navigation/test//030b... \n",
"3 ../input/indoor-location-navigation/test//0389... \n",
"4 ../input/indoor-location-navigation/test//0402... \n",
"\n",
" building site \n",
"0 5da1389e4db8ce0c98bd0547 SiteName:和达城商场 \n",
"1 5da138b74db8ce0c98bd4774 SiteName:万象城 \n",
"2 5da138764db8ce0c98bcaa46 SiteName:银泰百货 \n",
"3 5dbc1d84c1eb61796cf7c010 SiteName:杭州大悦城 \n",
"4 5da1383b4db8ce0c98bc11ab SiteName:永旺梦乐城 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"# Prepare paths:\n",
"import glob\n",
"from pathlib import Path\n",
"inpath = '../input/indoor-location-navigation/'\n",
"metapath = inpath + 'metadata/'\n",
"trainpath = inpath + 'train/'\n",
"testpath = inpath + 'test/'\n",
"\n",
"# Extract testing files, buildings and sites:\n",
"os.system(f'grep SiteID {testpath}/* > test_buildings.txt' )\n",
"test_buildings = pd.read_csv('test_buildings.txt',sep='\\t',header=None,names=['file','building','site'])\n",
"test_buildings['file'] = test_buildings['file'].apply(lambda x: x[:-2])\n",
"test_buildings['building'] = test_buildings['building'].apply(lambda x: x[7:])\n",
"\n",
"# How many buildings in the testing set?\n",
"buildings = np.unique(test_buildings['building'])\n",
"print('There are',len(buildings),'buildings in the testing set.')\n",
"\n",
"test_buildings.head()\n"
]
},
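{
"cell_type": "markdown",
"metadata": {},
"source": [
"For environments without a shell, a pure-Python sketch of the `grep SiteID` step above (it assumes the SiteID/SiteName fields sit on one tab-separated header line, which is what the parsing above implies):\n",
"\n",
"```python\n",
"rows = []\n",
"for fn in glob.glob(testpath + '/*.txt'):\n",
"    with open(fn) as f:\n",
"        for line in f:\n",
"            if 'SiteID' in line:\n",
"                fields = line.strip().split('\\t')\n",
"                rows.append([fn, fields[1][7:], fields[2]])  # drop the 'SiteID:' prefix\n",
"                break\n",
"test_buildings = pd.DataFrame(rows, columns=['file', 'building', 'site'])\n",
"```"
]
},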
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Compile C++ pre-processing code:\n",
"er=os.system(\"g++ ../input/indoor-cpp/1_preprocess.cpp -std=c++11 -o preprocess\")\n",
"if(er): print(\"Error\")\n",
"\n",
"# Reformat the testing set:\n",
"os.system('mkdir test')\n",
"for i,(path_filename,building) in enumerate(zip(test_buildings['file'],test_buildings['building'])):\n",
" er=os.system(f'./preprocess {path_filename} test {building} {0}') #since we do not know the floor, I put 0.\n",
" if(er): print(\"Error:\",path_filename)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Acceleration, magnetic and orientation testing data:\n",
"os.system('mkdir indoor_testing_accel')\n",
"os.system(\"g++ ../input/indoor-cpp/2_preprocess_accel.cpp -std=c++11 -o preprocess_accel\")\n",
"for building in buildings:\n",
" os.system(f'./preprocess_accel {building}')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Wifi testing data:\n",
"os.system('mkdir test_wifi')\n",
"os.system(\"g++ /kaggle/input/indoor-cpp/2_preprocess_wifi.cpp -std=c++11 -o preprocess_wifi\")\n",
"for building in buildings:\n",
" os.system(f'./preprocess_wifi {building}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017-2020 XYZ10, Inc. https://dangwu.com/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,141 @@
# Indoor Location Competition 2.0 (Sample Data and Code)
This repository contains sample data and code for [Indoor Location Competition 2.0](https://aka.ms/location20), a continuation of the Microsoft Indoor Location Competition. The competition this year will be completely virtual and evaluated on large-scale real indoor location datasets. The dataset to be released consists of dense indoor signatures of WiFi, geomagnetic field, iBeacons, etc., as well as ground truth collected from hundreds of buildings in Chinese cities.
## Webinar Video
We held a webinar in July; the video is [here](https://www.youtube.com/watch?v=xt3OzMC-XMU).
## Sample Data
The `data` folder contains indoor traces from two sites. Each trace (`*.txt`) corresponds to an indoor path between position p<sub>1</sub> and p<sub>2</sub> walked by a site-surveyor. During the walk, the site-surveyor holds an Android smartphone flat in front of their body, while a sensor data recording app running on the device collects IMU (accelerometer, gyroscope) and geomagnetic field (magnetometer) readings, as well as WiFi and Bluetooth iBeacon scanning results. A detailed description of the trace file format is given below. In addition to raw traces, floor plan metadata (e.g., raster image, size, GeoJSON) are also included for each floor.
### Trace File Format (*.txt)
| Time | Data Type | Value | | | | | | | |
|----------------------|-----------------------------------------------------|------------------------------------------|-------------------|--------------|------------------|---------------------|-------------------|-------------------|----------------------------------------|
| 1574659531598 | TYPE\_WAYPOINT | 196\.41757 | 117\.84907 | | | | | | |
| | Location surveyor labeled on the map | Coordinate x (meter) | Coordinate y (meter) | | | | | | |
| | | | | | | | | | |
| 1574659531695 | TYPE\_ACCELEROMETER | \-1\.7085724 | \-0\.274765 | 16\.657166 | 2 | | | | |
| | Android Sensor\.TYPE\_ACCELEROMETER | X axis | Y axis | Z axis | accuracy | | | | |
| 1574659531695 | TYPE\_GYROSCOPE | \-0\.3021698 | 0\.2773285 | 0\.107543945 | 3 | | | | |
| | Android Sensor\.TYPE\_GYROSCOPE | X axis | Y axis | Z axis | accuracy | | | | |
| 1574659531695 | TYPE\_MAGNETIC\_FIELD | 20\.181274 | 16\.209412 | \-32\.22046 | 3 | | | | |
| | Android Sensor\.TYPE\_MAGNETIC\_FIELD | X axis | Y axis | Z axis | accuracy | | | | |
| 1574659531695 | TYPE\_ROTATION\_VECTOR | \-0\.00855688 | 0\.051367603 | 0\.362504 | 3 | | | | |
| | Android Sensor\.TYPE\_ROTATION\_VECTOR | X axis | Y axis | Z axis | accuracy | | | | |
| | | | | | | | | | |
| 1574659531695 | TYPE\_ACCELEROMETER\_UNCALIBRATED | \-1\.7085724 | \-0\.274765 | 16\.657166 | 0\.0 | 0\.0 | 0\.0 | 3 | |
| | Android Sensor\.TYPE\_ACCELEROMETER\_UNCALIBRATED | X axis | Y axis | Z axis | X axis | Y axis | Z axis | accuracy | |
| 1574659531695 | TYPE\_GYROSCOPE\_UNCALIBRATED | \-0\.42333984 | 0\.20202637 | 0\.09623718 | \-7\.9345703E\-4 | 3\.2043457E\-4 | 4\.119873E\-4 | 3 | |
| | Android Sensor\.TYPE\_GYROSCOPE\_UNCALIBRATED | X axis | Y axis | Z axis | X axis | Y axis | Z axis | accuracy | |
| 1574659531695 | TYPE\_MAGNETIC\_FIELD\_UNCALIBRATED | \-29\.830933 | \-26\.36261 | \-300\.3006 | \-50\.012207 | \-42\.57202 | \-268\.08014 | 3 | |
| | Android Sensor\.TYPE\_MAGNETIC\_FIELD\_UNCALIBRATED | X axis | Y axis | Z axis | X axis | Y axis | Z axis | accuracy | |
| | | | | | | | | | |
| 1574659533190 | TYPE\_WIFI | intime\_free | 0e:74:9c:a7:b2:e4 | \-43 | 5805 | 1574659532305 | | | |
| | Wi\-Fi data | ssid | bssid | RSSI | frequency | last seen timestamp | | | |
| | | | | | | | | | |
| 1574659532751 | TYPE\_BEACON | FDA50693\-A4E2\-4FB1\-AFCF\-C6EB07647825 | 10073 | 61418 | \-65 | \-82 | 5\.50634293288929 | 6B:11:4C:D1:29:F2 | 1574659532751 |
| | iBeacon data | UUID | MajorID | MinorID | Tx Power | RSSI | Distance | MAC Address | same with Unix time, padding data |
The first column is Unix time in milliseconds. Specifically, we use SensorEvent.timestamp for sensor data and system time for WiFi and Bluetooth scans.
The second column is the data type (ten in total).
* TYPE_ACCELEROMETER
* TYPE_MAGNETIC_FIELD
* TYPE_GYROSCOPE
* TYPE_ROTATION_VECTOR
* TYPE_MAGNETIC_FIELD_UNCALIBRATED
* TYPE_GYROSCOPE_UNCALIBRATED
* TYPE_ACCELEROMETER_UNCALIBRATED
* TYPE_WIFI
* TYPE_BEACON
* TYPE_WAYPOINT: ground truth location labeled by the surveyor
Data values start from the third column.
Columns 3-5 of TYPE_ACCELEROMETER, TYPE_MAGNETIC_FIELD, TYPE_GYROSCOPE and TYPE_ROTATION_VECTOR are SensorEvent.values[0-2] from the callback function onSensorChanged(). Column 6 is SensorEvent.accuracy.
Columns 3-8 of TYPE_ACCELEROMETER_UNCALIBRATED, TYPE_GYROSCOPE_UNCALIBRATED and TYPE_MAGNETIC_FIELD_UNCALIBRATED are SensorEvent.values[0-5] from the callback function onSensorChanged(). Column 9 is SensorEvent.accuracy.
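As a quick illustration, a minimal Python sketch for dispatching on the data type column (the sample line is the TYPE_ACCELEROMETER row from the table above):
```python
# parse one tab-separated trace line
line = '1574659531695\tTYPE_ACCELEROMETER\t-1.7085724\t-0.274765\t16.657166\t2'
ts, data_type, *values = line.split('\t')
if data_type == 'TYPE_ACCELEROMETER':
    x, y, z = map(float, values[:3])  # values[3] is SensorEvent.accuracy
elif data_type == 'TYPE_WIFI':
    ssid, bssid, rssi, frequency, last_seen = values[:5]
```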
Values of TYPE_BEACON are obtained from ScanRecord.getBytes(). The results are decoded based on the iBeacon protocol using the code below.
```kotlin
val major = ((scanRecord[startByte + 20].toInt() and 0xff) * 0x100 + (scanRecord[startByte + 21].toInt() and 0xff))
val minor = ((scanRecord[startByte + 22].toInt() and 0xff) * 0x100 + (scanRecord[startByte + 23].toInt() and 0xff))
val txPower = scanRecord[startByte + 24]
```
Distance in column 8 is calculated as
```java
private static double calculateDistance(int txPower, double rssi) {
if (rssi == 0) {
return -1.0; // if we cannot determine distance, return -1.
}
double ratio = rssi*1.0/txPower;
if (ratio < 1.0) {
return Math.pow(ratio,10);
}
else {
double accuracy = (0.89976)*Math.pow(ratio,7.7095) + 0.111;
return accuracy;
}
}
```
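As a sanity check, plugging the sample TYPE_BEACON row above (Tx Power -65, RSSI -82) into this formula reproduces its Distance column:
```python
tx_power, rssi = -65, -82
ratio = rssi / tx_power  # ~1.2615, so the ratio >= 1.0 branch applies
distance = 0.89976 * ratio ** 7.7095 + 0.111
print(round(distance, 3))  # ~5.506, matching the sample row
```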
### References:
https://developer.android.com/guide/topics/sensors
https://developer.android.com/reference/android/net/wifi/ScanResult.html
https://developer.android.com/reference/android/bluetooth/le/ScanRecord
## Sample Code
Along with sample data from two sites, this repo also provides several scripts for parsing and analyzing indoor traces. All scripts are tested with Python 3.6.9 on both Windows 10 and Mac OS 15.
### How to run the code
`python main.py`
#### Main functions
| Functions | Output |
|-----------------------------------------------|---------------------------------------------|
| Ground truth location visualization | output/site1/F1/path_images |
| Sample step detection and visualization | output/site1/F1/step_position.html |
| Geo-magnetic field intensity visualization | output/site1/F1/magnetic_strength.html |
| WiFi RSSI heatmap generation | output/site1/F1/wifi_images |
| iBeacon RSSI heatmap generation | output/site1/F1/ibeacon_images |
| WiFi SSID counts visualization | output/site1/F1/wifi_count.html |
## Contents
```
indoor-location-competition-20
│ README.md
│ main.py //main function of the sample code
| compute_f.py //data processing functions
| io_f.py //data preprocessing functions
| visualize_f.py //visualization function
└───data //raw data from two sites
└───site1
| └───B1 //traces from one floor
| | └───path_data_files
| | | └───5dda14a2c5b77e0006b17533.txt //trace file
| | | | ...
| | |
| | | floor_image.png //raster floor plan
| | | floor_info.json //floor size info
| | | geojson_map.json //floor plan in vector format (GeoJSON)
| |
| └───F1
| │ ...
|
└───site2
│ ...
```
## License
This repository is licensed with the [MIT license](./LICENSE).

@@ -0,0 +1,361 @@
import numpy as np
import scipy.signal as signal
def split_ts_seq(ts_seq, sep_ts):
"""
:param ts_seq:
:param sep_ts:
:return:
"""
tss = ts_seq[:, 0].astype(float)
unique_sep_ts = np.unique(sep_ts)
ts_seqs = []
start_index = 0
for i in range(0, unique_sep_ts.shape[0]):
end_index = np.searchsorted(tss, unique_sep_ts[i], side='right')
if start_index == end_index:
continue
ts_seqs.append(ts_seq[start_index:end_index, :].copy())
start_index = end_index
# tail data
if start_index < ts_seq.shape[0]:
ts_seqs.append(ts_seq[start_index:, :].copy())
return ts_seqs
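# Example for split_ts_seq (hypothetical values): with ts_seq timestamps
# [1, 2, 3, 4, 5] and sep_ts = [2, 4], the chunks have timestamps [1, 2],
# [3, 4] and the tail [5]; rows with ts <= a separator go to the preceding chunk.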
def correct_trajectory(original_xys, end_xy):
"""
:param original_xys: numpy ndarray, shape(N, 2)
:param end_xy: numpy ndarray, shape(1, 2)
:return:
"""
corrected_xys = np.zeros((0, 2))
A = original_xys[0, :]
B = end_xy
Bp = original_xys[-1, :]
angle_BAX = np.arctan2(B[1] - A[1], B[0] - A[0])
angle_BpAX = np.arctan2(Bp[1] - A[1], Bp[0] - A[0])
angle_BpAB = angle_BpAX - angle_BAX
AB = np.sqrt(np.sum((B - A) ** 2))
ABp = np.sqrt(np.sum((Bp - A) ** 2))
corrected_xys = np.append(corrected_xys, [A], 0)
for i in np.arange(1, np.size(original_xys, 0)):
angle_CpAX = np.arctan2(original_xys[i, 1] - A[1], original_xys[i, 0] - A[0])
angle_CAX = angle_CpAX - angle_BpAB
ACp = np.sqrt(np.sum((original_xys[i, :] - A) ** 2))
AC = ACp * AB / ABp
delta_C = np.array([AC * np.cos(angle_CAX), AC * np.sin(angle_CAX)])
C = delta_C + A
corrected_xys = np.append(corrected_xys, [C], 0)
return corrected_xys
def correct_positions(rel_positions, reference_positions):
"""
:param rel_positions:
:param reference_positions:
:return:
"""
rel_positions_list = split_ts_seq(rel_positions, reference_positions[:, 0])
if len(rel_positions_list) != reference_positions.shape[0] - 1:
# print(f'Rel positions list size: {len(rel_positions_list)}, ref positions size: {reference_positions.shape[0]}')
del rel_positions_list[-1]
assert len(rel_positions_list) == reference_positions.shape[0] - 1
corrected_positions = np.zeros((0, 3))
for i, rel_ps in enumerate(rel_positions_list):
start_position = reference_positions[i]
end_position = reference_positions[i + 1]
abs_ps = np.zeros(rel_ps.shape)
abs_ps[:, 0] = rel_ps[:, 0]
# abs_ps[:, 1:3] = rel_ps[:, 1:3] + start_position[1:3]
abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]
for j in range(1, rel_ps.shape[0]):
abs_ps[j, 1:3] = abs_ps[j-1, 1:3] + rel_ps[j, 1:3]
abs_ps = np.insert(abs_ps, 0, start_position, axis=0)
corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])
corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))
if i == 0:
corrected_positions = np.append(corrected_positions, corrected_ps, axis=0)
else:
corrected_positions = np.append(corrected_positions, corrected_ps[1:], axis=0)
corrected_positions = np.array(corrected_positions)
return corrected_positions
def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):
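    # 4th-order low-pass Butterworth filter; lfilter is run twice over
    # warmup_data so the returned state (filter_zf) is free of start-up
    # transients before real samples are filtered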
order = 4
filter_b, filter_a = signal.butter(order, cut_off_freq / (sample_freq / 2), 'low', False)
zf = signal.lfilter_zi(filter_b, filter_a)
_, zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)
_, filter_zf = signal.lfilter(filter_b, filter_a, warmup_data, zi=zf)
return filter_b, filter_a, filter_zf
def get_rotation_matrix_from_vector(rotation_vector):
q1 = rotation_vector[0]
q2 = rotation_vector[1]
q3 = rotation_vector[2]
if rotation_vector.size >= 4:
q0 = rotation_vector[3]
else:
q0 = 1 - q1*q1 - q2*q2 - q3*q3
if q0 > 0:
q0 = np.sqrt(q0)
else:
q0 = 0
sq_q1 = 2 * q1 * q1
sq_q2 = 2 * q2 * q2
sq_q3 = 2 * q3 * q3
q1_q2 = 2 * q1 * q2
q3_q0 = 2 * q3 * q0
q1_q3 = 2 * q1 * q3
q2_q0 = 2 * q2 * q0
q2_q3 = 2 * q2 * q3
q1_q0 = 2 * q1 * q0
R = np.zeros((9,))
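    # mirrors Android's SensorManager.getRotationMatrixFromVector, which fills
    # either a 3x3 (size 9) or 4x4 (size 16) matrix; with R fixed to size 9
    # here, the size-16 branch is kept only for parity and is never taken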
if R.size == 9:
R[0] = 1 - sq_q2 - sq_q3
R[1] = q1_q2 - q3_q0
R[2] = q1_q3 + q2_q0
R[3] = q1_q2 + q3_q0
R[4] = 1 - sq_q1 - sq_q3
R[5] = q2_q3 - q1_q0
R[6] = q1_q3 - q2_q0
R[7] = q2_q3 + q1_q0
R[8] = 1 - sq_q1 - sq_q2
R = np.reshape(R, (3, 3))
elif R.size == 16:
R[0] = 1 - sq_q2 - sq_q3
R[1] = q1_q2 - q3_q0
R[2] = q1_q3 + q2_q0
R[3] = 0.0
R[4] = q1_q2 + q3_q0
R[5] = 1 - sq_q1 - sq_q3
R[6] = q2_q3 - q1_q0
R[7] = 0.0
R[8] = q1_q3 - q2_q0
R[9] = q2_q3 + q1_q0
R[10] = 1 - sq_q1 - sq_q2
R[11] = 0.0
R[12] = R[13] = R[14] = 0.0
R[15] = 1.0
R = np.reshape(R, (4, 4))
return R
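# Sanity check (identity rotation): for the unit quaternion rotation_vector
# np.array([0., 0., 0., 1.]) (q1 = q2 = q3 = 0, q0 = 1) every cross term above
# vanishes and the function returns the 3x3 identity matrix.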
def get_orientation(R):
flat_R = R.flatten()
values = np.zeros((3,))
if np.size(flat_R) == 9:
values[0] = np.arctan2(flat_R[1], flat_R[4])
values[1] = np.arcsin(-flat_R[7])
values[2] = np.arctan2(-flat_R[6], flat_R[8])
else:
values[0] = np.arctan2(flat_R[1], flat_R[5])
values[1] = np.arcsin(-flat_R[9])
values[2] = np.arctan2(-flat_R[8], flat_R[10])
return values
def compute_steps(acce_datas):
step_timestamps = np.array([])
step_indexs = np.array([], dtype=int)
step_acce_max_mins = np.zeros((0, 4))
sample_freq = 50
window_size = 22
low_acce_mag = 0.6
step_criterion = 1
interval_threshold = 250
acce_max = np.zeros((2,))
acce_min = np.zeros((2,))
acce_binarys = np.zeros((window_size,), dtype=int)
acce_mag_pre = 0
state_flag = 0
warmup_data = np.ones((window_size,)) * 9.81
filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)
acce_mag_window = np.zeros((window_size, 1))
# detect steps according to acceleration magnitudes
for i in np.arange(0, np.size(acce_datas, 0)):
acce_data = acce_datas[i, :]
acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))
acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)
acce_mag_filt = acce_mag_filt[0]
acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])
acce_mag_window = np.delete(acce_mag_window, 0)
mean_gravity = np.mean(acce_mag_window)
acce_std = np.std(acce_mag_window)
mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])
# detect valid peak or valley of acceleration magnitudes
acce_mag_filt_detrend = acce_mag_filt - mean_gravity
if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):
# peak
acce_binarys = np.append(acce_binarys, [1])
acce_binarys = np.delete(acce_binarys, 0)
elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):
# valley
acce_binarys = np.append(acce_binarys, [-1])
acce_binarys = np.delete(acce_binarys, 0)
else:
# between peak and valley
acce_binarys = np.append(acce_binarys, [0])
acce_binarys = np.delete(acce_binarys, 0)
if (acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):
if state_flag == 0:
acce_max[:] = acce_data[0], acce_mag_filt
state_flag = 1
elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (
acce_mag_filt > acce_max[1]):
acce_max[:] = acce_data[0], acce_mag_filt
elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):
acce_max[:] = acce_data[0], acce_mag_filt
state_flag = 1
# choose reasonable step criterion and check if there is a valid step
# save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]
step_flag = False
if step_criterion == 2:
if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):
step_flag = True
elif step_criterion == 3:
if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):
step_flag = True
else:
if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:
if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):
acce_min[:] = acce_data[0], acce_mag_filt
state_flag = 2
step_flag = True
elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (
acce_mag_filt < acce_min[1]):
acce_min[:] = acce_data[0], acce_mag_filt
if step_flag:
step_timestamps = np.append(step_timestamps, acce_data[0])
step_indexs = np.append(step_indexs, [i])
step_acce_max_mins = np.append(step_acce_max_mins,
[[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)
acce_mag_pre = acce_mag_filt_detrend
return step_timestamps, step_indexs, step_acce_max_mins
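# compute_steps returns three aligned arrays: the step timestamps, their indices
# into acce_datas, and per-step [timestamp, max, min, variance] of the filtered
# acceleration magnitude, which compute_stride_length consumes below.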
def compute_stride_length(step_acce_max_mins):
K = 0.4
K_max = 0.8
K_min = 0.4
para_a0 = 0.21468084
para_a1 = 0.09154517
para_a2 = 0.02301998
stride_lengths = np.zeros((step_acce_max_mins.shape[0], 2))
k_real = np.zeros((step_acce_max_mins.shape[0], 2))
step_timeperiod = np.zeros((step_acce_max_mins.shape[0] - 1, ))
stride_lengths[:, 0] = step_acce_max_mins[:, 0]
window_size = 2
step_timeperiod_temp = np.zeros((0, ))
# calculate every step period - step_timeperiod unit: second
for i in range(0, step_timeperiod.shape[0]):
step_timeperiod_data = (step_acce_max_mins[i + 1, 0] - step_acce_max_mins[i, 0]) / 1000
step_timeperiod_temp = np.append(step_timeperiod_temp, [step_timeperiod_data])
if step_timeperiod_temp.shape[0] > window_size:
step_timeperiod_temp = np.delete(step_timeperiod_temp, [0])
step_timeperiod[i] = np.sum(step_timeperiod_temp) / step_timeperiod_temp.shape[0]
# calculate parameters by step period and acceleration magnitude variance
k_real[:, 0] = step_acce_max_mins[:, 0]
k_real[0, 1] = K
for i in range(0, step_timeperiod.shape[0]):
k_real[i + 1, 1] = np.max([(para_a0 + para_a1 / step_timeperiod[i] + para_a2 * step_acce_max_mins[i, 3]), K_min])
k_real[i + 1, 1] = np.min([k_real[i + 1, 1], K_max]) * (K / K_min)
# calculate every stride length by parameters and max and min data of acceleration magnitude
stride_lengths[:, 1] = np.max([(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2]),
np.ones((step_acce_max_mins.shape[0], ))], axis=0)**(1 / 4) * k_real[:, 1]
return stride_lengths
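# Example with the constants above (hypothetical amplitude): for a step whose
# filtered acceleration-magnitude swing is max - min = 6.25 m/s^2 and k = K = 0.4,
# the stride length is 0.4 * 6.25 ** 0.25, roughly 0.63 m.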
def compute_headings(ahrs_datas):
headings = np.zeros((np.size(ahrs_datas, 0), 2))
for i in np.arange(0, np.size(ahrs_datas, 0)):
ahrs_data = ahrs_datas[i, :]
rot_mat = get_rotation_matrix_from_vector(ahrs_data[1:])
azimuth, pitch, roll = get_orientation(rot_mat)
around_z = (-azimuth) % (2 * np.pi)
headings[i, :] = ahrs_data[0], around_z
return headings
def compute_step_heading(step_timestamps, headings):
step_headings = np.zeros((len(step_timestamps), 2))
step_timestamps_index = 0
for i in range(0, len(headings)):
if step_timestamps_index < len(step_timestamps):
if headings[i, 0] == step_timestamps[step_timestamps_index]:
step_headings[step_timestamps_index, :] = headings[i, :]
step_timestamps_index += 1
else:
break
assert step_timestamps_index == len(step_timestamps)
return step_headings
def compute_rel_positions(stride_lengths, step_headings):
rel_positions = np.zeros((stride_lengths.shape[0], 3))
for i in range(0, stride_lengths.shape[0]):
rel_positions[i, 0] = stride_lengths[i, 0]
rel_positions[i, 1] = -stride_lengths[i, 1] * np.sin(step_headings[i, 1])
rel_positions[i, 2] = stride_lengths[i, 1] * np.cos(step_headings[i, 1])
return rel_positions
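# Heading convention: around_z = 0 means a step along +y, so a 0.6 m stride at
# heading 0 yields (dx, dy) = (0.0, 0.6), and at heading pi/2 yields (-0.6, 0.0).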
def compute_step_positions(acce_datas, ahrs_datas, posi_datas):
step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)
headings = compute_headings(ahrs_datas)
stride_lengths = compute_stride_length(step_acce_max_mins)
step_headings = compute_step_heading(step_timestamps, headings)
rel_positions = compute_rel_positions(stride_lengths, step_headings)
step_positions = correct_positions(rel_positions, posi_datas)
return step_positions
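# Minimal usage sketch (assuming a reader like io_f.read_data_file below;
# the filename is hypothetical):
#
#   data = read_data_file('path_file.txt')
#   step_positions = compute_step_positions(data.acce, data.ahrs, data.waypoint)
#   # -> [timestamp, x, y] per detected step, warped through the waypoints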

@ -0,0 +1,104 @@
from dataclasses import dataclass
import numpy as np
@dataclass
class ReadData:
acce: np.ndarray
acce_uncali: np.ndarray
gyro: np.ndarray
gyro_uncali: np.ndarray
magn: np.ndarray
magn_uncali: np.ndarray
ahrs: np.ndarray
wifi: np.ndarray
ibeacon: np.ndarray
waypoint: np.ndarray
def read_data_file(data_filename):
acce = []
acce_uncali = []
gyro = []
gyro_uncali = []
magn = []
magn_uncali = []
ahrs = []
wifi = []
ibeacon = []
waypoint = []
with open(data_filename, 'r', encoding='utf-8') as file:
lines = file.readlines()
for line_data in lines:
line_data = line_data.strip()
if not line_data or line_data[0] == '#':
continue
line_data = line_data.split('\t')
if line_data[1] == 'TYPE_ACCELEROMETER':
acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_GYROSCOPE':
gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_MAGNETIC_FIELD':
magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_ROTATION_VECTOR':
ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
continue
if line_data[1] == 'TYPE_WIFI':
sys_ts = line_data[0]
ssid = line_data[2]
bssid = line_data[3]
rssi = line_data[4]
lastseen_ts = line_data[6]
wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]
wifi.append(wifi_data)
continue
if line_data[1] == 'TYPE_BEACON':
ts = line_data[0]
uuid = line_data[2]
major = line_data[3]
minor = line_data[4]
rssi = line_data[6]
ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]
ibeacon.append(ibeacon_data)
continue
if line_data[1] == 'TYPE_WAYPOINT':
waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])
acce = np.array(acce)
acce_uncali = np.array(acce_uncali)
gyro = np.array(gyro)
gyro_uncali = np.array(gyro_uncali)
magn = np.array(magn)
magn_uncali = np.array(magn_uncali)
ahrs = np.array(ahrs)
wifi = np.array(wifi)
ibeacon = np.array(ibeacon)
waypoint = np.array(waypoint)
return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)
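# Usage sketch (hypothetical filename; shapes assume the file contains these
# record types):
#
#   data = read_data_file('./data/site1/F1/path_data_files/example.txt')
#   data.acce.shape      # (N, 4): timestamp, x, y, z
#   data.wifi.shape      # (M, 5): timestamp, ssid, bssid, rssi, last_seen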

@ -0,0 +1,258 @@
import json
import os
from pathlib import Path
import numpy as np
from compute_f import split_ts_seq, compute_step_positions
from io_f import read_data_file
from visualize_f import visualize_trajectory, visualize_heatmap, save_figure_to_html
floor_data_dir = './data/site1/F1'
path_data_dir = floor_data_dir + '/path_data_files'
floor_plan_filename = floor_data_dir + '/floor_image.png'
floor_info_filename = floor_data_dir + '/floor_info.json'
save_dir = './output/site1/F1'
path_image_save_dir = save_dir + '/path_images'
step_position_image_save_dir = save_dir
magn_image_save_dir = save_dir
wifi_image_save_dir = save_dir + '/wifi_images'
ibeacon_image_save_dir = save_dir + '/ibeacon_images'
wifi_count_image_save_dir = save_dir
def calibrate_magnetic_wifi_ibeacon_to_position(path_file_list):
mwi_datas = {}
for path_filename in path_file_list:
print(f'Processing {path_filename}...')
path_datas = read_data_file(path_filename)
acce_datas = path_datas.acce
magn_datas = path_datas.magn
ahrs_datas = path_datas.ahrs
wifi_datas = path_datas.wifi
ibeacon_datas = path_datas.ibeacon
posi_datas = path_datas.waypoint
step_positions = compute_step_positions(acce_datas, ahrs_datas, posi_datas)
# visualize_trajectory(posi_datas[:, 1:3], floor_plan_filename, width_meter, height_meter, title='Ground Truth', show=True)
# visualize_trajectory(step_positions[:, 1:3], floor_plan_filename, width_meter, height_meter, title='Step Position', show=True)
if wifi_datas.size != 0:
sep_tss = np.unique(wifi_datas[:, 0].astype(float))
wifi_datas_list = split_ts_seq(wifi_datas, sep_tss)
for wifi_ds in wifi_datas_list:
diff = np.abs(step_positions[:, 0] - float(wifi_ds[0, 0]))
index = np.argmin(diff)
target_xy_key = tuple(step_positions[index, 1:3])
if target_xy_key in mwi_datas:
mwi_datas[target_xy_key]['wifi'] = np.append(mwi_datas[target_xy_key]['wifi'], wifi_ds, axis=0)
else:
mwi_datas[target_xy_key] = {
'magnetic': np.zeros((0, 4)),
'wifi': wifi_ds,
'ibeacon': np.zeros((0, 3))
}
if ibeacon_datas.size != 0:
sep_tss = np.unique(ibeacon_datas[:, 0].astype(float))
ibeacon_datas_list = split_ts_seq(ibeacon_datas, sep_tss)
for ibeacon_ds in ibeacon_datas_list:
diff = np.abs(step_positions[:, 0] - float(ibeacon_ds[0, 0]))
index = np.argmin(diff)
target_xy_key = tuple(step_positions[index, 1:3])
if target_xy_key in mwi_datas:
mwi_datas[target_xy_key]['ibeacon'] = np.append(mwi_datas[target_xy_key]['ibeacon'], ibeacon_ds, axis=0)
else:
mwi_datas[target_xy_key] = {
'magnetic': np.zeros((0, 4)),
'wifi': np.zeros((0, 5)),
'ibeacon': ibeacon_ds
}
sep_tss = np.unique(magn_datas[:, 0].astype(float))
magn_datas_list = split_ts_seq(magn_datas, sep_tss)
for magn_ds in magn_datas_list:
diff = np.abs(step_positions[:, 0] - float(magn_ds[0, 0]))
index = np.argmin(diff)
target_xy_key = tuple(step_positions[index, 1:3])
if target_xy_key in mwi_datas:
mwi_datas[target_xy_key]['magnetic'] = np.append(mwi_datas[target_xy_key]['magnetic'], magn_ds, axis=0)
else:
mwi_datas[target_xy_key] = {
'magnetic': magn_ds,
'wifi': np.zeros((0, 5)),
'ibeacon': np.zeros((0, 3))
}
return mwi_datas
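# Structure of the returned mapping (illustrative):
#   mwi_datas[(x, y)] = {
#       'magnetic': ndarray of shape (N, 4), rows [timestamp, x, y, z],
#       'wifi':     ndarray of shape (M, 5), rows [timestamp, ssid, bssid, rssi, last_seen],
#       'ibeacon':  ndarray of shape (K, 3), rows [timestamp, uuid_major_minor, rssi],
#   }
# keyed by the step position closest in time to each sensor block.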
def extract_magnetic_strength(mwi_datas):
magnetic_strength = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
magnetic_data = mwi_datas[position_key]['magnetic']
magnetic_s = np.mean(np.sqrt(np.sum(magnetic_data[:, 1:4] ** 2, axis=1)))
magnetic_strength[position_key] = magnetic_s
return magnetic_strength
def extract_wifi_rssi(mwi_datas):
wifi_rssi = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
wifi_data = mwi_datas[position_key]['wifi']
for wifi_d in wifi_data:
bssid = wifi_d[2]
rssi = int(wifi_d[3])
if bssid in wifi_rssi:
position_rssi = wifi_rssi[bssid]
if position_key in position_rssi:
old_rssi = position_rssi[position_key][0]
old_count = position_rssi[position_key][1]
position_rssi[position_key][0] = (old_rssi * old_count + rssi) / (old_count + 1)
position_rssi[position_key][1] = old_count + 1
else:
position_rssi[position_key] = np.array([rssi, 1])
else:
position_rssi = {}
position_rssi[position_key] = np.array([rssi, 1])
wifi_rssi[bssid] = position_rssi
return wifi_rssi
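# The update above is the standard incremental mean: with mean m over n readings
# and a new rssi r, the new mean is (m * n + r) / (n + 1). E.g. a mean of -60 dBm
# over 2 readings plus a new -66 dBm reading gives -62 dBm.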
def extract_ibeacon_rssi(mwi_datas):
ibeacon_rssi = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
ibeacon_data = mwi_datas[position_key]['ibeacon']
for ibeacon_d in ibeacon_data:
ummid = ibeacon_d[1]
rssi = int(ibeacon_d[2])
if ummid in ibeacon_rssi:
position_rssi = ibeacon_rssi[ummid]
if position_key in position_rssi:
old_rssi = position_rssi[position_key][0]
old_count = position_rssi[position_key][1]
position_rssi[position_key][0] = (old_rssi * old_count + rssi) / (old_count + 1)
position_rssi[position_key][1] = old_count + 1
else:
position_rssi[position_key] = np.array([rssi, 1])
else:
position_rssi = {}
position_rssi[position_key] = np.array([rssi, 1])
ibeacon_rssi[ummid] = position_rssi
return ibeacon_rssi
def extract_wifi_count(mwi_datas):
wifi_counts = {}
for position_key in mwi_datas:
# print(f'Position: {position_key}')
wifi_data = mwi_datas[position_key]['wifi']
count = np.unique(wifi_data[:, 2]).shape[0]
wifi_counts[position_key] = count
return wifi_counts
if __name__ == "__main__":
Path(path_image_save_dir).mkdir(parents=True, exist_ok=True)
Path(magn_image_save_dir).mkdir(parents=True, exist_ok=True)
Path(wifi_image_save_dir).mkdir(parents=True, exist_ok=True)
Path(ibeacon_image_save_dir).mkdir(parents=True, exist_ok=True)
with open(floor_info_filename) as f:
floor_info = json.load(f)
width_meter = floor_info["map_info"]["width"]
height_meter = floor_info["map_info"]["height"]
path_filenames = list(Path(path_data_dir).resolve().glob("*.txt"))
# 1. visualize ground truth positions
print('Visualizing ground truth positions...')
for path_filename in path_filenames:
print(f'Processing file: {path_filename}...')
path_data = read_data_file(path_filename)
path_id = path_filename.name.split(".")[0]
fig = visualize_trajectory(path_data.waypoint[:, 1:3], floor_plan_filename, width_meter, height_meter, title=path_id, show=False)
html_filename = f'{path_image_save_dir}/{path_id}.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
# 2. visualize step position, magnetic, wifi, ibeacon
print('Visualizing more information...')
mwi_datas = calibrate_magnetic_wifi_ibeacon_to_position(path_filenames)
step_positions = np.array(list(mwi_datas.keys()))
fig = visualize_trajectory(step_positions, floor_plan_filename, width_meter, height_meter, mode='markers', title='Step Position', show=True)
html_filename = f'{step_position_image_save_dir}/step_position.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
magnetic_strength = extract_magnetic_strength(mwi_datas)
heat_positions = np.array(list(magnetic_strength.keys()))
heat_values = np.array(list(magnetic_strength.values()))
fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='mu tesla', title='Magnetic Strength', show=True)
html_filename = f'{magn_image_save_dir}/magnetic_strength.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
wifi_rssi = extract_wifi_rssi(mwi_datas)
print(f'This floor has {len(wifi_rssi.keys())} wifi aps')
ten_wifi_bssids = list(wifi_rssi.keys())[0:10]
print('Example 10 wifi ap bssids:\n')
for bssid in ten_wifi_bssids:
print(bssid)
target_wifi = input(f"Please input target wifi ap bssid:\n")
# target_wifi = '1e:74:9c:a7:b2:e4'
heat_positions = np.array(list(wifi_rssi[target_wifi].keys()))
heat_values = np.array(list(wifi_rssi[target_wifi].values()))[:, 0]
fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='dBm', title=f'Wifi: {target_wifi} RSSI', show=True)
html_filename = f'{wifi_image_save_dir}/{target_wifi.replace(":", "-")}.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
ibeacon_rssi = extract_ibeacon_rssi(mwi_datas)
print(f'This floor has {len(ibeacon_rssi.keys())} ibeacons')
ten_ibeacon_ummids = list(ibeacon_rssi.keys())[0:10]
print('Example 10 ibeacon UUID_MajorID_MinorIDs:\n')
for ummid in ten_ibeacon_ummids:
print(ummid)
target_ibeacon = input(f"Please input target ibeacon UUID_MajorID_MinorID:\n")
# target_ibeacon = 'FDA50693-A4E2-4FB1-AFCF-C6EB07647825_10073_61418'
heat_positions = np.array(list(ibeacon_rssi[target_ibeacon].keys()))
heat_values = np.array(list(ibeacon_rssi[target_ibeacon].values()))[:, 0]
fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='dBm', title=f'iBeacon: {target_ibeacon} RSSI', show=True)
html_filename = f'{ibeacon_image_save_dir}/{target_ibeacon}.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
wifi_counts = extract_wifi_count(mwi_datas)
heat_positions = np.array(list(wifi_counts.keys()))
heat_values = np.array(list(wifi_counts.values()))
    # filter out positions where no wifi was detected
    mask = heat_values != 0
    heat_positions = heat_positions[mask]
    heat_values = heat_values[mask]
    fig = visualize_heatmap(heat_positions, heat_values, floor_plan_filename, width_meter, height_meter, colorbar_title='number', title='Wifi Count', show=True)
html_filename = f'{wifi_count_image_save_dir}/wifi_count.html'
html_filename = str(Path(html_filename).resolve())
save_figure_to_html(fig, html_filename)
    print('Done')

@ -0,0 +1,132 @@
import plotly.graph_objs as go
from PIL import Image
def save_figure_to_html(fig, filename):
fig.write_html(filename)
def visualize_trajectory(trajectory, floor_plan_filename, width_meter, height_meter, title=None, mode='lines + markers + text', show=False):
fig = go.Figure()
# add trajectory
size_list = [6] * trajectory.shape[0]
size_list[0] = 10
size_list[-1] = 10
color_list = ['rgba(4, 174, 4, 0.5)'] * trajectory.shape[0]
color_list[0] = 'rgba(12, 5, 235, 1)'
color_list[-1] = 'rgba(235, 5, 5, 1)'
position_count = {}
text_list = []
for i in range(trajectory.shape[0]):
if str(trajectory[i]) in position_count:
position_count[str(trajectory[i])] += 1
else:
position_count[str(trajectory[i])] = 0
text_list.append(' ' * position_count[str(trajectory[i])] + f'{i}')
text_list[0] = 'Start Point: 0'
text_list[-1] = f'End Point: {trajectory.shape[0] - 1}'
fig.add_trace(
go.Scattergl(
x=trajectory[:, 0],
y=trajectory[:, 1],
mode=mode,
marker=dict(size=size_list, color=color_list),
line=dict(shape='linear', color='rgb(100, 10, 100)', width=2, dash='dot'),
text=text_list,
textposition="top center",
name='trajectory',
))
# add floor plan
floor_plan = Image.open(floor_plan_filename)
fig.update_layout(images=[
go.layout.Image(
source=floor_plan,
xref="x",
yref="y",
x=0,
y=height_meter,
sizex=width_meter,
sizey=height_meter,
sizing="contain",
opacity=1,
layer="below",
)
])
# configure
fig.update_xaxes(autorange=False, range=[0, width_meter])
fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
fig.update_layout(
title=go.layout.Title(
text=title or "No title.",
xref="paper",
x=0,
),
autosize=True,
width=900,
height=200 + 900 * height_meter / width_meter,
template="plotly_white",
)
if show:
fig.show()
return fig
def visualize_heatmap(position, value, floor_plan_filename, width_meter, height_meter, colorbar_title="colorbar", title=None, show=False):
fig = go.Figure()
# add heat map
fig.add_trace(
go.Scatter(x=position[:, 0],
y=position[:, 1],
mode='markers',
marker=dict(size=7,
color=value,
colorbar=dict(title=colorbar_title),
colorscale="Rainbow"),
text=value,
name=title))
# add floor plan
floor_plan = Image.open(floor_plan_filename)
fig.update_layout(images=[
go.layout.Image(
source=floor_plan,
xref="x",
yref="y",
x=0,
y=height_meter,
sizex=width_meter,
sizey=height_meter,
sizing="contain",
opacity=1,
layer="below",
)
])
# configure
fig.update_xaxes(autorange=False, range=[0, width_meter])
fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
fig.update_layout(
title=go.layout.Title(
text=title or "No title.",
xref="paper",
x=0,
),
autosize=True,
width=900,
height=200 + 900 * height_meter / width_meter,
template="plotly_white",
)
if show:
fig.show()
return fig
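# Usage sketch (hypothetical values): both helpers return a plotly Figure, e.g.
#
#   fig = visualize_trajectory(xy, 'floor_image.png', 100.0, 50.0, show=False)
#   save_figure_to_html(fig, 'trajectory.html')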

@ -0,0 +1,375 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from dask.distributed import wait\n",
"import glob\n",
"\n",
"SENSORS = ['acce','acce_uncali','gyro',\n",
" 'gyro_uncali','magn','magn_uncali','ahrs']\n",
"\n",
"NFEAS = {\n",
" 'acce': 3,\n",
" 'acce_uncali': 3,\n",
" 'gyro': 3,\n",
" 'gyro_uncali': 3,\n",
" 'magn': 3,\n",
" 'magn_uncali': 3,\n",
" 'ahrs': 3,\n",
" 'wifi': 1,\n",
" 'ibeacon': 1,\n",
" 'waypoint': 3\n",
"}\n",
"\n",
"ACOLS = ['timestamp','x','y','z']\n",
" \n",
"FIELDS = {\n",
" 'acce': ACOLS,\n",
" 'acce_uncali': ACOLS,\n",
" 'gyro': ACOLS,\n",
" 'gyro_uncali': ACOLS,\n",
" 'magn': ACOLS,\n",
" 'magn_uncali': ACOLS,\n",
" 'ahrs': ACOLS,\n",
" 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n",
" 'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n",
" 'waypoint': ['timestamp','x','y']\n",
"}\n",
"\n",
"def to_frame(data, col):\n",
" cols = FIELDS[col]\n",
" is_dummy = False\n",
" if data.shape[0]>0:\n",
" df = pd.DataFrame(data, columns=cols)\n",
" else:\n",
" df = create_dummy_df(cols)\n",
" is_dummy = True\n",
" for col in df.columns:\n",
" if 'timestamp' in col:\n",
" df[col] = df[col].astype('int64')\n",
" return df, is_dummy\n",
"\n",
"def create_dummy_df(cols):\n",
" df = pd.DataFrame()\n",
" for col in cols:\n",
" df[col] = [0]\n",
" if col in ['ssid','bssid']:\n",
" df[col] = df[col].map(str)\n",
" return df\n",
"\n",
"from dataclasses import dataclass\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
"@dataclass\n",
"class ReadData:\n",
" acce: np.ndarray\n",
" acce_uncali: np.ndarray\n",
" gyro: np.ndarray\n",
" gyro_uncali: np.ndarray\n",
" magn: np.ndarray\n",
" magn_uncali: np.ndarray\n",
" ahrs: np.ndarray\n",
" wifi: np.ndarray\n",
" ibeacon: np.ndarray\n",
" waypoint: np.ndarray\n",
"\n",
"\n",
"def read_data_file(data_filename):\n",
" acce = []\n",
" acce_uncali = []\n",
" gyro = []\n",
" gyro_uncali = []\n",
" magn = []\n",
" magn_uncali = []\n",
" ahrs = []\n",
" wifi = []\n",
" ibeacon = []\n",
" waypoint = []\n",
"\n",
" with open(data_filename, 'r', encoding='utf-8') as file:\n",
" lines = file.readlines()\n",
"\n",
" for line_data in lines:\n",
" line_data = line_data.strip()\n",
" if not line_data or line_data[0] == '#':\n",
" continue\n",
"\n",
" line_data = line_data.split('\\t')\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER':\n",
" acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n",
" acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE':\n",
" gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n",
" gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n",
" magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n",
" magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_ROTATION_VECTOR':\n",
" if len(line_data)>=5:\n",
" ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WIFI':\n",
" sys_ts = line_data[0]\n",
" ssid = line_data[2]\n",
" bssid = line_data[3]\n",
" rssi = line_data[4]\n",
" lastseen_ts = line_data[6]\n",
" wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n",
" wifi.append(wifi_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_BEACON':\n",
" ts = line_data[0]\n",
" uuid = line_data[2]\n",
" major = line_data[3]\n",
" minor = line_data[4]\n",
" rssi = line_data[6]\n",
" lastts = line_data[-1]\n",
" ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]\n",
" ibeacon.append(ibeacon_data)\n",
" continue\n",
"\n",
" if line_data[1] == 'TYPE_WAYPOINT':\n",
" waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n",
"\n",
" acce = np.array(acce)\n",
" acce_uncali = np.array(acce_uncali)\n",
" gyro = np.array(gyro)\n",
" gyro_uncali = np.array(gyro_uncali)\n",
" magn = np.array(magn)\n",
" magn_uncali = np.array(magn_uncali)\n",
" ahrs = np.array(ahrs)\n",
" wifi = np.array(wifi)\n",
" ibeacon = np.array(ibeacon)\n",
" waypoint = np.array(waypoint)\n",
"\n",
" return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def get_test_dfs(PATH, test_files):\n",
" dtest = get_test_df(PATH)\n",
" buildings = set(dtest['building'].values.tolist())\n",
" dws = {}\n",
" ntest_files = []\n",
" for fname in tqdm(test_files):\n",
" path = fname.split('/')[-1].split('.')[0]\n",
" mask = dtest['path'] == path\n",
" dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)\n",
" ntest_files.append(fname)\n",
" return dws\n",
"\n",
"def get_test_df(PATH):\n",
" dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n",
" dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])\n",
" dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])\n",
" dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])\n",
" dtest['timestamp'] = dtest['timestamp'].astype('int64')\n",
" dtest = dtest.sort_values(['path','timestamp']).reset_index(drop=True)\n",
" return dtest\n",
"\n",
"def get_time_gap(name):\n",
" data = read_data_file(name)\n",
" db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n",
"# print(db,no_ibeacon)\n",
" \n",
" if no_ibeacon==0:\n",
" gap = db['last_timestamp'] - db['timestamp']\n",
" assert gap.unique().shape[0]==1\n",
" return gap.values[0],no_ibeacon\n",
" \n",
" if no_ibeacon==1:\n",
" # Group wifis by timestamp\n",
" wifi_groups = pd.DataFrame(data.wifi).groupby(0) \n",
" # Find which one is the most recent of all time points.\n",
" est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max() \n",
" return est_ts,no_ibeacon\n",
"\n",
" \n",
"\n",
"def fix_timestamp_test(df, gap):\n",
" df['real_timestamp'] = df['timestamp'] + gap\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n",
" '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n",
" '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n",
" '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n",
"test_files_ori[:4]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ec2-user/anaconda3/lib/python3.7/site-packages/distributed/dashboard/core.py:79: UserWarning: \n",
"Port 8787 is already in use. \n",
"Perhaps you already have a cluster running?\n",
"Hosting the diagnostics dashboard on a random port instead.\n",
" warnings.warn(\"\\n\" + msg)\n"
]
},
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:42097</li>\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:39155/status' target='_blank'>http://127.0.0.1:39155/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>8</li>\n",
" <li><b>Cores: </b>8</li>\n",
" <li><b>Memory: </b>32.89 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://127.0.0.1:42097' processes=8 threads=8, memory=32.89 GB>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import dask\n",
"from dask.distributed import Client, wait, LocalCluster\n",
"\n",
"# set n_workers to number of cores\n",
"client = Client(n_workers=8, \n",
" threads_per_worker=1)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 626/626 [00:00<00:00, 4552.03it/s]\n",
"100%|██████████| 626/626 [00:16<00:00, 37.39it/s] \n"
]
}
],
"source": [
"futures = []\n",
"for fname in tqdm(test_files_ori, total=len(test_files_ori)):\n",
" f = client.submit(get_time_gap,fname)\n",
" futures.append(f)\n",
" \n",
"testpath2gap = {}\n",
"for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n",
" testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('testpath2gap.pkl','wb') as f:\n",
" pickle.dump(testpath2gap,f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,361 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.007463,
"end_time": "2021-02-03T20:30:06.571139",
"exception": false,
"start_time": "2021-02-03T20:30:06.563676",
"status": "completed"
},
"tags": []
},
"source": [
"### Wifi features\n",
"\n",
"This this is the code to generate the wifi features available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). Using these features can get a score below 14. For an example notebook using them see [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter). They only uses waypoints, wifi and timestamp data to generate solution. See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for an outline of this solution method, and methods of improvement.\n",
"\n",
"There are `break`'s inserted into loops which need to be removed to get this to run. Right now data is written to current working directory. This takes 2-4 hours to run depending on hard drive etc. There is a lot of room for improvement speeding up feature generation. \n",
"\n",
"**Update:** I added one line that creates a column for the path filename, this allows for a groupkfold crossvalidation. \n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.590945Z",
"iopub.status.busy": "2021-02-03T20:30:06.589984Z",
"iopub.status.idle": "2021-02-03T20:30:06.593594Z",
"shell.execute_reply": "2021-02-03T20:30:06.592887Z"
},
"papermill": {
"duration": 0.01623,
"end_time": "2021-02-03T20:30:06.593847",
"exception": false,
"start_time": "2021-02-03T20:30:06.577617",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"import gc\n",
"import json "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.614521Z",
"iopub.status.busy": "2021-02-03T20:30:06.613572Z",
"iopub.status.idle": "2021-02-03T20:30:06.616669Z",
"shell.execute_reply": "2021-02-03T20:30:06.616121Z"
},
"papermill": {
"duration": 0.015585,
"end_time": "2021-02-03T20:30:06.616837",
"exception": false,
"start_time": "2021-02-03T20:30:06.601252",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"base_path = '../input/indoor-location-navigation/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:06.639011Z",
"iopub.status.busy": "2021-02-03T20:30:06.638118Z",
"iopub.status.idle": "2021-02-03T20:30:09.333807Z",
"shell.execute_reply": "2021-02-03T20:30:09.334360Z"
},
"papermill": {
"duration": 2.711076,
"end_time": "2021-02-03T20:30:09.334617",
"exception": false,
"start_time": "2021-02-03T20:30:06.623541",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# pull out all the buildings actually used in the test set, given current method we don't need the other ones\n",
"ssubm = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')\n",
"\n",
"# only 24 of the total buildings are used in the test set, \n",
"# this allows us to greatly reduce the intial size of the dataset\n",
"\n",
"ssubm_df = ssubm[\"site_path_timestamp\"].apply(lambda x: pd.Series(x.split(\"_\")))\n",
"used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())\n",
"\n",
"# dictionary used to map the floor codes to the values used in the submission file. \n",
"floor_map = {\"B2\":-2, \"B1\":-1, \"F1\":0, \"F2\": 1, \"F3\":2, \"F4\":3, \"F5\":4, \"F6\":5, \"F7\":6,\"F8\":7, \"F9\":8,\n",
" \"1F\":0, \"2F\":1, \"3F\":2, \"4F\":3, \"5F\":4, \"6F\":5, \"7F\":6, \"8F\": 7, \"9F\":8}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.359905Z",
"iopub.status.busy": "2021-02-03T20:30:09.359123Z",
"iopub.status.idle": "2021-02-03T20:30:09.362909Z",
"shell.execute_reply": "2021-02-03T20:30:09.362224Z"
},
"papermill": {
"duration": 0.021272,
"end_time": "2021-02-03T20:30:09.363069",
"exception": false,
"start_time": "2021-02-03T20:30:09.341797",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# get only the wifi bssid that occur over 1000 times(this number can be experimented with)\n",
"# these will be the only ones used when constructing features\n",
"bssid = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train/'+building+'/*')))\n",
" print(building)\n",
" wifi = list()\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" for file in files:\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for e, line in enumerate(txt):\n",
" tmp = line.strip().split()\n",
" if tmp[1] == \"TYPE_WIFI\":\n",
" wifi.append(tmp)\n",
" df = pd.DataFrame(wifi)\n",
" #top_bssid = df[3].value_counts().iloc[:500].index.tolist()\n",
" value_counts = df[3].value_counts()\n",
" top_bssid = value_counts[value_counts > 0].index.tolist()\n",
" print(len(top_bssid))\n",
" bssid[building] = top_bssid\n",
" del df\n",
" del wifi\n",
" gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.383252Z",
"iopub.status.busy": "2021-02-03T20:30:09.382581Z",
"iopub.status.idle": "2021-02-03T20:30:09.386704Z",
"shell.execute_reply": "2021-02-03T20:30:09.385809Z"
},
"papermill": {
"duration": 0.016635,
"end_time": "2021-02-03T20:30:09.386885",
"exception": false,
"start_time": "2021-02-03T20:30:09.370250",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"with open(\"bssid_1000.json\", \"w\") as f:\n",
" json.dump(bssid, f)\n",
"\n",
"with open(\"bssid_1000.json\") as f:\n",
" bssid = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.418284Z",
"iopub.status.busy": "2021-02-03T20:30:09.417119Z",
"iopub.status.idle": "2021-02-03T20:30:09.420513Z",
"shell.execute_reply": "2021-02-03T20:30:09.419767Z"
},
"papermill": {
"duration": 0.026514,
"end_time": "2021-02-03T20:30:09.420694",
"exception": false,
"start_time": "2021-02-03T20:30:09.394180",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# generate all the training data \n",
"building_dfs = dict()\n",
"\n",
"for building in used_buildings:\n",
" break\n",
" folders = sorted(glob.glob(os.path.join(base_path,'train', building +'/*')))\n",
" dfs = list()\n",
" index = sorted(bssid[building])\n",
" print(building)\n",
" for folder in folders:\n",
" floor = floor_map[folder.split('/')[-1]]\n",
" files = glob.glob(os.path.join(folder, \"*.txt\"))\n",
" print(floor)\n",
" for file in files:\n",
" wifi = list()\n",
" waypoint = list()\n",
" with open(file) as f:\n",
" txt = f.readlines()\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WAYPOINT\":\n",
" waypoint.append(line)\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" df = pd.DataFrame(np.array(wifi)) \n",
"\n",
" # generate a feature, and label for each wifi block\n",
" for gid, g in df.groupby(0):\n",
" dists = list()\n",
" for e, k in enumerate(waypoint):\n",
" dist = abs(int(gid) - int(k[0]))\n",
" dists.append(dist)\n",
" nearest_wp_index = np.argmin(dists)\n",
" \n",
" g = g.drop_duplicates(subset=3)\n",
" tmp = g.iloc[:,3:5]\n",
" feat = tmp.set_index(3).reindex(index).replace(np.nan, -999).T\n",
" feat[\"x\"] = float(waypoint[nearest_wp_index][2])\n",
" feat[\"y\"] = float(waypoint[nearest_wp_index][3])\n",
" feat[\"f\"] = floor\n",
" feat[\"path\"] = file.split('/')[-1].split('.')[0] # useful for crossvalidation\n",
" dfs.append(feat)\n",
" \n",
" building_df = pd.concat(dfs)\n",
" building_dfs[building] = df\n",
" building_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+building+\"_train.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-03T20:30:09.454304Z",
"iopub.status.busy": "2021-02-03T20:30:09.451093Z",
"iopub.status.idle": "2021-02-03T20:30:09.464308Z",
"shell.execute_reply": "2021-02-03T20:30:09.464854Z"
},
"papermill": {
"duration": 0.036471,
"end_time": "2021-02-03T20:30:09.465079",
"exception": false,
"start_time": "2021-02-03T20:30:09.428608",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Generate the features for the test set\n",
"\n",
"ssubm_building_g = ssubm_df.groupby(0)\n",
"feature_dict = dict()\n",
"\n",
"for gid0, g0 in ssubm_building_g:\n",
" break\n",
" index = sorted(bssid[g0.iloc[0,0]])\n",
" feats = list()\n",
" print(gid0)\n",
" for gid,g in g0.groupby(1):\n",
"\n",
" # get all wifi time locations, \n",
" with open(os.path.join(base_path, 'test/' + g.iloc[0,1] + '.txt')) as f:\n",
" txt = f.readlines()\n",
"\n",
" wifi = list()\n",
"\n",
" for line in txt:\n",
" line = line.strip().split()\n",
" if line[1] == \"TYPE_WIFI\":\n",
" wifi.append(line)\n",
"\n",
" wifi_df = pd.DataFrame(wifi)\n",
" wifi_points = pd.DataFrame(wifi_df.groupby(0).count().index.tolist())\n",
" \n",
" for timepoint in g.iloc[:,2].tolist():\n",
"\n",
" deltas = (wifi_points.astype(int) - int(timepoint)).abs()\n",
" min_delta_idx = deltas.values.argmin()\n",
" wifi_block_timestamp = wifi_points.iloc[min_delta_idx].values[0]\n",
" \n",
" wifi_block = wifi_df[wifi_df[0] == wifi_block_timestamp].drop_duplicates(subset=3)\n",
" feat = wifi_block.set_index(3)[4].reindex(index).fillna(-999)\n",
"\n",
" feat['site_path_timestamp'] = g.iloc[0,0] + \"_\" + g.iloc[0,1] + \"_\" + timepoint\n",
" feats.append(feat)\n",
" feature_df = pd.concat(feats, axis=1).T\n",
" feature_df.to_csv('../input/indoor-navigation-and-location-wifi-features/'+gid0+\"_test.csv\")\n",
" feature_dict[gid0] = feature_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"papermill": {
"default_parameters": {},
"duration": 9.894085,
"end_time": "2021-02-03T20:30:10.083699",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2021-02-03T20:30:00.189614",
"version": "2.2.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}