You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
376 lines
12 KiB
376 lines
12 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"from tqdm import tqdm\n",
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"from dask.distributed import wait\n",
|
|
"import glob\n",
|
|
"\n",
|
|
"SENSORS = ['acce','acce_uncali','gyro',\n",
|
|
" 'gyro_uncali','magn','magn_uncali','ahrs']\n",
|
|
"\n",
|
|
"NFEAS = {\n",
|
|
" 'acce': 3,\n",
|
|
" 'acce_uncali': 3,\n",
|
|
" 'gyro': 3,\n",
|
|
" 'gyro_uncali': 3,\n",
|
|
" 'magn': 3,\n",
|
|
" 'magn_uncali': 3,\n",
|
|
" 'ahrs': 3,\n",
|
|
" 'wifi': 1,\n",
|
|
" 'ibeacon': 1,\n",
|
|
" 'waypoint': 3\n",
|
|
"}\n",
|
|
"\n",
|
|
"ACOLS = ['timestamp','x','y','z']\n",
|
|
" \n",
|
|
"FIELDS = {\n",
|
|
" 'acce': ACOLS,\n",
|
|
" 'acce_uncali': ACOLS,\n",
|
|
" 'gyro': ACOLS,\n",
|
|
" 'gyro_uncali': ACOLS,\n",
|
|
" 'magn': ACOLS,\n",
|
|
" 'magn_uncali': ACOLS,\n",
|
|
" 'ahrs': ACOLS,\n",
|
|
" 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n",
|
|
" 'ibeacon': ['timestamp','code','rssi','last_timestamp'],\n",
|
|
" 'waypoint': ['timestamp','x','y']\n",
|
|
"}\n",
|
|
"\n",
|
|
"def to_frame(data, col):\n",
|
|
" cols = FIELDS[col]\n",
|
|
" is_dummy = False\n",
|
|
" if data.shape[0]>0:\n",
|
|
" df = pd.DataFrame(data, columns=cols)\n",
|
|
" else:\n",
|
|
" df = create_dummy_df(cols)\n",
|
|
" is_dummy = True\n",
|
|
" for col in df.columns:\n",
|
|
" if 'timestamp' in col:\n",
|
|
" df[col] = df[col].astype('int64')\n",
|
|
" return df, is_dummy\n",
|
|
"\n",
|
|
"def create_dummy_df(cols):\n",
|
|
" df = pd.DataFrame()\n",
|
|
" for col in cols:\n",
|
|
" df[col] = [0]\n",
|
|
" if col in ['ssid','bssid']:\n",
|
|
" df[col] = df[col].map(str)\n",
|
|
" return df\n",
|
|
"\n",
|
|
"from dataclasses import dataclass\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"\n",
|
|
"@dataclass\n",
|
|
"class ReadData:\n",
|
|
" acce: np.ndarray\n",
|
|
" acce_uncali: np.ndarray\n",
|
|
" gyro: np.ndarray\n",
|
|
" gyro_uncali: np.ndarray\n",
|
|
" magn: np.ndarray\n",
|
|
" magn_uncali: np.ndarray\n",
|
|
" ahrs: np.ndarray\n",
|
|
" wifi: np.ndarray\n",
|
|
" ibeacon: np.ndarray\n",
|
|
" waypoint: np.ndarray\n",
|
|
"\n",
|
|
"\n",
|
|
"def read_data_file(data_filename):\n",
|
|
" acce = []\n",
|
|
" acce_uncali = []\n",
|
|
" gyro = []\n",
|
|
" gyro_uncali = []\n",
|
|
" magn = []\n",
|
|
" magn_uncali = []\n",
|
|
" ahrs = []\n",
|
|
" wifi = []\n",
|
|
" ibeacon = []\n",
|
|
" waypoint = []\n",
|
|
"\n",
|
|
" with open(data_filename, 'r', encoding='utf-8') as file:\n",
|
|
" lines = file.readlines()\n",
|
|
"\n",
|
|
" for line_data in lines:\n",
|
|
" line_data = line_data.strip()\n",
|
|
" if not line_data or line_data[0] == '#':\n",
|
|
" continue\n",
|
|
"\n",
|
|
" line_data = line_data.split('\\t')\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_ACCELEROMETER':\n",
|
|
" acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n",
|
|
" acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_GYROSCOPE':\n",
|
|
" gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n",
|
|
" gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n",
|
|
" magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n",
|
|
" magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_ROTATION_VECTOR':\n",
|
|
" if len(line_data)>=5:\n",
|
|
" ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_WIFI':\n",
|
|
" sys_ts = line_data[0]\n",
|
|
" ssid = line_data[2]\n",
|
|
" bssid = line_data[3]\n",
|
|
" rssi = line_data[4]\n",
|
|
" lastseen_ts = line_data[6]\n",
|
|
" wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n",
|
|
" wifi.append(wifi_data)\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_BEACON':\n",
|
|
" ts = line_data[0]\n",
|
|
" uuid = line_data[2]\n",
|
|
" major = line_data[3]\n",
|
|
" minor = line_data[4]\n",
|
|
" rssi = line_data[6]\n",
|
|
" lastts = line_data[-1]\n",
|
|
" ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi, lastts]\n",
|
|
" ibeacon.append(ibeacon_data)\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if line_data[1] == 'TYPE_WAYPOINT':\n",
|
|
" waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n",
|
|
"\n",
|
|
" acce = np.array(acce)\n",
|
|
" acce_uncali = np.array(acce_uncali)\n",
|
|
" gyro = np.array(gyro)\n",
|
|
" gyro_uncali = np.array(gyro_uncali)\n",
|
|
" magn = np.array(magn)\n",
|
|
" magn_uncali = np.array(magn_uncali)\n",
|
|
" ahrs = np.array(ahrs)\n",
|
|
" wifi = np.array(wifi)\n",
|
|
" ibeacon = np.array(ibeacon)\n",
|
|
" waypoint = np.array(waypoint)\n",
|
|
"\n",
|
|
" return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_test_dfs(PATH, test_files):\n",
|
|
" dtest = get_test_df(PATH)\n",
|
|
" buildings = set(dtest['building'].values.tolist())\n",
|
|
" dws = {}\n",
|
|
" ntest_files = []\n",
|
|
" for fname in tqdm(test_files):\n",
|
|
" path = fname.split('/')[-1].split('.')[0]\n",
|
|
" mask = dtest['path'] == path\n",
|
|
" dws[fname] = dtest.loc[mask, ['timestamp','x','y','floor','building','site_path_timestamp']].copy().reset_index(drop=True)\n",
|
|
" ntest_files.append(fname)\n",
|
|
" return dws\n",
|
|
"\n",
|
|
"def get_test_df(PATH):\n",
|
|
" dtest = pd.read_csv(f'{PATH}/sample_submission.csv')\n",
|
|
" dtest['building'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[0])\n",
|
|
" dtest['path'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[1])\n",
|
|
" dtest['timestamp'] = dtest['site_path_timestamp'].apply(lambda x: x.split('_')[2])\n",
|
|
" dtest['timestamp'] = dtest['timestamp'].astype('int64')\n",
|
|
" dtest = dtest.sort_values(['path','timestamp']).reset_index(drop=True)\n",
|
|
" return dtest\n",
|
|
"\n",
|
|
"def get_time_gap(name):\n",
|
|
" data = read_data_file(name)\n",
|
|
" db,no_ibeacon = to_frame(data.ibeacon,'ibeacon')\n",
|
|
"# print(db,no_ibeacon)\n",
|
|
" \n",
|
|
" if no_ibeacon==0:\n",
|
|
" gap = db['last_timestamp'] - db['timestamp']\n",
|
|
" assert gap.unique().shape[0]==1\n",
|
|
" return gap.values[0],no_ibeacon\n",
|
|
" \n",
|
|
" if no_ibeacon==1:\n",
|
|
" # Group wifis by timestamp\n",
|
|
" wifi_groups = pd.DataFrame(data.wifi).groupby(0) \n",
|
|
" # Find which one is the most recent of all time points.\n",
|
|
" est_ts = (wifi_groups[4].max().astype(int) - wifi_groups[0].max().astype(int)).max() \n",
|
|
" return est_ts,no_ibeacon\n",
|
|
"\n",
|
|
" \n",
|
|
"\n",
|
|
"def fix_timestamp_test(df, gap):\n",
|
|
" df['real_timestamp'] = df['timestamp'] + gap\n",
|
|
" return df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['../input/indoor-location-navigation/test/00ff0c9a71cc37a2ebdd0f05.txt',\n",
|
|
" '../input/indoor-location-navigation/test/01c41f1aeba5c48c2c4dd568.txt',\n",
|
|
" '../input/indoor-location-navigation/test/030b3d94de8acae7c936563d.txt',\n",
|
|
" '../input/indoor-location-navigation/test/0389421238a7e2839701df0f.txt']"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"test_files_ori = glob.glob('../input/indoor-location-navigation/test/*.txt')\n",
|
|
"test_files_ori[:4]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/ec2-user/anaconda3/lib/python3.7/site-packages/distributed/dashboard/core.py:79: UserWarning: \n",
|
|
"Port 8787 is already in use. \n",
|
|
"Perhaps you already have a cluster running?\n",
|
|
"Hosting the diagnostics dashboard on a random port instead.\n",
|
|
" warnings.warn(\"\\n\" + msg)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<table style=\"border: 2px solid white;\">\n",
|
|
"<tr>\n",
|
|
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
|
|
"<h3 style=\"text-align: left;\">Client</h3>\n",
|
|
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
|
|
" <li><b>Scheduler: </b>tcp://127.0.0.1:42097</li>\n",
|
|
" <li><b>Dashboard: </b><a href='http://127.0.0.1:39155/status' target='_blank'>http://127.0.0.1:39155/status</a>\n",
|
|
"</ul>\n",
|
|
"</td>\n",
|
|
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
|
|
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
|
|
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
|
|
" <li><b>Workers: </b>8</li>\n",
|
|
" <li><b>Cores: </b>8</li>\n",
|
|
" <li><b>Memory: </b>32.89 GB</li>\n",
|
|
"</ul>\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"</table>"
|
|
],
|
|
"text/plain": [
|
|
"<Client: 'tcp://127.0.0.1:42097' processes=8 threads=8, memory=32.89 GB>"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import dask\n",
|
|
"from dask.distributed import Client, wait, LocalCluster\n",
|
|
"\n",
|
|
"# set n_workers to number of cores\n",
|
|
"client = Client(n_workers=8, \n",
|
|
" threads_per_worker=1)\n",
|
|
"client"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 626/626 [00:00<00:00, 4552.03it/s]\n",
|
|
"100%|██████████| 626/626 [00:16<00:00, 37.39it/s] \n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"futures = []\n",
|
|
"for fname in tqdm(test_files_ori, total=len(test_files_ori)):\n",
|
|
" f = client.submit(get_time_gap,fname)\n",
|
|
" futures.append(f)\n",
|
|
" \n",
|
|
"testpath2gap = {}\n",
|
|
"for f,fname in tqdm(zip(futures, test_files_ori), total=len(test_files_ori)):\n",
|
|
" testpath2gap[fname.split('/')[-1].replace('.txt','')] = f.result()\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pickle\n",
|
|
"with open('testpath2gap.pkl','wb') as f:\n",
|
|
" pickle.dump(testpath2gap,f)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|