{ "cells": [ { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.017006, "end_time": "2021-02-19T07:32:16.999516", "exception": false, "start_time": "2021-02-19T07:32:16.982510", "status": "completed" }, "tags": [] }, "source": [ "In this notebook, I'd like to share how to label encode all wifi files in parallel using dask. On my workstation with 20 cpu cores, it took only 10 mins to encode all the data. Due to the limitation of kaggle kernel, I have to set number of workers to 2. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:17.032789Z", "iopub.status.busy": "2021-02-19T07:32:17.031894Z", "iopub.status.idle": "2021-02-19T07:32:17.783035Z", "shell.execute_reply": "2021-02-19T07:32:17.783614Z" }, "papermill": { "duration": 0.769447, "end_time": "2021-02-19T07:32:17.783971", "exception": false, "start_time": "2021-02-19T07:32:17.014524", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "metadata sample_submission.csv test train\r\n" ] } ], "source": [ "! ls ../input/indoor-location-navigation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:17.817174Z", "iopub.status.busy": "2021-02-19T07:32:17.816429Z", "iopub.status.idle": "2021-02-19T07:32:18.815016Z", "shell.execute_reply": "2021-02-19T07:32:18.813025Z" }, "papermill": { "duration": 1.016149, "end_time": "2021-02-19T07:32:18.815223", "exception": false, "start_time": "2021-02-19T07:32:17.799074", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "from glob import glob\n", "import os\n", "import sys\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:18.859430Z", "iopub.status.busy": "2021-02-19T07:32:18.854138Z", "iopub.status.idle": "2021-02-19T07:32:19.455581Z", "shell.execute_reply": "2021-02-19T07:32:19.455022Z" }, "papermill": { "duration": 0.62535, "end_time": "2021-02-19T07:32:19.455739", "exception": false, "start_time": "2021-02-19T07:32:18.830389", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from tqdm import tqdm\n", "from sklearn.preprocessing import LabelEncoder\n", "from glob import glob\n", "from dask.distributed import wait\n", "\n", "SENSORS = ['acce','acce_uncali','gyro',\n", " 'gyro_uncali','magn','magn_uncali','ahrs']\n", "\n", "NFEAS = {\n", " 'acce': 3,\n", " 'acce_uncali': 3,\n", " 'gyro': 3,\n", " 'gyro_uncali': 3,\n", " 'magn': 3,\n", " 'magn_uncali': 3,\n", " 'ahrs': 3,\n", " 'wifi': 1,\n", " 'ibeacon': 1,\n", " 'waypoint': 3\n", "}\n", "\n", "ACOLS = ['timestamp','x','y','z']\n", " \n", "FIELDS = {\n", " 'acce': ACOLS,\n", " 'acce_uncali': ACOLS,\n", " 'gyro': ACOLS,\n", " 'gyro_uncali': ACOLS,\n", " 'magn': ACOLS,\n", " 'magn_uncali': ACOLS,\n", " 'ahrs': ACOLS,\n", " 'wifi': ['timestamp','ssid','bssid','rssi','last_timestamp'],\n", " 'ibeacon': ['timestamp','code','rssi'],\n", " 'waypoint': ['timestamp','x','y']\n", "}\n", "\n", "def to_frame(data, col):\n", " cols = FIELDS[col]\n", " if data.shape[0]>0:\n", " df = pd.DataFrame(data, columns=cols)\n", " else:\n", " df = create_dummy_df(cols)\n", " for col in df.columns:\n", " if 'timestamp' in col:\n", " df[col] = df[col].astype('int64')\n", " return df\n", "\n", "def create_dummy_df(cols):\n", " df = pd.DataFrame()\n", " for col in cols:\n", " df[col] = [0]\n", " if col in ['ssid','bssid']:\n", " df[col] = df[col].map(str)\n", " return df\n", "\n", "def get_building_floor(fname):\n", " xx = fname.split('/')\n", " return xx[-3],xx[-2]\n", "\n", "def get_test_building(name):\n", " with open(name) as f:\n", " for c,line in enumerate(f):\n", " if c==1:\n", " x = line.split()[1].split(':')[1]\n", " return x \n", "\n", "def get_floor_target(floor):\n", " floor = floor.lower()\n", " if floor in ['bf','bm']:\n", " return None\n", " elif floor == 'b':\n", " return -1\n", " if floor.startswith('f'):\n", " return int(floor[1])-1\n", " elif floor.endswith('f'):\n", " return int(floor[0])-1\n", " elif floor.startswith('b'):\n", " return -int(floor[1])\n", " elif floor.endswith('b'):\n", " return -int(floor[0])\n", " else:\n", " return None\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:19.505222Z", "iopub.status.busy": "2021-02-19T07:32:19.494804Z", "iopub.status.idle": "2021-02-19T07:32:19.517069Z", "shell.execute_reply": "2021-02-19T07:32:19.516496Z" }, "papermill": { "duration": 0.04658, "end_time": "2021-02-19T07:32:19.517227", "exception": false, "start_time": "2021-02-19T07:32:19.470647", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "from dataclasses import dataclass\n", "\n", "import numpy as np\n", "\n", "\n", "@dataclass\n", "class ReadData:\n", " acce: np.ndarray\n", " acce_uncali: np.ndarray\n", " gyro: np.ndarray\n", " gyro_uncali: np.ndarray\n", " magn: np.ndarray\n", " magn_uncali: np.ndarray\n", " ahrs: np.ndarray\n", " wifi: np.ndarray\n", " ibeacon: np.ndarray\n", " waypoint: np.ndarray\n", "\n", "\n", "def read_data_file(data_filename):\n", " acce = []\n", " acce_uncali = []\n", " gyro = []\n", " gyro_uncali = []\n", " magn = []\n", " magn_uncali = []\n", " ahrs = []\n", " wifi = []\n", " ibeacon = []\n", " waypoint = []\n", "\n", " with open(data_filename, 'r', encoding='utf-8') as file:\n", " lines = file.readlines()\n", "\n", " for line_data in lines:\n", " line_data = line_data.strip()\n", " if not line_data or line_data[0] == '#':\n", " continue\n", "\n", " line_data = line_data.split('\\t')\n", "\n", " if line_data[1] == 'TYPE_ACCELEROMETER':\n", " acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':\n", " acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_GYROSCOPE':\n", " gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':\n", " gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_MAGNETIC_FIELD':\n", " magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':\n", " magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_ROTATION_VECTOR':\n", " if len(line_data)>=5:\n", " ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])\n", " continue\n", "\n", " if line_data[1] == 'TYPE_WIFI':\n", " sys_ts = line_data[0]\n", " ssid = line_data[2]\n", " bssid = line_data[3]\n", " rssi = line_data[4]\n", " lastseen_ts = line_data[6]\n", " wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]\n", " wifi.append(wifi_data)\n", " continue\n", "\n", " if line_data[1] == 'TYPE_BEACON':\n", " ts = line_data[0]\n", " uuid = line_data[2]\n", " major = line_data[3]\n", " minor = line_data[4]\n", " rssi = line_data[6]\n", " ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]\n", " ibeacon.append(ibeacon_data)\n", " continue\n", "\n", " if line_data[1] == 'TYPE_WAYPOINT':\n", " waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])\n", "\n", " acce = np.array(acce)\n", " acce_uncali = np.array(acce_uncali)\n", " gyro = np.array(gyro)\n", " gyro_uncali = np.array(gyro_uncali)\n", " magn = np.array(magn)\n", " magn_uncali = np.array(magn_uncali)\n", " ahrs = np.array(ahrs)\n", " wifi = np.array(wifi)\n", " ibeacon = np.array(ibeacon)\n", " waypoint = np.array(waypoint)\n", "\n", " return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:19.552943Z", "iopub.status.busy": "2021-02-19T07:32:19.551880Z", "iopub.status.idle": "2021-02-19T07:32:19.555636Z", "shell.execute_reply": "2021-02-19T07:32:19.555024Z" }, "papermill": { "duration": 0.023563, "end_time": "2021-02-19T07:32:19.555795", "exception": false, "start_time": "2021-02-19T07:32:19.532232", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import dask\n", "from dask.distributed import Client, wait, LocalCluster" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:19.591248Z", "iopub.status.busy": "2021-02-19T07:32:19.590578Z", "iopub.status.idle": "2021-02-19T07:32:21.064629Z", "shell.execute_reply": "2021-02-19T07:32:21.065319Z" }, "papermill": { "duration": 1.494609, "end_time": "2021-02-19T07:32:21.065605", "exception": false, "start_time": "2021-02-19T07:32:19.570996", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "

Client

\n", "\n", "
\n", "

Cluster

\n", "
    \n", "
  • Workers: 2
  • \n", "
  • Cores: 2
  • \n", "
  • Memory: 17.18 GB
  • \n", "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# set n_workers to number of cores\n", "client = Client(n_workers=2, \n", " threads_per_worker=1)\n", "client" ] }, { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.015792, "end_time": "2021-02-19T07:32:21.097941", "exception": false, "start_time": "2021-02-19T07:32:21.082149", "status": "completed" }, "tags": [] }, "source": [ "### Label encode wifi ID features" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:21.137129Z", "iopub.status.busy": "2021-02-19T07:32:21.136113Z", "iopub.status.idle": "2021-02-19T07:32:21.139703Z", "shell.execute_reply": "2021-02-19T07:32:21.139149Z" }, "papermill": { "duration": 0.025247, "end_time": "2021-02-19T07:32:21.139872", "exception": false, "start_time": "2021-02-19T07:32:21.114625", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "PATH = '../input/indoor-location-navigation'\n", "OUT = '../input/wifi_lbl_encode'\n", "os.mkdir(OUT)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:21.178354Z", "iopub.status.busy": "2021-02-19T07:32:21.177687Z", "iopub.status.idle": "2021-02-19T07:32:21.188733Z", "shell.execute_reply": "2021-02-19T07:32:21.187818Z" }, "papermill": { "duration": 0.031758, "end_time": "2021-02-19T07:32:21.188900", "exception": false, "start_time": "2021-02-19T07:32:21.157142", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def read_wifi_ssid_bssid(name):\n", " data = read_data_file(name)\n", " dw = to_frame(data.wifi,'wifi')\n", " ss = dw['ssid'].unique()\n", " bs = dw['bssid'].unique()\n", " return ss,bs\n", "\n", "def encode_wifi_ssid_bssid(name, lbl_ssid, lbl_bssid):\n", " data = read_data_file(name)\n", " dw = to_frame(data.wifi,'wifi')\n", " dw['ssid'] = lbl_ssid.transform(dw['ssid'])\n", " dw['bssid'] = lbl_bssid.transform(dw['bssid'])\n", " name = name.replace(PATH,OUT)\n", " dw.to_csv(name,index=False)\n", " return dw" ] }, { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.016428, "end_time": "2021-02-19T07:32:21.222099", "exception": false, "start_time": "2021-02-19T07:32:21.205671", "status": "completed" }, "tags": [] }, "source": [ "### Run" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:21.258939Z", "iopub.status.busy": "2021-02-19T07:32:21.258260Z", "iopub.status.idle": "2021-02-19T07:32:25.615593Z", "shell.execute_reply": "2021-02-19T07:32:25.615024Z" }, "papermill": { "duration": 4.377188, "end_time": "2021-02-19T07:32:25.615758", "exception": false, "start_time": "2021-02-19T07:32:21.238570", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(626, 26925)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_files = glob(f'{PATH}/train/*/*/*.txt')\n", "test_files = glob(f'{PATH}/test/*.txt')\n", "len(test_files),len(train_files)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:25.663419Z", "iopub.status.busy": "2021-02-19T07:32:25.662683Z", "iopub.status.idle": "2021-02-19T07:32:25.734282Z", "shell.execute_reply": "2021-02-19T07:32:25.733346Z" }, "papermill": { "duration": 0.100222, "end_time": "2021-02-19T07:32:25.734466", "exception": false, "start_time": "2021-02-19T07:32:25.634244", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(numpy.ndarray,\n", " (19,),\n", " (19,),\n", " '../input/indoor-location-navigation/train/5cd56c0ce2acfd2d33b6ab27/B1/5d09a625bd54340008acddb9.txt')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "name = train_files[0]\n", "ss,bs = read_wifi_ssid_bssid(name)\n", "type(ss),ss.shape,bs.shape,name" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:25.783302Z", "iopub.status.busy": "2021-02-19T07:32:25.782524Z", "iopub.status.idle": "2021-02-19T07:32:25.944817Z", "shell.execute_reply": "2021-02-19T07:32:25.945372Z" }, "papermill": { "duration": 0.193007, "end_time": "2021-02-19T07:32:25.945570", "exception": false, "start_time": "2021-02-19T07:32:25.752563", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "os.mkdir(f'{OUT}/train')\n", "os.mkdir(f'{OUT}/test')\n", "for site in os.listdir(f'{PATH}/train'):\n", " os.mkdir(f'{OUT}/train/{site}')\n", " for floor in os.listdir(f'{PATH}/train/{site}'):\n", " os.mkdir(f'{OUT}/train/{site}/{floor}')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:25.987521Z", "iopub.status.busy": "2021-02-19T07:32:25.986482Z", "iopub.status.idle": "2021-02-19T07:32:26.098839Z", "shell.execute_reply": "2021-02-19T07:32:26.099376Z" }, "papermill": { "duration": 0.135143, "end_time": "2021-02-19T07:32:26.099568", "exception": false, "start_time": "2021-02-19T07:32:25.964425", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 26925/26925 [00:00<00:00, 288117.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 97.7 ms, sys: 2.84 ms, total: 101 ms\n", "Wall time: 101 ms\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "%%time\n", "buildings = []\n", "floors = []\n", "used = []\n", "for fname in tqdm(train_files):\n", " b,f = get_building_floor(fname)\n", " f = get_floor_target(f)\n", " if f is None:\n", " continue\n", " used.append(fname)\n", " buildings.append(b)\n", " floors.append(f)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T07:32:26.143381Z", "iopub.status.busy": "2021-02-19T07:32:26.142328Z", "iopub.status.idle": "2021-02-19T08:43:54.571192Z", "shell.execute_reply": "2021-02-19T08:43:54.571750Z" }, "papermill": { "duration": 4288.452672, "end_time": "2021-02-19T08:43:54.572122", "exception": false, "start_time": "2021-02-19T07:32:26.119450", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 24732/24732 [00:11<00:00, 2222.39it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 39min 16s, sys: 1min 40s, total: 40min 57s\n", "Wall time: 1h 11min 28s\n" ] } ], "source": [ "%%time\n", "futures = [] # save the future since dask is lazy, otherwise nothing is executed.\n", "for fname in tqdm(test_files+used):\n", " f = client.submit(read_wifi_ssid_bssid,fname) \n", " futures.append(f) \n", "_ = wait(futures)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T08:43:54.670582Z", "iopub.status.busy": "2021-02-19T08:43:54.669650Z", "iopub.status.idle": "2021-02-19T08:45:35.428441Z", "shell.execute_reply": "2021-02-19T08:45:35.427413Z" }, "papermill": { "duration": 100.812271, "end_time": "2021-02-19T08:45:35.428636", "exception": false, "start_time": "2021-02-19T08:43:54.616365", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 24732/24732 [01:40<00:00, 246.25it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 1min 9s, sys: 9.36 s, total: 1min 18s\n", "Wall time: 1min 40s\n" ] }, { "data": { "text/plain": [ "((1536700,), (4858800,))" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "ss = []\n", "bs = []\n", "for f in tqdm(futures):\n", " s,b = f.result()\n", " ss.append(s)\n", " bs.append(b)\n", "ss = np.concatenate(ss)\n", "bs = np.concatenate(bs)\n", "ss.shape, bs.shape" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T08:45:36.434012Z", "iopub.status.busy": "2021-02-19T08:45:36.423373Z", "iopub.status.idle": "2021-02-19T08:45:36.455609Z", "shell.execute_reply": "2021-02-19T08:45:36.454894Z" }, "papermill": { "duration": 0.675897, "end_time": "2021-02-19T08:45:36.455784", "exception": false, "start_time": "2021-02-19T08:45:35.779887", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(74246,)\n", "CPU times: user 283 ms, sys: 6.93 ms, total: 290 ms\n", "Wall time: 289 ms\n" ] }, { "data": { "text/plain": [ "array(['0', '000015ed38fdbc763149432d7ba3b7bd208461d3',\n", " '000073084baa0ea60776e25c07c6ee6b988aa072', ...,\n", " 'fffe51167f1ad1bf26dda45ccfc40b5d7fab8384',\n", " 'fffe9c57a9b25623ac219260c1b5155087a788e9',\n", " 'ffff286949cbddd576ed8b2d5e16ce30eab87af6'], dtype=object)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "lbl_ssid = LabelEncoder()\n", "lbl_ssid.fit(ss)\n", "print(lbl_ssid.classes_.shape)\n", "lbl_ssid.classes_" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T08:45:38.090288Z", "iopub.status.busy": "2021-02-19T08:45:38.089396Z", "iopub.status.idle": "2021-02-19T08:45:38.127469Z", "shell.execute_reply": "2021-02-19T08:45:38.126844Z" }, "papermill": { "duration": 1.317812, "end_time": "2021-02-19T08:45:38.127643", "exception": false, "start_time": "2021-02-19T08:45:36.809831", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(216210,)\n", "CPU times: user 948 ms, sys: 9.79 ms, total: 958 ms\n", "Wall time: 942 ms\n" ] }, { "data": { "text/plain": [ "array(['0', '00001d7b6fbf0a24da65285b686b03c6e796962a',\n", " '0000fe40d201cfc6cada502b07f29883cd17fe4a', ...,\n", " 'ffff295a9e6ae90d9de67a1e1939ba969b765259',\n", " 'ffff2c098362b016764229a1bb5e06d10cf0d895',\n", " 'ffffb8116ceb5c0326ec2eb039028ec71ffdfbab'], dtype=object)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "lbl_bssid = LabelEncoder()\n", "lbl_bssid.fit(bs)\n", "print(lbl_bssid.classes_.shape)\n", "lbl_bssid.classes_" ] }, { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.349307, "end_time": "2021-02-19T08:45:38.841314", "exception": false, "start_time": "2021-02-19T08:45:38.492007", "status": "completed" }, "tags": [] }, "source": [ "Let's look at one encode result." ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T08:45:39.579857Z", "iopub.status.busy": "2021-02-19T08:45:39.579079Z", "iopub.status.idle": "2021-02-19T08:45:39.910385Z", "shell.execute_reply": "2021-02-19T08:45:39.909623Z" }, "papermill": { "duration": 0.720966, "end_time": "2021-02-19T08:45:39.910549", "exception": false, "start_time": "2021-02-19T08:45:39.189583", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampssidbssidrssilast_timestamp
015609133701165477469149-901560913363914
1156091337011618591182228-801560913363973
2156091337011654874109727-871560913354584
3156091337011636901124351-851560913351034
4156091337011651444114314-841560913363543
\n", "
" ], "text/plain": [ " timestamp ssid bssid rssi last_timestamp\n", "0 1560913370116 54774 69149 -90 1560913363914\n", "1 1560913370116 18591 182228 -80 1560913363973\n", "2 1560913370116 54874 109727 -87 1560913354584\n", "3 1560913370116 36901 124351 -85 1560913351034\n", "4 1560913370116 51444 114314 -84 1560913363543" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "encode_wifi_ssid_bssid(name, lbl_ssid, lbl_bssid)\n", "name = name.replace(PATH,OUT)\n", "df = pd.read_csv(name)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T08:45:40.737958Z", "iopub.status.busy": "2021-02-19T08:45:40.736888Z", "iopub.status.idle": "2021-02-19T08:45:43.340615Z", "shell.execute_reply": "2021-02-19T08:45:43.339718Z" }, "papermill": { "duration": 3.060116, "end_time": "2021-02-19T08:45:43.340782", "exception": false, "start_time": "2021-02-19T08:45:40.280666", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 284 ms, sys: 34 ms, total: 317 ms\n", "Wall time: 2.6 s\n" ] } ], "source": [ "%%time\n", "\n", "lbl_ssid_f = client.scatter(lbl_ssid)\n", "lbl_bssid_f = client.scatter(lbl_bssid)\n", "_ = wait(lbl_ssid_f)\n", "_ = wait(lbl_bssid_f)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "execution": { "iopub.execute_input": "2021-02-19T08:45:44.071507Z", "iopub.status.busy": "2021-02-19T08:45:44.070692Z", "iopub.status.idle": "2021-02-19T10:47:34.115015Z", "shell.execute_reply": "2021-02-19T10:47:34.115693Z" }, "papermill": { "duration": 7310.41872, "end_time": "2021-02-19T10:47:34.116072", "exception": false, "start_time": "2021-02-19T08:45:43.697352", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 24732/24732 [00:17<00:00, 1415.46it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 54min 44s, sys: 2min 6s, total: 56min 50s\n", "Wall time: 2h 1min 49s\n" ] } ], "source": [ "%%time\n", "futures = [] # save the future since dask is lazy, otherwise nothing is executed.\n", "\n", "for fname in tqdm(test_files+used):\n", " f = client.submit(encode_wifi_ssid_bssid, fname, lbl_ssid_f, lbl_bssid_f)\n", " futures.append(f) \n", "_ = wait(futures)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" }, "papermill": { "default_parameters": {}, "duration": 11729.018326, "end_time": "2021-02-19T10:47:39.772795", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2021-02-19T07:32:10.754469", "version": "2.2.2" } }, "nbformat": 4, "nbformat_minor": 4 }