fix board logger

4 years ago · 156ccb947b
parent fa6e333422
commit 156ccb947b
3 changed files with 222 additions and 46 deletions
--- a/.notebook/python_test.ipynb
+++ b/.notebook/python_test.ipynb
@ -637,7 +637,7 @@
  {
   "cell_type": "code",
   "execution_count": 59,
-   "id": "norwegian-cleveland",
+   "id": "engaged-offense",
   "metadata": {},
   "outputs": [
    {
@ -660,7 +660,7 @@
  {
   "cell_type": "code",
   "execution_count": 35,
-   "id": "endless-kidney",
+   "id": "level-fairy",
   "metadata": {},
   "outputs": [
    {
@ -705,7 +705,7 @@
  {
   "cell_type": "code",
   "execution_count": 3,
-   "id": "later-louisiana",
+   "id": "beautiful-geometry",
   "metadata": {},
   "outputs": [
    {
@ -728,7 +728,7 @@
  {
   "cell_type": "code",
   "execution_count": 4,
-   "id": "funded-nudist",
+   "id": "african-trustee",
   "metadata": {},
   "outputs": [
    {
@ -748,7 +748,7 @@
  {
   "cell_type": "code",
   "execution_count": 5,
-   "id": "contrary-affiliation",
+   "id": "ready-wages",
   "metadata": {},
   "outputs": [],
   "source": [
@ -758,7 +758,7 @@
  {
   "cell_type": "code",
   "execution_count": 6,
-   "id": "friendly-interpretation",
+   "id": "distinguished-printer",
   "metadata": {},
   "outputs": [
    {
@ -776,7 +776,7 @@
  {
   "cell_type": "code",
   "execution_count": 7,
-   "id": "bottom-wilderness",
+   "id": "precious-limit",
   "metadata": {},
   "outputs": [
    {
@ -809,7 +809,7 @@
  {
   "cell_type": "code",
   "execution_count": 17,
-   "id": "acquired-jacksonville",
+   "id": "chemical-convenience",
   "metadata": {},
   "outputs": [
    {
@ -839,7 +839,7 @@
  {
   "cell_type": "code",
   "execution_count": 18,
-   "id": "entertaining-capture",
+   "id": "round-remark",
   "metadata": {},
   "outputs": [
    {
@ -871,7 +871,7 @@
  {
   "cell_type": "code",
   "execution_count": 19,
-   "id": "amber-grade",
+   "id": "smaller-shower",
   "metadata": {},
   "outputs": [
    {
@ -903,7 +903,7 @@
  {
   "cell_type": "code",
   "execution_count": 31,
-   "id": "hidden-playback",
+   "id": "integrated-block",
   "metadata": {},
   "outputs": [
    {
@ -935,7 +935,7 @@
  {
   "cell_type": "code",
   "execution_count": 32,
-   "id": "twelve-university",
+   "id": "favorite-failure",
   "metadata": {},
   "outputs": [
    {
@ -966,7 +966,7 @@
  {
   "cell_type": "code",
   "execution_count": 20,
-   "id": "minor-endorsement",
+   "id": "boolean-saint",
   "metadata": {},
   "outputs": [],
   "source": [
@ -977,7 +977,7 @@
  {
   "cell_type": "code",
   "execution_count": 46,
-   "id": "upper-majority",
+   "id": "senior-hospital",
   "metadata": {},
   "outputs": [
    {
@ -997,7 +997,7 @@
  {
   "cell_type": "code",
   "execution_count": 30,
-   "id": "supreme-coverage",
+   "id": "consolidated-incident",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1007,7 +1007,7 @@
  {
   "cell_type": "code",
   "execution_count": 31,
-   "id": "tough-domain",
+   "id": "pursuant-paragraph",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1017,7 +1017,7 @@
  {
   "cell_type": "code",
   "execution_count": 47,
-   "id": "indian-empire",
+   "id": "mexican-apollo",
   "metadata": {},
   "outputs": [
    {
@ -1038,7 +1038,7 @@
  {
   "cell_type": "code",
   "execution_count": 1,
-   "id": "horizontal-paragraph",
+   "id": "encouraging-integration",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1049,7 +1049,7 @@
  {
   "cell_type": "code",
   "execution_count": 56,
-   "id": "homeless-zoning",
+   "id": "trying-auckland",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1059,7 +1059,7 @@
  {
   "cell_type": "code",
   "execution_count": 58,
-   "id": "floating-atmosphere",
+   "id": "national-edward",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1069,7 +1069,7 @@
  {
   "cell_type": "code",
   "execution_count": 60,
-   "id": "stupid-reducing",
+   "id": "aerial-campaign",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1079,7 +1079,7 @@
  {
   "cell_type": "code",
   "execution_count": 66,
-   "id": "practical-airline",
+   "id": "instant-violence",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1089,7 +1089,7 @@
  {
   "cell_type": "code",
   "execution_count": 95,
-   "id": "apart-comfort",
+   "id": "medical-globe",
   "metadata": {},
   "outputs": [
    {
@ -1110,7 +1110,7 @@
  {
   "cell_type": "code",
   "execution_count": 81,
-   "id": "underlying-brand",
+   "id": "three-contrast",
   "metadata": {},
   "outputs": [
    {
@ -1131,7 +1131,7 @@
  {
   "cell_type": "code",
   "execution_count": 11,
-   "id": "german-things",
+   "id": "cross-atlas",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1161,7 +1161,7 @@
  {
   "cell_type": "code",
   "execution_count": 12,
-   "id": "third-regression",
+   "id": "empirical-defense",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1172,7 +1172,7 @@
  {
   "cell_type": "code",
   "execution_count": 14,
-   "id": "thick-korea",
+   "id": "rocky-listening",
   "metadata": {},
   "outputs": [
    {
@ -1201,7 +1201,7 @@
  {
   "cell_type": "code",
   "execution_count": 13,
-   "id": "institutional-hands",
+   "id": "surrounded-absolute",
   "metadata": {},
   "outputs": [
    {
@ -1230,7 +1230,7 @@
  {
   "cell_type": "code",
   "execution_count": 15,
-   "id": "brave-native",
+   "id": "differential-surgery",
   "metadata": {},
   "outputs": [
    {
@ -1260,7 +1260,7 @@
  {
   "cell_type": "code",
   "execution_count": 29,
-   "id": "turkish-ticket",
+   "id": "durable-powell",
   "metadata": {},
   "outputs": [
    {
@ -1290,7 +1290,7 @@
  {
   "cell_type": "code",
   "execution_count": 30,
-   "id": "executed-excerpt",
+   "id": "young-continuity",
   "metadata": {},
   "outputs": [
    {
@ -1308,7 +1308,7 @@
  {
   "cell_type": "code",
   "execution_count": 22,
-   "id": "continental-boring",
+   "id": "geological-sarah",
   "metadata": {},
   "outputs": [
    {
@ -1343,7 +1343,7 @@
  {
   "cell_type": "code",
   "execution_count": 23,
-   "id": "linear-assembly",
+   "id": "possible-angle",
   "metadata": {},
   "outputs": [
    {
@ -1376,7 +1376,7 @@
  {
   "cell_type": "code",
   "execution_count": 33,
-   "id": "applied-louis",
+   "id": "novel-sucking",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1386,7 +1386,7 @@
  {
   "cell_type": "code",
   "execution_count": 34,
-   "id": "historic-struggle",
+   "id": "fixed-wallet",
   "metadata": {},
   "outputs": [
    {
@ -1428,7 +1428,7 @@
  {
   "cell_type": "code",
   "execution_count": 35,
-   "id": "monthly-roads",
+   "id": "north-seattle",
   "metadata": {},
   "outputs": [],
   "source": [
@ -1438,7 +1438,7 @@
  {
   "cell_type": "code",
   "execution_count": 38,
-   "id": "boxed-peoples",
+   "id": "above-western",
   "metadata": {},
   "outputs": [
    {
@ -1471,17 +1471,99 @@
  {
   "cell_type": "code",
   "execution_count": 41,
-   "id": "fresh-tender",
+   "id": "choice-diabetes",
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "white-vessel",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2, 2)\n",
      "[ 1 20]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "l = [(1, 20), (2, 30)]\n",
    "scores = np.array(l)\n",
    "print(scores.shape)\n",
    "print(scores[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "treated-freedom",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1]\n"
     ]
    }
   ],
   "source": [
    "sort_idx = np.argsort(scores[:, -1])\n",
    "print(sort_idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "convinced-safety",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1 20]\n",
      " [ 2 30]]\n"
     ]
    }
   ],
   "source": [
    "sorted_val_scores = scores[sort_idx][::1]\n",
    "print(sorted_val_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "blond-bunny",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1 20]\n",
      " [ 2 30]]\n"
     ]
    }
   ],
   "source": [
    "sorted_val_scores = scores[sort_idx]\n",
    "print(sorted_val_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "religious-peripheral",
+   "id": "utility-monroe",
   "metadata": {},
   "outputs": [],
   "source": []
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@ -89,9 +89,8 @@ class U2Trainer(Trainer):
        if (batch_index + 1) % train_conf.accum_grad == 0:
            if dist.get_rank() == 0 and self.visualizer:
-                for k, v in losses_np.items():
+                losses_np.update({"lr": self.lr_scheduler()})
-                    self.visualizer.add_scalar("train/{}".format(k), v,
+                self.visualizer.add_scalars("step", losses_np, self.iteration)
                                               self.iteration)
            self.optimizer.step()
            self.optimizer.clear_grad()
            self.lr_scheduler.step()
@ -144,7 +143,7 @@ class U2Trainer(Trainer):
                raise e
            valid_losses = self.valid()
-            self.save(infos=valid_losses)
+            self.save(tag=self.epoch, infos=valid_losses)
            self.new_epoch()
    @mp_tools.rank_zero_only
@ -172,9 +171,8 @@ class U2Trainer(Trainer):
        logger.info(msg)
        if self.visualizer:
-            for k, v in valid_losses.items():
+            valid_losses.update({"lr": self.lr_scheduler()})
-                self.visualizer.add_scalar("valid/{}".format(k), v,
+            self.visualizer.add_scalars('epoch', valid_losses, self.epoch)
                                           self.iteration)
        return valid_losses
    def setup_dataloader(self):
--- a/utils/avg_model.py
+++ b/utils/avg_model.py
@ -0,0 +1,96 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
 import glob
 import json
 import os
 import numpy as np
 import paddle
 def main(args):
    """Average the parameters of several checkpoints and save the result.

    Checkpoints are chosen in one of two ways:

    * ``args.val_best`` set: every ``*.json`` file matched in
      ``args.ckpt_dir`` is read, entries whose ``epoch`` lies within
      ``[args.min_epoch, args.max_epoch]`` are ranked by ascending
      ``valid_loss`` and the ``args.num`` best epochs are used
      (``<ckpt_dir>/<epoch>.pdparams``).
    * otherwise: the ``args.num`` most recently modified ``*.pdparams``
      files matched in ``args.ckpt_dir`` are used.

    The element-wise mean of the selected state dicts is written to
    ``args.dst_model`` with ``paddle.save``.

    Args:
        args: parsed command-line namespace providing ``ckpt_dir``,
            ``dst_model``, ``val_best``, ``num``, ``min_epoch`` and
            ``max_epoch``.

    Raises:
        ValueError: if no validation record falls inside the epoch range,
            or if the number of selected checkpoints differs from
            ``args.num``.
    """
    num = args.num
    if args.val_best:
        # NOTE(review): ``[!train]`` is a one-character class (first char
        # not in t/r/a/i/n), not a "does not start with 'train'" test.
        jsons = glob.glob(f'{args.ckpt_dir}/[!train]*.json')
        val_scores = []
        for y in jsons:
            # json.load() needs a file object; ``y`` is only the path.
            with open(y, 'r') as f:
                dic_json = json.load(f)
            # NOTE(review): confirm these key names match what the trainer
            # writes into the per-checkpoint json files.
            loss = dic_json['valid_loss']
            epoch = dic_json['epoch']
            if args.min_epoch <= epoch <= args.max_epoch:
                val_scores.append((epoch, loss))
        if not val_scores:
            # Without this the 2-D indexing below fails with an opaque
            # IndexError on an empty array.
            raise ValueError(
                f'no validation scores found in {args.ckpt_dir} for epochs '
                f'in [{args.min_epoch}, {args.max_epoch}]')
        val_scores = np.array(val_scores)
        # Ascending sort on the loss column: lowest loss first.
        sort_idx = np.argsort(val_scores[:, 1])
        sorted_val_scores = val_scores[sort_idx]
        # The array holds epoch and loss together, so the epoch is cast
        # back to int before it is used as a file name.
        path_list = [
            args.ckpt_dir + '/{}.pdparams'.format(int(epoch))
            for epoch in sorted_val_scores[:num, 0]
        ]
        print("best val scores = " + str(sorted_val_scores[:num, 1]))
        print("selected epochs = " + str(sorted_val_scores[:num, 0].astype(
            np.int64)))
    else:
        # NOTE(review): the two character classes each consume exactly one
        # character, so names with a one-character stem (e.g.
        # ``5.pdparams``) are not matched by this pattern -- confirm this
        # is intended.
        path_list = glob.glob(f'{args.ckpt_dir}/[!avg][!final]*.pdparams')
        path_list = sorted(path_list, key=os.path.getmtime)
        path_list = path_list[-num:]

    print(path_list)
    # Explicit check instead of ``assert`` so it also runs under ``-O`` and
    # reports how many checkpoints were actually found.
    if len(path_list) != num:
        raise ValueError(
            f'expected {num} checkpoints to average but found '
            f'{len(path_list)}')

    avg = None
    for path in path_list:
        print(f'Processing {path}')
        states = paddle.load(path)
        if avg is None:
            # The first checkpoint doubles as the accumulator.
            avg = states
        else:
            for k in avg.keys():
                avg[k] += states[k]

    # average
    for k in avg.keys():
        if avg[k] is not None:
            # paddle.divide is documented to take two Tensors; dividing by
            # the Python int directly avoids passing it a scalar.
            avg[k] = avg[k] / num

    paddle.save(avg, args.dst_model)
    print(f'Saving to {args.dst_model}')
 if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='average model')
    parser.add_argument('--dst_model', required=True, help='averaged model')
    parser.add_argument(
        '--ckpt_dir', required=True, help='ckpt model dir for average')
    # The help text used to repeat the --dst_model description; it now
    # states what the flag does.
    parser.add_argument(
        '--val_best',
        action="store_true",
        help='average the checkpoints with the best validation loss '
        'instead of the most recent ones')
    parser.add_argument(
        '--num', default=5, type=int, help='nums for averaged model')
    parser.add_argument(
        '--min_epoch',
        default=0,
        type=int,
        help='min epoch used for averaging model')
    parser.add_argument(
        '--max_epoch',
        default=65536,  # Big enough
        type=int,
        help='max epoch used for averaging model')
    args = parser.parse_args()
    print(args)

    main(args)