fix cmvn and print params

pull/578/head
Hui Zhang 4 years ago
parent 48f4bda3c5
commit 28658cc169

File diff suppressed because it is too large

@@ -77,15 +77,19 @@ class FeatureNormalizer(object):
         :param filepath: File to write mean and stddev.
         :type filepath: str
         """
-        np.savez(filepath, mean=self._mean, std=self._std)
+        np.savez(filepath, mean=self._mean, istd=self._istd)

     def _read_mean_std_from_file(self, filepath, eps=1e-20):
         """Load mean and std from file."""
-        mean, std = load_cmvn(filepath, filetype='npz')
+        mean, istd = load_cmvn(filepath, filetype='npz')
         self._mean = mean.T
-        self._istd = 1.0 / std.T
+        self._istd = istd.T

-    def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
+    def _compute_mean_std(self,
+                          manifest_path,
+                          featurize_func,
+                          num_samples,
+                          eps=1e-20):
         """Compute mean and std from randomly sampled instances."""
         manifest = read_manifest(manifest_path)
         if num_samples == -1:
@@ -98,4 +102,6 @@ class FeatureNormalizer(object):
             featurize_func(AudioSegment.from_file(instance["feat"])))
         features = np.hstack(features)  #(D, T)
         self._mean = np.mean(features, axis=1).reshape([1, -1])  #(1, D)
-        self._std = np.std(features, axis=1).reshape([1, -1])  #(1, D)
+        std = np.std(features, axis=1).reshape([1, -1])  #(1, D)
+        std = np.clip(std, eps, None)
+        self._istd = 1.0 / std
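In effect, the normalizer now precomputes the inverse standard deviation once (clipping std below eps so zero-variance dimensions cannot cause a division by zero), and applying CMVN becomes a subtract and a multiply per frame. A minimal self-contained sketch of that idea in NumPy; the function names here are illustrative, not the repo's API:

import numpy as np

def compute_cmvn_stats(features, eps=1e-20):
    """Compute per-dimension mean and inverse std from features of shape (D, T)."""
    mean = np.mean(features, axis=1).reshape([1, -1])  # (1, D)
    std = np.std(features, axis=1).reshape([1, -1])    # (1, D)
    std = np.clip(std, eps, None)                      # guard against zero variance
    return mean, 1.0 / std                             # store istd, not std

def apply_cmvn(feat, mean, istd):
    """Normalize a (T, D) feature matrix: one subtract, one multiply per frame."""
    return (feat - mean) * istd

feats = np.random.randn(80, 1000)             # (D, T) toy filterbank features
mean, istd = compute_cmvn_stats(feats)
normalized = apply_cmvn(feats.T, mean, istd)  # (T, D), ~zero mean, ~unit variance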

@@ -238,10 +238,8 @@ def _load_kaldi_cmvn(kaldi_cmvn_file):

 def _load_npz_cmvn(npz_cmvn_file, eps=1e-20):
     npzfile = np.load(npz_cmvn_file)
     means = npzfile["mean"]  #(1, D)
-    std = npzfile["std"]  #(1, D)
-    std = np.clip(std, eps, None)
-    variance = 1.0 / std
-    cmvn = np.array([means, variance])
+    istd = npzfile["istd"]  #(1, D)
+    cmvn = np.array([means, istd])
     return cmvn
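Since the writer above now saves the stats under the mean and istd npz keys, the loader reads istd back directly instead of recomputing 1.0 / std. A round-trip sketch under that assumption; the file name is illustrative:

import numpy as np

mean = np.zeros((1, 80))
istd = np.ones((1, 80))

np.savez("cmvn.npz", mean=mean, istd=istd)  # keys must match the loader below

npzfile = np.load("cmvn.npz")
cmvn = np.array([npzfile["mean"], npzfile["istd"]])  # (2, 1, D): stats stacked
assert cmvn.shape == (2, 1, 80)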

@@ -25,7 +25,7 @@ __all__ = [

 def sequence_mask(x_len, max_len=None, dtype='float32'):
-    """[summary]
+    """batch sequence mask.

     Args:
         x_len ([paddle.Tensor]): xs length, [B]
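For reference, the semantics the docstring now names: row b of the (B, T) mask is 1 for the first x_len[b] steps and 0 afterwards. A NumPy sketch of those semantics, not the Paddle implementation:

import numpy as np

def sequence_mask(x_len, max_len=None, dtype='float32'):
    """Batch sequence mask: mask[b, t] = 1 while t < x_len[b], else 0."""
    max_len = max_len or int(np.max(x_len))
    steps = np.arange(max_len)                          # (T,)
    mask = steps[None, :] < np.asarray(x_len)[:, None]  # (B, T) via broadcasting
    return mask.astype(dtype)

print(sequence_mask([2, 4], max_len=5))
# [[1. 1. 0. 0. 0.]
#  [1. 1. 1. 1. 0.]]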

@@ -22,8 +22,6 @@ __all__ = [

 def summary(layer: nn.Layer, print_func=print):
     num_params = num_elements = 0
-    if print_func:
-        print_func(f"{layer.__class__.__name__} summary:")
     for name, param in layer.state_dict().items():
         if print_func:
             print_func(
@@ -31,9 +29,7 @@ def summary(layer: nn.Layer, print_func=print):
         num_elements += np.prod(param.shape)
         num_params += 1
     if print_func:
-        print_func(
-            f"{layer.__class__.__name__} has {num_params} parameters, {num_elements} elements."
-        )
+        print_func(f"Total parameters: {num_params}, {num_elements} elements.")


 def gradient_norm(layer: nn.Layer):
@@ -45,25 +41,6 @@ def gradient_norm(layer: nn.Layer):
     return grad_norm_dict


-def recursively_remove_weight_norm(layer: nn.Layer):
-    for layer in layer.sublayers():
-        try:
-            nn.utils.remove_weight_norm(layer)
-        except ValueError as e:
-            # there is no weight norm hook in this layer
-            pass
-
-
-def freeze(layer: nn.Layer):
-    for param in layer.parameters():
-        param.trainable = False
-
-
-def unfreeze(layer: nn.Layer):
-    for param in layer.parameters():
-        param.trainable = True
-
-
 def print_grads(model, print_func=print):
     if print_func is None:
         return
@@ -75,12 +52,32 @@ def print_grads(model, print_func=print):

 def print_params(model, print_func=print):
     if print_func is None:
         return
     total = 0.0
+    num_params = 0.0
     for n, p in model.named_parameters():
-        msg = f"param: {n}: shape: {p.shape} stop_grad: {p.stop_gradient}"
+        msg = f"{n} | {p.shape} | {np.prod(p.shape)} | {not p.stop_gradient}"
         total += np.prod(p.shape)
+        num_params += 1
         if print_func:
             print_func(msg)
     if print_func:
-        print_func(f"Total parameters: {total}!")
+        print_func(f"Total parameters: {num_params}, {total} elements.")
+
+
+def recursively_remove_weight_norm(layer: nn.Layer):
+    for layer in layer.sublayers():
+        try:
+            nn.utils.remove_weight_norm(layer)
+        except ValueError as e:
+            # there is no weight norm hook in this layer
+            pass
+
+
+def freeze(layer: nn.Layer):
+    for param in layer.parameters():
+        param.trainable = False
+
+
+def unfreeze(layer: nn.Layer):
+    for param in layer.parameters():
+        param.trainable = True
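With this change, print_params emits one "name | shape | size | trainable" row per parameter plus a count-and-elements total, and the relocated freeze/unfreeze helpers simply toggle trainable on every parameter. A usage sketch assuming a Paddle environment; the toy model is illustrative and the import path is hypothetical:

import paddle.nn as nn
# freeze, unfreeze, print_params are the helpers from this diff.
from deepspeech.utils.layer_tools import freeze, unfreeze, print_params

model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))

freeze(model)        # sets param.trainable = False on every parameter
print_params(model)  # rows like "0.weight | [4, 8] | 32 | False", then the totals line
unfreeze(model)      # re-enables training for all parameters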
