# PaddleSpeech/paddlespeech/text/exps/ernie_linear/test.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import numpy as np
import paddle
import pandas as pd
import yaml
from paddle import nn
from paddle.io import DataLoader
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from yacs.config import CfgNode

from paddlespeech.text.models.ernie_linear import ErnieLinear
from paddlespeech.text.models.ernie_linear import PuncDataset
from paddlespeech.text.models.ernie_linear import PuncDatasetFromErnieTokenizer

# Registry of model classes; test() picks one via config["model_type"].
DefinedClassifier = {
    'ErnieLinear': ErnieLinear,
}

# Registry of loss classes keyed by a short name. Not referenced by the
# code visible in this file.
DefinedLoss = {
    "ce": nn.CrossEntropyLoss,
}

# Registry of dataset classes; test() picks one via config["dataset_type"].
DefinedDataset = {
    'Punc': PuncDataset,
    'Ernie': PuncDatasetFromErnieTokenizer,
}


def evaluation(y_pred, y_test):
    """Build a precision / recall / F1 table for the punctuation classes.

    Only label ids 1, 2 and 3 are scored; they are reported under the
    column names COMMA, PERIOD and QUESTION. Label id 0 is left out.

    Args:
        y_pred: Predicted label ids; forwarded to
            ``precision_recall_fscore_support`` as its second argument.
        y_test: Reference label ids; forwarded to
            ``precision_recall_fscore_support`` as its first argument.

    Returns:
        A ``pandas.DataFrame`` with rows ``Precision``, ``Recall`` and
        ``F1``, one column per scored class, and an ``OVERALL`` column
        holding the macro average over the scored classes.
    """
    scored_ids = [1, 2, 3]
    column_names = ['COMMA', 'PERIOD', 'QUESTION']

    # Per-class scores: (precision, recall, f1, support), each an array.
    per_class = precision_recall_fscore_support(
        y_test, y_pred, average=None, labels=scored_ids)
    # Macro-averaged scores over the same classes.
    macro = precision_recall_fscore_support(
        y_test, y_pred, average='macro', labels=scored_ids)

    # Drop the trailing support entry; keep precision, recall and f1.
    table = pd.DataFrame(
        np.array(per_class[:3]),
        columns=column_names,
        index=['Precision', 'Recall', 'F1'])
    table['OVERALL'] = macro[:3]
    return table


def test(args):
    """Evaluate a saved model on the configured test set and print metrics.

    Loads the YAML config at ``args.config``, builds the dataset and model
    named by ``config["dataset_type"]`` and ``config["model_type"]``,
    restores the ``"main_params"`` entry of the checkpoint at
    ``args.checkpoint``, runs the model over the whole test set, and prints
    a scikit-learn classification report followed by the table returned by
    ``evaluation``.

    Args:
        args: Parsed command-line namespace providing ``config`` (path to
            the YAML config file) and ``checkpoint`` (path to the saved
            parameters).
    """
    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))
    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    # The dataset constructor's path parameter is named ``train_path`` even
    # though the test split is being loaded here.
    test_dataset = DefinedDataset[config["dataset_type"]](
        train_path=config["test_path"], **config["data_params"])
    test_loader = DataLoader(
        test_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    model = DefinedClassifier[config["model_type"]](**config["model"])
    state_dict = paddle.load(args.checkpoint)
    model.set_state_dict(state_dict["main_params"])
    model.eval()

    # Class names ordered by class id, used as row names in the report.
    id2punc = test_dataset.id2punc
    punc_list = [id2punc[i] for i in range(len(id2punc))]

    test_total_label = []
    test_total_predict = []

    # Inference only: skip gradient bookkeeping to save memory.
    with paddle.no_grad():
        for tokens, label in test_loader:
            label = paddle.reshape(label, shape=[-1])
            _, logit = model(tokens)
            pred = paddle.argmax(logit, axis=1)
            test_total_label.extend(label.numpy().tolist())
            test_total_predict.extend(pred.numpy().tolist())

    # Pass the label ids explicitly so the report still lines up with
    # ``target_names`` when a class never occurs in this test split.
    t = classification_report(
        test_total_label,
        test_total_predict,
        labels=list(range(len(punc_list))),
        target_names=punc_list)
    print(t)
    # Keyword arguments keep predictions and references in the right slots;
    # passing them positionally in (label, predict) order swapped the
    # reported precision and recall.
    t2 = evaluation(y_pred=test_total_predict, y_test=test_total_label)
    print('=========================================================')
    print(t2)


def main():
    """Parse command-line arguments, select the device, and run ``test``.

    Command-line options:
        --config: Path to the ErnieLinear config file (required).
        --checkpoint: Path to the snapshot to load (required).
        --ngpu: ``0`` selects the CPU, any positive value selects the GPU
            (default ``1``).

    Raises:
        SystemExit: Raised by argparse (exit status 2) when a required
            option is missing or ``--ngpu`` is negative.
    """
    parser = argparse.ArgumentParser(description="Test a ErnieLinear model.")
    # Both paths are needed by test(); fail early with a usage message
    # instead of crashing later on a None path.
    parser.add_argument(
        "--config", type=str, required=True, help="ErnieLinear config file.")
    parser.add_argument(
        "--checkpoint", type=str, required=True, help="snapshot to load.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")

    args = parser.parse_args()

    # A negative device count is invalid; stop here rather than silently
    # continuing on whatever device happens to be the default.
    if args.ngpu < 0:
        parser.error("ngpu should >= 0 !")

    paddle.set_device("cpu" if args.ngpu == 0 else "gpu")

    test(args)


# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Merge punctuation restoration task into PaddleSpeech. 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
refactor punc 3 years ago			`import argparse`

			`import numpy as np`
			`import paddle`
			`import pandas as pd`
Merge punctuation restoration task into PaddleSpeech. 3 years ago			`import yaml`
refactor punc 3 years ago			`from paddle import nn`
			`from paddle.io import DataLoader`
			`from sklearn.metrics import classification_report`
			`from sklearn.metrics import precision_recall_fscore_support`
			`from yacs.config import CfgNode`
[CLI][Demo][Text]Refactor punctuation_restoration. (#1013) * Refactor punctuation_restoration. * Add text cli and punc demo. 3 years ago
refactor punc 3 years ago			`from paddlespeech.text.models.ernie_linear import ErnieLinear`
			`from paddlespeech.text.models.ernie_linear import PuncDataset`
			`from paddlespeech.text.models.ernie_linear import PuncDatasetFromErnieTokenizer`
Merge punctuation restoration task into PaddleSpeech. 3 years ago
refactor punc 3 years ago			`DefinedClassifier = {`
			`'ErnieLinear': ErnieLinear,`
			`}`
Merge punctuation restoration task into PaddleSpeech. 3 years ago
refactor punc 3 years ago			`DefinedLoss = {`
			`"ce": nn.CrossEntropyLoss,`
			`}`
Merge punctuation restoration task into PaddleSpeech. 3 years ago
refactor punc 3 years ago			`DefinedDataset = {`
			`'Punc': PuncDataset,`
			`'Ernie': PuncDatasetFromErnieTokenizer,`
			`}`
Merge punctuation restoration task into PaddleSpeech. 3 years ago

refactor punc 3 years ago			`def evaluation(y_pred, y_test):`
			`precision, recall, f1, _ = precision_recall_fscore_support(`
			`y_test, y_pred, average=None, labels=[1, 2, 3])`
			`overall = precision_recall_fscore_support(`
			`y_test, y_pred, average='macro', labels=[1, 2, 3])`
			`result = pd.DataFrame(`
			`np.array([precision, recall, f1]),`
			`columns=list(['O', 'COMMA', 'PERIOD', 'QUESTION'])[1:],`
			`index=['Precision', 'Recall', 'F1'])`
			`result['OVERALL'] = overall[:3]`
			`return result`


			`def test(args):`
			`with open(args.config) as f:`
			`config = CfgNode(yaml.safe_load(f))`
			`print("========Args========")`
			`print(yaml.safe_dump(vars(args)))`
			`print("========Config========")`
			`print(config)`

			`test_dataset = DefinedDataset[config["dataset_type"]](`
			`train_path=config["test_path"], **config["data_params"])`
			`test_loader = DataLoader(`
			`test_dataset,`
			`batch_size=config.batch_size,`
			`shuffle=False,`
			`drop_last=False)`
			`model = DefinedClassifier[config["model_type"]](**config["model"])`
			`state_dict = paddle.load(args.checkpoint)`
			`model.set_state_dict(state_dict["main_params"])`
			`model.eval()`

			`punc_list = []`
			`for i in range(len(test_loader.dataset.id2punc)):`
			`punc_list.append(test_loader.dataset.id2punc[i])`

			`test_total_label = []`
			`test_total_predict = []`

			`for i, batch in enumerate(test_loader):`
			`input, label = batch`
			`label = paddle.reshape(label, shape=[-1])`
			`y, logit = model(input)`
			`pred = paddle.argmax(logit, axis=1)`
			`test_total_label.extend(label.numpy().tolist())`
			`test_total_predict.extend(pred.numpy().tolist())`
			`t = classification_report(`
			`test_total_label, test_total_predict, target_names=punc_list)`
			`print(t)`
			`t2 = evaluation(test_total_label, test_total_predict)`
			`print('=========================================================')`
			`print(t2)`


			`def main():`
			`# parse args and config and redirect to train_sp`
			`parser = argparse.ArgumentParser(description="Test a ErnieLinear model.")`
			`parser.add_argument("--config", type=str, help="ErnieLinear config file.")`
			`parser.add_argument("--checkpoint", type=str, help="snapshot to load.")`
			`parser.add_argument(`
			`"--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")`
Merge punctuation restoration task into PaddleSpeech. 3 years ago
			`args = parser.parse_args()`

refactor punc 3 years ago			`if args.ngpu == 0:`
			`paddle.set_device("cpu")`
			`elif args.ngpu > 0:`
			`paddle.set_device("gpu")`
			`else:`
			`print("ngpu should >= 0 !")`

			`test(args)`
Merge punctuation restoration task into PaddleSpeech. 3 years ago

refactor punc 3 years ago			`if __name__ == "__main__":`
			`main()`