|
|
|
@ -12,28 +12,30 @@
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
Make VoxCeleb1 trial of kaldi format
|
|
|
|
|
This script creates the test trial from the kaldi trial voxceleb1_test_v2.txt or the official trial veri_test2.txt
|
|
|
|
|
to kaldi trial format
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import codecs
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
# Command-line interface for the trial conversion script.
#
# Each option is registered exactly once: argparse raises ArgumentError
# ("conflicting option string") when the same flag is added to a parser twice.
parser = argparse.ArgumentParser(description=__doc__)

# Input: the VoxCeleb trial list to convert.
# NOTE(review): the default has no ".txt" suffix although the help text names
# voxceleb1_test_v2.txt -- confirm which spelling is intended.
parser.add_argument(
    "--voxceleb_trial",
    default="voxceleb1_test_v2",
    type=str,
    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt"
)

# The Kaldi-format trial file.
parser.add_argument(
    "--trial",
    default="data/test/trial",
    type=str,
    help="Kaldi format trial file")
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(voxceleb_trial, trial):
|
|
|
|
|
"""
|
|
|
|
|
VoxCeleb provides several trial files, whose format differs from the kaldi format.
|
|
|
|
@ -58,7 +60,9 @@ def main(voxceleb_trial, trial):
|
|
|
|
|
"""
|
|
|
|
|
print("Start convert the voxceleb trial to kaldi format")
|
|
|
|
|
if not os.path.exists(voxceleb_trial):
|
|
|
|
|
raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial))
|
|
|
|
|
raise RuntimeError(
|
|
|
|
|
"{} does not exist. Pleas input the correct file path".format(
|
|
|
|
|
voxceleb_trial))
|
|
|
|
|
|
|
|
|
|
trial_dirname = os.path.dirname(trial)
|
|
|
|
|
if not os.path.exists(trial_dirname):
|
|
|
|
@ -66,9 +70,9 @@ def main(voxceleb_trial, trial):
|
|
|
|
|
|
|
|
|
|
with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
|
|
|
|
|
codecs.open(trial, 'w', encoding='utf-8') as w:
|
|
|
|
|
for line in f:
|
|
|
|
|
for line in f:
|
|
|
|
|
target_or_nontarget, path1, path2 = line.strip().split()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utt_id1 = "-".join(path1.split("/"))
|
|
|
|
|
utt_id2 = "-".join(path2.split("/"))
|
|
|
|
|
target = "nontarget"
|
|
|
|
@ -77,5 +81,6 @@ def main(voxceleb_trial, trial):
|
|
|
|
|
w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
|
|
|
|
|
print("Convert the voxceleb trial to kaldi format successfully")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the conversion with the parsed command-line values
# only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main(voxceleb_trial=args.voxceleb_trial, trial=args.trial)
|
|
|
|
|