Merge pull request #1061 from LittleChenCc/develop

[Bug Fix] fix bugs in the data reader
pull/1064/head
Jackwaterveg 3 years ago committed by GitHub
commit cfed8d0182
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -42,7 +42,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
  # generate manifests
  python3 ${TARGET_DIR}/ted_en_zh/ted_en_zh.py \
  --manifest_prefix="data/manifest" \
- --src_dir="${data_dir}"
+ --src-dir="${data_dir}"
  echo "Complete raw data pre-process."
  fi

@@ -102,9 +102,9 @@ def read_manifest(
  with jsonlines.open(manifest_path, 'r') as reader:
  for json_data in reader:
  feat_len = json_data["input"][0]["shape"][
- 0] if 'shape' in json_data["input"][0] else 1.0
+ 0] if "input" in json_data and "shape" in json_data["input"][0] else 1.0
  token_len = json_data["output"][0]["shape"][
- 0] if 'shape' in json_data["output"][0] else 1.0
+ 0] if "output" in json_data and "shape" in json_data["output"][0] else 1.0
  conditions = [
  feat_len >= min_input_len,
  feat_len <= max_input_len,

Loading…
Cancel
Save