Use jsonlines to read the manifest and dump the decode results

pull/852/head
Hui Zhang 4 years ago
parent 33b8790252
commit 83a5e0f8d6

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Contains data helper functions.""" """Contains data helper functions."""
import codecs
import json import json
import math import math
from typing import List from typing import List
@ -92,26 +91,22 @@ def read_manifest(
""" """
manifest = [] manifest = []
for json_line in codecs.open(manifest_path, 'r', 'utf-8'): with jsonlines.open(manifest_path, 'r') as reader:
try: for json_data in reader:
json_data = json.loads(json_line) feat_len = json_data["feat_shape"][
except Exception as e: 0] if 'feat_shape' in json_data else 1.0
raise IOError("Error reading manifest: %s" % str(e)) token_len = json_data["token_shape"][
0] if 'token_shape' in json_data else 1.0
feat_len = json_data["feat_shape"][ conditions = [
0] if 'feat_shape' in json_data else 1.0 feat_len >= min_input_len,
token_len = json_data["token_shape"][ feat_len <= max_input_len,
0] if 'token_shape' in json_data else 1.0 token_len >= min_output_len,
conditions = [ token_len <= max_output_len,
feat_len >= min_input_len, token_len / feat_len >= min_output_input_ratio,
feat_len <= max_input_len, token_len / feat_len <= max_output_input_ratio,
token_len >= min_output_len, ]
token_len <= max_output_len, if all(conditions):
token_len / feat_len >= min_output_input_ratio, manifest.append(json_data)
token_len / feat_len <= max_output_input_ratio,
]
if all(conditions):
manifest.append(json_data)
return manifest return manifest

Loading…
Cancel
Save