|
|
|
@ -19,7 +19,7 @@
|
|
|
|
|
#### 读取处理自己的数据集
|
|
|
|
|
|
|
|
|
|
~~~python
|
|
|
|
|
class DataProcessor(object):
|
|
|
|
|
class MyDataProcessor(object):
|
|
|
|
|
"""Base class for data converters for sequence classification data sets."""
|
|
|
|
|
|
|
|
|
|
def get_train_examples(self, data_dir):
|
|
|
|
@ -39,3 +39,20 @@ class DataProcessor(object):
|
|
|
|
|
raise NotImplementedError()
|
|
|
|
|
~~~
|
|
|
|
|
|
|
|
|
|
> 这个类完全照搬自 `class DataProcessor`,只是把类名改成了 `MyDataProcessor`。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
参照下面的示例代码:
|
|
|
|
|
|
|
|
|
|
~~~python
|
|
|
|
|
guid = "train-%d" % (i) # 获取样本ID
|
|
|
|
|
text_a = tokenization.convert_to_unicode(line[0])
|
|
|
|
|
text_b = tokenization.convert_to_unicode(line[1]) # 获取text_a和b,我们只有a所以把b去掉
|
|
|
|
|
label = tokenization.convert_to_unicode(line[2]) # 获取标签
|
|
|
|
|
if label == tokenization.convert_to_unicode("contradictory"):
|
|
|
|
|
label = tokenization.convert_to_unicode("contradiction")
|
|
|
|
|
examples.append(
|
|
|
|
|
InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) # 把读进来的东西传到InputExample,这个类可以点进去,里面什么都没做,只不过是模板,我们也照着做
|
|
|
|
|
~~~
|
|
|
|
|
|
|
|
|
|