# DCN ETA training script (GISCUP 2021): trains a teacher model with arrival
# features, distills its predictions into a student model, and writes a
# submission CSV. (Web-page scrape artifacts removed from the top of this file.)
import gc
import os
import random
import sys

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split  # was missing: train_test_split is used below
from sklearn.preprocessing import StandardScaler
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

import dcn_model
import process
# TensorFlow GPU session setup: grow GPU memory on demand instead of
# reserving it all up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
# Seed TF's global RNG (same value as RANDOM_SEED below, kept as a literal).
tf.random.set_seed(42)
# Silence TensorFlow C++ logging (3 = errors only).
os.environ["TF_CPP_MIN_LOG_LEVEL"]='3'
RANDOM_SEED = 42
# types of columns of the data_set DataFrame
# Categorical features routed through embedding layers.
CATEGORICAL_COLS = [
'weather_le', 'hightemp', 'lowtemp', 'dayofweek',
'slice_id', 'link_current_status_4'
]
# Continuous features for the deep part of the network.
NUMERIC_COLS = [
'distance', 'simple_eta', 'link_time_sum', 'link_count',
'cr_t_sum', 'link_current_status_4_percent', 'link_current_status_mean',
'pr_mean', 'dc_mean','lk_arrival_0_percent', 'lk_arrival_1_percent',
'lk_arrival_2_percent', 'lk_arrival_3_percent', 'lk_arrival_4_percent'
]
# Features used by the "wide" half of the wide & deep architecture.
WIDE_COLS = [
'weather_le', 'hightemp', 'lowtemp', 'dayofweek'
]
# Identifier and label columns excluded from the feature set.
IGNORE_COLS = [
'order_id', 'ata'
]
# TRAINING=True fits the autoencoders from scratch; False reloads saved encoder weights.
TRAINING = True
# VAL_TO_TEST=True additionally predicts on the validation split and builds a val CSV.
VAL_TO_TEST = False
def set_seed(seed=42):
    """Seed the Python, hash, and NumPy RNGs for reproducibility.

    Note: TensorFlow's RNG is seeded separately at module level via
    ``tf.random.set_seed``; PYTHONHASHSEED set here only affects child
    processes, not the hash order of the current interpreter.
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
if __name__ == '__main__':
    set_seed(RANDOM_SEED)
    # Returns the visible GPU devices, e.g. ['/device:GPU:0', '/device:GPU:1'].
    print(dcn_model.get_available_gpus())

    # ------------------------------------------------------------ LOAD DATA
    print('*-' * 40, 'LOAD DATA')
    making_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/max_order_xt/'
    link_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/max_170_link_sqe_for_order/'
    cross_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/for_0714_cross_sqe_for_order/'
    link_data_other_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/for_0714_link_sqe_for_order_other/'
    head_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/max_head_link_data_clear/'
    win_order_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/win_order_xw/'
    # pre_arrival_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/final_pre_arrival_data/'
    arrival_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/max_link_sqe_for_order_arrival/'
    zsl_arrival_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/zsl_arrival/'
    arrival_sqe_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/max_170_lk_arrival_sqe_for_order/'
    # h_s_for_link_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/max_hightmp_slice_for_link_eb/'
    pre_arrival_sqe_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/sqe_arrival_for_link/'
    zsl_link_data_dir = '/home/didi2021/didi2021/giscup_2021/final_train_data_0703/zsl_train_link/'
    data, mk_cols_list, link_cols_list, cross_cols_list = process.load_data(
        making_data_dir,
        link_data_dir,
        cross_data_dir,
        link_data_other_dir,
        head_data_dir,
        win_order_data_dir,
        pre_arrival_sqe_dir,
        zsl_link_data_dir,
        # pre_arrival_data_dir,
        # h_s_for_link_dir,
        arrival_data_dir,
        zsl_arrival_data_dir,
        arrival_sqe_data_dir)

    # ------------------------------------------------------ PROCESSING DATA
    data['date_time'] = data['date_time'].astype(int)
    print("type(data['date_time']):", data['date_time'].dtype)
    # Hold out 2020-09-01 entirely (presumably the online-evaluation date — confirm).
    data = data[data['date_time'] != 20200901]
    print('Here train_test_split..................')
    data = data.reset_index(drop=True)
    print('*-' * 40, 'The data.shape:', data.shape)
    train_data, val_data = train_test_split(data, test_size=0.15, random_state=RANDOM_SEED)
    train_data = train_data.reset_index(drop=True)
    val_data = val_data.reset_index(drop=True)
    print('Save End.................')

    # Keep only the columns the FeatureDictionary needs; drop the rest of the
    # full frame to free memory.
    fb_list = CATEGORICAL_COLS + NUMERIC_COLS + IGNORE_COLS
    data = data[fb_list].copy()
    gc.collect()

    print('*-' * 40, 'PROCESSING DATA FOR TRAIN')
    train_data = process.processing_data(train_data, link_cols_list, cross_cols_list, mk_cols_list, WIDE_COLS)

    # ---------------------------------------------------- PROCESSING INPUTS
    print('*-' * 40, 'PROCESSING INPUTS')
    # Persist the column lists so inference scripts can rebuild identical inputs.
    np.save('../model_h5/mk_cols_list_0720_2.npy', np.array(mk_cols_list))
    np.save('../model_h5/link_cols_list_0720_2.npy', np.array(link_cols_list))
    # Fix: originally saved the raw list here while every sibling file got the
    # np.array; np.save accepts either, but keep the calls consistent.
    np.save('../model_h5/cross_cols_list_0720_2.npy', np.array(cross_cols_list))
    np.save('../model_h5/CATEGORICAL_COLS_0720_2.npy', np.array(CATEGORICAL_COLS))
    pred_cols = ['ata']  # NOTE(review): appears unused in this file — confirm before removing.

    print('*-' * 40, 'PROCESSING INPUTS FOR TRAIN_DATA', train_data.shape)
    train_link_inputs, train_cross_inputs, train_deep_input, train_wide_input, \
        train_inputs_slice, train_labels, train_arrival = process.processing_inputs(
            train_data, mk_cols_list, link_cols_list, cross_cols_list, WIDE_COLS)
    X_train = dcn_model.preprocess(train_data, CATEGORICAL_COLS, NUMERIC_COLS)
    train_pre = train_data[['order_id']]
    del train_data
    gc.collect()

    # Fix: this log line previously said "FOR TRAIN" while processing the val split.
    print('*-' * 40, 'PROCESSING DATA FOR VAL')
    val_data = process.processing_data(val_data, link_cols_list, cross_cols_list, mk_cols_list, WIDE_COLS, is_test=True)
    print('*-' * 40, 'PROCESSING INPUTS FOR VAL_DATA', val_data.shape)
    val_link_inputs, val_cross_inputs, val_deep_input, val_wide_input, \
        val_inputs_slice, val_labels, val_arrival = process.processing_inputs(
            val_data, mk_cols_list, link_cols_list, cross_cols_list, WIDE_COLS)
    X_val = dcn_model.preprocess(val_data, CATEGORICAL_COLS, NUMERIC_COLS)
    # val_data.to_csv('../model_h5/val_data.csv', index=False)  # saving csv for test running
    val_pre = val_data[['order_id']]
    del val_data
    gc.collect()

    # ------------------------------- TEACHER MODEL (sees arrival features)
    print('*-' * 40, 'T_MODEL_INIT')
    deep_col_len, wide_col_len = train_deep_input.values.shape[1], train_wide_input.shape[1]
    link_size = 639877 + 2   # link-id vocabulary size (+2, presumably pad/unknown — confirm)
    cross_size = 44313 + 2   # cross-id vocabulary size
    link_nf_size, cross_nf_size = train_link_inputs.shape[2], train_cross_inputs.shape[2]
    slice_size = 288         # 5-minute time slices per day
    # link_seqlen, cross_seqlen = 170, 12  # library defaults
    print("link_size:{},link_nf_size:{},cross_size:{},cross_nf_size:{},slice_size:{}".format(link_size, link_nf_size,
                                                                                             cross_size, cross_nf_size,
                                                                                             slice_size))
    print("deep_col_len:{}, wide_col_len:{}".format(deep_col_len, wide_col_len))
    fd = dcn_model.FeatureDictionary(data, numeric_cols=NUMERIC_COLS, ignore_cols=IGNORE_COLS,
                                     cate_cols=CATEGORICAL_COLS)
    inp_layer, inp_embed = dcn_model.embedding_layers(fd)
    # Denoising autoencoder pre-trains the deep-feature encoder.
    autoencoder, encoder = dcn_model.create_autoencoder(train_deep_input.values.shape[-1], 1, noise=0.1)
    if TRAINING:
        autoencoder.fit(train_deep_input.values, (train_deep_input.values, train_labels.values),
                        epochs=1000,
                        batch_size=2048,
                        validation_split=0.1,
                        callbacks=[tf.keras.callbacks.EarlyStopping('val_ata_output_loss', patience=10,
                                                                    restore_best_weights=True)])
        encoder.save_weights('../model_h5/t_encoder.hdf5')
    else:
        encoder.load_weights('../model_h5/t_encoder.hdf5')
    encoder.trainable = False
    del autoencoder
    t_model = dcn_model.DCN_model(inp_layer, inp_embed, link_size, cross_size, slice_size, deep_col_len, wide_col_len,
                                  link_nf_size, cross_nf_size, encoder, conv=True, have_knowledge=False)
    gc.collect()
    mc, es, lr = dcn_model.get_mc_es_lr('0720_2', patience=5, min_delta=1e-4)
    print('*-' * 40, 'MODEL_INIT END')

    print('*-' * 40, 'ARRIVAL_MODEL_FIT')
    t_history = t_model.fit(
        [
            X_train['weather_le'], X_train['hightemp'], X_train['lowtemp'], X_train['dayofweek'],
            X_train['slice_id'], X_train['link_current_status_4'],
            X_train['distance'], X_train['simple_eta'], X_train['link_time_sum'], X_train['link_count'],
            X_train['cr_t_sum'], X_train['link_current_status_4_percent'], X_train['link_current_status_mean'],
            X_train['pr_mean'], X_train['dc_mean'],
            X_train['lk_arrival_0_percent'], X_train['lk_arrival_1_percent'], X_train['lk_arrival_2_percent'],
            X_train['lk_arrival_3_percent'], X_train['lk_arrival_4_percent'],
            train_link_inputs, train_cross_inputs, train_deep_input.values, train_wide_input, train_inputs_slice],
        train_labels.values,
        validation_data=(
            [
                X_val['weather_le'], X_val['hightemp'], X_val['lowtemp'], X_val['dayofweek'],
                X_val['slice_id'], X_val['link_current_status_4'],
                X_val['distance'], X_val['simple_eta'], X_val['link_time_sum'], X_val['link_count'],
                X_val['cr_t_sum'], X_val['link_current_status_4_percent'], X_val['link_current_status_mean'],
                X_val['pr_mean'], X_val['dc_mean'],
                X_val['lk_arrival_0_percent'], X_val['lk_arrival_1_percent'], X_val['lk_arrival_2_percent'],
                X_val['lk_arrival_3_percent'], X_val['lk_arrival_4_percent'],
                val_link_inputs, val_cross_inputs, val_deep_input.values, val_wide_input, val_inputs_slice],
            (val_labels.values),),
        batch_size=2048,
        epochs=100,
        verbose=1,
        callbacks=[es])  # lr / mc intentionally unused here
    np.save('../model_h5/t_model_0720_2.npy', t_history.history)
    t_model.save_weights("../model_h5/t_model_0720_2.h5")

    # Teacher predictions become the knowledge-distillation targets below.
    print('*-' * 40, 't_MODEL_PREDICT')
    y_knowledge_train = t_model.predict(
        [X_train['weather_le'], X_train['hightemp'], X_train['lowtemp'], X_train['dayofweek'],
         X_train['slice_id'], X_train['link_current_status_4'],
         X_train['distance'], X_train['simple_eta'], X_train['link_time_sum'], X_train['link_count'],
         X_train['cr_t_sum'], X_train['link_current_status_4_percent'], X_train['link_current_status_mean'],
         X_train['pr_mean'], X_train['dc_mean'],
         X_train['lk_arrival_0_percent'], X_train['lk_arrival_1_percent'], X_train['lk_arrival_2_percent'],
         X_train['lk_arrival_3_percent'], X_train['lk_arrival_4_percent'],
         train_link_inputs, train_cross_inputs, train_deep_input.values, train_wide_input, train_inputs_slice],
        batch_size=2048)
    y_knowledge_val = t_model.predict(
        [
            X_val['weather_le'], X_val['hightemp'], X_val['lowtemp'], X_val['dayofweek'],
            X_val['slice_id'], X_val['link_current_status_4'],
            X_val['distance'], X_val['simple_eta'], X_val['link_time_sum'], X_val['link_count'],
            X_val['cr_t_sum'], X_val['link_current_status_4_percent'], X_val['link_current_status_mean'],
            X_val['pr_mean'], X_val['dc_mean'],
            X_val['lk_arrival_0_percent'], X_val['lk_arrival_1_percent'], X_val['lk_arrival_2_percent'],
            X_val['lk_arrival_3_percent'], X_val['lk_arrival_4_percent'],
            val_link_inputs, val_cross_inputs, val_deep_input.values, val_wide_input, val_inputs_slice],
        batch_size=2048)

    print('*-' * 40, 'TRAINFORME')
    # Attach the teacher outputs as an extra label column for the student.
    train_labels = pd.DataFrame(train_labels)
    train_labels['y_knowledge_train'] = np.squeeze(y_knowledge_train)
    print(np.squeeze(y_knowledge_train)[:2])
    print(train_labels['y_knowledge_train'].head(2))
    val_labels = pd.DataFrame(val_labels)
    val_labels['y_knowledge_val'] = np.squeeze(y_knowledge_val)
    print('*-' * 40, 't_MODEL_END')

    # ------------------------ STUDENT MODEL (no arrival features available)
    # Arrival-derived features are unavailable at serving time, so strip them
    # from the deep inputs and the numeric column set.
    zsl_arrival_cols = ['zsl_link_arrival_status_mean', 'zsl_link_arrival_status_nunique',
                        'zsl_link_arrival_status0', 'zsl_link_arrival_status1',
                        'zsl_link_arrival_status2', 'zsl_link_arrival_status3']
    lk_arrival_cols = ['lk_arrival_0_percent', 'lk_arrival_1_percent', 'lk_arrival_2_percent',
                       'lk_arrival_3_percent', 'lk_arrival_4_percent']
    train_deep_input = train_deep_input.drop(lk_arrival_cols, axis=1)
    train_deep_input = train_deep_input.drop(zsl_arrival_cols, axis=1)
    val_deep_input = val_deep_input.drop(lk_arrival_cols, axis=1)
    val_deep_input = val_deep_input.drop(zsl_arrival_cols, axis=1)
    # Sanity guards: abort rather than leak label/arrival columns into training.
    if 'ata' in train_deep_input.columns.tolist():
        print('The ata in the train_deep_input')
        print('*-' * 40, 'EXIT')
        sys.exit(0)
    if 'lk_arrival_0_percent' in train_deep_input.columns.tolist():
        print('The lk_arrival_0_percent in the train_deep_input')
        print('*-' * 40, 'EXIT')
        sys.exit(0)
    if 'lk_arrival_0_percent' in val_deep_input.columns.tolist():
        print('The lk_arrival_0_percent in the val_deep_input')
        print('*-' * 40, 'EXIT')
        sys.exit(0)
    if 'zsl_link_arrival_status_mean' in train_deep_input.columns.tolist():
        print('The zsl_link_arrival_status_mean in the train_deep_input')
        print('*-' * 40, 'EXIT')
        sys.exit(0)
    mk_cols_list = train_deep_input.columns.tolist()

    print('*-' * 40, 'MODEL_FIT')
    deep_col_len, wide_col_len = train_deep_input.values.shape[1], train_wide_input.shape[1]
    print("deep_col_len:{}, wide_col_len:{}".format(deep_col_len, wide_col_len))
    # Fix: the original list(set(...) - set(...)) yielded a hash-dependent
    # column order (PYTHONHASHSEED in os.environ does not affect the current
    # process); preserve the declared order deterministically instead.
    NUMERIC_COLS = [c for c in NUMERIC_COLS if c not in lk_arrival_cols]
    fb_list = CATEGORICAL_COLS + NUMERIC_COLS + IGNORE_COLS
    if 'lk_arrival_0_percent' in fb_list:
        print('The lk_arrival_0_percent in the fb_list')
        print('*-' * 40, 'EXIT')
        sys.exit(0)
    data = data[fb_list]
    fd = dcn_model.FeatureDictionary(data, numeric_cols=NUMERIC_COLS, ignore_cols=IGNORE_COLS,
                                     cate_cols=CATEGORICAL_COLS)
    inp_layer, inp_embed = dcn_model.embedding_layers(fd)
    autoencoder, encoder = dcn_model.create_autoencoder(train_deep_input.values.shape[-1], 1, noise=0.1)
    if TRAINING:
        autoencoder.fit(train_deep_input.values, (train_deep_input.values, train_labels['ata'].values),
                        epochs=1000,
                        batch_size=2048,
                        validation_split=0.1,
                        callbacks=[tf.keras.callbacks.EarlyStopping('val_ata_output_loss', patience=10,
                                                                    restore_best_weights=True)])
        encoder.save_weights('../model_h5/main_encoder.hdf5')
    else:
        encoder.load_weights('../model_h5/main_encoder.hdf5')
    encoder.trainable = False
    del autoencoder
    print(train_labels.head(1))
    print(train_labels.values[0])
    print('*-' * 40, 'The shape of train_link_inputs before', train_link_inputs.shape)
    # Drop sequence-feature channel 5 (presumably link_arrival_status — confirm
    # against process.processing_inputs) from the link sequences.
    train_link_inputs = np.concatenate((train_link_inputs[:, :, :5], train_link_inputs[:, :, 6:]), axis=2)
    print('*-' * 40, 'The shape of train_link_inputs after', train_link_inputs.shape)
    val_link_inputs = np.concatenate((val_link_inputs[:, :, :5], val_link_inputs[:, :, 6:]), axis=2)
    link_nf_size, cross_nf_size = train_link_inputs.shape[2], train_cross_inputs.shape[2]
    mc, es, lr = dcn_model.get_mc_es_lr_for_student('0720_2', patience=5, min_delta=1e-4)
    model = dcn_model.DCN_model(inp_layer, inp_embed, link_size, cross_size, slice_size, deep_col_len, wide_col_len,
                                link_nf_size, cross_nf_size, encoder, conv=True)
    history = model.fit(
        [
            X_train['weather_le'], X_train['hightemp'], X_train['lowtemp'], X_train['dayofweek'],
            X_train['slice_id'], X_train['link_current_status_4'],
            X_train['distance'], X_train['simple_eta'], X_train['link_time_sum'], X_train['link_count'],
            X_train['cr_t_sum'], X_train['link_current_status_4_percent'], X_train['link_current_status_mean'],
            X_train['pr_mean'], X_train['dc_mean'],
            train_link_inputs, train_cross_inputs, train_deep_input.values, train_wide_input, train_inputs_slice],
        train_labels.values,
        validation_data=(
            [
                X_val['weather_le'], X_val['hightemp'], X_val['lowtemp'], X_val['dayofweek'],
                X_val['slice_id'], X_val['link_current_status_4'],
                X_val['distance'], X_val['simple_eta'], X_val['link_time_sum'], X_val['link_count'],
                X_val['cr_t_sum'], X_val['link_current_status_4_percent'], X_val['link_current_status_mean'],
                X_val['pr_mean'], X_val['dc_mean'],
                val_link_inputs, val_cross_inputs, val_deep_input.values, val_wide_input, val_inputs_slice],
            (val_labels.values),),
        batch_size=2048,
        epochs=100,
        verbose=1,
        callbacks=[es])  # lr / mc intentionally unused here
    np.save('../model_h5/history_0720_2.npy', history.history)
    model.save_weights("../model_h5/dcn_model_0720_2.h5")

    # ------------------------------------------------------- MODEL_PREDICT
    if VAL_TO_TEST:
        print('*-' * 40, 'val_to_test')
        val_pre = val_pre.rename(columns={'order_id': 'id'})
        print(val_link_inputs.shape, val_cross_inputs.shape, X_val.shape)
        print('*-' * 40, 'MODEL_RPEDICT')
        val_pred = model.predict(
            [
                X_val['weather_le'], X_val['hightemp'], X_val['lowtemp'], X_val['dayofweek'],
                X_val['slice_id'], X_val['link_current_status_4'],
                X_val['distance'], X_val['simple_eta'], X_val['link_time_sum'], X_val['link_count'],
                X_val['cr_t_sum'], X_val['link_current_status_4_percent'], X_val['link_current_status_mean'],
                X_val['pr_mean'], X_val['dc_mean'],
                val_link_inputs, val_cross_inputs, val_deep_input.values, val_wide_input, val_inputs_slice],
            batch_size=2048)
        # Column 1 = ata head, column 0 = the auxiliary head (per the test
        # branch below — confirm against dcn_model.DCN_model outputs).
        val_pre['val_predict'] = np.squeeze(val_pred[:, 1])
        val_pre['other_predict'] = np.squeeze(val_pred[:, 0])
        val_pre = val_pre.rename(columns={'val_predict': 'result'})  # rename column
        val_pre = val_pre[['id', 'result', 'other_predict']]
        val_pre['ata'] = val_labels['ata'].values
        print(val_pre.head())
        result_save_path = '../result_csv/val_0720_2.csv'
        print('*-' * 40, 'CSV_SAVE_PATH:', result_save_path)
        # Fix: the path was printed but the CSV was never written.
        val_pre.to_csv(result_save_path, index=False)
        print('..........Finish')

    # Free the training/validation tensors before loading the test set.
    del X_train, train_link_inputs, train_cross_inputs, train_deep_input, \
        train_wide_input, train_inputs_slice, train_labels
    del X_val, val_link_inputs, val_cross_inputs, val_deep_input, val_wide_input, val_inputs_slice, val_labels
    gc.collect()

    # -------------------------------------------------------- TEST / SUBMIT
    print('*-' * 40, 'LOAD TEST DATA')
    making_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/order_xt/'
    link_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/max_170_link_sqe_for_order/'
    cross_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/cross_sqe_for_order/'
    link_test_data_other_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/link_sqe_for_order_other/'
    head_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/head_link_data_clear/'
    win_order_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/win_order_xw/'
    pre_arrival_sqe_test_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/sqe_arrival_for_link/'
    # h_s_for_test_link_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/max_hightmp_slice_for_link_eb/'
    # pre_arrival_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/final_pre_arrival_data/'
    zsl_link_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/zsl_test_link/'
    # zsl_cross_test_data_dir = '/home/didi2021/didi2021/giscup_2021/final_test_data_0703/zsl_test_cross_0703/'
    test_data, _, _, _ = process.load_data(making_test_data_dir,
                                           link_test_data_dir,
                                           cross_test_data_dir,
                                           link_test_data_other_dir,
                                           head_test_data_dir,
                                           win_order_test_data_dir,
                                           pre_arrival_sqe_test_dir,
                                           zsl_link_test_data_dir)
    print('*-' * 40, 'PROCESSING DATA')
    # The test set has no arrival status; align the link column list with it.
    link_cols_list.remove('link_arrival_status')
    test_data = process.processing_data(test_data, link_cols_list, cross_cols_list, mk_cols_list, WIDE_COLS, is_test=True)
    gc.collect()
    print('*-' * 40, 'PROCESSING INPUTS FOR TEST_DATA', test_data.shape)
    test_link_inputs, test_cross_inputs, test_deep_input, test_wide_input, \
        test_inputs_slice, _ = process.processing_inputs(
            test_data, mk_cols_list, link_cols_list, cross_cols_list, WIDE_COLS, arrival=False)
    X_test = dcn_model.preprocess(test_data, CATEGORICAL_COLS, NUMERIC_COLS)
    test_pre = test_data[['order_id']]
    test_arrival_pre = test_data[['order_id']]  # NOTE(review): unused in this view — confirm before removing.
    gc.collect()
    test_pre = test_pre.rename(columns={'order_id': 'id'})
    print(test_link_inputs.shape, test_cross_inputs.shape, X_test.shape, test_deep_input.shape)
    print('*-' * 40, 'MODEL_RPEDICT')
    test_pred = model.predict(
        [
            X_test['weather_le'], X_test['hightemp'], X_test['lowtemp'], X_test['dayofweek'],
            X_test['slice_id'], X_test['link_current_status_4'],
            X_test['distance'], X_test['simple_eta'], X_test['link_time_sum'], X_test['link_count'],
            X_test['cr_t_sum'], X_test['link_current_status_4_percent'], X_test['link_current_status_mean'],
            X_test['pr_mean'], X_test['dc_mean'],
            test_link_inputs, test_cross_inputs, test_deep_input.values, test_wide_input, test_inputs_slice],
        batch_size=2048)
    test_pre['test_predict'] = np.squeeze(test_pred[:, 1])
    test_pre['other_predict'] = np.squeeze(test_pred[:, 0])
    test_pre = test_pre.rename(columns={'test_predict': 'result'})  # rename column
    test_pre = test_pre[['id', 'result', 'other_predict']]
    print(test_pre.head())
    result_save_path = '../result_csv/submit_0720_2.csv'
    print('*-' * 40, 'CSV_SAVE_PATH:', result_save_path)
    test_pre.to_csv(result_save_path, index=False)  # save submission
    print('..........Finish')