分片任务失败重试优化,仅重试当前失败的分片;

v1.9.2
xuxueli 6 years ago
parent 4d8f92e8ef
commit 54fce811eb

@ -1282,7 +1282,8 @@ Tips: 历史版本(V1.3.x)目前已经Release至稳定版本, 进入维护阶段
- 32、底层RPC序列化协议调整为hessian2 - 32、底层RPC序列化协议调整为hessian2
- 33、修复表字段 “t.order”与数据库关键字冲突查询失败的问题 - 33、修复表字段 “t.order”与数据库关键字冲突查询失败的问题
- 34、调度中心提供API服务支持通过API服务对任务进行查询、新增、更新、启停等操作 - 34、调度中心提供API服务支持通过API服务对任务进行查询、新增、更新、启停等操作
- 35、【迭代中】分片任务失败重试优化仅重试当前失败的分片 - 35、分片任务失败重试优化仅重试当前失败的分片
- 36、【迭代中】任务参数数据框调整手动触发时支持动态输入参数
### TODO LIST ### TODO LIST

@ -179,6 +179,7 @@ CREATE TABLE `XXL_JOB_QRTZ_TRIGGER_LOG` (
`executor_address` varchar(255) DEFAULT NULL COMMENT '执行器地址,本次执行的地址', `executor_address` varchar(255) DEFAULT NULL COMMENT '执行器地址,本次执行的地址',
`executor_handler` varchar(255) DEFAULT NULL COMMENT '执行器任务handler', `executor_handler` varchar(255) DEFAULT NULL COMMENT '执行器任务handler',
`executor_param` varchar(512) DEFAULT NULL COMMENT '执行器任务参数', `executor_param` varchar(512) DEFAULT NULL COMMENT '执行器任务参数',
`executor_sharding_param` varchar(20) DEFAULT NULL COMMENT '执行器任务分片参数,格式如 1/2',
`executor_fail_retry_count` int(11) NOT NULL DEFAULT '0' COMMENT '失败重试次数', `executor_fail_retry_count` int(11) NOT NULL DEFAULT '0' COMMENT '失败重试次数',
`trigger_time` datetime DEFAULT NULL COMMENT '调度-时间', `trigger_time` datetime DEFAULT NULL COMMENT '调度-时间',
`trigger_code` int(11) NOT NULL COMMENT '调度-结果', `trigger_code` int(11) NOT NULL COMMENT '调度-结果',

@ -92,7 +92,7 @@ public class JobInfoController {
@ResponseBody @ResponseBody
//@PermessionLimit(limit = false) //@PermessionLimit(limit = false)
public ReturnT<String> triggerJob(int id) { public ReturnT<String> triggerJob(int id) {
JobTriggerPoolHelper.trigger(id, -1, TriggerTypeEnum.MANUAL); JobTriggerPoolHelper.trigger(id, TriggerTypeEnum.MANUAL, -1, null);
return ReturnT.SUCCESS; return ReturnT.SUCCESS;
} }

@ -29,7 +29,7 @@ public class RemoteHttpJobBean extends QuartzJobBean {
// trigger // trigger
//XxlJobTrigger.trigger(jobId); //XxlJobTrigger.trigger(jobId);
JobTriggerPoolHelper.trigger(jobId, -1, TriggerTypeEnum.CRON); JobTriggerPoolHelper.trigger(jobId, TriggerTypeEnum.CRON, -1, null);
} }
} }

@ -18,6 +18,7 @@ public class XxlJobLog {
private String executorAddress; private String executorAddress;
private String executorHandler; private String executorHandler;
private String executorParam; private String executorParam;
private String executorShardingParam;
private int executorFailRetryCount; private int executorFailRetryCount;
// trigger info // trigger info
@ -78,6 +79,14 @@ public class XxlJobLog {
this.executorParam = executorParam; this.executorParam = executorParam;
} }
public String getExecutorShardingParam() {
return executorShardingParam;
}
public void setExecutorShardingParam(String executorShardingParam) {
this.executorShardingParam = executorShardingParam;
}
public int getExecutorFailRetryCount() { public int getExecutorFailRetryCount() {
return executorFailRetryCount; return executorFailRetryCount;
} }

@ -73,10 +73,7 @@ public class JobFailMonitorHelper {
XxlJobInfo info = XxlJobDynamicScheduler.xxlJobInfoDao.loadById(log.getJobId()); XxlJobInfo info = XxlJobDynamicScheduler.xxlJobInfoDao.loadById(log.getJobId());
if (log.getExecutorFailRetryCount() > 0) { if (log.getExecutorFailRetryCount() > 0) {
JobTriggerPoolHelper.trigger(log.getJobId(), TriggerTypeEnum.RETRY, (log.getExecutorFailRetryCount()-1), log.getExecutorShardingParam());
// TODO分片任务失败重试优化仅重试失败分片
JobTriggerPoolHelper.trigger(log.getJobId(), (log.getExecutorFailRetryCount()-1), TriggerTypeEnum.RETRY);
String retryMsg = "<br><br><span style=\"color:#F39C12;\" > >>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<< </span><br>"; String retryMsg = "<br><br><span style=\"color:#F39C12;\" > >>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<< </span><br>";
log.setTriggerMsg(log.getTriggerMsg() + retryMsg); log.setTriggerMsg(log.getTriggerMsg() + retryMsg);
XxlJobDynamicScheduler.xxlJobLogDao.updateTriggerInfo(log); XxlJobDynamicScheduler.xxlJobLogDao.updateTriggerInfo(log);

@ -29,11 +29,11 @@ public class JobTriggerPoolHelper {
new ThreadPoolExecutor.CallerRunsPolicy()); new ThreadPoolExecutor.CallerRunsPolicy());
public void addTrigger(final int jobId, final int failRetryCount, final TriggerTypeEnum triggerType) { public void addTrigger(final int jobId, final TriggerTypeEnum triggerType, final int failRetryCount, final String executorShardingParam) {
triggerPool.execute(new Runnable() { triggerPool.execute(new Runnable() {
@Override @Override
public void run() { public void run() {
XxlJobTrigger.trigger(jobId, failRetryCount, triggerType); XxlJobTrigger.trigger(jobId, triggerType, failRetryCount, executorShardingParam);
} }
}); });
} }
@ -55,8 +55,8 @@ public class JobTriggerPoolHelper {
* <0: use param from job info config * <0: use param from job info config
* *
*/ */
public static void trigger(int jobId, int failRetryCount, TriggerTypeEnum triggerType) { public static void trigger(int jobId, TriggerTypeEnum triggerType, int failRetryCount, String executorShardingParam) {
helper.addTrigger(jobId, failRetryCount, triggerType); helper.addTrigger(jobId, triggerType, failRetryCount, executorShardingParam);
} }
public static void toStop() { public static void toStop() {

@ -13,12 +13,11 @@ import com.xxl.job.core.biz.model.TriggerParam;
import com.xxl.job.core.enums.ExecutorBlockStrategyEnum; import com.xxl.job.core.enums.ExecutorBlockStrategyEnum;
import com.xxl.job.core.util.IpUtil; import com.xxl.job.core.util.IpUtil;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List;
/** /**
* xxl-job trigger * xxl-job trigger
@ -36,27 +35,38 @@ public class XxlJobTrigger {
* <0: use param from job info config * <0: use param from job info config
* *
*/ */
public static void trigger(int jobId, int failRetryCount, TriggerTypeEnum triggerType) { public static void trigger(int jobId, TriggerTypeEnum triggerType, int failRetryCount, String executorShardingParam) {
// load data // load data
XxlJobInfo jobInfo = XxlJobDynamicScheduler.xxlJobInfoDao.loadById(jobId); // job info XxlJobInfo jobInfo = XxlJobDynamicScheduler.xxlJobInfoDao.loadById(jobId);
if (jobInfo == null) { if (jobInfo == null) {
logger.warn(">>>>>>>>>>>> trigger fail, jobId invalidjobId={}", jobId); logger.warn(">>>>>>>>>>>> trigger fail, jobId invalidjobId={}", jobId);
return; return;
} }
int finalFailRetryCount = failRetryCount>=0?failRetryCount:jobInfo.getExecutorFailRetryCount(); int finalFailRetryCount = failRetryCount>=0?failRetryCount:jobInfo.getExecutorFailRetryCount();
XxlJobGroup group = XxlJobDynamicScheduler.xxlJobGroupDao.load(jobInfo.getJobGroup()); // group info XxlJobGroup group = XxlJobDynamicScheduler.xxlJobGroupDao.load(jobInfo.getJobGroup());
// process trigger // process trigger
if (ExecutorRouteStrategyEnum.SHARDING_BROADCAST == ExecutorRouteStrategyEnum.match(jobInfo.getExecutorRouteStrategy(), null) && CollectionUtils.isNotEmpty(group.getRegistryList())) { if (triggerType==TriggerTypeEnum.RETRY && executorShardingParam!=null) {
for (int i = 0; i < group.getRegistryList().size(); i++) { String[] shardingArr = executorShardingParam.split("/");
processTrigger(group, jobInfo, finalFailRetryCount, triggerType, i); if (shardingArr.length==2 && StringUtils.isNumeric(shardingArr[0]) && StringUtils.isNumeric(shardingArr[1])); {
processTrigger(group, jobInfo, finalFailRetryCount, triggerType, Integer.valueOf(shardingArr[0]), Integer.valueOf(shardingArr[1]));
} }
} else { } else {
processTrigger(group, jobInfo, finalFailRetryCount, triggerType, 0); if (CollectionUtils.isNotEmpty(group.getRegistryList())) {
if (ExecutorRouteStrategyEnum.SHARDING_BROADCAST == ExecutorRouteStrategyEnum.match(jobInfo.getExecutorRouteStrategy(), null)) {
for (int i = 0; i < group.getRegistryList().size(); i++) {
processTrigger(group, jobInfo, finalFailRetryCount, triggerType, i, group.getRegistryList().size());
}
} else {
processTrigger(group, jobInfo, finalFailRetryCount, triggerType, 0, 1);
}
} else {
processTrigger(group, jobInfo, finalFailRetryCount, triggerType, 0, 0);
}
} }
} }
private static void processTrigger(XxlJobGroup group, XxlJobInfo jobInfo, int finalFailRetryCount, TriggerTypeEnum triggerType, int index){ private static void processTrigger(XxlJobGroup group, XxlJobInfo jobInfo, int finalFailRetryCount, TriggerTypeEnum triggerType, int index, int total){
// param // param
ExecutorBlockStrategyEnum blockStrategy = ExecutorBlockStrategyEnum.match(jobInfo.getExecutorBlockStrategy(), ExecutorBlockStrategyEnum.SERIAL_EXECUTION); // block strategy ExecutorBlockStrategyEnum blockStrategy = ExecutorBlockStrategyEnum.match(jobInfo.getExecutorBlockStrategy(), ExecutorBlockStrategyEnum.SERIAL_EXECUTION); // block strategy
@ -83,11 +93,12 @@ public class XxlJobTrigger {
triggerParam.setGlueSource(jobInfo.getGlueSource()); triggerParam.setGlueSource(jobInfo.getGlueSource());
triggerParam.setGlueUpdatetime(jobInfo.getGlueUpdatetime().getTime()); triggerParam.setGlueUpdatetime(jobInfo.getGlueUpdatetime().getTime());
triggerParam.setBroadcastIndex(index); triggerParam.setBroadcastIndex(index);
triggerParam.setBroadcastTotal(group.getRegistryList()!=null?group.getRegistryList().size():0); triggerParam.setBroadcastTotal(total);
// 3、init address // 3、init address
String address = null; String address = null;
ReturnT<String> routeAddressResult = null; ReturnT<String> routeAddressResult = null;
String shardingParam = (ExecutorRouteStrategyEnum.SHARDING_BROADCAST==executorRouteStrategyEnum &&total>0)?String.valueOf(triggerParam.getBroadcastIndex()).concat("/").concat(String.valueOf(triggerParam.getBroadcastTotal())):null;
if (CollectionUtils.isNotEmpty(group.getRegistryList())) { if (CollectionUtils.isNotEmpty(group.getRegistryList())) {
if (ExecutorRouteStrategyEnum.SHARDING_BROADCAST == executorRouteStrategyEnum) { if (ExecutorRouteStrategyEnum.SHARDING_BROADCAST == executorRouteStrategyEnum) {
address = group.getRegistryList().get(index); address = group.getRegistryList().get(index);
@ -117,8 +128,8 @@ public class XxlJobTrigger {
.append( (group.getAddressType() == 0)?I18nUtil.getString("jobgroup_field_addressType_0"):I18nUtil.getString("jobgroup_field_addressType_1") ); .append( (group.getAddressType() == 0)?I18nUtil.getString("jobgroup_field_addressType_0"):I18nUtil.getString("jobgroup_field_addressType_1") );
triggerMsgSb.append("<br>").append(I18nUtil.getString("jobconf_trigger_exe_regaddress")).append("").append(group.getRegistryList()); triggerMsgSb.append("<br>").append(I18nUtil.getString("jobconf_trigger_exe_regaddress")).append("").append(group.getRegistryList());
triggerMsgSb.append("<br>").append(I18nUtil.getString("jobinfo_field_executorRouteStrategy")).append("").append(executorRouteStrategyEnum.getTitle()); triggerMsgSb.append("<br>").append(I18nUtil.getString("jobinfo_field_executorRouteStrategy")).append("").append(executorRouteStrategyEnum.getTitle());
if (ExecutorRouteStrategyEnum.SHARDING_BROADCAST == ExecutorRouteStrategyEnum.match(jobInfo.getExecutorRouteStrategy(), null)) { if (shardingParam != null) {
triggerMsgSb.append("("+index+"/"+(group.getRegistryList()!=null?group.getRegistryList().size():0)+")"); triggerMsgSb.append("("+shardingParam+")");
} }
triggerMsgSb.append("<br>").append(I18nUtil.getString("jobinfo_field_executorBlockStrategy")).append("").append(blockStrategy.getTitle()); triggerMsgSb.append("<br>").append(I18nUtil.getString("jobinfo_field_executorBlockStrategy")).append("").append(blockStrategy.getTitle());
triggerMsgSb.append("<br>").append(I18nUtil.getString("jobinfo_field_timeout")).append("").append(jobInfo.getExecutorTimeout()); triggerMsgSb.append("<br>").append(I18nUtil.getString("jobinfo_field_timeout")).append("").append(jobInfo.getExecutorTimeout());
@ -131,6 +142,7 @@ public class XxlJobTrigger {
jobLog.setExecutorAddress(address); jobLog.setExecutorAddress(address);
jobLog.setExecutorHandler(jobInfo.getExecutorHandler()); jobLog.setExecutorHandler(jobInfo.getExecutorHandler());
jobLog.setExecutorParam(jobInfo.getExecutorParam()); jobLog.setExecutorParam(jobInfo.getExecutorParam());
jobLog.setExecutorShardingParam(shardingParam);
jobLog.setExecutorFailRetryCount(finalFailRetryCount); jobLog.setExecutorFailRetryCount(finalFailRetryCount);
//jobLog.setTriggerTime(); //jobLog.setTriggerTime();
jobLog.setTriggerCode(triggerResult.getCode()); jobLog.setTriggerCode(triggerResult.getCode());

@ -71,7 +71,7 @@ public class AdminBizImpl implements AdminBiz {
int childJobId = (StringUtils.isNotBlank(childJobIds[i]) && StringUtils.isNumeric(childJobIds[i]))?Integer.valueOf(childJobIds[i]):-1; int childJobId = (StringUtils.isNotBlank(childJobIds[i]) && StringUtils.isNumeric(childJobIds[i]))?Integer.valueOf(childJobIds[i]):-1;
if (childJobId > 0) { if (childJobId > 0) {
JobTriggerPoolHelper.trigger(childJobId, 0, TriggerTypeEnum.PARENT); JobTriggerPoolHelper.trigger(childJobId, TriggerTypeEnum.PARENT, 0, null);
ReturnT<String> triggerChildResult = ReturnT.SUCCESS; ReturnT<String> triggerChildResult = ReturnT.SUCCESS;
// add msg // add msg

@ -12,6 +12,7 @@
<result column="executor_address" property="executorAddress" /> <result column="executor_address" property="executorAddress" />
<result column="executor_handler" property="executorHandler" /> <result column="executor_handler" property="executorHandler" />
<result column="executor_param" property="executorParam" /> <result column="executor_param" property="executorParam" />
<result column="executor_sharding_param" property="executorShardingParam" />
<result column="executor_fail_retry_count" property="executorFailRetryCount" /> <result column="executor_fail_retry_count" property="executorFailRetryCount" />
<result column="trigger_time" property="triggerTime" /> <result column="trigger_time" property="triggerTime" />
@ -31,6 +32,7 @@
t.executor_address, t.executor_address,
t.executor_handler, t.executor_handler,
t.executor_param, t.executor_param,
t.executor_sharding_param,
t.executor_fail_retry_count, t.executor_fail_retry_count,
t.trigger_time, t.trigger_time,
t.trigger_code, t.trigger_code,
@ -141,6 +143,7 @@
`executor_address`= #{executorAddress}, `executor_address`= #{executorAddress},
`executor_handler`=#{executorHandler}, `executor_handler`=#{executorHandler},
`executor_param`= #{executorParam}, `executor_param`= #{executorParam},
`executor_sharding_param`= #{executorShardingParam},
`executor_fail_retry_count`= #{executorFailRetryCount} `executor_fail_retry_count`= #{executorFailRetryCount}
WHERE `id`= #{id} WHERE `id`= #{id}
</update> </update>

Loading…
Cancel
Save