1、解决爬虫新浪新闻统计循环计数问题

pull/254/head
xjs 4 years ago
parent 2f75b19ccd
commit 83a006c47a

@ -263,8 +263,6 @@ export default {
this.getApiName() this.getApiName()
}, },
// todo bug
methods: { methods: {
//api //api
getApiName() { getApiName() {

@ -32,20 +32,20 @@ public class SinaNewsTask {
@Autowired @Autowired
private SinaNewsService sinaNewsService; private SinaNewsService sinaNewsService;
public static final String URL = "https://news.sina.com.cn/"; private static final String URL = "https://news.sina.com.cn/";
@ReptileLog(name = "新浪新闻", url = URL)
public Long reptileSinaNews() {
//定义循环次数计时器 //定义循环次数计时器
Long count = 0L; private Long count = 0L;
@ReptileLog(name = "新浪新闻", url = URL)
public Long reptileSinaNews() {
try { try {
String html = httpUtils.doGetHtml(URL); String html = httpUtils.doGetHtml(URL);
Document document = Jsoup.parse(html); Document document = Jsoup.parse(html);
count = this.parse(document, count); this.parse(document);
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage()); log.error(e.getMessage());
} }
@ -56,9 +56,8 @@ public class SinaNewsTask {
* dom * dom
* *
* @param document dom * @param document dom
* @param count
*/ */
private Long parse(Document document, Long count) { private void parse(Document document) {
//获取子链接 //获取子链接
Elements nav_mod_1 = document.getElementsByClass("nav-mod-1"); Elements nav_mod_1 = document.getElementsByClass("nav-mod-1");
Elements link = nav_mod_1.select("ul > li > a"); Elements link = nav_mod_1.select("ul > li > a");
@ -77,13 +76,10 @@ public class SinaNewsTask {
String html = httpUtils.doGetHtml(entry.getValue()); String html = httpUtils.doGetHtml(entry.getValue());
Document docChild = Jsoup.parse(html); Document docChild = Jsoup.parse(html);
Long newCount = this.parseChile(docChild, entry.getKey(), count); this.parseChile(docChild, entry.getKey());
count = count + newCount;
} }
} }
return count;
} }
/** /**
@ -92,7 +88,7 @@ public class SinaNewsTask {
* @param docChild * @param docChild
* @param key key * @param key key
*/ */
private Long parseChile(Document docChild, String key, Long count) { private void parseChile(Document docChild, String key) {
try { try {
Elements a = docChild.getElementsByTag("a"); Elements a = docChild.getElementsByTag("a");
ArrayList<String> link = new ArrayList<>(); ArrayList<String> link = new ArrayList<>();
@ -170,10 +166,12 @@ public class SinaNewsTask {
sinaNewsList.add(sinaNews); sinaNewsList.add(sinaNews);
} }
}
//计数 //计数
count++; count++;
}
sinaNewsService.saveBatch(sinaNewsList, 30); sinaNewsService.saveBatch(sinaNewsList, 30);
@ -184,8 +182,6 @@ public class SinaNewsTask {
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage()); log.error(e.getMessage());
} }
return count;
} }
} }

Loading…
Cancel
Save