From bbcf1cb75676bafc8d09f9095a61141a59af9632 Mon Sep 17 00:00:00 2001 From: Simon Castella Date: Fri, 18 May 2018 11:54:55 +0200 Subject: [PATCH] tuned LeTemps scraper --- Projet/Crawlers/CrawlerLeTemps/letemps_scrapper.iml | 2 +- .../src/main/java/ch/wem/scrapper/LeTempsCrawler.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Projet/Crawlers/CrawlerLeTemps/letemps_scrapper.iml b/Projet/Crawlers/CrawlerLeTemps/letemps_scrapper.iml index c442616..5dc7002 100644 --- a/Projet/Crawlers/CrawlerLeTemps/letemps_scrapper.iml +++ b/Projet/Crawlers/CrawlerLeTemps/letemps_scrapper.iml @@ -5,8 +5,8 @@ - + diff --git a/Projet/Crawlers/CrawlerLeTemps/src/main/java/ch/wem/scrapper/LeTempsCrawler.java b/Projet/Crawlers/CrawlerLeTemps/src/main/java/ch/wem/scrapper/LeTempsCrawler.java index ad06642..6514e5f 100644 --- a/Projet/Crawlers/CrawlerLeTemps/src/main/java/ch/wem/scrapper/LeTempsCrawler.java +++ b/Projet/Crawlers/CrawlerLeTemps/src/main/java/ch/wem/scrapper/LeTempsCrawler.java @@ -44,9 +44,8 @@ public void visit(Page page) { String html = htmlParseData.getHtml(); Document doc = Jsoup.parse(html); Element title = doc.selectFirst("#block-letemps-content > article > div.container > div > div.col-sm-9.col-md-6 > div.article-content.article-content-inset.gallery.main-content > h1 > span"); - Element article = doc.selectFirst("#block-letemps-content > article > div.container > div > div.col-sm-9.col-md-6 > div.article-content.article-content-inset.gallery.main-content > div.article_body"); + Element article = doc.selectFirst("#block-letemps-content > article > div.container > div > div.col-sm-9.col-md-6 > div.article-content.article-content-inset.gallery.main-content > div.article_body > div.body_content"); List tags = doc.select("#block-letemps-content > article > div.container > div > div.col-sm-3.col-md-2 > section > p.tags").select("b").eachText(); - Element date = doc.select("meta[itemprop='datePublished']").first(); Long dateUnix = date != null ? 
parseDate(date.attr("content")) : 0l;