Skip to content

Commit

Permalink
修改抓取文件
Browse files Browse the repository at this point in the history
  • Loading branch information
leelance committed Jun 18, 2014
1 parent 3f8c308 commit 002ea05
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,28 @@
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.lance.entity.BlogEntity;
import com.lance.entity.UserEntity;
import com.lance.repository.BlogRepository;

@Component
public class Crawler {
private String url = "http://www.oschina.net/blog?type=0&p=";
//地址
private String url = "http://************";
private Logger logger = LoggerFactory.getLogger(getClass());
private static int page = 1;

public static void main(String[] args) {
new Crawler().getBlogList(page);
}
@Autowired
private BlogRepository blogRepository;
/**
* 获取Elements
* @param page
*/
private void getBlogList(int page) {
url = url+page+"#catalogs";
String css = "#RecentBlogs .BlogList > li";
public void getBlogList(int page) {
String css = "#blogs .blog_list > li";
Elements elements = getElements(url, css);

parseElements(elements);
Expand All @@ -44,7 +47,7 @@ private void parseElements(Elements elements) {

for(Element element: elements){
handlerElement(element);
return;
//return;
}

getBlogList(page++);
Expand Down Expand Up @@ -75,30 +78,32 @@ private void handlerElement(Element element) {
* @return
*/
private void getBlogDetail(String href, BlogEntity blogEntity){
    // Fetch the detail page and take the first node matching the detail container.
    String css = ".blog_detail";
    Elements matches = getElements(href, css);
    Element element = (matches == null) ? null : matches.first();
    if (element == null) {
        // Elements.first() yields null when nothing matched; there is nothing
        // to extract, so skip this entry instead of failing with an NPE.
        logger.warn("No element matching '{}' found at {}", css, href);
        return;
    }

    // Summary
    blogEntity.setSummary(firstTextOrNull(element, ".blog-title span"));

    // Tags: only the first link is used (the original loop stopped after one
    // iteration). Starting from null and using += stored the literal prefix
    // "null" in front of the tag text; take the text directly instead.
    blogEntity.setTags(firstTextOrNull(element, ".blog-content a"));

    // Content
    blogEntity.setContent(firstTextOrNull(element, ".blog-content"));
    blogEntity.setCreateDate(new Date());

    // Every crawled blog is attached to the user with id 1.
    UserEntity user = new UserEntity();
    user.setId(1);
    blogEntity.setBlongUser(user);

    blogRepository.save(blogEntity);
}

// Returns the text of the first node under root that matches selector, or null when nothing matches.
private String firstTextOrNull(Element root, String selector) {
    Element match = root.select(selector).first();
    return (match == null) ? null : match.text();
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.lance.application;

import java.util.concurrent.Callable;

/**
 * Stub {@link Callable} task: {@link #call()} currently performs no work and
 * returns {@code null}.
 *
 * NOTE(review): presumably meant to run a crawl on an executor; the body is
 * not implemented yet, so confirm the intended behavior before relying on it.
 */
public class CrawlerCall implements Callable<String> {

    /**
     * Does nothing yet.
     *
     * @return always {@code null}
     * @throws Exception never thrown by the current body; declared to match
     *                   {@link Callable#call()}
     */
    @Override
    public String call() throws Exception {
        return null;
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.lance.application;

import java.util.Date;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.builder.SpringApplicationBuilder;
Expand All @@ -19,7 +18,9 @@
@EntityScan("com.lance.entity")
@EnableJpaRepositories("com.lance.repository")
public class WebAppConfig extends WebMvcConfigurerAdapter{

@Autowired
private Crawler crawler;

protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
return application.sources(WebAppConfig.class);
}
Expand All @@ -40,45 +41,8 @@ public void addInterceptors(InterceptorRegistry registry) {
/**
 * Spring Boot scheduled task: prints the current time to standard output and
 * then calls {@code crawler.getBlogList(1)}.
 *
 * NOTE(review): two cron expressions appear on this method in this view
 * ("0 0 12 * * ?" and "0 41 21 * * ?"); confirm which schedule is intended.
 */
@Scheduled(cron="0 0 12 * * ?")
@Scheduled(cron="0 41 21 * * ?")
public void reportCurrentTime() {
System.out.println("The time is now: " + new Date());
crawler.getBlogList(1);
}
}
/**
字段 允许值 允许的特殊字符
秒 0-59 , - * /
分 0-59 , - * /
小时 0-23 , - * /
日期 1-31 , - * ? / L W C
月份 1-12 或者 JAN-DEC , - * /
星期 1-7 或者 SUN-SAT , - * ? / L C #
年(可选) 留空, 1970-2099 , - * /
表达式意义
"0 0 12 * * ?" 每天中午12点触发
"0 15 10 ? * *" 每天上午10:15触发
"0 15 10 * * ?" 每天上午10:15触发
"0 15 10 * * ? *" 每天上午10:15触发
"0 15 10 * * ? 2005" 2005年的每天上午10:15触发
"0 * 14 * * ?" 在每天下午2点到下午2:59期间的每1分钟触发
"0 0/5 14 * * ?" 在每天下午2点到下午2:55期间的每5分钟触发
"0 0/5 14,18 * * ?" 在每天下午2点到2:55期间和下午6点到6:55期间的每5分钟触发
"0 0-5 14 * * ?" 在每天下午2点到下午2:05期间的每1分钟触发
"0 10,44 14 ? 3 WED" 每年三月的星期三的下午2:10和2:44触发
"0 15 10 ? * MON-FRI" 周一至周五的上午10:15触发
"0 15 10 15 * ?" 每月15日上午10:15触发
"0 15 10 L * ?" 每月最后一日的上午10:15触发
"0 15 10 ? * 6L" 每月的最后一个星期五上午10:15触发
"0 15 10 ? * 6L 2002-2005" 2002年至2005年的每月的最后一个星期五上午10:15触发
"0 15 10 ? * 6#3" 每月的第三个星期五上午10:15触发
每天早上6点
0 6 * * *
每两个小时
0 * /2 * * *
晚上11点到早上8点之间每两个小时,早上八点
0 23-7/2,8 * * *
每个月的4号和每个礼拜的礼拜一到礼拜三的早上11点
0 11 4 * 1-3
1月1日早上4点
0 4 1 1 *
*/
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.lance.repository;

import org.springframework.data.jpa.repository.JpaRepository;

import com.lance.entity.BlogEntity;

/**
 * Spring Data JPA repository for {@link BlogEntity}, keyed by {@code Long}.
 * Declares no query methods of its own; everything available on it comes from
 * {@link JpaRepository}.
 */
public interface BlogRepository extends JpaRepository<BlogEntity, Long> {
}
25 changes: 8 additions & 17 deletions spring-boot-samples/src/main/resources/application.properties
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
#DB properties:
spring.datasource.url=jdbc:mysql://localhost:3306/test
spring.datasource.username=root
spring.datasource.password=123456
#Created by JInto - www.guh-software.de
#Wed Jun 18 21:31:46 CST 2014
server.port=8080
server.tomcat.access-log-enabled=true
spring.datasource.driverClassName=com.mysql.jdbc.Driver

#JPA Configuration:
spring.jpa.show-sql=true
#spring.jpa.generate-ddl=true
spring.datasource.password=123456
spring.datasource.url=jdbc\:mysql\://localhost\:3306/test?useUnicode\=true&characterEncoding\=utf-8
spring.datasource.username=root
spring.jpa.hibernate.ddl-auto=update
#spring.jpa.database-platform=org.hibernate.dialect.MySQL5Dialect
#spring.jpa.hibernate.naming_strategy=org.hibernate.cfg.ImprovedNamingStrategy
#spring.jpa.database=org.hibernate.dialect.MySQL5InnoDBDialect

#view Configuration:
spring.jpa.show-sql=true
spring.view.prefix=/WEB-INF/views/
spring.view.suffix=

#Server Configuration:
server.port=8080
server.tomcat.access-log-enabled=true
25 changes: 8 additions & 17 deletions spring-boot-samples/target/classes/application.properties
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
#DB properties:
spring.datasource.url=jdbc:mysql://localhost:3306/test
spring.datasource.username=root
spring.datasource.password=123456
#Created by JInto - www.guh-software.de
#Wed Jun 18 21:31:46 CST 2014
server.port=8080
server.tomcat.access-log-enabled=true
spring.datasource.driverClassName=com.mysql.jdbc.Driver

#JPA Configuration:
spring.jpa.show-sql=true
#spring.jpa.generate-ddl=true
spring.datasource.password=123456
spring.datasource.url=jdbc\:mysql\://localhost\:3306/test?useUnicode\=true&characterEncoding\=utf-8
spring.datasource.username=root
spring.jpa.hibernate.ddl-auto=update
#spring.jpa.database-platform=org.hibernate.dialect.MySQL5Dialect
#spring.jpa.hibernate.naming_strategy=org.hibernate.cfg.ImprovedNamingStrategy
#spring.jpa.database=org.hibernate.dialect.MySQL5InnoDBDialect

#view Configuration:
spring.jpa.show-sql=true
spring.view.prefix=/WEB-INF/views/
spring.view.suffix=

#Server Configuration:
server.port=8080
server.tomcat.access-log-enabled=true

0 comments on commit 002ea05

Please sign in to comment.