Skip to content

Commit

Permalink
Fix SourceRegion error and add some tests for it (code4craft#144)
Browse files Browse the repository at this point in the history
  • Loading branch information
code4craft committed Aug 21, 2014
1 parent 4e5ba02 commit e7668e0
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ public Selectable xpath(String xpath) {
return selectElements(xpathSelector);
}

/**
 * Applies the given selector to this node.
 *
 * Element-based selectors are run through {@code selectElements}; every other
 * selector is applied to the texts returned by {@code getSourceTexts()}.
 *
 * @param selector the selector to apply
 * @return the selection result
 */
@Override
public Selectable selectList(Selector selector) {
    if (!(selector instanceof BaseElementSelector)) {
        // Not an element selector: fall back to the text-based overload.
        return selectList(selector, getSourceTexts());
    }
    BaseElementSelector elementSelector = (BaseElementSelector) selector;
    return selectElements(elementSelector);
}

/**
 * Applies the given selector by delegating to {@link #selectList(Selector)}.
 *
 * @param selector the selector to apply
 * @return whatever {@code selectList(selector)} returns
 */
@Override
public Selectable select(Selector selector) {
    final Selectable selected = selectList(selector);
    return selected;
}

/**
* select elements
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
package us.codecraft.webmagic.model;

import junit.framework.Assert;
import org.junit.Test;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.pipeline.PageModelPipeline;

import static org.assertj.core.api.Assertions.assertThat;

/**
* @author [email protected] <br>
*/
public class GithubRepoTest {

// Creates an OOSpider for GithubRepo with a MockGithubDownloader as the downloader and
// checks, inside the pipeline callback, that the extracted star count is 86 and the
// fork count is 70.
// NOTE(review): this listing looks like a diff rendered without its +/- markers, so the
// adjacent near-duplicate statements below are presumably the before/after versions of
// the same line -- confirm against the repository before treating it as compilable source.
@Test
public void test() {
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0)
OOSpider.create(Site.me().setSleepTime(0)
, new PageModelPipeline<GithubRepo>() {
@Override
public void process(GithubRepo o, Task task) {
Assert.assertEquals(86, o.getStar());
Assert.assertEquals(70, o.getFork());
assertThat(o.getStar()).isEqualTo(86);
assertThat(o.getFork()).isEqualTo(70);
}
}, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
}, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package us.codecraft.webmagic.model;

import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;

/**
 * Empty fixture model used by {@code testExtractLinks}.
 *
 * It carries no fields; only its annotations matter. It declares a target-URL
 * pattern for {@code /post/<digits>} with the source region
 * {@code //li[@class='post']}, and a help-URL pattern for {@code /list/<digits>}
 * with the source region {@code //li[@class='list']}.
 *
 * @author [email protected]
 */
@TargetUrl(
        value = "http://webmagic.io/post/\\d+",
        sourceRegion = "//li[@class='post']")
@HelpUrl(
        value = "http://webmagic.io/list/\\d+",
        sourceRegion = "//li[@class='list']")
public class MockModel {
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package us.codecraft.webmagic.model;

import org.apache.commons.io.IOUtils;
import org.junit.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.selector.PlainText;

import java.io.IOException;
import java.io.InputStream;

import static org.assertj.core.api.Assertions.assertThat;

/**
Expand Down Expand Up @@ -40,6 +43,22 @@ public void testMultiModel_should_not_skip_when_match() throws Exception {
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
modelPageProcessor.process(page);
assertThat(page.getResultItems().isSkip()).isFalse();
}

/**
 * Processes the mock page with a processor built for {@code MockModel} and
 * verifies that exactly four target requests are queued, in this order:
 * list/1, list/2, post/1, post/2.
 */
@Test
public void testExtractLinks() throws Exception {
    ModelPageProcessor processor = ModelPageProcessor.create(null, MockModel.class);
    Page mockPage = getMockPage();
    processor.process(mockPage);

    Request[] expectedRequests = {
            new Request("http://webmagic.io/list/1"),
            new Request("http://webmagic.io/list/2"),
            new Request("http://webmagic.io/post/1"),
            new Request("http://webmagic.io/post/2")
    };
    assertThat(mockPage.getTargetRequests()).containsExactly(expectedRequests);
}

/**
 * Builds the page used by {@code testExtractLinks}: the HTML fixture
 * {@code html/mock-webmagic.html} loaded from the classpath, with request and
 * URL both set to {@code http://webmagic.io/list/0}.
 *
 * @return a page populated with the fixture's raw text, request and URL
 * @throws IOException if the fixture is not on the classpath or cannot be read
 */
private Page getMockPage() throws IOException {
    final String resourceName = "html/mock-webmagic.html";
    // getResourceAsStream returns null (rather than throwing) when the resource is missing;
    // fail with a descriptive message instead of an NPE deep inside IOUtils.
    // NOTE(review): the commit lists the fixture under "src/test/resouces" (sic) -- if that
    // directory is not on the test classpath this lookup will come back null; confirm.
    InputStream html = getClass().getClassLoader().getResourceAsStream(resourceName);
    if (html == null) {
        throw new IOException("Test resource not found on classpath: " + resourceName);
    }
    Page page = new Page();
    try {
        // The fixture declares <meta charset="UTF-8">, so decode explicitly instead of
        // relying on the platform default charset.
        page.setRawText(IOUtils.toString(html, "UTF-8"));
    } finally {
        IOUtils.closeQuietly(html);
    }
    page.setRequest(new Request("http://webmagic.io/list/0"));
    page.setUrl(new PlainText("http://webmagic.io/list/0"));
    return page;
}
}
22 changes: 22 additions & 0 deletions webmagic-extension/src/test/resouces/html/mock-webmagic.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<!DOCTYPE html>
<html>
<head lang="zh">
<meta charset="UTF-8">
<title></title>
</head>
<body>
<!-- Fixture for the source-region link-extraction test (testExtractLinks).
     Each list mixes /list/ and /post/ links under a single li class. The test expects
     only list/1, list/2 (from class "list") and post/1, post/2 (from class "post") to be
     extracted; the post/3, post/4, list/3 and list/4 anchors are in the other region
     and must not appear among the extracted requests. -->
<ul>
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
<li class="list"><a href="http://webmagic.io/post/3"></a></li>
<li class="list"><a href="http://webmagic.io/post/4"></a></li>
</ul>
<ul>
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
</ul>

</body>
</html>

0 comments on commit e7668e0

Please sign in to comment.