Skip to content

Commit

Permalink
add content-length filter
Browse files Browse the repository at this point in the history
  • Loading branch information
FreedomZZQ committed Apr 28, 2016
1 parent b1ec51a commit d9359ba
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/crawler/Crawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ public void processURL(String strUrl) {
log(TYPE_CONNECTING, url.toString(), TAG_ERROR);
return;
}

// Filter out links whose reported content length is below CONTENT_LENGTH.
// NOTE(review): if connection is a java.net.URLConnection, getContentLength()
// returns -1 when the length is unknown, so such responses would be rejected
// here as well — confirm this is intended.
if((connection.getContentLength() < CONTENT_LENGTH)){
// Rejected links are logged with the same TYPE_CONNECTING / TAG_ERROR pair as the failure branch just above.
log(TYPE_CONNECTING, url.toString(), TAG_ERROR);
return;
}

log(TYPE_CONNECTING, url.toString(), TAG_SUCCESS);

// read the URL
Expand Down
1 change: 1 addition & 0 deletions src/utils/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ public abstract class Constants {
public static final String TAG_ERROR = "Error";
public static final String LOG_FILE_NAME = "IR201330551365LOG.txt";
public static final String CONTENT_TYPE = "text/";
// Threshold compared against connection.getContentLength() in Crawler.processURL;
// links reporting a smaller value are skipped. Presumably measured in bytes — confirm.
public static final int CONTENT_LENGTH = 8000;
}

0 comments on commit d9359ba

Please sign in to comment.