forked from scalad/Note
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGrabPicture.java
138 lines (128 loc) · 3.66 KB
/
GrabPicture.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package com.silence.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Batch image collector: fetches a web page, finds its {@code <img>} tags and
 * downloads the referenced pictures into a local directory.
 *
 * @author cx112
 * @version v1.0
 */
public class GrabPicture {
    /** Size in bytes of the buffer used when copying a downloaded image to disk. */
    private static final int COPY_BUFFER_SIZE = 8192;

    /**
     * Fetches the source of a web page, decoding the response with the given
     * character set.
     *
     * <p>This method never throws: any failure (malformed URL, I/O error,
     * unsupported encoding, ...) is reported on standard error and whatever
     * was read up to that point is returned, which may be the empty string.
     *
     * @author cx112
     * @param url
     *            address of the page to fetch
     * @param encoding
     *            name of the character set used to decode the response
     * @return the page source, each line terminated by {@code '\n'}
     */
    public static String gethtmlResourceByURL(String url, String encoding) {
        StringBuilder html = new StringBuilder();
        InputStream in = null;
        try {
            URLConnection connection = new URL(url).openConnection();
            // Keep a handle on the raw stream so it is released even when
            // creating the reader fails (e.g. unsupported encoding).
            in = connection.getInputStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding));
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append('\n');
            }
        } catch (Exception e) {
            System.err.println("Failed to read page source from " + url);
            e.printStackTrace();
        } finally {
            closeAndReport(in);
        }
        return html.toString();
    }

    /**
     * Downloads one image into a local directory. The file is named after the
     * last path segment of the image URL.
     *
     * <p>The target directory is created when missing. URLs whose last segment
     * does not end with {@code jpg}, {@code png} or {@code jpeg} are skipped
     * without opening a connection. Failures are reported on standard error
     * and never propagated to the caller.
     *
     * @author cx112
     * @param filePath
     *            directory the image is saved into
     * @param imgURL
     *            network address of the image
     */
    public static void DownImages(String filePath, String imgURL) {
        if (filePath == null || imgURL == null) {
            System.err.println("Skipping download: filePath and imgURL must not be null");
            return;
        }
        // lastIndexOf returns -1 when there is no '/', in which case the whole
        // string is used as the name instead of failing on a negative index.
        String fileName = imgURL.substring(imgURL.lastIndexOf('/') + 1);

        InputStream in = null;
        FileOutputStream out = null;
        try {
            File directory = new File(filePath);
            if (!directory.exists() && !directory.mkdirs() && !directory.isDirectory()) {
                throw new IOException("Could not create directory " + directory);
            }
            // Decide before touching the network so that non-image URLs cost
            // no request and leave no open stream behind.
            if (!isSupportedImage(fileName)) {
                return;
            }
            HttpURLConnection connection = (HttpURLConnection) new URL(imgURL).openConnection();
            in = connection.getInputStream();
            out = new FileOutputStream(new File(directory, fileName));
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeAndReport(out);
            closeAndReport(in);
        }
    }

    /**
     * Tells whether a file name carries one of the image suffixes this tool
     * downloads. The comparison is case-sensitive and does not require a dot
     * before the suffix.
     */
    private static boolean isSupportedImage(String fileName) {
        // NOTE(review): the previous condition listed "jpg" twice; a fourth
        // suffix (e.g. "gif") may have been intended - confirm before adding.
        return fileName.endsWith("jpg") || fileName.endsWith("png") || fileName.endsWith("jpeg");
    }

    /** Closes a resource if it was opened, printing (not propagating) any close failure. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads every image referenced by an {@code <img src="...">} tag of a
     * web page.
     *
     * <p>Image addresses are resolved against the page address, so relative
     * and protocol-relative {@code src} values are followed as well. Only
     * resulting {@code http://} and {@code https://} addresses are downloaded.
     *
     * @param url
     *            address of the page to scan
     * @param encoding
     *            name of the character set used to decode the page
     * @param path
     *            directory the images are saved into
     */
    public static void getImage(String url, String encoding, String path) {
        String htmlResource = gethtmlResourceByURL(url, encoding);
        // Parse with the page address as base URI so absUrl() can resolve
        // relative image paths; an empty base matches Jsoup.parse(String).
        Document document = Jsoup.parse(htmlResource, url == null ? "" : url);
        Elements images = document.getElementsByTag("img");
        for (Element image : images) {
            // absUrl yields "" when the attribute is missing or cannot be
            // made absolute, which the prefix check below filters out.
            String imgSrc = image.absUrl("src");
            if (imgSrc.startsWith("http://") || imgSrc.startsWith("https://")) {
                System.out.println("下载图片的地址===" + imgSrc);
                DownImages(path, imgSrc);
            }
        }
    }

    /**
     * Sample run: downloads the images of a fixed page into a fixed local
     * directory.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        // Page address and the character set used to decode it.
        String url = "http://www.tripadvisor.cn";
        String encoding = "gb2312";
        getImage(url, encoding, "e:\\test");
    }
}