- package test;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileOutputStream;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.InputStream;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.Date;
- import java.sql.SQLException;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
/**
 * Command-line scraper that downloads the images referenced by forum threads.
 *
 * <p>{@link #doAction()} drives the whole run: it fetches the board index page, extracts thread
 * links from it, fetches every thread whose URL is not yet recorded in {@code url.log}, downloads
 * each image the thread references and finally appends the thread URL to {@code url.log}. Every
 * downloaded file is written below the {@code pic/} directory.
 */
public class Download1024 {

    /** Directory (relative to the working directory) that receives every downloaded file. */
    private static final String DOWNLOAD_DIR = "pic/";

    /** Log of thread URLs that have already been processed, one URL per line. */
    private static final String LOG_FILE = "url.log";

    /** File name under which the board index page is stored. */
    private static final String INDEX_FILE = "jt.html";

    /** Prefix prepended to the relative thread links found in the index page. */
    private static final String SITE_ROOT = "http://xx.xx.xx/";

    /** Opening of a thread anchor, up to and including the quote that starts the href value. */
    private static final String HREF_OPEN = "<h3><a href=\"";

    /** Text an index line must contain to be treated as a thread link. */
    private static final String THREAD_LINK_MARKER = HREF_OPEN + "htm_data/16/1307/";

    /** Opening of an image input, up to and including the quote that starts the src value. */
    private static final String IMAGE_SRC_OPEN = "<input type='image' src='";

    /** Text that identifies an image input with an absolute http URL. */
    private static final String IMAGE_MARKER = IMAGE_SRC_OPEN + "http://";

    /** Line content at which scanning of a thread page stops. */
    private static final String END_OF_POST_MARKER = "<div>------------------------</div>";

    /** Minimum length of the arrays returned by the two parsing methods (legacy result shape). */
    private static final int MIN_RESULT_SLOTS = 100;

    /** Milliseconds to wait for a connection before a download is abandoned. */
    private static final int CONNECT_TIMEOUT_MS = 15000;

    /** Milliseconds a single read may block before a download is abandoned. */
    private static final int READ_TIMEOUT_MS = 30000;

    /** URL of the board index page that lists the threads. */
    public String downloadwebsite = "http://xx.xx.xx/thread0806.php?fid=16";

    /**
     * Program entry point: runs one complete download pass.
     *
     * @param args ignored
     * @throws Exception if {@link #doAction()} fails
     */
    public static void main(String[] args) throws Exception {
        Download1024 d = new Download1024();
        d.doAction();
    }

    /**
     * Downloads the board index, then every thread that is not yet in the log, then each thread's
     * images.
     *
     * <p>A thread is appended to the log only after its page was fetched, so a thread whose page
     * download failed is retried on the next run. Failed image downloads are reported by
     * {@link #httpDownload(String, String)} but do not prevent the thread from being logged.
     *
     * @throws SQLException never thrown by this implementation; kept for signature compatibility
     * @throws IOException if the log cannot be read or written
     */
    public void doAction() throws SQLException, IOException {
        System.out.println("欢迎使用xx社区图片下载器,正在下载主文件");
        ArrayList<String> alreadyDone = getList();
        if (!httpDownload(downloadwebsite, INDEX_FILE)) {
            // Without a fresh index there is nothing reliable to parse.
            System.out.println("主文件下载失败,已终止");
            return;
        }
        System.out.println("主文件下载完毕,正在解析文档");
        String[] threads = readFileLine(DOWNLOAD_DIR + INDEX_FILE);
        int threadNo = 0;
        for (String page : threads) {
            if (page == null) {
                continue;
            }
            threadNo++;
            boolean hasExists = alreadyDone.contains(page);
            if (hasExists) {
                continue;
            }
            System.out.println("hasExists:" + hasExists);
            System.out.print("准备下载第" + threadNo + "个帖子:");
            String pageFile = getLastStringFromURL(page);
            if (!httpDownload(page, pageFile)) {
                // Do not log the thread, so that it is attempted again on the next run.
                System.out.println("帖子下载失败,已跳过:" + page);
                continue;
            }
            // The timestamp alone only has one-second resolution; the thread number keeps the
            // images of two threads handled within the same second from overwriting each other.
            String prefix = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date())
                    + "_" + threadNo;
            int imageNo = 0;
            for (String imageLink : readDetailsFileLine(DOWNLOAD_DIR + pageFile)) {
                if (imageLink != null && !imageLink.equals("")) {
                    httpDownload(imageLink, prefix + "__" + (++imageNo) + ".jpg");
                }
            }
            // Record the finished thread, then discard its temporary HTML copy.
            writeLog(page);
            File savedPage = new File(DOWNLOAD_DIR + pageFile);
            if (savedPage.exists()) {
                savedPage.delete();
            }
        }
        System.out.println("图片全部下载完毕,请慢慢欣赏");
    }

    /**
     * Reads the log of already processed thread URLs.
     *
     * @return the distinct lines of {@code url.log} in file order; empty if the file does not exist
     * @throws IOException if the log exists but cannot be read
     */
    public ArrayList<String> getList() throws IOException {
        ArrayList<String> urls = new ArrayList<String>();
        File logFile = new File(LOG_FILE);
        if (logFile.exists()) {
            try (BufferedReader reader = new BufferedReader(new FileReader(logFile))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (!urls.contains(line)) {
                        urls.add(line);
                    }
                }
            }
        }
        System.out.println("读取log完成!" + urls.size());
        return urls;
    }

    /**
     * Appends one URL to {@code url.log} as its own line.
     *
     * @param url the thread URL to record
     * @throws IOException if the log cannot be written
     */
    public void writeLog(String url) throws IOException {
        try (FileWriter writer = new FileWriter(LOG_FILE, true)) {
            // A real line break is required: getList() reads the log back line by line.
            writer.write(url + "\n");
        }
    }

    /**
     * Downloads a URL into a file below {@code pic/}, creating the directory if necessary.
     *
     * @param httpUrl the URL to fetch
     * @param saveFile the target file name, relative to {@code pic/}
     * @return {@code true} if the whole response was written; {@code false} if the URL is
     *     malformed or any I/O error occurred (the stack trace is printed in both cases)
     */
    public boolean httpDownload(String httpUrl, String saveFile) {
        System.out.println("空闲内存:=========" + Runtime.getRuntime().freeMemory()/1024 / 1024+"M");
        System.out.println(httpUrl);
        File target = new File(DOWNLOAD_DIR + saveFile);

        URL url;
        try {
            url = new URL(httpUrl);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return false;
        }

        File directory = target.getParentFile();
        if (directory != null) {
            // If creation fails, opening the output stream below reports the problem.
            directory.mkdirs();
        }

        try {
            URLConnection conn = url.openConnection();
            // Without timeouts a dead connection would block the run forever.
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            try (InputStream in = conn.getInputStream();
                    FileOutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Extracts the thread URLs from a saved board index page.
     *
     * <p>Every line containing {@code <h3><a href="htm_data/16/1307/} contributes one URL: the
     * href value up to its closing quote, prefixed with the site root. Lines whose href is not
     * terminated are skipped. Read errors are reported on the console and end the scan; the links
     * collected so far are still returned.
     *
     * @param file path of the saved index page
     * @return the URLs in file order, in an array of at least 100 slots whose unused trailing
     *     slots are {@code null}
     * @throws IOException never thrown by this implementation; kept for signature compatibility
     */
    public String[] readFileLine(String file) throws IOException {
        ArrayList<String> links = new ArrayList<String>();
        if (file == null) {
            System.out.println("Error occurs during reading " + file);
            return toPaddedArray(links);
        }
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int marker = line.indexOf(THREAD_LINK_MARKER);
                if (marker < 0) {
                    continue;
                }
                int start = marker + HREF_OPEN.length();
                int end = line.indexOf('"', start);
                if (end < 0) {
                    continue;
                }
                links.add(SITE_ROOT + line.substring(start, end));
            }
        } catch (IOException ex) {
            System.out.println("Error occurs during reading " + file);
            ex.printStackTrace();
        }
        return toPaddedArray(links);
    }

    /**
     * Extracts the image URLs from a saved thread page.
     *
     * <p>Each {@code <input type='image' src='http://...'>} occurrence contributes the src value
     * up to its closing quote; a line may hold several images. Scanning stops after the first line
     * whose text following its last image contains {@code <div>------------------------</div>}
     * (presumably the end of the opening post; confirm against the site markup). Read errors are
     * reported on the console and end the scan; the URLs collected so far are still returned.
     *
     * @param file path of the saved thread page
     * @return the image URLs in document order, in an array of at least 100 slots whose unused
     *     trailing slots are {@code null}
     * @throws IOException never thrown by this implementation; kept for signature compatibility
     */
    public String[] readDetailsFileLine(String file) throws IOException {
        ArrayList<String> images = new ArrayList<String>();
        if (file == null) {
            System.out.println("有错误发生" + file);
            return toPaddedArray(images);
        }
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int from = 0;
                int marker;
                while ((marker = line.indexOf(IMAGE_MARKER, from)) >= 0) {
                    int start = marker + IMAGE_SRC_OPEN.length();
                    int end = line.indexOf('\'', start);
                    if (end < 0) {
                        // Unterminated src attribute: nothing more to extract from this line.
                        break;
                    }
                    images.add(line.substring(start, end));
                    from = end + 1;
                }
                if (line.indexOf(END_OF_POST_MARKER, from) >= 0) {
                    break;
                }
            }
        } catch (IOException ex) {
            System.out.println("有错误发生" + file);
            ex.printStackTrace();
        }
        return toPaddedArray(images);
    }

    /**
     * Returns the last path segment of a URL with every {@code ?} and {@code v=tbs} removed.
     *
     * @param url the URL to inspect
     * @return the cleaned last segment, or {@code null} (after printing {@code url}) when
     *     {@code url} is {@code null} or consists solely of slashes
     */
    public String getLastStringFromURL(String url) {
        if (url == null) {
            System.out.println(url);
            return null;
        }
        // split() drops trailing empty segments, so "a/b/" yields "b" and "/" yields nothing.
        String[] segments = url.split("/");
        if (segments.length == 0) {
            System.out.println(url);
            return null;
        }
        return segments[segments.length - 1].replace("?", "").replace("v=tbs", "");
    }

    /**
     * Copies the items into an array of at least {@link #MIN_RESULT_SLOTS} slots, leaving unused
     * slots {@code null}, which is the result shape the parsing methods have always exposed.
     */
    private static String[] toPaddedArray(ArrayList<String> items) {
        return items.toArray(new String[Math.max(items.size(), MIN_RESULT_SLOTS)]);
    }
}
// This snippet comes from http://www.codesnippet.cn/detail/2210201410782.html
// Source: http://www.codesnippet.cn/detail/2210201410782.html