// Just for fun: download pictures from a community forum.

 
package test;
  
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
  
/**
 * Command-line scraper that downloads the images attached to forum posts.
 *
 * <p>Work flow: download the forum index page, extract the post links from it,
 * download every post that is not yet recorded in {@code url.log}, extract the
 * image links from the post and save each image below the {@code pic/}
 * directory. Successfully processed posts are appended to {@code url.log} so
 * that a later run skips them.
 */
public class Download1024 {
    /** URL of the forum index page that lists the posts to scan. */
    public String downloadwebsite = "http://xx.xx.xx/thread0806.php?fid=16";

    /** Directory (relative to the working directory) that receives every download. */
    private static final String DOWNLOAD_DIR = "pic";
    /** File name under {@link #DOWNLOAD_DIR} used for the downloaded index page. */
    private static final String INDEX_FILE = "jt.html";
    /** Log of already processed post URLs, one URL per line. */
    private static final String LOG_FILE = "url.log";
    /** Prefix that turns a relative post path into an absolute URL. */
    private static final String SITE_ROOT = "http://xx.xx.xx/";
    /** Markup that precedes the relative path of a post on the index page. */
    private static final String POST_LINK_PREFIX = "<h3><a href=\"";
    /** Only post paths that start with this prefix are collected. */
    private static final String POST_PATH_PREFIX = "htm_data/16/1307/";
    /** Markup that precedes the URL of an embedded image in a post. */
    private static final String IMAGE_ATTR_PREFIX = "<input type='image' src='";
    /** Only image URLs with this scheme are collected. */
    private static final String IMAGE_URL_SCHEME = "http://";
    /** Line content after which no further images of a post are collected. */
    private static final String POST_END_MARKER = "<div>------------------------</div>";
    /** Historical fixed size of the result arrays; kept as the minimum length. */
    private static final int MIN_RESULT_LENGTH = 100;

    /**
     * Program entry point: runs one complete download pass.
     *
     * @param args ignored
     * @throws Exception if reading or writing the log file fails
     */
    public static void main(String[] args) throws Exception {
        Download1024 d = new Download1024();

        d.doAction();

    }

    /**
     * Downloads the index page and then every post (with its images) that is
     * not yet listed in the log file.
     *
     * <p>A post is written to the log only after its page was downloaded
     * successfully, so a post whose download failed is retried on the next run.
     *
     * @throws SQLException never thrown; declared only to keep the signature
     *                      compatible with existing callers
     * @throws IOException  if the log file cannot be read or written
     */
    public void doAction() throws SQLException, IOException {
        // Download the index page.
        System.out.println("欢迎使用xx社区图片下载器，正在下载主文件");
        ArrayList<String> list = getList();
        if (!httpDownload(downloadwebsite, INDEX_FILE)) {
            // Without a fresh index there is nothing reliable to parse.
            System.out.println("Failed to download " + downloadwebsite);
            return;
        }
        System.out.println("主文件下载完毕，正在解析文档");
        String[] pages = readFileLine(DOWNLOAD_DIR + "/" + INDEX_FILE);
        SimpleDateFormat prefixFormat = new SimpleDateFormat("yyyyMMddHHmmss");
        int i = 0;
        // Process the posts one by one.
        for (String page : pages) {
            if (page == null) {
                continue; // unused slot of the padded result array
            }
            i++;

            // Images are renamed with a timestamp prefix so that equal image
            // names from different posts do not overwrite each other.
            // NOTE(review): the prefix has one-second resolution, so two posts
            // handled within the same second still share it.
            String prefix = prefixFormat.format(new Date());

            boolean hasExists = list.contains(page);
            if (hasExists) {
                continue; // already downloaded in an earlier run
            }
            System.out.println("hasExists:" + hasExists);
            System.out.print("准备下载第" + i + "个帖子:");

            String pageFile = getLastStringFromURL(page);
            if (pageFile == null || !httpDownload(page, pageFile)) {
                // Do not log the post: it will be retried on the next run.
                System.out.println("Failed to download " + page);
                continue;
            }

            String[] imageLinks = readDetailsFileLine(DOWNLOAD_DIR + "/" + pageFile);
            int j = 0;
            for (String imglink : imageLinks) {
                if (imglink != null && !imglink.equals("")) {
                    httpDownload(imglink, prefix + "__" + (++j) + ".jpg");
                }
            }

            // Record the finished post so it is skipped from now on.
            writeLog(page);
            list.add(page);

            // The post page itself was only needed for parsing.
            File file = new File(DOWNLOAD_DIR + "/" + pageFile);
            if (file.exists()) {
                file.delete();
            }
        }

        System.out.println("图片全部下载完毕，请慢慢欣赏");
    }

    /**
     * Reads the log of already downloaded posts.
     *
     * @return the distinct lines of {@code url.log} in file order; empty when
     *         the log file does not exist
     * @throws IOException if the log file exists but cannot be read
     */
    public ArrayList<String> getList() throws IOException {
        ArrayList<String> list = new ArrayList<String>();
        File logFile = new File(LOG_FILE);
        if (logFile.exists()) {
            BufferedReader bReader = new BufferedReader(new FileReader(logFile));
            try {
                String line;
                while ((line = bReader.readLine()) != null) {
                    if (!list.contains(line)) {
                        list.add(line);
                    }
                }
            } finally {
                bReader.close();
            }
        }
        System.out.println("读取log完成！" + list.size());
        return list;
    }

    /**
     * Appends one post URL to the log so that it is not downloaded again.
     *
     * @param url the post URL to record
     * @throws IOException if the log file cannot be written
     */
    public void writeLog(String url) throws IOException {
        FileWriter fw = new FileWriter(LOG_FILE, true);
        try {
            // A real line break is required: getList() reads the log line by line.
            fw.write(url + "\n");
            fw.flush();
        } finally {
            fw.close();
        }
    }

    /**
     * Downloads a resource over HTTP into the {@code pic/} directory.
     *
     * @param httpUrl  URL of the resource to fetch
     * @param saveFile file name, relative to {@code pic/}, to store it under
     * @return {@code true} if the resource was written completely,
     *         {@code false} if the URL is malformed or any I/O error occurred
     */
    public boolean httpDownload(String httpUrl, String saveFile) {
        System.out.println("空闲内存:=========" + Runtime.getRuntime().freeMemory() / 1024 / 1024 + "M");
        System.out.println(httpUrl);

        URL url;
        try {
            url = new URL(httpUrl);
        } catch (MalformedURLException e1) {
            e1.printStackTrace();
            return false;
        }

        // Make sure the target directory exists; FileOutputStream does not create it.
        File target = new File(DOWNLOAD_DIR + "/" + saveFile);
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            System.out.println("Cannot create directory " + parent);
            return false;
        }

        InputStream inStream = null;
        FileOutputStream fs = null;
        try {
            URLConnection conn = url.openConnection();
            inStream = conn.getInputStream();
            fs = new FileOutputStream(target);

            byte[] buffer = new byte[1024];
            int byteread;
            while ((byteread = inStream.read(buffer)) != -1) {
                fs.write(buffer, 0, byteread);
            }
            // Close explicitly so that a failing close is reported as a failed download.
            fs.close();
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            // Release both streams on every path, including failures.
            closeQuietly(fs);
            closeQuietly(inStream);
        }
    }

    /**
     * Extracts the post URLs from a downloaded forum index page.
     *
     * <p>A line contributes a link when it contains
     * {@code <h3><a href="htm_data/16/1307/}; the link is the attribute value
     * up to its closing double quote, prefixed with the site root. Read errors
     * are reported on standard output and end the scan; the links found so far
     * are still returned.
     *
     * @param file path of the downloaded index page
     * @return the post URLs in file order; the array has at least 100 slots
     *         and unused slots are {@code null}
     * @throws IOException declared for compatibility with existing callers
     */
    public String[] readFileLine(String file) throws IOException {
        ArrayList<String> links = new ArrayList<String>();
        if (file == null) {
            System.out.println("Error occurs during reading " + file);
            return toPaddedArray(links);
        }
        String marker = POST_LINK_PREFIX + POST_PATH_PREFIX;
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(file));
            String line;
            while ((line = br.readLine()) != null) {
                int tag = line.indexOf(marker);
                if (tag < 0) {
                    continue;
                }
                int start = tag + POST_LINK_PREFIX.length();
                int end = line.indexOf('"', start);
                if (end < 0) {
                    continue; // unterminated attribute: skip this line only
                }
                links.add(SITE_ROOT + line.substring(start, end));
            }
        } catch (IOException ex) {
            System.out.println("Error occurs during reading " + file);
        } finally {
            closeQuietly(br);
        }
        return toPaddedArray(links);
    }

    /**
     * Extracts the image URLs from a downloaded post page.
     *
     * <p>Every {@code <input type='image' src='http://...'} occurrence
     * contributes the attribute value up to its closing single quote. Scanning
     * stops at the first line that contains
     * {@code <div>------------------------</div>} after its last image. Read
     * errors are reported on standard output and end the scan; the links found
     * so far are still returned.
     *
     * @param file path of the downloaded post page
     * @return the image URLs in document order; the array has at least 100
     *         slots and unused slots are {@code null}
     * @throws IOException declared for compatibility with existing callers
     */
    public String[] readDetailsFileLine(String file) throws IOException {
        ArrayList<String> links = new ArrayList<String>();
        if (file == null) {
            System.out.println("有错误发生" + file);
            return toPaddedArray(links);
        }
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(file));
            String line;
            while ((line = br.readLine()) != null) {
                int cursor = collectImageLinks(line, links);
                if (line.indexOf(POST_END_MARKER, cursor) >= 0) {
                    break;
                }
            }
        } catch (IOException ex) {
            System.out.println("有错误发生" + file);
        } finally {
            closeQuietly(br);
        }
        return toPaddedArray(links);
    }

    /**
     * Returns the last path segment of a URL with {@code "?"} and
     * {@code "v=tbs"} removed, for use as a local file name.
     *
     * @param url the URL to inspect
     * @return the cleaned last segment, or {@code null} (after printing the
     *         URL) when {@code url} is {@code null} or has no segment at all
     */
    public String getLastStringFromURL(String url) {
        if (url != null) {
            String[] parts = url.split("/");
            if (parts.length > 0) {
                return parts[parts.length - 1].replace("?", "").replace("v=tbs", "");
            }
        }
        System.out.println(url);
        return null;
    }

    /** Appends every image URL found in {@code line} to {@code links}; returns the index just past the last match. */
    private static int collectImageLinks(String line, ArrayList<String> links) {
        String marker = IMAGE_ATTR_PREFIX + IMAGE_URL_SCHEME;
        int cursor = 0;
        int tag;
        while ((tag = line.indexOf(marker, cursor)) >= 0) {
            int start = tag + IMAGE_ATTR_PREFIX.length();
            int end = line.indexOf('\'', start);
            if (end < 0) {
                break; // unterminated attribute: nothing more to extract here
            }
            links.add(line.substring(start, end));
            cursor = end + 1;
        }
        return cursor;
    }

    /** Copies {@code values} into an array of at least {@link #MIN_RESULT_LENGTH} slots, padding with {@code null}. */
    private static String[] toPaddedArray(ArrayList<String> values) {
        return values.toArray(new String[Math.max(values.size(), MIN_RESULT_LENGTH)]);
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure of the close itself. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done when closing fails during cleanup.
        }
    }

}
//该片段来自于http://www.codesnippet.cn/detail/2210201410782.html
// Source: http://www.codesnippet.cn/detail/2210201410782.html
// Related articles:
//
// None yet - be the first to comment!