// Java: buffered file reading/writing and URL processing

 
package com.taoniwu;
import java.util.regex.*;
import java.io.*;
 
/**
 * Extracts {@code http://www...} URLs from a text file and writes them as a
 * numbered list of HTML links.
 *
 * <p>Each input line is scanned with {@link #URL_PATTERN}; matches containing
 * any of the {@link #EXCLUDED_KEYWORDS} are skipped. Every remaining match is
 * written as one numbered {@code <a>} element followed by {@code <br />}.
 * Files are read and written with the platform default charset.
 */
public class TestRead {
    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D://web.txt";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "d://web.html";

    /** Size, in chars, of the output buffer (30 KB). */
    private static final int OUTPUT_BUFFER_CHARS = 30720;

    /**
     * Pattern selecting URLs that start with {@code http://www}; compiled once
     * instead of once per input line.
     *
     * <p>NOTE(review): the quantifier is greedy and the dots are unescaped, so
     * a match runs up to the LAST '/' on the line and several URLs on a single
     * line are captured as one match. Kept as-is to preserve the existing
     * matching behaviour - confirm whether that is intended.
     */
    private static final Pattern URL_PATTERN = Pattern.compile ("http://www.*./");

    /** A matched URL containing any of these substrings is not written. */
    private static final String[] EXCLUDED_KEYWORDS = {"baidu", "tarena", "aowin"};

    /**
     * Reads the input file line by line and writes the filtered links to the
     * output file, replacing any previous content of the output file.
     *
     * @param args optional overrides: {@code args[0]} is the input path and
     *             {@code args[1]} is the output path; when absent the built-in
     *             default paths are used
     */
    public static void main (String[] args) {
        String inputPath = (args != null && args.length >= 1) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length >= 2) ? args[1] : DEFAULT_OUTPUT_PATH;

        // try-with-resources closes both streams even when reading or writing fails.
        // The reader is opened first so a missing input never touches the output file.
        // Opening the writer with append=false truncates an existing output file.
        try (BufferedReader input = new BufferedReader (new FileReader (inputPath));
             BufferedWriter output = new BufferedWriter (
                     new FileWriter (outputPath, false), OUTPUT_BUFFER_CHARS)) {
            int sum = 0; // running index printed in front of every link
            String text;
            while ((text = input.readLine ()) != null) {
                Matcher m = URL_PATTERN.matcher (text);
                while (m.find ()) {
                    String url = m.group ();
                    if (isExcluded (url)) {
                        continue;
                    }
                    output.write (sum + "、<a target=\"_blank\" href=\"" + url + "\">"
                            + url + "</a><br />\n");
                    sum++;
                }
            }
            output.write ("\n");
        } catch (IOException ex) {
            System.out.println (ex + "错误");
            return; // do not report completion after a failure
        }
        System.out.println ("完成！");
    }

    /** Returns true when the URL contains one of the excluded keywords. */
    private static boolean isExcluded (String url) {
        for (String keyword : EXCLUDED_KEYWORDS) {
            if (url.contains (keyword)) {
                return true;
            }
        }
        return false;
    }
}
//该片段来自于http://www.codesnippet.cn/detail/010820134911.html
// Source: http://www.codesnippet.cn/detail/010820134911.html
// Related articles:
//
// None yet - be the first to comment!