- package com.taoniwu;
- import java.util.regex.*;
- import java.io.*;
/**
 * Reads a text file line by line, extracts the {@code http://www...} links it contains and
 * writes them to an HTML file as a numbered list of anchors, one per row.
 *
 * <p>Links containing one of the blocked keywords are skipped. Both files are opened with the
 * platform default charset.
 */
public class TestRead {

    /** File the links are read from. */
    private static final String INPUT_PATH = "D://web.txt";

    /** HTML file the anchors are written to; its previous content is discarded on every run. */
    private static final String OUTPUT_PATH = "d://web.html";

    /**
     * Matches text starting with "http://www" and ending with "/"; compiled once instead of
     * once per input line.
     *
     * <p>NOTE(review): the quantifier is greedy and the dots are unescaped, so a match runs from
     * the first "http://www" to the last "/" on the line. Confirm this is the intended shape
     * before tightening the expression.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile("http://www.*./");

    /** A link containing any of these substrings is left out of the output. */
    private static final String[] BLOCKED_KEYWORDS = {"baidu", "tarena", "aowin"};

    /**
     * Program entry point: converts {@code D://web.txt} into {@code d://web.html}.
     *
     * <p>An I/O failure is reported on standard output; the completion message is printed only
     * when the whole conversion, including closing both files, succeeded.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The reader is opened first, so a missing input file leaves the output file untouched.
        // Opening the writer in non-append mode truncates any previous output.
        try (BufferedReader input = new BufferedReader(new FileReader(INPUT_PATH));
             BufferedWriter output = new BufferedWriter(new FileWriter(OUTPUT_PATH, false))) {
            writeLinks(input, output);
        } catch (IOException ex) {
            System.out.println(ex + "错误");
            return;
        }
        System.out.println("完成!");
    }

    /**
     * Copies every accepted link found in {@code reader} to {@code writer} as one HTML row,
     * numbering the rows from 0, and finishes the output with a single newline.
     *
     * @param reader source of the text lines to scan
     * @param writer destination of the generated HTML rows
     * @return the number of rows written
     * @throws IOException if reading or writing fails
     */
    static int writeLinks(BufferedReader reader, Writer writer) throws IOException {
        int count = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher matcher = LINK_PATTERN.matcher(line);
            while (matcher.find()) {
                String url = matcher.group();
                if (!isBlocked(url)) {
                    writer.write(formatRow(count, url));
                    count++;
                }
            }
        }
        writer.write("\n");
        return count;
    }

    /**
     * Tells whether a link must be skipped.
     *
     * @param url the matched link text
     * @return {@code true} if {@code url} contains one of the blocked keywords
     */
    static boolean isBlocked(String url) {
        for (String keyword : BLOCKED_KEYWORDS) {
            if (url.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the HTML row for one link: the row number, an anchor opening in a new window whose
     * target and label are both the link, and a line break.
     *
     * @param index row number printed in front of the anchor
     * @param url   the link used as both the {@code href} and the anchor text
     * @return the HTML row, terminated by a newline
     */
    static String formatRow(int index, String url) {
        return index + "、<a target=\"_blank\" href=\"" + url + "\">" + url + "</a><br />\n";
    }
}
// This snippet comes from http://www.codesnippet.cn/detail/010820134911.html
// Source: http://www.codesnippet.cn/detail/010820134911.html