import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
/**
 * Command-line tool that removes duplicate lines from a UTF-8 text file.
 *
 * <p>Usage: {@code java -Xmx1000m SpeedClear <source file> <target file>}. Every distinct
 * line of the source file is written once to the target file, terminated by CRLF, in the
 * order of its first occurrence. All distinct lines are held in memory at once, so the
 * heap must be large enough for the input.
 */
public class SpeedClear {

    /**
     * Size of the reader's buffer, in chars (20 Mi chars). This is the value the tool has
     * always used; it is kept unchanged.
     */
    private static final int READ_BUFFER_CHARS = 20 * 1024 * 1024;

    /** A progress marker ("..") is printed once per this many input lines. */
    private static final int PROGRESS_INTERVAL = 30000;

    /**
     * Entry point. Prints the usage banner when called without arguments, rejects any
     * argument count other than two, and otherwise de-duplicates {@code args[0]} into
     * {@code args[1]}.
     *
     * @param args source file path followed by target file path
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            print();
            System.exit(1);
        }
        if (args.length != 2) {
            System.out.println("Format error...");
            System.exit(1);
        }
        clear(args[0], args[1]);
    }

    /**
     * Copies the distinct lines of {@code pathname} to {@code newPath}.
     *
     * <p>The source is read completely and closed before the target is opened, so the two
     * paths may refer to the same file. Failures are reported on standard error and are
     * not propagated to the caller.
     *
     * @param pathname path of the source file (read as UTF-8)
     * @param newPath path of the file to write (UTF-8, CRLF line endings); overwritten if
     *     it already exists
     */
    public static void clear(String pathname, String newPath) {
        System.out.println("Start... ");
        if (pathname == null || newPath == null) {
            System.err.println("I/O error: source and target paths must not be null");
            return;
        }
        try {
            Set<String> uniqueLines = readUniqueLines(new File(pathname));
            writeLines(uniqueLines, new File(newPath));
            System.out.println("");
            System.out.println("size = " + uniqueLines.size());
            System.out.println("End...");
        } catch (IOException e) {
            // Report the real cause (missing file, permissions, disk full, ...).
            System.err.println("I/O error: " + e.getMessage());
        } catch (OutOfMemoryError e) {
            // Running out of heap is the one failure the "file too large" hint is about;
            // it is an Error, so it has to be caught explicitly.
            System.err.println("文件太大了,建议先100MB大小..");
        }
    }

    /**
     * Reads {@code source} line by line and collects the distinct lines in first-seen
     * order, printing a progress marker every {@link #PROGRESS_INTERVAL} lines.
     */
    private static Set<String> readUniqueLines(File source) throws IOException {
        Set<String> uniqueLines = new LinkedHashSet<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(source), StandardCharsets.UTF_8),
                READ_BUFFER_CHARS)) {
            long lineIndex = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                uniqueLines.add(line);
                if (lineIndex % PROGRESS_INTERVAL == 0) {
                    System.out.print("..");
                }
                lineIndex++;
            }
        }
        return uniqueLines;
    }

    /** Writes each line to {@code target} as UTF-8, terminated by CRLF. */
    private static void writeLines(Set<String> lines, File target) throws IOException {
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8))) {
            for (String line : lines) {
                writer.write(line);
                writer.write("\r\n");
            }
        }
    }

    /** Prints the usage banner to standard output. */
    public static void print() {
        System.out.println("*************************************************");
        System.out.println("\t\tTo repeat \t\t");
        System.out.println();
        System.out.println(" format: java -Xmx1000m SpeedClear c:\\old.txt c:\\new.txt\t\t");
        System.out.println();
        System.out.println("\t\tAuthor:xxser QQ:616100108");
        System.out.println("*************************************************");
    }
}
// Source: http://www.phpxs.com/code/1001950/