- package test;
- import java.io.BufferedOutputStream;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.FileOutputStream;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.FilenameFilter;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.List;
- import java.util.Random;
/**
 * External merge sort demo for a text file that holds one integer per line.
 *
 * <p>{@link #main(String[])} runs three phases: it generates a file of random integers, splits
 * that file into fixed-size chunks which are sorted in memory and written to temporary files,
 * and finally merges the temporary files pairwise, pass after pass, until a single sorted file
 * is left. That file is copied to {@link #LASTFILEPATH} + {@link #LASTFILENAME}.
 *
 * <p>All state lives in plain static fields without synchronization, so the class is meant to
 * be driven from a single thread.
 *
 * @author wenhuan
 * skype:jasonwenhuan
 */
public class HugeDataSort {

    /** File written by {@link #generateDate()} and read by {@link #splitBigFileToLittleFile()}. */
    public final static String ORIGINALPATH = "E:/bigdatatest/bigData.txt";

    /** Directory (with trailing separator) that holds every temporary and intermediate file. */
    public final static String TEMPFILEPATH = "E:/bigdatatest/";

    /** Directory (with trailing separator) that receives the final sorted file. */
    public final static String LASTFILEPATH = "E:/bigdatatest/";

    /** Name of the final sorted file inside {@link #LASTFILEPATH}. */
    public final static String LASTFILENAME = "last.txt";

    /** Number of integers generated; also the exclusive upper bound of each generated value. */
    public final static int BIGDATALENGTH = 10000000;

    /** Maximum number of integers sorted in memory and stored in one temporary file. */
    public final static int TEMPFILELENGTH = 1000000;

    /** Number of the current merge pass; embedded in the names of intermediate files. */
    public static int rewriteTime = 1;

    /** Temporary chunk files created by the most recent {@link #splitBigFileToLittleFile()}. */
    private static File[] tempFiles;

    /** Counter printed and incremented once per record written while merging. */
    public static int writeTime = 0;

    /** Not read by any code in this class. */
    public static int threadNumber = 2;

    /**
     * Runs the full demo: generate, split into sorted chunks, merge.
     *
     * @param args ignored
     * @throws IOException if any file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        generateDate();
        splitBigFileToLittleFile();
        unitAllTempFileAndDeleteTempFile();
    }

    /**
     * Writes {@link #BIGDATALENGTH} random integers in {@code [0, BIGDATALENGTH)} to
     * {@link #ORIGINALPATH}, one per line.
     *
     * @throws IOException if the file cannot be created or written
     */
    public static void generateDate() throws IOException {
        Random random = new Random();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(ORIGINALPATH))) {
            for (int i = 0; i < BIGDATALENGTH; i++) {
                writer.write(String.valueOf(random.nextInt(BIGDATALENGTH)));
                // A real line break: the split phase reads the file back with readLine().
                writer.newLine();
            }
        }
    }

    /**
     * Splits {@link #ORIGINALPATH} into {@code BIGDATALENGTH / TEMPFILELENGTH} files named
     * {@code sortTempFile<i>.txt} inside {@link #TEMPFILEPATH}. Each file receives up to
     * {@link #TEMPFILELENGTH} integers, sorted ascending. Chunks past the end of the input are
     * still created, but empty.
     *
     * @throws IOException if the input cannot be read or a chunk cannot be written
     * @throws NumberFormatException if an input line is not a decimal integer
     */
    public static void splitBigFileToLittleFile() throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(ORIGINALPATH))) {
            tempFiles = new File[BIGDATALENGTH / TEMPFILELENGTH];
            for (int i = 0; i < tempFiles.length; i++) {
                tempFiles[i] = new File(TEMPFILEPATH + "sortTempFile" + i + ".txt");

                List<Integer> chunk = new ArrayList<Integer>();
                String text;
                while (chunk.size() < TEMPFILELENGTH && (text = br.readLine()) != null) {
                    chunk.add(Integer.parseInt(text));
                }
                Collections.sort(chunk);

                try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempFiles[i]))) {
                    for (Integer value : chunk) {
                        writer.write(String.valueOf(value));
                        writer.newLine();
                    }
                }
            }
        }
    }

    /**
     * Merges the given sorted files into one sorted file at
     * {@link #LASTFILEPATH} + {@link #LASTFILENAME}.
     *
     * <p>Each pass merges neighbouring files pairwise into
     * {@code last_<rewriteTime>_<i>.txt} inside {@link #TEMPFILEPATH}, deletes the inputs, and
     * continues with every file in that directory whose name starts with {@code last_}. A
     * leftover unpaired file is renamed into the same naming scheme so the next pass sees it.
     * When one file remains it is copied to the final location and removed.
     *
     * @param files paths of the sorted input files; {@code null} or empty means nothing to do
     * @throws IOException if a file cannot be read, written, renamed or deleted
     * @throws NumberFormatException if an input line is not a decimal integer
     */
    public static void multiWaysMergeSort(String[] files) throws IOException {
        String[] pending = files;
        while (pending != null && pending.length > 1) {
            mergeAdjacentPairs(pending);
            rewriteTime++;
            pending = getTempFiles("last_");
        }
        if (pending == null || pending.length == 0) {
            // Nothing to merge. Without this guard an empty input would loop forever.
            return;
        }
        String lastFilePath = LASTFILEPATH + LASTFILENAME;
        // Use the throwing copy so the only remaining copy of the data is not deleted
        // after a failed write.
        copyOrThrow(pending[0], lastFilePath, false);
        deleteFile(pending[0]);
    }

    /**
     * Merges two files of ascending integers (one per line) into {@code targetPath}. Every
     * input record is written exactly once, so values present in both inputs appear in the
     * output as many times as they appear in the inputs combined.
     *
     * @param firstPath path of the first sorted input file
     * @param secondPath path of the second sorted input file
     * @param targetPath path of the file to create or overwrite with the merged result
     * @throws IOException if a file cannot be read or written, or if {@code targetPath}
     *     denotes one of the inputs
     * @throws NumberFormatException if an input line is not a decimal integer
     */
    public static void mergeSortedFiles(String firstPath, String secondPath, String targetPath)
            throws IOException {
        File target = new File(targetPath).getCanonicalFile();
        if (target.equals(new File(firstPath).getCanonicalFile())
                || target.equals(new File(secondPath).getCanonicalFile())) {
            // Opening the writer would truncate the input before it has been read.
            throw new IOException("Merge target must differ from its inputs: " + targetPath);
        }
        try (BufferedReader first = new BufferedReader(new FileReader(firstPath));
                BufferedReader second = new BufferedReader(new FileReader(secondPath));
                BufferedWriter writer = new BufferedWriter(new FileWriter(targetPath))) {
            String s1 = first.readLine();
            String s2 = second.readLine();
            while (s1 != null || s2 != null) {
                boolean takeFirst;
                if (s1 == null) {
                    takeFirst = false;
                } else if (s2 == null) {
                    takeFirst = true;
                } else {
                    // merge() returns 0 only when the first value is the larger one. On a
                    // tie the first record is taken now and the second on a later iteration.
                    takeFirst = merge(Integer.parseInt(s1), Integer.parseInt(s2)) != 0;
                }
                if (takeFirst) {
                    writer.write(s1);
                    writer.newLine();
                    s1 = first.readLine();
                } else {
                    writer.write(s2);
                    writer.newLine();
                    s2 = second.readLine();
                }
                // Per-record progress output kept from the original implementation.
                System.out.println("write time : " + writeTime++);
            }
        }
    }

    /**
     * Three-way comparison used by the merge step.
     *
     * @param a first value
     * @param b second value
     * @return {@code 0} if {@code a > b}, {@code 1} if {@code a == b}, {@code 2} if {@code a < b}
     */
    public static int merge(int a, int b) {
        if (a > b) {
            return 0;
        } else if (a == b) {
            return 1;
        } else {
            return 2;
        }
    }

    /**
     * Merges every {@code sortTempFile*} file found in {@link #TEMPFILEPATH} into the final
     * sorted file; the temporary files are deleted as they are consumed.
     *
     * @throws IOException if a file cannot be read, written, renamed or deleted
     */
    public static void unitAllTempFileAndDeleteTempFile() throws IOException {
        String[] files = getTempFiles("sortTempFile");
        multiWaysMergeSort(files);
    }

    /**
     * Lists the entries of {@link #TEMPFILEPATH} whose name starts with the given prefix.
     *
     * @param startName required name prefix; {@code null} matches every entry
     * @return matching entries, each prefixed with {@link #TEMPFILEPATH}; empty (never
     *     {@code null}) when nothing matches or the directory cannot be listed
     */
    public static String[] getTempFiles(final String startName) {
        final String prefix = startName == null ? "" : startName;
        String[] names = new File(TEMPFILEPATH).list(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.startsWith(prefix);
            }
        });
        if (names == null) {
            // File.list() yields null when the path is not a directory or an I/O error occurs.
            return new String[0];
        }
        String[] retFiles = new String[names.length];
        for (int i = 0; i < names.length; i++) {
            retFiles[i] = TEMPFILEPATH + names[i];
        }
        return retFiles;
    }

    /**
     * Copies a file byte for byte, overwriting the destination. Best effort: an I/O failure
     * is reported with a stack trace on standard error and is not propagated to the caller.
     *
     * @param org path of the file to read
     * @param dst path of the file to create or overwrite
     * @param useBuffer whether to write through a {@link BufferedOutputStream}
     */
    public static void copyFile(String org, String dst, boolean useBuffer) {
        try {
            copyOrThrow(org, dst, useBuffer);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes the file at the given path.
     *
     * @param filePath path of the file to delete
     * @return {@code true} only if the file existed and was actually deleted
     */
    public static boolean deleteFile(String filePath) {
        File f = new File(filePath);
        return f.exists() && f.delete();
    }

    /**
     * Renames a file to {@code last_<rewriteTime>_<i>.txt} inside {@link #TEMPFILEPATH}.
     *
     * @param fileName path of the file to rename
     * @param i index used in the new file name
     * @return {@code true} if the rename succeeded
     */
    public static boolean renameFile(String fileName, int i) {
        File file = new File(fileName);
        return file.renameTo(new File(TEMPFILEPATH + "last_" + rewriteTime + "_" + i + ".txt"));
    }

    /** Runs one merge pass: merges files pairwise and renames a leftover unpaired file. */
    private static void mergeAdjacentPairs(String[] files) throws IOException {
        for (int i = 0; i < files.length; i += 2) {
            if (i == files.length - 1) {
                // Unpaired file: move it into the "last_" naming scheme so the next pass
                // picks it up. A silent failure here would drop its records from the result.
                if (!renameFile(files[i], i)) {
                    throw new IOException("Could not rename " + files[i]
                            + " for merge pass " + rewriteTime);
                }
                break;
            }
            String target = TEMPFILEPATH + "last_" + rewriteTime + "_" + i + ".txt";
            mergeSortedFiles(files[i], files[i + 1], target);
            // The merged output is fully written and closed before its inputs are removed.
            deleteMergedInput(files[i]);
            deleteMergedInput(files[i + 1]);
        }
    }

    /** Deletes an already merged input; a survivor could be merged a second time later. */
    private static void deleteMergedInput(String path) throws IOException {
        if (!new File(path).delete()) {
            throw new IOException("Could not delete merged input " + path);
        }
    }

    /** Copies {@code org} to {@code dst} through a single output stream, propagating failures. */
    private static void copyOrThrow(String org, String dst, boolean useBuffer)
            throws IOException {
        try (FileInputStream in = new FileInputStream(org);
                FileOutputStream raw = new FileOutputStream(dst)) {
            java.io.OutputStream sink = useBuffer ? new BufferedOutputStream(raw) : raw;
            byte[] bytes = new byte[1024];
            int length;
            while ((length = in.read(bytes)) != -1) {
                sink.write(bytes, 0, length);
            }
            // Push buffered bytes out before try-with-resources closes the underlying stream.
            sink.flush();
        }
    }

    /** Empty placeholder task; nothing in this class instantiates it. */
    class MyThread implements Runnable {
        @Override
        public void run() {
        }
    }
}
// This snippet comes from http://www.codesnippet.cn/detail/0311201410891.html
// Source: http://www.codesnippet.cn/detail/0311201410891.html