- import java.io.*;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
/**
 * Searches source files for quoted string literals that match a set of regular
 * expressions and prints the distinct matches to the console.
 */
public class ExtractStr {

    /** Charset name used to decode the scanned source files. */
    private static final String SOURCE_CHARSET = "UTF-8";

    /** File-name suffix of the files scanned by {@link #main(String[])}. */
    private static final String DEFAULT_FILE_EXT = ".php";

    /**
     * Minimum length of a match reported by {@link #main(String[])}. The length is
     * measured on the whole match, i.e. including the surrounding quote characters.
     */
    private static final int MIN_MATCH_LENGTH = 12;

    /**
     * Builds the one-line usage text printed when no path argument is supplied.
     *
     * @param args the command-line arguments (not used by the current implementation)
     * @return the usage text
     */
    public static String getHelpString(String[] args) {
        return String.format("%s [path]", "a");
    }

    /**
     * Recursively collects every regular file below {@code rootPath} whose name ends
     * with {@code fileExt}.
     *
     * @param rootPath directory to scan
     * @param fileExt  required file-name suffix, e.g. {@code ".php"}
     * @param fileList output list that receives the matching files
     */
    private void getFiles(String rootPath, final String fileExt, List<File> fileList) {
        File root = new File(rootPath);
        File[] children = root.listFiles(new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                // Directories are kept so that the walk can descend into them.
                return candidate.isDirectory()
                        || (candidate.isFile() && candidate.getName().endsWith(fileExt));
            }
        });
        if (children == null) {
            // listFiles() returns null when the path is not a directory or an I/O
            // error occurs; treat that as "nothing to collect" instead of failing.
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                getFiles(child.getAbsolutePath(), fileExt, fileList);
            } else {
                fileList.add(child);
            }
        }
    }

    /**
     * Lists all files below {@code rootPath} (recursively) whose name ends with
     * {@code fileExt}.
     *
     * @param rootPath directory to scan
     * @param fileExt  required file-name suffix, e.g. {@code ".php"}
     * @return the matching files; empty when {@code rootPath} is not a readable directory
     */
    public List<File> run(String rootPath, String fileExt) {
        List<File> result = new ArrayList<File>();
        getFiles(rootPath, fileExt, result);
        return result;
    }

    /**
     * Applies every regular expression in {@code patternList} to the content of
     * {@code file} and returns the matches that are long enough.
     *
     * <p>The file is decoded as UTF-8. Matches are returned grouped by pattern (in the
     * order of {@code patternList}) and, within a pattern, in order of appearance.
     *
     * @param patternList   regular expressions to search for
     * @param file          file to read
     * @param miniCharCount minimum length of the whole match (group 0) for it to be kept
     * @return the matched substrings; duplicates are not removed
     * @throws Exception if the file cannot be read or a pattern is not a valid regex
     */
    public List<String> parserSourceFile(List<String> patternList, File file, int miniCharCount)
            throws Exception {
        String text = readText(file);
        List<String> result = new ArrayList<String>();
        for (String patternStr : patternList) {
            Matcher matcher = Pattern.compile(patternStr).matcher(text);
            while (matcher.find()) {
                String match = matcher.group();
                if (match.length() >= miniCharCount) {
                    result.add(match);
                }
            }
        }
        return result;
    }

    /**
     * Reads the complete content of {@code file} as text and always closes the file.
     *
     * @param file file to read
     * @return the decoded content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(File file) throws IOException {
        InputStream in = new FileInputStream(file);
        try {
            Reader reader = new BufferedReader(new InputStreamReader(in, SOURCE_CHARSET));
            StringBuilder text = new StringBuilder();
            char[] chunk = new char[8192];
            int count;
            // A single read() may return fewer characters than requested, and the
            // byte length of the file is not its character count, so read until EOF.
            while ((count = reader.read(chunk)) != -1) {
                text.append(chunk, 0, count);
            }
            return text.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Command-line entry point: scans every directory given in {@code args} for
     * {@code .php} files and prints each distinct quoted CJK string found in them.
     *
     * @param args directories to scan; arguments that are not existing directories are skipped
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("未传入需要搜索的有效的源代码路径" + "\n" + ExtractStr.getHelpString(args));
            System.exit(1);
        }

        List<String> searchFolders = new ArrayList<String>();
        for (String arg : args) {
            File folder = new File(arg);
            // isDirectory() already implies that the path exists.
            if (folder.isDirectory()) {
                searchFolders.add(folder.getAbsolutePath());
            }
        }

        // Single- and double-quoted literals consisting only of CJK ideographs.
        List<String> patternList = new ArrayList<String>();
        patternList.add("'([\\u4E00-\\u9FA5]+)'");
        patternList.add("\"([\\u4E00-\\u9FA5]+)\"");

        ExtractStr es = new ExtractStr();
        List<File> fileList = new ArrayList<File>();
        for (String searchFolder : searchFolders) {
            fileList.addAll(es.run(searchFolder, DEFAULT_FILE_EXT));
        }

        // LinkedHashSet removes duplicates while keeping a stable, first-seen output order.
        Set<String> outList = new LinkedHashSet<String>();
        for (File f : fileList) {
            try {
                outList.addAll(es.parserSourceFile(patternList, f, MIN_MATCH_LENGTH));
            } catch (Exception e) {
                // Best effort: report the failing file and continue with the rest.
                e.printStackTrace();
            }
        }

        for (String str : outList) {
            System.out.println(str);
        }
    }
}
对于拼接字符串的中文输出支持得不算好,而且对中文的长度也有限制(匹配结果连同两侧引号最少12个字符,即至少10个汉字)
来源: http://www.phpxs.com/code/1001784/