- package cn.com.songjy.demo.lucene;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.Date;
- import java.util.List;
- import org.apache.commons.io.FileUtils;
- import org.apache.lucene.analysis.WhitespaceAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.apache.lucene.util.Version;
- public class ExportCode {
- private String PATH_INDEX = "lucene\\\\index";// 索引保存路径
- private List<File> listCode = new ArrayList<File>();
- private static List<String> code_path_java = new ArrayList<String>();
- private static String src_dir_java = "E:\\\\Workspaces\\\\java\\\\srt\\\\meg\\\\ZJAssInterface\\\\src\\\\main\\\\java";//java文件目录
- private static String src_dir_class = "E:\\\\Workspaces\\\\java\\\\srt\\\\meg\\\\ZJAssInterface\\\\target\\\\classes";//class文件目录
- private String export_path = "E:\\\\Workspaces\\\\java\\\\srt\\\\MyUpload\\\\";//导出文件的保存路径
- /**
- * 01_lucenc简介和创建索引初步
- */
- public void index(String time) {//time as 2013-04-19 00:00:00
- // 1、创建
- // Directory directory = new RAMDirectory();//索引建立在内存中
- Directory directory = null;
- try {
- directory = FSDirectory.open(new File(PATH_INDEX));// 索引建立在硬盘中
- } catch (IOException e) {
- e.printStackTrace();
- }
- // 2、创建IndexWriter
- IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
- Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35));//因为要搜索类似字符【2013-04-19】,必须使用WhitespaceAnalyzer分词器
- IndexWriter writer = null;
- try {
- writer = new IndexWriter(directory, indexWriterConfig);
- // 删除已存在索引
- writer.deleteAll();
- // 3、创建Document
- Document doc = null;
- // 4、为Document添加Filed
- // File f = new File(PATH_FILE);
- try {
- list_java_list(src_dir_java, "java", time);
- } catch (java.text.ParseException e) {
- e.printStackTrace();
- }
- for (File file : listCode) {
- doc = new Document();
- doc.add(new Field("content", new FileReader(file)));
- doc.add(new Field("filename", file.getName(), Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- doc.add(new Field("path", file.getAbsolutePath(),
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- // 5、通过IndexWriter添加文档到索引中
- writer.addDocument(doc);
- }
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- } catch (LockObtainFailedException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if (null != writer) {
- try {
- writer.close();
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
- /**
- * 02_lucene简介和搜索初步
- *
- * @throws IOException
- * @throws ParseException
- */
- public void searcher(String time) throws IOException, ParseException {//time as 2013-04-19
- // 1、创建Directory
- Directory directory = FSDirectory.open(new File(PATH_INDEX));// 索引建立在硬盘中
- // 2、创建IndexReader
- IndexReader reader = IndexReader.open(directory);
- // 3、根据IndexReader创建IndexSearcher
- IndexSearcher searcher = new IndexSearcher(reader);
- // 4、创建搜索的Query,创建parser来确定要搜索问价内容,第二个参数表示要搜索的域
- QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
- new WhitespaceAnalyzer(Version.LUCENE_35));
- // 创建Query,表示要搜索域为content中包含字符串“2013-04-19”的文档
- Query query = parser.parse(time);// 必须使用WhitespaceAnalyzer分词器
- // 5、根据searcher搜索并返回TopDocs
- TopDocs tds = searcher.search(query, 100);// 只显示100条记录
- // 6、根据TopDocs获取ScoreDoc对象
- ScoreDoc[] sds = tds.scoreDocs;
- for (ScoreDoc sd : sds) {
- // 7、根据searcher和ScoreDoc对象获取具体的Dicument对象
- Document d = searcher.doc(sd.doc);
- // 8、根据Document对象获取需要的值
- System.out.println(d.get("filename") + "[" + d.get("path") + "]");
- code_path_java.add(d.get("path"));
- }
- reader.close();
- }
- /**
- * 递归列出【dir】目录及子目录下的所有文件
- *
- * @param dir
- * -需要列出文件的目录,不可为null且长度必须大于0
- * @param suffix
- * -列出后缀名为【suffix】的文件,如java、xml、txt,可为null
- * @param lastModified列出最后修改
- * (创建)时间大于【lastModified】的文件,可为null
- * @return List<File>
- * @throws ParseException
- * @throws java.text.ParseException
- */
- public List<File> list_java_list(String dir, String suffix,
- String lastModified) throws java.text.ParseException {
- if (null != dir && dir.trim().length() > 0) {
- File file0 = new File(dir.trim());
- if (true == file0.isDirectory()) {
- File[] listFiles = file0.listFiles();
- if (null != listFiles && listFiles.length > 0) {
- String regex = "\\\\d{4}-\\\\d{2}-\\\\d(2) \\\\d{2}:\\\\d{2}:\\\\d(2)";
- SimpleDateFormat simpleDateFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- Date old = null;
- if (null != lastModified && lastModified.matches(regex))
- old = simpleDateFormat.parse(lastModified);
- for (File file : listFiles) {
- if (true == file.isFile()) {
- if (null == suffix && null == old) {
- listCode.add(file);
- } else if (null == suffix && null != old) {
- if (file.lastModified() > old.getTime())
- listCode.add(file);
- } else if (null != suffix
- && suffix.trim().length() > 0
- && null == old) {
- if (file.getName().endsWith(suffix.trim()))
- listCode.add(file);
- } else if (null != suffix
- && suffix.trim().length() > 0
- && null != old) {
- if (file.lastModified() > old.getTime()
- && file.getName().endsWith(
- suffix.trim()))
- listCode.add(file);
- } else {
- listCode.add(file);
- }
- } else {
- list_java_list(file.getAbsolutePath(), suffix,
- lastModified);
- }
- }
- }
- }
- }
- return listCode;
- }
- /**
- * 拷贝文件到指定目录
- * songjy
- * @param file_path -需拷贝的目标文件(java文件)
- * @throws IOException
- */
- public void copy_file(List<String> file_path) throws IOException {
- String currentDate = new SimpleDateFormat("yyyyMMdd")
- .format(new Date());
- String export_path_java = export_path + "java\\\\" + currentDate;
- String export_path_class = export_path + "class\\\\" + currentDate;
- for (String java_path : file_path) {
- File srcFile = new File(java_path);
- String srcFile_path = srcFile.getAbsolutePath();
- String srcFile_name = srcFile.getName();
- String package_name = srcFile_path.substring(src_dir_java.length(), (srcFile_path.length()-srcFile_name.length()));
- File dest_dir_java = new File(export_path_java + package_name);
- FileUtils.copyFileToDirectory(srcFile, dest_dir_java);// 拷贝java文件
- String class_path = src_dir_class + package_name
- + srcFile.getName().replaceAll("\\\\.java", "\\\\.class");
- srcFile = new File(class_path);
- dest_dir_java = new File(export_path_class + package_name);
- FileUtils.copyFileToDirectory(srcFile, dest_dir_java);// 拷贝class文件
- }
- }
- public static void main(String[] args) throws IOException, ParseException,
- java.text.ParseException {
- ExportCode exportCode = new ExportCode();
- SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
- System.out.println("建立索引开始!"
- + simpleDateFormat.format(new Date()));
- exportCode.index("2013-05-10 15:07:00");
- System.out.println("建立索引完成,搜索代码开始!"
- + simpleDateFormat.format(new Date()));
- exportCode.searcher("2013-05-10");
- System.out.println("搜索代码完成,文件拷贝开始!"
- + simpleDateFormat.format(new Date()));
- exportCode.copy_file(code_path_java);
- System.out.println("文件拷贝完成!"
- + simpleDateFormat.format(new Date()));
- }
- }
// This snippet comes from http://www.codesnippet.cn/detail/2208201410282.html
// Source: http://www.codesnippet.cn/detail/2208201410282.html