package com.rayn.test;

import java.io.BufferedReader;
import java.io.Console;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;
import java.util.logging.ConsoleHandler;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

- public class FileIndexUtils
- {
- private static Directory directory = null;
- private static IndexReader reader = null;
- static
- {
- try
- {
- directory = FSDirectory.open(new File("D:/indexDemo/Demo3"));
- }
- catch (Exception e)
- {
- e.printStackTrace();
- }
- }
- public static Directory getDirectory()
- {
- return directory;
- }
- public static IndexWriter getIndexWriter()
- {
- IndexWriter writer = null;
- try
- {
- IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, new StandardAnalyzer(Version.LUCENE_43));
- iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
- iwc.setRAMBufferSizeMB(200.00);
- writer = new IndexWriter(directory, iwc);
- }
- catch (Exception e)
- {
- e.printStackTrace();
- }
- return writer;
- }
- public static void addAllFilesToFields(String filePath)
- {
- File files = new File(filePath);
- IndexWriter writer = null;
- try
- {
- writer = getIndexWriter();
- if (files.isFile())
- {
- writer.addDocument(file2AddFields(files));
- }
- else if (files.isDirectory())
- {
- for (File file : files.listFiles())
- {
- writer.addDocument(file2AddFields(file));
- }
- }
- }
- catch (IOException e)
- {
- e.printStackTrace();
- }
- finally
- {
- try
- {
- if (null != writer)
- {
- writer.close();
- }
- }
- catch (Exception e)
- {
- e.printStackTrace();
- }
- }
- }
- public static Document file2AddFields(File file)
- {
- Document doc = null;
- System.out.println("【被索引文件】" + file.getAbsoluteFile());
- try
- {
- doc = new Document();
- doc.add(new StringField("path", file.getPath(), Field.Store.YES));
- doc.add(new StringField("filename", file.getName(), Field.Store.YES));
- doc.add(new Field("content", new FileReader(file)));
- doc.add(new NumericDocValuesField("size", file.length() / 1024));
- doc.add(new NumericDocValuesField("date", file.lastModified()));
- }
- catch (Exception e)
- {
- e.printStackTrace();
- }
- return doc;
- }
- /**
- * 获取索引搜索
- *
- * @return
- */
- private static IndexSearcher getSearcher(Directory dir)
- {
- IndexSearcher indexSearcher = null;
- try
- {
- if (null == reader)
- {
- reader = DirectoryReader.open(dir);
- }
- indexSearcher = new IndexSearcher(reader);
- }
- catch (IOException e1)
- {
- e1.printStackTrace();
- }
- return indexSearcher;
- }
- /**
- * 分页查询索引
- *
- * @param queryStr
- * @param start
- * @param size
- * @throws IOException
- */
- public static void searchByPaging(String queryStr, int pageIndex, int pageSize)
- {
- Directory dir = FileIndexUtils.getDirectory();
- IndexSearcher searcher = null;
- QueryParser parser = null;
- Query query = null;
- try
- {
- searcher = getSearcher(dir);
- parser = new QueryParser(Version.LUCENE_43, "content", new StandardAnalyzer(Version.LUCENE_43));
- query = parser.parse(queryStr);
- int getCount = pageSize;
- if(pageIndex > 1)
- {
- getCount = 10 * pageIndex;
- }
- else if(pageIndex > 10)
- {
- getCount = 10;
- }
- TopDocs tds = searcher.search(query, getCount);
- ScoreDoc[] sds = tds.scoreDocs;
- sds = getNextPage(searcher, sds, query, pageIndex, pageSize);
- for (int i = 0, len = sds.length; i < len; i++)
- {
- Document doc = searcher.doc(sds[i].doc);
- System.out.println(doc.get("path") + "----" + doc.get("filename"));
- if((i + 1) % 5 == 0)
- System.out.println("*******************************");
- }
- }
- catch (ParseException e)
- {
- e.printStackTrace();
- }
- catch (IOException e)
- {
- e.printStackTrace();
- }
- System.out.println("【分页查询完毕】");
- }
- public static ScoreDoc [] getNextPage(IndexSearcher searcher, ScoreDoc[] sds, Query query, int pageIndex, int pageSize) throws IOException
- {
- if(pageIndex <= 1)
- return sds;
- int num = (pageIndex-1) * pageSize;
- int starSds = num -1;
- if(num < 0)
- starSds = 0;
- //分页查询,每次要取的数量为0--XX。然后通过最后一个值之后再进行获取某也的值
- TopDocs tdsPaging = searcher.searchAfter(sds[starSds], query, pageSize);
- return tdsPaging.scoreDocs;
- }
- /**
- * 分页查询Lucene索引文件信息
- * @param br
- * @param searcher
- * @param query
- * @param hitsPerPage
- * @param raw
- * @param interactive
- * @throws IOException
- */
- public static void doPagingSearch(BufferedReader br, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive)
- throws IOException
- {
- TopDocs results = searcher.search(query, 10 * hitsPerPage);
- ScoreDoc[] hits = results.scoreDocs;
- int numTotalHits = results.totalHits;
- System.out.println("【查询到匹配项一共有" + numTotalHits + "条】");
- int start = 0;
- int end = Math.min(numTotalHits, hitsPerPage);
- while (true)
- {
- if (end > hits.length)
- {
- System.out.println("【结果只有 1 - " + hits.length + "条数据。共 " + numTotalHits + "条匹配的数据索引信息。");
- System.out.println("是否继续查询 (y/n) ?");
- String line = br.readLine();
- if (line.length() == 0 || line.charAt(0) == 'n')
- {
- break;
- }
- hits = searcher.search(query, numTotalHits).scoreDocs;
- }
- end = Math.min(hits.length, start + hitsPerPage);
- for (int i = start; i < end; i++)
- {
- if (raw)
- {
- System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
- continue;
- }
- Document doc = searcher.doc(hits[i].doc);
- String path = doc.get("path");
- if (path != null)
- {
- System.out.println((i + 1) + ". " + path);
- String title = doc.get("title");
- if (title != null)
- {
- System.out.println(" Title: " + doc.get("title"));
- }
- }
- else
- {
- System.out.println((i + 1) + ". " + "No path for this document");
- }
- }
- if (!interactive || end == 0)
- {
- break;
- }
- if (numTotalHits >= end)
- {
- boolean quit = false;
- while (true)
- {
- System.out.print("Press ");
- if (start - hitsPerPage >= 0)
- {
- System.out.print("(p)revious page, ");
- }
- if (start + hitsPerPage < numTotalHits)
- {
- System.out.print("(n)ext page, ");
- }
- System.out.println("(q)uit or enter number to jump to a page.");
- String line = br.readLine();
- if (line.length() == 0 || line.charAt(0) == 'q')
- {
- quit = true;
- break;
- }
- if (line.charAt(0) == 'p')
- {
- start = Math.max(0, start - hitsPerPage);
- break;
- }
- else if (line.charAt(0) == 'n')
- {
- if (start + hitsPerPage < numTotalHits)
- {
- start += hitsPerPage;
- }
- break;
- }
- else
- {
- int page = Integer.parseInt(line);
- if ((page - 1) * hitsPerPage < numTotalHits)
- {
- start = (page - 1) * hitsPerPage;
- break;
- }
- else
- {
- System.out.println("No such page");
- }
- }
- }
- if (quit)
- break;
- end = Math.min(numTotalHits, start + hitsPerPage);
- }
- }
- }
- public static void main(String[] args) throws InterruptedException
- {
- String fileFolderPath = "D:/indexDemo/document/";
- // addAllFilesToFields(fileFolderPath);
- // System.err.println("【睡眠5秒钟,进行第二次索引建立】");
- // Thread.sleep(5000);
- // Thread t = new Thread(new Runnable()
- // {
- // @Override
- // public void run()
- // {
- // String fileFolderPath = "D:/indexDemo/document/";
- // findAllFiles(fileFolderPath);
- // }
- // });
- // t.start();
- // addAllFilesToFields(fileFolderPath);
- // System.err.println("【索引文件建立完毕!】");
- searchByPaging("java", 13, 5);
- }
- }
// Source: http://www.codesnippet.cn/detail/0804201512152.html