對lucene的學習之建立索引
需要 lucene-3.0.3.zip(Lucene 3.0.3 發行包)
先創建一個接口:
- package cn.net.persist.dao;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.index.IndexWriter.MaxFieldLength;
- import org.apache.lucene.util.Version;
- /**
-  * Shared settings for the Lucene indexing demo: data and index locations,
-  * the analyzers, the Lucene version and the field-length limit.
-  * Classes gain unqualified access to these values by implementing this interface.
-  */
- public interface Constants {
-     /** Directory that holds the source files to be indexed. */
-     String FILE_DIR = "D:\\Workspaces\\MyEclipse 8.5\\luceneDemo\\dataSource";
-     /** Directory in which the index is stored. */
-     String INDEX_DIR = "D:\\Workspaces\\MyEclipse 8.5\\luceneDemo\\index";
-     /** Lucene version shared by the analyzers below. */
-     Version version = Version.LUCENE_30;
-     /** Analyzer used for Chinese text. */
-     Analyzer analyzer = new SmartChineseAnalyzer(version);
-     /** Analyzer used for English text. */
-     Analyzer enAnalyzer = new StandardAnalyzer(version);
-     /** Length limit applied to indexed fields. */
-     MaxFieldLength maxLength = MaxFieldLength.LIMITED;
- }
然後寫一個工具類:
- package cn.net.persist.utils;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Field.Index;
- import org.apache.lucene.document.Field.Store;
- /**
-  * Helpers that turn a file on disk into a Lucene {@link Document} and print
-  * the fields of such a document.
-  */
- public class File2DocumentUtils {
-     /**
-      * Builds a document for the file at the given path.
-      *
-      * @param path path of the file to convert
-      * @return a document with the fields filename, contents, size and path
-      * @throws RuntimeException if the file cannot be read
-      */
-     public static Document file2Document(String path) {
-         return buildDocument(new File(path));
-     }
-     /**
-      * Builds a document for the given file. Produces the same four fields as
-      * {@link #file2Document(String)}, including "path", so that
-      * {@link #printDocumentInfo(Document)} works for documents from either overload.
-      *
-      * @param file the file to convert
-      * @return a document with the fields filename, contents, size and path
-      * @throws Exception kept in the signature for existing callers; read
-      *         failures surface as RuntimeException
-      */
-     public static Document file2Document(File file) throws Exception {
-         return buildDocument(file);
-     }
-     /** Creates the document shared by both file2Document overloads. */
-     private static Document buildDocument(File file) {
-         Document doc = new Document();
-         doc.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
-         // The text is read into a String so it can be stored; the original note
-         // on the Reader-based Field constructor says that variant is not stored.
-         doc.add(new Field("contents", readFileContent(file), Store.YES, Index.ANALYZED));
-         doc.add(new Field("size", file.length() + "", Store.YES, Index.NOT_ANALYZED));
-         doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
-         return doc;
-     }
-     /**
-      * Prints the filename, contents, size and path fields of a document.
-      *
-      * There are two ways to obtain the value of a field called "name":
-      *
-      * <pre>
-      * 1, Field f = doc.getField("name");
-      *    f.stringValue();
-      * 2, doc.get("name");
-      * </pre>
-      *
-      * Note that size and path are printed as Field objects (their toString()
-      * form), not as bare values.
-      *
-      * @param doc the document to print
-      */
-     public static void printDocumentInfo(Document doc) {
-         System.out.println("filename = " + doc.get("filename"));
-         System.out.println("contents = " + doc.get("contents"));
-         System.out.println("size = " + doc.getField("size"));
-         System.out.println("path = " + doc.getField("path"));
-     }
-     /**
-      * Reads the whole file as GBK-encoded text. Every line is terminated with
-      * "\n" in the result, whatever line separator the file uses.
-      *
-      * @param file the file to read
-      * @return the file content
-      * @throws RuntimeException wrapping the IOException if reading fails
-      */
-     public static String readFileContent(File file) {
-         try {
-             FileInputStream in = new FileInputStream(file);
-             try {
-                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, "gbk"));
-                 StringBuilder content = new StringBuilder();
-                 for (String line = reader.readLine(); line != null; line = reader.readLine()) {
-                     content.append(line).append("\n");
-                 }
-                 return content.toString();
-             } finally {
-                 // Closing the underlying stream releases the file handle even if
-                 // creating the reader or reading a line fails.
-                 in.close();
-             }
-         } catch (IOException e) {
-             throw new RuntimeException("Failed to read file: " + file, e);
-         }
-     }
- }
最後寫測試方法:
- package cn.net.persist.dir;
- import java.io.File;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.RAMDirectory;
- import org.junit.Test;
- import cn.net.persist.dao.Constants;
- import cn.net.persist.utils.File2DocumentUtils;
- /**
-  * JUnit demos that index a text file: once directly into the on-disk index,
-  * and once through an in-memory copy of that index.
-  */
- public class Directorys implements Constants {
-     /** The text file that both tests add to the index. */
-     private static final String filePath = "D:\\Workspaces\\MyEclipse 8.5\\luceneDemo\\dataSource\\xiaoli.txt";
-     /**
-      * Writes the file into the on-disk index and prints the document's fields.
-      *
-      * @throws Exception if opening the index or indexing fails
-      */
-     @Test
-     public void test1() throws Exception {
-         Directory fsDir = FSDirectory.open(new File(INDEX_DIR));
-         try {
-             Document doc = File2DocumentUtils.file2Document(filePath);
-             // Write the document to the index.
-             IndexWriter indexWriter = new IndexWriter(fsDir, analyzer, maxLength);
-             try {
-                 indexWriter.addDocument(doc);
-                 // Optimize the index.
-                 indexWriter.optimize();
-             } finally {
-                 // Always close the writer so a failure does not leave it open.
-                 indexWriter.close();
-             }
-             // Print the document's fields.
-             File2DocumentUtils.printDocumentInfo(doc);
-         } finally {
-             fsDir.close();
-         }
-     }
-     /**
-      * Loads the on-disk index into memory, adds the file there, then writes
-      * the in-memory index back to disk and prints document counts.
-      *
-      * @throws Exception if opening the index or indexing fails
-      */
-     @Test
-     public void test2() throws Exception {
-         Directory fsDir = FSDirectory.open(new File(INDEX_DIR));
-         try {
-             // 1. Load the on-disk index into memory at startup.
-             Directory ramDir = new RAMDirectory(fsDir);
-             try {
-                 // 2. While the program runs, work against ramDir.
-                 IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, maxLength);
-                 try {
-                     // Add the document.
-                     Document doc = File2DocumentUtils.file2Document(filePath);
-                     ramIndexWriter.addDocument(doc);
-                 } finally {
-                     ramIndexWriter.close();
-                 }
-                 // 3. Sync the in-memory changes back to disk. The writer is opened
-                 // with create=true; ramDir was built from fsDir, so it already holds
-                 // the previous index content plus the new document.
-                 IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, maxLength);
-                 try {
-                     fsIndexWriter.addIndexesNoOptimize(ramDir);
-                     fsIndexWriter.optimize(); // optimize
-                     fsIndexWriter.commit(); // commit the changes
-                     System.out.println("是否有刪除=" + fsIndexWriter.hasDeletions());
-                     System.out.println("一共有=" + fsIndexWriter.maxDoc());
-                     System.out.println("還剩=" + fsIndexWriter.numDocs());
-                 } finally {
-                     fsIndexWriter.close();
-                 }
-             } finally {
-                 ramDir.close();
-             }
-         } finally {
-             fsDir.close();
-         }
-     }
- }
測試成功,在控制檯輸出:
- filename = xiaoli.txt
- contents = 1 會員登陸沒有驗證碼問題:
- 因爲在提交的時候代碼沒有對驗證碼處理,所以,當沒有驗證碼的時候,也可以通過驗證,進而實現登陸
- 2 個人會員註冊問題:
- 註冊會失敗,因爲沒有驗證碼的原因,所以把驗證碼加上的時候,就可以實現會員的註冊
- 3 企業會員註冊問題
- 企業會員的註冊可以放在個人會員的時候,讓用戶選擇註冊時個人會員還是企業會員,這樣在action中稍加判斷就可以實現個人和企業的註冊,
- 4 當登陸後臺的時候,修改會員組的時候,會出現亂碼問題,處理中
- size = stored,indexed<size:437>
- path = stored,indexed<path:D:\Workspaces\MyEclipse 8.5\luceneDemo\dataSource\xiaoli.txt>
- ------------------------------------------------
- 是否有刪除=false
- 一共有=13
- 還剩=13
哇,寫完,收工,希望大家能學到東西!
(因附件超過 4 MB,未能上傳成功,需要的朋友可以向我索取)