lucene的基本使用

一、創建項目(Maven或java項目)

二、加入jar包(maven項目在pom.xml中加入以下依賴)

<!--
  Maven build descriptor for the Lucene demo project.
  Declares the three Lucene 4.10.3 modules and the MySQL JDBC driver used by the sample code.
  NOTE(review): the test classes further down also import org.junit.Test and
  org.wltea.analyzer.lucene.IKAnalyzer, neither of which is declared here; they
  presumably have to be added to the classpath separately - confirm.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>name.yaohuan</groupId>
    <artifactId>lucene</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <dependencies>
        <!-- Lucene core: Document, Field, IndexWriter, IndexSearcher, ... -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Common analyzers (provides StandardAnalyzer used by the samples) -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Query parser (QueryParser, MultiFieldQueryParser) -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- MySQL JDBC driver, loaded by the DAO as com.mysql.jdbc.Driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.32</version>
        </dependency>
    </dependencies>
</project>


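如果是非maven項目,則手動加入與上面pom.xml對應的jar包:lucene-core-4.10.3.jar、lucene-analyzers-common-4.10.3.jar、lucene-queryparser-4.10.3.jar、mysql-connector-java-5.1.32.jar。另外,後面的測試代碼還用到了JUnit和IKAnalyzer,也需要一併加入(原文此處的jar包截圖已缺失)。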


三、執行sql腳本,創建一個數據庫,以便於下面的操作。從後面的dao代碼可以看出:數據庫名爲lucene,其中有一張book表,至少包含id、name、price、pic、description這幾個字段(原文此處的示意圖已缺失)。


四、pojo

package com.tf.pojo;
/**
 * Mutable bean describing a single book: identifier, name, price,
 * picture reference and free-text description.
 *
 * <p>Every property is a nullable reference type and starts out as {@code null}.
 *
 * @author tf
 * @version 1.0
 */
public class Book {
	private Integer id;
	private String name;
	private Float price;
	private String pic;
	private String description;

	/** Creates a book with every property unset ({@code null}). */
	public Book() {
	}

	/** @return the book identifier, or {@code null} if unset */
	public Integer getId() {
		return this.id;
	}

	/** @param id the book identifier */
	public void setId(Integer id) {
		this.id = id;
	}

	/** @return the book name, or {@code null} if unset */
	public String getName() {
		return this.name;
	}

	/** @param name the book name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the book price, or {@code null} if unset */
	public Float getPrice() {
		return this.price;
	}

	/** @param price the book price */
	public void setPrice(Float price) {
		this.price = price;
	}

	/** @return the picture reference, or {@code null} if unset */
	public String getPic() {
		return this.pic;
	}

	/** @param pic the picture reference */
	public void setPic(String pic) {
		this.pic = pic;
	}

	/** @return the description text, or {@code null} if unset */
	public String getDescription() {
		return this.description;
	}

	/** @param description the description text */
	public void setDescription(String description) {
		this.description = description;
	}

	/**
	 * Renders all properties in the form
	 * {@code Book [id=..., name=..., price=..., pic=..., description=...]}.
	 * Unset properties are rendered as the text {@code null}.
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("Book [");
		text.append("id=").append(this.id);
		text.append(", name=").append(this.name);
		text.append(", price=").append(this.price);
		text.append(", pic=").append(this.pic);
		text.append(", description=").append(this.description);
		text.append("]");
		return text.toString();
	}

}



五、dao
package com.tf.dao;

import java.util.List;

import com.tf.pojo.Book;

/**
 * Data-access contract for reading {@link Book} records.
 *
 * @author tf
 * @version 1.0
 */
public interface BookDao {

	/**
	 * Retrieves the books known to the underlying data source.
	 *
	 * @return the books that were read
	 */
	List<Book> queryBooks();
}



六、dao.impl
package com.tf.dao.impl;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

import com.tf.dao.BookDao;
import com.tf.pojo.Book;

/**
 * JDBC implementation of {@link BookDao} that reads every row of the
 * {@code book} table from a local MySQL database.
 *
 * @author tf
 * @version 1.0
 */
public class BookDaoImpl implements BookDao {

	/** Fully qualified name of the JDBC driver class to load. */
	private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";
	/** JDBC URL of the database holding the {@code book} table. */
	private static final String JDBC_URL = "jdbc:mysql://localhost:3306/lucene";
	/** Database account used for the connection. */
	private static final String JDBC_USER = "root";
	/** Password of {@link #JDBC_USER}. */
	private static final String JDBC_PASSWORD = "123";
	/** Statement that selects every book row. */
	private static final String SELECT_ALL_BOOKS = "select * from book";

	/**
	 * Loads all books from the database.
	 *
	 * <p>This is a best-effort read: if the driver cannot be loaded or any
	 * JDBC call fails, the stack trace is printed and the books collected so
	 * far (possibly none) are returned instead of propagating the error.
	 *
	 * @return the books read from the {@code book} table; never {@code null}
	 */
	@Override
	public List<Book> queryBooks() {
		List<Book> bookList = new ArrayList<Book>();
		try {
			// Load the JDBC driver class.
			Class.forName(DRIVER_CLASS);
			// try-with-resources closes the result set, the statement and the
			// connection in that order, and closes each of them even when an
			// earlier close or the query itself fails.
			try (Connection connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
					PreparedStatement statement = connection.prepareStatement(SELECT_ALL_BOOKS);
					ResultSet resultSet = statement.executeQuery()) {
				// Map each row onto a Book.
				while (resultSet.next()) {
					Book book = new Book();
					book.setId(resultSet.getInt("id"));
					book.setName(resultSet.getString("name"));
					book.setPic(resultSet.getString("pic"));
					book.setPrice(resultSet.getFloat("price"));
					book.setDescription(resultSet.getString("description"));
					bookList.add(book);
				}
			}
		} catch (Exception e) {
			// Deliberately best-effort: report the failure and fall through
			// to return whatever was read before it happened.
			e.printStackTrace();
		}
		return bookList;
	}

}




七、建立索引
創建測試類CreateIndex
1、查詢數據源,創建document
2、創建分詞器
分詞:將field域中的內容一個個的分詞
過濾:將分好的詞進行過濾,比如去掉標點符號、大寫轉小寫、詞形還原(複數轉單數、過去式轉成現在式)、停用詞過濾
停用詞:單獨使用時沒有特殊意義的詞,比如中文的「的」、「啊」等,英文中的this、is、a、the等等
package com.tf.test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.tf.dao.BookDao;
import com.tf.dao.impl.BookDaoImpl;
import com.tf.pojo.Book;

/**
 * JUnit-driven examples that create, search, delete from and update the
 * Lucene index stored at {@link #INDEX_PATH}.
 *
 * <p>Every method uses the same analyzer (see {@link #newAnalyzer()}) so that
 * text is tokenized identically when it is indexed and when it is queried,
 * and every index resource is closed via try-with-resources.
 *
 * @author tf
 * @version 1.0
 */
public class CreateIndex {

	/** Directory on disk that holds the index used by every method here. */
	private static final String INDEX_PATH = "d:\\bookindex1711\\";

	/**
	 * Creates the analyzer shared by indexing and querying.
	 *
	 * @return a new IK (Chinese-capable) analyzer
	 */
	private static Analyzer newAnalyzer() {
		return new IKAnalyzer();
	}

	/**
	 * Creates the writer configuration used for every index modification.
	 *
	 * @return a configuration for Lucene 4.10.3 using {@link #newAnalyzer()}
	 */
	private static IndexWriterConfig newWriterConfig() {
		return new IndexWriterConfig(Version.LUCENE_4_10_3, newAnalyzer());
	}

	/**
	 * Converts a book into a Lucene document.
	 *
	 * <p>Properties that are {@code null} are left out of the document,
	 * because the field constructors do not accept {@code null} values.
	 *
	 * @param book the book to convert; its id must not be {@code null}
	 * @return the document to index
	 */
	private static Document toDocument(Book book) {
		Document doc = new Document();
		// id: stored, not tokenized, indexed
		doc.add(new StringField("id", book.getId().toString(), Store.YES));
		if (book.getName() != null) {
			// name: stored, tokenized, indexed
			Field name = new TextField("name", book.getName(), Store.YES);
			// Index-time boost so that the book with id 4 ranks higher.
			// (At query time the equivalent is calling setBoost on the query.)
			if (book.getId() == 4) {
				name.setBoost(100f);
			}
			doc.add(name);
		}
		if (book.getPrice() != null) {
			// price: stored, not tokenized, indexed as a numeric field
			doc.add(new FloatField("price", book.getPrice(), Store.YES));
		}
		if (book.getPic() != null) {
			// picture: stored only, not indexed
			doc.add(new StoredField("pic", book.getPic()));
		}
		if (book.getDescription() != null) {
			// description: tokenized and indexed, but not stored
			doc.add(new TextField("description", book.getDescription(), Store.NO));
		}
		return doc;
	}

	/**
	 * Rebuilds a book from the stored fields of a search hit.
	 *
	 * <p>Fields that are absent from the document (for example the
	 * non-stored description, or id/price on a document written by
	 * {@link #updateIndex()}) leave the matching property {@code null}.
	 *
	 * @param doc the document returned by the searcher
	 * @return the book populated from the stored fields
	 */
	private static Book toBook(Document doc) {
		Book book = new Book();
		String id = doc.get("id");
		if (id != null) {
			book.setId(Integer.valueOf(id));
		}
		book.setName(doc.get("name"));
		String price = doc.get("price");
		if (price != null) {
			book.setPrice(Float.valueOf(price));
		}
		book.setPic(doc.get("pic"));
		book.setDescription(doc.get("description"));
		return book;
	}

	/**
	 * Creates the index from the books returned by the DAO.
	 *
	 * @throws IOException if the index directory cannot be opened or written
	 */
	@Test
	public void createIndex() throws IOException {
		// Collect the source data.
		BookDao bookDao = new BookDaoImpl();
		List<Book> bookList = bookDao.queryBooks();
		for (Book book : bookList) {
			System.out.println(book.getPic());
		}
		// Build all documents before touching the index, so a conversion
		// failure leaves the index untouched.
		List<Document> docList = new ArrayList<Document>();
		for (Book book : bookList) {
			docList.add(toDocument(book));
		}
		// Write the documents; the writer and directory are closed even if
		// adding a document fails.
		try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
				IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
			for (Document document : docList) {
				writer.addDocument(document);
			}
		}
	}

	/**
	 * Searches the index for documents whose description contains both
	 * "spring" and "mybatis" and prints the hits.
	 *
	 * @throws ParseException if the query string cannot be parsed
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void searchIndex() throws ParseException, IOException {
		// Argument 1: default field to search; argument 2: analyzer.
		QueryParser parser = new QueryParser("description", newAnalyzer());
		Query query = parser.parse("description:spring AND mybatis");

		try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
				DirectoryReader reader = DirectoryReader.open(directory)) {
			IndexSearcher searcher = new IndexSearcher(reader);

			TopDocs topDocs = searcher.search(query, 10);
			int count = topDocs.totalHits;
			System.out.println("查詢到:"+count+"條記錄");

			List<Book> list = new ArrayList<>();
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				// scoreDoc.doc is Lucene's internal document number.
				list.add(toBook(searcher.doc(scoreDoc.doc)));
			}
			for (Book book : list) {
				System.out.println(book);
			}
		}
	}

	/**
	 * Deletes every document whose name field contains the term "solr".
	 *
	 * @throws IOException if the index cannot be opened or written
	 */
	@Test
	public void deleteIndex() throws IOException {
		try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
				IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
			writer.deleteDocuments(new Term("name", "solr"));
		}
	}

	/**
	 * Deletes every document in the index.
	 *
	 * @throws IOException if the index cannot be opened or written
	 */
	@Test
	public void deleteAll() throws IOException {
		try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
				IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
			writer.deleteAll();
		}
	}

	/**
	 * Replaces every document whose name field contains the term "solr"
	 * with a single new document.
	 *
	 * <p>The replacement document carries only a name field, so the replaced
	 * documents' other fields (id, price, pic, description) are not kept.
	 *
	 * @throws IOException if the index cannot be opened or written
	 */
	@Test
	public void updateIndex() throws IOException {
		// The new data.
		Document doc = new Document();
		doc.add(new TextField("name", "JaveEE 實戰", Store.YES));

		try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
				IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
			// Documents matching this term are deleted, then doc is added.
			writer.updateDocument(new Term("name", "solr"), doc);
		}
	}

}





八、數據查詢
package com.tf.test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import com.tf.pojo.Book;

/**
 * JUnit-driven examples of the different Lucene query types, all executed
 * against the index stored at {@link #INDEX_PATH}.
 *
 * <p>NOTE(review): the parser-based queries here analyze their input with
 * {@code StandardAnalyzer}, while {@code CreateIndex.createIndex()} builds the
 * index with {@code IKAnalyzer}; confirm that both produce the same terms for
 * the query text used.
 *
 * @author tf
 * @version 1.0
 */
public class IndexSearch {

	/** Directory on disk that holds the index being searched. */
	private static final String INDEX_PATH = "d:\\bookindex1711\\";

	/**
	 * Runs the query, prints the total hit count and prints the top 10 hits.
	 *
	 * <p>The reader and directory are closed when the method returns, whether
	 * or not the search succeeds.
	 *
	 * @param query the query to execute
	 * @throws IOException if the index cannot be opened or read
	 */
	private void doQuery(Query query) throws IOException {
		try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
				IndexReader reader = DirectoryReader.open(directory)) {
			IndexSearcher searcher = new IndexSearcher(reader);

			TopDocs topDocs = searcher.search(query, 10);
			int count = topDocs.totalHits;
			System.out.println("查詢到:"+count+"條記錄");

			List<Book> list = new ArrayList<>();
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				// scoreDoc.doc is Lucene's internal document number.
				list.add(toBook(searcher.doc(scoreDoc.doc)));
			}
			for (Book book : list) {
				System.out.println(book);
			}
		}
	}

	/**
	 * Rebuilds a book from the stored fields of a search hit.
	 *
	 * <p>A field that is absent from the document leaves the matching
	 * property {@code null} instead of failing while parsing a number.
	 *
	 * @param doc the document returned by the searcher
	 * @return the book populated from the stored fields
	 */
	private static Book toBook(Document doc) {
		Book book = new Book();
		String id = doc.get("id");
		if (id != null) {
			book.setId(Integer.valueOf(id));
		}
		book.setName(doc.get("name"));
		String price = doc.get("price");
		if (price != null) {
			book.setPrice(Float.valueOf(price));
		}
		book.setPic(doc.get("pic"));
		book.setDescription(doc.get("description"));
		return book;
	}

	/**
	 * TermQuery: matches documents whose name field contains the exact term
	 * "spring".
	 *
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void termQuerySearch() throws IOException {
		Query query = new TermQuery(new Term("name","spring"));
		doQuery(query);
	}

	/**
	 * NumericRangeQuery: matches documents with 50 &lt; price &lt;= 100
	 * (lower bound exclusive, upper bound inclusive).
	 *
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void numericRangeQuerySearch() throws IOException{
		Query query = NumericRangeQuery.newFloatRange("price",50F,100F, false, true);
		doQuery(query);
	}

	/**
	 * BooleanQuery: combines a term query on name with a price range query
	 * (both bounds exclusive); both clauses are required.
	 *
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void booleanQuerySearch() throws IOException {
		BooleanQuery query = new BooleanQuery();
		Query query1 = new TermQuery(new Term("name","spring"));
		Query query2 = NumericRangeQuery.newFloatRange("price",50F,100F, false, false);
		/*
		 * MUST:     the clause has to match (AND)
		 * MUST_NOT: the clause must not match
		 * SHOULD:   the clause may match (OR)
		 */
		query.add(query1,Occur.MUST);
		query.add(query2,Occur.MUST);
		doQuery(query);
	}

	/**
	 * QueryParser: parses a query string with "description" as the default
	 * field.
	 *
	 * @throws Exception if the query cannot be parsed or the index cannot be read
	 */
	@Test
	public void queryParseSearch() throws Exception{
		QueryParser parser = new QueryParser("description",new StandardAnalyzer());
		Query query = parser.parse("description:spring AND mybatis");
		doQuery(query);
	}

	/**
	 * MultiFieldQueryParser: parses a query string with "name" and
	 * "description" as the default fields.
	 *
	 * @throws ParseException if the query string cannot be parsed
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void multiFileQuerySearch() throws ParseException, IOException {
		String[] fields = {"name","description"};
		MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
		// "+" marks a required clause, a space separates optional clauses, "-" marks a prohibited clause
		Query query = parser.parse("+name:spring description:spring");
		doQuery(query);
	}

	/**
	 * Relevance ordering: searches name for "solr" so the ranking of the
	 * hits can be inspected.
	 *
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void Search() throws IOException {
		Query query = new TermQuery(new Term("name","solr"));
		doQuery(query);
	}
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章