Lucene的Field說明
Lucene存儲對象是以document爲存儲單元,對象中相關的屬性值則存放到Field中;
lucene中所有Field都是IndexableField接口的實現
- org.apache.lucene.index.IndexableField
- Represents a single field for indexing. IndexWriter consumes Iterable<IndexableField> as a document.
IndexableField接口提供了一些方法,主要是對field相關屬性的獲取,包括
- /** 獲取field的名稱 */
- public String name();
- /** 獲取field的類型fieldType */
- public IndexableFieldType fieldType();
- /**
- * 獲取當前field的權重(評分值)。只有Field有評分的概念,如果想對document設定評分值,必須預先對document中對應的field設定評分值
- */
- public float boost();
- /** 如果此Field爲二進制類型的,返回相應的值*/
- public BytesRef binaryValue();
...
- /**
- * 創建一個用於索引此Field的TokenStream
- */
- public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException;
所有的Field均是org.apache.lucene.document.Field的子類;
項目中我們常用的Field類型主要有IntField, LongField, FloatField, DoubleField, BinaryDocValuesField, NumericDocValuesField, SortedDocValuesField, StringField, TextField, StoredField.
lucene常見Field
IntField 主要對int類型的字段進行存儲,需要注意的是如果需要對IntField進行排序使用SortField.Type.INT來比較,如果進行範圍查詢或過濾,需要採用NumericRangeQuery.newIntRange()
LongField 主要處理Long類型的字段的存儲,排序使用SortField.Type.LONG,如果進行範圍查詢或過濾利用NumericRangeQuery.newLongRange(),LongField常用來進行時間戳的排序,保存System.currentTimeMillis()
FloatField 對Float類型的字段進行存儲,排序採用SortField.Type.FLOAT,範圍查詢採用NumericRangeQuery.newFloatRange()
BinaryDocValuesField 只存儲不共享值,如果需要共享值可以用SortedDocValuesField
NumericDocValuesField 用於數值類型的Field的排序(預排序),需要在要排序的field後添加一個同名的NumericDocValuesField
SortedDocValuesField 用於String類型的Field的排序,需要在StringField後添加同名的SortedDocValuesField
StringField 用於String類型的字段的存儲,StringField是隻索引不分詞
TextField 對String類型的字段進行存儲,TextField和StringField的不同是TextField既索引又分詞
StoredField 存儲Field的值,可以用IndexSearcher.doc和IndexReader.document來獲取此Field和存儲的值
IntField使用
- package com.lucene.field;
- import java.io.IOException;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.IntField;
- import org.apache.lucene.document.NumericDocValuesField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.MatchAllDocsQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.junit.Test;
- import com.lucene.index.IndexUtil;
- import com.lucene.search.SearchUtil;
- /**
-  * Indexes two documents carrying an {@code IntField} and reads them back
-  * sorted on that field.
-  */
- public class IntFieldTest {
-     /**
-      * Writes two documents with a stored {@code intValue} (30 and 40) to the
-      * "intFieldPath" index.
-      */
-     @Test
-     public void testIndexIntFieldStored() {
-         Document document = new Document();
-         document.add(new IntField("intValue", 30, Field.Store.YES));
-         // Sorting needs a NumericDocValuesField with the same name as the IntField.
-         document.add(new NumericDocValuesField("intValue", 30));
-         Document document1 = new Document();
-         document1.add(new IntField("intValue", 40, Field.Store.YES));
-         document1.add(new NumericDocValuesField("intValue", 40));
-         IndexWriter writer = null;
-         try {
-             writer = IndexUtil.getIndexWriter("intFieldPath", false);
-             writer.addDocument(document);
-             writer.addDocument(document1);
-             // Commit explicitly once both documents have been added.
-             writer.commit();
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             // writer is still null when getIndexWriter failed; touching it
-             // unconditionally here would throw a NullPointerException.
-             if (writer != null) {
-                 try {
-                     writer.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-     }
-     /**
-      * Prints the top two hits of the "intFieldPath" index, sorted on
-      * {@code intValue} with the reverse flag set.
-      */
-     @Test
-     public void testIntFieldSort() {
-         try {
-             IndexSearcher searcher = SearchUtil.getIndexSearcher("intFieldPath", null);
-             // Third SortField argument is the reverse flag.
-             Sort sort = new Sort(new SortField("intValue", SortField.Type.INT, true));
-             // Match every document in the index.
-             Query query = new MatchAllDocsQuery();
-             TopFieldDocs docs = searcher.search(query, 2, sort);
-             for (ScoreDoc scoreDoc : docs.scoreDocs) {
-                 System.out.println(searcher.doc(scoreDoc.doc));
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
- }
測試排序結果如下
- Document<stored<intValue:40>>
- Document<stored<intValue:30>>
如果修改NumericDocValuesField對應的值,結果會隨着其值的大小而改變
LongField使用
- package com.lucene.field;
- import java.io.IOException;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.LongField;
- import org.apache.lucene.document.NumericDocValuesField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.MatchAllDocsQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.junit.Test;
- import com.lucene.index.IndexUtil;
- import com.lucene.search.SearchUtil;
- /**
-  * Indexes two documents carrying a {@code LongField} and reads them back
-  * sorted on that field.
-  */
- public class LongFieldTest {
-     /**
-      * Writes two documents with a stored {@code longValue} (50 and 80) to the
-      * "longFieldPath" index.
-      */
-     @Test
-     public void testIndexLongFieldStored() {
-         Document document = new Document();
-         document.add(new LongField("longValue", 50L, Field.Store.YES));
-         // Same-named NumericDocValuesField so the field can be sorted on.
-         document.add(new NumericDocValuesField("longValue", 50L));
-         Document document1 = new Document();
-         document1.add(new LongField("longValue", 80L, Field.Store.YES));
-         document1.add(new NumericDocValuesField("longValue", 80L));
-         IndexWriter writer = null;
-         try {
-             writer = IndexUtil.getIndexWriter("longFieldPath", false);
-             writer.addDocument(document);
-             writer.addDocument(document1);
-             // Commit explicitly once both documents have been added.
-             writer.commit();
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             // writer is still null when getIndexWriter failed; touching it
-             // unconditionally here would throw a NullPointerException.
-             if (writer != null) {
-                 try {
-                     writer.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-     }
-     /**
-      * Prints the stored {@code longValue} of the top two hits of the
-      * "longFieldPath" index, sorted on that field with the reverse flag set.
-      */
-     @Test
-     public void testLongFieldSort() {
-         try {
-             IndexSearcher searcher = SearchUtil.getIndexSearcher("longFieldPath", null);
-             // Third SortField argument is the reverse flag.
-             Sort sort = new Sort(new SortField("longValue", SortField.Type.LONG, true));
-             // Match every document in the index.
-             Query query = new MatchAllDocsQuery();
-             TopFieldDocs docs = searcher.search(query, 2, sort);
-             for (ScoreDoc scoreDoc : docs.scoreDocs) {
-                 Document doc = searcher.doc(scoreDoc.doc);
-                 System.out.println(doc.getField("longValue").numericValue());
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
- }
運行結果如下(以下是直接打印整個Document時的輸出;上面的代碼打印的是numericValue(),實際只會輸出數值80和50):
- Document<stored<longValue:80>>
- Document<stored<longValue:50>>
FloatField使用
- package com.lucene.field;
- import java.io.IOException;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.FloatDocValuesField;
- import org.apache.lucene.document.FloatField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.MatchAllDocsQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.junit.Test;
- import com.lucene.index.IndexUtil;
- import com.lucene.search.SearchUtil;
- /**
-  * Indexes two documents carrying a {@code FloatField} and reads them back
-  * sorted on the same-named {@code FloatDocValuesField}.
-  */
- public class FloatFieldTest {
-     /**
-      * Writes two documents with a stored {@code floatValue} (9.1 and 80.1) to
-      * the "floatFieldPath" index.
-      */
-     @Test
-     public void testIndexFloatFieldStored() {
-         Document document = new Document();
-         document.add(new FloatField("floatValue", 9.1f, Field.Store.YES));
-         // NOTE(review): the doc-values value (82.0f) differs from the stored
-         // value (9.1f). Presumably intentional, to show that sorting follows
-         // the doc-values field rather than the stored value -- confirm.
-         document.add(new FloatDocValuesField("floatValue", 82.0f));
-         Document document1 = new Document();
-         document1.add(new FloatField("floatValue", 80.1f, Field.Store.YES));
-         document1.add(new FloatDocValuesField("floatValue", 80.1f));
-         IndexWriter writer = null;
-         try {
-             writer = IndexUtil.getIndexWriter("floatFieldPath", false);
-             writer.addDocument(document);
-             writer.addDocument(document1);
-             // Commit explicitly once both documents have been added.
-             writer.commit();
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             // writer is still null when getIndexWriter failed; touching it
-             // unconditionally here would throw a NullPointerException.
-             if (writer != null) {
-                 try {
-                     writer.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-     }
-     /**
-      * Prints the stored {@code floatValue} of the top two hits of the
-      * "floatFieldPath" index, sorted on that field with the reverse flag set.
-      */
-     @Test
-     public void testFloatFieldSort() {
-         try {
-             IndexSearcher searcher = SearchUtil.getIndexSearcher("floatFieldPath", null);
-             // Third SortField argument is the reverse flag.
-             Sort sort = new Sort(new SortField("floatValue", SortField.Type.FLOAT, true));
-             // Match every document in the index.
-             Query query = new MatchAllDocsQuery();
-             TopFieldDocs docs = searcher.search(query, 2, sort);
-             for (ScoreDoc scoreDoc : docs.scoreDocs) {
-                 Document doc = searcher.doc(scoreDoc.doc);
-                 System.out.println(doc.getField("floatValue").numericValue());
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
- }
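結果如下(以下是直接打印整個Document時的輸出,上面的代碼打印numericValue()時只會輸出9.1和80.1;由於排序依據的是FloatDocValuesField的值82.0和80.1,而不是存儲的值,所以降序時9.1排在80.1之前):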
- Document<stored<floatValue:9.1>>
- Document<stored<floatValue:80.1>>
BinaryDocValuesField使用
- package com.lucene.field;
- import java.io.IOException;
- import org.apache.lucene.document.BinaryDocValuesField;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.FloatDocValuesField;
- import org.apache.lucene.document.FloatField;
- import org.apache.lucene.document.IntField;
- import org.apache.lucene.document.LongField;
- import org.apache.lucene.document.NumericDocValuesField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.MatchAllDocsQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.apache.lucene.util.BytesRef;
- import org.junit.Test;
- import com.lucene.index.IndexUtil;
- import com.lucene.search.SearchUtil;
- /**
-  * Indexes two documents that carry only a {@code BinaryDocValuesField} and
-  * reads them back sorted on that field.
-  */
- public class BinaryDocValuesFieldTest {
-     /**
-      * Writes two documents with a {@code binaryValue} doc-values field ("1234"
-      * and "2345") to the "binaryValueFieldPath" index.
-      *
-      * NOTE(review): the method name looks like a copy-paste leftover from the
-      * LongField example; it is kept unchanged so existing references still work.
-      */
-     @Test
-     public void testIndexLongFieldStored() {
-         Document document = new Document();
-         // BytesRef(CharSequence) encodes the text as UTF-8 instead of relying
-         // on the platform default charset used by String.getBytes().
-         document.add(new BinaryDocValuesField("binaryValue", new BytesRef("1234")));
-         Document document1 = new Document();
-         document1.add(new BinaryDocValuesField("binaryValue", new BytesRef("2345")));
-         IndexWriter writer = null;
-         try {
-             writer = IndexUtil.getIndexWriter("binaryValueFieldPath", false);
-             writer.addDocument(document);
-             writer.addDocument(document1);
-             // Commit explicitly once both documents have been added.
-             writer.commit();
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             // writer is still null when getIndexWriter failed; touching it
-             // unconditionally here would throw a NullPointerException.
-             if (writer != null) {
-                 try {
-                     writer.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-     }
-     /**
-      * Prints the top two hits of the "binaryValueFieldPath" index, sorted on
-      * {@code binaryValue} with the reverse flag set.
-      */
-     @Test
-     public void testBinaryDocValuesFieldSort() {
-         try {
-             IndexSearcher searcher = SearchUtil.getIndexSearcher("binaryValueFieldPath", null);
-             // Third SortField argument is the reverse flag.
-             Sort sort = new Sort(new SortField("binaryValue", SortField.Type.STRING_VAL, true));
-             // Match every document in the index.
-             Query query = new MatchAllDocsQuery();
-             TopFieldDocs docs = searcher.search(query, 2, sort);
-             for (ScoreDoc scoreDoc : docs.scoreDocs) {
-                 // The documents were indexed without any stored field.
-                 Document doc = searcher.doc(scoreDoc.doc);
-                 System.out.println(doc);
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
- }
運行結果:
- Document<>
- Document<>
爲什麼會這樣呢?這是由BinaryDocValuesField的特性決定的:它的值只寫入DocValues(用於排序等),不作爲stored field存儲,所以searcher.doc()取回的Document中沒有這個字段!
StringField使用
- package com.lucene.field;
- import java.io.IOException;
- import org.apache.lucene.document.BinaryDocValuesField;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.FloatDocValuesField;
- import org.apache.lucene.document.FloatField;
- import org.apache.lucene.document.IntField;
- import org.apache.lucene.document.LongField;
- import org.apache.lucene.document.NumericDocValuesField;
- import org.apache.lucene.document.SortedDocValuesField;
- import org.apache.lucene.document.StringField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.MatchAllDocsQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.apache.lucene.util.BytesRef;
- import org.junit.Test;
- import com.lucene.index.IndexUtil;
- import com.lucene.search.SearchUtil;
- /**
-  * Indexes two documents carrying a {@code StringField} and reads them back
-  * sorted on the same-named {@code SortedDocValuesField}.
-  */
- public class StringFieldTest {
-     /**
-      * Writes two documents with a stored {@code stringValue} ("12445" and
-      * "23456") to the "stringFieldPath" index.
-      *
-      * NOTE(review): the method name looks like a copy-paste leftover from the
-      * LongField example; it is kept unchanged so existing references still work.
-      */
-     @Test
-     public void testIndexLongFieldStored() {
-         Document document = new Document();
-         document.add(new StringField("stringValue", "12445", Field.Store.YES));
-         // Same-named SortedDocValuesField so the field can be sorted on.
-         // BytesRef(CharSequence) encodes the text as UTF-8 instead of relying
-         // on the platform default charset used by String.getBytes().
-         document.add(new SortedDocValuesField("stringValue", new BytesRef("12445")));
-         Document document1 = new Document();
-         document1.add(new StringField("stringValue", "23456", Field.Store.YES));
-         document1.add(new SortedDocValuesField("stringValue", new BytesRef("23456")));
-         IndexWriter writer = null;
-         try {
-             writer = IndexUtil.getIndexWriter("stringFieldPath", false);
-             writer.addDocument(document);
-             writer.addDocument(document1);
-             // Commit explicitly once both documents have been added.
-             writer.commit();
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             // writer is still null when getIndexWriter failed; touching it
-             // unconditionally here would throw a NullPointerException.
-             if (writer != null) {
-                 try {
-                     writer.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-     }
-     /**
-      * Prints the top two hits of the "stringFieldPath" index, sorted on
-      * {@code stringValue} with the reverse flag set.
-      */
-     @Test
-     public void testStringFieldSort() {
-         try {
-             IndexSearcher searcher = SearchUtil.getIndexSearcher("stringFieldPath", null);
-             // The sort field must be the indexed name "stringValue"; the former
-             // "stringVal" matched no field, so no sort order was applied.
-             // Third SortField argument is the reverse flag.
-             Sort sort = new Sort(new SortField("stringValue", SortField.Type.STRING, true));
-             // Match every document in the index.
-             Query query = new MatchAllDocsQuery();
-             TopFieldDocs docs = searcher.search(query, 2, sort);
-             for (ScoreDoc scoreDoc : docs.scoreDocs) {
-                 Document doc = searcher.doc(scoreDoc.doc);
-                 System.out.println(doc);
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
- }
運行結果如下(此輸出是排序字段名寫成"stringVal"、與索引字段"stringValue"不一致時得到的,排序沒有生效,結果按寫入順序返回;字段名一致時,降序結果應爲23456在前、12445在後):
- Document<stored,indexed,tokenized,omitNorms,indexOptions=DOCS<stringValue:12445>>
- Document<stored,indexed,tokenized,omitNorms,indexOptions=DOCS<stringValue:23456>>
TextField使用
- package com.lucene.field;
- import java.io.IOException;
- import org.apache.lucene.document.BinaryDocValuesField;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.FloatDocValuesField;
- import org.apache.lucene.document.FloatField;
- import org.apache.lucene.document.IntField;
- import org.apache.lucene.document.LongField;
- import org.apache.lucene.document.NumericDocValuesField;
- import org.apache.lucene.document.SortedDocValuesField;
- import org.apache.lucene.document.StringField;
- import org.apache.lucene.document.TextField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.MatchAllDocsQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.apache.lucene.util.BytesRef;
- import org.junit.Test;
- import com.lucene.index.IndexUtil;
- import com.lucene.search.SearchUtil;
- /**
-  * Indexes two documents carrying a {@code TextField} and reads them back
-  * sorted on the same-named {@code SortedDocValuesField}.
-  */
- public class TextFieldTest {
-     /**
-      * Writes two documents with a stored {@code textValue} ("12345" and
-      * "23456") to the "textFieldPath" index.
-      *
-      * NOTE(review): the method name looks like a copy-paste leftover from the
-      * LongField example; it is kept unchanged so existing references still work.
-      */
-     @Test
-     public void testIndexLongFieldStored() {
-         Document document = new Document();
-         document.add(new TextField("textValue", "12345", Field.Store.YES));
-         // Same-named SortedDocValuesField so the field can be sorted on.
-         // BytesRef(CharSequence) encodes the text as UTF-8 instead of relying
-         // on the platform default charset used by String.getBytes().
-         document.add(new SortedDocValuesField("textValue", new BytesRef("12345")));
-         Document document1 = new Document();
-         document1.add(new TextField("textValue", "23456", Field.Store.YES));
-         document1.add(new SortedDocValuesField("textValue", new BytesRef("23456")));
-         IndexWriter writer = null;
-         try {
-             writer = IndexUtil.getIndexWriter("textFieldPath", false);
-             writer.addDocument(document);
-             writer.addDocument(document1);
-             // Commit explicitly once both documents have been added.
-             writer.commit();
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             // writer is still null when getIndexWriter failed; touching it
-             // unconditionally here would throw a NullPointerException.
-             if (writer != null) {
-                 try {
-                     writer.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-     }
-     /**
-      * Prints the top two hits of the "textFieldPath" index, sorted on
-      * {@code textValue} with the reverse flag set.
-      *
-      * NOTE(review): the method name still says "StringField" although this
-      * class exercises a TextField; kept unchanged for existing references.
-      */
-     @Test
-     public void testStringFieldSort() {
-         try {
-             IndexSearcher searcher = SearchUtil.getIndexSearcher("textFieldPath", null);
-             // Third SortField argument is the reverse flag.
-             Sort sort = new Sort(new SortField("textValue", SortField.Type.STRING, true));
-             // Match every document in the index.
-             Query query = new MatchAllDocsQuery();
-             TopFieldDocs docs = searcher.search(query, 2, sort);
-             for (ScoreDoc scoreDoc : docs.scoreDocs) {
-                 Document doc = searcher.doc(scoreDoc.doc);
-                 System.out.println(doc);
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
- }
運行結果如下:
- Document<stored,indexed,tokenized<textValue:23456>>
- Document<stored,indexed,tokenized<textValue:12345>>