HBase中使用過濾器篩選數據

一、過濾器能幹什麼

  • HBase爲篩選數據提供了一組過濾器,通過過濾器可以在多個維度(行、列、數據版本)上對HBase中的數據進行篩選。
  • 通常來說,通過行鍵、列來篩選數據的應用場景較多。

二、常見的過濾器

  1. 基於行的過濾器

    • PrefixFilter: 行的前綴匹配
    • PageFilter: 基於行的分頁
  2. 基於列的過濾器

    • ColumnPrefixFilter: 列前綴匹配
    • FirstKeyOnlyFilter: 只返回每一行的第一列
  3. 基於單元值的過濾器

    • KeyOnlyFilter: 返回的數據不包括單元值,只包含行鍵與列
    • TimestampsFilter: 根據數據的時間戳版本進行過濾
  4. 基於列和單元值的過濾器

    • SingleColumnValueFilter: 對該列的單元值進行比較過濾
    • SingleColumnValueExcludeFilter: 對該列的單元值進行比較過濾,但返回的結果中不包含用於比較的該列
  5. 比較過濾器

    • 比較過濾器通常需要一個比較運算符以及一個比較器來實現過濾
    • RowFilter、 FamilyFilter、 QualifierFilter、 ValueFilter
  6. 常見過濾器總結

過濾器(Filter) 功能
RowFilter 按行鍵篩選出所有匹配的行
PrefixFilter 篩選出具有特定前綴的行鍵的數據
KeyOnlyFilter 只返回每個單元格的鍵(行鍵與列),不返回單元值
ColumnPrefixFilter 按照列名的前綴來篩選單元格
ValueFilter 按照具體的值來篩選單元格的過濾器
TimestampsFilter 根據數據的時間戳版本進行過濾
FilterList 用於綜合使用多個過濾器

三、開發演示

/**
 * Demonstrates several HBase scan filters against a small sample table.
 *
 * <p>Every filter test scans "FileTable" through {@code HBaseUtil.getScanner} with the row range
 * "rowkey1".."rowkey3" and prints the row key plus selected {@code fileInfo} columns of each
 * returned row. The "expected output" listed on each test is the console output recorded by the
 * original author. "rowkey3" never shows up in those outputs, so the stop row is presumably
 * exclusive — confirm against {@code HBaseUtil.getScanner}.
 *
 * @title HBaseFilterTest
 * @date 2019/12/9 15:01
 * @description Trying out HBase filters
 */
public class HBaseFilterTest {

    /** Creates the sample table with the two column families used by the other tests. */
    @Test
    public void createTable(){
        HBaseUtil.createTable("FileTable", new String[]{"fileInfo", "saveInfo"});
    }

    /** Inserts three sample rows (rowkey1..rowkey3) describing files and their creators. */
    @Test
    public void addFileDetails(){
        putFile("rowkey1", "file1.txt", "txt", "1024", "suiwo1");
        putFile("rowkey2", "file2.jpg", "jpg", "2048", "suiwo3");
        putFile("rowkey3", "file3.jpg", "jpg", "2048", "suiwo3");
    }

    /**
     * RowFilter with EQUAL + BinaryComparator on "rowkey1".
     *
     * <p>Expected output:
     * <pre>
     * rowkey = rowkey1
     * fileName = file1.txt
     * </pre>
     */
    @Test
    public void rowFilterTest(){
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("rowkey1")));
        scanAndPrint(filter, false);
    }

    /**
     * PrefixFilter on the row-key prefix "rowkey2".
     *
     * <p>Expected output:
     * <pre>
     * rowkey = rowkey2
     * fileName = file2.jpg
     * </pre>
     */
    @Test
    public void prefixFilterTest(){
        Filter filter = new PrefixFilter(Bytes.toBytes("rowkey2"));
        scanAndPrint(filter, false);
    }

    /**
     * KeyOnlyFilter constructed with {@code true}.
     *
     * <p>Expected output (the value after "fileName = " is not readable text):
     * <pre>
     * rowkey = rowkey1
     * fileName = (unprintable bytes)
     * rowkey = rowkey2
     * fileName = (unprintable bytes)
     * </pre>
     *
     * <p>NOTE(review): per the HBase KeyOnlyFilter documentation, passing {@code true}
     * ({@code lenAsVal}) replaces each value with its length encoded as a 4-byte int, which would
     * explain the unprintable output — confirm against the HBase version in use.
     */
    @Test
    public void keyOnlyFilterTest(){
        Filter filter = new KeyOnlyFilter(true);
        scanAndPrint(filter, false);
    }

    /**
     * ColumnPrefixFilter on the column-name prefix "nam".
     *
     * <p>Expected output ("type" does not match the prefix, so its value prints as null):
     * <pre>
     * rowkey = rowkey1
     * fileName = file1.txt
     * fileType = null
     * rowkey = rowkey2
     * fileName = file2.jpg
     * fileType = null
     * </pre>
     */
    @Test
    public void columnPrefixFilterTest(){
        Filter filter = new ColumnPrefixFilter(Bytes.toBytes("nam")); // column prefix is "nam"
        scanAndPrint(filter, true);
    }

    /**
     * Writes one sample file record: name, type and size into "fileInfo", creator into "saveInfo".
     *
     * @param rowKey  row key of the record
     * @param name    value for fileInfo:name
     * @param type    value for fileInfo:type
     * @param size    value for fileInfo:size
     * @param creator value for saveInfo:creator
     */
    private static void putFile(String rowKey, String name, String type, String size, String creator) {
        HBaseUtil.putRow("FileTable", rowKey, "fileInfo", "name", name);
        HBaseUtil.putRow("FileTable", rowKey, "fileInfo", "type", type);
        HBaseUtil.putRow("FileTable", rowKey, "fileInfo", "size", size);
        HBaseUtil.putRow("FileTable", rowKey, "saveInfo", "creator", creator);
    }

    /**
     * Scans "FileTable" from "rowkey1" to "rowkey3" with the given filter and prints every row.
     *
     * <p>The scanner is opened in a try-with-resources statement so it is closed even when
     * iterating or printing throws; a {@code null} scanner is tolerated and prints nothing.
     *
     * @param filter    the single filter to apply to the scan
     * @param printType whether to also print the fileInfo:type column after fileInfo:name
     */
    private static void scanAndPrint(Filter filter, boolean printType) {
        // MUST_PASS_ALL means a row has to pass every filter in the list.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(filter));

        byte[] family = Bytes.toBytes("fileInfo");
        byte[] nameQualifier = Bytes.toBytes("name");
        byte[] typeQualifier = Bytes.toBytes("type");

        try (ResultScanner scanner = HBaseUtil.getScanner("FileTable", "rowkey1", "rowkey3", filterList)) {
            if (scanner == null) {
                return;
            }
            scanner.forEach(result -> {
                System.out.println("rowkey = " + Bytes.toString(result.getRow()));
                System.out.println("fileName = " + Bytes.toString(result.getValue(family, nameQualifier)));
                if (printType) {
                    System.out.println("fileType = " + Bytes.toString(result.getValue(family, typeQualifier)));
                }
            });
        }
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章