HBase(二):Shell操作、API、MapReduce

一、HBase Shell操作

  1. 創建表
hbase(main):002:0> create 'student','info'
  1. 插入數據到表
hbase(main):003:0> put 'student','1001','info:sex','male'
hbase(main):004:0> put 'student','1001','info:age','18'
hbase(main):005:0> put 'student','1002','info:age','20'
hbase(main):006:0> put 'student','1002','info:sex','female'
hbase(main):007:0> put 'student','1002','info:name','Rose'
  1. 掃描查看表數據
hbase(main):008:0> scan 'student'
ROW                    COLUMN+CELL                                                   
 1001                  column=info:age, timestamp=1592878691918, value=18            
 1001                  column=info:sex, timestamp=1592878652665, value=male          
 1002                  column=info:age, timestamp=1592878712127, value=20            
 1002                  column=info:name, timestamp=1592878742670, value=Rose         
 1002                  column=info:sex, timestamp=1592878727779, value=female 

hbase(main):010:0> scan 'student',{STARTROW => '1001', STOPROW  => '1002'}
ROW                      COLUMN+CELL  
 1001                    column=info:age, timestamp=1592878691918, value=18

hbase(main):013:0> scan 'student',{STARTROW => '1001'}
ROW                      COLUMN+CELL
 1001                    column=info:age, timestamp=1592878691918, value=18
 1001                    column=info:sex, timestamp=1592878652665, value=male
 1002                    column=info:age, timestamp=1592878712127, value=20
 1002                    column=info:name, timestamp=1592878742670, value=Rose
 1002                    column=info:sex, timestamp=1592878727779, value=female

STARTROW 和 STOPROW 均需要大寫;掃描範圍是前閉後開(包含起始行,不包含結束行),因此上例只返回 1001 而不返回 1002

  1. 查看表結構
hbase(main):014:0> describe 'student'
Table student is ENABLED
student
COLUMN FAMILIES DESCRIPTION
{NAME => 'info', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', 
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER',
COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536',
REPLICATION_SCOPE => '0'} 
  1. 更新指定字段的數據
hbase(main):015:0> put 'student','1001','info:name','Nick'
hbase(main):016:0> put 'student','1001','info:age','100'

hbase(main):017:0> scan 'student',{STARTROW => '1001', STOPROW  => '1001'}
ROW                      COLUMN+CELL
 1001                    column=info:age, timestamp=1592879968475, value=100
 1001                    column=info:name, timestamp=1592879961347, value=Nick
 1001                    column=info:sex, timestamp=1592878652665, value=male
  1. 查看“指定行”或“指定列族:列”的數據
hbase(main):019:0> get 'student','1001'
COLUMN                   CELL
 info:age                timestamp=1592879968475, value=100
 info:name               timestamp=1592879961347, value=Nick
 info:sex                timestamp=1592878652665, value=male
 
hbase(main):020:0> get 'student','1001','info:name'
COLUMN                   CELL
 info:name               timestamp=1592879961347, value=Nick  
  1. 統計表數據行數
hbase(main):021:0> count 'student'
=> 2
  1. 刪除數據
    刪除某rowkey的全部數據:hbase(main):016:0> deleteall 'student','1001'
    刪除某rowkey的某一列數據:hbase(main):017:0> delete 'student','1002','info:sex'

  2. 清空表數據

hbase(main):018:0> truncate 'student'

提示:清空表的操作順序爲先disable,然後再truncate

  1. 刪除表
    首先需要先讓該表爲disable狀態:hbase(main):019:0> disable 'student'
    然後才能drop這個表:hbase(main):020:0> drop 'student'

提示:如果直接drop表,會報錯:ERROR: Table student is enabled. Disable it first

  1. 變更表信息
    info列族中的數據存放3個版本:
hbase(main):022:0> alter 'student',{NAME=>'info',VERSIONS=>3}

hbase(main):023:0> put 'student','1001','info:age','50'
hbase(main):024:0> put 'student','1001','info:age','20'

hbase(main):025:0> get 'student','1001',{COLUMN=>'info:age',VERSIONS=>3}
COLUMN                   CELL
 info:age                timestamp=1592881081492, value=20
 info:age                timestamp=1592880933902, value=50
 info:age                timestamp=1592879968475, value=100  

hbase(main):026:0> get 'student','1001','info:age'
COLUMN                   CELL
 info:age                timestamp=1592881081492, value=20

二、HBase API操作

HBaseAdmin用於操作表級別、Table用於操作表數據。

①獲取Configuration、HBaseAdmin、Connection對象

// Shared HBase handles for all examples below: one Configuration, one
// Connection, and one Admin, created exactly once in a static initializer.
public class HBaseTest {

    public static Configuration conf;
	// Admin handle, used for table-level operations (create/delete/exists)
    public static HBaseAdmin admin;

    // Heavyweight, thread-safe connection; Table instances are obtained from it
    public static Connection connection;

    static {
        try {
            conf = HBaseConfiguration.create();
            // ZooKeeper quorum and client port of the target HBase cluster
            conf.set("hbase.zookeeper.quorum", "hadoop100,hadoop101,hadoop102");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            connection = ConnectionFactory.createConnection(conf);
            admin = (HBaseAdmin) connection.getAdmin();
        } catch (IOException e) {
            // NOTE(review): a failure here leaves admin/connection null and only
            // prints the stack trace; every later call will NPE — consider
            // rethrowing as ExceptionInInitializerError instead.
            e.printStackTrace();
        }
    }
}

②判斷表是否存在

/**
 * Checks whether the given table exists.
 *
 * @param tableName table name, optionally namespace-qualified ("ns:table")
 * @return true if the table exists
 * @throws IOException if the request to the master fails
 */
public static boolean isTableExist(String tableName) throws IOException {
    // Use TableName.valueOf(...) rather than the deprecated String overload of
    // tableExists, consistent with how the rest of this file builds table names.
    return admin.tableExists(TableName.valueOf(tableName));
}

③創建表

/**
 * Creates a table with the given column families, if it does not already exist.
 *
 * @param tableName    name of the table to create
 * @param columnFamily one or more column family names
 * @throws IOException if the HBase operation fails
 */
public static void createTable(String tableName, String... columnFamily) throws IOException {
    if (isTableExist(tableName)) {
        System.out.println("表 " + tableName + " 已存在");
        return;
    }
    // Describe the new table and attach every requested column family.
    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
    for (String family : columnFamily) {
        tableDesc.addFamily(new HColumnDescriptor(family));
    }
    admin.createTable(tableDesc);
    System.out.println("表 " + tableName + " 創建成功");
}

④刪除表

/**
 * Drops a table. A table must be disabled before it can be deleted,
 * so this disables it first.
 *
 * @param tableName name of the table to drop
 * @throws IOException if the HBase operation fails
 */
public static void deleteTable(String tableName) throws IOException {
    if (!isTableExist(tableName)) {
        System.out.println("表 " + tableName + " 不存在");
        return;
    }
    admin.disableTable(tableName);
    admin.deleteTable(tableName);
    System.out.println("表 " + tableName + " 刪除成功");
}

⑤創建命名空間

/**
 * Creates a namespace. Tables can then be created inside it, e.g.
 * createTable("namespace:stu", "info").
 *
 * @param nameSpace namespace to create
 * @throws IOException if the namespace already exists or the request fails
 */
public static void createNameSpace(String nameSpace) throws IOException {
    admin.createNamespace(NamespaceDescriptor.create(nameSpace).build());
}

⑥插入數據

/**
 * Inserts (or overwrites) a single cell: (rowKey, columnFamily:column) = value.
 *
 * @param tableName    target table
 * @param rowKey       row key of the cell
 * @param columnFamily column family name
 * @param column       column qualifier
 * @param value        cell value
 * @throws IOException if the put fails
 */
public static void addRowData(String tableName, String rowKey, String columnFamily,
                              String column, String value) throws IOException {
    // try-with-resources releases the Table even when the put throws;
    // the original closed it only on the happy path.
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        Put put = new Put(Bytes.toBytes(rowKey));
        // addColumn replaces the deprecated Put#add(byte[], byte[], byte[]),
        // matching the usage elsewhere in this file.
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }
    System.out.println("表 " + tableName + " 插入數據成功");
}

⑦刪除多行數據

/**
 * Deletes all data for each of the given row keys in one batch call.
 *
 * @param tableName target table
 * @param rowKeys   row keys to delete
 * @throws IOException if the batch delete fails
 */
public static void deleteMultiRows(String tableName, String... rowKeys) throws IOException {
    // try-with-resources releases the Table even when delete throws;
    // the original leaked it on failure.
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        List<Delete> deleteList = new ArrayList<>(rowKeys.length);
        for (String rowKey : rowKeys) {
            deleteList.add(new Delete(Bytes.toBytes(rowKey)));
        }
        table.delete(deleteList);
    }
    System.out.println("刪除多行數據成功");
}

⑧獲取所有數據

/**
 * Scans the whole table and prints every cell (row key, family, qualifier, value).
 *
 * @param tableName table to scan
 * @throws IOException if the scan fails
 */
public static void getAllRows(String tableName) throws IOException {
    // Both the Table and the ResultScanner must be closed; the original
    // closed neither (the scanner holds server-side resources until closed).
    try (Table table = connection.getTable(TableName.valueOf(tableName));
         ResultScanner resultScanner = table.getScanner(new Scan())) {
        for (Result result : resultScanner) {
            for (Cell cell : result.rawCells()) {
                System.out.println("Row Key:" + Bytes.toString(CellUtil.cloneRow(cell)));
                System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell)));
            }
        }
    }
}

⑨獲取某一行數據

/**
 * Fetches a single row and prints every cell it contains.
 *
 * @param tableName target table
 * @param rowKey    row key to fetch
 * @throws IOException if the get fails
 */
public static void getRow(String tableName, String rowKey) throws IOException {
    // try-with-resources: the original never closed the Table.
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        Get get = new Get(Bytes.toBytes(rowKey));
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println("Row Key:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}

⑩獲取某一行指定“列族:列”的數據

/**
 * Fetches one row restricted to a single "family:qualifier" column and prints it.
 *
 * @param tableName target table
 * @param rowKey    row key to fetch
 * @param family    column family
 * @param qualifier column qualifier
 * @throws IOException if the get fails
 */
public static void getRowQualifier(String tableName, String rowKey, String family,
                                   String qualifier) throws IOException {
    // try-with-resources: the original never closed the Table.
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        Get get = new Get(Bytes.toBytes(rowKey));
        get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println("Row Key:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}

三、MapReduce

通過HBase的相關Java API,我們可以實現伴隨HBase操作的MapReduce過程,比如使用MapReduce將數據從本地文件系統導入到HBase的表中,又比如從HBase中讀取一些原始數據後使用MapReduce做數據分析。

3.1 官方HBase-MapReduce

1.配置HBASE_HOME、HADOOP_HOME環境變量

export HBASE_HOME=/opt/module/hbase-1.3.1
export HADOOP_HOME=/opt/module/hadoop-2.7.2

2.配置hadoop-env.sh

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase-1.3.1/lib/*

3.案例一:統計表有多少行數據

[root@hadoop100 hbase-1.3.1]# /opt/module/hadoop-2.7.2/bin/yarn jar \
lib/hbase-server-1.3.1.jar rowcounter student

4.案例二:使用MapReduce將本地數據導入到HBase

  1. 在本地創建一個tsv格式的文件:fruit.tsv
1001	Apple	Red
1002	Pear	Yellow
1003	Pineapple	Yellow
  1. 創建HBase表:fruit
hbase(main):001:0> create 'fruit','info'
  1. 上傳fruit.tsv
[root@hadoop100 datas]# /opt/module/hadoop-2.7.2/bin/hdfs dfs -put fruit.tsv /input_fruit
  1. 執行MapReduce導入數據到HBase的fruit表中
[root@hadoop100 hbase-1.3.1]#/opt/module/hadoop-2.7.2/bin/yarn jar \
lib/hbase-server-1.3.1.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
hdfs://hadoop100:9000/input_fruit

3.2 自定義MapReduce將本地數據導入到HBase

mapper類:

/**
 * Reads TSV lines ("rowKey\tname\tcolor") from HDFS and emits one Put per line,
 * keyed by the row key, targeting the "info" column family.
 */
public class FruitMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

  @Override
  protected void map(LongWritable key, Text value, Context context) throws IOException,
          InterruptedException {
      // Example line: 1001	Apple	Red
      String line = value.toString();
      if (StringUtils.isNotEmpty(line)) {
          String[] fields = line.split("\t");
          // Skip malformed lines instead of failing the whole job with an
          // ArrayIndexOutOfBoundsException (original indexed fields[2] blindly).
          if (fields.length < 3) {
              return;
          }
          String rowKey = fields[0];
          String name = fields[1];
          String color = fields[2];
          // ImmutableBytesWritable is the output key type carrying the row key
          ImmutableBytesWritable k = new ImmutableBytesWritable(Bytes.toBytes(rowKey));
          Put put = new Put(Bytes.toBytes(rowKey));
          put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
          put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(color));
          context.write(k, put);
      }
  }
}

Reduce類:

/**
 * Pass-through reducer: forwards every Put produced by the mapper straight to
 * the HBase output table configured via TableMapReduceUtil.
 */
public class FruitReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Emit each Put unchanged; the row key travels inside the Put itself,
        // so the output key is simply NullWritable.
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}

驅動Driver:

/**
 * Driver for the HDFS -> HBase import job: FruitMapper reads fruit.tsv and
 * FruitReducer writes the resulting Puts into the "fruit" table.
 */
public class FruitDriver extends Configured implements Tool {

    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT = "hdfs://hadoop100:9000/input_fruit/fruit.tsv";

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = this.getConf();
        //1.獲取Job對象
        Job job = Job.getInstance(conf);
        //2.設置驅動類路徑
        job.setJarByClass(FruitDriver.class);
        //3.設置Mapper&Mapper輸出的KV類型
        job.setMapperClass(FruitMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        //4.設置Reducer類 (also wires the HBase TableOutputFormat for "fruit")
        TableMapReduceUtil.initTableReducerJob("fruit", FruitReducer.class, job);
        //5.設置輸入路徑 — use the first CLI argument when given, otherwise the
        //  original hard-coded default (backward compatible)
        Path inPath = new Path(strings.length > 0 ? strings[0] : DEFAULT_INPUT);
        FileInputFormat.addInputPath(job, inPath);
        //6.提交任務
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        int status = ToolRunner.run(configuration, new FruitDriver(), args);
        System.exit(status);
    }
}

打包運行:

[root@hadoop100 hbase-1.3.1]# /opt/module/hadoop-2.7.2/bin/yarn jar hbase-1.0-SNAPSHOT.jar \
com.hucheng.mr.dfs2hbase.FruitDriver

3.3 從HBase導入數據到HBase

mapper類:

/**
 * Reads rows from the source HBase table and emits a Put containing only the
 * "info:name" cells of each row.
 */
public class FruitMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        Put put = new Put(key.get());
        Cell[] cells = value.rawCells();
        for (Cell cell : cells) {
            // Keep only info:name cells; everything else is filtered out
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell))) &&
                    "name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                put.add(cell);
            }
        }
        // Rows without an info:name cell would produce an empty Put, which the
        // HBase output format rejects ("No columns to insert") — skip them.
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}

Reduce類:

/**
 * Identity reducer for the HBase -> HBase copy: every incoming Put is written
 * as-is to the target table configured in the driver.
 */
public class FruitReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // The Put already carries its own row key, so the output key is unused.
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}

驅動Driver:

/**
 * Driver for the HBase -> HBase copy job: scans the "fruit" table and writes
 * the filtered cells into "fruit2".
 */
public class FruitDriver extends Configured implements Tool {

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = this.getConf();
        //1.獲取Job對象
        Job job = Job.getInstance(conf);
        //2.設置驅動類路徑
        job.setJarByClass(FruitDriver.class);
        //3.配置Job — scan settings for the source table
        Scan scan = new Scan();
        scan.setCacheBlocks(false);  // MR scans are one-pass; don't pollute the block cache
        scan.setCaching(500);        // rows fetched per RPC
        //4.設置Mapper
        TableMapReduceUtil.initTableMapperJob(
                "fruit",//數據源表名
                scan,//scan掃描控制器
                FruitMapper.class,//Mapper類
                ImmutableBytesWritable.class,//Mapper輸出K
                Put.class,//Mapper輸出V
                job
        );
        //5.設置Reducer類
        TableMapReduceUtil.initTableReducerJob("fruit2", FruitReducer.class, job);
        //6.提交任務
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        // BUG FIX: the original instantiated com.hucheng.mr.dfs2hbase.FruitDriver
        // (the HDFS-import driver from the previous section), so this job was
        // never actually run. Use this class instead.
        int status = ToolRunner.run(configuration, new FruitDriver(), args);
        System.exit(status);
    }
}

打包運行:

[root@hadoop100 hbase-1.3.1]# /opt/module/hadoop-2.7.2/bin/yarn jar hbase-1.0-SNAPSHOT.jar \
com.hucheng.mr.hbase2hbase.FruitDriver
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章