上一篇介紹了HBase的基本概念,以及其在Linux環境下的安裝和交互;本文將繼續介紹如何通過Java和Python來操作HBase。
在通過API操作HBase之前,首先要保證Hadoop和HBase都已經啓動。
Java操作HBase
創建一個maven項目,添加基本的依賴:
<!-- Library versions are declared once here and referenced below as ${hadoop.version} and ${hbase.version}. -->
<properties>
<hadoop.version>2.10.0</hadoop.version>
<hbase.version>1.3.6</hbase.version>
</properties>
<dependencies>
<!-- Hadoop client libraries, both pinned to ${hadoop.version} -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- HBase client API used by the Java examples (Connection, Admin, Table, Put, Get, ...) -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
</dependencies>
通過java api創建表User,並列出所有的表,demo如下:
package cn.howe;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
/**
 * Minimal HBase admin demo: opens a connection, creates the {@code User} table with one
 * {@code info} column family if it does not exist yet, then prints every table name.
 */
public class HtableTest {
    /** HBase client configuration, populated by {@link #init()}. */
    public static Configuration configuration;
    /** Connection to the cluster, opened by {@link #init()}. */
    public static Connection connection;
    /** Admin handle used for table-level operations such as creating and listing tables. */
    public static Admin admin;

    /**
     * Builds the configuration and opens {@link #connection} and {@link #admin}.
     *
     * @throws IllegalStateException if the connection or the admin handle cannot be obtained;
     *         failing here avoids a later, unrelated NullPointerException on the null fields
     */
    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        // For a standalone ZooKeeper, set its address here; several hosts may be given, comma-separated:
        // configuration.set("hbase.zookeeper.quorum","dockerServer");
        // ZooKeeper client port:
        // configuration.set("hbase.zookeeper.property.clientPort","2181");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            // The Admin handle is what manages tables, e.g. creating them
            admin = connection.getAdmin();
        } catch (IOException e) {
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Entry point: initialises the connection, creates the {@code User} table and lists all tables.
     * The admin handle and the connection are always released before returning.
     */
    public static void main(String[] args) throws Exception {
        try {
            init();
            createTable("User", new String[]{"info"});
            listTable();
        } finally {
            // Close in reverse order of acquisition; the nested finally ensures the connection
            // is released even when closing the admin handle fails.
            try {
                if (admin != null) {
                    admin.close();
                }
            } finally {
                if (connection != null) {
                    connection.close();
                }
            }
        }
    }

    /**
     * Prints the name of every table returned by {@code admin.listTableNames()}.
     */
    public static void listTable() throws Exception {
        System.out.println("**********table:*******");
        for (TableName tableName : admin.listTableNames()) {
            System.out.println(tableName);
        }
    }

    /**
     * Creates a table with the given column families unless a table of that name already exists.
     *
     * @param tabName   name of the table to create
     * @param colFamily names of the column families to add to the new table
     * @throws IOException if the existence check or the creation fails
     */
    public static void createTable(String tabName, String[] colFamily) throws IOException {
        TableName tableName = TableName.valueOf(tabName);
        if (admin.tableExists(tableName)) {
            System.out.println("table exists: " + tabName);
            return;
        }
        HTableDescriptor desc = new HTableDescriptor(tableName);
        // One column-family descriptor per requested family
        for (String family : colFamily) {
            desc.addFamily(new HColumnDescriptor(family));
        }
        admin.createTable(desc);
        System.out.println("Table create successful!");
    }
}
可以通過命令行確認表是否真的創建成功:
往表中添加數據:
/**
 * Writes one cell into a table and prints a confirmation message.
 * An {@link IOException} is reported with a stack trace and not rethrown.
 *
 * @param tabName   table to write to
 * @param rowKey    row key of the cell
 * @param colFalimy column family of the cell
 * @param col       column qualifier of the cell
 * @param value     value to store
 */
public static void insertRow(String tabName, String rowKey, String colFalimy, String col, String value) {
    // try-with-resources releases the Table handle even when the put fails
    try (Table table = connection.getTable(TableName.valueOf(tabName))) {
        // Encode explicitly as UTF-8 so the stored bytes do not depend on the JVM's default charset
        Put put = new Put(rowKey.getBytes(StandardCharsets.UTF_8));
        put.addColumn(
                colFalimy.getBytes(StandardCharsets.UTF_8),
                col.getBytes(StandardCharsets.UTF_8),
                value.getBytes(StandardCharsets.UTF_8));
        table.put(put);
        System.out.println("新增記錄成功");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
在主函數中調用insertRow:
// Store the value "xiaoming" in column info:name of row "row1" in table "User"
insertRow("User", "row1","info", "name", "xiaoming");
運行後,通過命令行查看表數據:
通過代碼查看表數據:
/**
 * Reads one row and prints its cells via {@code showCell}.
 * With both a family and a qualifier only that column is fetched; with only a family the whole
 * family is fetched; otherwise the whole row is fetched. Nothing is printed for an empty result.
 * Any exception is reported with a stack trace and not rethrown.
 *
 * @param tabName   table to read from
 * @param rowKey    row key to look up
 * @param colFamily column family to restrict the read to; may be empty
 * @param qualifier column qualifier to restrict the read to; may be empty
 */
public static void getData(String tabName, String rowKey, String colFamily, String qualifier) {
    // try-with-resources releases the Table handle on every path
    try (Table table = connection.getTable(TableName.valueOf(tabName))) {
        // Encode explicitly as UTF-8 so lookups do not depend on the JVM's default charset
        Get get = new Get(rowKey.getBytes(StandardCharsets.UTF_8));
        if (StringUtils.isNotEmpty(qualifier) && StringUtils.isNotEmpty(colFamily)) {
            // Restrict the read to a single column
            get.addColumn(
                    colFamily.getBytes(StandardCharsets.UTF_8),
                    qualifier.getBytes(StandardCharsets.UTF_8));
        } else if (StringUtils.isNotEmpty(colFamily)) {
            // Restrict the read to a whole column family
            get.addFamily(colFamily.getBytes(StandardCharsets.UTF_8));
        }
        Result result = table.get(get);
        if (!result.isEmpty()) {
            showCell(result);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Prints row key, timestamp, column family, qualifier and value of every cell in a result.
 * Byte arrays are decoded as UTF-8 rather than with the JVM's default charset.
 *
 * @param result the result whose raw cells are printed
 */
private static void showCell(Result result) {
    // rawCells() exposes the result as an array of cells
    for (Cell cell : result.rawCells()) {
        String row = new String(CellUtil.cloneRow(cell), StandardCharsets.UTF_8);
        String family = new String(CellUtil.cloneFamily(cell), StandardCharsets.UTF_8);
        String column = new String(CellUtil.cloneQualifier(cell), StandardCharsets.UTF_8);
        String value = new String(CellUtil.cloneValue(cell), StandardCharsets.UTF_8);
        System.out.println("RowName:" + row + " ");
        System.out.println("TimeStamp:" + cell.getTimestamp() + " ");
        System.out.println("column Falimy:" + family + " ");
        System.out.println("column Name:" + column + " ");
        System.out.println("value:" + value + " ");
    }
}
調用 getData("User", "row1", "info", ""),結果如下:
這裏是將result轉換成單元格,然後通過單元格獲取各屬性。
還可以直接獲取qualifier的值:
/**
 * Prints the value of one column of one row.
 * If the row or column does not exist, a "no value" message is printed instead of failing with
 * a NullPointerException.
 *
 * @param tabName   table to read from
 * @param rowKey    row key to look up
 * @param colFamily column family of the wanted cell
 * @param qualifier column qualifier of the wanted cell
 * @throws Exception if the table cannot be opened or read
 */
public static void queryByQualifier(String tabName, String rowKey, String colFamily, String qualifier) throws Exception {
    // Encode explicitly as UTF-8 so lookups do not depend on the JVM's default charset
    byte[] family = colFamily.getBytes(StandardCharsets.UTF_8);
    byte[] column = qualifier.getBytes(StandardCharsets.UTF_8);
    // try-with-resources releases the Table handle on every path
    try (Table table = connection.getTable(TableName.valueOf(tabName))) {
        Get get = new Get(rowKey.getBytes(StandardCharsets.UTF_8));
        // Only the requested column is needed, so do not fetch the whole row
        get.addColumn(family, column);
        byte[] name = table.get(get).getValue(family, column);
        if (name == null) {
            // getValue returns null when the cell is absent
            System.out.println("data is: (no value for " + colFamily + ":" + qualifier
                    + " in row " + rowKey + ")");
        } else {
            System.out.println("data is: " + new String(name, StandardCharsets.UTF_8));
        }
    }
}
通過 queryByQualifier("User", "row1", "info", "name") 進行調用。
全表掃描如下:
/**
 * Scans the whole table and prints every cell of every row via {@code showCell}.
 *
 * @param tabName table to scan
 * @throws IOException if the table cannot be opened or scanned
 */
public static void scanData(String tabName) throws IOException {
    // Both the table and the scanner are closed by try-with-resources,
    // including when printing a row fails part-way through the scan.
    try (Table table = connection.getTable(TableName.valueOf(tabName));
         ResultScanner resultScanner = table.getScanner(new Scan())) {
        for (Result result : resultScanner) {
            showCell(result);
        }
    }
}
刪除數據和刪除表如下所示:
/**
 * Deletes data from one row and prints a confirmation message.
 * With both a family and a qualifier the delete is limited to that column; with only a family
 * it is limited to that family; with neither, the {@code Delete} is issued for the row key with
 * no restriction. Any exception is reported with a stack trace and not rethrown.
 *
 * @param tabName   table to delete from
 * @param rowKey    row key to delete from
 * @param colFamily column family to restrict the delete to; may be empty
 * @param qualifier column qualifier to restrict the delete to; may be empty
 */
public static void deleteRow(String tabName, String rowKey, String colFamily, String qualifier) {
    // try-with-resources releases the Table handle even when the delete fails
    try (Table table = connection.getTable(TableName.valueOf(tabName))) {
        // Encode explicitly as UTF-8 so the keys do not depend on the JVM's default charset
        Delete delete = new Delete(rowKey.getBytes(StandardCharsets.UTF_8));
        if (StringUtils.isNotEmpty(qualifier) && StringUtils.isNotEmpty(colFamily)) {
            // Delete only the given column
            delete.addColumn(
                    colFamily.getBytes(StandardCharsets.UTF_8),
                    qualifier.getBytes(StandardCharsets.UTF_8));
        } else if (StringUtils.isNotEmpty(colFamily)) {
            // Delete the whole column family
            delete.addFamily(colFamily.getBytes(StandardCharsets.UTF_8));
        }
        table.delete(delete);
        System.out.println("刪除數據成功");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Drops a table and, on success, calls {@code close()}.
 * A missing table is reported with a message instead of a stack trace. The table is disabled
 * only if it is still enabled, so a drop that previously stopped after the disable step can be
 * retried. Any exception is reported with a stack trace and not rethrown.
 *
 * @param tabName name of the table to drop
 */
public static void dropTable(String tabName) {
    try {
        TableName tableName = TableName.valueOf(tabName);
        if (!admin.tableExists(tableName)) {
            System.out.println("table does not exist: " + tabName);
            return;
        }
        // A table must be disabled before it can be deleted; disabling an already
        // disabled table throws, so check its state first.
        if (admin.isTableEnabled(tableName)) {
            admin.disableTable(tableName);
        }
        admin.deleteTable(tableName);
        System.out.println("刪除成功" + tabName);
        // NOTE: close() is called only after a successful drop, as before
        close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Closes the shared admin handle and connection if they were opened.
 * Each resource is closed in its own try block so that a failure while closing the admin
 * handle cannot prevent the connection from being closed. Failures are reported with a stack
 * trace and not rethrown.
 */
public static void close() {
    if (admin != null) {
        try {
            admin.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if (connection != null) {
        try {
            connection.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Python操作HBase
由於HBase是用Java寫的,原生只提供Java接口;如果使用其他語言(如Python),則需要先開啓HBase自帶的Thrift接口服務器,再通過它來連接HBase。
開啓命令如下:
# Start the HBase Thrift server using the hbase launcher script in the current directory
./hbase thrift start
開啓後可以看到默認參數,如端口號爲9090:
[main] thrift.ThriftServerRunner: starting TBoundedThreadPoolServer on /0.0.0.0:9090 with readTimeout 60000ms; min worker threads=16, max worker threads=1000, max queued requests=1000
happybase連接hbase
通過happybase即可連接hbase。happybase爲python操作hbase的常用第三方模塊,支持數據的讀取、寫入、掃描,行數據的刪除可以通過table.delete()完成(見下面的示例)。
# -*- coding: utf-8 -*-
# Demo: basic reads, writes, deletes and scans against the "User" table through happybase.
# print_function keeps the output identical on Python 2 while also parsing on Python 3.
from __future__ import print_function

import happybase

# Connect to the HBase Thrift server on localhost:9090
conn = happybase.Connection("localhost", 9090)
print(conn.tables())

table = conn.table("User")
row = table.row("row1")
print(row)

# Add data: a put takes the row key and a dict of "family:qualifier" -> value
table.put("row2", {"info:name": "jack"})

# Fetch several rows at once by passing a list of row keys
rows = table.rows(["row1", "row2"])
print(rows)

# Delete a row, then read it back to show the result
table.delete("row2")
print(table.row("row2"))

# Iterate with scan(): with no arguments it walks the table;
# row_start/row_stop limit it to a key range, row_prefix to keys with a given prefix
for key, value in table.scan():
    print(key, value)

table.put("row2", {"info:name": "jack"})
print("get row1 to row3")
for key, value in table.scan(row_start="row1", row_stop="row3"):
    print(key, value)

table.put("row3", {"info:name": "rose"})
print("get rowKey start with 'row'")
for key, value in table.scan(row_prefix="row"):
    print(key, value)

# Release the Thrift connection once the demo is finished
conn.close()
輸出如下(在之前已創建了User表):
hbase-thrift連接hbase
當然,也可以通過hbase-thrift來進行hbase連接。
安裝第三方包:
thrift == 0.9.2
hbase_thrift == 0.20.4
然後通過hbase模塊建立連接如下:
# -*- coding: utf-8 -*-
# Demo: list the tables and read one cell through the raw hbase-thrift client.
# print_function keeps the output identical on Python 2 while also parsing on Python 3.
from __future__ import print_function

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase

# Connect to the Thrift server on localhost:9090.
# The local is named "sock" so it does not shadow the standard-library socket module.
sock = TSocket.TSocket("localhost", 9090)
sock.setTimeout(5000)
transport = TTransport.TBufferedTransport(sock)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

# Open and close through the buffered transport that the protocol actually uses
transport.open()
try:
    print(client.getTableNames())
    print(client.get('User', 'row1', 'info:name'))
finally:
    # Always release the connection, including when a call above fails
    transport.close()
輸出結果: