一、python hbase API (一) thrift2環境準備
python hbase API (一) thrift2環境準備
https://mp.csdn.net/postedit/86501781
二、python hbase API (二)
1、configuration.properties
將configuration.properties放在項目的resources資源目錄下
################################Database#######################################
## Hbase
hbase_host 172.8.xx.xx
hbase_port 9090
hbase_username 0
hbase_password 0
hbase_db xx
hbase_columnfamilies xx
## Redis
redis_host 172.8.xx.xxx
redis_port 6379
redis_username 0
redis_password "xxx"
redis_db 1
## MySQL
sql_host 172.8.xx.xxx
sql_port 3306
sql_username xxx
sql_password xxx
2、HbaseUtlis.py
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
from contextlib import contextmanager

import pandas as pd
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket, TTransport

from hbase import Hbase
from hbase.ttypes import Mutation, BatchMutation
## READ CONFIGURATION FILE
# The properties file is parsed as a whitespace-separated table with no header
# row; the first field of each line becomes the index.  After transposing,
# every configuration key is a column holding a single value, so a setting is
# read as ``config_file[key].iloc[0]``.
# ``sep=r"\s+"`` replaces ``delim_whitespace=True``, which is deprecated since
# pandas 2.2.
# The path is relative, so it is resolved against the current working
# directory of the process.
config_file = pd.read_table(
    filepath_or_buffer="configuration.properties",
    header=None,
    sep=r"\s+",
    index_col=0,
).transpose()

# All settings are exposed as strings (including the port).
Hbase_host = str(config_file['hbase_host'].iloc[0])
Hbase_port = str(config_file['hbase_port'].iloc[0])
Hbase_username = str(config_file['hbase_username'].iloc[0])
Hbase_password = str(config_file['hbase_password'].iloc[0])
Hbase_db = str(config_file['hbase_db'].iloc[0])
Hbase_columnfamilies = str(config_file['hbase_columnfamilies'].iloc[0])
class HbaseClient(object):
    """Convenience wrapper around the Thrift-generated ``Hbase.Client``.

    Each public method opens the transport, performs one logical operation
    and closes the transport again, so no connection is held between calls.

    Most methods prefix ``tableName`` with the configured namespace
    (``Hbase_db + ':'``).  ``getRow``, ``mutateRows`` and
    ``scannerOpenWithPrefix`` pass ``tableName`` through unchanged, so the
    caller must supply the name exactly as the server expects it.
    NOTE(review): confirm whether that difference is intentional.
    """

    __slots__ = ['transport', 'client']

    def __init__(self):
        """Build the socket, transport, protocol and client (no I/O yet)."""
        # Address and port of the Thrift server (the original author notes
        # that 9090 is the Thrift server's default port).  The port is read
        # from the configuration as a string, so convert it to an integer.
        sock = TSocket.TSocket(Hbase_host, int(Hbase_port))
        # Socket timeout, as passed to TSocket.setTimeout.
        sock.setTimeout(5000)
        # Transport layer (TFramedTransport or TBufferedTransport).
        self.transport = TTransport.TBufferedTransport(sock)
        # Wire protocol.
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        # Generated client bound to that protocol.
        self.client = Hbase.Client(protocol)

    @contextmanager
    def _session(self):
        """Open the transport for the duration of a ``with`` block.

        Yields the Thrift client.  The transport is closed in a ``finally``
        clause so that a failing call cannot leave the connection open.
        """
        self.transport.open()
        try:
            yield self.client
        finally:
            self.transport.close()

    @staticmethod
    def _cell_value(item, column):
        """Return the value stored under ``column`` in a row result.

        Returns ``None`` when the row has no cell for that column instead of
        failing on the missing entry.
        """
        cell = item.columns.get(column)
        return None if cell is None else cell.value

    def _drain_scanner(self, scannerId, extract):
        """Read a scanner to exhaustion and always close it.

        ``extract`` is called once per row result and its return value is
        collected, so every row gets its own freshly built object.  Must be
        called while the transport is open.
        """
        rows = []
        try:
            while True:
                # Fetch the next result for this scanner id; a falsy result
                # marks the end of the scan.
                batch = self.client.scannerGet(scannerId)
                if not batch:
                    break
                rows.extend(extract(item) for item in batch)
        finally:
            self.client.scannerClose(scannerId)
        return rows

    ## List tables
    def getTableNames(self):
        """Return the table names reported by the server."""
        with self._session() as client:
            return client.getTableNames()

    ## Read one column of one row
    def get(self, tableName, row, column):
        """Return the server's result for a single ``column`` of ``row``.

        ``column`` is passed through unchanged (no family prefix is added).
        """
        with self._session() as client:
            return client.get(Hbase_db + ':' + tableName, row, column)

    ## Read several columns of one row
    def getRowWithColumns(self, tableName, row, columns):
        """Return ``{column: value}`` for the requested ``columns`` of ``row``.

        ``columns`` holds bare qualifiers; the configured column family is
        prepended for the request and left out of the returned keys.  The
        result is empty when the server returns no row, and a column missing
        from the row maps to ``None``.
        """
        names = list(columns)
        qualified = [Hbase_columnfamilies + ':' + name for name in names]
        with self._session() as client:
            result = client.getRowWithColumns(Hbase_db + ':' + tableName, row, qualified)
        data = {}
        for item in result:
            for name, column in zip(names, qualified):
                data[name] = self._cell_value(item, column)
        return data

    ## Read a whole row
    def getRow(self, tableName, row):
        """Return ``{column: value}`` for every cell of ``row``.

        Returns an empty dict when the server returns no row.
        NOTE(review): the prefix stripped from the keys is the literal
        ``'info:'`` rather than ``Hbase_columnfamilies`` - confirm which one
        is intended.
        """
        with self._session() as client:
            result = client.getRow(tableName, row)
        if not result:
            return {}
        cells = result[0].columns
        return {key.replace('info:', ''): cell.value for key, cell in cells.items()}

    ## Insert one row
    def mutateRow(self, tableName, row, hat_data):
        """Write the ``{qualifier: value}`` mapping ``hat_data`` into ``row``."""
        mutations = [
            Mutation(column=Hbase_columnfamilies + ':' + key, value=value)
            for key, value in hat_data.items()
        ]
        with self._session() as client:
            client.mutateRow(Hbase_db + ':' + tableName, row, mutations)

    ## Insert several rows
    def mutateRows(self, tableName, dt, current_ruleVal):
        """Write every row of the DataFrame ``dt`` in one batch call.

        The row key is the concatenation of ``current_ruleVal.machineID``,
        ``.spindleID``, ``.programNum`` and the row's ``step_number`` column
        left-padded with zeros to five characters.  All cell values are
        converted to strings before being sent.
        """
        batchMutation = []
        for i in range(dt.shape[0]):
            record = dt.iloc[i, :].astype('str')
            rowkey = ''.join([
                str(current_ruleVal.machineID),
                str(current_ruleVal.spindleID),
                str(current_ruleVal.programNum),
                str(record['step_number']).zfill(5),
            ])
            mutations = [
                Mutation(column=Hbase_columnfamilies + ':' + column, value=message)
                for column, message in record.to_dict().items()
            ]
            batchMutation.append(BatchMutation(rowkey, mutations))
        with self._session() as client:
            client.mutateRows(tableName, batchMutation)

    ## Delete one row
    def deleteAllRow(self, tableName, row):
        """Delete all cells of ``row``."""
        with self._session() as client:
            client.deleteAllRow(Hbase_db + ':' + tableName, row)

    ## Range query: scan between a start and a stop row key
    def scannerOpenWithStop(self, tableName, startRow, stopRow, refer_data):
        """Scan from ``startRow`` to ``stopRow`` and return a list of dicts.

        The values of ``refer_data`` are used as the column names to fetch
        (passed to the server unchanged) and as the keys of each returned
        dict.  A column missing from a row maps to ``None``.
        """
        columes = list(refer_data.values())

        def extract(item):
            return {column: self._cell_value(item, column) for column in columes}

        with self._session() as client:
            scannerId = client.scannerOpenWithStop(
                Hbase_db + ':' + tableName, startRow, stopRow, columes)
            return self._drain_scanner(scannerId, extract)

    ## Prefix query: scan rows whose key starts with a prefix
    def scannerOpenWithPrefix(self, tableName, startAndPrefix, columns):
        """Scan rows matching ``startAndPrefix`` and return a DataFrame.

        ``columns`` holds bare qualifiers; the configured column family is
        prepended for the request and left out of the DataFrame's column
        names.  A column missing from a row yields ``None`` in that cell.
        """
        names = list(columns)
        qualified = [Hbase_columnfamilies + ':' + name for name in names]

        def extract(item):
            return {
                name: self._cell_value(item, column)
                for name, column in zip(names, qualified)
            }

        with self._session() as client:
            scannerId = client.scannerOpenWithPrefix(tableName, startAndPrefix, qualified)
            data_list = self._drain_scanner(scannerId, extract)
        return pd.DataFrame(data_list)
三、thrift生成的代碼中都提供了哪些方法
提供的方法有:
void enableTable(Bytes tableName)
enable表
void disableTable(Bytes tableName)
disable表
bool isTableEnabled(Bytes tableName)
查看表狀態
void compact(Bytes tableNameOrRegionName)
void majorCompact(Bytes tableNameOrRegionName)
getTableNames()
getColumnDescriptors(Text tableName)
getTableRegions(Text tableName)
void createTable(Text tableName, columnFamilies)
void deleteTable(Text tableName)
get(Text tableName, Text row, Text column)
getVer(Text tableName, Text row, Text column, i32 numVersions)
getVerTs(Text tableName, Text row, Text column, i64 timestamp, i32 numVersions)
getRow(Text tableName, Text row)
getRowWithColumns(Text tableName, Text row, columns)
getRowTs(Text tableName, Text row, i64 timestamp)
getRowWithColumnsTs(Text tableName, Text row, columns, i64 timestamp)
getRows(Text tableName, rows)
getRowsWithColumns(Text tableName, rows, columns)
getRowsTs(Text tableName, rows, i64 timestamp)
getRowsWithColumnsTs(Text tableName, rows, columns, i64 timestamp)
void mutateRow(Text tableName, Text row, mutations)
void mutateRowTs(Text tableName, Text row, mutations, i64 timestamp)
void mutateRows(Text tableName, rowBatches)
void mutateRowsTs(Text tableName, rowBatches, i64 timestamp)
i64 atomicIncrement(Text tableName, Text row, Text column, i64 value)
void deleteAll(Text tableName, Text row, Text column)
void deleteAllTs(Text tableName, Text row, Text column, i64 timestamp)
void deleteAllRow(Text tableName, Text row)
void deleteAllRowTs(Text tableName, Text row, i64 timestamp)
ScannerID scannerOpenWithScan(Text tableName, TScan scan)
ScannerID scannerOpen(Text tableName, Text startRow, columns)
ScannerID scannerOpenWithStop(Text tableName, Text startRow, Text stopRow, columns)
ScannerID scannerOpenWithPrefix(Text tableName, Text startAndPrefix, columns)
ScannerID scannerOpenTs(Text tableName, Text startRow, columns, i64 timestamp)
ScannerID scannerOpenWithStopTs(Text tableName, Text startRow, Text stopRow, columns, i64 timestamp)
scannerGet(ScannerID id)
scannerGetList(ScannerID id, i32 nbRows)
void scannerClose(ScannerID id)