源碼解讀--(3)hbase-examples MultiThreadedClientExample

源碼解讀--(1)hbase客戶端源代碼 http://aperise.iteye.com/blog/2372350
源碼解讀--(2)hbase-examples BufferedMutator Example http://aperise.iteye.com/blog/2372505
源碼解讀--(3)hbase-examples MultiThreadedClientExample http://aperise.iteye.com/blog/2372534

1.輕量級的table,重量級的connection

    hbase-examples裏面提供的另外一個例子MultiThreadedClientExample,示範了另外一種使用hbase客戶端的方式:在這個例子裏面,table是一個輕量級的對象,在線程啓動時創建、退出時銷燬;而table後面的connection從未關閉過,connection是重量級的對象,一直維持着和zookeeper的鏈接、異步操作和其他狀態。我們可以從中學習到另外一種多線程操作hbase客戶端的方式。

 

2.MultiThreadedClientExample

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
/**
 * Demonstrates how to drive the HBase client from many threads at once.
 *
 * <p>A {@link Table} is treated as a lightweight object: each task obtains one when it starts
 * and closes it when it finishes. A {@link Connection} is the heavyweight object (it keeps the
 * ZooKeeper link, asynchronous machinery and other state alive), so only two are created per
 * run -- one for writes and one for reads -- and they are shared by all tasks.
 *
 * <p>By default 500000 operations are submitted, split randomly into roughly 30% batch writes,
 * 20% single-row writes and 50% scans.
 */
public class MultiThreadedClientExample extends Configured implements Tool {
    private static final Log LOG = LogFactory.getLog(MultiThreadedClientExample.class);
    private static final int DEFAULT_NUM_OPERATIONS = 500000;

    // Every write targets column family "d", qualifier "test".
    private static final byte[] FAMILY = Bytes.toBytes("d");
    private static final byte[] QUAL = Bytes.toBytes("test");

    /** Pool that executes the read/write example tasks. */
    private final ExecutorService internalPool;
    /** Size of {@link #internalPool}; twice this value sizes the pool handed to the connections. */
    private final int threads;

    /**
     * Creates the example together with its fixed-size pool of daemon worker threads.
     *
     * @throws IOException declared for interface compatibility; nothing in the body throws it
     */
    public MultiThreadedClientExample() throws IOException {
        // Four worker threads per available CPU core.
        this.threads = Runtime.getRuntime().availableProcessors() * 4;

        // Daemon threads with a recognisable name, so they never keep the JVM alive on their own.
        ThreadFactory threadFactory =
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("internal-pol-%d").build();
        this.internalPool = Executors.newFixedThreadPool(threads, threadFactory);
    }

    /**
     * Runs the example against the given table.
     *
     * @param args {@code tableName [num_operations]}: the table name is required, the number of
     *             operations is optional and defaults to {@value #DEFAULT_NUM_OPERATIONS}
     * @return {@code 0} on success, {@code -1} when the argument count is wrong
     * @throws Exception if a connection cannot be created, or an operation fails or does not
     *                   finish within ten minutes
     */
    @Override
    public int run(String[] args) throws Exception {
        // Exactly one or two arguments are accepted.
        if (args.length < 1 || args.length > 2) {
            System.out.println("Usage: " + this.getClass().getName() + " tableName [num_operations]");
            return -1;
        }

        final TableName tableName = TableName.valueOf(args[0]);
        int numOperations = DEFAULT_NUM_OPERATIONS;
        if (args.length == 2) {
            numOperations = Integer.parseInt(args[1]);
        }

        // Executor handed to both connections. Because we created it, we must shut it down.
        ExecutorService service = new ForkJoinPool(threads * 2);

        // Separate connections for writes and reads. try-with-resources guarantees both are
        // closed, even when an operation fails or times out.
        try (Connection writeConnection = ConnectionFactory.createConnection(getConf(), service);
             Connection readConnection = ConnectionFactory.createConnection(getConf(), service)) {

            // Pre-load the table's region locations into each connection's cache.
            warmUpConnectionCache(readConnection, tableName);
            warmUpConnectionCache(writeConnection, tableName);

            runOperations(writeConnection, readConnection, tableName, numOperations);
        } finally {
            // Runs after the connections have been closed.
            service.shutdownNow();
        }
        return 0;
    }

    /**
     * Submits {@code numOperations} randomly chosen tasks to the worker pool and waits for all
     * of them, then shuts the worker pool down whether or not they succeeded.
     */
    private void runOperations(Connection writeConnection, Connection readConnection,
                               TableName tableName, int numOperations) throws Exception {
        try {
            List<Future<Boolean>> futures = new ArrayList<Future<Boolean>>(numOperations);
            for (int i = 0; i < numOperations; i++) {
                double r = ThreadLocalRandom.current().nextDouble();
                Future<Boolean> f;

                if (r < .30) { // 30%: batch write
                    f = internalPool.submit(new WriteExampleCallable(writeConnection, tableName));
                } else if (r < .50) { // 20%: single-row write
                    f = internalPool.submit(new SingleWriteExampleCallable(writeConnection, tableName));
                } else { // 50%: scan, served by the dedicated read connection
                    f = internalPool.submit(new ReadExampleCallable(readConnection, tableName));
                }
                futures.add(f);
            }

            // Wait for every operation, allowing each at most ten minutes.
            for (Future<Boolean> f : futures) {
                f.get(10, TimeUnit.MINUTES);
            }
        } finally {
            // Stop the workers before the caller closes the connections they are using.
            internalPool.shutdownNow();
        }
    }

    /**
     * Loads all region locations of {@code tn} into the connection's location cache and logs how
     * many were found.
     *
     * <p>NOTE(review): the original author advises against doing this for tables with a very
     * large number of regions (the figure given was 250000) -- confirm before using in production.
     */
    private void warmUpConnectionCache(Connection connection, TableName tn) throws IOException {
        try (RegionLocator locator = connection.getRegionLocator(tn)) {
            LOG.info("Warmed up region location cache for " + tn + " got " + locator.getAllRegionLocations().size());
        }
    }

    /**
     * Batch-write task (about 30% of the operations): writes 30 rows with random row keys in a
     * single {@code put} call.
     */
    public static class WriteExampleCallable implements Callable<Boolean> {
        private final Connection connection;
        private final TableName tableName;

        public WriteExampleCallable(Connection connection, TableName tableName) {
            this.connection = connection;
            this.tableName = tableName;
        }

        /**
         * Writes one batch of rows.
         *
         * @return always {@code true}; failures surface as exceptions
         */
        @Override
        public Boolean call() throws Exception {
            // try-with-resources closes the Table when the block exits; the shared connection
            // stays open.
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
            try (Table t = connection.getTable(tableName)) {
                byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));
                int rows = 30;

                // Collect the whole batch first; every row carries the same random value.
                List<Put> puts = new ArrayList<>(rows);
                for (int i = 0; i < rows; i++) {
                    byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
                    Put p = new Put(rk);
                    p.addImmutable(FAMILY, QUAL, value);
                    puts.add(p);
                }

                // Send the batch in one call.
                t.put(puts);
            }
            return true;
        }
    }

    /**
     * Single-write task (about 20% of the operations): writes one row with a random row key.
     */
    public static class SingleWriteExampleCallable implements Callable<Boolean> {
        private final Connection connection;
        private final TableName tableName;

        public SingleWriteExampleCallable(Connection connection, TableName tableName) {
            this.connection = connection;
            this.tableName = tableName;
        }

        /**
         * Writes one row.
         *
         * @return always {@code true}; failures surface as exceptions
         */
        @Override
        public Boolean call() throws Exception {
            // try-with-resources closes the Table when the block exits.
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
            try (Table t = connection.getTable(tableName)) {
                byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));
                byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
                Put p = new Put(rk);
                p.addImmutable(FAMILY, QUAL, value);
                t.put(p);
            }
            return true;
        }
    }

    /**
     * Scan task (about 50% of the operations): scans up to 100 rows starting at a random row key.
     */
    public static class ReadExampleCallable implements Callable<Boolean> {
        private final Connection connection;
        private final TableName tableName;

        public ReadExampleCallable(Connection connection, TableName tableName) {
            this.connection = connection;
            this.tableName = tableName;
        }

        /**
         * Runs one bounded scan.
         *
         * @return {@code true} if the row keys read had a combined length greater than zero,
         *         i.e. the scan returned at least one row
         */
        @Override
        public Boolean call() throws Exception {
            // Total length in bytes of all row keys read.
            int result = 0;

            // Number of rows the scan will read before being considered done.
            int toRead = 100;
            try (Table t = connection.getTable(tableName)) {
                // Random row key at which the scan starts.
                byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
                Scan s = new Scan(rk);

                // Only the keys are needed here, so ask the server to leave the values out.
                s.setFilter(new KeyOnlyFilter());

                // Fetch 20 rows per round trip.
                s.setCaching(20);

                // Do not populate the server-side block cache with the blocks this scan touches.
                s.setCacheBlocks(false);

                // try-with-resources closes the ResultScanner when the block exits.
                // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
                try (ResultScanner rs = t.getScanner(s)) {
                    for (Result r : rs) {
                        result += r.getRow().length;
                        toRead -= 1;

                        // Stop once 100 rows have been read.
                        if (toRead <= 0) {
                            break;
                        }
                    }
                }
            }
            return result > 0;
        }
    }

    /**
     * Entry point: lets {@link ToolRunner} invoke {@link #run(String[])} with the command-line
     * arguments.
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new MultiThreadedClientExample(), args);
    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章