利用aio+epoll簡單改寫sendfile

其實說白了,就是想直接嘗試把aio + epoll結合起來寫代碼,再借用sendfile的接口,也就是替換掉sendfile,自己隨便玩玩文件傳輸。我採用的是linux原生的libaio;glibc實現的aio貌似很多人在吐槽,性能也不好,直接被拋棄了。
這裏提一下linux libaio的相關結構體,這些可以在/usr/include/libaio.h文件看到,也有io_prep_pread和io_prep_pwrite的實現。

//  iocb is what gets submitted for an I/O task; it fully describes one I/O request.
//  (Shown here for reference; per the article the real definition is in /usr/include/libaio.h.)
struct iocb {

     void *data;  // user-defined pointer; can hold the callback to invoke once the I/O completes

     unsigned key;  // not used in this article's code

     short aio_lio_opcode; // operation type: IO_CMD_PWRITE | IO_CMD_PREAD

     short aio_reqprio;  // priority; per the author it looks unimplemented in the kernel, probably a reserved field

     int aio_fildes;   // the file descriptor to operate on

     union {

         struct io_iocb_common c;  // the commonly used member, described below

         struct io_iocb_vector v;

         struct io_iocb_poll poll;

         struct io_iocb_sockaddr saddr;

    } u;
};

 // Common request parameters, reached through iocb.u.c.
 struct io_iocb_common {

        void *buf;              //  buffer the I/O operates on

        unsigned long nbytes;   //  number of bytes requested for the I/O

        long long offset;       //  file offset the I/O starts at

        unsigned flags;         //  per the author, set when combining aio with epoll

        unsigned resfd;         //  per the author, set when combining aio with epoll

 };

// io_event describes the result of one completed request.
struct io_event {

    void *data;         // the iocb's data pointer; the code below casts it to the completion callback

    struct iocb *obj;   // the iocb that was submitted for this I/O

    unsigned long res;   // number of bytes processed

    unsigned long res2;  // secondary result; the author notes 0 here for a normally handled aio request (the callback below only logs when it is non-zero)

    };

sendfile的接口是 n = sendfile64(fd, ifd, (off64_t*)&offset64, count); 這裏就是替換它玩玩。count是需要發送的長度,如果count >= 100k我就採用aio讀取。最多有128個異步IO同時存在,每個IO最多讀取4k,最後再將每個IO讀到的數據拷貝到一個大buf裏面,再用send做發送處理。寫這個代碼,讓我知道了很多細節處理,下面是部分代碼展示。

/* Upper bound on the number of aio read requests in flight at once. */
#define  AIO_NUM_EVENTS   128
/* Alignment passed to posix_memalign() for each per-request read buffer. */
#define  ALIGN_SIZE       512
/* Bytes requested by a single aio read. */
#define  RD_WR_SIZE       (4 * 1024)
/* Staging buffer capacity: one full batch of maximum-size reads (512 KiB). */
#define  BUF_SIZE_VAYNEDU  (AIO_NUM_EVENTS * RD_WR_SIZE)

/* Number of aio requests used by the current sendfile_aio_test() call. */
static int aio_num = 0;
/* Staging area the completion callback copies into; one spare trailing byte. */
char send_buf_vaynedu[BUF_SIZE_VAYNEDU + 1] = {0};
/* File offset at which the current batch starts (base for buffer offsets). */
uint64_t   offset_vaynedu;

// Completion callback for one aio read: copies the bytes that were actually
// read into send_buf_vaynedu, at the position matching the request's file
// offset relative to offset_vaynedu.
//
//   ctx  - aio context the request belonged to (unused here)
//   iocb - the completed request; u.c.buf / u.c.offset / u.c.nbytes are read
//   res  - completion result: bytes read when positive; <= 0 means nothing usable
//   res2 - secondary result; only logged when non-zero
//
// Nothing is copied when the read produced no data, and the copy is clamped so
// it can never run outside send_buf_vaynedu (the last byte is left untouched).
void aio_callback(io_context_t ctx, struct iocb *iocb, long res, long res2)
{
    const size_t capacity = sizeof(send_buf_vaynedu) - 1;
    const char *src = (const char *)iocb->u.c.buf;
    long long offset = (long long)iocb->u.c.offset - (long long)offset_vaynedu;
    size_t nbytes;

    (void)ctx;

    if(res2 != 0){
       printf("aio read\n");
    }

    // Failed read or EOF: the request buffer holds no valid data.
    if(res <= 0){
        return;
    }

    // Copy what was read, never more than what was requested.
    nbytes = (size_t)res;
    if(nbytes > iocb->u.c.nbytes){
        nbytes = iocb->u.c.nbytes;
    }

    // Reject offsets outside the staging buffer and clamp the tail.
    if(offset < 0 || (unsigned long long)offset >= capacity){
        return;
    }
    if(nbytes > capacity - (size_t)offset){
        nbytes = capacity - (size_t)offset;
    }

    memcpy(send_buf_vaynedu + offset, src, nbytes);
}

// Small-transfer path (used for requests below 100k): one pread() of up to
// `count` bytes from `ifd` at *offset64 into a heap buffer, then send() to
// `ofd` in chunks of at most 8192 bytes.
//
//   ofd      - socket to send to
//   ifd      - file to read from
//   offset64 - file offset to read at; it is NOT advanced by this function
//   count    - maximum number of bytes to transfer
//
// Returns the number of bytes sent, which may be less than `count` when the
// file is shorter or the socket accepts only part of a chunk. Returns -1
// (errno preserved) when the buffer cannot be allocated, when pread() fails,
// or when send() fails before any byte was sent.
int sendfile_small(int ofd, int ifd, off64_t *offset64, size_t count)
{
    enum { SEND_CHUNK = 8192 };
    ssize_t pread_num;
    ssize_t num;
    size_t send_len;
    int send_num = 0;
    int send_failed = 0;
    int saved_errno = 0;
    char *buf;
    char *p;

    // Nothing to do; also avoids malloc(0), which may legitimately return NULL.
    if(count == 0){
        return 0;
    }

    buf = malloc(count);
    if(buf == NULL){
        saved_errno = errno;
        printf("malloc failed\n");
        errno = saved_errno;
        return -1;
    }
    p = buf;

    pread_num = pread(ifd, buf, count, *offset64);
    if(pread_num < 0){
        saved_errno = errno;
        printf("pread failed: %d\n", (int)pread_num);
        free(buf);
        errno = saved_errno;
        return -1;
    }else if(pread_num == 0){
        printf("pread finished\n");
    }

    while(pread_num > 0){
        send_len = pread_num > SEND_CHUNK ? (size_t)SEND_CHUNK : (size_t)pread_num;
        num = send(ofd, p, send_len, 0);
        if(num < 0){
            // Keep the error out of the byte count; report it only if nothing was sent.
            saved_errno = errno;
            if(saved_errno == EAGAIN){
                printf("sendfile_small %s\n", strerror(saved_errno));
            }
            send_failed = 1;
            break;
        }

        send_num += (int)num;
        pread_num -= num;
        p += num;

        // Partial send: the socket took less than offered, so stop and
        // let the caller retry with the remainder.
        if((size_t)num < send_len){
            printf("num = %d\n", (int)num);
            break;
        }
    }

    printf("sendfile_small : ifd = %d, offset64 = %llu, pread_num = %d, send_num = %d\n\n", ifd, (unsigned long long)*offset64, (int)pread_num, send_num);

    free(buf);

    if(send_failed && send_num == 0){
        errno = saved_errno;
        return -1;
    }
    return send_num;
}

// 模擬改寫的sendfile
int sendfile_aio_test(int ofd, int ifd, off64_t *offset64, size_t count)
{
    int i, j, num; 
    int ret;
    int efd, epfd;
    io_context_t  ctx;
    struct iocb **ios;
    struct iocb *iosp;
    struct io_event *events;
    struct epoll_event epevent;
    struct timespec tms;
    char  **iobuf = NULL;

    offset_vaynedu = *offset64;
    if(count < 102400){
        return sendfile_small(ofd, ifd, offset64, count);
    }else{
        aio_num = AIO_NUM_EVENTS > (count/4096) ? (count/4096):AIO_NUM_EVENTS;
    }

    ios = (struct iocb **)malloc(aio_num * sizeof(struct iocb *));
    iosp = (struct iocb *)malloc(aio_num * sizeof(struct iocb));
    events = (struct io_event*)malloc(aio_num * sizeof(struct io_event));
    iobuf = (char **)malloc(aio_num * sizeof(char *));

    efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if(efd == -1){
       printf("eventfd failed\n");
       exit(1);
    }

    memset(&ctx, 0, sizeof(ctx));
    ret = io_setup(aio_num, &ctx);
    if(ret < 0){
       printf("is_setup failed\n");
           exit(1);
    }
    for(i = 0; i < aio_num; i++){
         ret = posix_memalign((void  **)&iobuf[i], ALIGN_SIZE, RD_WR_SIZE); 
        if(ret < 0){
           printf("posix_memalign failed\n");
           exit(1);
          }  
    }

    //memset(ios, 0, aio_num * sizeof(struct iocb *));
    //memset(iobuf, 0 ,aio_num * RD_WR_SIZE);
    memset(iosp, 0, aio_num * sizeof(struct iocb));
    for(i = 0; i < aio_num; i++){
        ios[i] = &iosp[i];
        io_prep_pread(&iosp[i], ifd, iobuf[i], RD_WR_SIZE, i*RD_WR_SIZE+(*offset64));
        io_set_eventfd(&iosp[i], efd);
        io_set_callback(&iosp[i], aio_callback);
    //  printf("ios[%d].u.c.offset = %d, ios[%d].u.c.nbytes = %d, ios[%d].aio_fildes = %d\n\n", 
    //          i, ios[i]->u.c.offset, i, ios[i]->u.c.nbytes, i, ios[i]->aio_fildes);
    } 

//  printf("aio_num = %d\n", aio_num);
    ret = io_submit(ctx, aio_num, ios);
    if(ret != aio_num){
        if(ret < 0){
          printf("io_submit error:%s\n", strerror(-ret));
        }else{
          printf("io_submit failed,could not submit IOs ret:%d, aio_num:%d\n", ret, aio_num);
        }
         exit(1);
    }

    epfd = epoll_create(1);
    if(epfd == -1){
       printf("epoll_create failed\n");
       exit(1);
    }

    epevent.events = EPOLLIN | EPOLLET;
    epevent.data.ptr = NULL;

    ret = epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &epevent);
    if(ret < 0){
       printf("epoll_ctl failed\n");
       exit(1);
    }

    i = 0;
    while(i < aio_num){
        uint64_t  finished_aio; 

        if(epoll_wait(epfd, &epevent, 1, -1) != 1){
           printf("epoll_wait failed\n");
           exit(1);
        }

        if(read(efd, &finished_aio, sizeof(finished_aio)) != sizeof(finished_aio)){
           printf("read failed\n");
           exit(1);
        }

//      printf("finished_aio = %d\n", finished_aio);

        while(finished_aio >  0){
            tms.tv_sec = 0;
            tms.tv_nsec = 0;
            num = io_getevents(ctx, 1, aio_num, events, &tms);
            if(num > 0){
               for(j = 0; j < num; j++){
                  ((io_callback_t)(events[j].data))(ctx, events[j].obj,
                                                    events[j].res,
                                                    events[j].res2);
               }
               i += num;
               finished_aio -= num;
            } 

        }

    }

    int send_num = 0;
    int count_num = 0;
    int send_buf_vaynedu_len = strlen(send_buf_vaynedu);
    char *p = send_buf_vaynedu;
    printf("send_buf_vaynedu = %d,    ", send_buf_vaynedu_len);
    while(send_buf_vaynedu_len > 0){
      num = send(ofd, p, 4096, 0);
      send_num += num;
      if(num >= 0 && num < 4096){
         // printf("num = %d\n", num); 
         break;    
      }else if( num < 0){
         if(errno == EAGAIN){
           printf("send_aio_test: %s\n", strerror(errno));
           //continue;
         }
         break;
      }

      send_buf_vaynedu_len -= num;

      p += 4096;

    }
    printf("send_num = %d, send_buf_vaynedu = %d\n", send_num, send_buf_vaynedu_len);
    if((aio_num < AIO_NUM_EVENTS) && (count-aio_num *4096 > 0)){
        uint64_t offset64_small = aio_num * RD_WR_SIZE + *offset64;
        count_num =  sendfile_small(ofd, ifd, (off64_t *)&offset64_small, count - aio_num*4096);
        printf("aio_num = %d, count_num = %d\n", aio_num, count_num);
    }
    memset(send_buf_vaynedu, 0, 512 * 1024 + 1);

    close(epfd);
    close(efd);
    free(iosp);
    free(ios);
    free(events);
    for(i = 0; i < aio_num; i++){
       free(iobuf[i]);
    }

    io_destroy(ctx);
    if(count_num < 0){
       count_num = 0;
    }

    return send_num + count_num; 

}

代碼有嚴重的性能問題,因爲僅僅是玩玩,熟悉一下aio的用法。

參考:
http://www.kuqin.com/linux/20120908/330333.html
http://backend.blog.163.com/blog/static/20229412620135257159731/

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章