golang UDP發送實在太慢了 系統調用,上下文切換消耗可觀。

golang UDP WriteToUDP太慢了

由於發送大量的小包,導致系統調用過於頻繁,Packet Per Second: PPS=152300

golang目前沒有提供C中的API: sendmmsg, 這是個批量發送數據包的接口,一次系統調用可以發多個包。

udp_test.go

package main

import (
	"fmt"
	"net"
	"sync"
	"testing"
	"time"
)

// ListenUDP resolves addrStr as a UDP address and opens a UDP socket bound to
// it. On success it returns the resolved address and the connection. On
// failure it prints the error to stdout and returns it with nil results.
func ListenUDP(addrStr string) (*net.UDPAddr, *net.UDPConn, error) {
	fmt.Println("Try to listen:", addrStr)

	udpAddr, resolveErr := net.ResolveUDPAddr("udp", addrStr)
	if resolveErr != nil {
		fmt.Println("resolve udp addr failed:", resolveErr)
		return nil, nil, resolveErr
	}

	udpConn, listenErr := net.ListenUDP("udp", udpAddr)
	if listenErr != nil {
		fmt.Println("listen udp svr failed:", listenErr)
		return nil, nil, listenErr
	}

	return udpAddr, udpConn, nil
}
// TestUDPSendSpeed measures how many small UDP packets per second can be
// pushed through a single *net.UDPConn using WriteToUDP from M concurrent
// goroutines, each sending N packets, and prints the resulting PPS.
func TestUDPSendSpeed(t *testing.T) {
	_, svrConn, err := ListenUDP("127.0.0.1:21117")
	if err != nil {
		t.Fatal(err)
	}
	defer svrConn.Close()

	// A failure to enlarge the buffer only affects throughput, so it is
	// logged rather than treated as fatal.
	if err := svrConn.SetWriteBuffer(300000000); err != nil { //300MB
		t.Log("set write buffer failed:", err)
	}

	cliAddr, err := net.ResolveUDPAddr("udp", "127.0.0.1:29875")
	if err != nil {
		t.Fatal(err)
	}

	data := []byte("Hello, test small packet")
	N := 100000
	M := 4

	begin := time.Now()
	var wg sync.WaitGroup
	for j := 0; j < M; j++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := 0; i < N; i++ {
				n, err := svrConn.WriteToUDP(data, cliAddr)
				if err != nil || n != len(data) {
					// t.Fatal must only be called from the goroutine running
					// the test; report with t.Error and stop this sender.
					t.Error(err, n, len(data))
					return
				}
			}
		}()
	}
	wg.Wait()

	// time.Since uses the monotonic clock reading, so the measurement is
	// not disturbed by wall-clock adjustments.
	elapsed := time.Since(begin)
	usedTimeMS := float64(elapsed.Nanoseconds()) / 1e6
	usedTimeS := elapsed.Seconds()
	fmt.Printf("Send %d packets, use %.2fms, PPS:%.0f\n", N*M, usedTimeMS, float64(N*M)/usedTimeS)
}

 

 

go test -v -cpuprofile=cpu.prof udp_test.go 
=== RUN   TestUDPSendSpeed
Try to listen: 127.0.0.1:21117
Send 1000000 packets, use 6565.98ms, PPS:152300
--- PASS: TestUDPSendSpeed (6.57s)
PASS
ok      command-line-arguments  6.713s

#性能分析
go tool pprof -http=:7897 cpu.prof 
Serving web UI on http://localhost:7897

 

如果使用C++呢 直接調用sendto呢?

經過多次運行,發現單線程效率最好,可達到25W PPS。 這也是單線程的極限了。

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <netdb.h>
#include <time.h>
//#include <gperftools/profiler.h>

#include <iostream>
#include <chrono>
#include <thread>
using namespace std;

// Size in bytes of the file-scope buffer declared below.
#define BUF_SIZE 64
// File-scope buffer. The send functions in this listing work on their own
// local buffers instead of this one.
char data[BUF_SIZE];
/**
 * Builds an IPv4 socket address for ip:port.
 *
 * @param ip   Numeric IPv4 literal such as "127.0.0.1". When it is null or
 *             cannot be parsed, the wildcard address INADDR_ANY is used and
 *             (for a non-null string) a warning is written to stderr.
 * @param port Port number in host byte order.
 * @return A zero-initialised sockaddr_in with family AF_INET and the address
 *         and port stored in network byte order.
 */
sockaddr_in BuildAddr(const char *ip, int port)
{
    sockaddr_in svrAddr;
    memset(&svrAddr, 0, sizeof(svrAddr));
    svrAddr.sin_family = AF_INET;
    svrAddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svrAddr.sin_port = htons(static_cast<unsigned short>(port));

    if (ip != nullptr)
    {
        // AI_NUMERICHOST: only parse the literal, never perform a DNS lookup.
        addrinfo hints;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_INET;
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_flags = AI_NUMERICHOST;

        addrinfo *found = nullptr;
        if (getaddrinfo(ip, nullptr, &hints, &found) == 0 && found != nullptr)
        {
            svrAddr.sin_addr = reinterpret_cast<sockaddr_in *>(found->ai_addr)->sin_addr;
            freeaddrinfo(found);
        }
        else
        {
            fprintf(stderr, "BuildAddr: invalid IPv4 address \"%s\", using INADDR_ANY\n", ip);
        }
    }
    return svrAddr;
}
/**
 * Creates an IPv4 UDP socket and binds it to the address produced by
 * BuildAddr(ip, port).
 *
 * @return The bound socket descriptor, or a negative value on failure. The
 *         reason is reported with perror and no descriptor is left open.
 */
int ListenUDP(const char *ip, int port)
{
    sockaddr_in svrAddr = BuildAddr(ip, port);
    int sockFd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockFd < 0)
    {
        perror("create socket failed!");
        return sockFd;
    }
    if (bind(sockFd, reinterpret_cast<sockaddr *>(&svrAddr), sizeof(svrAddr)) < 0)
    {
        perror("bind svr addr failed");
        close(sockFd); // the caller only sees -1, so release the descriptor here
        return -1;
    }
    return sockFd;
}

/**
 * Sends n UDP datagrams of `size` bytes each through svrFd, one sendto()
 * call per datagram, to the address built from targetIP:targetPort.
 *
 * The payload is zero-filled. A failed sendto() is reported with perror and
 * the loop continues (best effort). A negative size sends nothing.
 */
void SendPacket(int n, int size, int svrFd, const char *targetIP, int targetPort)
{
    if (size < 0)
    {
        fprintf(stderr, "SendPacket: negative packet size %d\n", size);
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    // Value-initialised so that no uninitialised heap bytes go on the wire.
    char *payload = new char[size]();

    for (int i = 0; i < n; i++)
    {
        ssize_t ret = sendto(svrFd, payload, size, 0,
                             reinterpret_cast<sockaddr *>(&targetAddr), sizeof(targetAddr));
        if (ret < 0)
        {
            perror("sendto failed");
        }
    }

    delete[] payload;
}
/**
 * Sends n batches of m UDP datagrams (`size` zero-filled bytes each) through
 * svrFd using sendmmsg(), to the address built from targetIP:targetPort.
 *
 * @param n    Number of batches.
 * @param m    Datagrams per batch (per sendmmsg round).
 * @param size Payload size in bytes of every datagram.
 *
 * When sendmmsg() accepts only part of a batch, the remainder is retried.
 * On an error the rest of that batch is dropped and the next batch starts.
 * Non-positive n or m, or a negative size, sends nothing.
 */
void SendMsgPacket(int n, int m, int size, int svrFd, const char *targetIP, int targetPort)
{
    if (n <= 0 || m <= 0 || size < 0)
    {
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    char *payload = new char[size](); // zero-filled, nothing uninitialised is sent

    struct iovec chunk;
    chunk.iov_base = payload;
    chunk.iov_len = static_cast<size_t>(size);

    // Value-initialise so msg_control, msg_controllen and msg_flags are zero;
    // the kernel reads those fields from every header.
    mmsghdr *batch = new mmsghdr[m]();
    for (int i = 0; i < m; i++)
    {
        batch[i].msg_hdr.msg_name = &targetAddr;
        batch[i].msg_hdr.msg_namelen = sizeof(targetAddr);
        batch[i].msg_hdr.msg_iov = &chunk;
        batch[i].msg_hdr.msg_iovlen = 1;
    }

    for (int round = 0; round < n; round++)
    {
        int sent = 0;
        while (sent < m)
        {
            // Resume after the messages that were already accepted.
            int ret = sendmmsg(svrFd, batch + sent, static_cast<unsigned int>(m - sent), 0);
            if (ret <= 0)
            {
                if (ret < 0)
                {
                    perror("sendmmsg failed");
                }
                break; // never let a failure spin this loop forever
            }
            sent += ret;
        }
    }
    delete[] batch;
    delete[] payload;
}
/// Benchmark helper: forwards to SendPacket to send n datagrams of 32 bytes
/// through svrFd to 127.0.0.1:55667.
void TestSendThread(int n, int svrFd)
{
    const int packetBytes = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 55667;
    SendPacket(n, packetBytes, svrFd, destIp, destPort);
}
/// Benchmark helper: forwards to SendMsgPacket with n batches of m messages,
/// a size argument of 32, and the destination 127.0.0.1:55667.
void TestSendMsgThread(int n, int m, int svrFd)
{
    const int sizeArg = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 55667;
    SendMsgPacket(n, m, sizeArg, svrFd, destIp, destPort);
}
/**
 * Single-threaded benchmark: binds a UDP socket on port 65432, sends n
 * packets with TestSendThread and prints the elapsed time and packets per
 * second. Returns 1 without sending anything when the socket cannot be set up.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    int svrFd = ListenUDP("127.0.0.1", 65432);
    if (svrFd < 0)
    {
        perror("listen upd failed");
        return 1; // sending on an invalid descriptor would only measure failures
    }
    printf("UDP Server at 127.0.0.1:65432\n");

    //ProfilerStart("udpsend.prof");
    auto begin = std::chrono::steady_clock::now();
    int n = 100000;
    int m = 100; // batch size, only used by the sendmmsg variant below
    (void)m;
    TestSendThread(n, svrFd);
    //TestSendMsgThread(n, m, svrFd);
    auto end = std::chrono::steady_clock::now();
    //ProfilerStop();
    double dr_ms = std::chrono::duration<double, std::milli>(end - begin).count();
    printf("Use time:%.2fms, PPS:%.0f\n", dr_ms, n / (dr_ms / 1000));
    close(svrFd);
    return 0;
}
//g++ -O3 -o send_speed send_speed.cpp -lpthread

//g++  -g -pg -o send_speed send_speed.cpp -lpthread

/*
g++ -O3 -o send_speed send_speed.cpp 
valgrind --tool=callgrind ./send_speed
python ../../gprof2dot/gprof2dot.py -f callgrind -n 10 -s callgrind.out.2938509 > val.dot
dot -Tpng val.dot -o val.png
python -m http.server 7897
*/
./send_speed 
UDP Server at 127.0.0.1:65432
Use time:383.56ms, PPS:260712

./send_speed 
UDP Server at 127.0.0.1:65432
Use time:382.87ms, PPS:261182

 ./send_speed 
UDP Server at 127.0.0.1:65432
Use time:381.96ms, PPS:261810

經過分析發現,sendto系統調用佔用了34%

 

C++多線程 sendto

約120W PPS

本機是8核,所以用8線程,發送速度幾乎提升了8倍,CPU佔用終於到了755%。但是golang使用多線程卻不能提升:golang的WriteToUDP自帶鎖,所以只能達到單核心的25W PPS。

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <netdb.h>
#include <time.h>
//#include <gperftools/profiler.h>

#include <iostream>
#include <chrono>
#include <thread>
using namespace std;

// Size in bytes of the file-scope buffer declared below.
#define BUF_SIZE 64
// File-scope buffer. The send functions in this listing work on their own
// local buffers instead of this one.
char data[BUF_SIZE];
/**
 * Builds an IPv4 socket address for ip:port.
 *
 * @param ip   Numeric IPv4 literal such as "127.0.0.1". When it is null or
 *             cannot be parsed, the wildcard address INADDR_ANY is used and
 *             (for a non-null string) a warning is written to stderr.
 * @param port Port number in host byte order.
 * @return A zero-initialised sockaddr_in with family AF_INET and the address
 *         and port stored in network byte order.
 */
sockaddr_in BuildAddr(const char *ip, int port)
{
    sockaddr_in svrAddr;
    memset(&svrAddr, 0, sizeof(svrAddr));
    svrAddr.sin_family = AF_INET;
    svrAddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svrAddr.sin_port = htons(static_cast<unsigned short>(port));

    if (ip != nullptr)
    {
        // AI_NUMERICHOST: only parse the literal, never perform a DNS lookup.
        addrinfo hints;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_INET;
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_flags = AI_NUMERICHOST;

        addrinfo *found = nullptr;
        if (getaddrinfo(ip, nullptr, &hints, &found) == 0 && found != nullptr)
        {
            svrAddr.sin_addr = reinterpret_cast<sockaddr_in *>(found->ai_addr)->sin_addr;
            freeaddrinfo(found);
        }
        else
        {
            fprintf(stderr, "BuildAddr: invalid IPv4 address \"%s\", using INADDR_ANY\n", ip);
        }
    }
    return svrAddr;
}
/**
 * Creates an IPv4 UDP socket and binds it to the address produced by
 * BuildAddr(ip, port).
 *
 * @return The bound socket descriptor, or a negative value on failure. The
 *         reason is reported with perror and no descriptor is left open.
 */
int ListenUDP(const char *ip, int port)
{
    sockaddr_in svrAddr = BuildAddr(ip, port);
    int sockFd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockFd < 0)
    {
        perror("create socket failed!");
        return sockFd;
    }
    if (bind(sockFd, reinterpret_cast<sockaddr *>(&svrAddr), sizeof(svrAddr)) < 0)
    {
        perror("bind svr addr failed");
        close(sockFd); // the caller only sees -1, so release the descriptor here
        return -1;
    }
    return sockFd;
}

/**
 * Sends n UDP datagrams of `size` bytes each through svrFd, one sendto()
 * call per datagram, to the address built from targetIP:targetPort.
 *
 * The payload is zero-filled. A failed sendto() is reported with perror and
 * the loop continues (best effort). A negative size sends nothing.
 */
void SendPacket(int n, int size, int svrFd, const char *targetIP, int targetPort)
{
    if (size < 0)
    {
        fprintf(stderr, "SendPacket: negative packet size %d\n", size);
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    // Value-initialised so that no uninitialised heap bytes go on the wire.
    char *payload = new char[size]();

    for (int i = 0; i < n; i++)
    {
        ssize_t ret = sendto(svrFd, payload, size, 0,
                             reinterpret_cast<sockaddr *>(&targetAddr), sizeof(targetAddr));
        if (ret < 0)
        {
            perror("sendto failed");
        }
    }

    delete[] payload;
}
/**
 * Sends n batches of m UDP datagrams through svrFd using sendmmsg(), to the
 * address built from targetIP:targetPort. Every datagram carries the fixed
 * payload {"cmd":"echo"}.
 *
 * @param n    Number of batches.
 * @param m    Datagrams per batch (per sendmmsg round).
 * @param size Unused; kept so the signature matches SendPacket-style callers.
 *
 * When sendmmsg() accepts only part of a batch, the remainder is retried.
 * On an error the rest of that batch is dropped and the next batch starts.
 * Non-positive n or m sends nothing.
 */
void SendMsgPacket(int n, int m, int size, int svrFd, const char *targetIP, int targetPort)
{
    (void)size;
    if (n <= 0 || m <= 0)
    {
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    char payload[] = "{\"cmd\":\"echo\"}";

    struct iovec chunk;
    chunk.iov_base = payload;
    chunk.iov_len = strlen(payload);

    // Value-initialise so msg_control, msg_controllen and msg_flags are zero;
    // the kernel reads those fields from every header.
    mmsghdr *batch = new mmsghdr[m]();
    for (int i = 0; i < m; i++)
    {
        batch[i].msg_hdr.msg_name = &targetAddr;
        batch[i].msg_hdr.msg_namelen = sizeof(targetAddr);
        batch[i].msg_hdr.msg_iov = &chunk;
        batch[i].msg_hdr.msg_iovlen = 1;
    }

    for (int round = 0; round < n; round++)
    {
        int sent = 0;
        while (sent < m)
        {
            // Resume after the messages that were already accepted.
            int ret = sendmmsg(svrFd, batch + sent, static_cast<unsigned int>(m - sent), 0);
            if (ret <= 0)
            {
                if (ret < 0)
                {
                    perror("sendmmsg failed");
                }
                break; // never let a failure spin this loop forever
            }
            sent += ret;
        }
    }
    delete[] batch;
}
/// Thread body: forwards to SendPacket to send n datagrams of 32 bytes
/// through svrFd to 127.0.0.1:16666.
void TestSendThread(int n, int svrFd)
{
    const int packetBytes = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 16666;
    SendPacket(n, packetBytes, svrFd, destIp, destPort);
}
/// Thread body: forwards to SendMsgPacket with n batches of m messages, a
/// size argument of 32, and the destination 127.0.0.1:16666.
void TestSendMsgThread(int n, int m, int svrFd)
{
    const int sizeArg = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 16666;
    SendMsgPacket(n, m, sizeArg, svrFd, destIp, destPort);
}
int main(int argc, char *argv[])
{
    int svrFd = ListenUDP("127.0.0.1", 65432);
    if (svrFd < 0)
    {
        perror("listen upd failed");
    }
    else
    {
        printf("UDP Server at 127.0.0.1:65432\n");
    }
    int value = 200000000;
    ::setsockopt(svrFd, SOL_SOCKET, SO_RCVBUF, (char *)&value, sizeof(value));
    ::setsockopt(svrFd, SOL_SOCKET, SO_SNDBUF, (char *)&value, sizeof(value));
    //ProfilerStart("udpsend.prof");
    auto begin = chrono::steady_clock::now();
    int n = 1000000;
    int m = 1;
    //TestSendThread(n, svrFd);
    int t = 8;
    thread *threads[20];
    for (int i = 0; i < t; i++)
    {
        //threads[i] = new thread(TestSendMsgThread, n, m, svrFd);
        threads[i] = new thread(TestSendThread, n, svrFd);
    }
    for (int i = 0; i < t; i++)
    {
        threads[i]->join();
        delete threads[i];
    }

    auto end = chrono::steady_clock::now();
    //ProfilerStop();
    double dr_ms = std::chrono::duration<double, std::milli>(end - begin).count();
    printf("Use time:%.2fms, PPS:%.0f\n", dr_ms, n * m * t / (dr_ms / 1000));
    close(svrFd);
}

g++ -O3 -o send_speed send_speed.cpp -lpthread
./send_speed 
UDP Server at 127.0.0.1:65432
Use time:6903.19ms, PPS:1158884

 ./send_speed 
UDP Server at 127.0.0.1:65432
Use time:6737.55ms, PPS:1187375

 ./send_speed 
UDP Server at 127.0.0.1:65432
Use time:6742.78ms, PPS:1186455

 

C++多線程 sendmmsg

也就130W PPS,但是可以看出sendmmsg系統調用不是瓶頸。

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <netdb.h>
#include <time.h>
//#include <gperftools/profiler.h>

#include <iostream>
#include <chrono>
#include <thread>
using namespace std;

// Size in bytes of the file-scope buffer declared below.
#define BUF_SIZE 64
// File-scope buffer. The send functions in this listing work on their own
// local buffers instead of this one.
char data[BUF_SIZE];
/**
 * Builds an IPv4 socket address for ip:port.
 *
 * @param ip   Numeric IPv4 literal such as "127.0.0.1". When it is null or
 *             cannot be parsed, the wildcard address INADDR_ANY is used and
 *             (for a non-null string) a warning is written to stderr.
 * @param port Port number in host byte order.
 * @return A zero-initialised sockaddr_in with family AF_INET and the address
 *         and port stored in network byte order.
 */
sockaddr_in BuildAddr(const char *ip, int port)
{
    sockaddr_in svrAddr;
    memset(&svrAddr, 0, sizeof(svrAddr));
    svrAddr.sin_family = AF_INET;
    svrAddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svrAddr.sin_port = htons(static_cast<unsigned short>(port));

    if (ip != nullptr)
    {
        // AI_NUMERICHOST: only parse the literal, never perform a DNS lookup.
        addrinfo hints;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_INET;
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_flags = AI_NUMERICHOST;

        addrinfo *found = nullptr;
        if (getaddrinfo(ip, nullptr, &hints, &found) == 0 && found != nullptr)
        {
            svrAddr.sin_addr = reinterpret_cast<sockaddr_in *>(found->ai_addr)->sin_addr;
            freeaddrinfo(found);
        }
        else
        {
            fprintf(stderr, "BuildAddr: invalid IPv4 address \"%s\", using INADDR_ANY\n", ip);
        }
    }
    return svrAddr;
}
/**
 * Creates an IPv4 UDP socket and binds it to the address produced by
 * BuildAddr(ip, port).
 *
 * @return The bound socket descriptor, or a negative value on failure. The
 *         reason is reported with perror and no descriptor is left open.
 */
int ListenUDP(const char *ip, int port)
{
    sockaddr_in svrAddr = BuildAddr(ip, port);
    int sockFd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockFd < 0)
    {
        perror("create socket failed!");
        return sockFd;
    }
    if (bind(sockFd, reinterpret_cast<sockaddr *>(&svrAddr), sizeof(svrAddr)) < 0)
    {
        perror("bind svr addr failed");
        close(sockFd); // the caller only sees -1, so release the descriptor here
        return -1;
    }
    return sockFd;
}

/**
 * Sends n UDP datagrams of `size` bytes each through svrFd, one sendto()
 * call per datagram, to the address built from targetIP:targetPort.
 *
 * The payload is zero-filled. A failed sendto() is reported with perror and
 * the loop continues (best effort). A negative size sends nothing.
 */
void SendPacket(int n, int size, int svrFd, const char *targetIP, int targetPort)
{
    if (size < 0)
    {
        fprintf(stderr, "SendPacket: negative packet size %d\n", size);
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    // Value-initialised so that no uninitialised heap bytes go on the wire.
    char *payload = new char[size]();

    for (int i = 0; i < n; i++)
    {
        ssize_t ret = sendto(svrFd, payload, size, 0,
                             reinterpret_cast<sockaddr *>(&targetAddr), sizeof(targetAddr));
        if (ret < 0)
        {
            perror("sendto failed");
        }
    }

    delete[] payload;
}
/**
 * Sends n batches of m UDP datagrams through svrFd using sendmmsg(), to the
 * address built from targetIP:targetPort. Every datagram carries the fixed
 * payload {"cmd":"echo"}.
 *
 * @param n    Number of batches.
 * @param m    Datagrams per batch (per sendmmsg round).
 * @param size Unused; kept so the signature matches SendPacket-style callers.
 *
 * When sendmmsg() accepts only part of a batch, the remainder is retried.
 * On an error the rest of that batch is dropped and the next batch starts.
 * Non-positive n or m sends nothing.
 */
void SendMsgPacket(int n, int m, int size, int svrFd, const char *targetIP, int targetPort)
{
    (void)size;
    if (n <= 0 || m <= 0)
    {
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    char payload[] = "{\"cmd\":\"echo\"}";

    struct iovec chunk;
    chunk.iov_base = payload;
    chunk.iov_len = strlen(payload);

    // Value-initialise so msg_control, msg_controllen and msg_flags are zero;
    // the kernel reads those fields from every header.
    mmsghdr *batch = new mmsghdr[m]();
    for (int i = 0; i < m; i++)
    {
        batch[i].msg_hdr.msg_name = &targetAddr;
        batch[i].msg_hdr.msg_namelen = sizeof(targetAddr);
        batch[i].msg_hdr.msg_iov = &chunk;
        batch[i].msg_hdr.msg_iovlen = 1;
    }

    for (int round = 0; round < n; round++)
    {
        int sent = 0;
        while (sent < m)
        {
            // Resume after the messages that were already accepted.
            int ret = sendmmsg(svrFd, batch + sent, static_cast<unsigned int>(m - sent), 0);
            if (ret <= 0)
            {
                if (ret < 0)
                {
                    perror("sendmmsg failed");
                }
                break; // never let a failure spin this loop forever
            }
            sent += ret;
        }
    }
    delete[] batch;
}
/// Thread body: forwards to SendPacket to send n datagrams of 32 bytes
/// through svrFd to 127.0.0.1:16666.
void TestSendThread(int n, int svrFd)
{
    const int packetBytes = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 16666;
    SendPacket(n, packetBytes, svrFd, destIp, destPort);
}
/// Thread body: forwards to SendMsgPacket with n batches of m messages, a
/// size argument of 32, and the destination 127.0.0.1:16666.
void TestSendMsgThread(int n, int m, int svrFd)
{
    const int sizeArg = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 16666;
    SendMsgPacket(n, m, sizeArg, svrFd, destIp, destPort);
}
int main(int argc, char *argv[])
{
    int svrFd = ListenUDP("127.0.0.1", 65432);
    if (svrFd < 0)
    {
        perror("listen upd failed");
    }
    else
    {
        printf("UDP Server at 127.0.0.1:65432\n");
    }
    int value = 200000000;
    ::setsockopt(svrFd, SOL_SOCKET, SO_RCVBUF, (char *)&value, sizeof(value));
    ::setsockopt(svrFd, SOL_SOCKET, SO_SNDBUF, (char *)&value, sizeof(value));
    //ProfilerStart("udpsend.prof");
    auto begin = chrono::steady_clock::now();
    int n = 1000;
    int m = 1000;
    //TestSendThread(n, svrFd);
    int t = 8;
    thread *threads[20];
    for (int i = 0; i < t; i++)
    {
        threads[i] = new thread(TestSendMsgThread, n, m, svrFd);
        //threads[i] = new thread(TestSendThread, n, svrFd);
    }
    for (int i = 0; i < t; i++)
    {
        threads[i]->join();
        delete threads[i];
    }

    auto end = chrono::steady_clock::now();
    //ProfilerStop();
    double dr_ms = std::chrono::duration<double, std::milli>(end - begin).count();
    printf("Use time:%.2fms, PPS:%.0f\n", dr_ms, n * m * t / (dr_ms / 1000));
    close(svrFd);
}

//g++ -O3 -o send_speed send_speed.cpp -lpthread

//g++  -g -pg -o send_speed send_speed.cpp -lpthread

/*
g++ -O3 -o send_speed send_speed.cpp 
valgrind --tool=callgrind ./send_speed
python ../../gprof2dot/gprof2dot.py -f callgrind -n 10 -s callgrind.out.2938509 > val.dot
dot -Tpng val.dot -o val.png
python -m http.server 7897
*/
 ./send_speed 
UDP Server at 127.0.0.1:65432
Use time:6025.68ms, PPS:1327650

 ./send_speed 
UDP Server at 127.0.0.1:65432
Use time:5970.89ms, PPS:1339833

 ./send_speed 
UDP Server at 127.0.0.1:65432
Use time:6180.63ms, PPS:1294366

 

C++ 多socket同時發送

由於本機不支持SO_REUSEPORT,所以使用多個socket模擬。 PPS可達 170W。

操作系統版本:uname -a

Linux n227-020-135 4.14.81.bm.15-amd64 #1 SMP Debian 4.14.81.bm.15 Sun Sep 8 05:02:31 UTC 2019 x86_64 GNU/Linux

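發現TCP可以reuse port, UDP就不行。(注意:SO_REUSEPORT 必須在 bind() 之前設置才會生效,而且 SO_REUSEPORT 與 SO_REUSEADDR 是兩個獨立的選項,需要分別調用 setsockopt,不能用 | 合併;Linux 3.9 之後 UDP 同樣支持 SO_REUSEPORT,因此「UDP不行」的結論有待重新驗證。)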

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <netdb.h>
#include <time.h>
//#include <gperftools/profiler.h>

#include <iostream>
#include <chrono>
#include <thread>
using namespace std;

// Size in bytes of the file-scope buffer declared below.
#define BUF_SIZE 64
// File-scope buffer. The send functions in this listing work on their own
// local buffers instead of this one.
char data[BUF_SIZE];
/**
 * Builds an IPv4 socket address for ip:port.
 *
 * @param ip   Numeric IPv4 literal such as "127.0.0.1". When it is null or
 *             cannot be parsed, the wildcard address INADDR_ANY is used and
 *             (for a non-null string) a warning is written to stderr.
 * @param port Port number in host byte order.
 * @return A zero-initialised sockaddr_in with family AF_INET and the address
 *         and port stored in network byte order.
 */
sockaddr_in BuildAddr(const char *ip, int port)
{
    sockaddr_in svrAddr;
    memset(&svrAddr, 0, sizeof(svrAddr));
    svrAddr.sin_family = AF_INET;
    svrAddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svrAddr.sin_port = htons(static_cast<unsigned short>(port));

    if (ip != nullptr)
    {
        // AI_NUMERICHOST: only parse the literal, never perform a DNS lookup.
        addrinfo hints;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_INET;
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_flags = AI_NUMERICHOST;

        addrinfo *found = nullptr;
        if (getaddrinfo(ip, nullptr, &hints, &found) == 0 && found != nullptr)
        {
            svrAddr.sin_addr = reinterpret_cast<sockaddr_in *>(found->ai_addr)->sin_addr;
            freeaddrinfo(found);
        }
        else
        {
            fprintf(stderr, "BuildAddr: invalid IPv4 address \"%s\", using INADDR_ANY\n", ip);
        }
    }
    return svrAddr;
}
/**
 * Creates an IPv4 UDP socket with SO_REUSEADDR and SO_REUSEPORT enabled and
 * binds it to the address produced by BuildAddr(ip, port).
 *
 * A failure to set either option is reported with perror but is not fatal.
 *
 * @return The bound socket descriptor, or a negative value on failure. The
 *         reason is reported with perror and no descriptor is left open.
 */
int ListenUDP(const char *ip, int port)
{
    sockaddr_in svrAddr = BuildAddr(ip, port);
    int sockFd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockFd < 0)
    {
        perror("create socket failed!");
        return sockFd;
    }

    // Option names are plain integers, not bit flags: each one needs its own
    // setsockopt call. They must also be applied before bind() to influence it.
    const int enable = 1;
    if (setsockopt(sockFd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)) < 0)
    {
        perror("setsockopt(SO_REUSEADDR)");
    }
    if (setsockopt(sockFd, SOL_SOCKET, SO_REUSEPORT, &enable, sizeof(enable)) < 0)
    {
        perror("setsockopt(SO_REUSEPORT)");
    }

    if (bind(sockFd, reinterpret_cast<sockaddr *>(&svrAddr), sizeof(svrAddr)) < 0)
    {
        perror("bind svr addr failed");
        close(sockFd); // the caller only sees -1, so release the descriptor here
        return -1;
    }
    return sockFd;
}

/**
 * Sends n UDP datagrams of `size` bytes each through svrFd, one sendto()
 * call per datagram, to the address built from targetIP:targetPort.
 *
 * The payload is zero-filled. A failed sendto() is reported with perror and
 * the loop continues (best effort). A negative size sends nothing.
 */
void SendPacket(int n, int size, int svrFd, const char *targetIP, int targetPort)
{
    if (size < 0)
    {
        fprintf(stderr, "SendPacket: negative packet size %d\n", size);
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    // Value-initialised so that no uninitialised heap bytes go on the wire.
    char *payload = new char[size]();

    for (int i = 0; i < n; i++)
    {
        ssize_t ret = sendto(svrFd, payload, size, 0,
                             reinterpret_cast<sockaddr *>(&targetAddr), sizeof(targetAddr));
        if (ret < 0)
        {
            perror("sendto failed");
        }
    }

    delete[] payload;
}
/**
 * Sends n batches of m UDP datagrams through svrFd using sendmmsg(), to the
 * address built from targetIP:targetPort. Every datagram carries the fixed
 * payload {"cmd":"echo"}.
 *
 * @param n    Number of batches.
 * @param m    Datagrams per batch (per sendmmsg round).
 * @param size Unused; kept so the signature matches SendPacket-style callers.
 *
 * When sendmmsg() accepts only part of a batch, the remainder is retried.
 * On an error the rest of that batch is dropped and the next batch starts.
 * Non-positive n or m sends nothing.
 */
void SendMsgPacket(int n, int m, int size, int svrFd, const char *targetIP, int targetPort)
{
    (void)size;
    if (n <= 0 || m <= 0)
    {
        return;
    }
    sockaddr_in targetAddr = BuildAddr(targetIP, targetPort);
    char payload[] = "{\"cmd\":\"echo\"}";

    struct iovec chunk;
    chunk.iov_base = payload;
    chunk.iov_len = strlen(payload);

    // Value-initialise so msg_control, msg_controllen and msg_flags are zero;
    // the kernel reads those fields from every header.
    mmsghdr *batch = new mmsghdr[m]();
    for (int i = 0; i < m; i++)
    {
        batch[i].msg_hdr.msg_name = &targetAddr;
        batch[i].msg_hdr.msg_namelen = sizeof(targetAddr);
        batch[i].msg_hdr.msg_iov = &chunk;
        batch[i].msg_hdr.msg_iovlen = 1;
    }

    for (int round = 0; round < n; round++)
    {
        int sent = 0;
        while (sent < m)
        {
            // Resume after the messages that were already accepted.
            int ret = sendmmsg(svrFd, batch + sent, static_cast<unsigned int>(m - sent), 0);
            if (ret <= 0)
            {
                if (ret < 0)
                {
                    perror("sendmmsg failed");
                }
                break; // never let a failure spin this loop forever
            }
            sent += ret;
        }
    }
    delete[] batch;
}
/// Thread body: forwards to SendPacket to send n datagrams of 32 bytes
/// through svrFd to 127.0.0.1:16666.
void TestSendThread(int n, int svrFd)
{
    const int packetBytes = 32;
    const char *const destIp = "127.0.0.1";
    const int destPort = 16666;
    SendPacket(n, packetBytes, svrFd, destIp, destPort);
}

// Socket descriptors opened by the sender threads, indexed by socket slot.
// NOTE(review): plain ints with no synchronisation visible here; confirm that
// readers only access a slot after its writer has finished.
int fds[128];

/**
 * Thread body for the multi-socket benchmark.
 *
 * @param n     Number of batches passed to SendMsgPacket.
 * @param m     Messages per batch passed to SendMsgPacket.
 * @param svrFd A positive value is used as an already-open socket. A value
 *              <= 0 is read as the negated slot index i: a new socket is
 *              bound to port 65432 - i and recorded in fds[i].
 *
 * Exits the process with status 1 when the slot index is out of range or the
 * socket cannot be opened.
 */
void TestSendMsgThread(int n, int m, int svrFd)
{
    if (svrFd <= 0)
    {
        const int slotCount = static_cast<int>(sizeof(fds) / sizeof(fds[0]));
        int i = -svrFd;
        if (i >= slotCount)
        {
            fprintf(stderr, "socket slot %d out of range (max %d)\n", i, slotCount - 1);
            exit(1);
        }
        // Remember the port before svrFd is overwritten with the descriptor,
        // otherwise the log line below would print port + fd.
        const int port = 65432 + svrFd;
        svrFd = ListenUDP("127.0.0.1", port);
        fds[i] = svrFd;
        printf("%d %d\n", i, svrFd);
        if (svrFd < 0)
        {
            perror("listen upd failed");
            exit(1);
        }
        else
        {
            printf("UDP Server at 127.0.0.1:%d\n", port);
        }
    }
    SendMsgPacket(n, m, 32, svrFd, "127.0.0.1", 16666);
}
int main(int argc, char *argv[])
{
    //int fds[20];

    //int value = 4000;
    //::setsockopt(svrFd, SOL_SOCKET, SO_RCVBUF, (char *)&value, sizeof(value));
    //::setsockopt(svrFd, SOL_SOCKET, SO_SNDBUF, (char *)&value, sizeof(value));
    //ProfilerStart("udpsend.prof");
    auto begin = chrono::steady_clock::now();
    int n = 1000;
    int m = 1000;
    //TestSendThread(n, svrFd);
    int t = 10;

    // int svrFd = ListenUDP("127.0.0.1", 65431);
    // //fork();
    // if (svrFd < 0)
    // {
    //     perror("listen upd failed");
    //     exit(1);
    // }
    // else
    // {
    //     printf("UDP Server at 127.0.0.1:65432\n");
    // }

    thread *threads[128];
    for (int i = 0; i < t; i++)
    {
        threads[i] = new thread(TestSendMsgThread, n, m, i >= 8 ? fds[i % 8] : -i);
        //threads[i] = new thread(TestSendThread, n, svrFd);
    }

    for (int i = 0; i < t; i++)
    {
        threads[i]->join();
        delete threads[i];
    }

    auto end = chrono::steady_clock::now();
    //ProfilerStop();
    double dr_ms = std::chrono::duration<double, std::milli>(end - begin).count();
    printf("Use time:%.2fms, PPS:%.0f\n", dr_ms, n * m * t / (dr_ms / 1000));
    // close(svrFd);
}

//g++ -O3 -o send_speed send_speed.cpp -lpthread

//g++  -g -pg -o send_speed send_speed.cpp -lpthread

/*
g++ -O3 -o send_speed send_speed.cpp 
valgrind --tool=callgrind ./send_speed
python ../../gprof2dot/gprof2dot.py -f callgrind -n 10 -s callgrind.out.2938509 > val.dot
dot -Tpng val.dot -o val.png
python -m http.server 7897
*/

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章