寫在最前
這裏是使用Parboil自帶的腳本編譯和使用的教程:https://blog.csdn.net/FishSeeker/article/details/79479714
最近因爲跑實驗的原因需要修改以及合併部分代碼然後再編譯,所以無法使用Parboil自帶的編譯功能,只能自己編譯。
編譯腳本
下面是cuda通用的編譯腳本,大部分東西都是固定的不需要改。詳情見註釋
# Location of the CUDA Toolkit,默認的路徑即可
CUDA_PATH ?= "/usr/local/cuda"
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
# 這裏添加編譯參數,比如-keep, -Xptxas 等等
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
SAMPLE_ENABLED := 1
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# 這裏添加include庫
INCLUDES := -I../../common/inc
INCLUDES += -I/home/fish/GPU/benchmark/Parboil/parboil/common/include
LIBRARIES :=
################################################################################
# 這裏寫你的GPU計算能力
SMS ?= 61
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
all: build
# 這裏寫最後生成的二進制名字
build: mm
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
@echo "Sample will be waived due to the above missing dependencies"
else
@echo "Sample is ready - all dependencies have been met"
endif
# 這裏寫二進制的依賴
mm: mm.o io.o parboil_cuda.o
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
# 這裏寫obj文件的依賴
mm.o:main.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
io.o:io.cc
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
# 這個對於Parboil每個app都是必須的,路徑要寫對
parboil_cuda.o:/home/fish/GPU/benchmark/Parboil/parboil/common/src/parboil_cuda.c
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
run: build
$(EXEC) ./mm
# 這裏make clean時刪除什麼
clean:
rm -f mm mm.o io.o parboil_cuda.o
clobber: clean
編譯方法
修改上述編譯腳本,然後在Parboil文件夾的src文件夾中建立一個Makefile文件,把這個腳本複製進去執行make即可。如果不知道需要那些.o的文件,可以看看parboil各個app內的build文件夾內的文件,大概就能知道需要什麼.o文件了