0)寫在最前面
本來是研究virtio-gpu雙屏,但後面發現其對fb的支持有問題,並且VT切換時卡死,本文記錄了對這些問題的分析。
1)基礎環境調查
在xorg.conf中配置了:Option “Xinerama” “1”
spice彈出了兩個界面,應該是使能了兩個connector
但是xrandr中只能看到Virtual-0,說明另一個screen未連接。
實在不行只能通過fb0和fb1的方式寫入,但是看起來對fb的支持有問題:
# strace a.out
ioctl(3, FBIOGET_FSCREENINFO, 0x7ffc65153d90) = 0
ioctl(3, FBIOGET_VSCREENINFO, 0x7ffc65153de0) = 0
write(1, "1024x768, 32bpp\n", 161024x768, 32bpp
) = 16
mmap(NULL, 3145728, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = -1 EINVAL (Invalid argument)
dup(2) = 4
fcntl(4, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(4, {st_mode=S_IFCHR|0600, st_rdev=makedev(136, 0), ...}) = 0
write(4, "Error: failed to map framebuffer"..., 68Error: failed to map framebuffer device to memory: Invalid argument
看流程前面ioctl相關獲取都是正常的,到了mmap就報EINVAL
2)研究解決mmap問題
drm mmap調用棧:
Sep 16 12:14:56 Linx kernel: [ 2570.217975] [<ffffffffbd52b8d5>] ? dump_stack+0x5c/0x77
Sep 16 12:14:56 Linx kernel: [ 2570.218018] [<ffffffffc06dbfe2>] ? virtio_gpu_mmap+0x12/0x60 [virtio_gpu]
Sep 16 12:14:56 Linx kernel: [ 2570.218026] [<ffffffffbd3bdbb1>] ? mmap_region+0x341/0x590
Sep 16 12:14:56 Linx kernel: [ 2570.218030] [<ffffffffbd3be256>] ? do_mmap+0x456/0x560
Sep 16 12:14:56 Linx kernel: [ 2570.218035] [<ffffffffbd4a4986>] ? security_mmap_file+0x66/0xe0
Sep 16 12:14:56 Linx kernel: [ 2570.218041] [<ffffffffbd3a02ae>] ? vm_mmap_pgoff+0xbe/0x100
Sep 16 12:14:56 Linx kernel: [ 2570.218046] [<ffffffffbd3bc0d1>] ? SyS_mmap_pgoff+0x1b1/0x270
Sep 16 12:14:56 Linx kernel: [ 2570.218051] [<ffffffffbd203b5c>] ? do_syscall_64+0x5c/0x170
Sep 16 12:14:56 Linx kernel: [ 2570.218058] [<ffffffffbd7fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
mmap初始化調用棧:
dump_stack+0x5c/0x77
virtio_gpufb_create+0x49/0x320 [virtio_gpu]
drm_setup_crtcs+0x372/0x990 [drm_kms_helper]
drm_fb_helper_initial_config+0x20f/0x3da [drm_kms_helper]
virtio_gpu_fbdev_init+0xde/0x100 [virtio_gpu]
virtio_gpu_driver_load+0x44c/0x650 [virtio_gpu]
wake_up_atomic_t+0x30/0x30
drm_dev_register+0x9c/0xc0 [drm]
drm_virtio_init+0x60/0x1a0 [virtio_gpu]
vp_finalize_features+0x6a/0x90 [virtio_pci]
virtio_dev_probe+0x141/0x1e0 [virtio]
driver_probe_device+0x21e/0x430
__driver_attach+0xd6/0xe0
driver_probe_device+0x430/0x430
bus_for_each_dev+0x67/0xb0
bus_add_driver+0x40/0x260
driver_register+0x57/0xd0
do_one_initcall+0x4c/0x180
preempt_schedule_common+0x14/0x20
_cond_resched+0x19/0x20
do_init_module+0x5a/0x1f1
load_module+0x24e3/0x28f0
__symbol_put+0x60/0x60
vfs_read+0x114/0x130
security_capable+0x41/0x60
SYSC_finit_module+0x8e/0xe0
entry_SYSCALL_64_fastpath+0x1e/0xad
看不出來,再看fb的mmap調用棧:
fb_mmap+0x45/0x140
mmap_region+0x341/0x590
do_mmap+0x456/0x560
security_mmap_file+0x66/0xe0
vm_mmap_pgoff+0xbe/0x100
SyS_mmap_pgoff+0x1b1/0x270
do_syscall_64+0x5c/0x170
entry_SYSCALL64_slow_path+0x25/0x25
看了下fb_mmap的代碼:
/*
 * fb_mmap() — quoted from fbmem.c (with a dump_stack() added during this
 * investigation). Generic mmap handler for /dev/fbN: if the driver supplies
 * its own fb_mmap hook that hook is used; otherwise the framebuffer is
 * assumed to live in physical memory described by fix.smem_start/smem_len.
 */
static int
fb_mmap(struct file *file, struct vm_area_struct * vma)
{
struct fb_info *info = file_fb_info(file);
struct fb_ops *fb;
unsigned long mmio_pgoff;
unsigned long start;
u32 len;
dump_stack(); /* debug print added for this analysis */
if (!info)
return -ENODEV;
fb = info->fbops;
if (!fb)
return -ENODEV;
mutex_lock(&info->mm_lock);
if (fb->fb_mmap) {
/* Driver-provided hook takes precedence over the generic path below. */
int res;
res = fb->fb_mmap(info, vma);
mutex_unlock(&info->mm_lock);
return res;
}
/* Generic path: frame memory (smem_*) first, then MMIO (mmio_*). */
start = info->fix.smem_start;
len = info->fix.smem_len;
mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
if (vma->vm_pgoff >= mmio_pgoff) {
/* An offset past frame memory selects the MMIO region — but only
 * when acceleration is off. With smem_start/smem_len left at 0
 * (as in virtio_gpufb_create), mmio_pgoff is 0, so this branch is
 * always taken and accel_flags (FB_ACCELF_TEXT, set by
 * drm_fb_helper_fill_var) makes it return -EINVAL — the failure
 * analysed in this note. */
if (info->var.accel_flags) {
mutex_unlock(&info->mm_lock);
return -EINVAL;
}
vma->vm_pgoff -= mmio_pgoff;
start = info->fix.mmio_start;
len = info->fix.mmio_len;
}
mutex_unlock(&info->mm_lock);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
fb_pgprotect(file, vma, start);
/* Maps the chosen physical range; with start == len == 0 this cannot
 * produce a usable mapping either. */
return vm_iomap_memory(vma, start, len);
}
再結合virtio_gpufb_create中的代碼,這裏並未設置fix.smem_start和fix.smem_len,因此這兩個參數會被默認置0:
static int virtio_gpufb_create(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
{
...
fb = &vfbdev->vgfb.base;
vfbdev->helper.fb = fb;
strcpy(info->fix.id, "virtiodrmfb");
info->flags = FBINFO_DEFAULT;
info->fbops = &virtio_gpufb_ops;
info->pixmap.flags = FB_PIXMAP_SYSTEM;
info->screen_base = obj->vmap;
info->screen_size = obj->gem_base.size;
drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
drm_fb_helper_fill_var(info, &vfbdev->helper,
sizes->fb_width, sizes->fb_height);
info->fix.mmio_start = 0;
info->fix.mmio_len = 0;
...
start = info->fix.mmio_start;
len = info->fix.mmio_len;
return vm_iomap_memory(vma, start, len);
}
以下調用設置了accel_flags,其中的FB_ACCELF_TEXT爲1
drm_fb_helper_fill_var
->info->var.accel_flags = FB_ACCELF_TEXT;
可以看出virtio_gpufb_create中的代碼直接導致fb_mmap中的判斷條件成立而返回-EINVAL。這裏即便將info->var.accel_flags重置爲0也沒用:即便繞過了該判斷,由於傳入vm_iomap_memory的start和len都爲0,後續的io_remap_pfn_range早晚也會出問題。這也是virtio-gpu和qxl之間的區別:qxl使用的是“設備地址”,雖然這對虛擬設備來說意義不大。
從上面可以看出fbmem的通用路徑只適配了物理設備內存,但它在前面預留了一個接口,這裏其實只要實現了fb_mmap接口,就能將fb映射到虛擬內存:
static int
fb_mmap(struct file *file, struct vm_area_struct * vma)
{
struct fb_info *info = file_fb_info(file);
struct fb_ops *fb;
unsigned long mmio_pgoff;
unsigned long start;
u32 len;
if (!info)
return -ENODEV;
fb = info->fbops;
if (!fb)
return -ENODEV;
mutex_lock(&info->mm_lock);
if (fb->fb_mmap) {
int res;
res = fb->fb_mmap(info, vma);
mutex_unlock(&info->mm_lock);
return res;
}
。。。
}
之後mmap正常的,但是寫入過程中出現了SIGBUS的錯誤,如下:
# ./a.out
The framebuffer device was opened successfully.
1024x768, 32bpp
The framebuffer device was mapped to memory successfully.
總線錯誤
因爲mmap內存是“寫時分配”,也就是在寫入時候觸發page fault異常,然後才正式映射,調用流程如下:
ttm_bo_vm_fault+0x34/0x540
tty_insert_flip_string_fixed_flag+0x85/0xe0
list_del+0x9/0x20
remove_wait_queue+0x20/0x30
n_tty_write+0x2d7/0x470
__wake_up+0x34/0x50
__do_fault+0x84/0x190
handle_mm_fault+0x79d/0x1710
__do_page_fault+0x253/0x510
async_page_fault+0x28/0x30
排查後,在ttm_bo_vm_fault中,異常出現在如下區域,這裏的page_offset爲18446744073708503140,num pages爲768,明顯page_offset有問題。
page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff - drm_vma_node_start(&bo->vma_node);
page_last = vma_pages(vma) + vma->vm_pgoff -
drm_vma_node_start(&bo->vma_node);
if (unlikely(page_offset >= bo->num_pages)) {
retval = VM_FAULT_SIGBUS;
goto out_io_unlock;
}
細緻排查後page_offset的計算參數獲取如下(pages):address:34243474630; vma->vm_start:34243474530; vma->vm_pgoff:0; vma node start:1048576,這裏的page_offset的計算出現下溢,所以纔得到這麼大的page_offset。
對比了下qxl:
address:34332914588; vma->vm_start:34332914268; vma->vm_pgoff:1060031; vma node start:1060031;
qxl中的pgoff和vma node start是相同的,或許是virtio-gpu中沒有設置pgoff?
在xserver中會計算fboff:
fPtr->fboff = (unsigned long) fPtr->fix.smem_start & ~PAGE_MASK;
fPtr->fbmem_len = (fPtr->fboff+fPtr->fix.smem_len+~PAGE_MASK) &
PAGE_MASK;
修改驅動中fix項的smem_len,示例如下:
@@ -337,6 +403,9 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper,
info->fbops = &virtio_gpufb_ops;
info->pixmap.flags = FB_PIXMAP_SYSTEM;
+ info->fix.smem_len = obj->gem_base.size;
info->screen_base = obj->vmap;
之後能跑過,然後還是進不去主界面,查看日誌出現告警:
Sep 29 18:31:55 Linx kernel: [ 22.227166] Call Trace:
Sep 29 18:31:55 Linx kernel: [ 22.227169] [<ffffffff8212b955>] ? dump_stack+0x5c/0x77
Sep 29 18:31:55 Linx kernel: [ 22.227172] [<ffffffff81e77794>] ? __warn+0xc4/0xe0
Sep 29 18:31:55 Linx kernel: [ 22.227174] [<ffffffffc050b1df>] ? ttm_bo_vm_open+0x6f/0x80 [ttm]
Sep 29 18:31:55 Linx kernel: [ 22.227176] [<ffffffff81e7598a>] ? copy_process.part.33+0xd4a/0x1c50
Sep 29 18:31:55 Linx kernel: [ 22.227177] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227179] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227181] [<ffffffff81e03b5c>] ? do_syscall_64+0x5c/0x170
Sep 29 18:31:55 Linx kernel: [ 22.227183] [<ffffffff823fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
Sep 29 18:31:55 Linx kernel: [ 22.227184] ---[ end trace 6e9f62b113d5170a ]---
Sep 29 18:31:55 Linx pulseaudio[1739]: Disabling timer-based scheduling because running in
這個告警的判斷如下,這個暫時先放下:
/*
 * ttm_bo_vm_open() — quoted from the ttm module: VMA open callback,
 * invoked e.g. when a mapping is duplicated (fork, as the copy_process
 * frames in the log suggest).
 */
static void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
/* Source of the warning in the log: the BO's device mapping does not
 * match the mapped file's address_space. */
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
(void)ttm_bo_reference(bo); /* hold a reference for the new VMA */
}
現在的問題看起來是因爲屏幕不會自動刷新:
圖1(左上方區域文字未刷新導致無法顯示矩形圖像):
圖2(左上方區域刷新導致矩形圖像顯示):
有個辦法可以驗證這個問題:
1)在startx卡死時通過ssh登上去獲取fb數據
$ cat /dev/fb0 > screenap
2)關機切換爲VGA顯卡再啓動後執行
$ cat screenap > /dev/fb0
3)這裏只給出部分屏幕截圖,能看到主界面的,說明startx卡死時xserver是向fb中寫入了數據的
看來virtio-gpu缺乏定時刷新的功能,可以添加定時器對其進行定期更新,示例如下:
/*
 * Sketch only (pseudo-code; xxx are placeholders): timer callback that
 * forces a dirty update of the whole framebuffer, then re-arms itself,
 * giving virtio-gpu the periodic refresh it lacks.
 */
static void my_timer_func(unsigned long data)
{
virtio_gpu_dirty_update(svgfb, true, 0, 0, xxx, xxx);
add_timer_xxx(); /* re-arm for the next refresh */
}
添加後屏幕無法自動刷新的問題解決了,矩形框能正常顯示,但startx後仍然無法顯示,同時獲取/dev/fb0中的數據是正常的,這就說明或許xserver啓動後顯示設備的內存映射改變了,再回到先前的日誌告警和代碼:
Sep 29 18:31:55 Linx kernel: [ 22.227166] Call Trace:
Sep 29 18:31:55 Linx kernel: [ 22.227169] [<ffffffff8212b955>] ? dump_stack+0x5c/0x77
Sep 29 18:31:55 Linx kernel: [ 22.227172] [<ffffffff81e77794>] ? __warn+0xc4/0xe0
Sep 29 18:31:55 Linx kernel: [ 22.227174] [<ffffffffc050b1df>] ? ttm_bo_vm_open+0x6f/0x80 [ttm]
Sep 29 18:31:55 Linx kernel: [ 22.227176] [<ffffffff81e7598a>] ? copy_process.part.33+0xd4a/0x1c50
Sep 29 18:31:55 Linx kernel: [ 22.227177] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227179] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227181] [<ffffffff81e03b5c>] ? do_syscall_64+0x5c/0x170
Sep 29 18:31:55 Linx kernel: [ 22.227183] [<ffffffff823fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
Sep 29 18:31:55 Linx kernel: [ 22.227184] ---[ end trace 6e9f62b113d5170a ]---
Sep 29 18:31:55 Linx pulseaudio[1739]: Disabling timer-based scheduling because running in
這個告警的判斷如下,但是看起來並沒有影響,而fb的內存在probe之後應該也是不會改變的,所以得換個角度思考:
/*
 * ttm_bo_vm_open() — same ttm VMA open callback quoted again for
 * reference while re-examining the warning.
 */
static void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
/* The WARN fires when the BO's device mapping differs from the mapped
 * file's address_space — but, as noted, it appears harmless here. */
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
(void)ttm_bo_reference(bo); /* hold a reference for the new VMA */
}
通過各種打印、延時和猜測最後查到該問題是由於KDSETMODE導致的,在startx過程中會切換VT模式爲Graphics模式,主要是爲了屏蔽光標和控制檯打印的影響:
1574 open("/dev/vc/1", O_RDWR|O_NONBLOCK) = -1 ENOENT (No such file or directory)
1574 open("/dev/tty1", O_RDWR|O_NONBLOCK) = 8
1574 ioctl(8, VT_GETSTATE, 0x7ffd0fee9b40) = 0
1574 ioctl(8, VT_ACTIVATE, 0x1) = 0
1574 ioctl(8, VT_WAITACTIVE, 0x1) = 0
1574 ioctl(8, VIDIOC_RESERVED or VT_GETMODE, 0x7ffd0fee9b50) = 0
1574 rt_sigaction(SIGUSR1, {0x5594241ab5f0, [USR1], SA_RESTORER|SA_RESTART, 0x7fe3d4396040}, {SIG_IGN, [], SA_RESTORER, 0x7fe3d4396040}, 8) = 0
1574 ioctl(8, VIDIOC_ENUM_FMT or VT_SETMODE, 0x7ffd0fee9b50) = 0
1574 ioctl(8, KDSETMODE, 0x1) = 0
這裏編寫了個復現示例代碼段如下:
/* Repro for the KDSETMODE problem: while the VT is in KD_GRAPHICS mode
 * (during the sleep), nothing written to /dev/fb0 shows on screen. */
int vconsole_fd;
vconsole_fd = open("/dev/tty1", O_RDWR);
ioctl( vconsole_fd, KDSETMODE, KD_GRAPHICS); /* switch VT to graphics mode */
sleep(3); /* window in which /dev/fb0 writes are not displayed */
ioctl( vconsole_fd, KDSETMODE, KD_TEXT); /* restore text mode */
close(vconsole_fd);
在該sleep期間向/dev/fb0中寫入的所有數據均不會在界面上顯示。
排查後發現是virtio_gpu_dirty_update的定時任務沒有調用virtio_gpu_cmd_resource_flush更新所致,判斷如下:
if (in_atomic() || store)
store_for_later = true;
這裏的in_atomic確保了此處不能處於spinlock等原子上下文下,因爲後續會調用wait_event,而wait_event中會sleep,在spinlock中是不能睡眠的。這裏舉個例子說明這個問題:
[進程A] 關搶佔
[進程A] 獲得鎖
[進程A] 睡眠調度 ...... 儘管已經關閉了搶佔,[1]依然可以通過主動調用schedule(), schedule_timeout()等主動讓出CPU,調度其它進程。
[進程B] 關搶佔 ...... [1]已經關閉搶佔,所以這裏相當於nop操作
[進程B] 獲得鎖失敗 ...... [1]已經獲得了鎖,並且還沒有釋放
[進程B] 反覆嘗試獲得鎖 ...... 由於關閉了搶佔,已經沒人能夠終止這個反覆嘗試的操作了,所以這裏出現了死鎖
而timer回調是在原子上下文中執行的(即in_atomic()成立),因此走定時器路徑永遠不會flush virtio cmd。解決辦法很簡單:在virtio_gpu_dirty_update後邊改用一個支持搶佔、可以睡眠的延時任務schedule_delayed_work即可。
結果:
後續測試startx後顯示均正常。