linux_dsm_epyc7002/drivers/gpu/drm/i915
Colin Xu a291e4fba2 drm/i915/gvt: Use GFP_ATOMIC instead of GFP_KERNEL in atomic context
GFP_KERNEL flag specifies a normal kernel allocation in which executing
in process context without any locks and can sleep.
mmio_diff takes sometime to finish all the diff compare and it has
locks, continue using GFP_KERNEL will output below trace if LOCKDEP
enabled.

Use GFP_ATOMIC instead.

V2: Rebase.

=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
5.7.0-rc2 #400 Not tainted
-----------------------------------------------------
is trying to acquire:
ffffffffb47bea20 (fs_reclaim){+.+.}-{0:0}, at: fs_reclaim_acquire.part.0+0x0/0x30

               and this task is already holding:
ffff88845b85cc90 (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2}, at: vgpu_mmio_diff_show+0xcf/0x2e0
which would create a new lock dependency:
 (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2} -> (fs_reclaim){+.+.}-{0:0}

               but this new dependency connects a SOFTIRQ-irq-safe lock:
 (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2}

               ... which became SOFTIRQ-irq-safe at:
  lock_acquire+0x175/0x4e0
  _raw_spin_lock_irqsave+0x2b/0x40
  shadow_context_status_change+0xfe/0x2f0
  notifier_call_chain+0x6a/0xa0
  __atomic_notifier_call_chain+0x5f/0xf0
  execlists_schedule_out+0x42a/0x820
  process_csb+0xe7/0x3e0
  execlists_submission_tasklet+0x5c/0x1d0
  tasklet_action_common.isra.0+0xeb/0x260
  __do_softirq+0x11d/0x56f
  irq_exit+0xf6/0x100
  do_IRQ+0x7f/0x160
  ret_from_intr+0x0/0x2a
  cpuidle_enter_state+0xcd/0x5b0
  cpuidle_enter+0x37/0x60
  do_idle+0x337/0x3f0
  cpu_startup_entry+0x14/0x20
  start_kernel+0x58b/0x5c5
  secondary_startup_64+0xa4/0xb0

               to a SOFTIRQ-irq-unsafe lock:
 (fs_reclaim){+.+.}-{0:0}

               ... which became SOFTIRQ-irq-unsafe at:
...
  lock_acquire+0x175/0x4e0
  fs_reclaim_acquire.part.0+0x20/0x30
  kmem_cache_alloc_node_trace+0x2e/0x290
  alloc_worker+0x2b/0xb0
  init_rescuer.part.0+0x17/0xe0
  workqueue_init+0x293/0x3bb
  kernel_init_freeable+0x149/0x325
  kernel_init+0x8/0x116
  ret_from_fork+0x3a/0x50

               other info that might help us debug this:

 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(fs_reclaim);
                               local_irq_disable();
                               lock(&gvt->scheduler.mmio_context_lock);
                               lock(fs_reclaim);
  <Interrupt>
    lock(&gvt->scheduler.mmio_context_lock);

                *** DEADLOCK ***

3 locks held by cat/1439:
 #0: ffff888444a23698 (&p->lock){+.+.}-{3:3}, at: seq_read+0x49/0x680
 #1: ffff88845b858068 (&gvt->lock){+.+.}-{3:3}, at: vgpu_mmio_diff_show+0xc7/0x2e0
 #2: ffff88845b85cc90 (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2}, at: vgpu_mmio_diff_show+0xcf/0x2e0

               the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&gvt->scheduler.mmio_context_lock){+.-.}-{2:2} ops: 31 {
   HARDIRQ-ON-W at:
                    lock_acquire+0x175/0x4e0
                    _raw_spin_lock_bh+0x2f/0x40
                    vgpu_mmio_diff_show+0xcf/0x2e0
                    seq_read+0x242/0x680
                    full_proxy_read+0x95/0xc0
                    vfs_read+0xc2/0x1b0
                    ksys_read+0xc4/0x160
                    do_syscall_64+0x63/0x290
                    entry_SYSCALL_64_after_hwframe+0x49/0xb3
   IN-SOFTIRQ-W at:
                    lock_acquire+0x175/0x4e0
                    _raw_spin_lock_irqsave+0x2b/0x40
                    shadow_context_status_change+0xfe/0x2f0
                    notifier_call_chain+0x6a/0xa0
                    __atomic_notifier_call_chain+0x5f/0xf0
                    execlists_schedule_out+0x42a/0x820
                    process_csb+0xe7/0x3e0
                    execlists_submission_tasklet+0x5c/0x1d0
                    tasklet_action_common.isra.0+0xeb/0x260
                    __do_softirq+0x11d/0x56f
                    irq_exit+0xf6/0x100
                    do_IRQ+0x7f/0x160
                    ret_from_intr+0x0/0x2a
                    cpuidle_enter_state+0xcd/0x5b0
                    cpuidle_enter+0x37/0x60
                    do_idle+0x337/0x3f0
                    cpu_startup_entry+0x14/0x20
                    start_kernel+0x58b/0x5c5
                    secondary_startup_64+0xa4/0xb0
   INITIAL USE at:
                   lock_acquire+0x175/0x4e0
                   _raw_spin_lock_irqsave+0x2b/0x40
                   shadow_context_status_change+0xfe/0x2f0
                   notifier_call_chain+0x6a/0xa0
                   __atomic_notifier_call_chain+0x5f/0xf0
                   execlists_schedule_in+0x2c8/0x690
                   __execlists_submission_tasklet+0x1303/0x1930
                   execlists_submit_request+0x1e7/0x230
                   submit_notify+0x105/0x2a4
                   __i915_sw_fence_complete+0xaa/0x380
                   __engine_park+0x313/0x5a0
                   ____intel_wakeref_put_last+0x3e/0x90
                   intel_gt_resume+0x41e/0x440
                   intel_gt_init+0x283/0xbc0
                   i915_gem_init+0x197/0x240
                   i915_driver_probe+0xc2d/0x12e0
                   i915_pci_probe+0xa2/0x1e0
                   local_pci_probe+0x6f/0xb0
                   pci_device_probe+0x171/0x230
                   really_probe+0x17a/0x380
                   driver_probe_device+0x70/0xf0
                   device_driver_attach+0x82/0x90
                   __driver_attach+0x60/0x100
                   bus_for_each_dev+0xe4/0x140
                   bus_add_driver+0x257/0x2a0
                   driver_register+0xd3/0x150
                   i915_init+0x6d/0x80
                   do_one_initcall+0xb8/0x3a0
                   kernel_init_freeable+0x2b4/0x325
                   kernel_init+0x8/0x116
                   ret_from_fork+0x3a/0x50
 }
__key.77812+0x0/0x40
 ... acquired at:
   lock_acquire+0x175/0x4e0
   fs_reclaim_acquire.part.0+0x20/0x30
   kmem_cache_alloc_trace+0x2e/0x260
   mmio_diff_handler+0xc0/0x150
   intel_gvt_for_each_tracked_mmio+0x7b/0x140
   vgpu_mmio_diff_show+0x111/0x2e0
   seq_read+0x242/0x680
   full_proxy_read+0x95/0xc0
   vfs_read+0xc2/0x1b0
   ksys_read+0xc4/0x160
   do_syscall_64+0x63/0x290
   entry_SYSCALL_64_after_hwframe+0x49/0xb3

               the dependencies between the lock to be acquired
 and SOFTIRQ-irq-unsafe lock:
-> (fs_reclaim){+.+.}-{0:0} ops: 1999031 {
   HARDIRQ-ON-W at:
                    lock_acquire+0x175/0x4e0
                    fs_reclaim_acquire.part.0+0x20/0x30
                    kmem_cache_alloc_node_trace+0x2e/0x290
                    alloc_worker+0x2b/0xb0
                    init_rescuer.part.0+0x17/0xe0
                    workqueue_init+0x293/0x3bb
                    kernel_init_freeable+0x149/0x325
                    kernel_init+0x8/0x116
                    ret_from_fork+0x3a/0x50
   SOFTIRQ-ON-W at:
                    lock_acquire+0x175/0x4e0
                    fs_reclaim_acquire.part.0+0x20/0x30
                    kmem_cache_alloc_node_trace+0x2e/0x290
                    alloc_worker+0x2b/0xb0
                    init_rescuer.part.0+0x17/0xe0
                    workqueue_init+0x293/0x3bb
                    kernel_init_freeable+0x149/0x325
                    kernel_init+0x8/0x116
                    ret_from_fork+0x3a/0x50
   INITIAL USE at:
                   lock_acquire+0x175/0x4e0
                   fs_reclaim_acquire.part.0+0x20/0x30
                   kmem_cache_alloc_node_trace+0x2e/0x290
                   alloc_worker+0x2b/0xb0
                   init_rescuer.part.0+0x17/0xe0
                   workqueue_init+0x293/0x3bb
                   kernel_init_freeable+0x149/0x325
                   kernel_init+0x8/0x116
                   ret_from_fork+0x3a/0x50
 }
__fs_reclaim_map+0x0/0x60
 ... acquired at:
   lock_acquire+0x175/0x4e0
   fs_reclaim_acquire.part.0+0x20/0x30
   kmem_cache_alloc_trace+0x2e/0x260
   mmio_diff_handler+0xc0/0x150
   intel_gvt_for_each_tracked_mmio+0x7b/0x140
   vgpu_mmio_diff_show+0x111/0x2e0
   seq_read+0x242/0x680
   full_proxy_read+0x95/0xc0
   vfs_read+0xc2/0x1b0
   ksys_read+0xc4/0x160
   do_syscall_64+0x63/0x290
   entry_SYSCALL_64_after_hwframe+0x49/0xb3

               stack backtrace:
CPU: 5 PID: 1439 Comm: cat Not tainted 5.7.0-rc2 #400
Hardware name: Intel(R) Client Systems NUC8i7BEH/NUC8BEB, BIOS BECFL357.86A.0056.2018.1128.1717 11/28/2018
Call Trace:
 dump_stack+0x97/0xe0
 check_irq_usage.cold+0x428/0x434
 ? check_usage_forwards+0x2c0/0x2c0
 ? class_equal+0x11/0x20
 ? __bfs+0xd2/0x2d0
 ? in_any_class_list+0xa0/0xa0
 ? check_path+0x22/0x40
 ? check_noncircular+0x150/0x2b0
 ? print_circular_bug.isra.0+0x1b0/0x1b0
 ? mark_lock+0x13d/0xc50
 ? __lock_acquire+0x1e32/0x39b0
 __lock_acquire+0x1e32/0x39b0
 ? timerqueue_add+0xc1/0x130
 ? register_lock_class+0xa60/0xa60
 ? mark_lock+0x13d/0xc50
 lock_acquire+0x175/0x4e0
 ? __zone_pcp_update+0x80/0x80
 ? check_flags.part.0+0x210/0x210
 ? mark_held_locks+0x65/0x90
 ? _raw_spin_unlock_irqrestore+0x32/0x40
 ? lockdep_hardirqs_on+0x190/0x290
 ? fwtable_read32+0x163/0x480
 ? mmio_diff_handler+0xc0/0x150
 fs_reclaim_acquire.part.0+0x20/0x30
 ? __zone_pcp_update+0x80/0x80
 kmem_cache_alloc_trace+0x2e/0x260
 mmio_diff_handler+0xc0/0x150
 ? vgpu_mmio_diff_open+0x30/0x30
 intel_gvt_for_each_tracked_mmio+0x7b/0x140
 vgpu_mmio_diff_show+0x111/0x2e0
 ? mmio_diff_handler+0x150/0x150
 ? rcu_read_lock_sched_held+0xa0/0xb0
 ? rcu_read_lock_bh_held+0xc0/0xc0
 ? kasan_unpoison_shadow+0x33/0x40
 ? __kasan_kmalloc.constprop.0+0xc2/0xd0
 seq_read+0x242/0x680
 ? debugfs_locked_down.isra.0+0x70/0x70
 full_proxy_read+0x95/0xc0
 vfs_read+0xc2/0x1b0
 ksys_read+0xc4/0x160
 ? kernel_write+0xb0/0xb0
 ? mark_held_locks+0x24/0x90
 do_syscall_64+0x63/0x290
 entry_SYSCALL_64_after_hwframe+0x49/0xb3
RIP: 0033:0x7ffbe3e6efb2
Code: c0 e9 c2 fe ff ff 50 48 8d 3d ca cb 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24
RSP: 002b:00007ffd021c08a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007ffbe3e6efb2
RDX: 0000000000020000 RSI: 00007ffbe34cd000 RDI: 0000000000000003
RBP: 00007ffbe34cd000 R08: 00007ffbe34cc010 R09: 0000000000000000
R10: 0000000000000022 R11: 0000000000000246 R12: 0000562b6f0a11f0
R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
------------[ cut here ]------------

Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200601035556.19999-1-colin.xu@intel.com
2020-06-17 12:36:19 +08:00
..
display drm/i915/display: Fix the encoder type check 2020-06-16 11:34:24 +03:00
gem drm fixes for 5.7-rc1 2020-06-11 12:27:06 -07:00
gt drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds 2020-06-16 11:34:24 +03:00
gvt drm/i915/gvt: Use GFP_ATOMIC instead of GFP_KERNEL in atomic context 2020-06-17 12:36:19 +08:00
selftests drm/i915/gt: Incrementally check for rewinding 2020-06-16 11:34:23 +03:00
.gitignore .gitignore: add SPDX License Identifier 2020-03-25 11:50:48 +01:00
i915_active_types.h
i915_active.c drm/i915: Allow asynchronous waits on the i915_active barriers 2020-04-06 19:48:06 +01:00
i915_active.h drm/i915: Allow asynchronous waits on the i915_active barriers 2020-04-06 19:48:06 +01:00
i915_buddy.c
i915_buddy.h
i915_cmd_parser.c drm/i915: Whitelist context-local timestamp in the gen9 cmdparser 2020-06-02 16:35:29 +03:00
i915_config.c drm/i915: Replace the hardcoded I915_FENCE_TIMEOUT 2020-05-09 12:57:57 +01:00
i915_debugfs_params.c
i915_debugfs_params.h
i915_debugfs.c Merge tag 'drm-intel-next-2020-05-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next 2020-05-20 13:36:45 +10:00
i915_debugfs.h drm/i915: have *_debugfs_init() functions return void. 2020-03-18 16:27:22 +01:00
i915_drv.c Merge tag 'drm-intel-next-2020-04-30' of git://anongit.freedesktop.org/drm/drm-intel into drm-next 2020-05-14 11:33:10 +10:00
i915_drv.h Merge tag 'drm-intel-next-2020-05-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next 2020-05-20 13:36:45 +10:00
i915_fixed.h
i915_gem_evict.c drm/i915: Handle idling during i915_gem_evict_something busy loops 2020-05-13 14:39:41 -07:00
i915_gem_gtt.c
i915_gem_gtt.h
i915_gem.c drm/i915/gem: Drop cached obj->bind_count 2020-04-02 01:17:39 +01:00
i915_gem.h
i915_getparam.c drm/i915: Store CS timestamp frequency in Hz 2020-05-14 19:59:53 +03:00
i915_globals.c
i915_globals.h
i915_gpu_error.c drm/i915: Avoid dereferencing a dead context 2020-05-04 10:35:47 -07:00
i915_gpu_error.h drm/i915: Replace zero-length array with flexible-array 2020-05-09 12:59:23 +01:00
i915_ioc32.c i915 compat ioctl(): just use drm_ioctl_kernel() 2020-05-01 20:35:26 -04:00
i915_ioc32.h
i915_irq.c drm/i915/icl+: Fix hotplug interrupt disabling after storm detection 2020-06-16 11:34:24 +03:00
i915_irq.h
i915_memcpy.c drm/i915: remove always-defined CONFIG_AS_MOVNTDQA 2020-04-09 00:01:59 +09:00
i915_memcpy.h
i915_mm.c mm: don't include asm/pgtable.h if linux/mm.h is already included 2020-06-09 09:39:13 -07:00
i915_params.c drm/i915/params: fix i915.reset module param type 2020-06-08 12:58:13 +03:00
i915_params.h drm/i915/params: don't expose inject_probe_failure in debugfs 2020-06-02 16:35:33 +03:00
i915_pci.c Merge tag 'drm-intel-next-2020-05-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next 2020-05-20 13:36:45 +10:00
i915_perf_types.h drm/i915/perf: Schedule oa_config after modifying the contexts 2020-03-30 18:20:34 +01:00
i915_perf.c Merge branch 'uaccess.i915' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs 2020-06-10 16:04:27 -07:00
i915_perf.h
i915_pmu.c drm/i915/pmu: avoid an maybe-uninitialized warning 2020-06-15 12:36:02 +03:00
i915_pmu.h
i915_priolist_types.h drm/i915/gt: Prevent timeslicing into unpreemptable requests 2020-06-16 11:34:23 +03:00
i915_pvinfo.h
i915_query.c i915: switch copy_perf_config_registers_or_number() to unsafe_put_user() 2020-05-01 20:35:07 -04:00
i915_query.h
i915_reg.h drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds 2020-06-16 11:34:24 +03:00
i915_request.c drm/i915: Check for awaits on still currently executing requests 2020-06-01 15:23:01 +03:00
i915_request.h drm/i915: Drop no-semaphore boosting 2020-05-14 06:14:33 +01:00
i915_scatterlist.c
i915_scatterlist.h
i915_scheduler_types.h drm/i915: Drop no-semaphore boosting 2020-05-14 06:14:33 +01:00
i915_scheduler.c drm/i915: Don't set queue-priority hint when supressing the reschedule 2020-05-25 15:40:26 +03:00
i915_scheduler.h drm/i915: Mark concurrent submissions with a weak-dependency 2020-05-11 10:54:04 -07:00
i915_selftest.h drm/i915/gem: Implement legacy MI_STORE_DATA_IMM 2020-05-04 15:15:04 +01:00
i915_suspend.c
i915_suspend.h
i915_sw_fence_work.c drm/i915: Immediately execute the fenced work 2020-03-25 13:05:04 +00:00
i915_sw_fence_work.h drm/i915: Immediately execute the fenced work 2020-03-25 13:05:04 +00:00
i915_sw_fence.c drm/i915: Tidy awaiting on dma-fences 2020-05-11 12:56:45 +01:00
i915_sw_fence.h
i915_switcheroo.c drm/i915/switcheroo: use struct drm_device based logging 2020-04-08 13:49:35 +03:00
i915_switcheroo.h
i915_syncmap.c
i915_syncmap.h
i915_sysfs.c
i915_sysfs.h
i915_trace_points.c
i915_trace.h
i915_user_extensions.c
i915_user_extensions.h
i915_utils.c drm/i915: Avoid setting timer->expires to 0 2020-04-03 16:33:09 +01:00
i915_utils.h drm/i915: be more solid in checking the alignment 2020-03-11 23:12:39 +02:00
i915_vgpu.c
i915_vgpu.h
i915_vma_types.h
i915_vma.c drm/i915: Check current i915_vma.pin_count status first on unbind 2020-05-06 15:36:54 -07:00
i915_vma.h drm/i915/gt: Make fence revocation unequivocal 2020-04-01 23:34:17 +01:00
intel_device_info.c drm/i915: Extract i915_cs_timestamp_{ns_to_ticks,tick_to_ns}() 2020-05-14 20:04:02 +03:00
intel_device_info.h drm/i915: Store CS timestamp frequency in Hz 2020-05-14 19:59:53 +03:00
intel_dram.c drm/i915/dram: prefer struct drm_device based logging 2020-04-08 13:49:35 +03:00
intel_dram.h
intel_gvt.c
intel_gvt.h
intel_memory_region.c
intel_memory_region.h
intel_pch.c
intel_pch.h
intel_pm.c drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds 2020-06-16 11:34:24 +03:00
intel_pm.h drm/i915: Make active_pipes check skl specific 2020-05-13 16:21:20 +03:00
intel_region_lmem.c
intel_region_lmem.h
intel_runtime_pm.c PM: sleep: core: Rename DPM_FLAG_NEVER_SKIP 2020-04-24 21:33:09 +02:00
intel_runtime_pm.h
intel_sideband.c drm/i915: Added required new PCode commands 2020-05-05 13:59:55 +03:00
intel_sideband.h
intel_uncore.c drm/i915: Remove unused HAS_FWTABLE macro 2020-05-11 16:57:36 +01:00
intel_uncore.h drm/i915/selftests: Measure the energy consumed while in RC6 2020-03-25 11:33:05 +00:00
intel_wakeref.c drm/i915: Extend intel_wakeref to support delayed puts 2020-03-23 12:51:05 +00:00
intel_wakeref.h drm/i915: Extend intel_wakeref to support delayed puts 2020-03-23 12:51:05 +00:00
intel_wopcm.c drm/i915: Remove cnl pre-prod workarounds 2020-05-04 18:44:52 +03:00
intel_wopcm.h
Kconfig
Kconfig.debug
Kconfig.profile drm/i915: Replace the hardcoded I915_FENCE_TIMEOUT 2020-05-09 12:57:57 +01:00
Kconfig.unstable
Makefile drm/i915: Replace the hardcoded I915_FENCE_TIMEOUT 2020-05-09 12:57:57 +01:00
vlv_suspend.c
vlv_suspend.h