mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-25 16:19:35 +07:00
d0dff2ac98
At the moment we allocate and register the Scsi_Host object corresponding to a zfcp adapter (FCP device) very early in the life cycle of the adapter - even before we fully discover and initialize the underlying firmware/hardware. This had the advantage that we could already use the Scsi_Host object, and fill in all its information during said discovery and initialization. Due to commit 737eb78e82
("block: Delay default elevator initialization") (first released in v5.4), we noticed a regression that would prevent us from using any storage volume if zfcp is configured with support for DIF or DIX (zfcp.dif=1 || zfcp.dix=1). Doing so would result in an illegal memory access as soon as the first request is sent with such an configuration. As example for a crash resulting from this: scsi host0: scsi_eh_0: sleeping scsi host0: zfcp qdio: 0.0.1900 ZFCP on SC 4bd using AI:1 QEBSM:0 PRI:1 TDD:1 SIGA: W AP scsi 0:0:0:0: scsi scan: INQUIRY pass 1 length 36 Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:0000000035c7c007 R3:00000001effcc007 S:00000001effd1000 P:000000000000003d Oops: 0004 ilc:3 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: ... CPU: 1 PID: 783 Comm: kworker/u760:5 Kdump: loaded Not tainted 5.6.0-rc2-bb-next+ #1 Hardware name: ... Workqueue: scsi_wq_0 fc_scsi_scan_rport [scsi_transport_fc] Krnl PSW : 0704e00180000000 000003ff801fcdae (scsi_queue_rq+0x436/0x740 [scsi_mod]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0fffffffffffffff 0000000000000000 0000000187150120 0000000000000000 000003ff80223d20 000000000000018e 000000018adc6400 0000000187711000 000003e0062337e8 00000001ae719000 0000000187711000 0000000187150000 00000001ab808100 0000000187150120 000003ff801fcd74 000003e0062336a0 Krnl Code: 000003ff801fcd9e: e310a35c0012 lt %r1,860(%r10) 000003ff801fcda4: a7840010 brc 8,000003ff801fcdc4 #000003ff801fcda8: e310b2900004 lg %r1,656(%r11) >000003ff801fcdae: d71710001000 xc 0(24,%r1),0(%r1) 000003ff801fcdb4: e310b2900004 lg %r1,656(%r11) 000003ff801fcdba: 41201018 la %r2,24(%r1) 000003ff801fcdbe: e32010000024 stg %r2,0(%r1) 000003ff801fcdc4: b904002b lgr %r2,%r11 Call Trace: [<000003ff801fcdae>] scsi_queue_rq+0x436/0x740 [scsi_mod] ([<000003ff801fcd74>] scsi_queue_rq+0x3fc/0x740 
[scsi_mod]) [<00000000349c9970>] blk_mq_dispatch_rq_list+0x390/0x680 [<00000000349d1596>] blk_mq_sched_dispatch_requests+0x196/0x1a8 [<00000000349c7a04>] __blk_mq_run_hw_queue+0x144/0x160 [<00000000349c7ab6>] __blk_mq_delay_run_hw_queue+0x96/0x228 [<00000000349c7d5a>] blk_mq_run_hw_queue+0xd2/0xe0 [<00000000349d194a>] blk_mq_sched_insert_request+0x192/0x1d8 [<00000000349c17b8>] blk_execute_rq_nowait+0x80/0x90 [<00000000349c1856>] blk_execute_rq+0x6e/0xb0 [<000003ff801f8ac2>] __scsi_execute+0xe2/0x1f0 [scsi_mod] [<000003ff801fef98>] scsi_probe_and_add_lun+0x358/0x840 [scsi_mod] [<000003ff8020001c>] __scsi_scan_target+0xc4/0x228 [scsi_mod] [<000003ff80200254>] scsi_scan_target+0xd4/0x100 [scsi_mod] [<000003ff802d8b96>] fc_scsi_scan_rport+0x96/0xc0 [scsi_transport_fc] [<0000000034245ce8>] process_one_work+0x458/0x7d0 [<00000000342462a2>] worker_thread+0x242/0x448 [<0000000034250994>] kthread+0x15c/0x170 [<0000000034e1979c>] ret_from_fork+0x30/0x38 INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801fbc36>] scsi_add_cmd_to_list+0x9e/0xa8 [scsi_mod] Kernel panic - not syncing: Fatal exception: panic_on_oops While this issue is exposed by the commit named above, this is only by accident. The real issue exists for longer already - basically since it's possible to use blk-mq via scsi-mq, and blk-mq pre-allocates all requests for a tag-set during initialization of the same. For a given Scsi_Host object this is done when adding the object to the midlayer (`scsi_add_host()` and such). In `scsi_mq_setup_tags()` the midlayer calculates how much memory is required for a single scsi_cmnd, and its additional data, which also might include space for additional protection data - depending on whether the Scsi_Host has any form of protection capabilities (`scsi_host_get_prot()`). 
The problem is thus: because zfcp does this step before we actually know whether the firmware/hardware has these capabilities, we don't set any protection capabilities in the Scsi_Host object. And so, no space is allocated for additional protection data for requests in the Scsi_Host tag-set. Once we go through discovery and initialization of the FCP device firmware/hardware fully (this is done via the firmware commands "Exchange Config Data" and "Exchange Port Data") we find out whether it actually supports DIF and DIX, and we set the corresponding capabilities in the Scsi_Host object (in `zfcp_scsi_set_prot()`). Now the Scsi_Host potentially has protection capabilities, but the already allocated requests in the tag-set don't have any space allocated for that. When we then trigger target scanning or add scsi_devices manually, the midlayer will use requests from that tag-set, and before sending most requests, it will also call `scsi_mq_prep_fn()`. To prepare the scsi_cmnd this function will check again whether the used Scsi_Host has any protection capabilities - and now it potentially has - and if so, it will try to initialize the structures it assumes to be preallocated, and thus causes the crash shown above. Before delaying the default elevator initialization with the commit named above, we would always also allocate an elevator for any scsi_device before ever sending any requests - in contrast to now, where we do it after device-probing. That elevator in turn would have its own tag-set, and that is initialized after we went through discovery and initialization of the underlying firmware/hardware. So requests from that tag-set can be allocated properly, and if used - unless the user changed/disabled the default elevator - this would hide the underlying issue. 
To fix this for any configuration - with or without an elevator - we move the allocation and registration of the Scsi_Host object for a given FCP device to after the first complete discovery and initialization of the underlying firmware/hardware. By doing that we can make all basic properties of the Scsi_Host known to the midlayer by the time we call `scsi_add_host()`, including whether we have any protection capabilities. To do that we have to delay all the accesses that we would have done in the past during discovery and initialization, and do them instead once we are finished with it. The previous patches ramp up to this by fencing and factoring out all these accesses, and make it possible to re-do them later on. In addition we also make use of the diagnostic buffers we recently added with commit 92953c6e0a
("scsi: zfcp: signal incomplete or error for sync exchange config/port data") commit 7e418833e6
("scsi: zfcp: diagnostics buffer caching and use for exchange port data") commit 088210233e
("scsi: zfcp: add diagnostics buffer for exchange config data") (first released in v5.5), because these already cache all the information we need for that "re-do operation" - the cached information is always updated during xconf or xport data, so it won't be stale. In addition to the move and re-do, this patch also updates the function-documentation of `zfcp_scsi_adapter_register()` and changes how it reports if a Scsi_Host object already exists. In that case future recovery-operations can skip this step completely and behave much like they would do in the past - zfcp does not release a once allocated Scsi_Host object unless the corresponding FCP device is deconstructed completely. Link: https://lore.kernel.org/r/030dd6da318bbb529f0b5268ec65cebcd20fc0a3.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier <maier@linux.ibm.com> Signed-off-by: Benjamin Block <bblock@linux.ibm.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
204 lines
9.6 KiB
C
204 lines
9.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* zfcp device driver
|
|
*
|
|
* External function declarations.
|
|
*
|
|
* Copyright IBM Corp. 2002, 2020
|
|
*/
|
|
|
|
#ifndef ZFCP_EXT_H
|
|
#define ZFCP_EXT_H
|
|
|
|
#include <linux/types.h>
|
|
#include <scsi/fc/fc_els.h>
|
|
#include "zfcp_def.h"
|
|
#include "zfcp_fc.h"
|
|
|
|
/* zfcp_aux.c */
|
|
extern struct zfcp_port *zfcp_get_port_by_wwpn(struct zfcp_adapter *, u64);
|
|
extern struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *);
|
|
extern struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *, u64, u32,
|
|
u32);
|
|
extern void zfcp_sg_free_table(struct scatterlist *, int);
|
|
extern int zfcp_sg_setup_table(struct scatterlist *, int);
|
|
extern void zfcp_adapter_release(struct kref *);
|
|
extern void zfcp_adapter_unregister(struct zfcp_adapter *);
|
|
|
|
/* zfcp_ccw.c */
|
|
extern struct ccw_driver zfcp_ccw_driver;
|
|
extern struct zfcp_adapter *zfcp_ccw_adapter_by_cdev(struct ccw_device *);
|
|
extern void zfcp_ccw_adapter_put(struct zfcp_adapter *);
|
|
|
|
/* zfcp_dbf.c */
|
|
extern int zfcp_dbf_adapter_register(struct zfcp_adapter *);
|
|
extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *);
|
|
extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *,
|
|
struct zfcp_port *, struct scsi_device *, u8, u8);
|
|
extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
|
|
struct zfcp_port *port,
|
|
struct scsi_device *sdev, u8 want, u8 need);
|
|
extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *);
|
|
extern void zfcp_dbf_rec_run_lvl(int level, char *tag,
|
|
struct zfcp_erp_action *erp);
|
|
extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64);
|
|
extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *);
|
|
extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *);
|
|
extern void zfcp_dbf_hba_fsf_fces(char *tag, const struct zfcp_fsf_req *req,
|
|
u64 wwpn, u32 fc_security_old,
|
|
u32 fc_security_new);
|
|
extern void zfcp_dbf_hba_bit_err(char *, struct zfcp_fsf_req *);
|
|
extern void zfcp_dbf_hba_def_err(struct zfcp_adapter *, u64, u16, void **);
|
|
extern void zfcp_dbf_hba_basic(char *, struct zfcp_adapter *);
|
|
extern void zfcp_dbf_san_req(char *, struct zfcp_fsf_req *, u32);
|
|
extern void zfcp_dbf_san_res(char *, struct zfcp_fsf_req *);
|
|
extern void zfcp_dbf_san_in_els(char *, struct zfcp_fsf_req *);
|
|
extern void zfcp_dbf_scsi_common(char *tag, int level, struct scsi_device *sdev,
|
|
struct scsi_cmnd *sc,
|
|
struct zfcp_fsf_req *fsf);
|
|
extern void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter,
|
|
unsigned int scsi_id, int ret);
|
|
|
|
/* zfcp_erp.c */
|
|
extern void zfcp_erp_set_adapter_status(struct zfcp_adapter *, u32);
|
|
extern void zfcp_erp_clear_adapter_status(struct zfcp_adapter *, u32);
|
|
extern void zfcp_erp_port_forced_no_port_dbf(char *dbftag,
|
|
struct zfcp_adapter *adapter,
|
|
u64 port_name, u32 port_id);
|
|
extern void zfcp_erp_adapter_reopen(struct zfcp_adapter *, int, char *);
|
|
extern void zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int, char *);
|
|
extern void zfcp_erp_set_port_status(struct zfcp_port *, u32);
|
|
extern void zfcp_erp_clear_port_status(struct zfcp_port *, u32);
|
|
extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear,
|
|
char *dbftag);
|
|
extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *);
|
|
extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *);
|
|
extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter,
|
|
int clear, char *dbftag);
|
|
extern void zfcp_erp_set_lun_status(struct scsi_device *, u32);
|
|
extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32);
|
|
extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *);
|
|
extern void zfcp_erp_lun_shutdown(struct scsi_device *, int, char *);
|
|
extern void zfcp_erp_lun_shutdown_wait(struct scsi_device *, char *);
|
|
extern int zfcp_erp_thread_setup(struct zfcp_adapter *);
|
|
extern void zfcp_erp_thread_kill(struct zfcp_adapter *);
|
|
extern void zfcp_erp_wait(struct zfcp_adapter *);
|
|
extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long);
|
|
extern void zfcp_erp_timeout_handler(struct timer_list *t);
|
|
extern void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter,
|
|
char *dbftag);
|
|
|
|
/* zfcp_fc.c */
|
|
extern struct kmem_cache *zfcp_fc_req_cache;
|
|
extern void zfcp_fc_enqueue_event(struct zfcp_adapter *,
|
|
enum fc_host_event_code event_code, u32);
|
|
extern void zfcp_fc_post_event(struct work_struct *);
|
|
extern void zfcp_fc_scan_ports(struct work_struct *);
|
|
extern void zfcp_fc_incoming_els(struct zfcp_fsf_req *);
|
|
extern void zfcp_fc_port_did_lookup(struct work_struct *);
|
|
extern void zfcp_fc_trigger_did_lookup(struct zfcp_port *);
|
|
extern void zfcp_fc_plogi_evaluate(struct zfcp_port *, struct fc_els_flogi *);
|
|
extern void zfcp_fc_test_link(struct zfcp_port *);
|
|
extern void zfcp_fc_link_test_work(struct work_struct *);
|
|
extern void zfcp_fc_wka_ports_force_offline(struct zfcp_fc_wka_ports *);
|
|
extern int zfcp_fc_gs_setup(struct zfcp_adapter *);
|
|
extern void zfcp_fc_gs_destroy(struct zfcp_adapter *);
|
|
extern int zfcp_fc_exec_bsg_job(struct bsg_job *);
|
|
extern int zfcp_fc_timeout_bsg_job(struct bsg_job *);
|
|
extern void zfcp_fc_sym_name_update(struct work_struct *);
|
|
extern unsigned int zfcp_fc_port_scan_backoff(void);
|
|
extern void zfcp_fc_conditional_port_scan(struct zfcp_adapter *);
|
|
extern void zfcp_fc_inverse_conditional_port_scan(struct zfcp_adapter *);
|
|
|
|
/* zfcp_fsf.c */
|
|
extern struct kmem_cache *zfcp_fsf_qtcb_cache;
|
|
extern int zfcp_fsf_open_port(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *);
|
|
extern int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *);
|
|
extern int zfcp_fsf_close_port(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_close_physical_port(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_open_lun(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_close_lun(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_exchange_config_data_sync(struct zfcp_qdio *,
|
|
struct fsf_qtcb_bottom_config *);
|
|
extern int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *);
|
|
extern int zfcp_fsf_exchange_port_data_sync(struct zfcp_qdio *,
|
|
struct fsf_qtcb_bottom_port *);
|
|
extern u32 zfcp_fsf_convert_portspeed(u32 fsf_speed);
|
|
extern void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *);
|
|
extern int zfcp_fsf_status_read(struct zfcp_qdio *);
|
|
extern int zfcp_status_read_refill(struct zfcp_adapter *adapter);
|
|
extern int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *, struct zfcp_fsf_ct_els *,
|
|
mempool_t *, unsigned int);
|
|
extern int zfcp_fsf_send_els(struct zfcp_adapter *, u32,
|
|
struct zfcp_fsf_ct_els *, unsigned int);
|
|
extern int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *);
|
|
extern void zfcp_fsf_req_free(struct zfcp_fsf_req *);
|
|
extern void zfcp_fsf_fc_host_link_down(struct zfcp_adapter *adapter);
|
|
extern struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev,
|
|
u8 tm_flags);
|
|
extern struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *);
|
|
extern void zfcp_fsf_reqid_check(struct zfcp_qdio *, int);
|
|
/*
 * Output format selector for zfcp_fsf_scnprint_fc_security(), which takes a
 * value of this type as its fmt argument.
 *
 * NOTE(review): going by the enumerator names only, LIST presumably prints
 * the fc_security value as a list of items and SINGLEITEM as one item -
 * confirm against the implementation in zfcp_fsf.c.
 */
enum zfcp_fsf_print_fmt {
|
|
ZFCP_FSF_PRINT_FMT_LIST,
|
|
ZFCP_FSF_PRINT_FMT_SINGLEITEM,
|
|
};
|
|
extern ssize_t zfcp_fsf_scnprint_fc_security(char *buf, size_t size,
|
|
u32 fc_security,
|
|
enum zfcp_fsf_print_fmt fmt);
|
|
|
|
/* zfcp_qdio.c */
|
|
extern int zfcp_qdio_setup(struct zfcp_adapter *);
|
|
extern void zfcp_qdio_destroy(struct zfcp_qdio *);
|
|
extern int zfcp_qdio_sbal_get(struct zfcp_qdio *);
|
|
extern int zfcp_qdio_send(struct zfcp_qdio *, struct zfcp_qdio_req *);
|
|
extern int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *, struct zfcp_qdio_req *,
|
|
struct scatterlist *);
|
|
extern void zfcp_qdio_shost_update(struct zfcp_adapter *const adapter,
|
|
const struct zfcp_qdio *const qdio);
|
|
extern int zfcp_qdio_open(struct zfcp_qdio *);
|
|
extern void zfcp_qdio_close(struct zfcp_qdio *);
|
|
extern void zfcp_qdio_siosl(struct zfcp_adapter *);
|
|
|
|
/* zfcp_scsi.c */
|
|
extern bool zfcp_experimental_dix;
|
|
extern struct scsi_transport_template *zfcp_scsi_transport_template;
|
|
/*
 * Registers the Scsi_Host object for the given zfcp adapter (FCP device).
 *
 * According to the change description for this revision, allocation and
 * registration of the Scsi_Host happen only after the first complete
 * discovery and initialization of the firmware/hardware, so that the
 * protection capabilities are already known when scsi_add_host() is called.
 * A Scsi_Host, once allocated, is kept until the FCP device is deconstructed,
 * and later recovery operations can skip this step if one already exists.
 *
 * NOTE(review): the concrete return values (success, "already exists",
 * errors) are not visible in this header - confirm in zfcp_scsi.c.
 */
extern int zfcp_scsi_adapter_register(struct zfcp_adapter *);
|
|
extern void zfcp_scsi_adapter_unregister(struct zfcp_adapter *);
|
|
extern struct fc_function_template zfcp_transport_functions;
|
|
extern void zfcp_scsi_rport_work(struct work_struct *);
|
|
extern void zfcp_scsi_schedule_rport_register(struct zfcp_port *);
|
|
extern void zfcp_scsi_schedule_rport_block(struct zfcp_port *);
|
|
extern void zfcp_scsi_schedule_rports_block(struct zfcp_adapter *);
|
|
/*
 * Sets the protection (DIF/DIX) capabilities in the adapter's Scsi_Host
 * object, based on what the FCP device firmware/hardware was found to
 * support during "Exchange Config Data" / "Exchange Port Data".
 *
 * NOTE(review): these capabilities need to be in place before the Scsi_Host
 * is added to the SCSI midlayer, because the midlayer sizes its preallocated
 * requests from them - verify the call order in the callers.
 */
extern void zfcp_scsi_set_prot(struct zfcp_adapter *);
|
|
extern void zfcp_scsi_dif_sense_error(struct scsi_cmnd *, int);
|
|
extern void zfcp_scsi_shost_update_config_data(
|
|
struct zfcp_adapter *const adapter,
|
|
const struct fsf_qtcb_bottom_config *const bottom,
|
|
const bool bottom_incomplete);
|
|
extern void zfcp_scsi_shost_update_port_data(
|
|
struct zfcp_adapter *const adapter,
|
|
const struct fsf_qtcb_bottom_port *const bottom);
|
|
|
|
/* zfcp_sysfs.c */
|
|
extern const struct attribute_group *zfcp_unit_attr_groups[];
|
|
extern struct attribute_group zfcp_sysfs_adapter_attrs;
|
|
extern const struct attribute_group *zfcp_port_attr_groups[];
|
|
extern struct mutex zfcp_sysfs_port_units_mutex;
|
|
extern struct device_attribute *zfcp_sysfs_sdev_attrs[];
|
|
extern struct device_attribute *zfcp_sysfs_shost_attrs[];
|
|
extern const struct attribute_group zfcp_sysfs_diag_attr_group;
|
|
bool zfcp_sysfs_port_is_removing(const struct zfcp_port *const port);
|
|
|
|
/* zfcp_unit.c */
|
|
extern int zfcp_unit_add(struct zfcp_port *, u64);
|
|
extern int zfcp_unit_remove(struct zfcp_port *, u64);
|
|
extern struct zfcp_unit *zfcp_unit_find(struct zfcp_port *, u64);
|
|
extern struct scsi_device *zfcp_unit_sdev(struct zfcp_unit *unit);
|
|
extern void zfcp_unit_scsi_scan(struct zfcp_unit *);
|
|
extern void zfcp_unit_queue_scsi_scan(struct zfcp_port *);
|
|
extern unsigned int zfcp_unit_sdev_status(struct zfcp_unit *);
|
|
|
|
#endif /* ZFCP_EXT_H */
|