mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-25 06:28:09 +07:00
e63890f38a
In the current implementation of unaligned aio+dio, the lock order behaves as follows: in user process context: -> call io_submit() -> get i_mutex <== window1 -> get ip_unaligned_aio -> submit direct io to block device -> release i_mutex -> io_submit() returns; in dio work queue context (the work queue is created in __blockdev_direct_IO): -> release ip_unaligned_aio <== window2 -> get i_mutex -> clear unwritten flag & change i_size -> release i_mutex. There is a limit on the number of threads in the dio work queue, 256 by default. If all 256 threads are in the above 'window2' stage and there is a user process in the 'window1' stage, the system will deadlock: the user process holds i_mutex while waiting for the ip_unaligned_aio lock, and the direct bio that holds the ip_unaligned_aio mutex is waiting for a dio work queue thread to be scheduled, but all the dio work queue threads are waiting for the i_mutex lock in 'window2'. This case only happened in a test which sends a large number (more than 256) of aios in one io_submit() call. My design is to remove the ip_unaligned_aio lock and change the unaligned aio to a sync io instead. Just like the ip_unaligned_aio lock did, this serializes the unaligned aio dio. [akpm@linux-foundation.org: remove OCFS2_IOCB_UNALIGNED_IO, per Junxiao Bi] Signed-off-by: Ryan Ding <ryan.ding@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <joseph.qi@huawei.com> Cc: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
96 lines
3.1 KiB
C
96 lines
3.1 KiB
C
/* -*- mode: c; c-basic-offset: 8; -*-
|
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
|
*
|
|
* Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#ifndef OCFS2_AOPS_H
|
|
#define OCFS2_AOPS_H
|
|
|
|
#include <linux/fs.h>
|
|
|
|
/*
 * Prototype only; the definition is not part of this header.
 * Returns a handle_t pointer.  @from and @to are unsigned values that
 * presumably bound a range within @page -- confirm against the definition.
 */
handle_t *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page,
				      unsigned int from, unsigned int to);
|
|
|
|
/*
 * Prototype only; the definition is not part of this header.
 * @p_blkno is passed by pointer, so the callee may read and/or update it
 * (NOTE(review): direction not visible here -- confirm).
 */
int ocfs2_map_page_blocks(struct page *page,
			  u64 *p_blkno,
			  struct inode *inode,
			  unsigned int from,
			  unsigned int to,
			  int new);
|
|
|
|
/*
 * Prototype only; the definition is not part of this header.
 * Takes an array of page pointers together with its element count.
 */
void ocfs2_unlock_and_free_pages(struct page **pages,
				 int num_pages);
|
|
|
|
/*
 * Prototype only; the definition is not part of this header.
 * @fn is a caller-supplied callback taking a handle and a buffer_head;
 * presumably it is applied to buffers starting at @head that fall within
 * [@from, @to) -- confirm against the definition.
 */
int walk_page_buffers(handle_t *handle, struct buffer_head *head,
		      unsigned int from, unsigned int to, int *partial,
		      int (*fn)(handle_t *handle, struct buffer_head *bh));
|
|
|
|
int ocfs2_write_end_nolock(struct address_space *mapping,
|
|
loff_t pos, unsigned len, unsigned copied,
|
|
struct page *page, void *fsdata);
|
|
|
|
/*
 * Kind of write being set up; passed as the @type argument of
 * ocfs2_write_begin_nolock().  Values are spelled out explicitly.
 */
typedef enum {
	OCFS2_WRITE_BUFFER = 0,
	OCFS2_WRITE_DIRECT = 1,
	OCFS2_WRITE_MMAP   = 2,
} ocfs2_write_type_t;
|
|
|
|
int ocfs2_write_begin_nolock(struct address_space *mapping,
|
|
loff_t pos, unsigned len, ocfs2_write_type_t type,
|
|
struct page **pagep, void **fsdata,
|
|
struct buffer_head *di_bh, struct page *mmap_page);
|
|
|
|
/*
 * Prototype only; the definition is not part of this header.
 * Operates on @inode, one @page and the buffer_head @di_bh.
 */
int ocfs2_read_inline_data(struct inode *inode,
			   struct page *page,
			   struct buffer_head *di_bh);
|
|
/*
 * Prototype only; the definition is not part of this header.
 * Returns an int for the pair (@di_bh, @new_size); presumably nonzero when
 * @new_size fits in the inline-data area -- confirm against the definition.
 */
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh,
				u64 new_size);
|
|
|
|
/*
 * Prototype only; the definition is not part of this header.
 * @bh_result is passed by pointer (presumably filled in by the callee --
 * confirm against the definition).
 */
int ocfs2_get_block(struct inode *inode,
		    sector_t iblock,
		    struct buffer_head *bh_result,
		    int create);
|
|
/* all ocfs2_dio_end_io()'s fault */
|
|
#define ocfs2_iocb_is_rw_locked(iocb) \
|
|
test_bit(0, (unsigned long *)&iocb->private)
|
|
static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
|
|
{
|
|
set_bit(0, (unsigned long *)&iocb->private);
|
|
if (level)
|
|
set_bit(1, (unsigned long *)&iocb->private);
|
|
else
|
|
clear_bit(1, (unsigned long *)&iocb->private);
|
|
}
|
|
|
|
/*
|
|
* Using a named enum representing lock types in terms of #N bit stored in
|
|
* iocb->private, which is going to be used for communication between
|
|
* ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
|
|
*/
|
|
enum ocfs2_iocb_lock_bits {
|
|
OCFS2_IOCB_RW_LOCK = 0,
|
|
OCFS2_IOCB_RW_LOCK_LEVEL,
|
|
OCFS2_IOCB_NUM_LOCKS
|
|
};
|
|
|
|
#define ocfs2_iocb_clear_rw_locked(iocb) \
|
|
clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
|
|
#define ocfs2_iocb_rw_locked_level(iocb) \
|
|
test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
|
|
|
|
#endif /* OCFS2_AOPS_H */
|