linux_dsm_epyc7002/drivers/infiniband/hw/ehca/ehca_uverbs.c
Konstantin Khlebnikov 314e51b985 mm: kill vma flag VM_RESERVED and mm->reserved_vm counter
A long time ago, in v2.4, VM_RESERVED kept swapout process off VMA,
currently it lost original meaning but still has some effects:

 | effect                 | alternative flags
-+------------------------+---------------------------------------------
1| account as reserved_vm | VM_IO
2| skip in core dump      | VM_IO, VM_DONTDUMP
3| do not merge or expand | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP
4| do not mlock           | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP

This patch removes reserved_vm counter from mm_struct.  Seems like nobody
cares about it, it does not exported into userspace directly, it only
reduces total_vm showed in proc.

Thus VM_RESERVED can be replaced with VM_IO or pair VM_DONTEXPAND | VM_DONTDUMP.

remap_pfn_range() and io_remap_pfn_range() set VM_IO|VM_DONTEXPAND|VM_DONTDUMP.
remap_vmalloc_range() set VM_DONTEXPAND | VM_DONTDUMP.

[akpm@linux-foundation.org: drivers/vfio/pci/vfio_pci.c fixup]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-10-09 16:22:19 +09:00

310 lines
8.1 KiB
C

/*
* IBM eServer eHCA Infiniband device driver for Linux on POWER
*
* userspace support verbs
*
* Authors: Christoph Raisch <raisch@de.ibm.com>
* Hoang-Nam Nguyen <hnguyen@de.ibm.com>
* Heiko J Schick <schickhj@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
* All rights reserved.
*
* This source code is distributed under a dual license of GPL v2.0 and OpenIB
* BSD.
*
* OpenIB BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/slab.h>
#include "ehca_classes.h"
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "ehca_tools.h"
#include "hcp_if.h"
struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
struct ib_udata *udata)
{
struct ehca_ucontext *my_context;
my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
if (!my_context) {
ehca_err(device, "Out of memory device=%p", device);
return ERR_PTR(-ENOMEM);
}
return &my_context->ib_ucontext;
}
int ehca_dealloc_ucontext(struct ib_ucontext *context)
{
kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
return 0;
}
static void ehca_mm_open(struct vm_area_struct *vma)
{
u32 *count = (u32 *)vma->vm_private_data;
if (!count) {
ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
return;
}
(*count)++;
if (!(*count))
ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
vma->vm_start, vma->vm_end, *count);
}
static void ehca_mm_close(struct vm_area_struct *vma)
{
u32 *count = (u32 *)vma->vm_private_data;
if (!count) {
ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
return;
}
(*count)--;
ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
vma->vm_start, vma->vm_end, *count);
}
static const struct vm_operations_struct vm_ops = {
.open = ehca_mm_open,
.close = ehca_mm_close,
};
static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
u32 *mm_count)
{
int ret;
u64 vsize, physical;
vsize = vma->vm_end - vma->vm_start;
if (vsize < EHCA_PAGESIZE) {
ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
return -EINVAL;
}
physical = galpas->user.fw_handle;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
vma->vm_page_prot);
if (unlikely(ret)) {
ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
return -ENOMEM;
}
vma->vm_private_data = mm_count;
(*mm_count)++;
vma->vm_ops = &vm_ops;
return 0;
}
static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
u32 *mm_count)
{
int ret;
u64 start, ofs;
struct page *page;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
start = vma->vm_start;
for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
page = virt_to_page(virt_addr);
ret = vm_insert_page(vma, start, page);
if (unlikely(ret)) {
ehca_gen_err("vm_insert_page() failed rc=%i", ret);
return ret;
}
start += PAGE_SIZE;
}
vma->vm_private_data = mm_count;
(*mm_count)++;
vma->vm_ops = &vm_ops;
return 0;
}
static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
u32 rsrc_type)
{
int ret;
switch (rsrc_type) {
case 0: /* galpa fw handle */
ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
"ehca_mmap_fw() failed rc=%i cq_num=%x",
ret, cq->cq_number);
return ret;
}
break;
case 1: /* cq queue_addr */
ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
"ehca_mmap_queue() failed rc=%i cq_num=%x",
ret, cq->cq_number);
return ret;
}
break;
default:
ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
rsrc_type, cq->cq_number);
return -EINVAL;
}
return 0;
}
static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
u32 rsrc_type)
{
int ret;
switch (rsrc_type) {
case 0: /* galpa fw handle */
ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"remap_pfn_range() failed ret=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return -ENOMEM;
}
break;
case 1: /* qp rqueue_addr */
ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
&qp->mm_count_rqueue);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return ret;
}
break;
case 2: /* qp squeue_addr */
ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
&qp->mm_count_squeue);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return ret;
}
break;
default:
ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
rsrc_type, qp->ib_qp.qp_num);
return -EINVAL;
}
return 0;
}
int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
u64 fileoffset = vma->vm_pgoff;
u32 idr_handle = fileoffset & 0x1FFFFFF;
u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */
u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
u32 ret;
struct ehca_cq *cq;
struct ehca_qp *qp;
struct ib_uobject *uobject;
switch (q_type) {
case 0: /* CQ */
read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, idr_handle);
read_unlock(&ehca_cq_idr_lock);
/* make sure this mmap really belongs to the authorized user */
if (!cq)
return -EINVAL;
if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
return -EINVAL;
ret = ehca_mmap_cq(vma, cq, rsrc_type);
if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
"ehca_mmap_cq() failed rc=%i cq_num=%x",
ret, cq->cq_number);
return ret;
}
break;
case 1: /* QP */
read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, idr_handle);
read_unlock(&ehca_qp_idr_lock);
/* make sure this mmap really belongs to the authorized user */
if (!qp)
return -EINVAL;
uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
if (!uobject || uobject->context != context)
return -EINVAL;
ret = ehca_mmap_qp(vma, qp, rsrc_type);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_qp() failed rc=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return ret;
}
break;
default:
ehca_gen_err("bad queue type %x", q_type);
return -EINVAL;
}
return 0;
}