2006-01-12 03:17:48 +07:00
|
|
|
#include <linux/capability.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/seq_file.h>
|
2009-07-04 09:23:40 +07:00
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <linux/proc_fs.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/ctype.h>
|
2009-12-15 09:01:06 +07:00
|
|
|
#include <linux/string.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
|
|
|
#include <linux/slab.h>
|
2009-07-04 09:23:40 +07:00
|
|
|
#include <linux/init.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Size, in bytes, of the on-stack buffer mtrr_write() parses a control line from. */
#define LINE_SIZE 80
|
|
|
|
|
|
|
|
#include <asm/mtrr.h>
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#include "mtrr.h"
|
|
|
|
|
|
|
|
/*
 * Per-open-file slot holding the usage-count array: the ->private field of
 * the seq_file reached through file->private_data.  Expands to an lvalue, so
 * it is used both to read the pointer and to assign it.
 */
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
|
|
|
|
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
/*
 * Textual name of every MTRR memory type, indexed by the numeric type value.
 * Slots 2 and 3 have no name and are reported as "?".
 */
static const char *const mtrr_strings[MTRR_NUM_TYPES] = {
	[0] = "uncachable",
	[1] = "write-combining",
	[2] = "?",
	[3] = "?",
	[4] = "write-through",
	[5] = "write-protect",
	[6] = "write-back",
};
|
|
|
|
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-existing ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-through and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
const char *mtrr_attrib_to_str(int x)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
return (x <= 6) ? mtrr_strings[x] : "?";
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
|
|
|
|
static int
|
|
|
|
mtrr_file_add(unsigned long base, unsigned long size,
|
2008-01-30 19:30:31 +07:00
|
|
|
unsigned int type, bool increment, struct file *file, int page)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2009-07-04 09:23:40 +07:00
|
|
|
unsigned int *fcount = FILE_FCOUNT(file);
|
2005-04-17 05:20:36 +07:00
|
|
|
int reg, max;
|
|
|
|
|
|
|
|
max = num_var_ranges;
|
|
|
|
if (fcount == NULL) {
|
2006-12-07 08:14:13 +07:00
|
|
|
fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!fcount)
|
|
|
|
return -ENOMEM;
|
|
|
|
FILE_FCOUNT(file) = fcount;
|
|
|
|
}
|
|
|
|
if (!page) {
|
|
|
|
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
|
|
|
|
return -EINVAL;
|
|
|
|
base >>= PAGE_SHIFT;
|
|
|
|
size >>= PAGE_SHIFT;
|
|
|
|
}
|
2008-01-30 19:30:31 +07:00
|
|
|
reg = mtrr_add_page(base, size, type, true);
|
2005-04-17 05:20:36 +07:00
|
|
|
if (reg >= 0)
|
|
|
|
++fcount[reg];
|
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
mtrr_file_del(unsigned long base, unsigned long size,
|
|
|
|
struct file *file, int page)
|
|
|
|
{
|
|
|
|
unsigned int *fcount = FILE_FCOUNT(file);
|
2009-07-04 09:23:40 +07:00
|
|
|
int reg;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
if (!page) {
|
|
|
|
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
|
|
|
|
return -EINVAL;
|
|
|
|
base >>= PAGE_SHIFT;
|
|
|
|
size >>= PAGE_SHIFT;
|
|
|
|
}
|
|
|
|
reg = mtrr_del_page(-1, base, size);
|
|
|
|
if (reg < 0)
|
|
|
|
return reg;
|
|
|
|
if (fcount == NULL)
|
|
|
|
return reg;
|
|
|
|
if (fcount[reg] < 1)
|
|
|
|
return -EINVAL;
|
|
|
|
--fcount[reg];
|
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
|
2009-07-04 09:23:40 +07:00
|
|
|
/*
|
|
|
|
* seq_file can seek but we ignore it.
|
|
|
|
*
|
|
|
|
* Format of control line:
|
|
|
|
* "base=%Lx size=%Lx type=%s" or "disable=%d"
|
|
|
|
*/
|
2005-04-17 05:20:36 +07:00
|
|
|
static ssize_t
|
|
|
|
mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
|
|
|
|
{
|
|
|
|
int i, err;
|
|
|
|
unsigned long reg;
|
|
|
|
unsigned long long base, size;
|
|
|
|
char *ptr;
|
|
|
|
char line[LINE_SIZE];
|
2009-09-27 01:51:50 +07:00
|
|
|
int length;
|
2005-04-17 05:20:36 +07:00
|
|
|
size_t linelen;
|
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
memset(line, 0, LINE_SIZE);
|
2009-09-27 01:51:50 +07:00
|
|
|
|
|
|
|
length = len;
|
|
|
|
length--;
|
|
|
|
|
|
|
|
if (length > LINE_SIZE - 1)
|
|
|
|
length = LINE_SIZE - 1;
|
|
|
|
|
|
|
|
if (length < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (copy_from_user(line, buf, length))
|
2005-04-17 05:20:36 +07:00
|
|
|
return -EFAULT;
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
linelen = strlen(line);
|
|
|
|
ptr = line + linelen - 1;
|
|
|
|
if (linelen && *ptr == '\n')
|
|
|
|
*ptr = '\0';
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!strncmp(line, "disable=", 8)) {
|
|
|
|
reg = simple_strtoul(line + 8, &ptr, 0);
|
|
|
|
err = mtrr_del_page(reg, 0, 0);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
return len;
|
|
|
|
}
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
if (strncmp(line, "base=", 5))
|
|
|
|
return -EINVAL;
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
base = simple_strtoull(line + 5, &ptr, 0);
|
2009-12-15 09:01:06 +07:00
|
|
|
ptr = skip_spaces(ptr);
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
if (strncmp(ptr, "size=", 5))
|
|
|
|
return -EINVAL;
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
size = simple_strtoull(ptr + 5, &ptr, 0);
|
|
|
|
if ((base & 0xfff) || (size & 0xfff))
|
|
|
|
return -EINVAL;
|
2009-12-15 09:01:06 +07:00
|
|
|
ptr = skip_spaces(ptr);
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
if (strncmp(ptr, "type=", 5))
|
|
|
|
return -EINVAL;
|
2009-12-15 09:01:06 +07:00
|
|
|
ptr = skip_spaces(ptr + 5);
|
2009-07-04 09:23:40 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
|
|
|
|
if (strcmp(ptr, mtrr_strings[i]))
|
|
|
|
continue;
|
|
|
|
base >>= PAGE_SHIFT;
|
|
|
|
size >>= PAGE_SHIFT;
|
2009-07-04 09:23:40 +07:00
|
|
|
err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
|
2005-04-17 05:20:36 +07:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2005-10-31 05:59:44 +07:00
|
|
|
static long
|
|
|
|
mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2005-10-31 05:59:44 +07:00
|
|
|
int err = 0;
|
2005-04-17 05:20:36 +07:00
|
|
|
mtrr_type type;
|
2012-02-28 06:15:25 +07:00
|
|
|
unsigned long base;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
unsigned long size;
|
2005-04-17 05:20:36 +07:00
|
|
|
struct mtrr_sentry sentry;
|
|
|
|
struct mtrr_gentry gentry;
|
|
|
|
void __user *arg = (void __user *) __arg;
|
|
|
|
|
2005-10-31 05:59:44 +07:00
|
|
|
switch (cmd) {
|
|
|
|
case MTRRIOC_ADD_ENTRY:
|
|
|
|
case MTRRIOC_SET_ENTRY:
|
|
|
|
case MTRRIOC_DEL_ENTRY:
|
|
|
|
case MTRRIOC_KILL_ENTRY:
|
|
|
|
case MTRRIOC_ADD_PAGE_ENTRY:
|
|
|
|
case MTRRIOC_SET_PAGE_ENTRY:
|
|
|
|
case MTRRIOC_DEL_PAGE_ENTRY:
|
|
|
|
case MTRRIOC_KILL_PAGE_ENTRY:
|
|
|
|
if (copy_from_user(&sentry, arg, sizeof sentry))
|
|
|
|
return -EFAULT;
|
|
|
|
break;
|
|
|
|
case MTRRIOC_GET_ENTRY:
|
|
|
|
case MTRRIOC_GET_PAGE_ENTRY:
|
|
|
|
if (copy_from_user(&gentry, arg, sizeof gentry))
|
|
|
|
return -EFAULT;
|
|
|
|
break;
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_ADD_ENTRY:
|
|
|
|
case MTRRIOC32_SET_ENTRY:
|
|
|
|
case MTRRIOC32_DEL_ENTRY:
|
|
|
|
case MTRRIOC32_KILL_ENTRY:
|
|
|
|
case MTRRIOC32_ADD_PAGE_ENTRY:
|
|
|
|
case MTRRIOC32_SET_PAGE_ENTRY:
|
|
|
|
case MTRRIOC32_DEL_PAGE_ENTRY:
|
|
|
|
case MTRRIOC32_KILL_PAGE_ENTRY: {
|
2009-07-04 09:23:40 +07:00
|
|
|
struct mtrr_sentry32 __user *s32;
|
|
|
|
|
|
|
|
s32 = (struct mtrr_sentry32 __user *)__arg;
|
2005-10-31 05:59:44 +07:00
|
|
|
err = get_user(sentry.base, &s32->base);
|
|
|
|
err |= get_user(sentry.size, &s32->size);
|
|
|
|
err |= get_user(sentry.type, &s32->type);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case MTRRIOC32_GET_ENTRY:
|
|
|
|
case MTRRIOC32_GET_PAGE_ENTRY: {
|
2009-07-04 09:23:40 +07:00
|
|
|
struct mtrr_gentry32 __user *g32;
|
|
|
|
|
|
|
|
g32 = (struct mtrr_gentry32 __user *)__arg;
|
2005-10-31 05:59:44 +07:00
|
|
|
err = get_user(gentry.regnum, &g32->regnum);
|
|
|
|
err |= get_user(gentry.base, &g32->base);
|
|
|
|
err |= get_user(gentry.size, &g32->size);
|
|
|
|
err |= get_user(gentry.type, &g32->type);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
switch (cmd) {
|
|
|
|
default:
|
|
|
|
return -ENOTTY;
|
|
|
|
case MTRRIOC_ADD_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_ADD_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
err =
|
2008-01-30 19:30:31 +07:00
|
|
|
mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
|
2005-04-17 05:20:36 +07:00
|
|
|
file, 0);
|
|
|
|
break;
|
|
|
|
case MTRRIOC_SET_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_SET_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
2008-01-30 19:30:31 +07:00
|
|
|
err = mtrr_add(sentry.base, sentry.size, sentry.type, false);
|
2005-04-17 05:20:36 +07:00
|
|
|
break;
|
|
|
|
case MTRRIOC_DEL_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_DEL_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
err = mtrr_file_del(sentry.base, sentry.size, file, 0);
|
|
|
|
break;
|
|
|
|
case MTRRIOC_KILL_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_KILL_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
err = mtrr_del(-1, sentry.base, sentry.size);
|
|
|
|
break;
|
|
|
|
case MTRRIOC_GET_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_GET_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (gentry.regnum >= num_var_ranges)
|
|
|
|
return -EINVAL;
|
2012-02-28 06:15:25 +07:00
|
|
|
mtrr_if->get(gentry.regnum, &base, &size, &type);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Hide entries that go above 4GB */
|
2012-02-28 06:15:25 +07:00
|
|
|
if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
|| size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)))
|
2005-04-17 05:20:36 +07:00
|
|
|
gentry.base = gentry.size = gentry.type = 0;
|
|
|
|
else {
|
2012-02-28 06:15:25 +07:00
|
|
|
gentry.base = base << PAGE_SHIFT;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
gentry.size = size << PAGE_SHIFT;
|
2005-04-17 05:20:36 +07:00
|
|
|
gentry.type = type;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
case MTRRIOC_ADD_PAGE_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_ADD_PAGE_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
err =
|
2008-01-30 19:30:31 +07:00
|
|
|
mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
|
2005-04-17 05:20:36 +07:00
|
|
|
file, 1);
|
|
|
|
break;
|
|
|
|
case MTRRIOC_SET_PAGE_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_SET_PAGE_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
2008-01-30 19:30:31 +07:00
|
|
|
err =
|
|
|
|
mtrr_add_page(sentry.base, sentry.size, sentry.type, false);
|
2005-04-17 05:20:36 +07:00
|
|
|
break;
|
|
|
|
case MTRRIOC_DEL_PAGE_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_DEL_PAGE_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
err = mtrr_file_del(sentry.base, sentry.size, file, 1);
|
|
|
|
break;
|
|
|
|
case MTRRIOC_KILL_PAGE_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_KILL_PAGE_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
err = mtrr_del_page(-1, sentry.base, sentry.size);
|
|
|
|
break;
|
|
|
|
case MTRRIOC_GET_PAGE_ENTRY:
|
2007-02-13 19:26:26 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_GET_PAGE_ENTRY:
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
if (gentry.regnum >= num_var_ranges)
|
|
|
|
return -EINVAL;
|
2012-02-28 06:15:25 +07:00
|
|
|
mtrr_if->get(gentry.regnum, &base, &size, &type);
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
/* Hide entries that would overflow */
|
|
|
|
if (size != (__typeof__(gentry.size))size)
|
|
|
|
gentry.base = gentry.size = gentry.type = 0;
|
|
|
|
else {
|
2012-02-28 06:15:25 +07:00
|
|
|
gentry.base = base;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
gentry.size = size;
|
|
|
|
gentry.type = type;
|
|
|
|
}
|
2005-10-31 05:59:44 +07:00
|
|
|
break;
|
|
|
|
}
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-10-31 05:59:44 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2009-07-04 09:23:40 +07:00
|
|
|
switch (cmd) {
|
2005-10-31 05:59:44 +07:00
|
|
|
case MTRRIOC_GET_ENTRY:
|
|
|
|
case MTRRIOC_GET_PAGE_ENTRY:
|
2005-04-17 05:20:36 +07:00
|
|
|
if (copy_to_user(arg, &gentry, sizeof gentry))
|
2005-10-31 05:59:44 +07:00
|
|
|
err = -EFAULT;
|
2005-04-17 05:20:36 +07:00
|
|
|
break;
|
2005-10-31 05:59:44 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
case MTRRIOC32_GET_ENTRY:
|
|
|
|
case MTRRIOC32_GET_PAGE_ENTRY: {
|
2009-07-04 09:23:40 +07:00
|
|
|
struct mtrr_gentry32 __user *g32;
|
|
|
|
|
|
|
|
g32 = (struct mtrr_gentry32 __user *)__arg;
|
2005-10-31 05:59:44 +07:00
|
|
|
err = put_user(gentry.base, &g32->base);
|
|
|
|
err |= put_user(gentry.size, &g32->size);
|
|
|
|
err |= put_user(gentry.regnum, &g32->regnum);
|
|
|
|
err |= put_user(gentry.type, &g32->type);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
2005-10-31 05:59:44 +07:00
|
|
|
return err;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2009-07-04 09:23:40 +07:00
|
|
|
static int mtrr_close(struct inode *ino, struct file *file)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
unsigned int *fcount = FILE_FCOUNT(file);
|
2009-07-04 09:23:40 +07:00
|
|
|
int i, max;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
if (fcount != NULL) {
|
|
|
|
max = num_var_ranges;
|
|
|
|
for (i = 0; i < max; ++i) {
|
|
|
|
while (fcount[i] > 0) {
|
|
|
|
mtrr_del(i, 0, 0);
|
|
|
|
--fcount[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
kfree(fcount);
|
|
|
|
FILE_FCOUNT(file) = NULL;
|
|
|
|
}
|
|
|
|
return single_release(ino, file);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mtrr_seq_show(struct seq_file *seq, void *offset);
|
|
|
|
|
|
|
|
static int mtrr_open(struct inode *inode, struct file *file)
|
|
|
|
{
|
2009-07-04 09:23:40 +07:00
|
|
|
if (!mtrr_if)
|
2005-04-17 05:20:36 +07:00
|
|
|
return -EIO;
|
2009-07-04 09:23:40 +07:00
|
|
|
if (!mtrr_if->get)
|
|
|
|
return -ENXIO;
|
2005-04-17 05:20:36 +07:00
|
|
|
return single_open(file, mtrr_seq_show, NULL);
|
|
|
|
}
|
|
|
|
|
2007-02-12 15:55:31 +07:00
|
|
|
static const struct file_operations mtrr_fops = {
|
2009-07-04 09:23:40 +07:00
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.open = mtrr_open,
|
|
|
|
.read = seq_read,
|
|
|
|
.llseek = seq_lseek,
|
|
|
|
.write = mtrr_write,
|
|
|
|
.unlocked_ioctl = mtrr_ioctl,
|
|
|
|
.compat_ioctl = mtrr_ioctl,
|
|
|
|
.release = mtrr_close,
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
static int mtrr_seq_show(struct seq_file *seq, void *offset)
|
|
|
|
{
|
|
|
|
char factor;
|
|
|
|
int i, max, len;
|
|
|
|
mtrr_type type;
|
[PATCH] i386: fix MTRR code
Until not so long ago, there were system log messages pointing to
inconsistent MTRR setup of the video frame buffer caused by the way vesafb
and X worked. While vesafb was fixed meanwhile, I believe fixing it there
only hides a shortcoming in the MTRR code itself, in that that code is not
symmetric with respect to the ordering of attempts to set up two (or more)
regions where one contains the other. In the current shape, it permits
only setting up sub-regions of pre-exisiting ones. The patch below makes
this symmetric.
While working on that I noticed a few more inconsistencies in that code,
namely
- use of 'unsigned int' for sizes in many, but not all places (the patch
is converting this to use 'unsigned long' everywhere, which specifically
might be necessary for x86-64 once a processor supporting more than 44
physical address bits would become available)
- the code to correct inconsistent settings during secondary processor
startup tried (if necessary) to correct, among other things, the value
in IA32_MTRR_DEF_TYPE, however the newly computed value would never get
used (i.e. stored in the respective MSR)
- the generic range validation code checked that the end of the
to-be-added range would be above 1MB; the value checked should have been
the start of the range
- when contained regions are detected, previously this was allowed only
when the old region was uncacheable; this can be symmetric (i.e. the new
region can also be uncacheable) and even further as per Intel's
documentation write-trough and write-back for either region is also
compatible with the respective opposite in the other
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2006-12-07 08:14:09 +07:00
|
|
|
unsigned long base, size;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
len = 0;
|
|
|
|
max = num_var_ranges;
|
|
|
|
for (i = 0; i < max; i++) {
|
|
|
|
mtrr_if->get(i, &base, &size, &type);
|
2009-07-04 09:23:40 +07:00
|
|
|
if (size == 0) {
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 19:33:18 +07:00
|
|
|
mtrr_usage_table[i] = 0;
|
2009-07-04 09:23:40 +07:00
|
|
|
continue;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
2009-07-04 09:23:40 +07:00
|
|
|
if (size < (0x100000 >> PAGE_SHIFT)) {
|
|
|
|
/* less than 1MB */
|
|
|
|
factor = 'K';
|
|
|
|
size <<= PAGE_SHIFT - 10;
|
|
|
|
} else {
|
|
|
|
factor = 'M';
|
|
|
|
size >>= 20 - PAGE_SHIFT;
|
|
|
|
}
|
|
|
|
/* Base can be > 32bit */
|
|
|
|
len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
|
|
|
|
"(%5luMB), size=%5lu%cB, count=%d: %s\n",
|
|
|
|
i, base, base >> (20 - PAGE_SHIFT), size,
|
|
|
|
factor, mtrr_usage_table[i],
|
|
|
|
mtrr_attrib_to_str(type));
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create the "mtrr" proc entry at boot.
 *
 * The entry is only created when the boot CPU advertises at least one of
 * the MTRR-style features tested below; otherwise -ENODEV is returned.
 * The entry is created with owner-write and world-read permissions and is
 * backed by mtrr_fops.  Returns 0 once proc_create() has been called.
 */
static int __init mtrr_if_init(void)
{
	struct cpuinfo_x86 *cpu = &boot_cpu_data;

	if (!(cpu_has(cpu, X86_FEATURE_MTRR) ||
	      cpu_has(cpu, X86_FEATURE_K6_MTRR) ||
	      cpu_has(cpu, X86_FEATURE_CYRIX_ARR) ||
	      cpu_has(cpu, X86_FEATURE_CENTAUR_MCR)))
		return -ENODEV;

	proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);

	return 0;
}
arch_initcall(mtrr_if_init);
|
|
|
|
#endif /* CONFIG_PROC_FS */
|