linux_dsm_epyc7002/arch/mips/kernel/cevt-r4k.c
James Hogan 9d7f29cdb4 MIPS: cevt-r4k: Fix out-of-bounds array access
calculate_min_delta() may incorrectly access a 4th element of buf2[]
which only has 3 elements. This may trigger undefined behaviour and has
been reported to cause strange crashes in start_kernel() sometime after
timer initialization when built with GCC 5.3, possibly due to
register/stack corruption:

sched_clock: 32 bits at 200MHz, resolution 5ns, wraps every 10737418237ns
CPU 0 Unable to handle kernel paging request at virtual address ffffb0aa, epc == 8067daa8, ra == 8067da84
Oops[]:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.18 
task: 8065e3e0 task.stack: 80644000
$ 0   : 00000000 00000001 00000000 00000000
$ 4   : 8065b4d0 00000000 805d0000 00000010
$ 8   : 00000010 80321400 fffff000 812de408
$12   : 00000000 00000000 00000000 ffffffff
$16   : 00000002 ffffffff 80660000 806a666c
$20   : 806c0000 00000000 00000000 00000000
$24   : 00000000 00000010
$28   : 80644000 80645ed0 00000000 8067da84
Hi    : 00000000
Lo    : 00000000
epc   : 8067daa8 start_kernel+0x33c/0x500
ra    : 8067da84 start_kernel+0x318/0x500
Status: 11000402 KERNEL EXL
Cause : 4080040c (ExcCode 03)
BadVA : ffffb0aa
PrId  : 0501992c (MIPS 1004Kc)
Modules linked in:
Process swapper/0 (pid: 0, threadinfo=80644000, task=8065e3e0, tls=00000000)
Call Trace:
[<8067daa8>] start_kernel+0x33c/0x500
Code: 24050240  0c0131f9  24849c64 <a200b0a8> 41606020  000000c0  0c1a45e6 00000000  0c1a5f44

UBSAN also detects the same issue:

================================================================
UBSAN: Undefined behaviour in arch/mips/kernel/cevt-r4k.c:85:41
load of address 80647e4c with insufficient space
for an object of type 'unsigned int'
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.18 
Call Trace:
[<80028f70>] show_stack+0x88/0xa4
[<80312654>] dump_stack+0x84/0xc0
[<8034163c>] ubsan_epilogue+0x14/0x50
[<803417d8>] __ubsan_handle_type_mismatch+0x160/0x168
[<8002dab0>] r4k_clockevent_init+0x544/0x764
[<80684d34>] time_init+0x18/0x90
[<8067fa5c>] start_kernel+0x2f0/0x500
=================================================================

buf2[] is intentionally only 3 elements so that the last element is the
median once 5 samples have been inserted, so explicitly prevent the
possibility of comparing against the 4th element rather than extending
the array.

Fixes: 1fa405552e ("MIPS: cevt-r4k: Dynamically calculate min_delta_ns")
Reported-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Tested-by: Rabin Vincent <rabinv@axis.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.7.x-
Patchwork: https://patchwork.linux-mips.org/patch/15892/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-04-10 13:31:12 +02:00

299 lines
7.5 KiB
C

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2007 MIPS Technologies, Inc.
* Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
*/
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/irq.h>
#include <asm/time.h>
#include <asm/cevt-r4k.h>
static int mips_next_event(unsigned long delta,
struct clock_event_device *evt)
{
unsigned int cnt;
int res;
cnt = read_c0_count();
cnt += delta;
write_c0_compare(cnt);
res = ((int)(read_c0_count() - cnt) >= 0) ? -ETIME : 0;
return res;
}
/**
* calculate_min_delta() - Calculate a good minimum delta for mips_next_event().
*
* Running under virtualisation can introduce overhead into mips_next_event() in
* the form of hypervisor emulation of CP0_Count/CP0_Compare registers,
* potentially with an unnatural frequency, which makes a fixed min_delta_ns
* value inappropriate as it may be too small.
*
* It can also introduce occasional latency from the guest being descheduled.
*
* This function calculates a good minimum delta based roughly on the 75th
* percentile of the time taken to do the mips_next_event() sequence, in order
* to handle potentially higher overhead while also eliminating outliers due to
* unpredictable hypervisor latency (which can be handled by retries).
*
* Return: An appropriate minimum delta for the clock event device.
*/
static unsigned int calculate_min_delta(void)
{
unsigned int cnt, i, j, k, l;
unsigned int buf1[4], buf2[3];
unsigned int min_delta;
/*
* Calculate the median of 5 75th percentiles of 5 samples of how long
* it takes to set CP0_Compare = CP0_Count + delta.
*/
for (i = 0; i < 5; ++i) {
for (j = 0; j < 5; ++j) {
/*
* This is like the code in mips_next_event(), and
* directly measures the borderline "safe" delta.
*/
cnt = read_c0_count();
write_c0_compare(cnt);
cnt = read_c0_count() - cnt;
/* Sorted insert into buf1 */
for (k = 0; k < j; ++k) {
if (cnt < buf1[k]) {
l = min_t(unsigned int,
j, ARRAY_SIZE(buf1) - 1);
for (; l > k; --l)
buf1[l] = buf1[l - 1];
break;
}
}
if (k < ARRAY_SIZE(buf1))
buf1[k] = cnt;
}
/* Sorted insert of 75th percentile into buf2 */
for (k = 0; k < i && k < ARRAY_SIZE(buf2); ++k) {
if (buf1[ARRAY_SIZE(buf1) - 1] < buf2[k]) {
l = min_t(unsigned int,
i, ARRAY_SIZE(buf2) - 1);
for (; l > k; --l)
buf2[l] = buf2[l - 1];
break;
}
}
if (k < ARRAY_SIZE(buf2))
buf2[k] = buf1[ARRAY_SIZE(buf1) - 1];
}
/* Use 2 * median of 75th percentiles */
min_delta = buf2[ARRAY_SIZE(buf2) - 1] * 2;
/* Don't go too low */
if (min_delta < 0x300)
min_delta = 0x300;
pr_debug("%s: median 75th percentile=%#x, min_delta=%#x\n",
__func__, buf2[ARRAY_SIZE(buf2) - 1], min_delta);
return min_delta;
}
DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
int cp0_timer_irq_installed;
/*
* Possibly handle a performance counter interrupt.
* Return true if the timer interrupt should not be checked
*/
static inline int handle_perf_irq(int r2)
{
/*
* The performance counter overflow interrupt may be shared with the
* timer interrupt (cp0_perfcount_irq < 0). If it is and a
* performance counter has overflowed (perf_irq() == IRQ_HANDLED)
* and we can't reliably determine if a counter interrupt has also
* happened (!r2) then don't check for a timer interrupt.
*/
return (cp0_perfcount_irq < 0) &&
perf_irq() == IRQ_HANDLED &&
!r2;
}
irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
{
const int r2 = cpu_has_mips_r2_r6;
struct clock_event_device *cd;
int cpu = smp_processor_id();
/*
* Suckage alert:
* Before R2 of the architecture there was no way to see if a
* performance counter interrupt was pending, so we have to run
* the performance counter interrupt handler anyway.
*/
if (handle_perf_irq(r2))
return IRQ_HANDLED;
/*
* The same applies to performance counter interrupts. But with the
* above we now know that the reason we got here must be a timer
* interrupt. Being the paranoiacs we are we check anyway.
*/
if (!r2 || (read_c0_cause() & CAUSEF_TI)) {
/* Clear Count/Compare Interrupt */
write_c0_compare(read_c0_compare());
cd = &per_cpu(mips_clockevent_device, cpu);
cd->event_handler(cd);
return IRQ_HANDLED;
}
return IRQ_NONE;
}
struct irqaction c0_compare_irqaction = {
.handler = c0_compare_interrupt,
/*
* IRQF_SHARED: The timer interrupt may be shared with other interrupts
* such as perf counter and FDC interrupts.
*/
.flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED,
.name = "timer",
};
void mips_event_handler(struct clock_event_device *dev)
{
}
/*
* FIXME: This doesn't hold for the relocated E9000 compare interrupt.
*/
static int c0_compare_int_pending(void)
{
/* When cpu_has_mips_r2, this checks Cause.TI instead of Cause.IP7 */
return (read_c0_cause() >> cp0_compare_irq_shift) & (1ul << CAUSEB_IP);
}
/*
* Compare interrupt can be routed and latched outside the core,
* so wait up to worst case number of cycle counter ticks for timer interrupt
* changes to propagate to the cause register.
*/
#define COMPARE_INT_SEEN_TICKS 50
int c0_compare_int_usable(void)
{
unsigned int delta;
unsigned int cnt;
#ifdef CONFIG_KVM_GUEST
return 1;
#endif
/*
* IP7 already pending? Try to clear it by acking the timer.
*/
if (c0_compare_int_pending()) {
cnt = read_c0_count();
write_c0_compare(cnt);
back_to_back_c0_hazard();
while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
if (!c0_compare_int_pending())
break;
if (c0_compare_int_pending())
return 0;
}
for (delta = 0x10; delta <= 0x400000; delta <<= 1) {
cnt = read_c0_count();
cnt += delta;
write_c0_compare(cnt);
back_to_back_c0_hazard();
if ((int)(read_c0_count() - cnt) < 0)
break;
/* increase delta if the timer was already expired */
}
while ((int)(read_c0_count() - cnt) <= 0)
; /* Wait for expiry */
while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
if (c0_compare_int_pending())
break;
if (!c0_compare_int_pending())
return 0;
cnt = read_c0_count();
write_c0_compare(cnt);
back_to_back_c0_hazard();
while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
if (!c0_compare_int_pending())
break;
if (c0_compare_int_pending())
return 0;
/*
* Feels like a real count / compare timer.
*/
return 1;
}
unsigned int __weak get_c0_compare_int(void)
{
return MIPS_CPU_IRQ_BASE + cp0_compare_irq;
}
int r4k_clockevent_init(void)
{
unsigned int cpu = smp_processor_id();
struct clock_event_device *cd;
unsigned int irq, min_delta;
if (!cpu_has_counter || !mips_hpt_frequency)
return -ENXIO;
if (!c0_compare_int_usable())
return -ENXIO;
/*
* With vectored interrupts things are getting platform specific.
* get_c0_compare_int is a hook to allow a platform to return the
* interrupt number of its liking.
*/
irq = get_c0_compare_int();
cd = &per_cpu(mips_clockevent_device, cpu);
cd->name = "MIPS";
cd->features = CLOCK_EVT_FEAT_ONESHOT |
CLOCK_EVT_FEAT_C3STOP |
CLOCK_EVT_FEAT_PERCPU;
min_delta = calculate_min_delta();
cd->rating = 300;
cd->irq = irq;
cd->cpumask = cpumask_of(cpu);
cd->set_next_event = mips_next_event;
cd->event_handler = mips_event_handler;
clockevents_config_and_register(cd, mips_hpt_frequency, min_delta, 0x7fffffff);
if (cp0_timer_irq_installed)
return 0;
cp0_timer_irq_installed = 1;
setup_irq(irq, &c0_compare_irqaction);
return 0;
}