mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-15 10:16:43 +07:00
0429fbc0bd
Pull percpu consistent-ops changes from Tejun Heo: "Way back, before the current percpu allocator was implemented, static and dynamic percpu memory areas were allocated and handled separately and had their own accessors. The distinction has been gone for many years now; however, the now duplicate two sets of accessors remained with the pointer based ones - this_cpu_*() - evolving various other operations over time. During the process, we also accumulated other inconsistent operations. This pull request contains Christoph's patches to clean up the duplicate accessor situation. __get_cpu_var() uses are replaced with with this_cpu_ptr() and __this_cpu_ptr() with raw_cpu_ptr(). Unfortunately, the former sometimes is tricky thanks to C being a bit messy with the distinction between lvalues and pointers, which led to a rather ugly solution for cpumask_var_t involving the introduction of this_cpu_cpumask_var_ptr(). This converts most of the uses but not all. Christoph will follow up with the remaining conversions in this merge window and hopefully remove the obsolete accessors" * 'for-3.18-consistent-ops' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (38 commits) irqchip: Properly fetch the per cpu offset percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t -fix ia64: sn_nodepda cannot be assigned to after this_cpu conversion. Use __this_cpu_write. percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t Revert "powerpc: Replace __get_cpu_var uses" percpu: Remove __this_cpu_ptr clocksource: Replace __this_cpu_ptr with raw_cpu_ptr sparc: Replace __get_cpu_var uses avr32: Replace __get_cpu_var with __this_cpu_write blackfin: Replace __get_cpu_var uses tile: Use this_cpu_ptr() for hardware counters tile: Replace __get_cpu_var uses powerpc: Replace __get_cpu_var uses alpha: Replace __get_cpu_var ia64: Replace __get_cpu_var uses s390: cio driver &__get_cpu_var replacements s390: Replace __get_cpu_var uses mips: Replace __get_cpu_var uses MIPS: Replace __get_cpu_var uses in FPU emulator. arm: Replace __this_cpu_ptr with raw_cpu_ptr ...
301 lines
8.3 KiB
C
301 lines
8.3 KiB
C
/*
|
|
* Copyright 2010 Tilera Corporation. All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation, version 2.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
|
* NON INFRINGEMENT. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* Support the cycle counter clocksource and tile timer clock event device.
|
|
*/
|
|
|
|
#include <linux/time.h>
|
|
#include <linux/timex.h>
|
|
#include <linux/clocksource.h>
|
|
#include <linux/clockchips.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/module.h>
|
|
#include <linux/timekeeper_internal.h>
|
|
#include <asm/irq_regs.h>
|
|
#include <asm/traps.h>
|
|
#include <asm/vdso.h>
|
|
#include <hv/hypervisor.h>
|
|
#include <arch/interrupts.h>
|
|
#include <arch/spr_def.h>
|
|
|
|
|
|
/*
|
|
* Define the cycle counter clock source.
|
|
*/
|
|
|
|
/* How many cycles per second we are running at. */
|
|
static cycles_t cycles_per_sec __write_once;
|
|
|
|
cycles_t get_clock_rate(void)
|
|
{
|
|
return cycles_per_sec;
|
|
}
|
|
|
|
#if CHIP_HAS_SPLIT_CYCLE()
|
|
cycles_t get_cycles(void)
|
|
{
|
|
unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
|
|
unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
|
|
unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);
|
|
|
|
while (unlikely(high != high2)) {
|
|
low = __insn_mfspr(SPR_CYCLE_LOW);
|
|
high = high2;
|
|
high2 = __insn_mfspr(SPR_CYCLE_HIGH);
|
|
}
|
|
|
|
return (((cycles_t)high) << 32) | low;
|
|
}
|
|
EXPORT_SYMBOL(get_cycles);
|
|
#endif
|
|
|
|
/*
|
|
* We use a relatively small shift value so that sched_clock()
|
|
* won't wrap around very often.
|
|
*/
|
|
#define SCHED_CLOCK_SHIFT 10
|
|
|
|
static unsigned long sched_clock_mult __write_once;
|
|
|
|
static cycles_t clocksource_get_cycles(struct clocksource *cs)
|
|
{
|
|
return get_cycles();
|
|
}
|
|
|
|
static struct clocksource cycle_counter_cs = {
|
|
.name = "cycle counter",
|
|
.rating = 300,
|
|
.read = clocksource_get_cycles,
|
|
.mask = CLOCKSOURCE_MASK(64),
|
|
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
|
};
|
|
|
|
/*
|
|
* Called very early from setup_arch() to set cycles_per_sec.
|
|
* We initialize it early so we can use it to set up loops_per_jiffy.
|
|
*/
|
|
void __init setup_clock(void)
|
|
{
|
|
cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
|
|
sched_clock_mult =
|
|
clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
|
|
}
|
|
|
|
void __init calibrate_delay(void)
|
|
{
|
|
loops_per_jiffy = get_clock_rate() / HZ;
|
|
pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n",
|
|
loops_per_jiffy/(500000/HZ),
|
|
(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
|
|
}
|
|
|
|
/* Called fairly late in init/main.c, but before we go smp. */
|
|
void __init time_init(void)
|
|
{
|
|
/* Initialize and register the clock source. */
|
|
clocksource_register_hz(&cycle_counter_cs, cycles_per_sec);
|
|
|
|
/* Start up the tile-timer interrupt source on the boot cpu. */
|
|
setup_tile_timer();
|
|
}
|
|
|
|
/*
|
|
* Define the tile timer clock event device. The timer is driven by
|
|
* the TILE_TIMER_CONTROL register, which consists of a 31-bit down
|
|
* counter, plus bit 31, which signifies that the counter has wrapped
|
|
* from zero to (2**31) - 1. The INT_TILE_TIMER interrupt will be
|
|
* raised as long as bit 31 is set.
|
|
*
|
|
* The TILE_MINSEC value represents the largest range of real-time
|
|
* we can possibly cover with the timer, based on MAX_TICK combined
|
|
* with the slowest reasonable clock rate we might run at.
|
|
*/
|
|
|
|
#define MAX_TICK 0x7fffffff /* we have 31 bits of countdown timer */
|
|
#define TILE_MINSEC 5 /* timer covers no more than 5 seconds */
|
|
|
|
static int tile_timer_set_next_event(unsigned long ticks,
|
|
struct clock_event_device *evt)
|
|
{
|
|
BUG_ON(ticks > MAX_TICK);
|
|
__insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks);
|
|
arch_local_irq_unmask_now(INT_TILE_TIMER);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Whenever anyone tries to change modes, we just mask interrupts
|
|
* and wait for the next event to get set.
|
|
*/
|
|
static void tile_timer_set_mode(enum clock_event_mode mode,
|
|
struct clock_event_device *evt)
|
|
{
|
|
arch_local_irq_mask_now(INT_TILE_TIMER);
|
|
}
|
|
|
|
/*
|
|
* Set min_delta_ns to 1 microsecond, since it takes about
|
|
* that long to fire the interrupt.
|
|
*/
|
|
static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = {
|
|
.name = "tile timer",
|
|
.features = CLOCK_EVT_FEAT_ONESHOT,
|
|
.min_delta_ns = 1000,
|
|
.rating = 100,
|
|
.irq = -1,
|
|
.set_next_event = tile_timer_set_next_event,
|
|
.set_mode = tile_timer_set_mode,
|
|
};
|
|
|
|
void setup_tile_timer(void)
|
|
{
|
|
struct clock_event_device *evt = this_cpu_ptr(&tile_timer);
|
|
|
|
/* Fill in fields that are speed-specific. */
|
|
clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC);
|
|
evt->max_delta_ns = clockevent_delta2ns(MAX_TICK, evt);
|
|
|
|
/* Mark as being for this cpu only. */
|
|
evt->cpumask = cpumask_of(smp_processor_id());
|
|
|
|
/* Start out with timer not firing. */
|
|
arch_local_irq_mask_now(INT_TILE_TIMER);
|
|
|
|
/* Register tile timer. */
|
|
clockevents_register_device(evt);
|
|
}
|
|
|
|
/* Called from the interrupt vector. */
|
|
void do_timer_interrupt(struct pt_regs *regs, int fault_num)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
struct clock_event_device *evt = this_cpu_ptr(&tile_timer);
|
|
|
|
/*
|
|
* Mask the timer interrupt here, since we are a oneshot timer
|
|
* and there are now by definition no events pending.
|
|
*/
|
|
arch_local_irq_mask(INT_TILE_TIMER);
|
|
|
|
/* Track time spent here in an interrupt context */
|
|
irq_enter();
|
|
|
|
/* Track interrupt count. */
|
|
__this_cpu_inc(irq_stat.irq_timer_count);
|
|
|
|
/* Call the generic timer handler */
|
|
evt->event_handler(evt);
|
|
|
|
/*
|
|
* Track time spent against the current process again and
|
|
* process any softirqs if they are waiting.
|
|
*/
|
|
irq_exit();
|
|
|
|
set_irq_regs(old_regs);
|
|
}
|
|
|
|
/*
|
|
* Scheduler clock - returns current time in nanosec units.
|
|
* Note that with LOCKDEP, this is called during lockdep_init(), and
|
|
* we will claim that sched_clock() is zero for a little while, until
|
|
* we run setup_clock(), above.
|
|
*/
|
|
unsigned long long sched_clock(void)
|
|
{
|
|
return clocksource_cyc2ns(get_cycles(),
|
|
sched_clock_mult, SCHED_CLOCK_SHIFT);
|
|
}
|
|
|
|
int setup_profiling_timer(unsigned int multiplier)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* Use the tile timer to convert nsecs to core clock cycles, relying
|
|
* on it having the same frequency as SPR_CYCLE.
|
|
*/
|
|
cycles_t ns2cycles(unsigned long nsecs)
|
|
{
|
|
/*
|
|
* We do not have to disable preemption here as each core has the same
|
|
* clock frequency.
|
|
*/
|
|
struct clock_event_device *dev = raw_cpu_ptr(&tile_timer);
|
|
|
|
/*
|
|
* as in clocksource.h and x86's timer.h, we split the calculation
|
|
* into 2 parts to avoid unecessary overflow of the intermediate
|
|
* value. This will not lead to any loss of precision.
|
|
*/
|
|
u64 quot = (u64)nsecs >> dev->shift;
|
|
u64 rem = (u64)nsecs & ((1ULL << dev->shift) - 1);
|
|
return quot * dev->mult + ((rem * dev->mult) >> dev->shift);
|
|
}
|
|
|
|
void update_vsyscall_tz(void)
|
|
{
|
|
write_seqcount_begin(&vdso_data->tz_seq);
|
|
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
|
|
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
|
|
write_seqcount_end(&vdso_data->tz_seq);
|
|
}
|
|
|
|
void update_vsyscall(struct timekeeper *tk)
|
|
{
|
|
if (tk->tkr.clock != &cycle_counter_cs)
|
|
return;
|
|
|
|
write_seqcount_begin(&vdso_data->tb_seq);
|
|
|
|
vdso_data->cycle_last = tk->tkr.cycle_last;
|
|
vdso_data->mask = tk->tkr.mask;
|
|
vdso_data->mult = tk->tkr.mult;
|
|
vdso_data->shift = tk->tkr.shift;
|
|
|
|
vdso_data->wall_time_sec = tk->xtime_sec;
|
|
vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
|
|
|
|
vdso_data->monotonic_time_sec = tk->xtime_sec
|
|
+ tk->wall_to_monotonic.tv_sec;
|
|
vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
|
|
+ ((u64)tk->wall_to_monotonic.tv_nsec
|
|
<< tk->tkr.shift);
|
|
while (vdso_data->monotonic_time_snsec >=
|
|
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
|
|
vdso_data->monotonic_time_snsec -=
|
|
((u64)NSEC_PER_SEC) << tk->tkr.shift;
|
|
vdso_data->monotonic_time_sec++;
|
|
}
|
|
|
|
vdso_data->wall_time_coarse_sec = tk->xtime_sec;
|
|
vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
|
|
tk->tkr.shift);
|
|
|
|
vdso_data->monotonic_time_coarse_sec =
|
|
vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
|
|
vdso_data->monotonic_time_coarse_nsec =
|
|
vdso_data->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
|
|
|
|
while (vdso_data->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
|
|
vdso_data->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
|
|
vdso_data->monotonic_time_coarse_sec++;
|
|
}
|
|
|
|
write_seqcount_end(&vdso_data->tb_seq);
|
|
}
|