linux_dsm_epyc7002/arch/s390/kernel/time.c

865 lines
22 KiB
C
Raw Normal View History

/*
* Time of day based timer functions.
*
* S390 version
* Copyright IBM Corp. 1999, 2008
* Author(s): Hartmut Penner (hp@de.ibm.com),
* Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
*
* Derived from "arch/i386/kernel/time.c"
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*/
#define KMSG_COMPONENT "time"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/stop_machine.h>
#include <linux/time.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/gfp.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <asm/facility.h>
#include <asm/delay.h>
#include <asm/div64.h>
#include <asm/vdso.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/vtimer.h>
#include <asm/stp.h>
#include <asm/cio.h>
#include "entry.h"
unsigned char tod_clock_base[16] __aligned(8) = {
/* Force to data section. */
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
EXPORT_SYMBOL_GPL(tod_clock_base);
u64 clock_comparator_max = -1ULL;
EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
EXPORT_SYMBOL(s390_epoch_delta_notifier);
unsigned char ptff_function_mask[16];
static unsigned long long lpar_offset;
static unsigned long long initial_leap_seconds;
static unsigned long long tod_steering_end;
static long long tod_steering_delta;
/*
* Get time offsets with PTFF
*/
void __init time_early_init(void)
{
struct ptff_qto qto;
struct ptff_qui qui;
/* Initialize TOD steering parameters */
tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
vdso_data->ts_end = tod_steering_end;
if (!test_facility(28))
return;
ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
/* get LPAR offset */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
lpar_offset = qto.tod_epoch_difference;
/* get initial leap seconds */
if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
initial_leap_seconds = (unsigned long long)
((long) qui.old_leap * 4096000000L);
}
/*
* Scheduler clock - returns current time in nanosec units.
*/
unsigned long long notrace sched_clock(void)
{
return tod_to_ns(get_tod_clock_monotonic());
}
NOKPROBE_SYMBOL(sched_clock);
/*
* Monotonic_clock - returns # of nanoseconds passed since time_init()
*/
unsigned long long monotonic_clock(void)
{
return sched_clock();
}
EXPORT_SYMBOL(monotonic_clock);
static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
{
unsigned long long high, low, rem, sec, nsec;
/* Split extendnd TOD clock to micro-seconds and sub-micro-seconds */
high = (*(unsigned long long *) clk) >> 4;
low = (*(unsigned long long *)&clk[7]) << 4;
/* Calculate seconds and nano-seconds */
sec = high;
rem = do_div(sec, 1000000);
nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32;
xt->tv_sec = sec;
xt->tv_nsec = nsec;
}
void clock_comparator_work(void)
{
struct clock_event_device *cd;
S390_lowcore.clock_comparator = clock_comparator_max;
s390: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to this_cpu_inc(y) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> CC: linux390@de.ibm.com Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-18 00:30:45 +07:00
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
S390_lowcore.clock_comparator = get_tod_clock() + delta;
set_clock_comparator(S390_lowcore.clock_comparator);
return 0;
}
/*
* Set up lowcore and control register of the current cpu to
* enable TOD clock and clock comparator interrupts.
*/
void init_cpu_timer(void)
{
struct clock_event_device *cd;
int cpu;
S390_lowcore.clock_comparator = clock_comparator_max;
set_clock_comparator(S390_lowcore.clock_comparator);
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
cd->name = "comparator";
cd->features = CLOCK_EVT_FEAT_ONESHOT;
cd->mult = 16777;
cd->shift = 12;
cd->min_delta_ns = 1;
s390/time: Set ->min_delta_ticks and ->max_delta_ticks In preparation for making the clockevents core NTP correction aware, all clockevent device drivers must set ->min_delta_ticks and ->max_delta_ticks rather than ->min_delta_ns and ->max_delta_ns: a clockevent device's rate is going to change dynamically and thus, the ratio of ns to ticks ceases to stay invariant. Currently, the s390's CPU timer clockevent device is initialized as follows: cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; Note that the device's time to cycle conversion factor, i.e. cd->mult / (2^cd->shift), is approx. equal to 4. Hence, this would translate to cd->min_delta_ticks = 4; cd->max_delta_ticks = 4 * LONG_MAX; However, a minimum value of 1ns is in the range of noise anyway and the clockevent core will take care of this by increasing it to 1us or so. Furthermore, 4*LONG_MAX would overflow the unsigned long argument the clockevent devices gets programmed with. Thus, initialize ->min_delta_ticks with 1 and ->max_delta_ticks with ULONG_MAX. This patch alone doesn't introduce any change in functionality as the clockevents core still looks exclusively at the (untouched) ->min_delta_ns and ->max_delta_ns. As soon as this has changed, a followup patch will purge the initialization of ->min_delta_ns and ->max_delta_ns from this driver. Cc: Ingo Molnar <mingo@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Richard Cochran <richardcochran@gmail.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Stephen Boyd <sboyd@codeaurora.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: David Hildenbrand <dahi@linux.vnet.ibm.com> Cc: linux-s390@vger.kernel.org Signed-off-by: Nicolai Stange <nicstange@gmail.com> Signed-off-by: John Stultz <john.stultz@linaro.org>
2017-03-31 02:54:55 +07:00
cd->min_delta_ticks = 1;
cd->max_delta_ns = LONG_MAX;
s390/time: Set ->min_delta_ticks and ->max_delta_ticks In preparation for making the clockevents core NTP correction aware, all clockevent device drivers must set ->min_delta_ticks and ->max_delta_ticks rather than ->min_delta_ns and ->max_delta_ns: a clockevent device's rate is going to change dynamically and thus, the ratio of ns to ticks ceases to stay invariant. Currently, the s390's CPU timer clockevent device is initialized as follows: cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; Note that the device's time to cycle conversion factor, i.e. cd->mult / (2^cd->shift), is approx. equal to 4. Hence, this would translate to cd->min_delta_ticks = 4; cd->max_delta_ticks = 4 * LONG_MAX; However, a minimum value of 1ns is in the range of noise anyway and the clockevent core will take care of this by increasing it to 1us or so. Furthermore, 4*LONG_MAX would overflow the unsigned long argument the clockevent devices gets programmed with. Thus, initialize ->min_delta_ticks with 1 and ->max_delta_ticks with ULONG_MAX. This patch alone doesn't introduce any change in functionality as the clockevents core still looks exclusively at the (untouched) ->min_delta_ns and ->max_delta_ns. As soon as this has changed, a followup patch will purge the initialization of ->min_delta_ns and ->max_delta_ns from this driver. Cc: Ingo Molnar <mingo@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Richard Cochran <richardcochran@gmail.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Stephen Boyd <sboyd@codeaurora.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: David Hildenbrand <dahi@linux.vnet.ibm.com> Cc: linux-s390@vger.kernel.org Signed-off-by: Nicolai Stange <nicstange@gmail.com> Signed-off-by: John Stultz <john.stultz@linaro.org>
2017-03-31 02:54:55 +07:00
cd->max_delta_ticks = ULONG_MAX;
cd->rating = 400;
cd->cpumask = cpumask_of(cpu);
cd->set_next_event = s390_next_event;
clockevents_register_device(cd);
/* Enable clock comparator timer interrupt. */
__ctl_set_bit(0,11);
/* Always allow the timing alert external interrupt. */
__ctl_set_bit(0, 4);
}
static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned int param32,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
if (S390_lowcore.clock_comparator == clock_comparator_max)
set_clock_comparator(S390_lowcore.clock_comparator);
}
static void stp_timing_alert(struct stp_irq_parm *);
static void timing_alert_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
inc_irq_stat(IRQEXT_TLA);
if (param32 & 0x00038000)
stp_timing_alert((struct stp_irq_parm *) &param32);
}
static void stp_reset(void);
void read_persistent_clock64(struct timespec64 *ts)
{
unsigned char clk[STORE_CLOCK_EXT_SIZE];
__u64 delta;
delta = initial_leap_seconds + TOD_UNIX_EPOCH;
get_tod_clock_ext(clk);
*(__u64 *) &clk[1] -= delta;
if (*(__u64 *) &clk[1] > delta)
clk[0]--;
ext_to_timespec64(clk, ts);
}
void read_boot_clock64(struct timespec64 *ts)
{
unsigned char clk[STORE_CLOCK_EXT_SIZE];
__u64 delta;
delta = initial_leap_seconds + TOD_UNIX_EPOCH;
memcpy(clk, tod_clock_base, 16);
*(__u64 *) &clk[1] -= delta;
if (*(__u64 *) &clk[1] > delta)
clk[0]--;
ext_to_timespec64(clk, ts);
}
static u64 read_tod_clock(struct clocksource *cs)
{
unsigned long long now, adj;
preempt_disable(); /* protect from changes to steering parameters */
now = get_tod_clock();
adj = tod_steering_end - now;
if (unlikely((s64) adj >= 0))
/*
* manually steer by 1 cycle every 2^16 cycles. This
* corresponds to shifting the tod delta by 15. 1s is
* therefore steered in ~9h. The adjust will decrease
* over time, until it finally reaches 0.
*/
now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
preempt_enable();
return now;
}
static struct clocksource clocksource_tod = {
.name = "tod",
.rating = 400,
.read = read_tod_clock,
.mask = -1ULL,
.mult = 1000,
.shift = 12,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
struct clocksource * __init clocksource_default_clock(void)
{
return &clocksource_tod;
}
void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
if (tk->tkr_mono.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
}
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec =
(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
vdso_data->wtom_coarse_sec =
vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_coarse_nsec =
vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
vdso_data->wtom_coarse_sec++;
}
vdso_data->tk_mult = tk->tkr_mono.mult;
vdso_data->tk_shift = tk->tkr_mono.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
extern struct timezone sys_tz;
void update_vsyscall_tz(void)
{
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
}
/*
* Initialize the TOD clock and the CPU timer of
* the boot cpu.
*/
void __init time_init(void)
{
/* Reset time synchronization interfaces. */
stp_reset();
/* request the clock comparator external interrupt */
if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
panic("Couldn't request external interrupt 0x1004");
/* request the timing alert external interrupt */
if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
if (__clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
/* Enable TOD clock interrupts on the boot cpu. */
init_cpu_timer();
/* Enable cpu timer interrupts on the boot cpu. */
vtime_init();
}
static DEFINE_PER_CPU(atomic_t, clock_sync_word);
static DEFINE_MUTEX(clock_sync_mutex);
static unsigned long clock_sync_flags;
#define CLOCK_SYNC_HAS_STP 0
#define CLOCK_SYNC_STP 1
/*
* The get_clock function for the physical clock. It will get the current
* TOD clock, subtract the LPAR offset and write the result to *clock.
* The function returns 0 if the clock is in sync with the external time
* source. If the clock mode is local it will return -EOPNOTSUPP and
* -EAGAIN if the clock is not in sync with the external reference.
*/
int get_phys_clock(unsigned long *clock)
{
atomic_t *sw_ptr;
unsigned int sw0, sw1;
sw_ptr = &get_cpu_var(clock_sync_word);
sw0 = atomic_read(sw_ptr);
*clock = get_tod_clock() - lpar_offset;
sw1 = atomic_read(sw_ptr);
put_cpu_var(clock_sync_word);
if (sw0 == sw1 && (sw0 & 0x80000000U))
/* Success: time is in sync. */
return 0;
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
return -EACCES;
return -EAGAIN;
}
EXPORT_SYMBOL(get_phys_clock);
/*
* Make get_phys_clock() return -EAGAIN.
*/
static void disable_sync_clock(void *dummy)
{
s390: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to this_cpu_inc(y) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> CC: linux390@de.ibm.com Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-18 00:30:45 +07:00
atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
/*
* Clear the in-sync bit 2^31. All get_phys_clock calls will
* fail until the sync bit is turned back on. In addition
* increase the "sequence" counter to avoid the race of an
* stp event and the complete recovery against get_phys_clock.
*/
atomic_andnot(0x80000000, sw_ptr);
atomic_inc(sw_ptr);
}
/*
* Make get_phys_clock() return 0 again.
* Needs to be called from a context disabled for preemption.
*/
static void enable_sync_clock(void)
{
s390: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to this_cpu_inc(y) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> CC: linux390@de.ibm.com Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-18 00:30:45 +07:00
atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
atomic_or(0x80000000, sw_ptr);
}
/*
* Function to check if the clock is in sync.
*/
static inline int check_sync_clock(void)
{
atomic_t *sw_ptr;
int rc;
sw_ptr = &get_cpu_var(clock_sync_word);
rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
put_cpu_var(clock_sync_word);
return rc;
}
/*
* Apply clock delta to the global data structures.
* This is called once on the CPU that performed the clock sync.
*/
static void clock_sync_global(unsigned long long delta)
{
unsigned long now, adj;
struct ptff_qto qto;
/* Fixup the monotonic sched clock. */
*(unsigned long long *) &tod_clock_base[1] += delta;
if (*(unsigned long long *) &tod_clock_base[1] < delta)
/* Epoch overflow */
tod_clock_base[0]++;
/* Adjust TOD steering parameters. */
vdso_data->tb_update_count++;
now = get_tod_clock();
adj = tod_steering_end - now;
if (unlikely((s64) adj >= 0))
/* Calculate how much of the old adjustment is left. */
tod_steering_delta = (tod_steering_delta < 0) ?
-(adj >> 15) : (adj >> 15);
tod_steering_delta += delta;
if ((abs(tod_steering_delta) >> 48) != 0)
panic("TOD clock sync offset %lli is too large to drift\n",
tod_steering_delta);
tod_steering_end = now + (abs(tod_steering_delta) << 15);
vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1;
vdso_data->ts_end = tod_steering_end;
vdso_data->tb_update_count++;
/* Update LPAR offset. */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
lpar_offset = qto.tod_epoch_difference;
/* Call the TOD clock change notifier. */
atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta);
}
/*
* Apply clock delta to the per-CPU data structures of this CPU.
* This is called for each online CPU after the call to clock_sync_global.
*/
static void clock_sync_local(unsigned long long delta)
{
/* Add the delta to the clock comparator. */
if (S390_lowcore.clock_comparator != clock_comparator_max) {
S390_lowcore.clock_comparator += delta;
set_clock_comparator(S390_lowcore.clock_comparator);
}
/* Adjust the last_update_clock time-stamp. */
S390_lowcore.last_update_clock += delta;
}
/* Single threaded workqueue used for stp sync events */
static struct workqueue_struct *time_sync_wq;
static void __init time_init_wq(void)
{
if (time_sync_wq)
return;
time_sync_wq = create_singlethread_workqueue("timesync");
}
struct clock_sync_data {
atomic_t cpus;
int in_sync;
unsigned long long clock_delta;
};
/*
* Server Time Protocol (STP) code.
*/
static bool stp_online;
static struct stp_sstpi stp_info;
static void *stp_page;
static void stp_work_fn(struct work_struct *work);
static DEFINE_MUTEX(stp_work_mutex);
static DECLARE_WORK(stp_work, stp_work_fn);
static struct timer_list stp_timer;
static int __init early_parse_stp(char *p)
{
return kstrtobool(p, &stp_online);
}
early_param("stp", early_parse_stp);
/*
* Reset STP attachment.
*/
static void __init stp_reset(void)
{
int rc;
stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
pr_warn("The real or virtual hardware system does not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
stp_online = false;
}
}
static void stp_timeout(unsigned long dummy)
{
queue_work(time_sync_wq, &stp_work);
}
static int __init stp_init(void)
{
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return 0;
setup_timer(&stp_timer, stp_timeout, 0UL);
time_init_wq();
if (!stp_online)
return 0;
queue_work(time_sync_wq, &stp_work);
return 0;
}
arch_initcall(stp_init);
/*
* STP timing alert. There are three causes:
* 1) timing status change
* 2) link availability change
* 3) time control parameter change
* In all three cases we are only interested in the clock source state.
* If a STP clock source is now available use it.
*/
static void stp_timing_alert(struct stp_irq_parm *intparm)
{
if (intparm->tsc || intparm->lac || intparm->tcpc)
queue_work(time_sync_wq, &stp_work);
}
/*
* STP sync check machine check. This is called when the timing state
* changes from the synchronized state to the unsynchronized state.
* After a STP sync check the clock is not in sync. The machine check
* is broadcasted to all cpus at the same time.
*/
int stp_sync_check(void)
{
disable_sync_clock(NULL);
return 1;
}
/*
* STP island condition machine check. This is called when an attached
* server attempts to communicate over an STP link and the servers
* have matching CTN ids and have a valid stratum-1 configuration
* but the configurations do not match.
*/
int stp_island_check(void)
{
disable_sync_clock(NULL);
return 1;
}
void stp_queue_work(void)
{
queue_work(time_sync_wq, &stp_work);
}
static int stp_sync_clock(void *data)
{
struct clock_sync_data *sync = data;
s390/time: simplify stp time syncs The way we call do_adjtimex() today is broken. It has 0 effect, as ADJ_OFFSET_SINGLESHOT (0x0001) in the kernel maps to !ADJ_ADJTIME (in contrast to user space where it maps to ADJ_OFFSET_SINGLESHOT | ADJ_ADJTIME - 0x8001). !ADJ_ADJTIME will silently ignore all adjustments without STA_PLL being active. We could switch to ADJ_ADJTIME or turn STA_PLL on, but still we would run into some problems: - Even when switching to nanoseconds, we lose accuracy. - Successive calls to do_adjtimex() will simply overwrite any leftovers from the previous call (if not fully handled) - Anything that NTP does using the sysctl heavily interferes with our use. - !ADJ_ADJTIME will silently round stuff > or < than 0.5 seconds Reusing do_adjtimex() here just feels wrong. The whole STP synchronization works right now *somehow* only, as do_adjtimex() does nothing and our TOD clock jumps in time, although it shouldn't. This is especially bad as the clock could jump backwards in time. We will have to find another way to fix this up. As leap seconds are also not properly handled yet, let's just get rid of all this complex logic altogether and use the correct clock_delta for fixing up the clock comparator and keeping the sched_clock monotonic. This change should have 0 effect on the current STP mechanism. Once we know how to best handle sync events and leap second updates, we'll start with a fresh implementation. Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-07-14 18:09:57 +07:00
unsigned long long clock_delta;
static int first;
int rc;
enable_sync_clock();
if (xchg(&first, 1) == 0) {
/* Wait until all other cpus entered the sync function. */
while (atomic_read(&sync->cpus) != 0)
cpu_relax();
rc = 0;
if (stp_info.todoff[0] || stp_info.todoff[1] ||
stp_info.todoff[2] || stp_info.todoff[3] ||
stp_info.tmd != 2) {
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
&clock_delta);
if (rc == 0) {
sync->clock_delta = clock_delta;
clock_sync_global(clock_delta);
rc = chsc_sstpi(stp_page, &stp_info,
sizeof(struct stp_sstpi));
if (rc == 0 && stp_info.tmd != 2)
rc = -EAGAIN;
}
}
sync->in_sync = rc ? -EAGAIN : 1;
xchg(&first, 0);
} else {
/* Slave */
atomic_dec(&sync->cpus);
/* Wait for in_sync to be set. */
while (READ_ONCE(sync->in_sync) == 0)
__udelay(1);
}
if (sync->in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
disable_sync_clock(NULL);
/* Apply clock delta to per-CPU fields of this CPU. */
clock_sync_local(sync->clock_delta);
return 0;
}
/*
* STP work. Check for the STP state and take over the clock
* synchronization if the STP clock source is usable.
*/
static void stp_work_fn(struct work_struct *work)
{
struct clock_sync_data stp_sync;
int rc;
/* prevent multiple execution. */
mutex_lock(&stp_work_mutex);
if (!stp_online) {
chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
del_timer_sync(&stp_timer);
goto out_unlock;
}
rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
if (rc)
goto out_unlock;
rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
if (rc || stp_info.c == 0)
goto out_unlock;
/* Skip synchronization if the clock is already in sync. */
if (check_sync_clock())
goto out_unlock;
memset(&stp_sync, 0, sizeof(stp_sync));
cpus_read_lock();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
cpus_read_unlock();
if (!check_sync_clock())
/*
* There is a usable clock but the synchonization failed.
* Retry after a second.
*/
mod_timer(&stp_timer, jiffies + HZ);
out_unlock:
mutex_unlock(&stp_work_mutex);
}
/*
* STP subsys sysfs interface functions
*/
static struct bus_type stp_subsys = {
.name = "stp",
.dev_name = "stp",
};
static ssize_t stp_ctn_id_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online)
return -ENODATA;
return sprintf(buf, "%016llx\n",
*(unsigned long long *) stp_info.ctnid);
}
static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
static ssize_t stp_ctn_type_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online)
return -ENODATA;
return sprintf(buf, "%i\n", stp_info.ctn);
}
static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
static ssize_t stp_dst_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online || !(stp_info.vbits & 0x2000))
return -ENODATA;
return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
}
static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
static ssize_t stp_leap_seconds_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online || !(stp_info.vbits & 0x8000))
return -ENODATA;
return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
}
static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
static ssize_t stp_stratum_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online)
return -ENODATA;
return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
}
static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
static ssize_t stp_time_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online || !(stp_info.vbits & 0x0800))
return -ENODATA;
return sprintf(buf, "%i\n", (int) stp_info.tto);
}
static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
static ssize_t stp_time_zone_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online || !(stp_info.vbits & 0x4000))
return -ENODATA;
return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
}
static DEVICE_ATTR(time_zone_offset, 0400,
stp_time_zone_offset_show, NULL);
static ssize_t stp_timing_mode_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online)
return -ENODATA;
return sprintf(buf, "%i\n", stp_info.tmd);
}
static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
static ssize_t stp_timing_state_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
if (!stp_online)
return -ENODATA;
return sprintf(buf, "%i\n", stp_info.tst);
}
static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
static ssize_t stp_online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%i\n", stp_online);
}
static ssize_t stp_online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
unsigned int value;
value = simple_strtoul(buf, NULL, 0);
if (value != 0 && value != 1)
return -EINVAL;
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
mutex_lock(&clock_sync_mutex);
stp_online = value;
if (stp_online)
set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
else
clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
queue_work(time_sync_wq, &stp_work);
mutex_unlock(&clock_sync_mutex);
return count;
}
/*
* Can't use DEVICE_ATTR because the attribute should be named
* stp/online but dev_attr_online already exists in this file ..
*/
static struct device_attribute dev_attr_stp_online = {
.attr = { .name = "online", .mode = 0600 },
.show = stp_online_show,
.store = stp_online_store,
};
static struct device_attribute *stp_attributes[] = {
&dev_attr_ctn_id,
&dev_attr_ctn_type,
&dev_attr_dst_offset,
&dev_attr_leap_seconds,
&dev_attr_stp_online,
&dev_attr_stratum,
&dev_attr_time_offset,
&dev_attr_time_zone_offset,
&dev_attr_timing_mode,
&dev_attr_timing_state,
NULL
};
static int __init stp_init_sysfs(void)
{
struct device_attribute **attr;
int rc;
rc = subsys_system_register(&stp_subsys, NULL);
if (rc)
goto out;
for (attr = stp_attributes; *attr; attr++) {
rc = device_create_file(stp_subsys.dev_root, *attr);
if (rc)
goto out_unreg;
}
return 0;
out_unreg:
for (; attr >= stp_attributes; attr--)
device_remove_file(stp_subsys.dev_root, *attr);
bus_unregister(&stp_subsys);
out:
return rc;
}
device_initcall(stp_init_sysfs);