2005-04-17 05:20:36 +07:00
|
|
|
/*
 * lppaca.h
 * Copyright (C) 2001 Mike Corrigan IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
|
2005-11-09 09:38:01 +07:00
|
|
|
#ifndef _ASM_POWERPC_LPPACA_H
|
|
|
|
#define _ASM_POWERPC_LPPACA_H
|
2005-12-17 04:43:46 +07:00
|
|
|
#ifdef __KERNEL__
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2012-04-10 23:22:53 +07:00
|
|
|
/*
 * These definitions relate to hypervisors that only exist when using
 * a server type processor.
 */
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S
|
|
|
|
|
2012-04-10 23:22:53 +07:00
|
|
|
/*
 * This control block contains the data that is shared between the
 * hypervisor and the OS.
 */
|
2006-08-07 13:19:19 +07:00
|
|
|
#include <linux/cache.h>
|
2011-03-07 01:02:31 +07:00
|
|
|
#include <linux/threads.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/types.h>
|
2006-08-07 13:19:19 +07:00
|
|
|
#include <asm/mmu.h>
|
2018-04-02 14:33:37 +07:00
|
|
|
#include <asm/firmware.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2011-03-07 01:02:31 +07:00
|
|
|
/*
 * The lppaca is the "virtual processor area" registered with the hypervisor,
 * H_REGISTER_VPA etc.
 *
 * According to PAPR, the structure is 640 bytes long, must be L1 cache line
 * aligned, and must not cross a 4kB boundary. Its size field must be at
 * least 640 bytes (but may be more).
 *
 * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
 * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
 * this structure as the canonical 640 byte size.
 */
|
2006-01-18 08:00:05 +07:00
|
|
|
struct lppaca {
|
2012-04-10 23:22:53 +07:00
|
|
|
/* cacheline 1 contains read-only data */
|
|
|
|
|
2013-08-06 23:01:45 +07:00
|
|
|
__be32 desc; /* Eye catcher 0xD397D781 */
|
|
|
|
__be16 size; /* Size of this struct */
|
2013-08-06 23:01:26 +07:00
|
|
|
u8 reserved1[3];
|
|
|
|
u8 __old_status; /* Old status, including shared proc */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 reserved3[14];
|
2013-08-06 23:01:45 +07:00
|
|
|
volatile __be32 dyn_hw_node_id; /* Dynamic hardware node id */
|
|
|
|
volatile __be32 dyn_hw_proc_id; /* Dynamic hardware proc id */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 reserved4[56];
|
|
|
|
volatile u8 vphn_assoc_counts[8]; /* Virtual processor home node */
|
|
|
|
/* associativity change counters */
|
|
|
|
u8 reserved5[32];
|
|
|
|
|
|
|
|
/* cacheline 2 contains local read-write data */
|
|
|
|
|
|
|
|
u8 reserved6[48];
|
|
|
|
u8 cede_latency_hint;
|
2013-06-28 15:15:18 +07:00
|
|
|
u8 ebb_regs_in_use;
|
|
|
|
u8 reserved7[6];
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 dtl_enable_mask; /* Dispatch Trace Log mask */
|
|
|
|
u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */
|
|
|
|
u8 fpregs_in_use;
|
|
|
|
u8 pmcregs_in_use;
|
|
|
|
u8 reserved8[28];
|
2013-08-06 23:01:45 +07:00
|
|
|
__be64 wait_state_cycles; /* Wait cycles for this proc */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 reserved9[28];
|
2013-08-06 23:01:45 +07:00
|
|
|
__be16 slb_count; /* # of SLBs to maintain */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 idle; /* Indicate OS is idle */
|
|
|
|
u8 vmxregs_in_use;
|
|
|
|
|
|
|
|
/* cacheline 3 is shared with other processors */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the yield_count. An "odd" value (low bit on) means that
|
|
|
|
* the processor is yielded (either because of an OS yield or a
|
|
|
|
* hypervisor preempt). An even value implies that the processor is
|
|
|
|
* currently executing.
|
2013-10-20 06:26:20 +07:00
|
|
|
* NOTE: Even dedicated processor partitions can yield so this
|
|
|
|
* field cannot be used to determine if we are shared or dedicated.
|
2012-04-10 23:22:53 +07:00
|
|
|
*/
|
2013-08-06 23:01:45 +07:00
|
|
|
volatile __be32 yield_count;
|
|
|
|
volatile __be32 dispersion_count; /* dispatch changed physical cpu */
|
|
|
|
volatile __be64 cmo_faults; /* CMO page fault count */
|
|
|
|
volatile __be64 cmo_fault_time; /* CMO page fault time */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 reserved10[104];
|
|
|
|
|
|
|
|
/* cacheline 4-5 */
|
|
|
|
|
2013-08-06 23:01:45 +07:00
|
|
|
__be32 page_ins; /* CMO Hint - # page ins by OS */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 reserved11[148];
|
2018-02-13 22:08:13 +07:00
|
|
|
volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
|
2012-04-10 23:22:53 +07:00
|
|
|
u8 reserved12[96];
|
2018-02-13 22:08:13 +07:00
|
|
|
} ____cacheline_aligned;
|
2006-01-13 06:26:42 +07:00
|
|
|
|
2018-02-13 22:08:12 +07:00
|
|
|
/*
 * lppaca_of(cpu) - the object that paca_ptrs[cpu]->lppaca_ptr points to.
 *
 * The expansion dereferences the pointer, so the result is the pointed-to
 * object itself (an lvalue), not a pointer. Neither paca_ptrs[cpu] nor
 * lppaca_ptr is checked for NULL here. @cpu is only used as an array index.
 */
#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
|
2010-08-13 03:18:15 +07:00
|
|
|
|
2013-08-06 23:01:26 +07:00
|
|
|
/*
 * We are using a non-architected field to determine if a partition is
 * shared or dedicated. This currently works on both KVM and PHYP, but
 * we will have to transition to something better.
 */
|
|
|
|
/* Bit mask tested against lppaca.__old_status by lppaca_shared_proc(). */
#define LPPACA_OLD_SHARED_PROC 2
|
|
|
|
|
|
|
|
static inline bool lppaca_shared_proc(struct lppaca *l)
|
|
|
|
{
|
2018-04-02 14:33:37 +07:00
|
|
|
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
|
|
|
|
return false;
|
2013-10-20 06:26:20 +07:00
|
|
|
return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
|
2013-08-06 23:01:26 +07:00
|
|
|
}
|
|
|
|
|
2006-08-07 13:19:19 +07:00
|
|
|
/*
 * SLB shadow buffer structure as defined in the PAPR. The save_area
 * contains adjacent ESID and VSID pairs for each shadowed SLB. The
 * ESID is stored in the lower 64 bits, then the VSID.
 */
|
|
|
|
struct slb_shadow {
|
2013-08-06 23:01:45 +07:00
|
|
|
__be32 persistent; /* Number of persistent SLBs */
|
|
|
|
__be32 buffer_length; /* Total shadow buffer length */
|
|
|
|
__be64 reserved;
|
2006-08-07 13:19:19 +07:00
|
|
|
struct {
|
2013-08-06 23:01:45 +07:00
|
|
|
__be64 esid;
|
|
|
|
__be64 vsid;
|
2012-04-10 23:22:53 +07:00
|
|
|
} save_area[SLB_NUM_BOLTED];
|
2006-08-07 13:19:19 +07:00
|
|
|
} ____cacheline_aligned;
|
|
|
|
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
/*
 * Layout of entries in the hypervisor's dispatch trace log buffer.
 */
|
|
|
|
struct dtl_entry {
|
|
|
|
u8 dispatch_reason;
|
|
|
|
u8 preempt_reason;
|
2013-08-06 23:01:45 +07:00
|
|
|
__be16 processor_id;
|
|
|
|
__be32 enqueue_to_dispatch_time;
|
|
|
|
__be32 ready_to_enqueue_time;
|
|
|
|
__be32 waiting_to_ready_time;
|
|
|
|
__be64 timebase;
|
|
|
|
__be64 fault_addr;
|
|
|
|
__be64 srr0;
|
|
|
|
__be64 srr1;
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Size in bytes of one cpu's dispatch trace log; N_DISPATCH_LOG derives from it. */
#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
|
|
|
|
/*
 * Number of whole struct dtl_entry records that fit in DISPATCH_LOG_BYTES.
 * Integer division: any remainder bytes are not counted. The result has the
 * type of sizeof (size_t), i.e. it is unsigned.
 */
#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
|
|
|
|
|
2011-05-04 19:54:16 +07:00
|
|
|
/*
 * Slab cache pointer; declared here, defined in another translation unit.
 * NOTE(review): presumably the cache the per-cpu dispatch trace log buffers
 * are allocated from - confirm at the definition site.
 */
extern struct kmem_cache *dtl_cache;
|
|
|
|
|
2010-08-31 08:59:53 +07:00
|
|
|
/*
 * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
 * reading from the dispatch trace log. If other code wants to consume
 * DTL entries, it can set this pointer to a function that will get
 * called once for each DTL entry that gets processed.
 */
|
|
|
|
/*
 * Hook pointer; declared here, defined in another translation unit.
 * @entry: the dispatch trace log entry handed to the hook.
 * @index: NOTE(review): presumably the log index of @entry - confirm with
 *         the code that invokes the hook.
 */
extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
|
|
|
|
|
2009-06-03 04:17:45 +07:00
|
|
|
#endif /* CONFIG_PPC_BOOK3S */
|
2005-12-17 04:43:46 +07:00
|
|
|
#endif /* __KERNEL__ */
|
2005-11-09 09:38:01 +07:00
|
|
|
#endif /* _ASM_POWERPC_LPPACA_H */
|