2005-04-17 05:20:36 +07:00
|
|
|
/*
|
2005-10-26 14:05:24 +07:00
|
|
|
* 64-bit pSeries and RS/6000 setup code.
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
|
|
|
* Copyright (C) 1995 Linus Torvalds
|
|
|
|
* Adapted from 'alpha' version by Gary Thomas
|
|
|
|
* Modified by Cort Dougan (cort@cs.nmt.edu)
|
|
|
|
* Modified by PPC64 Team, IBM Corp
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bootup setup stuff..
|
|
|
|
*/
|
|
|
|
|
2005-07-08 07:56:30 +07:00
|
|
|
#include <linux/cpu.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
#include <linux/user.h>
|
|
|
|
#include <linux/tty.h>
|
|
|
|
#include <linux/major.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/reboot.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/ioport.h>
|
|
|
|
#include <linux/console.h>
|
|
|
|
#include <linux/pci.h>
|
2005-07-11 02:35:15 +07:00
|
|
|
#include <linux/utsname.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/adb.h>
|
2011-07-23 05:24:23 +07:00
|
|
|
#include <linux/export.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/irq.h>
|
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/root_dev.h>
|
2012-10-02 23:57:57 +07:00
|
|
|
#include <linux/of.h>
|
2015-09-04 23:50:10 +07:00
|
|
|
#include <linux/of_pci.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/io.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/prom.h>
|
|
|
|
#include <asm/rtas.h>
|
|
|
|
#include <asm/pci-bridge.h>
|
|
|
|
#include <asm/iommu.h>
|
|
|
|
#include <asm/dma.h>
|
|
|
|
#include <asm/machdep.h>
|
|
|
|
#include <asm/irq.h>
|
|
|
|
#include <asm/time.h>
|
|
|
|
#include <asm/nvram.h>
|
2005-08-09 08:13:36 +07:00
|
|
|
#include <asm/pmc.h>
|
2011-04-04 10:46:58 +07:00
|
|
|
#include <asm/xics.h>
|
2005-09-27 23:50:25 +07:00
|
|
|
#include <asm/ppc-pci.h>
|
2005-10-10 19:03:41 +07:00
|
|
|
#include <asm/i8259.h>
|
|
|
|
#include <asm/udbg.h>
|
2005-11-07 09:18:13 +07:00
|
|
|
#include <asm/smp.h>
|
2007-02-08 14:33:51 +07:00
|
|
|
#include <asm/firmware.h>
|
2007-03-04 13:04:44 +07:00
|
|
|
#include <asm/eeh.h>
|
2012-12-20 21:06:45 +07:00
|
|
|
#include <asm/reg.h>
|
2013-08-22 16:53:52 +07:00
|
|
|
#include <asm/plpar_wrappers.h>
|
2016-08-24 05:57:39 +07:00
|
|
|
#include <asm/kexec.h>
|
2017-01-30 14:11:55 +07:00
|
|
|
#include <asm/isa-bridge.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-02-08 14:33:51 +07:00
|
|
|
#include "pseries.h"
|
2005-11-03 11:33:31 +07:00
|
|
|
|
2008-08-16 02:07:31 +07:00
|
|
|
int CMO_PrPSP = -1;
|
|
|
|
int CMO_SecPSP = -1;
|
2013-12-09 14:17:01 +07:00
|
|
|
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
|
2008-08-21 08:16:26 +07:00
|
|
|
EXPORT_SYMBOL(CMO_PageSize);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
int fwnmi_active; /* TRUE if an FWNMI handler is present */
|
|
|
|
|
2006-01-11 07:00:02 +07:00
|
|
|
static void pSeries_show_cpuinfo(struct seq_file *m)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
struct device_node *root;
|
|
|
|
const char *model = "";
|
|
|
|
|
|
|
|
root = of_find_node_by_path("/");
|
|
|
|
if (root)
|
2007-04-03 19:26:41 +07:00
|
|
|
model = of_get_property(root, "model", NULL);
|
2005-04-17 05:20:36 +07:00
|
|
|
seq_printf(m, "machine\t\t: CHRP %s\n", model);
|
|
|
|
of_node_put(root);
|
2017-03-22 00:29:55 +07:00
|
|
|
if (radix_enabled())
|
|
|
|
seq_printf(m, "MMU\t\t: Radix\n");
|
|
|
|
else
|
|
|
|
seq_printf(m, "MMU\t\t: Hash\n");
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize firmware assisted non-maskable interrupts if
|
|
|
|
* the firmware supports this feature.
|
|
|
|
*/
|
|
|
|
static void __init fwnmi_init(void)
|
|
|
|
{
|
2005-12-04 14:39:33 +07:00
|
|
|
unsigned long system_reset_addr, machine_check_addr;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
int ibm_nmi_register = rtas_token("ibm,nmi-register");
|
|
|
|
if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
|
|
|
|
return;
|
2005-12-04 14:39:33 +07:00
|
|
|
|
|
|
|
/* If the kernel's not linked at zero we point the firmware at low
|
|
|
|
* addresses anyway, and use a trampoline to get to the real code. */
|
|
|
|
system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START;
|
|
|
|
machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;
|
|
|
|
|
|
|
|
if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
|
|
|
|
machine_check_addr))
|
2005-04-17 05:20:36 +07:00
|
|
|
fwnmi_active = 1;
|
|
|
|
}
|
|
|
|
|
2015-09-14 15:42:37 +07:00
|
|
|
static void pseries_8259_cascade(struct irq_desc *desc)
|
2006-07-03 16:32:51 +07:00
|
|
|
{
|
2011-03-25 22:45:20 +07:00
|
|
|
struct irq_chip *chip = irq_desc_get_chip(desc);
|
2006-10-07 19:08:26 +07:00
|
|
|
unsigned int cascade_irq = i8259_irq();
|
2011-03-07 20:59:45 +07:00
|
|
|
|
2016-09-06 18:53:24 +07:00
|
|
|
if (cascade_irq)
|
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 20:55:46 +07:00
|
|
|
generic_handle_irq(cascade_irq);
|
2011-03-07 20:59:45 +07:00
|
|
|
|
|
|
|
chip->irq_eoi(&desc->irq_data);
|
2006-07-03 16:32:51 +07:00
|
|
|
}
|
|
|
|
|
2008-04-01 13:42:25 +07:00
|
|
|
/*
 * Find the ISA interrupt controller (an "interrupt-controller" node that is
 * compatible with "chrp,iic"), map its cascade interrupt, look for an
 * "8259-interrupt-acknowledge" address on the "pci" ancestors of that node,
 * then initialise the i8259 and install pseries_8259_cascade() as the chained
 * handler for the cascade interrupt.
 *
 * Returns silently (after logging) when no controller is present or the
 * cascade interrupt cannot be mapped.
 */
static void __init pseries_setup_i8259_cascade(void)
{
	struct device_node *np, *old, *found = NULL;
	unsigned int cascade;
	const u32 *addrp;
	unsigned long intack = 0;
	int naddr;

	/* Leaving the iterator with break keeps the reference on np */
	for_each_node_by_type(np, "interrupt-controller") {
		if (of_device_is_compatible(np, "chrp,iic")) {
			found = np;
			break;
		}
	}

	if (found == NULL) {
		printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
		return;
	}

	cascade = irq_of_parse_and_map(found, 0);
	if (!cascade) {
		printk(KERN_ERR "pic: failed to map cascade interrupt\n");
		/* Drop the reference taken by the search loop above */
		of_node_put(found);
		return;
	}
	pr_debug("pic: cascade mapped to irq %d\n", cascade);

	/*
	 * Walk up from the controller to the root.  Every "pci" ancestor that
	 * has an "8259-interrupt-acknowledge" property overwrites intack, so
	 * the value from the outermost such ancestor is the one that is kept.
	 */
	for (old = of_node_get(found); old != NULL ; old = np) {
		np = of_get_parent(old);
		of_node_put(old);
		if (np == NULL)
			break;
		if (strcmp(np->name, "pci") != 0)
			continue;
		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
		if (addrp == NULL)
			continue;
		/* The address is 1 or 2 cells; the last cell is the low word */
		naddr = of_n_addr_cells(np);
		intack = addrp[naddr-1];
		if (naddr > 1)
			intack |= ((unsigned long)addrp[naddr-2]) << 32;
	}
	if (intack)
		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
	i8259_init(found, intack);
	of_node_put(found);
	irq_set_chained_handler(cascade, pseries_8259_cascade);
}
|
|
|
|
|
2016-05-30 13:18:11 +07:00
|
|
|
/*
 * Interrupt setup for pSeries: initialise XICS first, then set up the
 * i8259 cascade (if any) on top of it.
 */
static void __init pseries_init_irq(void)
{
	xics_init();
	pseries_setup_i8259_cascade();
}
|
|
|
|
|
2005-08-09 08:13:36 +07:00
|
|
|
static void pseries_lpar_enable_pmcs(void)
|
|
|
|
{
|
|
|
|
unsigned long set, reset;
|
|
|
|
|
|
|
|
set = 1UL << 63;
|
|
|
|
reset = 0;
|
|
|
|
plpar_hcall_norets(H_PERFMON, set, reset);
|
|
|
|
}
|
|
|
|
|
2014-11-25 00:58:01 +07:00
|
|
|
static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
|
2009-04-30 16:26:21 +07:00
|
|
|
{
|
2014-11-25 00:58:01 +07:00
|
|
|
struct of_reconfig_data *rd = data;
|
2015-08-27 11:12:37 +07:00
|
|
|
struct device_node *parent, *np = rd->dn;
|
|
|
|
struct pci_dn *pdn;
|
2009-04-30 16:26:21 +07:00
|
|
|
int err = NOTIFY_OK;
|
|
|
|
|
|
|
|
switch (action) {
|
2012-10-02 23:57:57 +07:00
|
|
|
case OF_RECONFIG_ATTACH_NODE:
|
2015-08-27 11:12:37 +07:00
|
|
|
parent = of_get_parent(np);
|
|
|
|
pdn = parent ? PCI_DN(parent) : NULL;
|
2016-05-20 13:41:37 +07:00
|
|
|
if (pdn)
|
2016-05-03 12:41:40 +07:00
|
|
|
pci_add_device_node_info(pdn->phb, np);
|
2015-08-27 11:12:37 +07:00
|
|
|
|
|
|
|
of_node_put(parent);
|
2009-04-30 16:26:21 +07:00
|
|
|
break;
|
2015-08-27 11:12:36 +07:00
|
|
|
case OF_RECONFIG_DETACH_NODE:
|
2015-08-27 11:12:37 +07:00
|
|
|
pdn = PCI_DN(np);
|
|
|
|
if (pdn)
|
|
|
|
list_del(&pdn->list);
|
2015-08-27 11:12:36 +07:00
|
|
|
break;
|
2009-04-30 16:26:21 +07:00
|
|
|
default:
|
|
|
|
err = NOTIFY_DONE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block pci_dn_reconfig_nb = {
|
|
|
|
.notifier_call = pci_dn_reconfig_notifier,
|
|
|
|
};
|
|
|
|
|
2011-05-04 19:54:16 +07:00
|
|
|
/*
 * Slab cache for dispatch trace log buffers; created by
 * alloc_dispatch_log_kmem_cache() and NULL until then (or if that failed).
 */
struct kmem_cache *dtl_cache;
|
|
|
|
|
2012-07-25 12:56:04 +07:00
|
|
|
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
/*
|
|
|
|
* Allocate space for the dispatch trace log for all possible cpus
|
|
|
|
* and register the buffers with the hypervisor. This is used for
|
|
|
|
* computing time stolen by the hypervisor.
|
|
|
|
*/
|
|
|
|
static int alloc_dispatch_logs(void)
|
|
|
|
{
|
|
|
|
int cpu, ret;
|
|
|
|
struct paca_struct *pp;
|
|
|
|
struct dtl_entry *dtl;
|
|
|
|
|
|
|
|
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
|
|
|
|
return 0;
|
|
|
|
|
2011-05-04 19:54:16 +07:00
|
|
|
if (!dtl_cache)
|
2011-04-14 02:45:59 +07:00
|
|
|
return 0;
|
|
|
|
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
pp = &paca[cpu];
|
2011-04-14 02:45:59 +07:00
|
|
|
dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
if (!dtl) {
|
|
|
|
pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
|
|
|
|
cpu);
|
|
|
|
pr_warn("Stolen time statistics will be unreliable\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
pp->dtl_ridx = 0;
|
|
|
|
pp->dispatch_log = dtl;
|
|
|
|
pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
|
|
|
|
pp->dtl_curr = dtl;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Register the DTL for the current (boot) cpu */
|
|
|
|
dtl = get_paca()->dispatch_log;
|
|
|
|
get_paca()->dtl_ridx = 0;
|
|
|
|
get_paca()->dtl_curr = dtl;
|
|
|
|
get_paca()->lppaca_ptr->dtl_idx = 0;
|
|
|
|
|
|
|
|
/* hypervisor reads buffer length from this field */
|
2013-08-06 23:01:46 +07:00
|
|
|
dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
|
|
|
|
if (ret)
|
2011-07-25 08:46:33 +07:00
|
|
|
pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
|
|
|
|
"with %d\n", smp_processor_id(),
|
|
|
|
hard_smp_processor_id(), ret);
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
get_paca()->lppaca_ptr->dtl_enable_mask = 2;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2012-07-25 12:56:04 +07:00
|
|
|
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
|
2011-05-04 19:54:16 +07:00
|
|
|
/* Stub used when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is off: nothing to do. */
static inline int alloc_dispatch_logs(void)
{
	return 0;
}
|
2012-07-25 12:56:04 +07:00
|
|
|
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 02:56:43 +07:00
|
|
|
|
2011-05-04 19:54:16 +07:00
|
|
|
static int alloc_dispatch_log_kmem_cache(void)
|
|
|
|
{
|
|
|
|
dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
|
|
|
|
DISPATCH_LOG_BYTES, 0, NULL);
|
|
|
|
if (!dtl_cache) {
|
|
|
|
pr_warn("Failed to create dispatch trace log buffer cache\n");
|
|
|
|
pr_warn("Stolen time statistics will be unreliable\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return alloc_dispatch_logs();
|
|
|
|
}
|
2014-07-16 09:02:43 +07:00
|
|
|
machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
|
2011-05-04 19:54:16 +07:00
|
|
|
|
2013-09-06 01:55:06 +07:00
|
|
|
static void pseries_lpar_idle(void)
|
2011-11-30 09:46:55 +07:00
|
|
|
{
|
2014-01-30 00:45:10 +07:00
|
|
|
/*
|
|
|
|
* Default handler to go into low thread priority and possibly
|
2016-06-01 13:34:37 +07:00
|
|
|
* low power mode by ceding processor to hypervisor
|
2011-11-30 09:46:55 +07:00
|
|
|
*/
|
2014-01-30 00:45:10 +07:00
|
|
|
|
|
|
|
/* Indicate to hypervisor that we are idle. */
|
|
|
|
get_lppaca()->idle = 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Yield the processor to the hypervisor. We return if
|
|
|
|
* an external interrupt occurs (which are driven prior
|
|
|
|
* to returning here) or if a prod occurs from another
|
|
|
|
* processor. When returning here, external interrupts
|
|
|
|
* are enabled.
|
|
|
|
*/
|
|
|
|
cede_processor();
|
|
|
|
|
|
|
|
get_lppaca()->idle = 0;
|
2011-11-30 09:46:55 +07:00
|
|
|
}
|
|
|
|
|
2012-11-08 12:03:14 +07:00
|
|
|
/*
|
|
|
|
* Enable relocation on during exceptions. This has partition wide scope and
|
|
|
|
* may take a while to complete, if it takes longer than one second we will
|
|
|
|
* just give up rather than wasting any more time on this - if that turns out
|
|
|
|
* to ever be a problem in practice we can move this into a kernel thread to
|
|
|
|
* finish off the process later in boot.
|
|
|
|
*/
|
2016-07-05 12:03:49 +07:00
|
|
|
void pseries_enable_reloc_on_exc(void)
|
2012-11-08 12:03:14 +07:00
|
|
|
{
|
|
|
|
long rc;
|
|
|
|
unsigned int delay, total_delay = 0;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
rc = enable_reloc_on_exceptions();
|
2016-07-05 12:03:49 +07:00
|
|
|
if (!H_IS_LONG_BUSY(rc)) {
|
|
|
|
if (rc == H_P2) {
|
|
|
|
pr_info("Relocation on exceptions not"
|
|
|
|
" supported\n");
|
|
|
|
} else if (rc != H_SUCCESS) {
|
|
|
|
pr_warn("Unable to enable relocation"
|
|
|
|
" on exceptions: %ld\n", rc);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2012-11-08 12:03:14 +07:00
|
|
|
|
|
|
|
delay = get_longbusy_msecs(rc);
|
|
|
|
total_delay += delay;
|
|
|
|
if (total_delay > 1000) {
|
|
|
|
pr_warn("Warning: Giving up waiting to enable "
|
|
|
|
"relocation on exceptions (%u msec)!\n",
|
|
|
|
total_delay);
|
2016-07-05 12:03:49 +07:00
|
|
|
return;
|
2012-11-08 12:03:14 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
mdelay(delay);
|
|
|
|
}
|
|
|
|
}
|
2016-07-05 12:03:49 +07:00
|
|
|
EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
|
2012-11-08 12:03:14 +07:00
|
|
|
|
2016-07-05 12:03:49 +07:00
|
|
|
void pseries_disable_reloc_on_exc(void)
|
2012-11-08 12:40:28 +07:00
|
|
|
{
|
|
|
|
long rc;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
rc = disable_reloc_on_exceptions();
|
|
|
|
if (!H_IS_LONG_BUSY(rc))
|
2016-07-05 12:03:49 +07:00
|
|
|
break;
|
2012-11-08 12:40:28 +07:00
|
|
|
mdelay(get_longbusy_msecs(rc));
|
|
|
|
}
|
2016-07-05 12:03:49 +07:00
|
|
|
if (rc != H_SUCCESS)
|
|
|
|
pr_warning("Warning: Failed to disable relocation on "
|
|
|
|
"exceptions: %ld\n", rc);
|
2012-11-08 12:40:28 +07:00
|
|
|
}
|
2016-07-05 12:03:49 +07:00
|
|
|
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
|
2012-11-08 12:40:28 +07:00
|
|
|
|
2016-11-29 19:45:50 +07:00
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
2012-11-08 12:40:28 +07:00
|
|
|
static void pSeries_machine_kexec(struct kimage *image)
|
|
|
|
{
|
2016-07-05 12:03:49 +07:00
|
|
|
if (firmware_has_feature(FW_FEATURE_SET_MODE))
|
|
|
|
pseries_disable_reloc_on_exc();
|
2012-11-08 12:40:28 +07:00
|
|
|
|
|
|
|
default_machine_kexec(image);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2013-11-20 18:14:59 +07:00
|
|
|
#ifdef __LITTLE_ENDIAN__
|
2016-07-05 12:03:49 +07:00
|
|
|
/*
 * Switch exception delivery to big endian, retrying while the hypervisor
 * reports a long-busy status.
 */
void pseries_big_endian_exceptions(void)
{
	long hret = enable_big_endian_exceptions();

	while (H_IS_LONG_BUSY(hret)) {
		mdelay(get_longbusy_msecs(hret));
		hret = enable_big_endian_exceptions();
	}

	if (!hret)
		return;

	/*
	 * Known problem: when kdump'ing with offline cpus the call above
	 * fails.  Do not panic a second time in that case; carry on and hope
	 * the kdump kernel is little endian as well, which it usually is.
	 */
	if (kdump_in_progress())
		return;

	/*
	 * This runs very late in kexec, so panic() is unlikely to get
	 * anything out to the user.  It does at least stop us from going any
	 * further and ending up in an even harder to debug state.
	 */
	panic("Could not enable big endian exceptions");
}
|
|
|
|
|
2016-07-05 12:03:49 +07:00
|
|
|
void pseries_little_endian_exceptions(void)
|
2013-11-20 18:14:59 +07:00
|
|
|
{
|
|
|
|
long rc;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
rc = enable_little_endian_exceptions();
|
|
|
|
if (!H_IS_LONG_BUSY(rc))
|
2016-07-05 12:03:49 +07:00
|
|
|
break;
|
2013-11-20 18:14:59 +07:00
|
|
|
mdelay(get_longbusy_msecs(rc));
|
|
|
|
}
|
2016-07-05 12:03:49 +07:00
|
|
|
if (rc) {
|
|
|
|
ppc_md.progress("H_SET_MODE LE exception fail", 0);
|
|
|
|
panic("Could not enable little endian exceptions");
|
|
|
|
}
|
2013-11-20 18:14:59 +07:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-03-31 12:00:39 +07:00
|
|
|
/*
 * Walk the children of the device-tree root and set up a pci_controller for
 * every node whose type is "pci" or "pciex".
 */
static void __init find_and_init_phbs(void)
{
	struct device_node *root = of_find_node_by_path("/");
	struct device_node *child;

	for_each_child_of_node(root, child) {
		const char *type = child->type;
		struct pci_controller *hose;

		/* Only PCI / PCI Express host bridge nodes are of interest */
		if (!type)
			continue;
		if (strcmp(type, "pci") && strcmp(type, "pciex"))
			continue;

		hose = pcibios_alloc_controller(child);
		if (hose) {
			rtas_setup_phb(hose);
			pci_process_bridge_OF_ranges(hose, child, 0);
			isa_bridge_find_early(hose);
			hose->controller_ops = pseries_pci_controller_ops;
		}
	}

	of_node_put(root);

	/*
	 * Properties in chosen may set PCI_PROBE_ONLY and
	 * PCI_REASSIGN_ALL_BUS.
	 */
	of_pci_check_probe_only();
}
|
|
|
|
|
2006-07-03 18:36:01 +07:00
|
|
|
/*
 * Platform setup_arch hook: SMP, firmware NMI, PCI host bridges and nvram
 * are initialised here, followed by the LPAR-dependent ppc_md callbacks.
 * The order of the calls below is kept exactly as it was.
 */
static void __init pSeries_setup_arch(void)
{
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	smp_init_pseries();

	/* Roughly sane placeholder until calibrate_delay() has run */
	loops_per_jiffy = 50000000;

	fwnmi_init();

	/* Default to probe-only PCI (can be overridden by rtas_pci) */
	pci_add_flags(PCI_PROBE_ONLY);

	/* Locate the PCI host bridges and bring them up */
	init_pci_config_tokens();
	find_and_init_phbs();
	of_reconfig_notifier_register(&pci_dn_reconfig_nb);

	pSeries_nvram_init();

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		/* Not an LPAR: no special idle routine is installed */
		ppc_md.enable_pmcs = power4_enable_pmcs;
	} else {
		vpa_init(boot_cpuid);
		ppc_md.power_save = pseries_lpar_idle;
		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
	}

	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
}
|
|
|
|
|
|
|
|
/*
 * Leave an endian-specific "Linux ..." banner followed by the kernel version
 * string on the operator panel via ppc_md.progress().  Always returns 0.
 */
static int __init pSeries_init_panel(void)
{
#ifdef __BIG_ENDIAN__
	char *arch_banner = "Linux ppc64\n";
#else
	char *arch_banner = "Linux ppc64le\n";
#endif

	ppc_md.progress(arch_banner, 0);
	ppc_md.progress(init_utsname()->version, 0);

	return 0;
}
machine_arch_initcall(pseries, pSeries_init_panel);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2012-09-07 04:24:56 +07:00
|
|
|
static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
|
2005-11-03 11:30:49 +07:00
|
|
|
{
|
2005-11-07 09:12:03 +07:00
|
|
|
return plpar_hcall_norets(H_SET_DABR, dabr);
|
2005-11-03 11:30:49 +07:00
|
|
|
}
|
|
|
|
|
2012-09-07 04:24:56 +07:00
|
|
|
static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
|
2005-11-07 09:12:03 +07:00
|
|
|
{
|
2012-09-07 04:24:56 +07:00
|
|
|
/* Have to set at least one bit in the DABRX according to PAPR */
|
|
|
|
if (dabrx == 0 && dabr == 0)
|
|
|
|
dabrx = DABRX_USER;
|
|
|
|
/* PAPR says we can only set kernel and user bits */
|
2012-09-07 04:24:57 +07:00
|
|
|
dabrx &= DABRX_KERNEL | DABRX_USER;
|
2012-09-07 04:24:56 +07:00
|
|
|
|
|
|
|
return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
|
2005-11-07 09:12:03 +07:00
|
|
|
}
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2012-12-20 21:06:45 +07:00
|
|
|
static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
|
|
|
|
{
|
|
|
|
/* PAPR says we can't set HYP */
|
|
|
|
dawrx &= ~DAWRX_HYP;
|
|
|
|
|
|
|
|
return plapr_set_watchpoint0(dawr, dawrx);
|
|
|
|
}
|
|
|
|
|
2008-07-24 01:29:03 +07:00
|
|
|
/* Parameter token handed to the ibm,get-system-parameter RTAS call to fetch the CMO characteristics */
#define CMO_CHARACTERISTICS_TOKEN 44
|
|
|
|
/* Bound, counted from the start of rtas_data_buf, on how far the CMO characteristics string is scanned */
#define CMO_MAXLENGTH 1026
|
|
|
|
|
2011-05-04 13:01:20 +07:00
|
|
|
void pSeries_coalesce_init(void)
|
|
|
|
{
|
|
|
|
struct hvcall_mpp_x_data mpp_x_data;
|
|
|
|
|
|
|
|
if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
|
|
|
|
powerpc_firmware_features |= FW_FEATURE_XCMO;
|
|
|
|
else
|
|
|
|
powerpc_firmware_features &= ~FW_FEATURE_XCMO;
|
|
|
|
}
|
|
|
|
|
2008-07-24 01:29:03 +07:00
|
|
|
/**
|
|
|
|
* fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
|
|
|
|
* handle that here. (Stolen from parse_system_parameter_string)
|
|
|
|
*/
|
2014-08-20 05:55:18 +07:00
|
|
|
static void pSeries_cmo_feature_init(void)
|
2008-07-24 01:29:03 +07:00
|
|
|
{
|
|
|
|
char *ptr, *key, *value, *end;
|
|
|
|
int call_status;
|
2013-12-09 14:17:01 +07:00
|
|
|
int page_order = IOMMU_PAGE_SHIFT_4K;
|
2008-07-24 01:29:03 +07:00
|
|
|
|
|
|
|
pr_debug(" -> fw_cmo_feature_init()\n");
|
|
|
|
spin_lock(&rtas_data_buf_lock);
|
|
|
|
memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
|
|
|
|
call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
|
|
|
|
NULL,
|
|
|
|
CMO_CHARACTERISTICS_TOKEN,
|
|
|
|
__pa(rtas_data_buf),
|
|
|
|
RTAS_DATA_BUF_SIZE);
|
|
|
|
|
|
|
|
if (call_status != 0) {
|
|
|
|
spin_unlock(&rtas_data_buf_lock);
|
|
|
|
pr_debug("CMO not available\n");
|
|
|
|
pr_debug(" <- fw_cmo_feature_init()\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
end = rtas_data_buf + CMO_MAXLENGTH - 2;
|
|
|
|
ptr = rtas_data_buf + 2; /* step over strlen value */
|
|
|
|
key = value = ptr;
|
|
|
|
|
|
|
|
while (*ptr && (ptr <= end)) {
|
|
|
|
/* Separate the key and value by replacing '=' with '\0' and
|
|
|
|
* point the value at the string after the '='
|
|
|
|
*/
|
|
|
|
if (ptr[0] == '=') {
|
|
|
|
ptr[0] = '\0';
|
|
|
|
value = ptr + 1;
|
|
|
|
} else if (ptr[0] == '\0' || ptr[0] == ',') {
|
|
|
|
/* Terminate the string containing the key/value pair */
|
|
|
|
ptr[0] = '\0';
|
|
|
|
|
|
|
|
if (key == value) {
|
|
|
|
pr_debug("Malformed key/value pair\n");
|
|
|
|
/* Never found a '=', end processing */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-08-16 02:07:31 +07:00
|
|
|
if (0 == strcmp(key, "CMOPageSize"))
|
|
|
|
page_order = simple_strtol(value, NULL, 10);
|
|
|
|
else if (0 == strcmp(key, "PrPSP"))
|
|
|
|
CMO_PrPSP = simple_strtol(value, NULL, 10);
|
2008-07-24 01:29:03 +07:00
|
|
|
else if (0 == strcmp(key, "SecPSP"))
|
2008-08-16 02:07:31 +07:00
|
|
|
CMO_SecPSP = simple_strtol(value, NULL, 10);
|
2008-07-24 01:29:03 +07:00
|
|
|
value = key = ptr + 1;
|
|
|
|
}
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
|
2008-08-16 02:07:31 +07:00
|
|
|
/* Page size is returned as the power of 2 of the page size,
|
|
|
|
* convert to the page size in bytes before returning
|
|
|
|
*/
|
|
|
|
CMO_PageSize = 1 << page_order;
|
|
|
|
pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);
|
|
|
|
|
|
|
|
if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
|
2008-07-24 01:29:03 +07:00
|
|
|
pr_info("CMO enabled\n");
|
2008-08-16 02:07:31 +07:00
|
|
|
pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
|
|
|
|
CMO_SecPSP);
|
2008-07-24 01:29:03 +07:00
|
|
|
powerpc_firmware_features |= FW_FEATURE_CMO;
|
2011-05-04 13:01:20 +07:00
|
|
|
pSeries_coalesce_init();
|
2008-07-24 01:29:03 +07:00
|
|
|
} else
|
2008-08-16 02:07:31 +07:00
|
|
|
pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
|
|
|
|
CMO_SecPSP);
|
2008-07-24 01:29:03 +07:00
|
|
|
spin_unlock(&rtas_data_buf_lock);
|
|
|
|
pr_debug(" <- fw_cmo_feature_init()\n");
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Early initialization. Relocation is on but do not reference unbolted pages
|
|
|
|
*/
|
2016-07-05 12:04:06 +07:00
|
|
|
static void __init pseries_init(void)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2016-07-05 12:04:06 +07:00
|
|
|
pr_debug(" -> pseries_init()\n");
|
2005-04-17 05:20:36 +07:00
|
|
|
|
powerpc/pseries: Re-implement HVSI as part of hvc_vio
On pseries machines, consoles are provided by the hypervisor using
a low level get_chars/put_chars type interface. However, this is
really just a transport to the service processor which implements
them either as "raw" console (networked consoles, HMC, ...) or as
"hvsi" serial ports.
The later is a simple packet protocol on top of the raw character
interface that is supposed to convey additional "serial port" style
semantics. In practice however, all it does is provide a way to
read the CD line and set/clear our DTR line, that's it.
We currently implement the "raw" protocol as an hvc console backend
(/dev/hvcN) and the "hvsi" protocol using a separate tty driver
(/dev/hvsi0).
However this is quite impractical. The arbitrary difference between
the two type of devices has been a major source of user (and distro)
confusion. Additionally, there's an additional mini -hvsi implementation
in the pseries platform code for our low level debug console and early
boot kernel messages, which means code duplication, though that low
level variant is impractical as it's incapable of doing the initial
protocol negociation to establish the link to the FSP.
This essentially replaces the dedicated hvsi driver and the platform
udbg code completely by extending the existing hvc_vio backend used
in "raw" mode so that:
- It now supports HVSI as well
- We add support for hvc backend providing tiocm{get,set}
- It also provides a udbg interface for early debug and boot console
This is overall less code, though this will only be obvious once we
remove the old "hvsi" driver, which is still available for now. When
the old driver is enabled, the new code still kicks in for the low
level udbg console, replacing the old mini implementation in the platform
code, it just doesn't provide the higher level "hvc" interface.
In addition to producing generally simler code, this has several benefits
over our current situation:
- The user/distro only has to deal with /dev/hvcN for the hypervisor
console, avoiding all sort of confusion that has plagued us in the past
- The tty, kernel and low level debug console all use the same code
base which supports the full protocol establishment process, thus the
console is now available much earlier than it used to be with the
old HVSI driver. The kernel console works much earlier and udbg is
available much earlier too. Hackers can enable a hard coded very-early
debug console as well that works with HVSI (previously that was only
supported for the "raw" mode).
I've tried to keep the same semantics as hvsi relative to how I react
to things like CD changes, with some subtle differences though:
- I clear DTR on close if HUPCL is set
- Current hvsi triggers a hangup if it detects a up->down transition
on CD (you can still open a console with CD down). My new implementation
triggers a hangup if the link to the FSP is severed, and severs it upon
detecting a up->down transition on CD.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-12 10:46:38 +07:00
|
|
|
#ifdef CONFIG_HVC_CONSOLE
|
2006-03-21 16:45:59 +07:00
|
|
|
if (firmware_has_feature(FW_FEATURE_LPAR))
|
powerpc/pseries: Re-implement HVSI as part of hvc_vio
On pseries machines, consoles are provided by the hypervisor using
a low level get_chars/put_chars type interface. However, this is
really just a transport to the service processor which implements
them either as "raw" console (networked consoles, HMC, ...) or as
"hvsi" serial ports.
The later is a simple packet protocol on top of the raw character
interface that is supposed to convey additional "serial port" style
semantics. In practice however, all it does is provide a way to
read the CD line and set/clear our DTR line, that's it.
We currently implement the "raw" protocol as an hvc console backend
(/dev/hvcN) and the "hvsi" protocol using a separate tty driver
(/dev/hvsi0).
However this is quite impractical. The arbitrary difference between
the two type of devices has been a major source of user (and distro)
confusion. Additionally, there's an additional mini -hvsi implementation
in the pseries platform code for our low level debug console and early
boot kernel messages, which means code duplication, though that low
level variant is impractical as it's incapable of doing the initial
protocol negociation to establish the link to the FSP.
This essentially replaces the dedicated hvsi driver and the platform
udbg code completely by extending the existing hvc_vio backend used
in "raw" mode so that:
- It now supports HVSI as well
- We add support for hvc backend providing tiocm{get,set}
- It also provides a udbg interface for early debug and boot console
This is overall less code, though this will only be obvious once we
remove the old "hvsi" driver, which is still available for now. When
the old driver is enabled, the new code still kicks in for the low
level udbg console, replacing the old mini implementation in the platform
code, it just doesn't provide the higher level "hvc" interface.
In addition to producing generally simler code, this has several benefits
over our current situation:
- The user/distro only has to deal with /dev/hvcN for the hypervisor
console, avoiding all sort of confusion that has plagued us in the past
- The tty, kernel and low level debug console all use the same code
base which supports the full protocol establishment process, thus the
console is now available much earlier than it used to be with the
old HVSI driver. The kernel console works much earlier and udbg is
available much earlier too. Hackers can enable a hard coded very-early
debug console as well that works with HVSI (previously that was only
supported for the "raw" mode).
I've tried to keep the same semantics as hvsi relative to how I react
to things like CD changes, with some subtle differences though:
- I clear DTR on close if HUPCL is set
- Current hvsi triggers a hangup if it detects a up->down transition
on CD (you can still open a console with CD down). My new implementation
triggers a hangup if the link to the FSP is severed, and severs it upon
detecting a up->down transition on CD.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-12 10:46:38 +07:00
|
|
|
hvc_vio_init_early();
|
|
|
|
#endif
|
2012-09-06 02:17:49 +07:00
|
|
|
if (firmware_has_feature(FW_FEATURE_XDABR))
|
2005-11-07 09:12:03 +07:00
|
|
|
ppc_md.set_dabr = pseries_set_xdabr;
|
2012-09-06 02:17:49 +07:00
|
|
|
else if (firmware_has_feature(FW_FEATURE_DABR))
|
|
|
|
ppc_md.set_dabr = pseries_set_dabr;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2012-12-20 21:06:45 +07:00
|
|
|
if (firmware_has_feature(FW_FEATURE_SET_MODE))
|
|
|
|
ppc_md.set_dawr = pseries_set_dawr;
|
|
|
|
|
2008-07-24 01:29:03 +07:00
|
|
|
pSeries_cmo_feature_init();
|
2005-04-17 05:20:36 +07:00
|
|
|
iommu_init_early_pSeries();
|
|
|
|
|
2016-07-05 12:04:06 +07:00
|
|
|
pr_debug(" <- pseries_init()\n");
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2014-10-13 21:01:09 +07:00
|
|
|
/**
|
|
|
|
* pseries_power_off - tell firmware about how to power off the system.
|
|
|
|
*
|
|
|
|
* This function calls either the power-off rtas token in normal cases
|
|
|
|
* or the ibm,power-off-ups token (if present & requested) in case of
|
|
|
|
* a power failure. If power-off token is used, power on will only be
|
|
|
|
* possible with power button press. If ibm,power-off-ups token is used
|
|
|
|
* it will allow auto poweron after power is restored.
|
|
|
|
*/
|
|
|
|
static void pseries_power_off(void)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups");
|
|
|
|
|
|
|
|
if (rtas_flash_term_hook)
|
|
|
|
rtas_flash_term_hook(SYS_POWER_OFF);
|
|
|
|
|
|
|
|
if (rtas_poweron_auto == 0 ||
|
|
|
|
rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) {
|
|
|
|
rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1);
|
|
|
|
printk(KERN_INFO "RTAS power-off returned %d\n", rc);
|
|
|
|
} else {
|
|
|
|
rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL);
|
|
|
|
printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
|
|
|
|
}
|
|
|
|
for (;;);
|
|
|
|
}
|
|
|
|
|
2006-03-28 19:15:54 +07:00
|
|
|
/*
 * Machine probe hook.  Returns 1 (after installing the power-off handler and
 * running pseries_init()) when the root node's device_type is "chrp" and the
 * machine is not one of the excluded Cell blades; returns 0 otherwise.
 */
static int __init pSeries_probe(void)
{
	const char *root_type = of_get_property(of_root, "device_type", NULL);

	if (!root_type || strcmp(root_type, "chrp") != 0)
		return 0;

	/*
	 * Cell blade firmware claims to be chrp although it is not.  Until
	 * that is fixed, those machines have to be rejected here.
	 */
	if (of_machine_is_compatible("IBM,CPBW-1.0"))
		return 0;
	if (of_machine_is_compatible("IBM,CBEA"))
		return 0;

	pm_power_off = pseries_power_off;

	pr_debug("Machine is%s LPAR !\n",
		 (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");

	pseries_init();

	return 1;
}
|
|
|
|
|
ppc64: Set up PCI tree from Open Firmware device tree
This adds code which gives us the option on ppc64 of instantiating the
PCI tree (the tree of pci_bus and pci_dev structs) from the Open
Firmware device tree rather than by probing PCI configuration space.
The OF device tree has a node for each PCI device and bridge in the
system, with properties that tell us what addresses the firmware has
configured for them and other details.
There are a couple of reasons why this is needed. First, on systems
with a hypervisor, there is a PCI-PCI bridge per slot under the PCI
host bridges. These PCI-PCI bridges have special isolation features
for virtualization. We can't write to their config space, and we are
not supposed to be reading their config space either. The firmware
tells us about the address ranges that they pass in the OF device
tree.
Secondly, on powermacs, the interrupt controller is in a PCI device
that may be behind a PCI-PCI bridge. If we happened to take an
interrupt just at the point when the device or a bridge on the path to
it was disabled for probing, we would crash when we try to access the
interrupt controller.
I have implemented a platform-specific function which is called for
each PCI bridge (host or PCI-PCI) to say whether the code should look
in the device tree or use normal PCI probing for the devices under
that bridge. On pSeries machines we use the device tree if we're
running under a hypervisor, otherwise we use normal probing. On
powermacs we use normal probing for the AGP bridge, since the device
for the AGP bridge itself isn't shown in the device tree (at least on
my G5), and the device tree for everything else.
This has been tested on a dual G5 powermac, a partition on a POWER5
machine (running under the hypervisor), and a legacy iSeries
partition.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-09-12 14:17:36 +07:00
|
|
|
static int pSeries_pci_probe_mode(struct pci_bus *bus)
|
|
|
|
{
|
2006-03-21 16:45:59 +07:00
|
|
|
if (firmware_has_feature(FW_FEATURE_LPAR))
|
ppc64: Set up PCI tree from Open Firmware device tree
This adds code which gives us the option on ppc64 of instantiating the
PCI tree (the tree of pci_bus and pci_dev structs) from the Open
Firmware device tree rather than by probing PCI configuration space.
The OF device tree has a node for each PCI device and bridge in the
system, with properties that tell us what addresses the firmware has
configured for them and other details.
There are a couple of reasons why this is needed. First, on systems
with a hypervisor, there is a PCI-PCI bridge per slot under the PCI
host bridges. These PCI-PCI bridges have special isolation features
for virtualization. We can't write to their config space, and we are
not supposed to be reading their config space either. The firmware
tells us about the address ranges that they pass in the OF device
tree.
Secondly, on powermacs, the interrupt controller is in a PCI device
that may be behind a PCI-PCI bridge. If we happened to take an
interrupt just at the point when the device or a bridge on the path to
it was disabled for probing, we would crash when we try to access the
interrupt controller.
I have implemented a platform-specific function which is called for
each PCI bridge (host or PCI-PCI) to say whether the code should look
in the device tree or use normal PCI probing for the devices under
that bridge. On pSeries machines we use the device tree if we're
running under a hypervisor, otherwise we use normal probing. On
powermacs we use normal probing for the AGP bridge, since the device
for the AGP bridge itself isn't shown in the device tree (at least on
my G5), and the device tree for everything else.
This has been tested on a dual G5 powermac, a partition on a POWER5
machine (running under the hypervisor), and a legacy iSeries
partition.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-09-12 14:17:36 +07:00
|
|
|
return PCI_PROBE_DEVTREE;
|
|
|
|
return PCI_PROBE_NORMAL;
|
|
|
|
}
|
|
|
|
|
2015-03-31 12:00:50 +07:00
|
|
|
struct pci_controller_ops pseries_pci_controller_ops = {
|
|
|
|
.probe_mode = pSeries_pci_probe_mode,
|
|
|
|
};
|
|
|
|
|
2006-03-28 19:15:54 +07:00
|
|
|
define_machine(pseries) {
|
|
|
|
.name = "pSeries",
|
2005-04-17 05:20:36 +07:00
|
|
|
.probe = pSeries_probe,
|
|
|
|
.setup_arch = pSeries_setup_arch,
|
2016-05-30 13:18:11 +07:00
|
|
|
.init_IRQ = pseries_init_irq,
|
2005-10-20 17:48:19 +07:00
|
|
|
.show_cpuinfo = pSeries_show_cpuinfo,
|
2005-04-17 05:20:36 +07:00
|
|
|
.log_error = pSeries_log_error,
|
|
|
|
.pcibios_fixup = pSeries_final_fixup,
|
2005-11-03 10:41:19 +07:00
|
|
|
.restart = rtas_restart,
|
|
|
|
.halt = rtas_halt,
|
2007-12-03 05:30:04 +07:00
|
|
|
.panic = rtas_os_term,
|
2005-06-23 06:43:18 +07:00
|
|
|
.get_boot_time = rtas_get_boot_time,
|
|
|
|
.get_rtc_time = rtas_get_rtc_time,
|
|
|
|
.set_rtc_time = rtas_set_rtc_time,
|
2005-06-23 06:43:07 +07:00
|
|
|
.calibrate_decr = generic_calibrate_decr,
|
2005-06-23 06:43:28 +07:00
|
|
|
.progress = rtas_progress,
|
2005-04-17 05:20:36 +07:00
|
|
|
.system_reset_exception = pSeries_system_reset_exception,
|
|
|
|
.machine_check_exception = pSeries_machine_check_exception,
|
2016-11-29 19:45:50 +07:00
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
2012-11-08 12:40:28 +07:00
|
|
|
.machine_kexec = pSeries_machine_kexec,
|
2016-05-30 13:18:13 +07:00
|
|
|
.kexec_cpu_down = pseries_kexec_cpu_down,
|
2012-11-08 12:40:28 +07:00
|
|
|
#endif
|
2014-06-04 14:50:47 +07:00
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
|
|
|
|
.memory_block_size = pseries_memory_block_size,
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|