linux_dsm_epyc7002/arch/powerpc/mm/hash64_64k.c
Ram Pai 9d2edb1848 powerpc: Free up four 64K PTE bits in 4K backed HPTE pages
Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6,
in the 4K backed HPTE pages.These bits continue to be used
for 64K backed HPTE pages in this patch, but will be freed
up in the next patch. The bit numbers are big-endian as
defined in the ISA3.0

The patch does the following change to the 4k HTPE backed
64K PTE's format.

H_PAGE_BUSY moves from bit 3 to bit 9 (B bit in the figure
		below)
V0 which occupied bit 4 is not used anymore.
V1 which occupied bit 5 is not used anymore.
V2 which occupied bit 6 is not used anymore.
V3 which occupied bit 7 is not used anymore.

Before the patch, the 4k backed 64k PTE format was as follows

 0 1 2 3 4  5  6  7  8 9 10...........................63
 : : : : :  :  :  :  : : :                            :
 v v v v v  v  v  v  v v v                            v

,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-,
|x|x|x|B|V0|V1|V2|V3|x| | |x|x|................|x|x|x|x| <- primary pte
'_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_'
|S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_'

After the patch, the 4k backed 64k PTE format is as follows

 0 1 2 3 4  5  6  7  8 9 10...........................63
 : : : : :  :  :  :  : : :                            :
 v v v v v  v  v  v  v v v                            v

,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-,
|x|x|x| |  |  |  |  |x|B| |x|x|................|.|.|.|.| <- primary pte
'_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_'
|S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_'

the four bits S,G,I,X (one quadruplet per 4k HPTE) that
cache the hash-bucket slot value, is initialized to
1,1,1,1 indicating -- an invalid slot. If a HPTE gets
cached in a 1111 slot(i.e 7th slot of secondary hash
bucket), it is released immediately. In other words,
even though 1111 is a valid slot value in the hash
bucket, we consider it invalid and release the slot and
the HPTE. This gives us the opportunity to determine
the validity of S,G,I,X bits based on its contents and
not on any of the bits V0,V1,V2 or V3 in the primary PTE

When we release a HPTE cached in the 1111 slot
we also release a legitimate slot in the primary
hash bucket and unmap its corresponding HPTE. This
is to ensure that we do get a HPTE cached in a slot
of the primary hash bucket, the next time we retry.

Though treating 1111 slot as invalid, reduces the
number of available slots in the hash bucket and may
have an effect on the performance, the probabilty of
hitting a 1111 slot is extermely low.

Compared to the current scheme, the above scheme
reduces the number of false hash table updates
significantly and has the added advantage of releasing
four valuable PTE bits for other purpose.

NOTE:even though bits 3, 4, 5, 6, 7 are not used when
the 64K PTE is backed by 4k HPTE, they continue to be
used if the PTE gets backed by 64k HPTE. The next
patch will decouple that aswell, and truely release the
bits.

This idea was jointly developed by Paul Mackerras,
Aneesh, Michael Ellermen and myself.

4K PTE format remains unchanged currently.

The patch does the following code changes
a) PTE flags are split between 64k and 4k header files.
b) __hash_page_4K() is reimplemented to reflect the
 above logic.

Acked-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-12-20 18:57:20 +11:00

336 lines
9.2 KiB
C

/*
* Copyright IBM Corporation, 2015
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#include <linux/mm.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
/*
* Return true, if the entry has a slot value which
* the software considers as invalid.
*/
static inline bool hpte_soft_invalid(unsigned long hidx)
{
return ((hidx & 0xfUL) == 0xfUL);
}
/*
* index from 0 - 15
*/
bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
{
return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
}
int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, int subpg_prot)
{
real_pte_t rpte;
unsigned long hpte_group;
unsigned int subpg_index;
unsigned long rflags, pa;
unsigned long old_pte, new_pte, subpg_pte;
unsigned long vpn, hash, slot, gslot;
unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
/*
* atomically mark the linux large page PTE busy and dirty
*/
do {
pte_t pte = READ_ONCE(*ptep);
old_pte = pte_val(pte);
/* If PTE busy, retry the access */
if (unlikely(old_pte & H_PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
if (unlikely(!check_pte_access(access, old_pte)))
return 1;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access. Since this is 4K insert of 64K page size
* also add H_PAGE_COMBO
*/
new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO;
if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
/*
* Handle the subpage protection bits
*/
subpg_pte = new_pte & ~subpg_prot;
rflags = htab_convert_pte_flags(subpg_pte);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
/*
* No CPU has hugepages but lacks no execute, so we
* don't need to worry about that case
*/
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
}
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
rpte = __real_pte(__pte(old_pte), ptep);
/*
*None of the sub 4k page is hashed
*/
if (!(old_pte & H_PAGE_HASHPTE))
goto htab_insert_hpte;
/*
* Check if the pte was already inserted into the hash table
* as a 64k HW page, and invalidate the 64k HPTE if so.
*/
if (!(old_pte & H_PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
/*
* clear the old slot details from the old and new pte.
* On hash insert failure we use old pte value and we don't
* want slot information there if we have a insert failure.
*/
old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
goto htab_insert_hpte;
}
/*
* Check for sub page valid and update
*/
if (__rpte_sub_valid(rpte, subpg_index)) {
int ret;
gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
subpg_index);
ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize, flags);
/*
* If we failed because typically the HPTE wasn't really here
* we try an insertion.
*/
if (ret == -1)
goto htab_insert_hpte;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
htab_insert_hpte:
/*
* Initialize all hidx entries to invalid value, the first time
* the PTE is about to allocate a 4K HPTE.
*/
if (!(old_pte & H_PAGE_COMBO))
rpte.hidx = INVALID_RPTE_HIDX;
/*
* handle H_PAGE_4K_PFN case
*/
if (old_pte & H_PAGE_4K_PFN) {
/*
* All the sub 4k page have the same
* physical address.
*/
pa = pte_pfn(__pte(old_pte)) << HW_PAGE_SHIFT;
} else {
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
pa += (subpg_index << shift);
}
hash = hpt_hash(vpn, shift, ssize);
repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
bool soft_invalid;
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags, HPTE_V_SECONDARY,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize);
soft_invalid = hpte_soft_invalid(slot);
if (unlikely(soft_invalid)) {
/*
* We got a valid slot from a hardware point of view.
* but we cannot use it, because we use this special
* value; as defined by hpte_soft_invalid(), to track
* invalid slots. We cannot use it. So invalidate it.
*/
gslot = slot & _PTEIDX_GROUP_IX;
mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize, 0);
}
if (unlikely(slot == -1 || soft_invalid)) {
/*
* For soft invalid slot, let's ensure that we release a
* slot from the primary, with the hope that we will
* acquire that slot next time we try. This will ensure
* that we do not get the same soft-invalid slot.
*/
if (soft_invalid || (mftb() & 0x1))
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
goto repeat;
}
}
/*
* Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
*ptep = __pte(old_pte);
hash_failure_debug(ea, access, vsid, trap, ssize,
MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
return -1;
}
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
int __hash_page_64K(unsigned long ea, unsigned long access,
unsigned long vsid, pte_t *ptep, unsigned long trap,
unsigned long flags, int ssize)
{
unsigned long hpte_group;
unsigned long rflags, pa;
unsigned long old_pte, new_pte;
unsigned long vpn, hash, slot;
unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;
/*
* atomically mark the linux large page PTE busy and dirty
*/
do {
pte_t pte = READ_ONCE(*ptep);
old_pte = pte_val(pte);
/* If PTE busy, retry the access */
if (unlikely(old_pte & H_PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
if (unlikely(!check_pte_access(access, old_pte)))
return 1;
/*
* Check if PTE has the cache-inhibit bit set
* If so, bail out and refault as a 4k page
*/
if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
unlikely(pte_ci(pte)))
return 0;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access.
*/
new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
vpn = hpt_vpn(ea, vsid, ssize);
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
/*
* There MIGHT be an HPTE for this pte
*/
hash = hpt_hash(vpn, shift, ssize);
if (old_pte & H_PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
MMU_PAGE_64K, ssize,
flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
if (likely(!(old_pte & H_PAGE_HASHPTE))) {
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
hash = hpt_hash(vpn, shift, ssize);
repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
MMU_PAGE_64K, MMU_PAGE_64K,
ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags,
HPTE_V_SECONDARY,
MMU_PAGE_64K,
MMU_PAGE_64K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
goto repeat;
}
}
/*
* Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
*ptep = __pte(old_pte);
hash_failure_debug(ea, access, vsid, trap, ssize,
MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}