mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-04 05:06:39 +07:00
b64f34cdfe
This patch implements VIRT_CPU_ACCOUNTING for ia64, which enables us to use more accurate cpu time accounting. VIRT_CPU_ACCOUNTING is a kernel config item that the s390 and powerpc architectures already have. When this config is turned on, these architectures change the mechanism of cpu time accounting from a tick-sampling based one to a state-transition based one. State-transition based accounting is done by checking the time (the cycle counter in the processor) at every state-transition point, such as entry to/exit from the kernel, interrupt, softirq, etc. The difference between two such points is the actual time consumed in that state. There is no doubt that this value is more accurate than that of tick-sampling based accounting. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
1015 lines
29 KiB
ArmAsm
1015 lines
29 KiB
ArmAsm
/*
 * This file contains the light-weight system call handlers (fsyscall-handlers).
 *
 * Copyright (C) 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
 * 18-Feb-03 louisk	Implement fsys_gettimeofday().
 * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
 *			probably broke it along the way... ;-)
 * 13-Jul-04 clameter	Implement fsys_clock_gettime and revise fsys_gettimeofday to make
 *			it capable of using memory based clocks without falling back to C code.
 * 08-Feb-07 Fenghua Yu	Implement fsys_getcpu.
 *
 */

#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/unistd.h>

#include "entry.h"

/*
 * See Documentation/ia64/fsys.txt for details on fsyscalls.
 *
 * On entry to an fsyscall handler:
 *   r10	= 0 (i.e., defaults to "successful syscall return")
 *   r11	= saved ar.pfs (a user-level value)
 *   r15	= system call number
 *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
 *   r32-r39 = system call arguments
 *   b6	= return address (a user-level value)
 *   ar.pfs = previous frame-state (a user-level value)
 *   PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
 *   all other registers may contain values passed in from user-mode
 *
 * On return from an fsyscall handler:
 *   r11	= saved ar.pfs (as passed into the fsyscall handler)
 *   r15	= system call number (as passed into the fsyscall handler)
 *   r32-r39 = system call arguments (as passed into the fsyscall handler)
 *   b6	= return address (as passed into the fsyscall handler)
 *   ar.pfs = previous frame-state (as passed into the fsyscall handler)
 */

/*
 * fsys_ni_syscall: light-weight handler that always fails.
 *
 * Out: r8  = ENOSYS
 *      r10 = -1 (the handler entry convention documents r10 = 0 as the
 *            "successful syscall return" default, so -1 flags the failure)
 */
ENTRY(fsys_ni_syscall)
	.prologue
	.altrp b6
	.body
	mov r8=ENOSYS
	mov r10=-1
	FSYS_RETURN
END(fsys_ni_syscall)
/*
 * fsys_getpid: return current->tgid without a full kernel entry.
 *
 * In:  r16 = "current" task pointer
 * Out: r8  = current->tgid (r10 is left at its entry value)
 *
 * If any TIF_ALLWORK_MASK bit is set in the thread_info flags, the call is
 * redirected to fsys_fallback_syscall (heavy-weight path) instead.
 * r9 is used as scratch; on the fast path it ends up as 0 (masked flags).
 */
ENTRY(fsys_getpid)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = &thread_info->flags
	;;
	ld4 r9=[r9]
	add r8=IA64_TASK_TGID_OFFSET,r16
	;;
	and r9=TIF_ALLWORK_MASK,r9
	ld4 r8=[r8]				// r8 = current->tgid
	;;
	cmp.ne p8,p0=0,r9			// p8 <- some work is pending
(p8)	br.spnt.many fsys_fallback_syscall
	FSYS_RETURN
END(fsys_getpid)
/*
 * fsys_getppid: return current->group_leader->real_parent->tgid.
 *
 * In:  r16 = "current" task pointer
 * Out: r8  = tgid of the group leader's real parent
 *
 * On SMP the real_parent pointer is read again after the tgid load and the
 * whole sequence is retried (label 1) if it changed in between.
 *
 * If any TIF_ALLWORK_MASK bit is set in the thread_info flags, the call is
 * redirected to fsys_fallback_syscall.  This check is performed on both SMP
 * and UP builds (the UP path previously computed p8 but never acted on it).
 *
 * r17-r19 hold kernel pointers while running and are zeroed before
 * returning to user level.
 */
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

	ld4 r9=[r9]
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9			// p8 <- some work is pending
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0			// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
#else
	/* Honour pending work on UP too, exactly as the SMP path above does. */
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	mov r19=0			// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)
/*
 * fsys_set_tid_address: store the argument into the task field at
 * IA64_TASK_CLEAR_CHILD_TID_OFFSET and return the 4-byte field at
 * IA64_TASK_PID_OFFSET.
 *
 * In:  r16 = "current" task pointer
 *      r32 = new value for the clear_child_tid field
 * Out: r8  = value loaded from current + IA64_TASK_PID_OFFSET
 *
 * A NaT argument is not rejected: -1 is stored in its place (p7 path).
 * The store is issued before the work-pending branch, so the field is
 * already updated when control is handed to fsys_fallback_syscall.
 * r17/r18 are zeroed before returning to user level.
 */
ENTRY(fsys_set_tid_address)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	tnat.z p6,p7=r32		// check argument register for being NaT
	;;
	and r9=TIF_ALLWORK_MASK,r9
	add r8=IA64_TASK_PID_OFFSET,r16
	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
	;;
	ld4 r8=[r8]
	cmp.ne p8,p0=0,r9		// p8 <- some work is pending
	mov r17=-1
	;;
(p6)	st8 [r18]=r32			// argument is a real value: store it
(p7)	st8 [r18]=r17			// argument is NaT: store -1 instead
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	FSYS_RETURN
END(fsys_set_tid_address)
/*
 * Build-time layout checks: the time code below addresses the lock sequence
 * word through the base address of fsyscall_gtod_data and the jitter flag
 * through the base address of itc_jitter_data, so both offsets must be 0.
 */
#if IA64_GTOD_LOCK_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
#endif
#if IA64_ITC_JITTER_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
/*
 * Flag bits handed to .gettime in r30.  "mov pr = r30,0xc000" copies bits 14
 * and 15 into predicates p14 and p15.
 */
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000

/*
 * fsys_gettimeofday(tv = r32, tz = r33)
 *
 * Sets r31 = tv and r30 = CLOCK_DIVIDE_BY_1000, then runs the shared
 * ".gettime" body (also entered from fsys_clock_gettime with its own
 * r30/r31).  tz is only checked for NaT here; it is never dereferenced
 * in this block.
 *
 * .gettime reads the time base inside a sequence-counter protected section
 * (retrying at .time_redo if the sequence word at fsyscall_gtod_data changed
 * or was odd), normalizes tv_nsec to below 1000000000, optionally divides the
 * sub-second part by 1000 (p14), and stores the two 8-byte results to
 * [r31] and [r31 + IA64_TIMESPEC_TV_NSEC_OFFSET].
 *
 * Out: r8 = 0, r10 = 0 on success
 *      r8 = EINVAL, r10 = -1 via .fail_einval (NaT argument)
 *      r8 = EFAULT, r10 = -1 via .fail_efault (user buffer not writable)
 * Falls back to fsys_fallback_syscall when TIF_ALLWORK_MASK work is pending.
 *
 * .fail_einval and .fail_efault are also branch targets for other handlers
 * in this file.
 */
ENTRY(fsys_gettimeofday)
	.prologue
	.altrp b6
	.body
	mov r31 = r32
	tnat.nz p6,p0 = r33		// guard against NaT argument
(p6)    br.cond.spnt.few .fail_einval
	mov r30 = CLOCK_DIVIDE_BY_1000
	;;
.gettime:
	// Register map
	// Incoming r31 = pointer to address where to place result
	//          r30 = flags determining how time is processed
	// r2,r3 = temp r4-r7 preserved
	// r8 = result nanoseconds
	// r9 = result seconds
	// r10 = temporary storage for clock difference
	// r11 = preserved: saved ar.pfs
	// r12 = preserved: memory stack
	// r13 = preserved: thread pointer
	// r14 = address of mask / mask value
	// r15 = preserved: system call number
	// r16 = preserved: current task pointer
	// r17 = (not used)
	// r18 = (not used)
	// r19 = address of itc_lastcycle
	// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
	// r21 = address of mmio_ptr
	// r22 = address of wall_time or monotonic_time
	// r23 = address of shift / value
	// r24 = address mult factor / cycle_last value
	// r25 = itc_lastcycle value
	// r26 = address clocksource cycle_last
	// r27 = (not used)
	// r28 = sequence number at the beginning of critical section
	// r29 = address of itc_jitter
	// r30 = time processing flags / memory address
	// r31 = pointer to result
	// Predicates
	// p6,p7 short term use
	// p8 = timesource ar.itc
	// p9 = timesource mmio64
	// p10 = timesource mmio32 - not used
	// p11 = timesource not to be handled by asm code
	// p12 = memory time source ( = p9 | p10) - not used
	// p13 = do cmpxchg with itc_lastcycle
	// p14 = Divide by 1000
	// p15 = Add monotonic
	//
	// Note that instructions are optimized for McKinley. McKinley can
	// process two bundles simultaneously and therefore we continuously
	// try to feed the CPU two bundles and then a stop.
	//
	// Additional note that code has changed a lot. Optimization is TBD.
	// Comments that begin with "?" may be outdated.
	tnat.nz p6,p0 = r31	// ? branch deferred to fit later bundle
	mov pr = r30,0xc000	// Set predicates according to function
	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
	movl r20 = fsyscall_gtod_data	// load fsyscall gettimeofday data address
	;;
	movl r29 = itc_jitter_data	// itc_jitter
	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
	ld4 r2 = [r2]		// process work pending flags
	;;
(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
	and r2 = TIF_ALLWORK_MASK,r2
(p6)    br.cond.spnt.few .fail_einval	// ? deferred branch
	;;
	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
(p6)    br.cond.spnt.many fsys_fallback_syscall
	;;
	// Begin critical section
.time_redo:
	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
	;;
	and r28 = ~1,r28	// And make sequence even to force retry if odd
	;;
	ld8 r30 = [r21]		// clocksource->mmio_ptr
	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
	ld4 r2 = [r29]		// itc_jitter value
	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
	;;
	ld4 r3 = [r24]		// clocksource mult value
	ld8 r14 = [r14]		// clocksource mask value
	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
	;;
	setf.sig f7 = r3	// Setup for mult scaling of counter
(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
	ld4 r23 = [r23]		// clocksource shift value
	ld8 r24 = [r26]		// get clksrc_cycle_last value
(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
	;;
	.pred.rel.mutex p8,p9
(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
	;;		// ? could be removed by moving the last add upward
	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
	;;
	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
	;;
(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
	sub r10 = r2,r24	// current_cycle - last_cycle
	;;
(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
	;;
(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
	;;
(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
	;;
(p7)	sub r10 = r3,r24	// then use new last_cycle instead
	;;
	and r10 = r10,r14	// Apply mask
	;;
	setf.sig f8 = r10
	nop.i 123
	;;
	// fault check takes 5 cycles and we have spare time
EX(.fail_efault, probe.w.fault r31, 3)
	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
	;;
	// ? simulate tbit.nz.or p7,p0 = r28,0
	getf.sig r2 = f8
	mf
	;;
	ld4 r10 = [r20]		// gtod_lock.sequence
	shr.u r2 = r2,r23	// shift by factor
	;;		// ? overloaded 3 bundles!
	add r8 = r8,r2		// Add xtime.nsecs
	cmp4.ne p7,p0 = r28,r10
(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
	// End critical section.
	// Now r8=tv->tv_nsec and r9=tv->tv_sec
	mov r10 = r0
	movl r2 = 1000000000
	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
	;;
.time_normalize:
	mov r21 = r8
	cmp.ge p6,p0 = r8,r2
(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
	;;
(p14)	setf.sig f8 = r20
(p6)	sub r8 = r8,r2
(p6)	add r9 = 1,r9		// two nops before the branch.
(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
(p6)	br.cond.dpnt.few .time_normalize
	;;
	// Divided by 8 through shift. Now divide by 125
	// The compiler was able to do that with a multiply
	// and a shift and we do the same
EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
	;;
	mov r8 = r0
(p14)	getf.sig r2 = f8
	;;
(p14)	shr.u r21 = r2, 4
	;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
	FSYS_RETURN
.fail_einval:
	mov r8 = EINVAL
	mov r10 = -1
	FSYS_RETURN
.fail_efault:
	mov r8 = EFAULT
	mov r10 = -1
	FSYS_RETURN
END(fsys_gettimeofday)
/*
 * fsys_clock_gettime(which_clock = r32, tp = r33)
 *
 * Handles CLOCK_REALTIME (0) and CLOCK_MONOTONIC (1) only; any larger clock
 * id is passed to fsys_fallback_syscall.  For the two supported ids it sets
 *   r31 = tp
 *   r30 = which_clock << 15   (0, or CLOCK_ADD_MONOTONIC for id 1)
 * and continues in the shared ".gettime" body of fsys_gettimeofday, which
 * NaT-checks r31 and produces the return values.
 *
 * r32 is NaT-checked here: with a NaT clock id the compare below clears p6
 * (so the range check would not reject it) and the NaT would propagate
 * through the shift into r30, which .gettime feeds to "mov pr = r30,...".
 * Like the other handlers in this file, a NaT argument yields EINVAL.
 */
ENTRY(fsys_clock_gettime)
	.prologue
	.altrp b6
	.body
	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
	tnat.nz p7, p0 = r32		// guard against NaT argument
	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6)	br.spnt.few fsys_fallback_syscall
(p7)	br.cond.spnt.few .fail_einval
	mov r31 = r33
	shl r30 = r32,15
	br.many .gettime
END(fsys_clock_gettime)
/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 * In:  r32 = how, r33 = set, r34 = oset, r35 = sigsetsize, r16 = current
 * Out: r8 = 0 on success (r10 left at its entry value)
 *      EINVAL via .fail_einval: how > SIG_SETMASK, sigsetsize != _NSIG_WORDS*8,
 *                               or how/sigsetsize is NaT
 *      EFAULT via .fail_efault: set or oset is NaT, or a user access faults
 *
 * Flow:
 *   - set == NULL: skip straight to .store_mask and only report the old mask.
 *   - otherwise: load *set, strip SIGKILL/SIGSTOP, disable interrupts, take
 *     the siglock with a single cmpxchg attempt (SMP only), compute the new
 *     mask according to "how", and recompute whether a signal is pending.
 *   - nothing pending: clear _TIF_SIGPENDING in the thread flags with a
 *     cmpxchg retry loop, store the new mask, release the lock, re-enable
 *     interrupts, then fall into .store_mask.
 *   - signal pending, lock contended, or TIF_ALLWORK_MASK work pending:
 *     undo (unlock / re-enable interrupts as needed) and branch to
 *     fsys_fallback_syscall.  The new mask has not been stored on these paths.
 *
 * .fail_einval and .fail_efault are defined in fsys_gettimeofday.
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
	.prologue
	.altrp b6
	.body

	add r2=IA64_TASK_BLOCKED_OFFSET,r16
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	cmp4.ltu p6,p0=SIG_SETMASK,r32

	cmp.ne p15,p0=r0,r34			// oset != NULL?
	tnat.nz p8,p0=r34
	add r31=IA64_TASK_SIGHAND_OFFSET,r16
	;;
	ld8 r3=[r2]				// read/prefetch current->blocked
	ld4 r9=[r9]
	tnat.nz.or p6,p0=r35

	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
	tnat.nz.or p6,p0=r32
(p6)	br.spnt.few .fail_einval		// fail with EINVAL
	;;
#ifdef CONFIG_SMP
	ld8 r31=[r31]				// r31 <- current->sighand
#endif
	and r9=TIF_ALLWORK_MASK,r9
	tnat.nz.or p8,p0=r33
	;;
	cmp.ne p7,p0=0,r9
	cmp.eq p6,p0=r0,r33			// set == NULL?
	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
(p8)	br.spnt.few .fail_efault		// fail with EFAULT
(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask

	/* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
	;;

	rsm psr.i				// mask interrupt delivery
	mov ar.ccv=0
	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP

#ifdef CONFIG_SMP
	mov r17=1
	;;
	cmpxchg4.acq r18=[r31],r17,ar.ccv	// try to acquire the lock
	mov r8=EINVAL			// default to EINVAL
	;;
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	cmp4.ne p6,p0=r18,r0
(p6)	br.cond.spnt.many .lock_contention
	;;
#else
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	mov r8=EINVAL			// default to EINVAL
#endif
	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
	add r19=IA64_TASK_SIGNAL_OFFSET,r16
	cmp4.eq p6,p0=SIG_BLOCK,r32
	;;
	ld8 r19=[r19]			// r19 <- current->signal
	cmp4.eq p7,p0=SIG_UNBLOCK,r32
	cmp4.eq p8,p0=SIG_SETMASK,r32
	;;
	ld8 r18=[r18]			// r18 <- current->pending.signal
	.pred.rel.mutex p6,p7,p8
(p6)	or r14=r3,r14			// SIG_BLOCK
(p7)	andcm r14=r3,r14		// SIG_UNBLOCK

(p8)	mov r14=r14			// SIG_SETMASK
(p6)	mov r8=0			// clear error code
	// recalc_sigpending()
	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
	;;
	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
(p7)	mov r8=0		// clear error code

	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
	;;
	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
(p8)	mov r8=0		// clear error code

	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
	;;
	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
	andcm r18=r18,r14
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
	mov r19=0					// i must not leak kernel bits...
(p6)	br.cond.dpnt.many .sig_pending
	;;

1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
	;;
	mov ar.ccv=r17
	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
	;;

	st8 [r2]=r14				// update current->blocked with new mask
	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
	;;
	cmp.ne p6,p0=r17,r8			// update failed?
(p6)	br.cond.spnt.few 1b			// yes -> retry

#ifdef CONFIG_SMP
	st4.rel [r31]=r0			// release the lock
#endif
	ssm psr.i
	;;

	srlz.d					// ensure psr.i is set again
	mov r18=0					// i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
	mov r2=0					// i must not leak kernel bits...
	mov r3=0					// i must not leak kernel bits...
	mov r8=0					// return 0
	mov r9=0					// i must not leak kernel bits...
	mov r14=0					// i must not leak kernel bits...
	mov r17=0					// i must not leak kernel bits...
	mov r31=0					// i must not leak kernel bits...
	FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
	st4.rel [r31]=r0			// release the lock
#endif
	ssm psr.i
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
	ssm psr.i
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)
/*
 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 * In:  r32 = user pointer receiving the cpu number (4-byte store)
 *      r33 = user pointer receiving the node number (2-byte store)
 *      r16 = "current" task pointer
 * Out: r8 = 0 on success
 *      EINVAL via .fail_einval if r32 or r33 is NaT
 *      EFAULT via .fail_efault if either pointer is not writable
 * Falls back to fsys_fallback_syscall when TIF_ALLWORK_MASK work is pending.
 *
 * Without CONFIG_NUMA the node value stored is always 0.
 *
 * NOTE(review): both pointers are probed and stored through unconditionally,
 * so a NULL cpu or node pointer ends up on the EFAULT path -- confirm that
 * callers never pass NULL, or that this is the intended behaviour.
 *
 * .fail_einval and .fail_efault are defined in fsys_gettimeofday.
 */
ENTRY(fsys_getcpu)
	.prologue
	.altrp b6
	.body
	;;
	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r32			// guard against NaT argument
	add r3=TI_CPU+IA64_TASK_SIZE,r16
	;;
	ld4 r3=[r3]				// M r3 = thread_info->cpu
	ld4 r2=[r2]				// M r2 = thread_info->flags
(p6)    br.cond.spnt.few .fail_einval		// B
	;;
	tnat.nz p7,p0 = r33			// I guard against NaT argument
(p7)    br.cond.spnt.few .fail_einval		// B
#ifdef CONFIG_NUMA
	movl r17=cpu_to_node_map
	;;
EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
	shladd r18=r3,1,r17			// r18 = &cpu_to_node_map[cpu] (2-byte entries)
	;;
	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	;;
EX(.fail_efault, st4 [r32] = r3)
EX(.fail_efault, st2 [r33] = r20)
	mov r8=0
	;;
#else
EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
EX(.fail_efault, st4 [r32] = r3)
EX(.fail_efault, st2 [r33] = r0)
	mov r8=0
	;;
#endif
	FSYS_RETURN
END(fsys_getcpu)
/*
 * fsys_fallback_syscall: common exit for light-weight handlers that cannot
 * finish on the fast path.
 *
 * Looks up the heavy-weight entry point sys_call_table[r15 - 1024], disables
 * interrupts, and captures the state fsys_bubble_down expects:
 *   r18 = address of syscall entry point
 *   r21 = ar.fpsr, r26 = ar.pfs, r27 = ar.rsc, r29 = psr
 * There is no branch at the end of this routine: execution continues
 * directly into fsys_bubble_down, which must immediately follow it.
 */
ENTRY(fsys_fallback_syscall)
	.prologue
	.altrp b6
	.body
	/*
	 * We only get here from light-weight syscall handlers.  Thus, we already
	 * know that r15 contains a valid syscall number.  No need to re-check.
	 */
	adds r17=-1024,r15			// r17 = index into sys_call_table
	movl r14=sys_call_table
	;;
	rsm psr.i
	shladd r18=r17,3,r14			// r18 = &sys_call_table[r17] (8-byte entries)
	;;
	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
	mov r29=psr				// read psr (12 cyc load latency)
	mov r27=ar.rsc
	mov r21=ar.fpsr
	mov r26=ar.pfs
END(fsys_fallback_syscall)
	/* FALL THROUGH */
/*
 * fsys_bubble_down: turn an fsyscall into a regular (heavy-weight) syscall.
 *
 * Entered either by falling through from fsys_fallback_syscall or through
 * the slot placed just before fsyscall_table.  Switches to the kernel
 * register backing store, has ia64_syscall_setup build the pt_regs state,
 * optionally updates the VIRT_CPU_ACCOUNTING time stamps, re-enables
 * interrupts and calls the syscall entry point passed in r18, with
 * rp = ia64_ret_from_syscall.  If _TIF_SYSCALL_TRACEAUDIT is set in the
 * thread flags, control goes to ia64_trace_syscall instead.
 */
GLOBAL_ENTRY(fsys_bubble_down)
	.prologue
	.altrp b6
	.body
	/*
	 * We get here for syscalls that don't have a lightweight
	 * handler.  For those, we need to bubble down into the kernel
	 * and that requires setting up a minimal pt_regs structure,
	 * and initializing the CPU state more or less as if an
	 * interruption had occurred.  To make syscall-restarts work,
	 * we setup pt_regs such that cr_iip points to the second
	 * instruction in syscall_via_break.  Decrementing the IP
	 * hence will restart the syscall via break and not
	 * decrementing IP will return us to the caller, as usual.
	 * Note that we preserve the value of psr.pp rather than
	 * initializing it from dcr.pp.  This makes it possible to
	 * distinguish fsyscall execution from other privileged
	 * execution.
	 *
	 * On entry:
	 *	- normal fsyscall handler register usage, except
	 *	  that we also have:
	 *	- r18: address of syscall entry point
	 *	- r21: ar.fpsr
	 *	- r26: ar.pfs
	 *	- r27: ar.rsc
	 *	- r29: psr
	 *
	 * We used to clear some PSR bits here but that requires slow
	 * serialization.  Fortunately, that isn't really necessary.
	 * The rationale is as follows: we used to clear bits
	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
	 * However,
	 *
	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
	 * PSR.I  : already turned off by the time fsys_bubble_down gets
	 *	    invoked
	 * PSR.DFL: always 0 (kernel never turns it on)
	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
	 *	    initiative
	 * PSR.DI : always 0 (kernel never turns it on)
	 * PSR.SI : always 0 (kernel never turns it on)
	 * PSR.DB : don't care --- kernel never enables kernel-level
	 *	    breakpoints
	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
	 *          will trigger a taken branch; the taken-trap-handler then
	 *          converts the syscall into a break-based system-call.
	 */
	/*
	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
	 * The rest we have to synthesize.
	 */
#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
					 | (0x1 << IA64_PSR_RI_BIT)	\
					 | IA64_PSR_BN | IA64_PSR_I)

	invala					// M0|1
	movl r14=ia64_ret_from_syscall		// X

	nop.m 0
	movl r28=__kernel_syscall_via_break	// X	create cr.iip
	;;

	mov r2=r16				// A    get task addr to addl-addressable register
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
	mov r31=pr				// I0   save pr (2 cyc)
	;;
	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
	;;
	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
	nop.i 0
	;;
	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	mov.m r30=ar.itc			// M    get cycle for accounting
#else
	nop.m 0
#endif
	nop.i 0
	;;
	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
	nop.i 0
	;;
	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
	movl r8=PSR_ONE_BITS			// X
	;;
	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
	mov r19=b6				// I0   save b6 (2 cyc)
	mov r20=r1				// A    save caller's gp in r20
	;;
	or r29=r8,r29				// A    construct cr.ipsr value to save
	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack

	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
	br.call.sptk.many b7=ia64_syscall_setup	// B
	;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	// mov.m r30=ar.itc is called in advance
	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
	;;
	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
	;;
	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
	ld8 r21=[r17]				// cumulated utime
	sub r22=r19,r18				// stime before leave kernel
	;;
	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
	sub r18=r30,r19				// elapsed time in user mode
	;;
	add r20=r20,r22				// sum stime
	add r21=r21,r18				// sum utime
	;;
	st8 [r16]=r20				// update stime
	st8 [r17]=r21				// update utime
	;;
#endif
	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
	mov rp=r14				// I0   set the real return addr
	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
	;;
	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
	cmp.eq p8,p0=r3,r0			// A
(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT

	nop.m 0
(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
	br.cond.spnt ia64_trace_syscall		// B
END(fsys_bubble_down)
.rodata
|
|
.align 8
|
|
.globl fsyscall_table
|
|
|
|
data8 fsys_bubble_down
|
|
fsyscall_table:
|
|
data8 fsys_ni_syscall
|
|
data8 0 // exit // 1025
|
|
data8 0 // read
|
|
data8 0 // write
|
|
data8 0 // open
|
|
data8 0 // close
|
|
data8 0 // creat // 1030
|
|
data8 0 // link
|
|
data8 0 // unlink
|
|
data8 0 // execve
|
|
data8 0 // chdir
|
|
data8 0 // fchdir // 1035
|
|
data8 0 // utimes
|
|
data8 0 // mknod
|
|
data8 0 // chmod
|
|
data8 0 // chown
|
|
data8 0 // lseek // 1040
|
|
data8 fsys_getpid // getpid
|
|
data8 fsys_getppid // getppid
|
|
data8 0 // mount
|
|
data8 0 // umount
|
|
data8 0 // setuid // 1045
|
|
data8 0 // getuid
|
|
data8 0 // geteuid
|
|
data8 0 // ptrace
|
|
data8 0 // access
|
|
data8 0 // sync // 1050
|
|
data8 0 // fsync
|
|
data8 0 // fdatasync
|
|
data8 0 // kill
|
|
data8 0 // rename
|
|
data8 0 // mkdir // 1055
|
|
data8 0 // rmdir
|
|
data8 0 // dup
|
|
data8 0 // pipe
|
|
data8 0 // times
|
|
data8 0 // brk // 1060
|
|
data8 0 // setgid
|
|
data8 0 // getgid
|
|
data8 0 // getegid
|
|
data8 0 // acct
|
|
data8 0 // ioctl // 1065
|
|
data8 0 // fcntl
|
|
data8 0 // umask
|
|
data8 0 // chroot
|
|
data8 0 // ustat
|
|
data8 0 // dup2 // 1070
|
|
data8 0 // setreuid
|
|
data8 0 // setregid
|
|
data8 0 // getresuid
|
|
data8 0 // setresuid
|
|
data8 0 // getresgid // 1075
|
|
data8 0 // setresgid
|
|
data8 0 // getgroups
|
|
data8 0 // setgroups
|
|
data8 0 // getpgid
|
|
data8 0 // setpgid // 1080
|
|
data8 0 // setsid
|
|
data8 0 // getsid
|
|
data8 0 // sethostname
|
|
data8 0 // setrlimit
|
|
data8 0 // getrlimit // 1085
|
|
data8 0 // getrusage
|
|
data8 fsys_gettimeofday // gettimeofday
|
|
data8 0 // settimeofday
|
|
data8 0 // select
|
|
data8 0 // poll // 1090
|
|
data8 0 // symlink
|
|
data8 0 // readlink
|
|
data8 0 // uselib
|
|
data8 0 // swapon
|
|
data8 0 // swapoff // 1095
|
|
data8 0 // reboot
|
|
data8 0 // truncate
|
|
data8 0 // ftruncate
|
|
data8 0 // fchmod
|
|
data8 0 // fchown // 1100
|
|
data8 0 // getpriority
|
|
data8 0 // setpriority
|
|
data8 0 // statfs
|
|
data8 0 // fstatfs
|
|
data8 0 // gettid // 1105
|
|
data8 0 // semget
|
|
data8 0 // semop
|
|
data8 0 // semctl
|
|
data8 0 // msgget
|
|
data8 0 // msgsnd // 1110
|
|
data8 0 // msgrcv
|
|
data8 0 // msgctl
|
|
data8 0 // shmget
|
|
data8 0 // shmat
|
|
data8 0 // shmdt // 1115
|
|
data8 0 // shmctl
|
|
data8 0 // syslog
|
|
data8 0 // setitimer
|
|
data8 0 // getitimer
|
|
data8 0 // 1120
|
|
data8 0
|
|
data8 0
|
|
data8 0 // vhangup
|
|
data8 0 // lchown
|
|
data8 0 // remap_file_pages // 1125
|
|
data8 0 // wait4
|
|
data8 0 // sysinfo
|
|
data8 0 // clone
|
|
data8 0 // setdomainname
|
|
data8 0 // newuname // 1130
|
|
data8 0 // adjtimex
|
|
data8 0
|
|
data8 0 // init_module
|
|
data8 0 // delete_module
|
|
data8 0 // 1135
|
|
data8 0
|
|
data8 0 // quotactl
|
|
data8 0 // bdflush
|
|
data8 0 // sysfs
|
|
data8 0 // personality // 1140
|
|
data8 0 // afs_syscall
|
|
data8 0 // setfsuid
|
|
data8 0 // setfsgid
|
|
data8 0 // getdents
|
|
data8 0 // flock // 1145
|
|
data8 0 // readv
|
|
data8 0 // writev
|
|
data8 0 // pread64
|
|
data8 0 // pwrite64
|
|
data8 0 // sysctl // 1150
|
|
data8 0 // mmap
|
|
data8 0 // munmap
|
|
data8 0 // mlock
|
|
data8 0 // mlockall
|
|
data8 0 // mprotect // 1155
|
|
data8 0 // mremap
|
|
data8 0 // msync
|
|
data8 0 // munlock
|
|
data8 0 // munlockall
|
|
data8 0 // sched_getparam // 1160
|
|
data8 0 // sched_setparam
|
|
data8 0 // sched_getscheduler
|
|
data8 0 // sched_setscheduler
|
|
data8 0 // sched_yield
|
|
data8 0 // sched_get_priority_max // 1165
|
|
data8 0 // sched_get_priority_min
|
|
data8 0 // sched_rr_get_interval
|
|
data8 0 // nanosleep
|
|
data8 0 // nfsservctl
|
|
data8 0 // prctl // 1170
|
|
data8 0 // getpagesize
|
|
data8 0 // mmap2
|
|
data8 0 // pciconfig_read
|
|
data8 0 // pciconfig_write
|
|
data8 0 // perfmonctl // 1175
|
|
data8 0 // sigaltstack
|
|
data8 0 // rt_sigaction
|
|
data8 0 // rt_sigpending
|
|
data8 fsys_rt_sigprocmask // rt_sigprocmask
|
|
data8 0 // rt_sigqueueinfo // 1180
|
|
data8 0 // rt_sigreturn
|
|
data8 0 // rt_sigsuspend
|
|
data8 0 // rt_sigtimedwait
|
|
data8 0 // getcwd
|
|
data8 0 // capget // 1185
|
|
data8 0 // capset
|
|
data8 0 // sendfile
|
|
data8 0
|
|
data8 0
|
|
data8 0 // socket // 1190
|
|
data8 0 // bind
|
|
data8 0 // connect
|
|
data8 0 // listen
|
|
data8 0 // accept
|
|
data8 0 // getsockname // 1195
|
|
data8 0 // getpeername
|
|
data8 0 // socketpair
|
|
data8 0 // send
|
|
data8 0 // sendto
|
|
data8 0 // recv // 1200
|
|
data8 0 // recvfrom
|
|
data8 0 // shutdown
|
|
data8 0 // setsockopt
|
|
data8 0 // getsockopt
|
|
data8 0 // sendmsg // 1205
|
|
data8 0 // recvmsg
|
|
data8 0 // pivot_root
|
|
data8 0 // mincore
|
|
data8 0 // madvise
|
|
data8 0 // newstat // 1210
|
|
data8 0 // newlstat
|
|
data8 0 // newfstat
|
|
data8 0 // clone2
|
|
data8 0 // getdents64
|
|
data8 0 // getunwind // 1215
|
|
data8 0 // readahead
|
|
data8 0 // setxattr
|
|
data8 0 // lsetxattr
|
|
data8 0 // fsetxattr
|
|
data8 0 // getxattr // 1220
|
|
data8 0 // lgetxattr
|
|
data8 0 // fgetxattr
|
|
data8 0 // listxattr
|
|
data8 0 // llistxattr
|
|
data8 0 // flistxattr // 1225
|
|
data8 0 // removexattr
|
|
data8 0 // lremovexattr
|
|
data8 0 // fremovexattr
|
|
data8 0 // tkill
|
|
data8 0 // futex // 1230
|
|
data8 0 // sched_setaffinity
|
|
data8 0 // sched_getaffinity
|
|
data8 fsys_set_tid_address // set_tid_address
|
|
data8 0 // fadvise64_64
|
|
data8 0 // tgkill // 1235
|
|
data8 0 // exit_group
|
|
data8 0 // lookup_dcookie
|
|
data8 0 // io_setup
|
|
data8 0 // io_destroy
|
|
data8 0 // io_getevents // 1240
|
|
data8 0 // io_submit
|
|
data8 0 // io_cancel
|
|
data8 0 // epoll_create
|
|
data8 0 // epoll_ctl
|
|
data8 0 // epoll_wait // 1245
|
|
data8 0 // restart_syscall
|
|
data8 0 // semtimedop
|
|
data8 0 // timer_create
|
|
data8 0 // timer_settime
|
|
data8 0 // timer_gettime // 1250
|
|
data8 0 // timer_getoverrun
|
|
data8 0 // timer_delete
|
|
data8 0 // clock_settime
|
|
data8 fsys_clock_gettime // clock_gettime
|
|
data8 0 // clock_getres // 1255
|
|
data8 0 // clock_nanosleep
|
|
data8 0 // fstatfs64
|
|
data8 0 // statfs64
|
|
data8 0 // mbind
|
|
data8 0 // get_mempolicy // 1260
|
|
data8 0 // set_mempolicy
|
|
data8 0 // mq_open
|
|
data8 0 // mq_unlink
|
|
data8 0 // mq_timedsend
|
|
data8 0 // mq_timedreceive // 1265
|
|
data8 0 // mq_notify
|
|
data8 0 // mq_getsetattr
|
|
data8 0 // kexec_load
|
|
data8 0 // vserver
|
|
data8 0 // waitid // 1270
|
|
data8 0 // add_key
|
|
data8 0 // request_key
|
|
data8 0 // keyctl
|
|
data8 0 // ioprio_set
|
|
data8 0 // ioprio_get // 1275
|
|
data8 0 // move_pages
|
|
data8 0 // inotify_init
|
|
data8 0 // inotify_add_watch
|
|
data8 0 // inotify_rm_watch
|
|
data8 0 // migrate_pages // 1280
|
|
data8 0 // openat
|
|
data8 0 // mkdirat
|
|
data8 0 // mknodat
|
|
data8 0 // fchownat
|
|
data8 0 // futimesat // 1285
|
|
data8 0 // newfstatat
|
|
data8 0 // unlinkat
|
|
data8 0 // renameat
|
|
data8 0 // linkat
|
|
data8 0 // symlinkat // 1290
|
|
data8 0 // readlinkat
|
|
data8 0 // fchmodat
|
|
data8 0 // faccessat
|
|
data8 0
|
|
data8 0 // 1295
|
|
data8 0 // unshare
|
|
data8 0 // splice
|
|
data8 0 // set_robust_list
|
|
data8 0 // get_robust_list
|
|
data8 0 // sync_file_range // 1300
|
|
data8 0 // tee
|
|
data8 0 // vmsplice
|
|
data8 0
|
|
data8 fsys_getcpu // getcpu // 1304
|
|
|
|
// fill in zeros for the remaining entries
|
|
.zero:
|
|
.space fsyscall_table + 8*NR_syscalls - .zero, 0
|