linux_dsm_epyc7002/Documentation/vDSO/parse_vdso.c
Andy Lutomirski 98eedc3a9d Document the vDSO and add a reference parser
It turns out that parsing the vDSO is nontrivial if you don't already
have an ELF dynamic loader around.  So document it in Documentation/ABI
and add a reference CC0-licenced parser.

This code is dedicated to Go issue 1933:
http://code.google.com/p/go/issues/detail?id=1933

Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/a315a9514cd71bcf29436cc31e35aada21a5ff21.1310563276.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-07-14 17:57:09 -07:00

257 lines
6.6 KiB
C

/*
* parse_vdso.c: Linux reference vDSO parser
* Written by Andrew Lutomirski, 2011.
*
* This code is meant to be linked in to various programs that run on Linux.
* As such, it is available with as few restrictions as possible. This file
* is licensed under the Creative Commons Zero License, version 1.0,
* available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
*
* The vDSO is a regular ELF DSO that the kernel maps into user space when
* it starts a program. It works equally well in statically and dynamically
* linked binaries.
*
* This code is tested on x86_64. In principle it should work on any 64-bit
* architecture that has a vDSO.
*/
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <elf.h>
/*
* To use this vDSO parser, first call one of the vdso_init_* functions.
* If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
* to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
* Then call vdso_sym for each symbol you want. For example, to look up
* gettimeofday on x86_64, use:
*
* <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
* or
* <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
*
* vdso_sym will return 0 if the symbol doesn't exist or if the init function
* failed or was not called. vdso_sym is a little slow, so its return value
* should be cached.
*
* vdso_sym is threadsafe; the init functions are not.
*
* These are the prototypes:
*/
/* Scan the auxiliary vector for AT_SYSINFO_EHDR and initialise from it. */
extern void vdso_init_from_auxv(void *auxv);
/* Initialise from the AT_SYSINFO_EHDR value, i.e. the address of the vDSO's ELF header. */
extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
/* Return the address of function `name` with version `version`, or 0 if it is unavailable. */
extern void *vdso_sym(const char *version, const char *name);
/* And here's the code. */
#ifndef __x86_64__
# error Not yet ported to non-x86_64 architectures
#endif
/*
 * Parser state for the single vDSO image of this process. It is filled in
 * by vdso_init_from_sysinfo_ehdr and read by vdso_sym; everything except
 * `valid` is only meaningful once `valid` is true.
 */
static struct vdso_info
{
	/* Set only after initialisation has completed successfully. */
	bool valid;

	/* Where the image lives in memory. */
	uintptr_t load_addr;	/* address of the ELF header */
	uintptr_t load_offset;	/* load_addr - recorded vaddr */

	/* Dynamic symbol table and its string table. */
	Elf64_Sym *symtab;
	const char *symstrings;

	/* DT_HASH table: bucket and chain arrays with their lengths. */
	Elf64_Word *bucket;
	Elf64_Word *chain;
	Elf64_Word nbucket;
	Elf64_Word nchain;

	/* Symbol versioning tables; versym is 0 when there is no verdef. */
	Elf64_Versym *versym;
	Elf64_Verdef *verdef;
} vdso_info;
/*
 * SysV ELF symbol hash of the NUL-terminated string `name`.
 *
 * This is the hash function from the ELF specification, but computed in
 * 32 bits. The specification's version uses unsigned long, which was
 * 32 bits wide when it was written; with a 64-bit unsigned long the
 * addition can carry into bit 32, which the 0xf0000000 mask never clears,
 * and the result no longer matches the hash the hash table was built with.
 *
 * Returns the 32-bit hash value (always < 2^28), widened to unsigned long.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	uint32_t h = 0;

	while (*name) {
		uint32_t g;

		h = (h << 4) + *name++;
		g = h & 0xf0000000u;
		if (g)
			h ^= g >> 24;
		h &= ~g;	/* no-op when g == 0 */
	}
	return h;
}
/*
 * Initialise the parser from `base`, the address of the vDSO's ELF header
 * (the AT_SYSINFO_EHDR auxv value).
 *
 * On success vdso_info.valid is set to true. On any failure the function
 * returns early with vdso_info.valid left false, so vdso_sym returns 0.
 * Failure cases: `base` is 0, the image is not a 64-bit ELF object, there
 * is no PT_LOAD or PT_DYNAMIC segment, the dynamic table lacks DT_STRTAB,
 * DT_SYMTAB or DT_HASH, or the hash table has no buckets.
 *
 * Not threadsafe.
 */
void vdso_init_from_sysinfo_ehdr(uintptr_t base)
{
	size_t i;
	bool found_vaddr = false;

	vdso_info.valid = false;
	vdso_info.load_addr = base;

	/* No vDSO address was supplied; nothing to parse. */
	if (!base)
		return;

	Elf64_Ehdr *hdr = (Elf64_Ehdr *)base;

	/* Everything below walks the image with Elf64_* types. */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 ||
	    hdr->e_ident[EI_CLASS] != ELFCLASS64)
		return;

	Elf64_Phdr *pt = (Elf64_Phdr *)(base + hdr->e_phoff);
	Elf64_Dyn *dyn = NULL;

	/*
	 * We need two things from the segment table: the load offset
	 * and the dynamic table.
	 */
	for (i = 0; i < hdr->e_phnum; i++) {
		if (pt[i].p_type == PT_LOAD && !found_vaddr) {
			/* Only the first PT_LOAD determines the offset. */
			found_vaddr = true;
			vdso_info.load_offset = base
				+ (uintptr_t)pt[i].p_offset
				- (uintptr_t)pt[i].p_vaddr;
		} else if (pt[i].p_type == PT_DYNAMIC) {
			dyn = (Elf64_Dyn *)(base + pt[i].p_offset);
		}
	}

	if (!found_vaddr || !dyn)
		return;  /* Failed */

	/*
	 * Fish out the useful bits of the dynamic table. Each d_ptr is a
	 * recorded virtual address, so it is rebased by load_offset.
	 */
	Elf64_Word *hash = NULL;

	vdso_info.symstrings = 0;
	vdso_info.symtab = 0;
	vdso_info.versym = 0;
	vdso_info.verdef = 0;
	for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
		switch (dyn[i].d_tag) {
		case DT_STRTAB:
			vdso_info.symstrings = (const char *)
				((uintptr_t)dyn[i].d_un.d_ptr
				 + vdso_info.load_offset);
			break;
		case DT_SYMTAB:
			vdso_info.symtab = (Elf64_Sym *)
				((uintptr_t)dyn[i].d_un.d_ptr
				 + vdso_info.load_offset);
			break;
		case DT_HASH:
			hash = (Elf64_Word *)
				((uintptr_t)dyn[i].d_un.d_ptr
				 + vdso_info.load_offset);
			break;
		case DT_VERSYM:
			vdso_info.versym = (Elf64_Versym *)
				((uintptr_t)dyn[i].d_un.d_ptr
				 + vdso_info.load_offset);
			break;
		case DT_VERDEF:
			vdso_info.verdef = (Elf64_Verdef *)
				((uintptr_t)dyn[i].d_un.d_ptr
				 + vdso_info.load_offset);
			break;
		default:
			break;  /* Tag we have no use for. */
		}
	}

	if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
		return;  /* Failed */

	/* versym is useless without the definitions it indexes. */
	if (!vdso_info.verdef)
		vdso_info.versym = 0;

	/* Parse the hash table header. */
	vdso_info.nbucket = hash[0];
	vdso_info.nchain = hash[1];
	vdso_info.bucket = &hash[2];
	vdso_info.chain = &hash[vdso_info.nbucket + 2];

	/* vdso_sym reduces the hash modulo nbucket; zero would divide by 0. */
	if (vdso_info.nbucket == 0)
		return;  /* Failed */

	/* That's all we need. */
	vdso_info.valid = true;
}
/*
 * Helper for vdso_sym: report whether the version definition indexed by
 * `ver` is called `name` (whose ELF hash is `hash`).
 *
 * The version definition table is a linked list of variable-size records,
 * so it is scanned from the start on every call rather than indexed; doing
 * better than linear time would need memory for an index.
 *
 * Returns false when no non-base definition carries index `ver`.
 */
static bool vdso_match_version(Elf64_Versym ver,
			       const char *name, Elf64_Word hash)
{
	/* Apparently bit 15 means "hidden"; ignore it when comparing. */
	const Elf64_Versym wanted = ver & 0x7fff;
	Elf64_Verdef *entry = vdso_info.verdef;

	/* Step 1: walk the list until the wanted definition turns up. */
	for (;;) {
		bool is_base = (entry->vd_flags & VER_FLG_BASE) != 0;

		if (!is_base && (entry->vd_ndx & 0x7fff) == wanted)
			break;

		/* vd_next == 0 marks the last record. */
		if (entry->vd_next == 0)
			return false;

		/* vd_next is a byte offset from the current record. */
		entry = (Elf64_Verdef *)((char *)entry + entry->vd_next);
	}

	/* Step 2: cheap hash comparison first, then the name itself. */
	if (entry->vd_hash != hash)
		return false;

	Elf64_Verdaux *aux = (Elf64_Verdaux *)((char *)entry + entry->vd_aux);

	return strcmp(name, vdso_info.symstrings + aux->vda_name) == 0;
}
/*
 * Look up a function in the vDSO.
 *
 * version: version name the symbol must be defined under, e.g. "LINUX_2.6".
 *          Ignored when the vDSO carries no version tables.
 * name:    symbol name, e.g. "__vdso_gettimeofday".
 *
 * Returns the address of the symbol, or 0 if it does not exist or if
 * initialisation failed or never happened. Only defined global or weak
 * function symbols are considered. The lookup is a little slow, so callers
 * should cache the result.
 */
void *vdso_sym(const char *version, const char *name)
{
	if (!vdso_info.valid)
		return NULL;

	/*
	 * Hash values in the ELF tables are 32-bit words (vd_hash is an
	 * Elf64_Word), so narrow elf_hash's unsigned long result to that
	 * width before it is used as a bucket index. elf_hash takes
	 * unsigned char, hence the explicit casts.
	 */
	Elf64_Word ver_hash =
		(Elf64_Word)elf_hash((const unsigned char *)version);
	Elf64_Word name_hash =
		(Elf64_Word)elf_hash((const unsigned char *)name);

	Elf64_Word chain = vdso_info.bucket[name_hash % vdso_info.nbucket];

	/* Chain entries are symbol table indices; STN_UNDEF ends the chain. */
	for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
		Elf64_Sym *sym = &vdso_info.symtab[chain];

		/* Check for a defined global or weak function w/ right name. */
		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
			continue;
		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
			continue;
		if (sym->st_shndx == SHN_UNDEF)
			continue;
		if (strcmp(name, vdso_info.symstrings + sym->st_name) != 0)
			continue;

		/* Check symbol version. */
		if (vdso_info.versym
		    && !vdso_match_version(vdso_info.versym[chain],
					   version, ver_hash))
			continue;

		/* st_value is a recorded vaddr; rebase it to where we are loaded. */
		return (void *)(vdso_info.load_offset + sym->st_value);
	}

	return NULL;
}
/*
 * Initialise the parser from an auxiliary vector.
 *
 * auxv: array of Elf64_auxv_t entries terminated by an AT_NULL entry.
 *
 * The first AT_SYSINFO_EHDR entry is handed to
 * vdso_init_from_sysinfo_ehdr. If the vector has no such entry the parser
 * is marked invalid, so vdso_sym will return 0. Not threadsafe.
 */
void vdso_init_from_auxv(void *auxv)
{
	Elf64_auxv_t *entry;

	for (entry = auxv; entry->a_type != AT_NULL; entry++) {
		if (entry->a_type != AT_SYSINFO_EHDR)
			continue;

		vdso_init_from_sysinfo_ehdr(entry->a_un.a_val);
		return;
	}

	/* Reached the terminator without finding a vDSO address. */
	vdso_info.valid = false;
}