printk changes for 5.10

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEESH4wyp42V4tXvYsjUqAMR0iAlPIFAl+EN+oACgkQUqAMR0iA
 lPK/gA//WXBjC4FSPNr0j7kPFKQhADS3cUcp+GfuI4rYkYcJHV0yJn1kvctg1rUC
 Je+Hc+Hy5Nk93lwejj5BvQoc31zOeoPDyMje5zi5te4H2NQkaoGXHOMvUnaLcNeo
 g+HJvx+NU9MDjuc5amtK8YD69jzErD+eqrHpQOg4UToMXXcBXLafTThIi9vT1fzP
 9uwWBRlpdQyY7tYbbwFiDuu33PyoWlc6Ksp8qKdLBLz2AmGd1Rvaq+ePsq8b9tHJ
 pfv1agW0GTpzoN2pm5gFXOoYniHB/ooB1L0QLq7ylaociEyb8WbTtkn4v++EjxW8
 aGsO1WdO0MQeIWDxXQR5DYD3s+Me2DMhFPDqUc2/s0q2SGWUPFcsmCsvMAOx/clA
 HDfTWkyzB4FarZOTv0gZ7jYNOVukFzUQ1IBTtWpJifC9fT0xrRkKmKE1UgmWv0ei
 Hx5VFQyQGsDh3sUcRLhW91p4sqJCs7l01zw1A/0rb7a+QTHAqZRtbz5hyTjlViiT
 57XiyXynXW8N4Q5U6uAxCbkFFi+nP/XVQ5ggZ/QLn/4hfWWUcu0vt2bOGkRwryAT
 zYmDqViraEVWKIom74UzZ0nrIBtdhvtbFQIYuyiCQKpKMwytWXUQbUASZL2mfBZi
 h5eJx7etV6f5to5mNRsj8bbN5buX9UheEd0QFD9NJdS6aadqTac=
 =9vEl
 -----END PGP SIGNATURE-----

Merge tag 'printk-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux

Pull printk updates from Petr Mladek:
 "The big new thing is the fully lockless ringbuffer implementation,
  including the support for continuous lines. It will allow to store and
  read messages in any situation without the risk of deadlocks and
  without the need of temporary per-CPU buffers.

  The access is still serialized by logbuf_lock. It synchronizes a few
  more operations, for example, temporary buffer for formatting the
  message, syslog and kmsg_dump operations. The lock removal is being
  discussed and should be ready for the next release.

  The continuous lines are handled exactly the same way as before to
  avoid regressions in user space. It means that they are appended to
  the last message when the caller is the same. Only the last message
  can be extended.

  The data ring includes plain text of the messages. Except for an
  integer at the beginning of each message that points back to the
  descriptor ring with other metadata.

  The dictionary has to stay. journalctl uses it to filter the log. It
  allows to show messages related to a given device. The dictionary
  values are stored in the descriptor ring with the other metadata.

  This is the first part of the printk rework as discussed at Plumbers
  2019, see https://lore.kernel.org/r/87k1acz5rx.fsf@linutronix.de. The
  next big step will be handling consoles by kthreads during the normal
  system operation. It will require special handling of situations when
  the kthreads could not get scheduled, for example, early boot,
  suspend, panic.

  Other changes:

   - Add John Ogness as a reviewer for printk subsystem. He is author of
     the rework and is familiar with the code and history.

   - Fix locking in serial8250_do_startup() to prevent lockdep report.

   - Few code cleanups"

* tag 'printk-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux: (27 commits)
  printk: Use fallthrough pseudo-keyword
  printk: reduce setup_text_buf size to LOG_LINE_MAX
  printk: avoid and/or handle record truncation
  printk: remove dict ring
  printk: move dictionary keys to dev_printk_info
  printk: move printk_info into separate array
  printk: reimplement log_cont using record extension
  printk: ringbuffer: add finalization/extension support
  printk: ringbuffer: change representation of states
  printk: ringbuffer: clear initial reserved fields
  printk: ringbuffer: add BLK_DATALESS() macro
  printk: ringbuffer: relocate get_data()
  printk: ringbuffer: avoid memcpy() on state_var
  printk: ringbuffer: fix setting state in desc_read()
  kernel.h: Move oops_in_progress to printk.h
  scripts/gdb: update for lockless printk ringbuffer
  scripts/gdb: add utils.read_ulong()
  docs: vmcoreinfo: add lockless printk ringbuffer vmcoreinfo
  printk: reduce LOG_BUF_SHIFT range for H8300
  printk: ringbuffer: support dataless records
  ...
This commit is contained in:
Linus Torvalds 2020-10-13 15:58:10 -07:00
commit d594d8f411
18 changed files with 3416 additions and 737 deletions

View File

@ -170,57 +170,82 @@ document trapinfo
address the kernel panicked. address the kernel panicked.
end end
define dump_log_idx define dump_record
set $idx = $arg0 set var $desc = $arg0
if ($argc > 1) set var $info = $arg1
set $prev_flags = $arg1 if ($argc > 2)
set var $prev_flags = $arg2
else else
set $prev_flags = 0 set var $prev_flags = 0
end
set $msg = ((struct printk_log *) (log_buf + $idx))
set $prefix = 1
set $newline = 1
set $log = log_buf + $idx + sizeof(*$msg)
# prev & LOG_CONT && !(msg->flags & LOG_PREIX)
if (($prev_flags & 8) && !($msg->flags & 4))
set $prefix = 0
end end
# msg->flags & LOG_CONT set var $prefix = 1
if ($msg->flags & 8) set var $newline = 1
set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits)
set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits)
# handle data-less record
if ($begin & 1)
set var $text_len = 0
set var $log = ""
else
# handle wrapping data block
if ($begin > $next)
set var $begin = 0
end
# skip over descriptor id
set var $begin = $begin + sizeof(long)
# handle truncated message
if ($next - $begin < $info->text_len)
set var $text_len = $next - $begin
else
set var $text_len = $info->text_len
end
set var $log = &prb->text_data_ring.data[$begin]
end
# prev & LOG_CONT && !(info->flags & LOG_PREFIX)
if (($prev_flags & 8) && !($info->flags & 4))
set var $prefix = 0
end
# info->flags & LOG_CONT
if ($info->flags & 8)
# (prev & LOG_CONT && !(prev & LOG_NEWLINE)) # (prev & LOG_CONT && !(prev & LOG_NEWLINE))
if (($prev_flags & 8) && !($prev_flags & 2)) if (($prev_flags & 8) && !($prev_flags & 2))
set $prefix = 0 set var $prefix = 0
end end
# (!(msg->flags & LOG_NEWLINE)) # (!(info->flags & LOG_NEWLINE))
if (!($msg->flags & 2)) if (!($info->flags & 2))
set $newline = 0 set var $newline = 0
end end
end end
if ($prefix) if ($prefix)
printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000
end end
if ($msg->text_len != 0) if ($text_len)
eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len eval "printf \"%%%d.%ds\", $log", $text_len, $text_len
end end
if ($newline) if ($newline)
printf "\n" printf "\n"
end end
if ($msg->dict_len > 0)
set $dict = $log + $msg->text_len # handle dictionary data
set $idx = 0
set $line = 1 set var $dict = &$info->dev_info.subsystem[0]
while ($idx < $msg->dict_len) set var $dict_len = sizeof($info->dev_info.subsystem)
if ($line) if ($dict[0] != '\0')
printf " " printf " SUBSYSTEM="
set $line = 0 set var $idx = 0
end while ($idx < $dict_len)
set $c = $dict[$idx] set var $c = $dict[$idx]
if ($c == '\0') if ($c == '\0')
printf "\n" loop_break
set $line = 1
else else
if ($c < ' ' || $c >= 127 || $c == '\\') if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c printf "\\x%02x", $c
@ -228,33 +253,67 @@ define dump_log_idx
printf "%c", $c printf "%c", $c
end end
end end
set $idx = $idx + 1 set var $idx = $idx + 1
end
printf "\n"
end
set var $dict = &$info->dev_info.device[0]
set var $dict_len = sizeof($info->dev_info.device)
if ($dict[0] != '\0')
printf " DEVICE="
set var $idx = 0
while ($idx < $dict_len)
set var $c = $dict[$idx]
if ($c == '\0')
loop_break
else
if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c
else
printf "%c", $c
end
end
set var $idx = $idx + 1
end end
printf "\n" printf "\n"
end end
end end
document dump_log_idx document dump_record
Dump a single log given its index in the log buffer. The first Dump a single record. The first parameter is the descriptor,
parameter is the index into log_buf, the second is optional and the second parameter is the info, the third parameter is
specified the previous log buffer's flags, used for properly optional and specifies the previous record's flags, used for
formatting continued lines. properly formatting continued lines.
end end
define dmesg define dmesg
set $i = log_first_idx # definitions from kernel/printk/printk_ringbuffer.h
set $end_idx = log_first_idx set var $desc_committed = 1
set $prev_flags = 0 set var $desc_finalized = 2
set var $desc_sv_bits = sizeof(long) * 8
set var $desc_flags_shift = $desc_sv_bits - 2
set var $desc_flags_mask = 3 << $desc_flags_shift
set var $id_mask = ~$desc_flags_mask
set var $desc_count = 1U << prb->desc_ring.count_bits
set var $prev_flags = 0
set var $id = prb->desc_ring.tail_id.counter
set var $end_id = prb->desc_ring.head_id.counter
while (1) while (1)
set $msg = ((struct printk_log *) (log_buf + $i)) set var $desc = &prb->desc_ring.descs[$id % $desc_count]
if ($msg->len == 0) set var $info = &prb->desc_ring.infos[$id % $desc_count]
set $i = 0
else # skip non-committed record
dump_log_idx $i $prev_flags set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift)
set $i = $i + $msg->len if ($state == $desc_committed || $state == $desc_finalized)
set $prev_flags = $msg->flags dump_record $desc $info $prev_flags
set var $prev_flags = $info->flags
end end
if ($i == $end_idx)
set var $id = ($id + 1) & $id_mask
if ($id == $end_id)
loop_break loop_break
end end
end end

View File

@ -189,50 +189,123 @@ from this.
Free areas descriptor. User-space tools use this value to iterate the Free areas descriptor. User-space tools use this value to iterate the
free_area ranges. MAX_ORDER is used by the zone buddy allocator. free_area ranges. MAX_ORDER is used by the zone buddy allocator.
log_first_idx prb
------------- ---
Index of the first record stored in the buffer log_buf. Used by A pointer to the printk ringbuffer (struct printk_ringbuffer). This
user-space tools to read the strings in the log_buf. may be pointing to the static boot ringbuffer or the dynamically
allocated ringbuffer, depending on when the core dump occurred.
Used by user-space tools to read the active kernel log buffer.
log_buf printk_rb_static
------- ----------------
Console output is written to the ring buffer log_buf at index A pointer to the static boot printk ringbuffer. If @prb has a
log_first_idx. Used to get the kernel log. different value, this is useful for viewing the initial boot messages,
which may have been overwritten in the dynamically allocated
ringbuffer.
log_buf_len clear_seq
-----------
log_buf's length.
clear_idx
--------- ---------
The index that the next printk() record to read after the last clear The sequence number of the printk() record after the last clear
command. It indicates the first record after the last SYSLOG_ACTION command. It indicates the first record after the last
_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space
the dmesg log. tools to dump a subset of the dmesg log.
log_next_idx printk_ringbuffer
------------ -----------------
The index of the next record to store in the buffer log_buf. Used to The size of a printk_ringbuffer structure. This structure contains all
compute the index of the current buffer position. information required for accessing the various components of the
kernel log buffer.
printk_log (printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail)
---------- -----------------------------------------------------------------
The size of a structure printk_log. Used to compute the size of Offsets for the various components of the printk ringbuffer. Used by
messages, and extract dmesg log. It encapsulates header information for user-space tools to view the kernel log buffer without requiring the
log_buf, such as timestamp, syslog level, etc. declaration of the structure.
(printk_log, ts_nsec|len|text_len|dict_len) prb_desc_ring
------------------------------------------- -------------
It represents field offsets in struct printk_log. User space tools The size of the prb_desc_ring structure. This structure contains
parse it and check whether the values of printk_log's members have been information about the set of record descriptors.
changed.
(prb_desc_ring, count_bits|descs|head_id|tail_id)
-------------------------------------------------
Offsets for the fields describing the set of record descriptors. Used
by user-space tools to be able to traverse the descriptors without
requiring the declaration of the structure.
prb_desc
--------
The size of the prb_desc structure. This structure contains
information about a single record descriptor.
(prb_desc, state_var|text_blk_lpos)
-----------------------------------
Offsets for the fields describing a record descriptor. Used by
user-space tools to be able to read descriptors without requiring
the declaration of the structure.
prb_data_blk_lpos
-----------------
The size of the prb_data_blk_lpos structure. This structure contains
information about where the text or dictionary data (data block) is
located within the respective data ring.
(prb_data_blk_lpos, begin|next)
-------------------------------
Offsets for the fields describing the location of a data block. Used
by user-space tools to be able to locate data blocks without
requiring the declaration of the structure.
printk_info
-----------
The size of the printk_info structure. This structure contains all
the meta-data for a record.
(printk_info, seq|ts_nsec|text_len|caller_id)
---------------------------------------------
Offsets for the fields providing the meta-data for a record. Used by
user-space tools to be able to read the information without requiring
the declaration of the structure.
prb_data_ring
-------------
The size of the prb_data_ring structure. This structure contains
information about a set of data blocks.
(prb_data_ring, size_bits|data|head_lpos|tail_lpos)
---------------------------------------------------
Offsets for the fields describing a set of data blocks. Used by
user-space tools to be able to access the data blocks without
requiring the declaration of the structure.
atomic_long_t
-------------
The size of the atomic_long_t structure. Used by user-space tools to
be able to copy the full structure, regardless of its
architecture-specific implementation.
(atomic_long_t, counter)
------------------------
Offset for the long value of an atomic_long_t variable. Used by
user-space tools to access the long value without requiring the
architecture-specific declaration.
(free_area.free_list, MIGRATE_TYPES) (free_area.free_list, MIGRATE_TYPES)
------------------------------------ ------------------------------------

View File

@ -13970,6 +13970,7 @@ PRINTK
M: Petr Mladek <pmladek@suse.com> M: Petr Mladek <pmladek@suse.com>
M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org> R: Steven Rostedt <rostedt@goodmis.org>
R: John Ogness <john.ogness@linutronix.de>
S: Maintained S: Maintained
F: include/linux/printk.h F: include/linux/printk.h
F: kernel/printk/ F: kernel/printk/

View File

@ -4061,22 +4061,21 @@ void device_shutdown(void)
*/ */
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
static int static void
create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) set_dev_info(const struct device *dev, struct dev_printk_info *dev_info)
{ {
const char *subsys; const char *subsys;
size_t pos = 0;
memset(dev_info, 0, sizeof(*dev_info));
if (dev->class) if (dev->class)
subsys = dev->class->name; subsys = dev->class->name;
else if (dev->bus) else if (dev->bus)
subsys = dev->bus->name; subsys = dev->bus->name;
else else
return 0; return;
pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem));
if (pos >= hdrlen)
goto overflow;
/* /*
* Add device identifier DEVICE=: * Add device identifier DEVICE=:
@ -4092,41 +4091,28 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
c = 'b'; c = 'b';
else else
c = 'c'; c = 'c';
pos++;
pos += snprintf(hdr + pos, hdrlen - pos, snprintf(dev_info->device, sizeof(dev_info->device),
"DEVICE=%c%u:%u", "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt));
c, MAJOR(dev->devt), MINOR(dev->devt));
} else if (strcmp(subsys, "net") == 0) { } else if (strcmp(subsys, "net") == 0) {
struct net_device *net = to_net_dev(dev); struct net_device *net = to_net_dev(dev);
pos++; snprintf(dev_info->device, sizeof(dev_info->device),
pos += snprintf(hdr + pos, hdrlen - pos, "n%u", net->ifindex);
"DEVICE=n%u", net->ifindex);
} else { } else {
pos++; snprintf(dev_info->device, sizeof(dev_info->device),
pos += snprintf(hdr + pos, hdrlen - pos, "+%s:%s", subsys, dev_name(dev));
"DEVICE=+%s:%s", subsys, dev_name(dev));
} }
if (pos >= hdrlen)
goto overflow;
return pos;
overflow:
dev_WARN(dev, "device/subsystem name too long");
return 0;
} }
int dev_vprintk_emit(int level, const struct device *dev, int dev_vprintk_emit(int level, const struct device *dev,
const char *fmt, va_list args) const char *fmt, va_list args)
{ {
char hdr[128]; struct dev_printk_info dev_info;
size_t hdrlen;
hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); set_dev_info(dev, &dev_info);
return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); return vprintk_emit(0, level, &dev_info, fmt, args);
} }
EXPORT_SYMBOL(dev_vprintk_emit); EXPORT_SYMBOL(dev_vprintk_emit);

View File

@ -55,6 +55,9 @@ phys_addr_t paddr_vmcoreinfo_note(void);
#define VMCOREINFO_OFFSET(name, field) \ #define VMCOREINFO_OFFSET(name, field) \
vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
(unsigned long)offsetof(struct name, field)) (unsigned long)offsetof(struct name, field))
#define VMCOREINFO_TYPE_OFFSET(name, field) \
vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
(unsigned long)offsetof(name, field))
#define VMCOREINFO_LENGTH(name, value) \ #define VMCOREINFO_LENGTH(name, value) \
vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
#define VMCOREINFO_NUMBER(name) \ #define VMCOREINFO_NUMBER(name) \

View File

@ -2,9 +2,9 @@
#ifndef __LINUX_DEBUG_LOCKING_H #ifndef __LINUX_DEBUG_LOCKING_H
#define __LINUX_DEBUG_LOCKING_H #define __LINUX_DEBUG_LOCKING_H
#include <linux/kernel.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/bug.h> #include <linux/bug.h>
#include <linux/printk.h>
struct task_struct; struct task_struct;

View File

@ -21,6 +21,14 @@
struct device; struct device;
#define PRINTK_INFO_SUBSYSTEM_LEN 16
#define PRINTK_INFO_DEVICE_LEN 48
struct dev_printk_info {
char subsystem[PRINTK_INFO_SUBSYSTEM_LEN];
char device[PRINTK_INFO_DEVICE_LEN];
};
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
__printf(3, 0) __cold __printf(3, 0) __cold

View File

@ -526,7 +526,6 @@ extern unsigned int sysctl_oops_all_cpu_backtrace;
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
extern void bust_spinlocks(int yes); extern void bust_spinlocks(int yes);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout; extern int panic_timeout;
extern unsigned long panic_print; extern unsigned long panic_print;
extern int panic_on_oops; extern int panic_on_oops;

View File

@ -12,6 +12,8 @@
extern const char linux_banner[]; extern const char linux_banner[];
extern const char linux_proc_banner[]; extern const char linux_proc_banner[];
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
#define PRINTK_MAX_SINGLE_HEADER_LEN 2 #define PRINTK_MAX_SINGLE_HEADER_LEN 2
static inline int printk_get_level(const char *buffer) static inline int printk_get_level(const char *buffer)
@ -159,10 +161,12 @@ static inline void printk_nmi_direct_enter(void) { }
static inline void printk_nmi_direct_exit(void) { } static inline void printk_nmi_direct_exit(void) { }
#endif /* PRINTK_NMI */ #endif /* PRINTK_NMI */
struct dev_printk_info;
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
asmlinkage __printf(5, 0) asmlinkage __printf(4, 0)
int vprintk_emit(int facility, int level, int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen, const struct dev_printk_info *dev_info,
const char *fmt, va_list args); const char *fmt, va_list args);
asmlinkage __printf(1, 0) asmlinkage __printf(1, 0)

View File

@ -682,7 +682,8 @@ config IKHEADERS
config LOG_BUF_SHIFT config LOG_BUF_SHIFT
int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
range 12 25 range 12 25 if !H8300
range 12 19 if H8300
default 17 default 17
depends on PRINTK depends on PRINTK
help help

View File

@ -2,3 +2,4 @@
obj-y = printk.o obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_PRINTK) += printk_safe.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
obj-$(CONFIG_PRINTK) += printk_ringbuffer.o

View File

@ -14,9 +14,9 @@
extern raw_spinlock_t logbuf_lock; extern raw_spinlock_t logbuf_lock;
__printf(5, 0) __printf(4, 0)
int vprintk_store(int facility, int level, int vprintk_store(int facility, int level,
const char *dict, size_t dictlen, const struct dev_printk_info *dev_info,
const char *fmt, va_list args); const char *fmt, va_list args);
__printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_default(const char *fmt, va_list args);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,382 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KERNEL_PRINTK_RINGBUFFER_H
#define _KERNEL_PRINTK_RINGBUFFER_H
#include <linux/atomic.h>
#include <linux/dev_printk.h>
/*
* Meta information about each stored message.
*
* All fields are set by the printk code except for @seq, which is
* set by the ringbuffer code.
*
* The records of this type are kept in an array of their own
* (struct prb_desc_ring, @infos) that has the same number of elements
* as the descriptor array.
*
* @flags and @level are declared as 5-bit and 3-bit bitfields of a u8.
*/
struct printk_info {
u64 seq; /* sequence number */
u64 ts_nsec; /* timestamp in nanoseconds */
u16 text_len; /* length of text message */
u8 facility; /* syslog facility */
u8 flags:5; /* internal record flags */
u8 level:3; /* syslog level */
u32 caller_id; /* thread id or processor id */
struct dev_printk_info dev_info; /* subsystem and device strings */
};
/*
* A structure providing the buffers, used by writers and readers.
*
* Writers:
* Using prb_rec_init_wr(), a writer sets @text_buf_size before calling
* prb_reserve(). On success, prb_reserve() sets @info and @text_buf to
* buffers reserved for that writer.
*
* Readers:
* Using prb_rec_init_rd(), a reader sets all fields before calling
* prb_read_valid(). Note that the reader provides the @info and @text_buf
* buffers. On success, the struct pointed to by @info will be filled and
* the char array pointed to by @text_buf will be filled with text data.
*/
struct printk_record {
struct printk_info *info; /* meta-data of the record */
char *text_buf; /* text data of the record */
unsigned int text_buf_size; /* size of @text_buf */
};
/*
* Specifies the logical position and span of a data block.
*
* @begin: Logical position where the data block begins.
* @next: Logical position that ends the span of the data block
* (presumably where the following data block begins -- confirm
* against printk_ringbuffer.c).
*
* A block without data has both values set to FAILED_LPOS, see
* FAILED_BLK_LPOS.
*/
struct prb_data_blk_lpos {
unsigned long begin;
unsigned long next;
};
/*
* A descriptor: the complete meta-data for a record.
*
* @state_var: A bitwise combination of descriptor ID and descriptor state.
* The state is kept in the two most significant bits
* (DESC_FLAGS_MASK), the ID in the remaining bits
* (DESC_ID_MASK).
* @text_blk_lpos: The logical position and span of the data block that
* belongs to this descriptor.
*/
struct prb_desc {
atomic_long_t state_var;
struct prb_data_blk_lpos text_blk_lpos;
};
/*
* A ringbuffer of "ID + data" elements.
*
* @size_bits: The size of @data as a power-of-2 value, see _DATA_SIZE().
* @data: The buffer that holds the data blocks.
* @head_lpos: Logical position of the head. Statically defined
* ringbuffers start with BLK0_LPOS.
* @tail_lpos: Logical position of the tail. Statically defined
* ringbuffers start with BLK0_LPOS.
*/
struct prb_data_ring {
unsigned int size_bits;
char *data;
atomic_long_t head_lpos;
atomic_long_t tail_lpos;
};
/*
* A ringbuffer of "struct prb_desc" elements.
*
* @count_bits: The number of descriptors as a power-of-2 value, see
* _DESCS_COUNT().
* @descs: The descriptor array.
* @infos: The printk_info array. _DEFINE_PRINTKRB() gives it the
* same number of elements as @descs.
* @head_id: ID of the head descriptor. Statically defined ringbuffers
* start with DESC0_ID.
* @tail_id: ID of the tail descriptor. Statically defined ringbuffers
* start with DESC0_ID.
*/
struct prb_desc_ring {
unsigned int count_bits;
struct prb_desc *descs;
struct printk_info *infos;
atomic_long_t head_id;
atomic_long_t tail_id;
};
/*
* The high level structure representing the printk ringbuffer.
*
* @desc_ring: The ring of record descriptors together with the
* printk_info array.
* @text_data_ring: The ring of data blocks holding the text of the records.
* @fail: Count of failed prb_reserve() calls where not even a data-less
* record was created.
*/
struct printk_ringbuffer {
struct prb_desc_ring desc_ring;
struct prb_data_ring text_data_ring;
atomic_long_t fail;
};
/*
* Used by writers as a reserve/commit handle.
*
* @rb: Ringbuffer where the entry is reserved.
* @irqflags: Saved irq flags to restore on entry commit.
* @id: ID of the reserved descriptor.
* @text_space: Total occupied buffer space in the text data ring, including
* ID, alignment padding, and wrapping data blocks.
*
* This structure is an opaque handle for writers. Its contents are only
* to be used by the ringbuffer implementation.
*
* A writer passes the handle to prb_reserve() or prb_reserve_in_last() and
* afterwards to prb_commit() or prb_final_commit().
*/
struct prb_reserved_entry {
struct printk_ringbuffer *rb;
unsigned long irqflags;
unsigned long id;
unsigned int text_space;
};
/*
* The possible responses of a descriptor state-query.
*
* The non-negative values 0x0 - 0x3 are exactly the values that
* DESC_STATE() can extract from a state variable. desc_miss is negative
* and therefore differs from all of them.
*
* Note: the dmesg gdb macro hard-codes the values of desc_committed and
* desc_finalized, so it must be updated whenever they are renumbered.
*/
enum desc_state {
desc_miss = -1, /* ID mismatch (pseudo state) */
desc_reserved = 0x0, /* reserved, in use by writer */
desc_committed = 0x1, /* committed by writer, could get reopened */
desc_finalized = 0x2, /* committed, no further modification allowed */
desc_reusable = 0x3, /* free, not yet used by any writer */
};
/* The size of a data ring, given its size as a power-of-2 value. */
#define _DATA_SIZE(sz_bits)	(1UL << (sz_bits))

/* The number of descriptors, given the count as a power-of-2 value. */
#define _DESCS_COUNT(ct_bits)	(1U << (ct_bits))

/*
 * Layout of a descriptor state variable (an unsigned long value):
 *
 * - the two most significant bits (DESC_FLAGS_MASK) hold the state
 * - the remaining bits (DESC_ID_MASK) hold the descriptor ID
 *
 * DESC_STATE(sv)     extracts the state bits of a state variable value.
 * DESC_SV(id, state) combines an ID and a state into a state variable value.
 * DESC_ID(sv)        extracts the ID of a state variable value.
 *
 * All macro arguments are parenthesized so that compound expressions
 * (for example "a | b") can be passed safely.
 */
#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv)		(3UL & ((sv) >> DESC_FLAGS_SHIFT))
#define DESC_SV(id, state)	(((unsigned long)(state) << DESC_FLAGS_SHIFT) | (id))
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
/*
* Special logical position values. Both have the lowest bit set; the
* dmesg gdb macro treats a @begin value with the lowest bit set as a
* data-less record.
*
* FAILED_LPOS
* Stored in both @begin and @next of a block without data, see
* FAILED_BLK_LPOS and the "Descriptor Bootstrap" description.
*
* NO_LPOS
* NOTE(review): not used within this header; its exact meaning has to
* be confirmed against the ringbuffer implementation.
*
* FAILED_BLK_LPOS
* Initializer for a struct prb_data_blk_lpos without a data block.
*/
#define FAILED_LPOS 0x1
#define NO_LPOS 0x3
#define FAILED_BLK_LPOS \
{ \
.begin = FAILED_LPOS, \
.next = FAILED_LPOS, \
}
/*
* Descriptor Bootstrap
*
* The descriptor array is minimally initialized to allow immediate usage
* by readers and writers. The requirements that the descriptor array
* initialization must satisfy:
*
* Req1
* The tail must point to an existing (committed or reusable) descriptor.
* This is required by the implementation of prb_first_seq().
*
* Req2
* Readers must see that the ringbuffer is initially empty.
*
* Req3
* The first record reserved by a writer is assigned sequence number 0.
*
* To satisfy Req1, the tail initially points to a descriptor that is
* minimally initialized (having no data block, i.e. data-less with the
* data block's lpos @begin and @next values set to FAILED_LPOS).
*
* To satisfy Req2, the initial tail descriptor is initialized to the
* reusable state. Readers recognize reusable descriptors as existing
* records, but skip over them.
*
* To satisfy Req3, the last descriptor in the array is used as the initial
* head (and tail) descriptor. This allows the first record reserved by a
* writer (head + 1) to be the first descriptor in the array. (Only the first
* descriptor in the array could have a valid sequence number of 0.)
*
* The first time a descriptor is reserved, it is assigned a sequence number
* with the value of the array index. A "first time reserved" descriptor can
* be recognized because it has a sequence number of 0 but does not have an
* index of 0. (Only the first descriptor in the array could have a valid
* sequence number of 0.) After the first reservation, all future reservations
* (recycling) simply involve incrementing the sequence number by the array
* count.
*
* Hack #1
* Only the first descriptor in the array is allowed to have the sequence
* number 0. In this case it is not possible to recognize if it is being
* reserved the first time (set to index value) or has been reserved
* previously (increment by the array count). This is handled by _always_
* incrementing the sequence number by the array count when reserving the
* first descriptor in the array. In order to satisfy Req3, the sequence
* number of the first descriptor in the array is initialized to minus
* the array count. Then, upon the first reservation, it is incremented
* to 0, thus satisfying Req3.
*
* Hack #2
* prb_first_seq() can be called at any time by readers to retrieve the
* sequence number of the tail descriptor. However, due to Req2 and Req3,
* initially there are no records to report the sequence number of
* (sequence numbers are u64 and there is nothing less than 0). To handle
* this, the sequence number of the initial tail descriptor is initialized
* to 0. Technically this is incorrect, because there is no record with
* sequence number 0 (yet) and the tail descriptor is not the first
* descriptor in the array. But it allows prb_read_valid() to correctly
* report the existence of a record for _any_ given sequence number at all
* times. Bootstrapping is complete when the tail is pushed the first
* time, thus finally pointing to the first descriptor reserved by a
* writer, which has the assigned sequence number 0.
*/
/*
* Initiating Logical Value Overflows
*
* Both logical position (lpos) and ID values can be mapped to array indexes
* but may experience overflows during the lifetime of the system. To ensure
* that printk_ringbuffer can handle the overflows for these types, initial
* values are chosen that map to the correct initial array indexes, but will
* result in overflows soon.
*
* BLK0_LPOS
* The initial @head_lpos and @tail_lpos for data rings. It is at index
* 0 and the lpos value is such that it will overflow on the first wrap.
*
* DESC0_ID
* The initial @head_id and @tail_id for the desc ring. It is at the last
* index of the descriptor array (see Req3 above) and the ID value is such
* that it will overflow on the second wrap.
*
* DESC0_SV
* The initial @state_var of the descriptor at DESC0_ID: that ID combined
* with the reusable state.
*/
#define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits)))
#define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1))
#define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable)
/*
 * Define a ringbuffer with an external text data buffer. The same as
 * DEFINE_PRINTKRB() but requires specifying an external buffer for the
 * text data.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 * @text_buf:    The external buffer to use for the text data.
 *
 * Note: The specified external buffer must be of the size:
 *       2 ^ (descbits + avgtextbits)
 *
 * Three static objects are defined: the descriptor array, the printk_info
 * array and the ringbuffer itself. See "Descriptor Bootstrap" for the
 * reasoning behind the initial values.
 *
 * All initialized members are of type atomic_long_t, therefore
 * ATOMIC_LONG_INIT() is used for every one of them.
 */
#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf) \
static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \
	/* the initial head and tail */ \
	[_DESCS_COUNT(descbits) - 1] = { \
		/* reusable */ \
		.state_var = ATOMIC_LONG_INIT(DESC0_SV(descbits)), \
		/* no associated data block */ \
		.text_blk_lpos = FAILED_BLK_LPOS, \
	}, \
}; \
static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \
	/* this will be the first record reserved by a writer */ \
	[0] = { \
		/* will be incremented to 0 on the first reservation */ \
		.seq = -(u64)_DESCS_COUNT(descbits), \
	}, \
	/* the initial head and tail */ \
	[_DESCS_COUNT(descbits) - 1] = { \
		/* reports the first seq value during the bootstrap phase */ \
		.seq = 0, \
	}, \
}; \
static struct printk_ringbuffer name = { \
	.desc_ring = { \
		.count_bits = (descbits), \
		.descs = &_##name##_descs[0], \
		.infos = &_##name##_infos[0], \
		.head_id = ATOMIC_LONG_INIT(DESC0_ID(descbits)), \
		.tail_id = ATOMIC_LONG_INIT(DESC0_ID(descbits)), \
	}, \
	.text_data_ring = { \
		.size_bits = (avgtextbits) + (descbits), \
		.data = (text_buf), \
		.head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \
		.tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \
	}, \
	.fail = ATOMIC_LONG_INIT(0), \
}
/**
* DEFINE_PRINTKRB() - Define a ringbuffer.
*
* @name: The name of the ringbuffer variable.
* @descbits: The number of descriptors as a power-of-2 value.
* @avgtextbits: The average text data size per record as a power-of-2 value.
*
* This is a macro for defining a ringbuffer and all internal structures
* such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a
* variant where the text data buffer can be specified externally.
*
* The text data buffer is defined as a static char array with
* 2 ^ (avgtextbits + descbits) elements that has the alignment of an
* unsigned long.
*/
#define DEFINE_PRINTKRB(name, descbits, avgtextbits) \
static char _##name##_text[1U << ((avgtextbits) + (descbits))] \
__aligned(__alignof__(unsigned long)); \
_DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0])
/* Writer Interface */
/**
* prb_rec_init_wd() - Initialize a buffer for writing records.
*
* @r: The record to initialize.
* @text_buf_size: The needed text buffer size.
*/
static inline void prb_rec_init_wr(struct printk_record *r,
unsigned int text_buf_size)
{
r->info = NULL;
r->text_buf = NULL;
r->text_buf_size = text_buf_size;
}
/*
 * Out-of-line writer functions; only their prototypes appear here.
 *
 * prb_reserve() and prb_reserve_in_last() both take a prb_reserved_entry,
 * the ringbuffer and a printk_record, and return a bool.
 * NOTE(review): presumably true means the reservation succeeded and @e has
 * to be handed to prb_commit() or prb_final_commit() afterwards - confirm
 * against the implementation.
 */
bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
struct printk_record *r);
bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
struct printk_record *r, u32 caller_id, unsigned int max_size);
void prb_commit(struct prb_reserved_entry *e);
void prb_final_commit(struct prb_reserved_entry *e);
/*
 * Takes the same kinds of pieces that _DEFINE_PRINTKRB() wires together
 * statically: a text buffer with its size, a descriptor array with its
 * count expressed in bits, and an info array.
 */
void prb_init(struct printk_ringbuffer *rb,
char *text_buf, unsigned int text_buf_size,
struct prb_desc *descs, unsigned int descs_count_bits,
struct printk_info *infos);
unsigned int prb_record_text_space(struct prb_reserved_entry *e);
/* Reader Interface */
/**
 * prb_rec_init_rd() - Initialize a buffer for reading records.
 *
 * @r:             The record to initialize.
 * @info:          A buffer to store record meta-data.
 * @text_buf:      A buffer to store text data.
 * @text_buf_size: The size of @text_buf.
 *
 * Every field a reader is interested in is set from the arguments. Apart
 * from @r, each argument is optional: only record data for arguments that
 * are non-NULL or non-zero will be read.
 */
static inline void prb_rec_init_rd(struct printk_record *r,
				   struct printk_info *info,
				   char *text_buf, unsigned int text_buf_size)
{
	r->text_buf_size = text_buf_size;
	r->text_buf = text_buf;
	r->info = info;
}
/**
 * prb_for_each_record() - Iterate over the records of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @r:    A printk_record to store the record on each iteration.
 *
 * This is a macro for conveniently iterating over a ringbuffer.
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @r->info->seq should be checked.
 *
 * @s starts at @from and is advanced to @r->info->seq + 1 after every
 * iteration; the loop ends as soon as prb_read_valid() returns false.
 * The step expression dereferences @r->info, so @r has to carry an info
 * buffer.
 *
 * Context: Any context.
 */
#define prb_for_each_record(from, rb, s, r) \
	for ((s) = (from); prb_read_valid(rb, s, r); (s) = (r)->info->seq + 1)
/**
 * prb_for_each_info() - Iterate over the meta data of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @i:    A printk_info to store the record meta data on each iteration.
 * @lc:   An unsigned int to store the text line count of each record.
 *
 * This is a macro for conveniently iterating over a ringbuffer.
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @i->seq should be checked.
 *
 * @s starts at @from and is advanced to @i->seq + 1 after every iteration;
 * the loop ends as soon as prb_read_valid_info() returns false.
 *
 * Context: Any context.
 */
#define prb_for_each_info(from, rb, s, i, lc) \
	for ((s) = (from); prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1)
/*
 * Out-of-line reader functions; only their prototypes appear here.
 *
 * prb_read_valid() and prb_read_valid_info() are the loop conditions of
 * prb_for_each_record() and prb_for_each_info() respectively: iteration
 * stops on the first false return.
 */
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
struct printk_record *r);
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
struct printk_info *info, unsigned int *line_count);
/*
 * Both return a u64 sequence number for @rb.
 * NOTE(review): going by the names, the first valid (oldest readable) and
 * the next-to-be-assigned sequence numbers - confirm against the
 * implementation.
 */
u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
u64 prb_next_seq(struct printk_ringbuffer *rb);
#endif /* _KERNEL_PRINTK_RINGBUFFER_H */

View File

@ -375,7 +375,7 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
raw_spin_trylock(&logbuf_lock)) { raw_spin_trylock(&logbuf_lock)) {
int len; int len;
len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
raw_spin_unlock(&logbuf_lock); raw_spin_unlock(&logbuf_lock);
defer_console_output(); defer_console_output();
return len; return len;

View File

@ -16,8 +16,13 @@ import sys
from linux import utils from linux import utils
printk_log_type = utils.CachedType("struct printk_log") printk_info_type = utils.CachedType("struct printk_info")
prb_data_blk_lpos_type = utils.CachedType("struct prb_data_blk_lpos")
prb_desc_type = utils.CachedType("struct prb_desc")
prb_desc_ring_type = utils.CachedType("struct prb_desc_ring")
prb_data_ring_type = utils.CachedType("struct prb_data_ring")
printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer")
atomic_long_type = utils.CachedType("atomic_long_t")
class LxDmesg(gdb.Command): class LxDmesg(gdb.Command):
"""Print Linux kernel log buffer.""" """Print Linux kernel log buffer."""
@ -26,44 +31,110 @@ class LxDmesg(gdb.Command):
super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA)
def invoke(self, arg, from_tty): def invoke(self, arg, from_tty):
log_buf_addr = int(str(gdb.parse_and_eval(
"(void *)'printk.c'::log_buf")).split()[0], 16)
log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
inf = gdb.inferiors()[0] inf = gdb.inferiors()[0]
start = log_buf_addr + log_first_idx
if log_first_idx < log_next_idx:
log_buf_2nd_half = -1
length = log_next_idx - log_first_idx
log_buf = utils.read_memoryview(inf, start, length).tobytes()
else:
log_buf_2nd_half = log_buf_len - log_first_idx
a = utils.read_memoryview(inf, start, log_buf_2nd_half)
b = utils.read_memoryview(inf, log_buf_addr, log_next_idx)
log_buf = a.tobytes() + b.tobytes()
length_offset = printk_log_type.get_type()['len'].bitpos // 8 # read in prb structure
text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8 prb_addr = int(str(gdb.parse_and_eval("(void *)'printk.c'::prb")).split()[0], 16)
time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8 sz = printk_ringbuffer_type.get_type().sizeof
text_offset = printk_log_type.get_type().sizeof prb = utils.read_memoryview(inf, prb_addr, sz).tobytes()
pos = 0 # read in descriptor ring structure
while pos < log_buf.__len__(): off = printk_ringbuffer_type.get_type()['desc_ring'].bitpos // 8
length = utils.read_u16(log_buf, pos + length_offset) addr = prb_addr + off
if length == 0: sz = prb_desc_ring_type.get_type().sizeof
if log_buf_2nd_half == -1: desc_ring = utils.read_memoryview(inf, addr, sz).tobytes()
gdb.write("Corrupted log buffer!\n")
# read in descriptor array
off = prb_desc_ring_type.get_type()['count_bits'].bitpos // 8
desc_ring_count = 1 << utils.read_u32(desc_ring, off)
desc_sz = prb_desc_type.get_type().sizeof
off = prb_desc_ring_type.get_type()['descs'].bitpos // 8
addr = utils.read_ulong(desc_ring, off)
descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes()
# read in info array
info_sz = printk_info_type.get_type().sizeof
off = prb_desc_ring_type.get_type()['infos'].bitpos // 8
addr = utils.read_ulong(desc_ring, off)
infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes()
# read in text data ring structure
off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8
addr = prb_addr + off
sz = prb_data_ring_type.get_type().sizeof
text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes()
# read in text data
off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8
text_data_sz = 1 << utils.read_u32(text_data_ring, off)
off = prb_data_ring_type.get_type()['data'].bitpos // 8
addr = utils.read_ulong(text_data_ring, off)
text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes()
counter_off = atomic_long_type.get_type()['counter'].bitpos // 8
sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8
off = prb_desc_type.get_type()['text_blk_lpos'].bitpos // 8
begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8)
next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8)
ts_off = printk_info_type.get_type()['ts_nsec'].bitpos // 8
len_off = printk_info_type.get_type()['text_len'].bitpos // 8
# definitions from kernel/printk/printk_ringbuffer.h
desc_committed = 1
desc_finalized = 2
desc_sv_bits = utils.get_long_type().sizeof * 8
desc_flags_shift = desc_sv_bits - 2
desc_flags_mask = 3 << desc_flags_shift
desc_id_mask = ~desc_flags_mask
# read in tail and head descriptor ids
off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8
tail_id = utils.read_u64(desc_ring, off + counter_off)
off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8
head_id = utils.read_u64(desc_ring, off + counter_off)
did = tail_id
while True:
ind = did % desc_ring_count
desc_off = desc_sz * ind
info_off = info_sz * ind
# skip non-committed record
state = 3 & (utils.read_u64(descs, desc_off + sv_off +
counter_off) >> desc_flags_shift)
if state != desc_committed and state != desc_finalized:
if did == head_id:
break break
pos = log_buf_2nd_half did = (did + 1) & desc_id_mask
continue continue
text_len = utils.read_u16(log_buf, pos + text_len_offset) begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz
text_start = pos + text_offset end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz
text = log_buf[text_start:text_start + text_len].decode(
encoding='utf8', errors='replace') # handle data-less record
time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset) if begin & 1 == 1:
text = ""
else:
# handle wrapping data block
if begin > end:
begin = 0
# skip over descriptor id
text_start = begin + utils.get_long_type().sizeof
text_len = utils.read_u16(infos, info_off + len_off)
# handle truncated message
if end - text_start < text_len:
text_len = end - text_start
text = text_data[text_start:text_start + text_len].decode(
encoding='utf8', errors='replace')
time_stamp = utils.read_u64(infos, info_off + ts_off)
for line in text.splitlines(): for line in text.splitlines():
msg = u"[{time:12.6f}] {line}\n".format( msg = u"[{time:12.6f}] {line}\n".format(
@ -75,7 +146,9 @@ class LxDmesg(gdb.Command):
msg = msg.encode(encoding='utf8', errors='replace') msg = msg.encode(encoding='utf8', errors='replace')
gdb.write(msg) gdb.write(msg)
pos += length if did == head_id:
break
did = (did + 1) & desc_id_mask
LxDmesg() LxDmesg()

View File

@ -123,6 +123,13 @@ def read_u64(buffer, offset):
return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32) return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32)
def read_ulong(buffer, offset):
    """Read an unsigned long from *buffer* at *offset*.

    The width follows the long type reported by get_long_type(): when its
    size is 8 bytes the value is read with read_u64(), otherwise with
    read_u32().
    """
    reader = read_u64 if get_long_type().sizeof == 8 else read_u32
    return reader(buffer, offset)
target_arch = None target_arch = None