aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--kernel/trace/ring_buffer.c152
1 files changed, 105 insertions, 47 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 77dc0b25140e..60210fb5b211 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -482,6 +482,8 @@ struct ring_buffer_per_cpu {
unsigned long nr_pages;
unsigned int current_context;
struct list_head *pages;
+ /* pages generation counter, incremented when the list changes */
+ unsigned long cnt;
struct buffer_page *head_page; /* read from head */
struct buffer_page *tail_page; /* write to tail */
struct buffer_page *commit_page; /* committed pages */
@@ -1475,40 +1477,87 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
}
+static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer,
+ struct list_head *list)
+{
+ if (RB_WARN_ON(cpu_buffer,
+ rb_list_head(rb_list_head(list->next)->prev) != list))
+ return false;
+
+ if (RB_WARN_ON(cpu_buffer,
+ rb_list_head(rb_list_head(list->prev)->next) != list))
+ return false;
+
+ return true;
+}
+
/**
* rb_check_pages - integrity check of buffer pages
* @cpu_buffer: CPU buffer with pages to test
*
* As a safety measure we check to make sure the data pages have not
* been corrupted.
- *
- * Callers of this function need to guarantee that the list of pages doesn't get
- * modified during the check. In particular, if it's possible that the function
- * is invoked with concurrent readers which can swap in a new reader page then
- * the caller should take cpu_buffer->reader_lock.
*/
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct list_head *head = rb_list_head(cpu_buffer->pages);
- struct list_head *tmp;
+ struct list_head *head, *tmp;
+ unsigned long buffer_cnt;
+ unsigned long flags;
+ int nr_loops = 0;
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(head->next)->prev) != head))
+ /*
+ * Walk the linked list underpinning the ring buffer and validate all
+ * its next and prev links.
+ *
+ * The check acquires the reader_lock to avoid concurrent processing
+ * with code that could be modifying the list. However, the lock cannot
+ * be held for the entire duration of the walk, as this would make the
+ * time when interrupts are disabled non-deterministic, dependent on the
+ * ring buffer size. Therefore, the code releases and re-acquires the
+ * lock after checking each page. The ring_buffer_per_cpu.cnt variable
+ * is then used to detect if the list was modified while the lock was
+ * not held, in which case the check needs to be restarted.
+ *
+ * The code attempts to perform the check at most three times before
+ * giving up. This is acceptable because this is only a self-validation
+ * to detect problems early on. In practice, the list modification
+ * operations are fairly spaced, and so this check typically succeeds at
+ * most on the second try.
+ */
+again:
+ if (++nr_loops > 3)
return;
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(head->prev)->next) != head))
- return;
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ head = rb_list_head(cpu_buffer->pages);
+ if (!rb_check_links(cpu_buffer, head))
+ goto out_locked;
+ buffer_cnt = cpu_buffer->cnt;
+ tmp = head;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
- return;
+ while (true) {
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
- return;
+ if (buffer_cnt != cpu_buffer->cnt) {
+ /* The list was updated, try again. */
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ goto again;
+ }
+
+ tmp = rb_list_head(tmp->next);
+ if (tmp == head)
+ /* The iteration circled back, all is done. */
+ goto out_locked;
+
+ if (!rb_check_links(cpu_buffer, tmp))
+ goto out_locked;
+
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
+
+out_locked:
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
/*
@@ -2384,9 +2433,9 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
* __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
* @size: the size in bytes per cpu that is needed.
* @flags: attributes to set for the ring buffer.
+ * @order: sub-buffer order
* @start: start of allocated range
* @range_size: size of allocated range
- * @order: sub-buffer order
* @key: ring buffer reader_lock_key.
*
* Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -2532,6 +2581,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
/* make sure pages points to a valid page in the ring buffer */
cpu_buffer->pages = next_page;
+ cpu_buffer->cnt++;
/* update head page */
if (head_bit)
@@ -2638,6 +2688,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
* pointer to point to end of list
*/
head_page->prev = last_page;
+ cpu_buffer->cnt++;
success = true;
break;
}
@@ -2873,12 +2924,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
*/
synchronize_rcu();
for_each_buffer_cpu(buffer, cpu) {
- unsigned long flags;
-
cpu_buffer = buffer->buffers[cpu];
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
atomic_dec(&buffer->record_disabled);
}
@@ -4010,7 +4057,7 @@ static const char *show_irq_str(int bits)
return type[bits];
}
-/* Assume this is an trace event */
+/* Assume this is a trace event */
static const char *show_flags(struct ring_buffer_event *event)
{
struct trace_entry *entry;
@@ -5296,6 +5343,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(&cpu_buffer->head_page);
+ cpu_buffer->cnt++;
local_inc(&cpu_buffer->pages_read);
/* Finally update the reader page to the new head */
@@ -5835,12 +5883,9 @@ void
ring_buffer_read_finish(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
- unsigned long flags;
/* Use this opportunity to check the integrity of the ring buffer. */
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->resize_disabled);
kfree(iter->event);
@@ -6725,39 +6770,39 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
}
for_each_buffer_cpu(buffer, cpu) {
+ struct buffer_data_page *old_free_data_page;
+ struct list_head old_pages;
+ unsigned long flags;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
continue;
cpu_buffer = buffer->buffers[cpu];
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
/* Clear the head bit to make the link list normal to read */
rb_head_page_deactivate(cpu_buffer);
- /* Now walk the list and free all the old sub buffers */
- list_for_each_entry_safe(bpage, tmp, cpu_buffer->pages, list) {
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
- }
- /* The above loop stopped an the last page needing to be freed */
- bpage = list_entry(cpu_buffer->pages, struct buffer_page, list);
- free_buffer_page(bpage);
-
- /* Free the current reader page */
- free_buffer_page(cpu_buffer->reader_page);
+ /*
+ * Collect buffers from the cpu_buffer pages list and the
+ * reader_page on old_pages, so they can be freed later when not
+ * under a spinlock. The pages list is a linked list with no
+ * head, adding old_pages turns it into a regular list with
+ * old_pages being the head.
+ */
+ list_add(&old_pages, cpu_buffer->pages);
+ list_add(&cpu_buffer->reader_page->list, &old_pages);
/* One page was allocated for the reader page */
cpu_buffer->reader_page = list_entry(cpu_buffer->new_pages.next,
struct buffer_page, list);
list_del_init(&cpu_buffer->reader_page->list);
- /* The cpu_buffer pages are a link list with no head */
+ /* Install the new pages, remove the head from the list */
cpu_buffer->pages = cpu_buffer->new_pages.next;
- cpu_buffer->new_pages.next->prev = cpu_buffer->new_pages.prev;
- cpu_buffer->new_pages.prev->next = cpu_buffer->new_pages.next;
-
- /* Clear the new_pages list */
- INIT_LIST_HEAD(&cpu_buffer->new_pages);
+ list_del_init(&cpu_buffer->new_pages);
+ cpu_buffer->cnt++;
cpu_buffer->head_page
= list_entry(cpu_buffer->pages, struct buffer_page, list);
@@ -6766,11 +6811,20 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
cpu_buffer->nr_pages = cpu_buffer->nr_pages_to_update;
cpu_buffer->nr_pages_to_update = 0;
- free_pages((unsigned long)cpu_buffer->free_page, old_order);
+ old_free_data_page = cpu_buffer->free_page;
cpu_buffer->free_page = NULL;
rb_head_page_activate(cpu_buffer);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ /* Free old sub buffers */
+ list_for_each_entry_safe(bpage, tmp, &old_pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ free_pages((unsigned long)old_free_data_page, old_order);
+
rb_check_pages(cpu_buffer);
}
@@ -6965,7 +7019,11 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
lockdep_assert_held(&cpu_buffer->mapping_lock);
nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
- nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
+ nr_pages = ((nr_subbufs + 1) << subbuf_order); /* + meta-page */
+ if (nr_pages <= pgoff)
+ return -EINVAL;
+
+ nr_pages -= pgoff;
nr_vma_pages = vma_pages(vma);
if (!nr_vma_pages || nr_vma_pages > nr_pages)