// SPDX-License-Identifier: GPL-2.0
#include <linux/fsnotify.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"

/* Used if snapshot allocated at boot */
static bool allocate_snapshot;
static bool snapshot_at_boot;

static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
static int boot_snapshot_index;

static int __init boot_alloc_snapshot(char *str)
{
        char *slot = boot_snapshot_info + boot_snapshot_index;
        int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
        int ret;

        if (str[0] == '=') {
                str++;
                if (strlen(str) >= left)
                        return -1;

                /* Append "<name>\t" to the tab-separated list of instance names */
                ret = snprintf(slot, left, "%s\t", str);
                boot_snapshot_index += ret;
        } else {
                allocate_snapshot = true;
                /* We also need the main ring buffer expanded */
                trace_set_ring_buffer_expanded(NULL);
        }
        return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);


static int __init boot_snapshot(char *str)
{
        snapshot_at_boot = true;
        boot_alloc_snapshot(str);
        return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);
static void tracing_snapshot_instance_cond(struct trace_array *tr,
                                           void *cond_data)
{
        unsigned long flags;

        if (in_nmi()) {
                trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
                trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
                return;
        }

        if (!tr->allocated_snapshot) {
                trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
                trace_array_puts(tr, "*** stopping trace here!   ***\n");
                tracer_tracing_off(tr);
                return;
        }

        if (tr->mapped) {
                trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
                trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
                return;
        }

        /* Note, snapshot cannot be used when the tracer uses it */
        if (tracer_uses_snapshot(tr->current_trace)) {
                trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
                trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
                return;
        }

        local_irq_save(flags);
        update_max_tr(tr, current, smp_processor_id(), cond_data);
        local_irq_restore(flags);
}

void tracing_snapshot_instance(struct trace_array *tr)
{
        tracing_snapshot_instance_cond(tr, NULL);
}

/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:         The tracing instance to snapshot
 * @cond_data:  The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional: the snapshot only happens if the cond_snapshot.update()
 * implementation, called with @cond_data, returns true. That is, the
 * trace array's cond_snapshot update() operation uses @cond_data to
 * decide whether the snapshot should be taken and, if it is, presumably
 * saves @cond_data along with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
        tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr:         The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot.  This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already taken.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
        void *cond_data = NULL;

        local_irq_disable();
        arch_spin_lock(&tr->max_lock);

        if (tr->cond_snapshot)
                cond_data = tr->cond_snapshot->cond_data;

        arch_spin_unlock(&tr->max_lock);
        local_irq_enable();

        return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

/* resize @trace_buf's buffer to the size of @size_buf's entries */
int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
                                 struct array_buffer *size_buf, int cpu_id)
{
        int cpu, ret = 0;

        if (cpu_id == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
                        ret = ring_buffer_resize(trace_buf->buffer,
                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
                        if (ret < 0)
                                break;
                        per_cpu_ptr(trace_buf->data, cpu)->entries =
                                per_cpu_ptr(size_buf->data, cpu)->entries;
                }
        } else {
                ret = ring_buffer_resize(trace_buf->buffer,
                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
                if (ret == 0)
                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
        }

        return ret;
}

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
        int order;
        int ret;

        if (!tr->allocated_snapshot) {

                /* Make the snapshot buffer have the same order as main buffer */
                order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
                ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
                if (ret < 0)
                        return ret;

                /* allocate spare buffer */
                ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
                if (ret < 0)
                        return ret;

                tr->allocated_snapshot = true;
        }

        return 0;
}

void free_snapshot(struct trace_array *tr)
{
        /*
         * We don't free the ring buffer; instead, we resize it, because
         * the snapshot ring buffer has some state (e.g. ring->clock) that
         * we want to preserve.
         */
        ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
        ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
        trace_set_buffer_entries(&tr->snapshot_buffer, 1);
        tracing_reset_online_cpus(&tr->snapshot_buffer);
        tr->allocated_snapshot = false;
}

int tracing_arm_snapshot_locked(struct trace_array *tr)
{
        int ret;

        lockdep_assert_held(&trace_types_lock);

        spin_lock(&tr->snapshot_trigger_lock);
        if (tr->snapshot == UINT_MAX || tr->mapped) {
                spin_unlock(&tr->snapshot_trigger_lock);
                return -EBUSY;
        }

        tr->snapshot++;
        spin_unlock(&tr->snapshot_trigger_lock);

        ret = tracing_alloc_snapshot_instance(tr);
        if (ret) {
                spin_lock(&tr->snapshot_trigger_lock);
                tr->snapshot--;
                spin_unlock(&tr->snapshot_trigger_lock);
        }

        return ret;
}

int tracing_arm_snapshot(struct trace_array *tr)
{
        guard(mutex)(&trace_types_lock);
        return tracing_arm_snapshot_locked(tr);
}

void tracing_disarm_snapshot(struct trace_array *tr)
{
        spin_lock(&tr->snapshot_trigger_lock);
        if (!WARN_ON(!tr->snapshot))
                tr->snapshot--;
        spin_unlock(&tr->snapshot_trigger_lock);
}
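}

/*
 * A minimal sketch of the expected arm/disarm pairing (this is the
 * pattern ftrace_trace_snapshot_callback() below follows; my_register()
 * and my_unregister() are hypothetical):
 *
 *      ret = tracing_arm_snapshot(tr);
 *      if (ret < 0)
 *              return ret;
 *      ret = my_register(tr);
 *      if (ret < 0)
 *              tracing_disarm_snapshot(tr);
 *
 * Each successful arm must eventually be matched by exactly one disarm,
 * e.g. from my_unregister().
 */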

/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
        int ret;

        ret = tracing_alloc_snapshot();
        if (ret < 0)
                return;

        tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
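
/*
 * Illustrative call site (a sketch, not part of this file): from any
 * sleepable context a module can capture the current trace with
 *
 *      if (condition_of_interest)
 *              tracing_snapshot_alloc();
 *
 * where condition_of_interest stands for whatever event the module
 * wants to capture.
 */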

/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr:         The tracing instance
 * @cond_data:  User data to associate with the snapshot
 * @update:     Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
                                 cond_update_fn_t update)
{
        struct cond_snapshot *cond_snapshot __free(kfree) =
                kzalloc_obj(*cond_snapshot);
        int ret;

        if (!cond_snapshot)
                return -ENOMEM;

        cond_snapshot->cond_data = cond_data;
        cond_snapshot->update = update;

        guard(mutex)(&trace_types_lock);

        if (tracer_uses_snapshot(tr->current_trace))
                return -EBUSY;

        /*
         * The cond_snapshot can only change to NULL without the
         * trace_types_lock. We don't care if we race with it going
         * to NULL, but we want to make sure that it's not set to
         * something other than NULL when we get here, which we can
         * do safely with only holding the trace_types_lock and not
         * having to take the max_lock.
         */
        if (tr->cond_snapshot)
                return -EBUSY;

        ret = tracing_arm_snapshot_locked(tr);
        if (ret)
                return ret;

        local_irq_disable();
        arch_spin_lock(&tr->max_lock);
        tr->cond_snapshot = no_free_ptr(cond_snapshot);
        arch_spin_unlock(&tr->max_lock);
        local_irq_enable();

        return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);

/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr:         The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
        int ret = 0;

        local_irq_disable();
        arch_spin_lock(&tr->max_lock);

        if (!tr->cond_snapshot)
                ret = -EINVAL;
        else {
                kfree(tr->cond_snapshot);
                tr->cond_snapshot = NULL;
        }

        arch_spin_unlock(&tr->max_lock);
        local_irq_enable();

        tracing_disarm_snapshot(tr);

        return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
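
/*
 * Illustrative conditional-snapshot usage (a sketch under assumed names;
 * my_update(), my_threshold, my_state and measured_value are
 * hypothetical). Note that update() is called under tr->max_lock with
 * interrupts disabled, so it must not call tracing_cond_snapshot_data():
 *
 *      static bool my_update(struct trace_array *tr, void *cond_data)
 *      {
 *              u64 *val = cond_data;
 *
 *              return *val > my_threshold;
 *      }
 *
 * At setup time (sleepable context):
 *
 *      ret = tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *
 * At the point of interest (may be atomic context):
 *
 *      tracing_snapshot_cond(tr, &measured_value);
 *
 * To read back the enable-time data, and finally to tear down:
 *
 *      state = tracing_cond_snapshot_data(tr);
 *      tracing_snapshot_cond_disable(tr);
 */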

#ifdef CONFIG_TRACER_MAX_TRACE
#ifdef LATENCY_FS_NOTIFY
static struct workqueue_struct *fsnotify_wq;

static void latency_fsnotify_workfn(struct work_struct *work)
{
        struct trace_array *tr = container_of(work, struct trace_array,
                                              fsnotify_work);
        fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}

static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
        struct trace_array *tr = container_of(iwork, struct trace_array,
                                              fsnotify_irqwork);
        queue_work(fsnotify_wq, &tr->fsnotify_work);
}

__init static int latency_fsnotify_init(void)
{
        fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
                                      WQ_UNBOUND | WQ_HIGHPRI, 0);
        if (!fsnotify_wq) {
                pr_err("Unable to allocate tr_max_lat_wq\n");
                return -ENOMEM;
        }
        return 0;
}

late_initcall_sync(latency_fsnotify_init);

void latency_fsnotify(struct trace_array *tr)
{
        if (!fsnotify_wq)
                return;
        /*
         * We cannot call queue_work(&tr->fsnotify_work) from here because it's
         * possible that we are called from __schedule() or do_idle(), which
         * could cause a deadlock.
         */
        irq_work_queue(&tr->fsnotify_irqwork);
}
#endif /* LATENCY_FS_NOTIFY */

static const struct file_operations tracing_max_lat_fops;

void trace_create_maxlat_file(struct trace_array *tr,
                              struct dentry *d_tracer)
{
#ifdef LATENCY_FS_NOTIFY
        INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
        init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
#endif
        tr->d_max_latency = trace_create_file("tracing_max_latency",
                                              TRACE_MODE_WRITE,
                                              d_tracer, tr,
                                              &tracing_max_lat_fops);
}

/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (This way the maximum trace is permanently saved
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        struct array_buffer *trace_buf = &tr->array_buffer;
        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
        struct array_buffer *max_buf = &tr->snapshot_buffer;
        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

        max_buf->cpu = cpu;
        max_buf->time_start = data->preempt_timestamp;

        max_data->saved_latency = tr->max_latency;
        max_data->critical_start = data->critical_start;
        max_data->critical_end = data->critical_end;

        strscpy(max_data->comm, tsk->comm);
        max_data->pid = tsk->pid;
        /*
         * If tsk == current, then use current_uid(), as that does not use
         * RCU. The irq tracer can be called out of RCU scope.
         */
        if (tsk == current)
                max_data->uid = current_uid();
        else
                max_data->uid = task_uid(tsk);

        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
        max_data->policy = tsk->policy;
        max_data->rt_priority = tsk->rt_priority;

        /* record this task's comm */
        tracing_record_cmdline(tsk);
        latency_fsnotify(tr);
}
#else
static inline void __update_max_tr(struct trace_array *tr,
                                   struct task_struct *tsk, int cpu) { }
#endif /* CONFIG_TRACER_MAX_TRACE */

/**
 * update_max_tr - snapshot all trace buffers from @tr into its snapshot buffer
 * @tr: the trace array
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and its snapshot buffer and record
 * information about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
              void *cond_data)
{
        if (tr->stop_count)
                return;

        WARN_ON_ONCE(!irqs_disabled());

        if (!tr->allocated_snapshot) {
                /* Only the nop tracer should hit this when disabling */
                WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
        }

        arch_spin_lock(&tr->max_lock);

        /* Inherit the recordable setting from array_buffer */
        if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
                ring_buffer_record_on(tr->snapshot_buffer.buffer);
        else
                ring_buffer_record_off(tr->snapshot_buffer.buffer);

        if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
                arch_spin_unlock(&tr->max_lock);
                return;
        }

        swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);

        __update_max_tr(tr, tsk, cpu);

        arch_spin_unlock(&tr->max_lock);

        /* Any waiters on the old snapshot buffer need to wake up */
        ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}

/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: the trace array
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and its snapshot buffer.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        int ret;

        if (tr->stop_count)
                return;

        WARN_ON_ONCE(!irqs_disabled());
        if (!tr->allocated_snapshot) {
                /* Only the nop tracer should hit this when disabling */
                WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
        }

        arch_spin_lock(&tr->max_lock);

        ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);

        if (ret == -EBUSY) {
                /*
                 * We failed to swap the buffer due to a commit taking
                 * place on this CPU. We fail to record, but we reset
                 * the max trace buffer (no one writes directly to it)
                 * and flag that it failed.
                 * The swap can also fail if a resize is in progress.
                 */
                trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
                        "Failed to swap buffers due to commit or resize in progress\n");
        }

        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

        __update_max_tr(tr, tsk, cpu);
        arch_spin_unlock(&tr->max_lock);
}

static void show_snapshot_main_help(struct seq_file *m)
{
        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
                    "#                      Takes a snapshot of the main buffer.\n"
                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
                    "#                      (Doesn't have to be '2' works with any number that\n"
                    "#                       is not a '0' or '1')\n");
}

static void show_snapshot_percpu_help(struct seq_file *m)
{
        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
#else
        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
                    "#                     Must use main snapshot file to allocate.\n");
#endif
        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
                    "#                      (Doesn't have to be '2' works with any number that\n"
                    "#                       is not a '0' or '1')\n");
}

void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
        if (iter->tr->allocated_snapshot)
                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
        else
                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");

        seq_puts(m, "# Snapshot commands:\n");
        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
                show_snapshot_main_help(m);
        else
                show_snapshot_percpu_help(m);
}

static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
        struct seq_file *m;
        int ret;

        ret = tracing_check_open_get_tr(tr);
        if (ret)
                return ret;

        if (file->f_mode & FMODE_READ) {
                iter = __tracing_open(inode, file, true);
                if (IS_ERR(iter))
                        ret = PTR_ERR(iter);
        } else {
                /* Writes still need the seq_file to hold the private data */
                ret = -ENOMEM;
                m = kzalloc_obj(*m);
                if (!m)
                        goto out;
                iter = kzalloc_obj(*iter);
                if (!iter) {
                        kfree(m);
                        goto out;
                }
                ret = 0;

                iter->tr = tr;
                iter->array_buffer = &tr->snapshot_buffer;
                iter->cpu_file = tracing_get_cpu(inode);
                m->private = iter;
                file->private_data = m;
        }
out:
        if (ret < 0)
                trace_array_put(tr);

        return ret;
}

static void tracing_swap_cpu_buffer(void *tr)
{
        update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
}

static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
{
        struct seq_file *m = filp->private_data;
        struct trace_iterator *iter = m->private;
        struct trace_array *tr = iter->tr;
        unsigned long val;
        int ret;

        ret = tracing_update_buffers(tr);
        if (ret < 0)
                return ret;

        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;

        guard(mutex)(&trace_types_lock);

        if (tracer_uses_snapshot(tr->current_trace))
                return -EBUSY;

        local_irq_disable();
        arch_spin_lock(&tr->max_lock);
        if (tr->cond_snapshot)
                ret = -EBUSY;
        arch_spin_unlock(&tr->max_lock);
        local_irq_enable();
        if (ret)
                return ret;

        switch (val) {
        case 0:
                if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
                        return -EINVAL;
                if (tr->allocated_snapshot)
                        free_snapshot(tr);
                break;
        case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
                if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
                        return -EINVAL;
#endif
                if (tr->allocated_snapshot) {
                        ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
                                        &tr->array_buffer, iter->cpu_file);
                        if (ret < 0)
                                return ret;
                }

                ret = tracing_arm_snapshot_locked(tr);
                if (ret)
                        return ret;

                /* Now, we're going to swap */
                if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
                        local_irq_disable();
                        update_max_tr(tr, current, smp_processor_id(), NULL);
                        local_irq_enable();
                } else {
                        smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
                                                 (void *)tr, 1);
                }
                tracing_disarm_snapshot(tr);
                break;
        default:
                if (tr->allocated_snapshot) {
                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
                                tracing_reset_online_cpus(&tr->snapshot_buffer);
                        else
                                tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
                }
                break;
        }

        if (ret >= 0) {
                *ppos += cnt;
                ret = cnt;
        }

        return ret;
}

static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
        struct seq_file *m = file->private_data;
        int ret;

        ret = tracing_release(inode, file);

        if (file->f_mode & FMODE_READ)
                return ret;

        /* If write only, the seq_file is just a stub */
        if (m)
                kfree(m->private);
        kfree(m);

        return 0;
}

static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
        struct ftrace_buffer_info *info;
        int ret;

        /* The following checks for tracefs lockdown */
        ret = tracing_buffers_open(inode, filp);
        if (ret < 0)
                return ret;

        info = filp->private_data;

        if (tracer_uses_snapshot(info->iter.trace)) {
                tracing_buffers_release(inode, filp);
                return -EBUSY;
        }

        info->iter.snapshot = true;
        info->iter.array_buffer = &info->iter.tr->snapshot_buffer;

        return ret;
}

const struct file_operations snapshot_fops = {
        .open           = tracing_snapshot_open,
        .read           = seq_read,
        .write          = tracing_snapshot_write,
        .llseek         = tracing_lseek,
        .release        = tracing_snapshot_release,
};

const struct file_operations snapshot_raw_fops = {
        .open           = snapshot_raw_open,
        .read           = tracing_buffers_read,
        .release        = tracing_buffers_release,
        .splice_read    = tracing_buffers_splice_read,
};

#ifdef CONFIG_TRACER_MAX_TRACE
static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
                     size_t cnt, loff_t *ppos)
{
        struct trace_array *tr = filp->private_data;

        return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}

static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
                      size_t cnt, loff_t *ppos)
{
        struct trace_array *tr = filp->private_data;

        return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}

static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_max_lat_read,
        .write          = tracing_max_lat_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};
#endif /* CONFIG_TRACER_MAX_TRACE */

int get_snapshot_map(struct trace_array *tr)
{
        int err = 0;

        /*
         * Called with mmap_lock held. lockdep would be unhappy if we would now
         * take trace_types_lock. Instead use the specific
         * snapshot_trigger_lock.
         */
        spin_lock(&tr->snapshot_trigger_lock);

        if (tr->snapshot || tr->mapped == UINT_MAX)
                err = -EBUSY;
        else
                tr->mapped++;

        spin_unlock(&tr->snapshot_trigger_lock);

        /* Wait for update_max_tr() to observe tr->mapped */
        if (tr->mapped == 1)
                synchronize_rcu();

        return err;
}

void put_snapshot_map(struct trace_array *tr)
{
        spin_lock(&tr->snapshot_trigger_lock);
        if (!WARN_ON(!tr->mapped))
                tr->mapped--;
        spin_unlock(&tr->snapshot_trigger_lock);
}

#ifdef CONFIG_DYNAMIC_FTRACE
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
                struct trace_array *tr, struct ftrace_probe_ops *ops,
                void *data)
{
        tracing_snapshot_instance(tr);
}

static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
                      struct trace_array *tr, struct ftrace_probe_ops *ops,
                      void *data)
{
        struct ftrace_func_mapper *mapper = data;
        long *count = NULL;

        if (mapper)
                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);

        if (count) {
                if (*count <= 0)
                        return;

                (*count)--;
        }

        tracing_snapshot_instance(tr);
}

static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
                      struct ftrace_probe_ops *ops, void *data)
{
        struct ftrace_func_mapper *mapper = data;
        long *count = NULL;

        seq_printf(m, "%ps:", (void *)ip);

        seq_puts(m, "snapshot");

        if (mapper)
                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);

        if (count)
                seq_printf(m, ":count=%ld\n", *count);
        else
                seq_puts(m, ":unlimited\n");

        return 0;
}

static int
ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
                     unsigned long ip, void *init_data, void **data)
{
        struct ftrace_func_mapper *mapper = *data;

        if (!mapper) {
                mapper = allocate_ftrace_func_mapper();
                if (!mapper)
                        return -ENOMEM;
                *data = mapper;
        }

        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
}

static void
ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
                     unsigned long ip, void *data)
{
        struct ftrace_func_mapper *mapper = data;

        if (!ip) {
                if (!mapper)
                        return;
                free_ftrace_func_mapper(mapper, NULL);
                return;
        }

        ftrace_func_mapper_remove_ip(mapper, ip);
}

static struct ftrace_probe_ops snapshot_probe_ops = {
        .func                   = ftrace_snapshot,
        .print                  = ftrace_snapshot_print,
};

static struct ftrace_probe_ops snapshot_count_probe_ops = {
        .func                   = ftrace_count_snapshot,
        .print                  = ftrace_snapshot_print,
        .init                   = ftrace_snapshot_init,
        .free                   = ftrace_snapshot_free,
};

static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
                               char *glob, char *cmd, char *param, int enable)
{
        struct ftrace_probe_ops *ops;
        void *count = (void *)-1;
        char *number;
        int ret;

        if (!tr)
                return -ENODEV;

        /* hash funcs only work with set_ftrace_filter */
        if (!enable)
                return -EINVAL;

        ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;

        if (glob[0] == '!') {
                ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
                if (!ret)
                        tracing_disarm_snapshot(tr);

                return ret;
        }

        if (!param)
                goto out_reg;

        number = strsep(&param, ":");

        if (!strlen(number))
                goto out_reg;

        /*
         * We use the callback data field (which is a pointer)
         * as our counter.
         */
        ret = kstrtoul(number, 0, (unsigned long *)&count);
        if (ret)
                return ret;

 out_reg:
        ret = tracing_arm_snapshot(tr);
        if (ret < 0)
                return ret;

        ret = register_ftrace_function_probe(glob, tr, ops, count);
        if (ret < 0)
                tracing_disarm_snapshot(tr);

        return ret < 0 ? ret : 0;
}
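
/*
 * The "snapshot" command parsed above is used through set_ftrace_filter,
 * e.g. (per Documentation/trace/ftrace.rst; "schedule" is only an example
 * function):
 *
 *      echo 'schedule:snapshot' > set_ftrace_filter
 *      echo 'schedule:snapshot:5' > set_ftrace_filter
 *      echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The optional :count limits how many snapshots the probe may take, and
 * the '!' prefix removes the probe again.
 */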

static struct ftrace_func_command ftrace_snapshot_cmd = {
        .name                   = "snapshot",
        .func                   = ftrace_trace_snapshot_callback,
};

__init int register_snapshot_cmd(void)
{
        return register_ftrace_command(&ftrace_snapshot_cmd);
}
#endif /* CONFIG_DYNAMIC_FTRACE */

int trace_allocate_snapshot(struct trace_array *tr, int size)
{
        int ret;

        /*
         * Fixed-address (memory-mapped) buffer trace arrays do not have
         * snapshot buffers.
         */
        if (tr->range_addr_start)
                return 0;

        /* allocate_snapshot can only be true during system boot */
        ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
                                    allocate_snapshot ? size : 1);
        if (ret < 0)
                return -ENOMEM;

        tr->allocated_snapshot = allocate_snapshot;

        allocate_snapshot = false;
        return 0;
}

__init static bool tr_needs_alloc_snapshot(const char *name)
{
        char *test;
        int len = strlen(name);
        bool ret;

        if (!boot_snapshot_index)
                return false;

        if (strncmp(name, boot_snapshot_info, len) == 0 &&
            boot_snapshot_info[len] == '\t')
                return true;

        test = kmalloc(strlen(name) + 3, GFP_KERNEL);
        if (!test)
                return false;

        sprintf(test, "\t%s\t", name);
        ret = strstr(boot_snapshot_info, test) != NULL;
        kfree(test);
        return ret;
}

__init void do_allocate_snapshot(const char *name)
{
        if (!tr_needs_alloc_snapshot(name))
                return;

        /*
         * When allocate_snapshot is set, the next call to
         * allocate_trace_buffers() (called by trace_array_get_by_name())
         * will allocate the snapshot buffer. That will also clear
         * this flag.
         */
        allocate_snapshot = true;
}

void __init ftrace_boot_snapshot(void)
{
        struct trace_array *tr;

        if (!snapshot_at_boot)
                return;

        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
                if (!tr->allocated_snapshot)
                        continue;

                tracing_snapshot_instance(tr);
                trace_array_puts(tr, "** Boot snapshot taken **\n");
        }
}