diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4e074a6608269793d52a2ea4f28061532bb9798b..52ef7a9d50aacbc3d3fbb0366852457d39718cdf 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -10,6 +10,8 @@ SYNOPSIS
 [verse]
 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
 'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
+'perf stat' report [-i file]
 
 DESCRIPTION
 -----------
@@ -22,6 +24,11 @@ OPTIONS
 <command>...::
 	Any command you can specify in a shell.
 
+record::
+	See STAT RECORD.
+
+report::
+	See STAT REPORT.
 
 -e::
 --event=::
@@ -159,6 +166,33 @@ filter out the startup phase of the program, which is often very different.
 
 Print statistics of transactional execution if supported.
 
+STAT RECORD
+-----------
+Stores stat data into perf data file.
+
+-o file::
+--output file::
+Output file name.
+
+STAT REPORT
+-----------
+Reads and reports stat data from perf data file.
+
+-i file::
+--input file::
+Input file name.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1435ef6265b6d4fe58ffd2264e60497fd23bcd70..9c5cdc2c44714a937cb04e09a523373773b3a4e8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -452,6 +452,8 @@ static void record__init_features(struct record *rec)
 
 	if (!rec->opts.full_auxtrace)
 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+
+	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
 static volatile int workload_exec_errno;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index bbf42eefd5e5b708f419e8c3e24a7d1f8aaf9dde..9805e03ab1638c782efa5626f41ed65b2497fc4f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,9 @@
 #include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/counts.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "asm/bug.h"
 
 #include <stdlib.h>
 #include <sys/prctl.h>
@@ -126,6 +129,21 @@ static bool			append_file;
 static const char		*output_name;
 static int			output_fd;
 
+struct perf_stat {
+	bool			 record;
+	struct perf_data_file	 file;
+	struct perf_session	*session;
+	u64			 bytes_written;
+	struct perf_tool	 tool;
+	bool			 maps_allocated;
+	struct cpu_map		*cpus;
+	struct thread_map	*threads;
+	enum aggr_mode		 aggr_mode;
+};
+
+static struct perf_stat		perf_stat;
+#define STAT_RECORD		perf_stat.record
+
 static volatile int done = 0;
 
 static struct perf_stat_config stat_config = {
@@ -166,7 +184,11 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 	 * like tracepoints. Clear it up for counting.
 	 */
 	attr->sample_period = 0;
-	attr->sample_type   = 0;
+	/*
+	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+	 * while avoiding that older tools show confusing messages.
+	 */
+	attr->sample_type   = PERF_SAMPLE_IDENTIFIER;
 
 	/*
 	 * Disabling all counters initially, they will be enabled
@@ -202,6 +224,42 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
+		pr_err("failed to write perf data, error: %m\n");
+		return -1;
+	}
+
+	perf_stat.bytes_written += event->header.size;
+	return 0;
+}
+
+static int write_stat_round_event(u64 tm, u64 type)
+{
+	return perf_event__synthesize_stat_round(NULL, tm, type,
+						 process_synthesized_event,
+						 NULL);
+}
+
+#define WRITE_STAT_ROUND_EVENT(time, interval) \
+	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
+
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static int
+perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
+			     struct perf_counts_values *count)
+{
+	struct perf_sample_id *sid = SID(counter, cpu, thread);
+
+	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
+					   process_synthesized_event, NULL);
+}
+
 /*
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
@@ -225,6 +283,13 @@ static int read_counter(struct perf_evsel *counter)
 			count = perf_counts(counter->counts, cpu, thread);
 			if (perf_evsel__read(counter, cpu, thread, count))
 				return -1;
+
+			if (STAT_RECORD) {
+				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+					pr_err("failed to write stat event\n");
+					return -1;
+				}
+			}
 		}
 	}
 
@@ -258,6 +323,11 @@ static void process_interval(void)
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	diff_timespec(&rs, &ts, &ref_time);
 
+	if (STAT_RECORD) {
+		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
+			pr_err("failed to write stat round event\n");
+	}
+
 	print_counters(&rs, 0, NULL);
 }
 
@@ -288,6 +358,135 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
 	workload_exec_errno = info->si_value.sival_int;
 }
 
+static bool has_unit(struct perf_evsel *counter)
+{
+	return counter->unit && *counter->unit;
+}
+
+static bool has_scale(struct perf_evsel *counter)
+{
+	return counter->scale != 1;
+}
+
+static int perf_stat_synthesize_config(bool is_pipe)
+{
+	struct perf_evsel *counter;
+	int err;
+
+	if (is_pipe) {
+		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			return err;
+		}
+	}
+
+	/*
+	 * Synthesize other events stuff not carried within
+	 * attr event - unit, scale, name
+	 */
+	evlist__for_each(evsel_list, counter) {
+		if (!counter->supported)
+			continue;
+
+		/*
+		 * Synthesize unit and scale only if it's defined.
+		 */
+		if (has_unit(counter)) {
+			err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel unit.\n");
+				return err;
+			}
+		}
+
+		if (has_scale(counter)) {
+			err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel scale.\n");
+				return err;
+			}
+		}
+
+		if (counter->own_cpus) {
+			err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel scale.\n");
+				return err;
+			}
+		}
+
+		/*
+		 * Name is needed only for pipe output,
+		 * perf.data carries event names.
+		 */
+		if (is_pipe) {
+			err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel name.\n");
+				return err;
+			}
+		}
+	}
+
+	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
+						process_synthesized_event,
+						NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
+					     process_synthesized_event, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_stat_config(NULL, &stat_config,
+						 process_synthesized_event, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize config.\n");
+		return err;
+	}
+
+	return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+static int __store_counter_ids(struct perf_evsel *counter,
+			       struct cpu_map *cpus,
+			       struct thread_map *threads)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < cpus->nr; cpu++) {
+		for (thread = 0; thread < threads->nr; thread++) {
+			int fd = FD(counter, cpu, thread);
+
+			if (perf_evlist__id_add_fd(evsel_list, counter,
+						   cpu, thread, fd) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int store_counter_ids(struct perf_evsel *counter)
+{
+	struct cpu_map *cpus = counter->cpus;
+	struct thread_map *threads = counter->threads;
+
+	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
+		return -ENOMEM;
+
+	return __store_counter_ids(counter, cpus, threads);
+}
+
 static int __run_perf_stat(int argc, const char **argv)
 {
 	int interval = stat_config.interval;
@@ -298,6 +497,7 @@ static int __run_perf_stat(int argc, const char **argv)
 	size_t l;
 	int status = 0;
 	const bool forks = (argc > 0);
+	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
 
 	if (interval) {
 		ts.tv_sec  = interval / 1000;
@@ -308,7 +508,7 @@ static int __run_perf_stat(int argc, const char **argv)
 	}
 
 	if (forks) {
-		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
+		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
 						  workload_exec_failed_signal) < 0) {
 			perror("failed to prepare workload");
 			return -1;
@@ -352,6 +552,9 @@ static int __run_perf_stat(int argc, const char **argv)
 		l = strlen(counter->unit);
 		if (l > unit_width)
 			unit_width = l;
+
+		if (STAT_RECORD && store_counter_ids(counter))
+			return -1;
 	}
 
 	if (perf_evlist__apply_filters(evsel_list, &counter)) {
@@ -361,6 +564,24 @@ static int __run_perf_stat(int argc, const char **argv)
 		return -1;
 	}
 
+	if (STAT_RECORD) {
+		int err, fd = perf_data_file__fd(&perf_stat.file);
+
+		if (is_pipe) {
+			err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
+		} else {
+			err = perf_session__write_header(perf_stat.session, evsel_list,
+							 fd, false);
+		}
+
+		if (err < 0)
+			return err;
+
+		err = perf_stat_synthesize_config(is_pipe);
+		if (err < 0)
+			return err;
+	}
+
 	/*
 	 * Enable counters and exec the command:
 	 */
@@ -827,8 +1048,8 @@ static void print_header(int argc, const char **argv)
 		else if (target.cpu_list)
 			fprintf(output, "\'CPU(s) %s", target.cpu_list);
 		else if (!target__has_task(&target)) {
-			fprintf(output, "\'%s", argv[0]);
-			for (i = 1; i < argc; i++)
+			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+			for (i = 1; argv && (i < argc); i++)
 				fprintf(output, " %s", argv[i]);
 		} else if (target.pid)
 			fprintf(output, "process id \'%s", target.pid);
@@ -864,6 +1085,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	struct perf_evsel *counter;
 	char buf[64], *prefix = NULL;
 
+	/* Do not print anything if we record to the pipe. */
+	if (STAT_RECORD && perf_stat.file.is_pipe)
+		return;
+
 	if (interval)
 		print_interval(prefix = buf, ts);
 	else
@@ -1102,6 +1327,101 @@ static void perf_stat__exit_aggr_mode(void)
 	cpus_aggr_map = NULL;
 }
 
+static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+{
+	int cpu;
+
+	if (idx > map->nr)
+		return -1;
+
+	cpu = map->map[idx];
+
+	if (cpu >= env->nr_cpus_online)
+		return -1;
+
+	return cpu;
+}
+
+static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+{
+	struct perf_env *env = data;
+	int cpu = perf_env__get_cpu(env, map, idx);
+
+	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
+}
+
+static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+{
+	struct perf_env *env = data;
+	int core = -1, cpu = perf_env__get_cpu(env, map, idx);
+
+	if (cpu != -1) {
+		int socket_id = env->cpu[cpu].socket_id;
+
+		/*
+		 * Encode socket in upper 16 bits
+		 * core_id is relative to socket, and
+		 * we need a global id. So we combine
+		 * socket + core id.
+		 */
+		core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+	}
+
+	return core;
+}
+
+static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
+				      struct cpu_map **sockp)
+{
+	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
+}
+
+static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
+				    struct cpu_map **corep)
+{
+	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
+}
+
+static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+{
+	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+{
+	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+	struct perf_env *env = &st->session->header.env;
+
+	switch (stat_config.aggr_mode) {
+	case AGGR_SOCKET:
+		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+			perror("cannot build socket map");
+			return -1;
+		}
+		aggr_get_id = perf_stat__get_socket_file;
+		break;
+	case AGGR_CORE:
+		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+			perror("cannot build core map");
+			return -1;
+		}
+		aggr_get_id = perf_stat__get_core_file;
+		break;
+	case AGGR_NONE:
+	case AGGR_GLOBAL:
+	case AGGR_THREAD:
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1261,6 +1581,225 @@ static int add_default_attributes(void)
 	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
 }
 
+static const char * const recort_usage[] = {
+	"perf stat record [<options>]",
+	NULL,
+};
+
+static void init_features(struct perf_session *session)
+{
+	int feat;
+
+	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+		perf_header__set_feat(&session->header, feat);
+
+	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+	struct perf_session *session;
+	struct perf_data_file *file = &perf_stat.file;
+
+	argc = parse_options(argc, argv, stat_options, record_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (output_name)
+		file->path = output_name;
+
+	if (run_count != 1 || forever) {
+		pr_err("Cannot use -r option with perf stat record.\n");
+		return -1;
+	}
+
+	session = perf_session__new(file, false, NULL);
+	if (session == NULL) {
+		pr_err("Perf session creation failed.\n");
+		return -1;
+	}
+
+	init_features(session);
+
+	session->evlist   = evsel_list;
+	perf_stat.session = session;
+	perf_stat.record  = true;
+	return argc;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_session *session)
+{
+	struct stat_round_event *round = &event->stat_round;
+	struct perf_evsel *counter;
+	struct timespec tsh, *ts = NULL;
+	const char **argv = session->header.env.cmdline_argv;
+	int argc = session->header.env.nr_cmdline;
+
+	evlist__for_each(evsel_list, counter)
+		perf_stat_process_counter(&stat_config, counter);
+
+	if (round->type == PERF_STAT_ROUND_TYPE__FINAL)
+		update_stats(&walltime_nsecs_stats, round->time);
+
+	if (stat_config.interval && round->time) {
+		tsh.tv_sec  = round->time / NSECS_PER_SEC;
+		tsh.tv_nsec = round->time % NSECS_PER_SEC;
+		ts = &tsh;
+	}
+
+	print_counters(ts, argc, argv);
+	return 0;
+}
+
+static
+int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_session *session __maybe_unused)
+{
+	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+	perf_event__read_stat_config(&stat_config, &event->stat_config);
+
+	if (cpu_map__empty(st->cpus)) {
+		if (st->aggr_mode != AGGR_UNSET)
+			pr_warning("warning: processing task data, aggregation mode not set\n");
+		return 0;
+	}
+
+	if (st->aggr_mode != AGGR_UNSET)
+		stat_config.aggr_mode = st->aggr_mode;
+
+	if (perf_stat.file.is_pipe)
+		perf_stat_init_aggr_mode();
+	else
+		perf_stat_init_aggr_mode_file(st);
+
+	return 0;
+}
+
+static int set_maps(struct perf_stat *st)
+{
+	if (!st->cpus || !st->threads)
+		return 0;
+
+	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
+		return -EINVAL;
+
+	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+
+	if (perf_evlist__alloc_stats(evsel_list, true))
+		return -ENOMEM;
+
+	st->maps_allocated = true;
+	return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_session *session __maybe_unused)
+{
+	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+	if (st->threads) {
+		pr_warning("Extra thread map event, ignoring.\n");
+		return 0;
+	}
+
+	st->threads = thread_map__new_event(&event->thread_map);
+	if (!st->threads)
+		return -ENOMEM;
+
+	return set_maps(st);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+			  union perf_event *event,
+			  struct perf_session *session __maybe_unused)
+{
+	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+	struct cpu_map *cpus;
+
+	if (st->cpus) {
+		pr_warning("Extra cpu map event, ignoring.\n");
+		return 0;
+	}
+
+	cpus = cpu_map__new_data(&event->cpu_map.data);
+	if (!cpus)
+		return -ENOMEM;
+
+	st->cpus = cpus;
+	return set_maps(st);
+}
+
+static const char * const report_usage[] = {
+	"perf stat report [<options>]",
+	NULL,
+};
+
+static struct perf_stat perf_stat = {
+	.tool = {
+		.attr		= perf_event__process_attr,
+		.event_update	= perf_event__process_event_update,
+		.thread_map	= process_thread_map_event,
+		.cpu_map	= process_cpu_map_event,
+		.stat_config	= process_stat_config_event,
+		.stat		= perf_event__process_stat_event,
+		.stat_round	= process_stat_round_event,
+	},
+	.aggr_mode = AGGR_UNSET,
+};
+
+static int __cmd_report(int argc, const char **argv)
+{
+	struct perf_session *session;
+	const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
+		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
+		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
+		     "disable CPU count aggregation", AGGR_NONE),
+	OPT_END()
+	};
+	struct stat st;
+	int ret;
+
+	argc = parse_options(argc, argv, options, report_usage, 0);
+
+	if (!input_name || !strlen(input_name)) {
+		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+			input_name = "-";
+		else
+			input_name = "perf.data";
+	}
+
+	perf_stat.file.path = input_name;
+	perf_stat.file.mode = PERF_DATA_MODE_READ;
+
+	session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
+	if (session == NULL)
+		return -1;
+
+	perf_stat.session  = session;
+	stat_config.output = stderr;
+	evsel_list         = session->evlist;
+
+	ret = perf_session__process_events(session);
+	if (ret)
+		return ret;
+
+	perf_session__delete(session);
+	return 0;
+}
+
 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const stat_usage[] = {
@@ -1271,6 +1810,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	const char *mode;
 	FILE *output = stderr;
 	unsigned int interval;
+	const char * const stat_subcommands[] = { "record", "report" };
 
 	setlocale(LC_ALL, "");
 
@@ -1278,12 +1818,30 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (evsel_list == NULL)
 		return -ENOMEM;
 
-	argc = parse_options(argc, argv, stat_options, stat_usage,
-		PARSE_OPT_STOP_AT_NON_OPTION);
+	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
+					(const char **) stat_usage,
+					PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (csv_sep) {
+		csv_output = true;
+		if (!strcmp(csv_sep, "\\t"))
+			csv_sep = "\t";
+	} else
+		csv_sep = DEFAULT_SEPARATOR;
+
+	if (argc && !strncmp(argv[0], "rec", 3)) {
+		argc = __cmd_record(argc, argv);
+		if (argc < 0)
+			return -1;
+	} else if (argc && !strncmp(argv[0], "rep", 3))
+		return __cmd_report(argc, argv);
 
 	interval = stat_config.interval;
 
-	if (output_name && strcmp(output_name, "-"))
+	/*
+	 * For record command the -o is already taken care of.
+	 */
+	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
 		output = NULL;
 
 	if (output_name && output_fd) {
@@ -1321,13 +1879,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	stat_config.output = output;
 
-	if (csv_sep) {
-		csv_output = true;
-		if (!strcmp(csv_sep, "\\t"))
-			csv_sep = "\t";
-	} else
-		csv_sep = DEFAULT_SEPARATOR;
-
 	/*
 	 * let the spreadsheet do the pretty-printing
 	 */
@@ -1450,6 +2001,41 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (!forever && status != -1 && !interval)
 		print_counters(NULL, argc, argv);
 
+	if (STAT_RECORD) {
+		/*
+		 * We synthesize the kernel mmap record just so that older tools
+		 * don't emit warnings about not being able to resolve symbols
+		 * due to /proc/sys/kernel/kptr_restrict settings and instear provide
+		 * a saner message about no samples being in the perf.data file.
+		 *
+		 * This also serves to suppress a warning about f_header.data.size == 0
+		 * in header.c at the moment 'perf stat record' gets introduced, which
+		 * is not really needed once we start adding the stat specific PERF_RECORD_
+		 * records, but the need to suppress the kptr_restrict messages in older
+		 * tools remain  -acme
+		 */
+		int fd = perf_data_file__fd(&perf_stat.file);
+		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+							     process_synthesized_event,
+							     &perf_stat.session->machines.host);
+		if (err) {
+			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
+				   "older tools may produce warnings about this file\n.");
+		}
+
+		if (!interval) {
+			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+				pr_err("failed to write stat round event\n");
+		}
+
+		if (!perf_stat.file.is_pipe) {
+			perf_stat.session->header.data_size += perf_stat.bytes_written;
+			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
+		}
+
+		perf_session__delete(perf_stat.session);
+	}
+
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index f23fb7ed440026ba166f42ec9a166a2ebc8047ad..614899b88b377e07f9615d618ba7045f66051bea 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -34,6 +34,9 @@ perf-y += thread-map.o
 perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
 perf-y += bpf.o
 perf-y += topology.o
+perf-y += cpumap.o
+perf-y += stat.o
+perf-y += event_update.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 0372d594591002981733c0d2f11981b6fffbd1bf..f2b1dcac45d3065d90ef6fb01bdd57694577d809 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -179,6 +179,30 @@ static struct test generic_tests[] = {
 			.get_desc	= test__bpf_subtest_get_desc,
 		},
 	},
+	{
+		.desc = "Test thread map synthesize",
+		.func = test__thread_map_synthesize,
+	},
+	{
+		.desc = "Test cpu map synthesize",
+		.func = test__cpu_map_synthesize,
+	},
+	{
+		.desc = "Test stat config synthesize",
+		.func = test__synthesize_stat_config,
+	},
+	{
+		.desc = "Test stat synthesize",
+		.func = test__synthesize_stat,
+	},
+	{
+		.desc = "Test stat round synthesize",
+		.func = test__synthesize_stat_round,
+	},
+	{
+		.desc = "Test attr update synthesize",
+		.func = test__event_update,
+	},
 	{
 		.func = NULL,
 	},
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
new file mode 100644
index 0000000000000000000000000000000000000000..4cb6418a8ffc33d6c4b71c66c112420191c68956
--- /dev/null
+++ b/tools/perf/tests/cpumap.c
@@ -0,0 +1,88 @@
+#include "tests.h"
+#include "cpumap.h"
+
+static int process_event_mask(struct perf_tool *tool __maybe_unused,
+			 union perf_event *event,
+			 struct perf_sample *sample __maybe_unused,
+			 struct machine *machine __maybe_unused)
+{
+	struct cpu_map_event *map_event = &event->cpu_map;
+	struct cpu_map_mask *mask;
+	struct cpu_map_data *data;
+	struct cpu_map *map;
+	int i;
+
+	data = &map_event->data;
+
+	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK);
+
+	mask = (struct cpu_map_mask *)data->data;
+
+	TEST_ASSERT_VAL("wrong nr",   mask->nr == 1);
+
+	for (i = 0; i < 20; i++) {
+		TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask));
+	}
+
+	map = cpu_map__new_data(data);
+	TEST_ASSERT_VAL("wrong nr",  map->nr == 20);
+
+	for (i = 0; i < 20; i++) {
+		TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
+	}
+
+	cpu_map__put(map);
+	return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+			 union perf_event *event,
+			 struct perf_sample *sample __maybe_unused,
+			 struct machine *machine __maybe_unused)
+{
+	struct cpu_map_event *map_event = &event->cpu_map;
+	struct cpu_map_entries *cpus;
+	struct cpu_map_data *data;
+	struct cpu_map *map;
+
+	data = &map_event->data;
+
+	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS);
+
+	cpus = (struct cpu_map_entries *)data->data;
+
+	TEST_ASSERT_VAL("wrong nr",   cpus->nr == 2);
+	TEST_ASSERT_VAL("wrong cpu",  cpus->cpu[0] == 1);
+	TEST_ASSERT_VAL("wrong cpu",  cpus->cpu[1] == 256);
+
+	map = cpu_map__new_data(data);
+	TEST_ASSERT_VAL("wrong nr",  map->nr == 2);
+	TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
+	TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+	TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1);
+	cpu_map__put(map);
+	return 0;
+}
+
+
+int test__cpu_map_synthesize(int subtest __maybe_unused)
+{
+	struct cpu_map *cpus;
+
+	/* This one is better stores in mask. */
+	cpus = cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+
+	TEST_ASSERT_VAL("failed to synthesize map",
+		!perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL));
+
+	cpu_map__put(cpus);
+
+	/* This one is better stores in cpu values. */
+	cpus = cpu_map__new("1,256");
+
+	TEST_ASSERT_VAL("failed to synthesize map",
+		!perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL));
+
+	cpu_map__put(cpus);
+	return 0;
+}
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
new file mode 100644
index 0000000000000000000000000000000000000000..012eab5d1df115e7cbeb7d8a505a46d0e43f854a
--- /dev/null
+++ b/tools/perf/tests/event_update.c
@@ -0,0 +1,117 @@
+#include <linux/compiler.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "tests.h"
+#include "debug.h"
+
+static int process_event_unit(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct event_update_event *ev = (struct event_update_event *) event;
+
+	TEST_ASSERT_VAL("wrong id", ev->id == 123);
+	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__UNIT);
+	TEST_ASSERT_VAL("wrong unit", !strcmp(ev->data, "KRAVA"));
+	return 0;
+}
+
+static int process_event_scale(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct event_update_event *ev = (struct event_update_event *) event;
+	struct event_update_event_scale *ev_data;
+
+	ev_data = (struct event_update_event_scale *) ev->data;
+
+	TEST_ASSERT_VAL("wrong id", ev->id == 123);
+	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
+	TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
+	return 0;
+}
+
+struct event_name {
+	struct perf_tool tool;
+	const char *name;
+};
+
+static int process_event_name(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct event_name *tmp = container_of(tool, struct event_name, tool);
+	struct event_update_event *ev = (struct event_update_event*) event;
+
+	TEST_ASSERT_VAL("wrong id", ev->id == 123);
+	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__NAME);
+	TEST_ASSERT_VAL("wrong name", !strcmp(ev->data, tmp->name));
+	return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct event_update_event *ev = (struct event_update_event*) event;
+	struct event_update_event_cpus *ev_data;
+	struct cpu_map *map;
+
+	ev_data = (struct event_update_event_cpus*) ev->data;
+
+	map = cpu_map__new_data(&ev_data->cpus);
+
+	TEST_ASSERT_VAL("wrong id", ev->id == 123);
+	TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
+	TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
+	TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
+	TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
+	TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+	cpu_map__put(map);
+	return 0;
+}
+
+int test__event_update(int subtest __maybe_unused)
+{
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct event_name tmp;
+
+	evlist = perf_evlist__new_default();
+	TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+	evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("failed to allos ids",
+			!perf_evsel__alloc_id(evsel, 1, 1));
+
+	perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+
+	evsel->unit = strdup("KRAVA");
+
+	TEST_ASSERT_VAL("failed to synthesize attr update unit",
+			!perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit));
+
+	evsel->scale = 0.123;
+
+	TEST_ASSERT_VAL("failed to synthesize attr update scale",
+			!perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
+
+	tmp.name = perf_evsel__name(evsel);
+
+	TEST_ASSERT_VAL("failed to synthesize attr update name",
+			!perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
+
+	evsel->own_cpus = cpu_map__new("1,2,3");
+
+	TEST_ASSERT_VAL("failed to synthesize attr update cpus",
+			!perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus));
+
+	cpu_map__put(evsel->own_cpus);
+	return 0;
+}
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
new file mode 100644
index 0000000000000000000000000000000000000000..6a20ff2326bb1eb4127b8b6a76eda044d5ecb641
--- /dev/null
+++ b/tools/perf/tests/stat.c
@@ -0,0 +1,111 @@
+#include <linux/compiler.h>
+#include "event.h"
+#include "tests.h"
+#include "stat.h"
+#include "counts.h"
+#include "debug.h"
+
+static bool has_term(struct stat_config_event *config,
+		     u64 tag, u64 val)
+{
+	unsigned i;
+
+	for (i = 0; i < config->nr; i++) {
+		if ((config->data[i].tag == tag) &&
+		    (config->data[i].val == val))
+			return true;
+	}
+
+	return false;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	struct stat_config_event *config = &event->stat_config;
+	struct perf_stat_config stat_config;
+
+#define HAS(term, val) \
+	has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
+
+	TEST_ASSERT_VAL("wrong nr",        config->nr == PERF_STAT_CONFIG_TERM__MAX);
+	TEST_ASSERT_VAL("wrong aggr_mode", HAS(AGGR_MODE, AGGR_CORE));
+	TEST_ASSERT_VAL("wrong scale",     HAS(SCALE, 1));
+	TEST_ASSERT_VAL("wrong interval",  HAS(INTERVAL, 1));
+
+#undef HAS
+
+	perf_event__read_stat_config(&stat_config, config);
+
+	TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
+	TEST_ASSERT_VAL("wrong scale",     stat_config.scale == 1);
+	TEST_ASSERT_VAL("wrong interval",  stat_config.interval == 1);
+	return 0;
+}
+
+int test__synthesize_stat_config(int subtest __maybe_unused)
+{
+	struct perf_stat_config stat_config = {
+		.aggr_mode	= AGGR_CORE,
+		.scale		= 1,
+		.interval	= 1,
+	};
+
+	TEST_ASSERT_VAL("failed to synthesize stat_config",
+		!perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
+
+	return 0;
+}
+
+static int process_stat_event(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct stat_event *st = &event->stat;
+
+	TEST_ASSERT_VAL("wrong cpu",    st->cpu    == 1);
+	TEST_ASSERT_VAL("wrong thread", st->thread == 2);
+	TEST_ASSERT_VAL("wrong id",     st->id     == 3);
+	TEST_ASSERT_VAL("wrong val",    st->val    == 100);
+	TEST_ASSERT_VAL("wrong run",    st->ena    == 200);
+	TEST_ASSERT_VAL("wrong ena",    st->run    == 300);
+	return 0;
+}
+
+int test__synthesize_stat(int subtest __maybe_unused)
+{
+	struct perf_counts_values count;
+
+	count.val = 100;
+	count.ena = 200;
+	count.run = 300;
+
+	TEST_ASSERT_VAL("failed to synthesize stat_config",
+		!perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+
+	return 0;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_sample *sample __maybe_unused,
+				    struct machine *machine __maybe_unused)
+{
+	struct stat_round_event *stat_round = &event->stat_round;
+
+	TEST_ASSERT_VAL("wrong time", stat_round->time == 0xdeadbeef);
+	TEST_ASSERT_VAL("wrong type", stat_round->type == PERF_STAT_ROUND_TYPE__INTERVAL);
+	return 0;
+}
+
+int test__synthesize_stat_round(int subtest __maybe_unused)
+{
+	TEST_ASSERT_VAL("failed to synthesize stat_config",
+		!perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
+						   process_stat_round_event, NULL));
+
+	return 0;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index a0733aaad0812556de6d01083ca7d0194cbf50b5..82b2b5e6ba7c7613ca58f9a46cda688b36444915 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -79,6 +79,12 @@ int test__bpf(int subtest);
 const char *test__bpf_subtest_get_desc(int subtest);
 int test__bpf_subtest_get_nr(void);
 int test_session_topology(int subtest);
+int test__thread_map_synthesize(int subtest);
+int test__cpu_map_synthesize(int subtest);
+int test__synthesize_stat_config(int subtest);
+int test__synthesize_stat(int subtest);
+int test__synthesize_stat_round(int subtest);
+int test__event_update(int subtest);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 2be02d303e8267b6ce46cd5b7449c1610ac5eba9..fccde848fe9c0d0d67f1d6081b7e09ed7fc05610 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -40,3 +40,46 @@ int test__thread_map(int subtest __maybe_unused)
 	thread_map__put(map);
 	return 0;
 }
+
+static int process_event(struct perf_tool *tool __maybe_unused,
+			 union perf_event *event,
+			 struct perf_sample *sample __maybe_unused,
+			 struct machine *machine __maybe_unused)
+{
+	struct thread_map_event *map = &event->thread_map;
+	struct thread_map *threads;
+
+	TEST_ASSERT_VAL("wrong nr",   map->nr == 1);
+	TEST_ASSERT_VAL("wrong pid",  map->entries[0].pid == (u64) getpid());
+	TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, "perf"));
+
+	threads = thread_map__new_event(&event->thread_map);
+	TEST_ASSERT_VAL("failed to alloc map", threads);
+
+	TEST_ASSERT_VAL("wrong nr", threads->nr == 1);
+	TEST_ASSERT_VAL("wrong pid",
+			thread_map__pid(threads, 0) == getpid());
+	TEST_ASSERT_VAL("wrong comm",
+			thread_map__comm(threads, 0) &&
+			!strcmp(thread_map__comm(threads, 0), "perf"));
+	TEST_ASSERT_VAL("wrong refcnt",
+			atomic_read(&threads->refcnt) == 1);
+	thread_map__put(threads);
+	return 0;
+}
+
+int test__thread_map_synthesize(int subtest __maybe_unused)
+{
+	struct thread_map *threads;
+
+	/* test map on current pid */
+	threads = thread_map__new_by_pid(getpid());
+	TEST_ASSERT_VAL("failed to alloc map", threads);
+
+	thread_map__read_comms(threads);
+
+	TEST_ASSERT_VAL("failed to synthesize map",
+		!perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+
+	return 0;
+}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 10af1e7524fbd24de791c38fa23c7d730d54a193..a0717b93d8f5baa8f54a2d80a2fa4ce6ad61f350 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -5,6 +5,7 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <linux/bitmap.h>
 #include "asm/bug.h"
 
 static struct cpu_map *cpu_map__default_new(void)
@@ -179,6 +180,47 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
 	return cpus;
 }
 
+static struct cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
+{
+	struct cpu_map *map;
+
+	map = cpu_map__empty_new(cpus->nr);
+	if (map) {
+		unsigned i;
+
+		for (i = 0; i < cpus->nr; i++)
+			map->map[i] = (int)cpus->cpu[i];
+	}
+
+	return map;
+}
+
+static struct cpu_map *cpu_map__from_mask(struct cpu_map_mask *mask)
+{
+	struct cpu_map *map;
+	int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE;
+
+	nr = bitmap_weight(mask->mask, nbits);
+
+	map = cpu_map__empty_new(nr);
+	if (map) {
+		int cpu, i = 0;
+
+		for_each_set_bit(cpu, mask->mask, nbits)
+			map->map[i++] = cpu;
+	}
+	return map;
+
+}
+
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data)
+{
+	if (data->type == PERF_CPU_MAP__CPUS)
+		return cpu_map__from_entries((struct cpu_map_entries *)data->data);
+	else
+		return cpu_map__from_mask((struct cpu_map_mask *)data->data);
+}
+
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
 {
 	int i;
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 85f7772457fa091655d62212067f2edddf6e55ae..71c41b9efabb3b38dd17a7374413985dfd944f77 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -17,6 +17,7 @@ struct cpu_map {
 struct cpu_map *cpu_map__new(const char *cpu_list);
 struct cpu_map *cpu_map__empty_new(int nr);
 struct cpu_map *cpu_map__dummy_new(void);
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data);
 struct cpu_map *cpu_map__read(FILE *file);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
 int cpu_map__get_socket_id(int cpu);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8b10621b415c684564a6dd5af130ebc9d3ab33c7..cd61bb1f3917f1c2a03f7274250a0f7e26531edc 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -10,6 +10,8 @@
 #include "thread.h"
 #include "thread_map.h"
 #include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
 
 static const char *perf_event__names[] = {
 	[0]					= "TOTAL",
@@ -37,6 +39,12 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_AUXTRACE_INFO]		= "AUXTRACE_INFO",
 	[PERF_RECORD_AUXTRACE]			= "AUXTRACE",
 	[PERF_RECORD_AUXTRACE_ERROR]		= "AUXTRACE_ERROR",
+	[PERF_RECORD_THREAD_MAP]		= "THREAD_MAP",
+	[PERF_RECORD_CPU_MAP]			= "CPU_MAP",
+	[PERF_RECORD_STAT_CONFIG]		= "STAT_CONFIG",
+	[PERF_RECORD_STAT]			= "STAT",
+	[PERF_RECORD_STAT_ROUND]		= "STAT_ROUND",
+	[PERF_RECORD_EVENT_UPDATE]		= "EVENT_UPDATE",
 };
 
 const char *perf_event__name(unsigned int id)
@@ -699,6 +707,274 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 	return err;
 }
 
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+				      struct thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	union perf_event *event;
+	int i, err, size;
+
+	size  = sizeof(event->thread_map);
+	size +=	threads->nr * sizeof(event->thread_map.entries[0]);
+
+	event = zalloc(size);
+	if (!event)
+		return -ENOMEM;
+
+	event->header.type = PERF_RECORD_THREAD_MAP;
+	event->header.size = size;
+	event->thread_map.nr = threads->nr;
+
+	for (i = 0; i < threads->nr; i++) {
+		struct thread_map_event_entry *entry = &event->thread_map.entries[i];
+		char *comm = thread_map__comm(threads, i);
+
+		if (!comm)
+			comm = (char *) "";
+
+		entry->pid = thread_map__pid(threads, i);
+		strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+	}
+
+	err = process(tool, event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+static void synthesize_cpus(struct cpu_map_entries *cpus,
+			    struct cpu_map *map)
+{
+	int i;
+
+	cpus->nr = map->nr;
+
+	for (i = 0; i < map->nr; i++)
+		cpus->cpu[i] = map->map[i];
+}
+
+static void synthesize_mask(struct cpu_map_mask *mask,
+			    struct cpu_map *map, int max)
+{
+	int i;
+
+	mask->nr = BITS_TO_LONGS(max);
+	mask->long_size = sizeof(long);
+
+	for (i = 0; i < map->nr; i++)
+		set_bit(map->map[i], mask->mask);
+}
+
+static size_t cpus_size(struct cpu_map *map)
+{
+	return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+}
+
+static size_t mask_size(struct cpu_map *map, int *max)
+{
+	int i;
+
+	*max = 0;
+
+	for (i = 0; i < map->nr; i++) {
+		/* bit possition of the cpu is + 1 */
+		int bit = map->map[i] + 1;
+
+		if (bit > *max)
+			*max = bit;
+	}
+
+	return sizeof(struct cpu_map_mask) + BITS_TO_LONGS(*max) * sizeof(long);
+}
+
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max)
+{
+	size_t size_cpus, size_mask;
+	bool is_dummy = cpu_map__empty(map);
+
+	/*
+	 * Both array and mask data have variable size based
+	 * on the number of cpus and their actual values.
+	 * The size of the 'struct cpu_map_data' is:
+	 *
+	 *   array = size of 'struct cpu_map_entries' +
+	 *           number of cpus * sizeof(u64)
+	 *
+	 *   mask  = size of 'struct cpu_map_mask' +
+	 *           maximum cpu bit converted to size of longs
+	 *
+	 * and finaly + the size of 'struct cpu_map_data'.
+	 */
+	size_cpus = cpus_size(map);
+	size_mask = mask_size(map, max);
+
+	if (is_dummy || (size_cpus < size_mask)) {
+		*size += size_cpus;
+		*type  = PERF_CPU_MAP__CPUS;
+	} else {
+		*size += size_mask;
+		*type  = PERF_CPU_MAP__MASK;
+	}
+
+	*size += sizeof(struct cpu_map_data);
+	return zalloc(*size);
+}
+
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+			      u16 type, int max)
+{
+	data->type = type;
+
+	switch (type) {
+	case PERF_CPU_MAP__CPUS:
+		synthesize_cpus((struct cpu_map_entries *) data->data, map);
+		break;
+	case PERF_CPU_MAP__MASK:
+		synthesize_mask((struct cpu_map_mask *) data->data, map, max);
+	default:
+		break;
+	};
+}
+
+static struct cpu_map_event* cpu_map_event__new(struct cpu_map *map)
+{
+	size_t size = sizeof(struct cpu_map_event);
+	struct cpu_map_event *event;
+	int max;
+	u16 type;
+
+	event = cpu_map_data__alloc(map, &size, &type, &max);
+	if (!event)
+		return NULL;
+
+	event->header.type = PERF_RECORD_CPU_MAP;
+	event->header.size = size;
+	event->data.type   = type;
+
+	cpu_map_data__synthesize(&event->data, map, type, max);
+	return event;
+}
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+				   struct cpu_map *map,
+				   perf_event__handler_t process,
+				   struct machine *machine)
+{
+	struct cpu_map_event *event;
+	int err;
+
+	event = cpu_map_event__new(map);
+	if (!event)
+		return -ENOMEM;
+
+	err = process(tool, (union perf_event *) event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+				       struct perf_stat_config *config,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	struct stat_config_event *event;
+	int size, i = 0, err;
+
+	size  = sizeof(*event);
+	size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+	event = zalloc(size);
+	if (!event)
+		return -ENOMEM;
+
+	event->header.type = PERF_RECORD_STAT_CONFIG;
+	event->header.size = size;
+	event->nr          = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val)					\
+	event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;	\
+	event->data[i].val = __val;				\
+	i++;
+
+	ADD(AGGR_MODE,	config->aggr_mode)
+	ADD(INTERVAL,	config->interval)
+	ADD(SCALE,	config->scale)
+
+	WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+		  "stat config terms unbalanced\n");
+#undef ADD
+
+	err = process(tool, (union perf_event *) event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+				u32 cpu, u32 thread, u64 id,
+				struct perf_counts_values *count,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	struct stat_event event;
+
+	event.header.type = PERF_RECORD_STAT;
+	event.header.size = sizeof(event);
+	event.header.misc = 0;
+
+	event.id        = id;
+	event.cpu       = cpu;
+	event.thread    = thread;
+	event.val       = count->val;
+	event.ena       = count->ena;
+	event.run       = count->run;
+
+	return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+				      u64 evtime, u64 type,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	struct stat_round_event event;
+
+	event.header.type = PERF_RECORD_STAT_ROUND;
+	event.header.size = sizeof(event);
+	event.header.misc = 0;
+
+	event.time = evtime;
+	event.type = type;
+
+	return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+				  struct stat_config_event *event)
+{
+	unsigned i;
+
+	for (i = 0; i < event->nr; i++) {
+
+		switch (event->data[i].tag) {
+#define CASE(__term, __val)					\
+		case PERF_STAT_CONFIG_TERM__##__term:		\
+			config->__val = event->data[i].val;	\
+			break;
+
+		CASE(AGGR_MODE, aggr_mode)
+		CASE(SCALE,     scale)
+		CASE(INTERVAL,  interval)
+#undef CASE
+		default:
+			pr_warning("unknown stat config term %" PRIu64 "\n",
+				   event->data[i].tag);
+		}
+	}
+}
+
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
 {
 	const char *s;
@@ -783,6 +1059,38 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
 		       event->mmap2.filename);
 }
 
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+	struct thread_map *threads = thread_map__new_event(&event->thread_map);
+	size_t ret;
+
+	ret = fprintf(fp, " nr: ");
+
+	if (threads)
+		ret += thread_map__fprintf(threads, fp);
+	else
+		ret += fprintf(fp, "failed to get threads from event\n");
+
+	thread_map__put(threads);
+	return ret;
+}
+
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+	struct cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
+	size_t ret;
+
+	ret = fprintf(fp, " nr: ");
+
+	if (cpus)
+		ret += cpu_map__fprintf(cpus, fp);
+	else
+		ret += fprintf(fp, "failed to get cpumap from event\n");
+
+	cpu_map__put(cpus);
+	return ret;
+}
+
 int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct perf_sample *sample,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a0dbcbd4f6d82d6564d56e48c0672ea56cba45bc..b7ffb7ee9971f020f1b2320baa478ed1421a01e9 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -226,6 +226,12 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_AUXTRACE_INFO		= 70,
 	PERF_RECORD_AUXTRACE			= 71,
 	PERF_RECORD_AUXTRACE_ERROR		= 72,
+	PERF_RECORD_THREAD_MAP			= 73,
+	PERF_RECORD_CPU_MAP			= 74,
+	PERF_RECORD_STAT_CONFIG			= 75,
+	PERF_RECORD_STAT			= 76,
+	PERF_RECORD_STAT_ROUND			= 77,
+	PERF_RECORD_EVENT_UPDATE		= 78,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -270,12 +276,61 @@ struct events_stats {
 	u32 nr_proc_map_timeout;
 };
 
+enum {
+	PERF_CPU_MAP__CPUS = 0,
+	PERF_CPU_MAP__MASK = 1,
+};
+
+struct cpu_map_entries {
+	u16	nr;
+	u16	cpu[];
+};
+
+struct cpu_map_mask {
+	u16	nr;
+	u16	long_size;
+	unsigned long mask[];
+};
+
+struct cpu_map_data {
+	u16	type;
+	char	data[];
+};
+
+struct cpu_map_event {
+	struct perf_event_header	header;
+	struct cpu_map_data		data;
+};
+
 struct attr_event {
 	struct perf_event_header header;
 	struct perf_event_attr attr;
 	u64 id[];
 };
 
+enum {
+	PERF_EVENT_UPDATE__UNIT  = 0,
+	PERF_EVENT_UPDATE__SCALE = 1,
+	PERF_EVENT_UPDATE__NAME  = 2,
+	PERF_EVENT_UPDATE__CPUS  = 3,
+};
+
+struct event_update_event_cpus {
+	struct cpu_map_data cpus;
+};
+
+struct event_update_event_scale {
+	double scale;
+};
+
+struct event_update_event {
+	struct perf_event_header header;
+	u64 type;
+	u64 id;
+
+	char data[];
+};
+
 #define MAX_EVENT_NAME 64
 
 struct perf_trace_event_type {
@@ -356,6 +411,63 @@ struct context_switch_event {
 	u32 next_prev_tid;
 };
 
+struct thread_map_event_entry {
+	u64	pid;
+	char	comm[16];
+};
+
+struct thread_map_event {
+	struct perf_event_header	header;
+	u64				nr;
+	struct thread_map_event_entry	entries[];
+};
+
+enum {
+	PERF_STAT_CONFIG_TERM__AGGR_MODE	= 0,
+	PERF_STAT_CONFIG_TERM__INTERVAL		= 1,
+	PERF_STAT_CONFIG_TERM__SCALE		= 2,
+	PERF_STAT_CONFIG_TERM__MAX		= 3,
+};
+
+struct stat_config_event_entry {
+	u64	tag;
+	u64	val;
+};
+
+struct stat_config_event {
+	struct perf_event_header	header;
+	u64				nr;
+	struct stat_config_event_entry	data[];
+};
+
+struct stat_event {
+	struct perf_event_header	header;
+
+	u64	id;
+	u32	cpu;
+	u32	thread;
+
+	union {
+		struct {
+			u64 val;
+			u64 ena;
+			u64 run;
+		};
+		u64 values[3];
+	};
+};
+
+enum {
+	PERF_STAT_ROUND_TYPE__INTERVAL	= 0,
+	PERF_STAT_ROUND_TYPE__FINAL	= 1,
+};
+
+struct stat_round_event {
+	struct perf_event_header	header;
+	u64				type;
+	u64				time;
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -368,6 +480,7 @@ union perf_event {
 	struct throttle_event		throttle;
 	struct sample_event		sample;
 	struct attr_event		attr;
+	struct event_update_event	event_update;
 	struct event_type_event		event_type;
 	struct tracing_data_event	tracing_data;
 	struct build_id_event		build_id;
@@ -378,12 +491,20 @@ union perf_event {
 	struct aux_event		aux;
 	struct itrace_start_event	itrace_start;
 	struct context_switch_event	context_switch;
+	struct thread_map_event		thread_map;
+	struct cpu_map_event		cpu_map;
+	struct stat_config_event	stat_config;
+	struct stat_event		stat;
+	struct stat_round_event		stat_round;
 };
 
 void perf_event__print_totals(void);
 
 struct perf_tool;
 struct thread_map;
+struct cpu_map;
+struct perf_stat_config;
+struct perf_counts_values;
 
 typedef int (*perf_event__handler_t)(struct perf_tool *tool,
 				     union perf_event *event,
@@ -395,6 +516,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 				      perf_event__handler_t process,
 				      struct machine *machine, bool mmap_data,
 				      unsigned int proc_map_timeout);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+				      struct thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+				   struct cpu_map *cpus,
+				   perf_event__handler_t process,
+				   struct machine *machine);
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
 				   struct machine *machine, bool mmap_data,
@@ -402,7 +531,21 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine);
-
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+				       struct perf_stat_config *config,
+				       perf_event__handler_t process,
+				       struct machine *machine);
+void perf_event__read_stat_config(struct perf_stat_config *config,
+				  struct stat_config_event *event);
+int perf_event__synthesize_stat(struct perf_tool *tool,
+				u32 cpu, u32 thread, u64 id,
+				struct perf_counts_values *count,
+				perf_event__handler_t process,
+				struct machine *machine);
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+				      u64 time, u64 type,
+				      perf_event__handler_t process,
+				      struct machine *machine);
 int perf_event__synthesize_modules(struct perf_tool *tool,
 				   perf_event__handler_t process,
 				   struct machine *machine);
@@ -499,9 +642,14 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
 u64 kallsyms__get_function_start(const char *kallsyms_filename,
 				 const char *symbol_name);
 
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
+void  cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+			       u16 type, int max);
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8c44aadb9810160f417d7d04a07d39dc8e0cd353..b9eac0daa0b9cff642cf8d1348ebf7ead0249385 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -534,9 +534,9 @@ void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 	evsel->id[evsel->ids++] = id;
 }
 
-static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
-				  struct perf_evsel *evsel,
-				  int cpu, int thread, int fd)
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+			   struct perf_evsel *evsel,
+			   int cpu, int thread, int fd)
 {
 	u64 read_data[4] = { 0, };
 	int id_idx = 1; /* The first entry is the counter value */
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a459fe71b452e0b721d798cc73cc6287ae87dc1c..139a50038097f225c07134880277268bf2753b8e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -97,6 +97,9 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
 
 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 			 int cpu, int thread, u64 id);
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+			   struct perf_evsel *evsel,
+			   int cpu, int thread, int fd);
 
 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 5ac7bdb0dff79b9d3af11f2f6d8a9539ea340619..f50b7235ecb6558d167a475bf4199e8b05f52143 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -868,6 +868,13 @@ static int write_auxtrace(int fd, struct perf_header *h,
 	return err;
 }
 
+static int write_stat(int fd __maybe_unused,
+		      struct perf_header *h __maybe_unused,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
 static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
 			   FILE *fp)
 {
@@ -1159,6 +1166,12 @@ static void print_auxtrace(struct perf_header *ph __maybe_unused,
 	fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
 }
 
+static void print_stat(struct perf_header *ph __maybe_unused,
+		       int fd __maybe_unused, FILE *fp)
+{
+	fprintf(fp, "# contains stat data\n");
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
 			       FILE *fp)
 {
@@ -1948,6 +1961,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPP(HEADER_PMU_MAPPINGS,	pmu_mappings),
 	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
 	FEAT_OPP(HEADER_AUXTRACE,	auxtrace),
+	FEAT_OPA(HEADER_STAT,		stat),
 };
 
 struct header_print_data {
@@ -2686,6 +2700,152 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
 	return err;
 }
 
+static struct event_update_event *
+event_update_event__new(size_t size, u64 type, u64 id)
+{
+	struct event_update_event *ev;
+
+	size += sizeof(*ev);
+	size  = PERF_ALIGN(size, sizeof(u64));
+
+	ev = zalloc(size);
+	if (ev) {
+		ev->header.type = PERF_RECORD_EVENT_UPDATE;
+		ev->header.size = (u16)size;
+		ev->type = type;
+		ev->id = id;
+	}
+	return ev;
+}
+
+int
+perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+					 struct perf_evsel *evsel,
+					 perf_event__handler_t process)
+{
+	struct event_update_event *ev;
+	size_t size = strlen(evsel->unit);
+	int err;
+
+	ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
+	if (ev == NULL)
+		return -ENOMEM;
+
+	strncpy(ev->data, evsel->unit, size);
+	err = process(tool, (union perf_event *)ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int
+perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+					  struct perf_evsel *evsel,
+					  perf_event__handler_t process)
+{
+	struct event_update_event *ev;
+	struct event_update_event_scale *ev_data;
+	int err;
+
+	ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]);
+	if (ev == NULL)
+		return -ENOMEM;
+
+	ev_data = (struct event_update_event_scale *) ev->data;
+	ev_data->scale = evsel->scale;
+	err = process(tool, (union perf_event*) ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int
+perf_event__synthesize_event_update_name(struct perf_tool *tool,
+					 struct perf_evsel *evsel,
+					 perf_event__handler_t process)
+{
+	struct event_update_event *ev;
+	size_t len = strlen(evsel->name);
+	int err;
+
+	ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
+	if (ev == NULL)
+		return -ENOMEM;
+
+	strncpy(ev->data, evsel->name, len);
+	err = process(tool, (union perf_event*) ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int
+perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+					struct perf_evsel *evsel,
+					perf_event__handler_t process)
+{
+	size_t size = sizeof(struct event_update_event);
+	struct event_update_event *ev;
+	int max, err;
+	u16 type;
+
+	if (!evsel->own_cpus)
+		return 0;
+
+	ev = cpu_map_data__alloc(evsel->own_cpus, &size, &type, &max);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->header.type = PERF_RECORD_EVENT_UPDATE;
+	ev->header.size = (u16)size;
+	ev->type = PERF_EVENT_UPDATE__CPUS;
+	ev->id   = evsel->id[0];
+
+	cpu_map_data__synthesize((struct cpu_map_data *) ev->data,
+				 evsel->own_cpus,
+				 type, max);
+
+	err = process(tool, (union perf_event*) ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+	struct event_update_event *ev = &event->event_update;
+	struct event_update_event_scale *ev_scale;
+	struct event_update_event_cpus *ev_cpus;
+	struct cpu_map *map;
+	size_t ret;
+
+	ret = fprintf(fp, "\n... id:    %" PRIu64 "\n", ev->id);
+
+	switch (ev->type) {
+	case PERF_EVENT_UPDATE__SCALE:
+		ev_scale = (struct event_update_event_scale *) ev->data;
+		ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+		break;
+	case PERF_EVENT_UPDATE__UNIT:
+		ret += fprintf(fp, "... unit:  %s\n", ev->data);
+		break;
+	case PERF_EVENT_UPDATE__NAME:
+		ret += fprintf(fp, "... name:  %s\n", ev->data);
+		break;
+	case PERF_EVENT_UPDATE__CPUS:
+		ev_cpus = (struct event_update_event_cpus *) ev->data;
+		ret += fprintf(fp, "... ");
+
+		map = cpu_map__new_data(&ev_cpus->cpus);
+		if (map)
+			ret += cpu_map__fprintf(map, fp);
+		else
+			ret += fprintf(fp, "failed to get cpus\n");
+		break;
+	default:
+		ret += fprintf(fp, "... unknown type\n");
+		break;
+	}
+
+	return ret;
+}
+
 int perf_event__synthesize_attrs(struct perf_tool *tool,
 				   struct perf_session *session,
 				   perf_event__handler_t process)
@@ -2745,6 +2905,51 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_evlist **pevlist)
+{
+	struct event_update_event *ev = &event->event_update;
+	struct event_update_event_scale *ev_scale;
+	struct event_update_event_cpus *ev_cpus;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct cpu_map *map;
+
+	if (!pevlist || *pevlist == NULL)
+		return -EINVAL;
+
+	evlist = *pevlist;
+
+	evsel = perf_evlist__id2evsel(evlist, ev->id);
+	if (evsel == NULL)
+		return -EINVAL;
+
+	switch (ev->type) {
+	case PERF_EVENT_UPDATE__UNIT:
+		evsel->unit = strdup(ev->data);
+		break;
+	case PERF_EVENT_UPDATE__NAME:
+		evsel->name = strdup(ev->data);
+		break;
+	case PERF_EVENT_UPDATE__SCALE:
+		ev_scale = (struct event_update_event_scale *) ev->data;
+		evsel->scale = ev_scale->scale;
+	case PERF_EVENT_UPDATE__CPUS:
+		ev_cpus = (struct event_update_event_cpus *) ev->data;
+
+		map = cpu_map__new_data(&ev_cpus->cpus);
+		if (map)
+			evsel->own_cpus = map;
+		else
+			pr_err("failed to get event_update cpus\n");
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
 					struct perf_evlist *evlist,
 					perf_event__handler_t process)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 05f27cb6b7e36a2e663f7c18fb22513bc5280af2..cff9892452ee393764726a12895ad95d36f766fe 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -31,6 +31,7 @@ enum {
 	HEADER_PMU_MAPPINGS,
 	HEADER_GROUP_DESC,
 	HEADER_AUXTRACE,
+	HEADER_STAT,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
@@ -105,8 +106,24 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
 int perf_event__synthesize_attrs(struct perf_tool *tool,
 				 struct perf_session *session,
 				 perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+					     struct perf_evsel *evsel,
+					     perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+					      struct perf_evsel *evsel,
+					      perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool,
+					     struct perf_evsel *evsel,
+					     perf_event__handler_t process);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+					     struct perf_evsel *evsel,
+					     perf_event__handler_t process);
 int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
 			     struct perf_evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
 
 int perf_event__synthesize_tracing_data(struct perf_tool *tool,
 					int fd, struct perf_evlist *evlist,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9774686525b432d096b85da44435613a7b31e35d..d5636ba94b20f722d3f521614e8fae08b620fad4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -17,6 +17,7 @@
 #include "asm/bug.h"
 #include "auxtrace.h"
 #include "thread-stack.h"
+#include "stat.h"
 
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
@@ -36,6 +37,9 @@ static int perf_session__open(struct perf_session *session)
 	if (perf_data_file__is_pipe(file))
 		return 0;
 
+	if (perf_header__has_feat(&session->header, HEADER_STAT))
+		return 0;
+
 	if (!perf_evlist__valid_sample_type(session->evlist)) {
 		pr_err("non matching sample_type\n");
 		return -1;
@@ -205,6 +209,18 @@ static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+						 union perf_event *event __maybe_unused,
+						 struct perf_evlist **pevlist
+						 __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_event_update(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
 				     union perf_event *event __maybe_unused,
 				     struct perf_sample *sample __maybe_unused,
@@ -296,6 +312,67 @@ int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+
+static
+int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct perf_session *session __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_thread_map(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event __maybe_unused,
+			       struct perf_session *session __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_cpu_map(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event __maybe_unused,
+				   struct perf_session *session __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_config(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_stat_stub(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event __maybe_unused,
+			     struct perf_session *perf_session
+			     __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event __maybe_unused,
+				   struct perf_session *perf_session
+				   __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_round(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
@@ -328,6 +405,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->unthrottle = process_event_stub;
 	if (tool->attr == NULL)
 		tool->attr = process_event_synth_attr_stub;
+	if (tool->event_update == NULL)
+		tool->event_update = process_event_synth_event_update_stub;
 	if (tool->tracing_data == NULL)
 		tool->tracing_data = process_event_synth_tracing_data_stub;
 	if (tool->build_id == NULL)
@@ -346,6 +425,16 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->auxtrace = process_event_auxtrace_stub;
 	if (tool->auxtrace_error == NULL)
 		tool->auxtrace_error = process_event_auxtrace_error_stub;
+	if (tool->thread_map == NULL)
+		tool->thread_map = process_event_thread_map_stub;
+	if (tool->cpu_map == NULL)
+		tool->cpu_map = process_event_cpu_map_stub;
+	if (tool->stat_config == NULL)
+		tool->stat_config = process_event_stat_config_stub;
+	if (tool->stat == NULL)
+		tool->stat = process_stat_stub;
+	if (tool->stat_round == NULL)
+		tool->stat_round = process_stat_round_stub;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -569,6 +658,13 @@ static void perf_event__hdr_attr_swap(union perf_event *event,
 	mem_bswap_64(event->attr.id, size);
 }
 
+static void perf_event__event_update_swap(union perf_event *event,
+					  bool sample_id_all __maybe_unused)
+{
+	event->event_update.type = bswap_64(event->event_update.type);
+	event->event_update.id   = bswap_64(event->event_update.id);
+}
+
 static void perf_event__event_type_swap(union perf_event *event,
 					bool sample_id_all __maybe_unused)
 {
@@ -616,6 +712,81 @@ static void perf_event__auxtrace_error_swap(union perf_event *event,
 	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
 }
 
+static void perf_event__thread_map_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	unsigned i;
+
+	event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+	for (i = 0; i < event->thread_map.nr; i++)
+		event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+static void perf_event__cpu_map_swap(union perf_event *event,
+				     bool sample_id_all __maybe_unused)
+{
+	struct cpu_map_data *data = &event->cpu_map.data;
+	struct cpu_map_entries *cpus;
+	struct cpu_map_mask *mask;
+	unsigned i;
+
+	data->type = bswap_64(data->type);
+
+	switch (data->type) {
+	case PERF_CPU_MAP__CPUS:
+		cpus = (struct cpu_map_entries *)data->data;
+
+		cpus->nr = bswap_16(cpus->nr);
+
+		for (i = 0; i < cpus->nr; i++)
+			cpus->cpu[i] = bswap_16(cpus->cpu[i]);
+		break;
+	case PERF_CPU_MAP__MASK:
+		mask = (struct cpu_map_mask *) data->data;
+
+		mask->nr = bswap_16(mask->nr);
+		mask->long_size = bswap_16(mask->long_size);
+
+		switch (mask->long_size) {
+		case 4: mem_bswap_32(&mask->mask, mask->nr); break;
+		case 8: mem_bswap_64(&mask->mask, mask->nr); break;
+		default:
+			pr_err("cpu_map swap: unsupported long size\n");
+		}
+	default:
+		break;
+	}
+}
+
+static void perf_event__stat_config_swap(union perf_event *event,
+					 bool sample_id_all __maybe_unused)
+{
+	u64 size;
+
+	size  = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+	size += 1; /* nr item itself */
+	mem_bswap_64(&event->stat_config.nr, size);
+}
+
+static void perf_event__stat_swap(union perf_event *event,
+				  bool sample_id_all __maybe_unused)
+{
+	event->stat.id     = bswap_64(event->stat.id);
+	event->stat.thread = bswap_32(event->stat.thread);
+	event->stat.cpu    = bswap_32(event->stat.cpu);
+	event->stat.val    = bswap_64(event->stat.val);
+	event->stat.ena    = bswap_64(event->stat.ena);
+	event->stat.run    = bswap_64(event->stat.run);
+}
+
+static void perf_event__stat_round_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	event->stat_round.type = bswap_64(event->stat_round.type);
+	event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
 typedef void (*perf_event__swap_op)(union perf_event *event,
 				    bool sample_id_all);
 
@@ -643,6 +814,12 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
 	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
 	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
+	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
+	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
+	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
+	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
+	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
+	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
 	[PERF_RECORD_HEADER_MAX]	  = NULL,
 };
 
@@ -1154,6 +1331,8 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 			perf_session__set_comm_exec(session);
 		}
 		return err;
+	case PERF_RECORD_EVENT_UPDATE:
+		return tool->event_update(tool, event, &session->evlist);
 	case PERF_RECORD_HEADER_EVENT_TYPE:
 		/*
 		 * Depreceated, but we need to handle it for sake
@@ -1179,6 +1358,16 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 	case PERF_RECORD_AUXTRACE_ERROR:
 		perf_session__auxtrace_error_inc(session, event);
 		return tool->auxtrace_error(tool, event, session);
+	case PERF_RECORD_THREAD_MAP:
+		return tool->thread_map(tool, event, session);
+	case PERF_RECORD_CPU_MAP:
+		return tool->cpu_map(tool, event, session);
+	case PERF_RECORD_STAT_CONFIG:
+		return tool->stat_config(tool, event, session);
+	case PERF_RECORD_STAT:
+		return tool->stat(tool, event, session);
+	case PERF_RECORD_STAT_ROUND:
+		return tool->stat_round(tool, event, session);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2d9d8306dbd3f97f3d1f9734ace17b25d9ed583a..2f901d15e06370d26d579869d351811f4d31f940 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -341,3 +341,65 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 
 	return 0;
 }
+
+int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event,
+				   struct perf_session *session)
+{
+	struct perf_counts_values count;
+	struct stat_event *st = &event->stat;
+	struct perf_evsel *counter;
+
+	count.val = st->val;
+	count.ena = st->ena;
+	count.run = st->run;
+
+	counter = perf_evlist__id2evsel(session->evlist, st->id);
+	if (!counter) {
+		pr_err("Failed to resolve counter for stat event.\n");
+		return -EINVAL;
+	}
+
+	*perf_counts(counter->counts, st->cpu, st->thread) = count;
+	counter->supported = true;
+	return 0;
+}
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+	struct stat_event *st = (struct stat_event *) event;
+	size_t ret;
+
+	ret  = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
+		       st->id, st->cpu, st->thread);
+	ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
+		       st->val, st->ena, st->run);
+
+	return ret;
+}
+
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+	struct stat_round_event *rd = (struct stat_round_event *)event;
+	size_t ret;
+
+	ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
+		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+	return ret;
+}
+
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+	struct perf_stat_config sc;
+	size_t ret;
+
+	perf_event__read_stat_config(&sc, &event->stat_config);
+
+	ret  = fprintf(fp, "\n");
+	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+	ret += fprintf(fp, "... scale     %d\n", sc.scale);
+	ret += fprintf(fp, "... interval  %u\n", sc.interval);
+
+	return ret;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index da1d11c4f8c193cff476fec76868277b12f7c623..086f4e128d6351f0e0de862c2ebcc44801cbc2f8 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -90,4 +90,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct perf_evsel *counter);
+struct perf_tool;
+union perf_event;
+struct perf_session;
+int perf_event__process_stat_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_session *session);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
 #endif
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 371fb28fe5b1b95481c910cb2bf56068fe0509e2..08afc69099538f66172968dc3827fd9b7b40d5c2 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -13,6 +13,7 @@
 #include "thread_map.h"
 #include "util.h"
 #include "debug.h"
+#include "event.h"
 
 /* Skip "." and ".." directories */
 static int filter(const struct dirent *dir)
@@ -409,3 +410,29 @@ void thread_map__read_comms(struct thread_map *threads)
 	for (i = 0; i < threads->nr; ++i)
 		comm_init(threads, i);
 }
+
+static void thread_map__copy_event(struct thread_map *threads,
+				   struct thread_map_event *event)
+{
+	unsigned i;
+
+	threads->nr = (int) event->nr;
+
+	for (i = 0; i < event->nr; i++) {
+		thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid);
+		threads->map[i].comm = strndup(event->entries[i].comm, 16);
+	}
+
+	atomic_set(&threads->refcnt, 1);
+}
+
+struct thread_map *thread_map__new_event(struct thread_map_event *event)
+{
+	struct thread_map *threads;
+
+	threads = thread_map__alloc(event->nr);
+	if (threads)
+		thread_map__copy_event(threads, event);
+
+	return threads;
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index af679d8a50f852ebb3457491f5dc4b96f6520743..85e4c7c4fbde1fd5455ed0fa58375aac3934b445 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -16,11 +16,14 @@ struct thread_map {
 	struct thread_map_data map[];
 };
 
+struct thread_map_event;
+
 struct thread_map *thread_map__new_dummy(void);
 struct thread_map *thread_map__new_by_pid(pid_t pid);
 struct thread_map *thread_map__new_by_tid(pid_t tid);
 struct thread_map *thread_map__new_by_uid(uid_t uid);
 struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__new_event(struct thread_map_event *event);
 
 struct thread_map *thread_map__get(struct thread_map *map);
 void thread_map__put(struct thread_map *map);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index cab8cc24831bf479d3776f818c07996dad7ac57c..55de4cffcd4e9ae2ee2063dc5efc05d0d72fc7db 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -50,12 +50,18 @@ struct perf_tool {
 			throttle,
 			unthrottle;
 	event_attr_op	attr;
+	event_attr_op	event_update;
 	event_op2	tracing_data;
 	event_oe	finished_round;
 	event_op2	build_id,
 			id_index,
 			auxtrace_info,
-			auxtrace_error;
+			auxtrace_error,
+			thread_map,
+			cpu_map,
+			stat_config,
+			stat,
+			stat_round;
 	event_op3	auxtrace;
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;