334 lines
12 KiB
C
Raw Normal View History

2018-11-04 14:38:02 +01:00
/*
* Prof
* ====
*
* Self-contained C/C++ profiler library for Linux.
*
* Prof offers a quick way to measure performance events (CPU clock cycles,
* cache misses, branch mispredictions, etc.) of C/C++ code snippets. Prof is
* just a wrapper around the `perf_event_open` system call; its main goal is to
* be easy to set up and painless to use for targeted optimizations, namely, when
* the hot spot has already been identified. Prof is in no way a replacement for
* a fully-fledged profiler like perf, gprof, callgrind, etc.
*
* Please be aware that Prof uses `__attribute__((constructor))` to be as
* straightforward to set up as possible, so it cannot be included more than
* once.
*
* Examples
* --------
*
* ### Minimal
*
* The following snippet prints the rough number of CPU clock cycles spent in
* executing the code between the two Prof calls:
*
* ```c
* #include "prof.h"
*
* int main()
* {
* PROF_START();
* // slow code goes here...
* PROF_STDOUT();
* }
* ```
*
* ### Custom options
*
* The following snippet instead counts both read and write faults of the level
* 1 data cache that occur in the userland code between the two Prof calls:
*
* ```c
* #include <stdio.h>
*
* #define PROF_USER_EVENTS_ONLY
* #define PROF_EVENT_LIST \
* PROF_EVENT_CACHE(L1D, READ, MISS) \
* PROF_EVENT_CACHE(L1D, WRITE, MISS)
* #include "prof.h"
*
* int main()
* {
* uint64_t faults[2] = { 0 };
*
* PROF_START();
* // slow code goes here...
* PROF_DO(faults[index] += counter);
*
* // fast or uninteresting code goes here...
*
* PROF_START();
* // slow code goes here...
* PROF_DO(faults[index] += counter);
*
* printf("Total L1 faults: R = %lu; W = %lu\n", faults[0], faults[1]);
* }
* ```
*
* Installation
* ------------
*
* Just include `prof.h`. Here is a quick way to fetch the latest version:
*
* wget -q https://raw.githubusercontent.com/cyrus-and/prof/master/prof.h
*/
#ifndef PROF_H
#define PROF_H
#include <errno.h>
#include <linux/perf_event.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
/*
* API
* ---
*/
/*
 * (Re)start a measurement: the events are enabled and their counters are then
 * zeroed, so that counting begins afresh from this point.
 *
 * Which events are counted is decided by `PROF_EVENT_LIST`, a sequence of
 * `PROF_EVENT_*` invocations that must be defined before this file is
 * included; when it is left undefined a CPU clock cycle count is used.
 *
 * Defining `PROF_USER_EVENTS_ONLY` before including this file leaves kernel
 * and hypervisor events out of the count.
 */
#define PROF_START() \
    do { \
        PROF_IOCTL_(ENABLE); /* switch the counters on... */ \
        PROF_IOCTL_(RESET);  /* ...and make them start from zero */ \
    } while (0)
/*
 * Describe one event to be monitored. `type` and `config` carry the meaning
 * given to them by the documentation of the `perf_event_open` system call.
 *
 * The expansion is a pair of cast values *followed by a comma*, so several
 * invocations can simply be written one after another to build
 * `PROF_EVENT_LIST`.
 */
#define PROF_EVENT(type, config) (uint32_t)(type), (uint64_t)(config),
/*
 * Shorthand for `PROF_EVENT` with a hardware event: pass `config` without its
 * `PERF_COUNT_HW_` prefix, which is pasted back on here.
 */
#define PROF_EVENT_HW(config) \
    PROF_EVENT(PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## config)
/*
 * Shorthand for `PROF_EVENT` with a software event: pass `config` without its
 * `PERF_COUNT_SW_` prefix, which is pasted back on here.
 */
#define PROF_EVENT_SW(config) \
    PROF_EVENT(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ ## config)
/*
 * Shorthand for `PROF_EVENT` with a cache event. `cache`, `op` and `result`
 * are the names listed in the documentation of the `perf_event_open` system
 * call, written without their `PERF_COUNT_HW_CACHE_`,
 * `PERF_COUNT_HW_CACHE_OP_` and `PERF_COUNT_HW_CACHE_RESULT_` prefixes,
 * respectively.
 *
 * The three values are packed into a single `config`: `cache` in the low
 * bits, `op` shifted left by 8 and `result` shifted left by 16.
 */
#define PROF_EVENT_CACHE(cache, op, result) \
    PROF_EVENT(PERF_TYPE_HW_CACHE, \
               ((PERF_COUNT_HW_CACHE_RESULT_ ## result << 16) | \
                (PERF_COUNT_HW_CACHE_OP_ ## op << 8) | \
                (PERF_COUNT_HW_CACHE_ ## cache)))
/*
 * End a measurement: the events are disabled and the current counter values
 * are loaded into the internal buffer, where `PROF_COUNTERS` exposes them.
 */
#define PROF_STOP() \
    do { \
        PROF_IOCTL_(DISABLE); /* freeze the counters first... */ \
        PROF_READ_COUNTERS_(prof_event_buf_); /* ...then fetch them */ \
    } while (0)
/*
 * The array of counters, as 64 bit unsigned integers, in the same order as
 * the events appear in `PROF_EVENT_LIST`.
 *
 * It is the internal buffer with its first slot skipped (presumably the event
 * count that the kernel writes ahead of the values for a group read -- see the
 * `perf_event_open` documentation).
 */
#define PROF_COUNTERS (&prof_event_buf_[1])
/*
 * Stop counting the events, then run `block` once per event. Inside `block`
 * two 64 bit unsigned variables are available: `index`, the position of the
 * event in the counter array exposed by `PROF_COUNTERS`, and `counter`, the
 * value read for that event.
 *
 * `block` becomes the body of a `for` loop, so `break` and `continue` written
 * in it act on that loop.
 */
#define PROF_DO(block) \
    do { \
        uint64_t prof_pos_; \
        PROF_STOP(); \
        for (prof_pos_ = 0; prof_pos_ < prof_event_cnt_; prof_pos_++) { \
            uint64_t index = prof_pos_; \
            uint64_t counter = *(prof_event_buf_ + 1 + prof_pos_); \
            /* keep the compiler quiet when `block` ignores one of them */ \
            (void)index; \
            (void)counter; \
            block; \
        } \
    } while (0)
/*
 * Variant of `PROF_DO` that takes the name of something *callable* (e.g. a
 * function) instead of a block: for each event, `callback` is called with the
 * two arguments `index` and `counter`, in this order.
 */
#define PROF_CALL(callback) PROF_DO(callback(index, counter))
/*
 * Stop counting the events and write to `file` (a stdio.h `FILE *`) one line
 * per event in `PROF_EVENT_LIST`. Each line holds `index` and `counter` (as
 * defined by `PROF_DO`) separated by a tab character; when there is only one
 * event `index` is omitted and the line holds `counter` alone.
 *
 * `index` and `counter` are `uint64_t`, which is not `unsigned long` on every
 * platform, so they are cast to `unsigned long long` and printed with `%llu`
 * to keep the format string and its arguments in agreement everywhere.
 *
 * Note that `file` is evaluated once per event.
 */
#define PROF_FILE(file) \
    PROF_DO(if (prof_event_cnt_ > 1) { \
                fprintf((file), "%llu\t%llu\n", \
                        (unsigned long long)index, \
                        (unsigned long long)counter); \
            } else { \
                fprintf((file), "%llu\n", (unsigned long long)counter); \
            } \
    )
/*
 * Shortcut for `PROF_FILE` writing to the standard output (`stdout`).
 */
#define PROF_STDOUT() PROF_FILE(stdout)
/*
 * Shortcut for `PROF_FILE` writing to the standard error (`stderr`).
 */
#define PROF_STDERR() PROF_FILE(stderr)
/* DEFAULTS ----------------------------------------------------------------- */
/*
 * Fallback event list, used only when the includer did not define
 * `PROF_EVENT_LIST` itself: a single hardware cycle counter.
 *
 * NOTE(review): the `#ifdef` below can only succeed if
 * `PERF_COUNT_HW_REF_CPU_CYCLES` is a preprocessor macro. In the kernel UAPI
 * header it appears to be an enumerator instead, in which case the
 * `CPU_CYCLES` branch is always the one selected -- TODO confirm against
 * <linux/perf_event.h> before relying on either branch.
 */
#ifndef PROF_EVENT_LIST
#ifdef PERF_COUNT_HW_REF_CPU_CYCLES /* since Linux 3.3 */
#define PROF_EVENT_LIST PROF_EVENT_HW(REF_CPU_CYCLES)
#else
#define PROF_EVENT_LIST PROF_EVENT_HW(CPU_CYCLES)
#endif
#endif
/* UTILITY ------------------------------------------------------------------ */
/*
 * Abort the program unless `x` holds.
 *
 * On failure a one-line diagnostic of the form
 * `# <file>:<line>: PROF error (<strerror(errno)>)` is written to `stderr`
 * (the parenthesized part only when `errno` is non-zero) and `abort` is
 * called. When `x` holds nothing is printed and `errno` is left untouched.
 *
 * `errno` is captured before anything is printed because the stdio calls are
 * allowed to modify it, and the whole message, trailing newline included,
 * goes to `stderr` so that it never ends up split across two streams.
 */
#define PROF_ASSERT_(x) \
    do { \
        if (!(x)) { \
            const int prof_errno_ = errno; \
            fprintf(stderr, "# %s:%d: PROF error", __FILE__, __LINE__); \
            if (prof_errno_) { \
                fprintf(stderr, " (%s)", strerror(prof_errno_)); \
            } \
            fprintf(stderr, "\n"); \
            abort(); \
        } \
    } while (0)
/*
 * Issue the `PERF_EVENT_IOC_<mode>` request on the descriptor `prof_fd_`,
 * passing `PERF_IOC_FLAG_GROUP` as its argument, and abort through
 * `PROF_ASSERT_` if `ioctl` reports a failure (-1).
 */
#define PROF_IOCTL_(mode) \
    do { \
        const int prof_rc_ = ioctl(prof_fd_, \
                                   PERF_EVENT_IOC_ ## mode, \
                                   PERF_IOC_FLAG_GROUP); \
        PROF_ASSERT_(prof_rc_ != -1); \
    } while (0)
/*
 * Read `prof_event_cnt_ + 1` 64 bit values from `prof_fd_` into `buffer` with
 * a single `read` call. Anything other than a complete read (error or short
 * count) aborts the program through `PROF_ASSERT_`.
 *
 * `buffer` must have room for `prof_event_cnt_ + 1` `uint64_t` elements.
 */
#define PROF_READ_COUNTERS_(buffer) \
    do { \
        const ssize_t prof_want_ = \
            (prof_event_cnt_ + 1) * sizeof(uint64_t); \
        const ssize_t prof_got_ = read(prof_fd_, buffer, prof_want_); \
        PROF_ASSERT_(prof_got_ == prof_want_); \
    } while (0)
/* SETUP -------------------------------------------------------------------- */
/* File descriptor of the first event opened by prof_init_; it is the one
 * handed to ioctl(), read() and close() by the rest of this file. */
static int prof_fd_;
/* Number of events opened by prof_init_. */
static uint64_t prof_event_cnt_;
/* Heap buffer of prof_event_cnt_ + 1 64 bit slots, filled by PROF_STOP;
 * allocated in prof_init_ and released in prof_fini. */
static uint64_t *prof_event_buf_;
/*
 * Open one perf event per (type, config) pair found in the variadic arguments
 * and allocate the buffer used to read the counters back.
 *
 * dummy: unused value; present only because `va_start` needs a named
 *        parameter.
 * ...:   a sequence of `uint32_t type, uint64_t config` pairs (what
 *        `PROF_EVENT` expands to), terminated by a lone `(uint32_t)-1`.
 *
 * The first event opened has its descriptor stored in `prof_fd_`; every later
 * event is opened with that descriptor as its group. On return
 * `prof_event_cnt_` holds the number of events and `prof_event_buf_` points to
 * `prof_event_cnt_ + 1` zero-initialized 64 bit slots, so `PROF_COUNTERS`
 * reads as zeros until the first `PROF_STOP`.
 *
 * Any failure (an event cannot be opened, the buffer cannot be allocated)
 * aborts the program through `PROF_ASSERT_`.
 */
static void prof_init_(uint64_t dummy, ...)
{
    uint32_t type;
    va_list ap;

    prof_fd_ = -1;
    prof_event_cnt_ = 0;

    va_start(ap, dummy);
    while (type = va_arg(ap, uint32_t), type != (uint32_t)-1) {
        struct perf_event_attr pe;
        uint64_t config;
        int fd;

        config = va_arg(ap, uint64_t);

        memset(&pe, 0, sizeof(pe));
        pe.size = sizeof(pe);
        pe.read_format = PERF_FORMAT_GROUP;
        pe.type = type;
        pe.config = config;
#ifdef PROF_USER_EVENTS_ONLY
        pe.exclude_kernel = 1;
        pe.exclude_hv = 1;
#endif

        /* pid = 0 and cpu = -1: the calling process, on any CPU. The group
         * argument is -1 for the first event and prof_fd_ for the others. */
        fd = (int)syscall(__NR_perf_event_open, &pe, 0, -1, prof_fd_, 0);
        PROF_ASSERT_(fd != -1);

        if (prof_fd_ == -1) {
            prof_fd_ = fd;
        }
        prof_event_cnt_++;
    }
    va_end(ap);

    /* one extra slot in front of the counters, see PROF_READ_COUNTERS_; the
     * cast keeps the header usable from C++ */
    prof_event_buf_ = (uint64_t *)calloc(prof_event_cnt_ + 1,
                                         sizeof(uint64_t));
    PROF_ASSERT_(prof_event_buf_ != NULL);
}
/*
 * Library set-up hook: thanks to the `constructor` attribute it runs on its
 * own before `main`, opening every event listed in `PROF_EVENT_LIST`.
 *
 * Declared with `(void)` so that the definition is a proper prototype in C as
 * well as in C++.
 */
void __attribute__((constructor)) prof_init(void)
{
    /* every PROF_EVENT_* expansion already ends with a comma, hence none is
     * written between the list and the (uint32_t)-1 terminator */
    prof_init_(0, PROF_EVENT_LIST /*,*/ (uint32_t)-1);
}
/*
 * Library tear-down hook: thanks to the `destructor` attribute it runs on its
 * own at program exit, closing the descriptor kept in `prof_fd_` and freeing
 * the counter buffer.
 *
 * `prof_fd_` is still -1 when `PROF_EVENT_LIST` expands to no event at all;
 * in that case there is nothing to close and `close` is not called, so the
 * program does not abort at exit with a spurious "bad file descriptor" error.
 * Both variables are reset after being released so a stale value cannot be
 * reused by mistake.
 */
void __attribute__((destructor)) prof_fini(void)
{
    if (prof_fd_ != -1) {
        PROF_ASSERT_(close(prof_fd_) != -1);
        prof_fd_ = -1;
    }
    free(prof_event_buf_);
    prof_event_buf_ = NULL;
}
#endif
/*
* License
* -------
*
* Copyright (c) 2017 Andrea Cardaci <cyrus.and@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/