[Virtio-fs] [RFC PATCH 1/2] virtiofsd: add stat tools
Eryu Guan
eguan at linux.alibaba.com
Mon Aug 19 05:14:43 UTC 2019
On Mon, Aug 19, 2019 at 11:41:13AM +0800, Gang Deng wrote:
> There exist two components: vtrace && vstat. vtrace is embeded in virtiofsd,
> it will put raw statistics data into share memory. Then the vstat tool could
> parse it and do some post processing works. The performance overhead of
> vtrace is very small because it does very simple things.
>
> For example, if we call open(2)/close(2) frequently in guest, and
> randwite a file whose length is greater than the size of dax window.
> We'll get the output as below:
>
> op inflt op/s svctm/us %util
> FUSE_OPEN(14) 0 8379.87 3.24 2.71%
> FUSE_RELEASE(18) 0 8379.87 1.77 1.48%
> FUSE_FLUSH(25) 0 8379.87 2.04 1.71%
> FUSE_SETUPMAPPING(48) 1 6393.90 34.72 22.20%
> FUSE_REMOVEMAPPING(49) 0 6404.90 37.61 24.09%
> TOTAL 1 37938.39 13.76 52.20%
>
> The meaning of fields:
>
> - op
> The type of fuse requests, 'TOTAL' is sum of all.
>
> - inflt
> The number of the inflight requests, it must be ethier 0 or 1 because
> virtiofsd can only process fuse requests serially.
>
> - op/s
> The number of fuse requests completed per second.
>
> - svctm/us
> The average service time (in microseconds) for fuse requests.
>
> - %util
> Percentage of elapsed time during which virtiofsd was processing the fuse
> requests.
>
> when virtiofsd is hang, e.g. we support flock in host (just for example,
> this has been fxied), we'll get this:
>
> op inflt op/s svctm/us %util
> FUSE_SETLKW(33) 1 0.00 0.00 100.00%
> TOTAL 1 0.00 0.00 100.00%
>
> the utilization is 100% and op/s equals zero, it indicates hang.
>
> If virtiofsd is idle, then the output looks like this:
>
> op inflt op/s svctm/us %util
> TOTAL 0 0.00 0.00 0.00%
>
> Signed-off-by: Gang Deng <gavin.dg at linux.alibaba.com>
This looks useful to me! Would you like to provide an example command
line usage in commit log too? Because I find that it's not obvious how
the vtrace/vstat are used.
Thanks,
Eryu
> ---
> contrib/virtiofsd/vstat.c | 680 +++++++++++++++++++++++++++++++++++++
> contrib/virtiofsd/vtrace.c | 95 ++++++
> contrib/virtiofsd/vtrace.h | 53 +++
> 3 files changed, 828 insertions(+)
> create mode 100644 contrib/virtiofsd/vstat.c
> create mode 100644 contrib/virtiofsd/vtrace.c
> create mode 100644 contrib/virtiofsd/vtrace.h
>
> diff --git a/contrib/virtiofsd/vstat.c b/contrib/virtiofsd/vstat.c
> new file mode 100644
> index 0000000000..aa7048fba4
> --- /dev/null
> +++ b/contrib/virtiofsd/vstat.c
> @@ -0,0 +1,680 @@
> +#include <assert.h>
> +#include <dirent.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/queue.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <time.h>
> +#include <unistd.h>
> +
> +#include "fuse_kernel.h"
> +#include "vtrace.h"
> +
> +/*
> + * There exist two components: vtrace && vstat. vtrace is embeded in virtiofsd,
> + * it will put raw statistics data into share memory. Then the vstat tool could
> + * parse it and do some post processing works. The performance overhead of
> + * vtrace is very small because it does very simple things.
> + *
> + * For example, if we call open(2)/close(2) frequently in guest, and
> + * randwite a file whose length is greater than the size of dax window.
> + * We'll get the output as below:
> + *
> + * op inflt op/s svctm/us %util
> + * FUSE_OPEN(14) 0 8379.87 3.24 2.71%
> + * FUSE_RELEASE(18) 0 8379.87 1.77 1.48%
> + * FUSE_FLUSH(25) 0 8379.87 2.04 1.71%
> + * FUSE_SETUPMAPPING(48) 1 6393.90 34.72 22.20%
> + * FUSE_REMOVEMAPPING(49) 0 6404.90 37.61 24.09%
> + * TOTAL 1 37938.39 13.76 52.20%
> + *
> + * The meaning of fields:
> + *
> + * - op
> + * The type of fuse requests, 'TOTAL' is sum of all.
> + *
> + * - inflt
> + * The number of the inflight requests, it must be ethier 0 or 1 because
> + * virtiofsd can only process fuse requests serially.
> + *
> + * - op/s
> + * The number of fuse requests completed per second.
> + *
> + * - svctm/us
> + * The average service time (in microseconds) for fuse requests.
> + *
> + * - %util
> + * Percentage of elapsed time during which virtiofsd was processing the fuse
> + * requests.
> + *
> + * when virtiofsd is hang, e.g. we support flock in host (just for example,
> + * this has been fxied), we'll get this:
> + *
> + * op inflt op/s svctm/us %util
> + * FUSE_SETLKW(33) 1 0.00 0.00 100.00%
> + * TOTAL 1 0.00 0.00 100.00%
> + *
> + * the utilization is 100% and op/s equals zero, it indicates hang.
> + *
> + * If virtiofsd is idle, then the output looks like this:
> + *
> + * op inflt op/s svctm/us %util
> + * TOTAL 0 0.00 0.00 0.00%
> + *
> + *
> + *TODO:
> + * Vstat was designed to scan VIRTIOFS_TRACE_DIR directory to get all virtiofs
> + * devices. However it's not supported yet. Because virtiofsd couldn't unlink
> + * the trace file when exited due to the sandboxing, actually we unlink the
> + * trace file when inited. Then vstat can only access the trace file through
> + * the /proc/<virtiofs-pid>/fd/<trace-file> (which needs root privilege)
> + * This should be refactored later if virtiofsd could access /dev/shm
> + * directory, then vstat can run as nobody and be able to scan all devices
> + * like iostat tool.
> + */
> +
> +#define MS_PER_SEC 1000
> +#define US_PER_SEC 1000000
> +#define NS_PER_SEC 1000000000
> +#define SEC_PER_DAY (3600 * 24)
> +#define usec_to_sec(usec) ((double)(usec) / US_PER_SEC)
> +#define usec_to_nsec(usec) (usec * MS_PER_SEC)
> +#define sec_to_usec(sec) (sec * US_PER_SEC)
> +
> +struct trace_entry {
> + int valid;
> + struct virtiofs_trace trace[2];
> + char trace_path[PATH_MAX];
> +
> + TAILQ_ENTRY(trace_entry) entries;
> +};
> +
> +static uint64_t tsc_resolution_hz;
> +static bool g_print_timestamp = false;
> +static int g_interval = 1;
> +static struct timeval g_ts[2];
> +
> +TAILQ_HEAD(, trace_entry) g_trace_head;
> +
> +
> +static inline double us_to_tick(uint64_t us)
> +{
> + return ((double)us) / US_PER_SEC * tsc_resolution_hz;
> +}
> +static inline double tick_to_us(uint64_t tick)
> +{
> + return ((double)tick) / tsc_resolution_hz * US_PER_SEC;
> +}
> +
> +static inline double tick_to_ms(uint64_t tick)
> +{
> + return ((double)tick) / tsc_resolution_hz * MS_PER_SEC;
> +}
> +
> +#define __case_ret_str(val) case(val): return #val
> +static const char *fuse_op2str(int op)
> +{
> + switch(op) {
> + __case_ret_str(FUSE_LOOKUP);
> + __case_ret_str(FUSE_FORGET);
> + __case_ret_str(FUSE_GETATTR);
> + __case_ret_str(FUSE_SETATTR);
> + __case_ret_str(FUSE_READLINK);
> + __case_ret_str(FUSE_SYMLINK);
> + __case_ret_str(FUSE_MKNOD);
> + __case_ret_str(FUSE_MKDIR);
> + __case_ret_str(FUSE_UNLINK);
> + __case_ret_str(FUSE_RMDIR);
> + __case_ret_str(FUSE_RENAME);
> + __case_ret_str(FUSE_LINK);
> + __case_ret_str(FUSE_OPEN);
> + __case_ret_str(FUSE_READ);
> + __case_ret_str(FUSE_WRITE);
> + __case_ret_str(FUSE_STATFS);
> + __case_ret_str(FUSE_RELEASE);
> + __case_ret_str(FUSE_FSYNC);
> + __case_ret_str(FUSE_SETXATTR);
> + __case_ret_str(FUSE_GETXATTR);
> + __case_ret_str(FUSE_LISTXATTR);
> + __case_ret_str(FUSE_REMOVEXATTR);
> + __case_ret_str(FUSE_FLUSH);
> + __case_ret_str(FUSE_INIT);
> + __case_ret_str(FUSE_OPENDIR);
> + __case_ret_str(FUSE_READDIR);
> + __case_ret_str(FUSE_RELEASEDIR);
> + __case_ret_str(FUSE_FSYNCDIR);
> + __case_ret_str(FUSE_GETLK);
> + __case_ret_str(FUSE_SETLK);
> + __case_ret_str(FUSE_SETLKW);
> + __case_ret_str(FUSE_ACCESS);
> + __case_ret_str(FUSE_CREATE);
> + __case_ret_str(FUSE_INTERRUPT);
> + __case_ret_str(FUSE_BMAP);
> + __case_ret_str(FUSE_DESTROY);
> + __case_ret_str(FUSE_IOCTL);
> + __case_ret_str(FUSE_POLL);
> + __case_ret_str(FUSE_NOTIFY_REPLY);
> + __case_ret_str(FUSE_BATCH_FORGET);
> + __case_ret_str(FUSE_FALLOCATE);
> + __case_ret_str(FUSE_READDIRPLUS);
> + __case_ret_str(FUSE_RENAME2);
> + __case_ret_str(FUSE_LSEEK);
> + __case_ret_str(FUSE_COPY_FILE_RANGE);
> + __case_ret_str(FUSE_SETUPMAPPING);
> + __case_ret_str(FUSE_REMOVEMAPPING);
> + default: return "OP_UNKNOWN";
> + }
> +}
> +
> +static void virtiofs_dump_trace(struct virtiofs_trace *trace)
> +{
> + int op;
> + char op_buf[24];
> + struct fuse_op_stat *stat;
> +
> + fprintf(stdout, "mountpoint %s\n", trace->mountpoint);
> + fprintf(stdout, "version 0x%x\n", trace->version);
> + fprintf(stdout, "%-24s %6s %16s %16s\n",
> + "op", "inflt", "done", "elapsed(ms)");
> + for (op = 0; op < VIRTIOFS_MAX_OP; op++) {
> + stat = &trace->stats[op];
> +
> + /* filter noop */
> + if (!stat->done && !stat->inflight)
> + continue;
> + snprintf(op_buf, sizeof(op_buf), "%s(%d)", fuse_op2str(op), op);
> + fprintf(stdout, "%-24s %6"PRIu64" %16"PRIu64" %16.2f\n",
> + op_buf,
> + stat->inflight,
> + stat->done,
> + tick_to_ms(stat->elapsed_ticks));
> + }
> +}
> +
> +
> +/*
> + * TODO: trace_lookup/add/del was supposed to be used when vstat scan and show
> + * multiple devices.
> + */
> +static inline struct trace_entry *trace_lookup(const char *trace_path)
> +{
> + struct trace_entry *entry;
> +
> + TAILQ_FOREACH(entry, &g_trace_head, entries) {
> + if (!strncmp(trace_path, entry->trace_path,
> + sizeof(entry->trace_path)))
> + return entry;
> + }
> +
> + return NULL;
> +}
> +
> +static inline void trace_add(struct trace_entry *entry)
> +{
> + TAILQ_INSERT_TAIL(&g_trace_head, entry, entries);
> +}
> +
> +static inline void trace_del(struct trace_entry *entry)
> +{
> + TAILQ_REMOVE(&g_trace_head, entry, entries);
> +}
> +
> +static struct trace_entry *get_trace_by_path(const char *trace_path)
> +{
> + struct trace_entry *entry = trace_lookup(trace_path);
> + if (entry)
> + return entry;
> +
> + entry = (struct trace_entry *)malloc(sizeof(*entry));
> + if (entry == NULL)
> + return NULL;
> + memset(entry, 0, sizeof(*entry));
> + snprintf(entry->trace_path, sizeof(entry->trace_path),
> + "%s", trace_path);
> + trace_add(entry);
> + return entry;
> +}
> +
> +static void trace_put(struct trace_entry *entry)
> +{
> + trace_del(entry);
> + free(entry);
> +}
> +
> +static int virtiofs_read_trace_one_int(const char *trace_path,
> + struct virtiofs_trace *trace)
> +{
> + int trace_fd, ret;
> + struct stat s;
> + ssize_t read_len;
> +
> + if (!trace_path)
> + return -1;
> +
> + trace_fd = open(trace_path, O_RDONLY);
> + if (trace_fd == -1) {
> + fprintf(stderr, "open %s failed [%d]\n", trace_path, -errno);
> + return -1;
> + }
> +
> + ret = fstat(trace_fd, &s);
> + if (ret != 0) {
> + fprintf(stderr, "fstat %s failed [%d]\n", trace_path, -errno);
> + goto failed;
> + }
> +
> + /*
> + * Treat it as version not compatible, just a stub because we have only
> + * one version.
> + */
> + if ((size_t)s.st_size != (sizeof(struct virtiofs_trace))) {
> + fprintf(stderr, "size %zu != %zu miss matched\n",
> + s.st_size, sizeof(struct virtiofs_trace));
> + goto failed;
> + }
> +
> + /* read stat file */
> + read_len = read(trace_fd, trace, s.st_size);
> + if (read_len != s.st_size) {
> + fprintf(stderr, "read failed %ld(%ld)\n", read_len, s.st_size);
> + goto failed;
> + }
> +
> + /*
> + * Check version, vstat must be compatible to old vtrace version if
> + * it exist.
> + */
> + if (trace->version != VIRTIOFS_TRACE_VERSION) {
> + fprintf(stderr, "version 0x%x != 0x%x miss matched\n",
> + trace->version, VIRTIOFS_TRACE_VERSION);
> + goto failed;
> + }
> +
> + close(trace_fd);
> + return 0;
> +failed:
> + close(trace_fd);
> + return -1;
> +}
> +
> +static int virtiofs_read_trace_one(const char *trace_path, int curr)
> +{
> + struct trace_entry *entry = get_trace_by_path(trace_path);
> + if (entry == NULL)
> + return -1;
> + if (!virtiofs_read_trace_one_int(trace_path, &entry->trace[curr]))
> + entry->valid = 1;
> + return 0;
> +}
> +
> +/* Detect whether it's a trace file. */
> +static bool is_virtiofs_pci(const char *trace_path)
> +{
> + int fd;
> + uint64_t val;
> + ssize_t len;
> +
> + if (access(trace_path, R_OK))
> + return false;
> +
> + fd = open(trace_path, O_RDONLY);
> + if (fd == -1)
> + return false;
> + len = read(fd, &val, sizeof(val));
> + close(fd);
> +
> + if (len != sizeof(val))
> + return false;
> +
> + return (val == VIRTIOFS_TRACE_MAGIC);
> +}
> +
> +static void destroy_invalid_trace(void)
> +{
> + struct trace_entry *entry;
> + TAILQ_FOREACH(entry, &g_trace_head, entries) {
> + if (!entry->valid) {
> + trace_put(entry);
> + continue;
> + }
> + /* mark invalid for next round */
> + entry->valid = 0;
> + }
> +}
> +
> +static void virtiofs_read_trace(int curr, const char *trace_path)
> +{
> + virtiofs_read_trace_one(trace_path, curr);
> + destroy_invalid_trace();
> +}
> +
> +static void alarm_handler(int sig)
> +{
> + alarm(g_interval);
> +}
> +
> +static time_t get_localtime(struct tm *rectime, int d_off)
> +{
> + time_t timer;
> + struct tm *ltm;
> +
> + time(&timer);
> + timer -= SEC_PER_DAY * d_off;
> + ltm = localtime(&timer);
> +
> + if (ltm)
> + *rectime = *ltm;
> + return timer;
> +}
> +
> +static inline uint64_t timeval_sub(struct timeval t1, struct timeval t2)
> +{
> + int64_t ret;
> +
> + t1.tv_usec -= t2.tv_usec;
> + t1.tv_sec -= t2.tv_sec;
> +
> + ret = sec_to_usec(t1.tv_sec) + t1.tv_usec;
> + return (ret < 0) ? 0 : ret;
> +}
> +
> +/* clamp_positive */
> +static inline int64_t cp(int64_t n)
> +{
> + return (n < 0) ? 0 : n;
> +}
> +
> +#define SUM_FIELD(i, j, field) (((i)->field) += ((j)->field))
> +#define DIFF_FIELD(i, j, field) (cp(((i)->field) - ((j)->field)))
> +#define DIV(m, n) (((n) == 0) ? 0 : ((double)(m) / (n)))
> +#define DVDF_FIELD(i, j, field, n) \
> + DIV((DIFF_FIELD(i, j, field)), (n))
> +#define DVDFF_FIELD(i, j, field0, field1) \
> + DIV((DIFF_FIELD(i, j, field0)), (DIFF_FIELD(i, j, field1)))
> +
> +static void virtiofs_trace_output_one(struct trace_entry *entry,
> + int curr,
> + uint64_t dur /* in us */)
> +{
> + struct virtiofs_trace *t_curr = &entry->trace[curr],
> + *t_prev = &entry->trace[(curr ^ 1)];
> + struct fuse_op_stat *s_curr, *s_prev, tot_curr = {0}, tot_prev = {0};
> + int op;
> + char op_buf[24];
> + uint64_t deco_ticks;
> +
> + fprintf(stdout, "%-24s %6s %12s %12s %7s\n",
> + "op", "inflt", "op/s", "svctm/us", "%util");
> + for (op = 0; op < VIRTIOFS_MAX_OP; op++) {
> + s_curr = &t_curr->stats[op];
> + s_prev = &t_prev->stats[op];
> +
> + /* filter noop */
> + if (!DIFF_FIELD(s_curr, s_prev, done) && !s_curr->inflight)
> + continue;
> +
> + /* To detect hung */
> + if ((s_curr->done == s_prev->done) &&
> + (s_curr->inflight >= s_prev->inflight) &&
> + (s_prev->inflight > 0))
> + deco_ticks = us_to_tick(dur);
> + else
> + deco_ticks = 0;
> + s_curr->elapsed_ticks += deco_ticks;
> + snprintf(op_buf, sizeof(op_buf), "%s(%d)", fuse_op2str(op), op);
> +#define __OUTPUT_ONE(curr, prev) do { \
> + fprintf(stdout, "%-24s %6"PRIu64" %12.2f %12.2f %6.2f%%\n", \
> + op_buf, \
> + (curr)->inflight, \
> + DVDF_FIELD(curr, prev, done, usec_to_sec(dur)), \
> + tick_to_us(DVDFF_FIELD(curr, prev, elapsed_ticks, done)), \
> + DVDF_FIELD(curr, prev, elapsed_ticks, us_to_tick(dur)) * 100);\
> +} while (0)
> + __OUTPUT_ONE(s_curr, s_prev);
> +
> +#define __ACCUMULATE(tot, s) do { \
> + SUM_FIELD(tot, s, inflight); \
> + SUM_FIELD(tot, s, done); \
> + SUM_FIELD(tot, s, elapsed_ticks); \
> +} while (0)
> + __ACCUMULATE(&tot_curr, s_curr);
> + __ACCUMULATE(&tot_prev, s_prev);
> + s_curr->elapsed_ticks -= deco_ticks;
> + }
> +
> + snprintf(op_buf, sizeof(op_buf), "%s", "TOTAL");
> + __OUTPUT_ONE(&tot_curr, &tot_prev);
> +}
> +
> +static void virtiofs_trace_output(int curr, struct tm *rectime)
> +{
> + struct trace_entry *entry;
> + uint64_t duration;
> + char timestamp[64];
> +
> + duration = timeval_sub(g_ts[curr], g_ts[curr^1]);
> + if (duration == 0) {
> + fprintf(stderr, "duration is zero, impossible\n");
> + return;
> + }
> +
> + fprintf(stdout, "\n");
> + if (g_print_timestamp) {
> + strftime(timestamp, sizeof(timestamp),
> + "%Y-%m-%d %H:%M:%S", rectime);
> + fprintf(stdout, "%s\n", timestamp);
> + }
> +
> + /* output body */
> + TAILQ_FOREACH(entry, &g_trace_head, entries)
> + virtiofs_trace_output_one(entry, curr, duration);
> +}
> +
> +static int do_stat_summary(const char *trace_path)
> +{
> + struct virtiofs_trace trace;
> + int ret;
> +
> + ret = virtiofs_read_trace_one_int(trace_path, &trace);
> + if (ret)
> + return ret;
> + virtiofs_dump_trace(&trace);
> + return 0;
> +}
> +
> +static int do_stat_loop(int interval, int count, const char *trace_path)
> +{
> + int curr = 0;
> + struct sigaction alrm_act;
> + struct tm rectime;
> +
> + TAILQ_INIT(&g_trace_head);
> +
> + if (interval <= 0)
> + return -1;
> +
> + g_interval = interval;
> + memset(&alrm_act, 0, sizeof(alrm_act));
> + alrm_act.sa_handler = alarm_handler;
> + sigaction(SIGALRM, &alrm_act, NULL);
> + alarm(g_interval);
> +
> + get_localtime(&rectime, 0);
> +
> + do {
> + /* read traces */
> + gettimeofday(&g_ts[curr], NULL);
> + virtiofs_read_trace(curr, trace_path);
> +
> + /* write stats */
> + get_localtime(&rectime, 0);
> + virtiofs_trace_output(curr, &rectime);
> + if (count > 0)
> + count--;
> +
> + if (count) {
> + curr ^= 1;
> + pause();
> + }
> + } while (count);
> + return 0;
> +
> +}
> +
> +static void vstat_usage(void)
> +{
> + fprintf(stderr,
> + "\tvstat [-t] [-s] <trace_path> [interval [count]]\n");
> +}
> +
> +/* parse the interval and count like iostat */
> +static int get_interval_count(int *argcp, char **argv, int *iv, int *cnt)
> +{
> + long interval = 1, count = 0;
> + int argc = *argcp;
> +
> + /* Determin if the last argument is an virtio-fs-pci device id */
> + if (argc > 0 && !is_virtiofs_pci(argv[argc - 1])) {
> + char *end;
> +
> + errno = 0;
> + interval = strtoul(argv[argc - 1], &end, 10);
> +
> + if (*end == '\0' && errno == 0) {
> + if (interval == 0) {
> + fprintf(stderr, "interval cannot be zero\n");
> + vstat_usage();
> + return 1;
> + }
> + /* Ignore the last parameter */
> + argc--;
> +
> + /* run until CTRL^c */
> + count = -1;
> + } else {
> + /*
> + * If this is not a valid number, just plow on. The
> + * user will get a more informative error message later
> + * on.
> + */
> + interval = 0;
> + }
> + }
> +
> + /*
> + * If the last argument is also an integer, then we have both a count
> + * and an interval.
> + */
> + if (argc > 0 && !is_virtiofs_pci(argv[argc - 1])) {
> + char *end;
> +
> + errno = 0;
> + count = interval;
> + interval = strtoul(argv[argc - 1], &end, 10);
> +
> + if (*end == '\0' && errno == 0) {
> + if (interval == 0) {
> + fprintf(stderr, "interval cannot be zero\n");
> + vstat_usage();
> + return 1;
> + }
> + /* Ignore the last parameter */
> + argc--;
> + } else
> + interval = 0;
> + }
> +
> + /* might be overflow. someone is insane! */
> + *iv = (int)interval;
> + *cnt = (int)count;
> + *argcp = argc;
> +
> + return 0;
> +}
> +
> +/* Most code are copied from DPDK. */
> +static uint64_t get_tsc_freq(void)
> +{
> + struct timespec sleeptime = {.tv_nsec = 5E8 }; /* 1/2 second */
> +
> + struct timespec t_start, t_end;
> + uint64_t tsc_hz;
> +
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
> + uint64_t ns, end, start = vtrace_rdtsc();
> + nanosleep(&sleeptime,NULL);
> + clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
> + end = vtrace_rdtsc();
> + ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC);
> + ns += (t_end.tv_nsec - t_start.tv_nsec);
> +
> + double secs = (double)ns/NS_PER_SEC;
> + tsc_hz = (uint64_t)((end - start)/secs);
> + return tsc_hz;
> + } else {
> + uint64_t start = vtrace_rdtsc();
> + sleep(1);
> + return vtrace_rdtsc() - start;
> + }
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + char *trace_path = NULL;
> + bool summary = false;
> + int interval = 1, count = -1;
> + int c, ret;
> +
> + while ((c = getopt(argc, argv, "sth")) != -1) {
> + switch (c) {
> + case 's':
> + summary = true;
> + break;
> + case 't':
> + g_print_timestamp = true;
> + break;
> + case 'h':
> + vstat_usage();
> + return 0;
> + default:
> + vstat_usage();
> + return 1;
> + }
> + }
> +
> + argc -= optind;
> + argv += optind;
> +
> + ret = get_interval_count(&argc, argv, &interval, &count);
> + if (ret)
> + return ret;
> +
> + if (argc) {
> + assert(argc == 1);
> + trace_path = argv[0];
> + }
> +
> + /* Trace_path must be supplied currently, maybe refactored later. */
> + if (!trace_path || !is_virtiofs_pci(trace_path)) {
> + fprintf(stderr, "Must specify correct trace path.\n");
> + vstat_usage();
> + return -1;
> + }
> +
> + tsc_resolution_hz = get_tsc_freq();
> +
> + if (summary)
> + return do_stat_summary(trace_path);
> + else
> + return do_stat_loop(interval, count, trace_path);
> +}
> diff --git a/contrib/virtiofsd/vtrace.c b/contrib/virtiofsd/vtrace.c
> new file mode 100644
> index 0000000000..b94f9c68f5
> --- /dev/null
> +++ b/contrib/virtiofsd/vtrace.c
> @@ -0,0 +1,95 @@
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +#include "fuse_log.h"
> +#include "vtrace.h"
> +
> +struct virtiofs_trace *virtiofs_trace_init(const char *mountpoint)
> +{
> + struct virtiofs_trace *trace = NULL;
> + char trace_path[PATH_MAX];
> + size_t trace_size = sizeof(struct virtiofs_trace);
> + int trace_fd;
> +
> + if (!mountpoint) {
> + fuse_err("virtiofs_trace null mountpoint\n");
> + return NULL;
> + }
> +
> + /* TODO: vm id and the tag may be more suitable */
> + snprintf(trace_path, sizeof(trace_path), "/vtrace%d", getpid());
> + trace_fd = shm_open(trace_path, O_CREAT | O_RDWR, 0644);
> + if (trace_fd == -1) {
> + fuse_err("shm_open: %s failed[%d]\n", trace_path, -errno);
> + return NULL;
> + }
> + if (ftruncate(trace_fd, trace_size) != 0) {
> + fuse_err("ftruncate: %s failed[%d]\n", trace_path, -errno);
> + goto fail;
> + }
> + trace = mmap(0, trace_size, PROT_READ | PROT_WRITE,
> + MAP_SHARED | MAP_LOCKED, trace_fd, 0);
> + if (trace == MAP_FAILED) {
> + fuse_err("mmap: %s failed[%d]\n", trace_path, -errno);
> + goto fail;
> + }
> + memset(trace, 0, trace_size);
> + strncpy(trace->mountpoint, mountpoint, sizeof(trace->mountpoint));
> + trace->version = VIRTIOFS_TRACE_VERSION;
> + trace->trace_fd = trace_fd;
> + trace->magic = VIRTIOFS_TRACE_MAGIC;
> + shm_unlink(trace_path);
> + fuse_info("create virtiofs trace %s succeed\n", trace_path);
> + return trace;
> +
> +fail:
> + if (trace && (trace != MAP_FAILED))
> + munmap(trace, trace_size);
> + if (trace_fd != -1)
> + close(trace_fd);
> + shm_unlink(trace_path);
> + return NULL;
> +}
> +
> +void virtiofs_trace_fin(struct virtiofs_trace *trace)
> +{
> + if (!trace)
> + return;
> +
> + close(trace->trace_fd);
> + munmap(trace, sizeof(struct virtiofs_trace));
> +}
> +
> +void virtiofs_trace_account_op_begin(struct virtiofs_trace *trace, int op)
> +{
> + struct fuse_op_stat *stat;
> +
> + if (op >= VIRTIOFS_MAX_OP)
> + return;
> +
> + stat = &trace->stats[op];
> + stat->inflight++;
> +}
> +
> +void virtiofs_trace_account_op_end(struct virtiofs_trace *trace, int op,
> + int64_t ticks)
> +{
> + struct fuse_op_stat *stat;
> +
> + if (op >= VIRTIOFS_MAX_OP)
> + return;
> +
> + if (ticks < 0) /* tsc overflow */
> + ticks = 0;
> + stat = &trace->stats[op];
> + stat->inflight--;
> + stat->done++;
> + stat->elapsed_ticks += ticks;
> +}
> diff --git a/contrib/virtiofsd/vtrace.h b/contrib/virtiofsd/vtrace.h
> new file mode 100644
> index 0000000000..5ca1b966d6
> --- /dev/null
> +++ b/contrib/virtiofsd/vtrace.h
> @@ -0,0 +1,53 @@
> +#ifndef _VIRTIOFS_TRACE_H_
> +#define _VIRTIOFS_TRACE_H_
> +
> +#include <linux/limits.h>
> +#include <stdint.h>
> +
> +#define VIRTIOFS_TRACE_VERSION 0x1
> +#define VIRTIOFS_TRACE_DIR "/dev/shm"
> +/* hexadecimal format of 'vtrace' */
> +#define VIRTIOFS_TRACE_MAGIC 0x767472616365
> +
> +
> +/* Reserve op num for new added fuse operation. */
> +#define VIRTIOFS_MAX_OP 64
> +
> +struct fuse_op_stat {
> + uint64_t inflight;
> + uint64_t done;
> + uint64_t elapsed_ticks;
> +};
> +
> +struct virtiofs_trace {
> + uint64_t magic;
> + uint32_t version;
> + int trace_fd;
> + struct fuse_op_stat stats[VIRTIOFS_MAX_OP];
> + char mountpoint[PATH_MAX];
> +};
> +
> +struct virtiofs_trace *virtiofs_trace_init(const char *mountpoint);
> +void virtiofs_trace_fin(struct virtiofs_trace *trace);
> +void virtiofs_trace_account_op_begin(struct virtiofs_trace *trace, int op);
> +void virtiofs_trace_account_op_end(struct virtiofs_trace *trace, int op,
> + int64_t ticks);
> +
> +/* NOTE: only x86_64 implementation, copied from DPDK.*/
> +static inline uint64_t vtrace_rdtsc(void)
> +{
> + union {
> + uint64_t tsc_64;
> + struct {
> + uint32_t lo_32;
> + uint32_t hi_32;
> + };
> + } tsc;
> +
> + asm volatile("rdtsc" :
> + "=a" (tsc.lo_32),
> + "=d" (tsc.hi_32));
> + return tsc.tsc_64;
> +}
> +
> +#endif /* _VIRTIOFS_TRACE_H_ */
> --
> 2.20.1.7.g153144c
>
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs at redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs
More information about the Virtio-fs
mailing list