/*
 * stat.h - statistics structures, latency histogram layout, and the
 * reporting/aggregation interfaces implemented in stat.c
 */
#ifndef FIO_STAT_H
#define FIO_STAT_H
#include "iolog.h"
#include "lib/output_buffer.h"
#include "diskutil.h"
#include "json.h"
/*
 * Aggregated statistics for one job group. This struct is packed, so
 * its layout is fixed — presumably it is serialized as-is (cf. the
 * FIO_NET_CMD_TS notes on thread_stat below); do not reorder fields.
 */
struct group_run_stats {
	uint64_t max_run[DDIR_RWDIR_CNT], min_run[DDIR_RWDIR_CNT];	/* longest/shortest runtime per data direction */
	uint64_t max_bw[DDIR_RWDIR_CNT], min_bw[DDIR_RWDIR_CNT];	/* bandwidth extremes per data direction */
	uint64_t iobytes[DDIR_RWDIR_CNT];	/* total bytes moved per data direction */
	uint64_t agg[DDIR_RWDIR_CNT];		/* aggregate bandwidth per data direction */
	uint32_t kb_base;		/* base for size units — presumably 1000 vs 1024, confirm */
	uint32_t unit_base;		/* output unit base (cf. jobs_eta.unit_base) */
	uint32_t sig_figs;		/* significant figures for formatted output */
	uint32_t groupid;		/* id of the group these stats describe */
	uint32_t unified_rw_rep;	/* reporting mode, see UNIFIED_* below */
} __attribute__((packed));
/*
* How many depth levels to log
*/
#define FIO_IO_U_MAP_NR 7
#define FIO_IO_U_LAT_N_NR 10
#define FIO_IO_U_LAT_U_NR 10
#define FIO_IO_U_LAT_M_NR 12
/*
* Constants for clat percentiles
*/
#define FIO_IO_U_PLAT_BITS 6
#define FIO_IO_U_PLAT_VAL (1 << FIO_IO_U_PLAT_BITS)
#define FIO_IO_U_PLAT_GROUP_NR 29
#define FIO_IO_U_PLAT_NR (FIO_IO_U_PLAT_GROUP_NR * FIO_IO_U_PLAT_VAL)
#define FIO_IO_U_LIST_MAX_LEN 20 /* The size of the default and user-specified
list of percentiles */
/*
* Aggregate latency samples for reporting percentile(s).
*
* EXECUTIVE SUMMARY
*
* FIO_IO_U_PLAT_BITS determines the maximum statistical error on the
* value of resulting percentiles. The error will be approximately
* 1/2^(FIO_IO_U_PLAT_BITS+1) of the value.
*
* FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the maximum
* range being tracked for latency samples. The maximum value tracked
* accurately will be 2^(GROUP_NR + PLAT_BITS - 1) nanoseconds.
*
* FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the memory
* requirement of storing those aggregate counts. The memory used will
* be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(uint64_t)
* bytes.
*
* FIO_IO_U_PLAT_NR is the total number of buckets.
*
* DETAILS
*
* Suppose the lat varies from 0 to 999 (usec), the straightforward
* method is to keep an array of (999 + 1) buckets, in which a counter
* keeps the count of samples which fall in the bucket, e.g.,
* {[0],[1],...,[999]}. However this consumes a huge amount of space,
* and can be avoided if an approximation is acceptable.
*
 * One such method is to let the range of each bucket be greater
* than one. This method has low accuracy when the value is small. For
* example, let the buckets be {[0,99],[100,199],...,[900,999]}, and
* the represented value of each bucket be the mean of the range. Then
* a value 0 has a round-off error of 49.5. To improve on this, we
* use buckets with non-uniform ranges, while bounding the error of
* each bucket within a ratio of the sample value. A simple example
* would be when error_bound = 0.005, buckets are {
* {[0],[1],...,[99]}, {[100,101],[102,103],...,[198,199]},..,
* {[900,909],[910,919]...} }. The total range is partitioned into
* groups with different ranges, then buckets with uniform ranges. An
* upper bound of the error is (range_of_bucket/2)/value_of_bucket
*
* For better efficiency, we implement this using base two. We group
* samples by their Most Significant Bit (MSB), extract the next M bit
* of them as an index within the group, and discard the rest of the
* bits.
*
* E.g., assume a sample 'x' whose MSB is bit n (starting from bit 0),
* and use M bit for indexing
*
* | n | M bits | bit (n-M-1) ... bit 0 |
*
* Because x is at least 2^n, and bit 0 to bit (n-M-1) is at most
* (2^(n-M) - 1), discarding bit 0 to (n-M-1) makes the round-off
* error
*
* 2^(n-M)-1 2^(n-M) 1
* e <= --------- <= ------- = ---
* 2^n 2^n 2^M
*
 * Furthermore, we use the "mean" of the range to represent the bucket,
 * so the error e can be lowered by half to 1 / 2^(M+1). By using M bits
 * as the index, each group must contain 2^M buckets.
*
* E.g. Let M (FIO_IO_U_PLAT_BITS) be 6
* Error bound is 1/2^(6+1) = 0.0078125 (< 1%)
*
* Group MSB #discarded range of #buckets
* error_bits value
* ----------------------------------------------------------------
* 0* 0~5 0 [0,63] 64
* 1* 6 0 [64,127] 64
* 2 7 1 [128,255] 64
* 3 8 2 [256,511] 64
* 4 9 3 [512,1023] 64
* ... ... ... [...,...] ...
* 28 33 27 [8589934592,+inf]** 64
*
* * Special cases: when n < (M-1) or when n == (M-1), in both cases,
* the value cannot be rounded off. Use all bits of the sample as
* index.
*
* ** If a sample's MSB is greater than 33, it will be counted as 33.
*/
/*
 * Trim cycle count measurements
 *
 * Each block info is a uint32_t (see thread_stat::block_infos) packing
 * a trim-cycle count in the low BLOCK_INFO_STATE_SHIFT bits and a
 * block_info_state in the remaining high bits.
 */
#define MAX_NR_BLOCK_INFOS 8192
#define BLOCK_INFO_STATE_SHIFT 29
/* Extract the trim-cycle count (low 29 bits) */
#define BLOCK_INFO_TRIMS(block_info) \
	((block_info) & ((1 << BLOCK_INFO_STATE_SHIFT) - 1))
/* Extract the state (high bits) */
#define BLOCK_INFO_STATE(block_info) \
	((block_info) >> BLOCK_INFO_STATE_SHIFT)
/* Compose a block info word from a state and a trim count */
#define BLOCK_INFO(state, trim_cycles) \
	((trim_cycles) | ((unsigned int) (state) << BLOCK_INFO_STATE_SHIFT))
/* Replace the state while preserving the trim count */
#define BLOCK_INFO_SET_STATE(block_info, state) \
	BLOCK_INFO(state, BLOCK_INFO_TRIMS(block_info))
enum block_info_state {
	BLOCK_STATE_UNINIT,
	BLOCK_STATE_TRIMMED,
	BLOCK_STATE_WRITTEN,
	BLOCK_STATE_TRIM_FAILURE,
	BLOCK_STATE_WRITE_FAILURE,
	BLOCK_STATE_COUNT,	/* number of valid states, not a state itself */
};
/* Fixed sizes of the strings embedded in thread_stat */
#define FIO_JOBNAME_SIZE 128
#define FIO_JOBDESC_SIZE 256
#define FIO_VERROR_SIZE 128
/* unified_rw_rep values: per-direction, combined, or both */
#define UNIFIED_SPLIT 0
#define UNIFIED_MIXED 1
#define UNIFIED_BOTH 2
/* Latency types; first index of thread_stat::io_u_plat */
enum fio_lat {
	FIO_SLAT = 0,	/* submission latency */
	FIO_CLAT,	/* completion latency */
	FIO_LAT,	/* total latency */
	FIO_LAT_CNT = 3,
};
/*
 * Completion latency stats for a single I/O priority value: a full
 * percentile histogram plus an io_stat accumulator (declared elsewhere).
 */
struct clat_prio_stat {
	uint64_t io_u_plat[FIO_IO_U_PLAT_NR];	/* clat percentile buckets */
	struct io_stat clat_stat;		/* aggregate clat statistics */
	uint32_t ioprio;			/* the priority these stats belong to */
};
/*
 * Per-job statistics. Packed: the layout is fixed because the struct
 * is transferred as-is (see the FIO_NET_CMD_TS comments on the unions
 * below). Do not reorder fields or change their sizes; the pad*
 * members are part of the layout.
 */
struct thread_stat {
	char name[FIO_JOBNAME_SIZE];	/* job name */
	char verror[FIO_VERROR_SIZE];	/* error message text, if any */
	uint32_t error;			/* error code */
	uint32_t thread_number;
	uint32_t groupid;
	uint64_t job_start; /* Time job was started, as clock_gettime(job_start_clock_id) */
	uint32_t pid;
	char description[FIO_JOBDESC_SIZE];
	uint32_t members;		/* jobs merged into this stat (cf. sum_thread_stats) */
	uint32_t unified_rw_rep;	/* reporting mode, see UNIFIED_* */
	uint32_t disable_prio_stat;	/* if set, skip per-priority clat stats */
	/*
	 * bandwidth and latency stats
	 */
	struct io_stat sync_stat __attribute__((aligned(8)));/* fsync etc stats */
	struct io_stat clat_stat[DDIR_RWDIR_CNT]; /* completion latency */
	struct io_stat slat_stat[DDIR_RWDIR_CNT]; /* submission latency */
	struct io_stat lat_stat[DDIR_RWDIR_CNT]; /* total latency */
	struct io_stat bw_stat[DDIR_RWDIR_CNT]; /* bandwidth stats */
	struct io_stat iops_stat[DDIR_RWDIR_CNT]; /* IOPS stats */
	/*
	 * fio system usage accounting
	 */
	uint64_t usr_time;
	uint64_t sys_time;
	uint64_t ctx;			/* presumably context switches (rusage) — confirm */
	uint64_t minf, majf;		/* presumably minor/major faults (rusage) — confirm */
	/*
	 * IO depth and latency stats
	 */
	uint32_t clat_percentiles;
	uint32_t lat_percentiles;
	uint32_t slat_percentiles;
	uint32_t pad;			/* explicit padding; part of the fixed layout */
	uint64_t percentile_precision;
	fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
	uint64_t io_u_map[FIO_IO_U_MAP_NR];		/* IO depth distribution */
	uint64_t io_u_submit[FIO_IO_U_MAP_NR];		/* submit-depth distribution */
	uint64_t io_u_complete[FIO_IO_U_MAP_NR];	/* completion-depth distribution */
	uint64_t io_u_lat_n[FIO_IO_U_LAT_N_NR];		/* coarse latency buckets (see FIO_IO_U_LAT_N_NR) */
	uint64_t io_u_lat_u[FIO_IO_U_LAT_U_NR];		/* coarse latency buckets (see FIO_IO_U_LAT_U_NR) */
	uint64_t io_u_lat_m[FIO_IO_U_LAT_M_NR];		/* coarse latency buckets (see FIO_IO_U_LAT_M_NR) */
	uint64_t io_u_plat[FIO_LAT_CNT][DDIR_RWDIR_CNT][FIO_IO_U_PLAT_NR]; /* percentile buckets per lat type/direction */
	uint64_t io_u_sync_plat[FIO_IO_U_PLAT_NR];	/* percentile buckets for syncs */
	uint64_t total_io_u[DDIR_RWDIR_SYNC_CNT];
	uint64_t short_io_u[DDIR_RWDIR_CNT];
	uint64_t drop_io_u[DDIR_RWDIR_CNT];
	uint64_t total_submit;
	uint64_t total_complete;
	uint64_t io_bytes[DDIR_RWDIR_CNT];
	uint64_t runtime[DDIR_RWDIR_CNT];
	uint64_t total_run_time;
	/*
	 * IO Error related stats
	 */
	union {
		uint16_t continue_on_error;
		uint32_t pad2;		/* sizes the union to a full 32 bits */
	};
	uint32_t first_error;
	uint64_t total_err_count;
	/* ZBD stats */
	uint64_t nr_zone_resets;
	uint64_t nr_block_infos;
	uint32_t block_infos[MAX_NR_BLOCK_INFOS];	/* packed words, see BLOCK_INFO_* above */
	uint32_t kb_base;
	uint32_t unit_base;
	uint32_t latency_depth;
	uint32_t pad3;			/* explicit padding; part of the fixed layout */
	uint64_t latency_target;
	fio_fp64_t latency_percentile;
	uint64_t latency_window;
	uint32_t sig_figs;
	/* steady-state (ss_*) detection state */
	uint64_t ss_dur;
	uint32_t ss_state;
	uint32_t ss_head;
	fio_fp64_t ss_limit;
	fio_fp64_t ss_slope;
	fio_fp64_t ss_deviation;
	fio_fp64_t ss_criterion;
	/* A mirror of td->ioprio. */
	uint32_t ioprio;
	union {
		uint64_t *ss_iops_data;
		/*
		 * For FIO_NET_CMD_TS, the pointed to data will temporarily
		 * be stored at this offset from the start of the payload.
		 */
		uint64_t ss_iops_data_offset;
		uint64_t pad4;		/* keeps the union 64 bits on all ABIs */
	};
	union {
		uint64_t *ss_bw_data;
		/*
		 * For FIO_NET_CMD_TS, the pointed to data will temporarily
		 * be stored at this offset from the start of the payload.
		 */
		uint64_t ss_bw_data_offset;
		uint64_t pad5;		/* keeps the union 64 bits on all ABIs */
	};
	union {
		struct clat_prio_stat *clat_prio[DDIR_RWDIR_CNT];
		/*
		 * For FIO_NET_CMD_TS, the pointed to data will temporarily
		 * be stored at this offset from the start of the payload.
		 */
		uint64_t clat_prio_offset[DDIR_RWDIR_CNT];
		uint64_t pad6;
	};
	uint32_t nr_clat_prio[DDIR_RWDIR_CNT];	/* entries in clat_prio[] per direction */
	uint64_t cachehit;
	uint64_t cachemiss;
} __attribute__((packed));
/*
 * The ETA field list is a macro so the exact same fields can be
 * declared twice below: once naturally aligned (struct jobs_eta) and
 * once packed (struct jobs_eta_packed) — presumably for network
 * transfer, cf. the "Network 'copy'" note inside.
 */
#define JOBS_ETA { \
	uint32_t nr_running; \
	uint32_t nr_ramp; \
	\
	uint32_t nr_pending; \
	uint32_t nr_setting_up; \
	\
	uint64_t m_rate[DDIR_RWDIR_CNT]; \
	uint64_t t_rate[DDIR_RWDIR_CNT]; \
	uint64_t rate[DDIR_RWDIR_CNT]; \
	uint32_t m_iops[DDIR_RWDIR_CNT]; \
	uint32_t t_iops[DDIR_RWDIR_CNT]; \
	uint32_t iops[DDIR_RWDIR_CNT]; \
	uint32_t pad; \
	uint64_t elapsed_sec; \
	uint64_t eta_sec; \
	uint32_t is_pow2; \
	uint32_t unit_base; \
	\
	uint32_t sig_figs; \
	\
	uint32_t files_open; \
	\
	/* \
	 * Network 'copy' of run_str[] \
	 */ \
	uint32_t nr_threads; \
	uint32_t pad2; \
	uint8_t run_str[]; \
}
/* Aligned and packed instantiations of the same field list */
struct jobs_eta JOBS_ETA;
struct jobs_eta_packed JOBS_ETA __attribute__((packed));
/*
 * A linkable snapshot of the percentile bucket array.
 */
struct io_u_plat_entry {
	struct flist_head list;			/* chain linkage */
	uint64_t io_u_plat[FIO_IO_U_PLAT_NR];	/* percentile buckets */
};
/* NOTE(review): presumably serializes stat generation — confirm in stat.c */
extern struct fio_sem *stat_sem;
/* ETA/progress reporting */
extern struct jobs_eta *get_jobs_eta(bool force, size_t *size);
/* Stat subsystem setup/teardown */
extern void stat_init(void);
extern void stat_exit(void);
/* Rendering collected statistics (JSON/terse/human, per buf_output) */
extern struct json_object * show_thread_status(struct thread_stat *ts, struct group_run_stats *rs, struct flist_head *, struct buf_output *);
extern void show_group_stats(struct group_run_stats *rs, struct buf_output *);
extern void display_thread_status(struct jobs_eta *je);
extern void __show_run_stats(void);
extern int __show_running_run_stats(void);
extern void show_running_run_stats(void);
extern void check_for_running_stats(void);
/* Aggregation and (re)initialization of stat structures */
extern void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src);
extern void sum_group_stats(struct group_run_stats *dst, struct group_run_stats *src);
extern void init_thread_stat_min_vals(struct thread_stat *ts);
extern void init_thread_stat(struct thread_stat *ts);
extern void init_group_run_stat(struct group_run_stats *gs);
extern void eta_to_str(char *str, unsigned long eta_sec);
/* Latency/percentile/distribution math over the histograms above */
extern bool calc_lat(struct io_stat *is, unsigned long long *min, unsigned long long *max, double *mean, double *dev);
extern unsigned int calc_clat_percentiles(uint64_t *io_u_plat, unsigned long long nr, fio_fp64_t *plist, unsigned long long **output, unsigned long long *maxv, unsigned long long *minv);
extern void stat_calc_lat_n(struct thread_stat *ts, double *io_u_lat);
extern void stat_calc_lat_m(struct thread_stat *ts, double *io_u_lat);
extern void stat_calc_lat_u(struct thread_stat *ts, double *io_u_lat);
extern void stat_calc_dist(uint64_t *map, unsigned long total, double *io_u_dist);
/* Per-thread bookkeeping (rusage, resets) */
extern void reset_io_stats(struct thread_data *);
extern void update_rusage_stat(struct thread_data *);
extern void clear_rusage_stat(struct thread_data *);
/* Sample ingestion: record one data point per call */
extern void add_lat_sample(struct thread_data *, enum fio_ddir,
				unsigned long long, unsigned long long,
				struct io_u *);
extern void add_clat_sample(struct thread_data *, enum fio_ddir,
				unsigned long long, unsigned long long,
				struct io_u *);
extern void add_slat_sample(struct thread_data *, struct io_u *);
extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long);
extern void add_iops_sample(struct thread_data *, struct io_u *,
				unsigned int);
extern void add_bw_sample(struct thread_data *, struct io_u *,
				unsigned int, unsigned long long);
extern void add_sync_clat_sample(struct thread_stat *ts,
				unsigned long long nsec);
extern int calc_log_samples(void);
/* Per-priority clat stat allocation/teardown */
extern void free_clat_prio_stats(struct thread_stat *);
extern int alloc_clat_prio_stat_ddir(struct thread_stat *, enum fio_ddir, int);
/* Disk utilization output */
extern void print_disk_util(struct disk_util_stat *, struct disk_util_agg *, int terse, struct buf_output *);
extern void json_array_add_disk_util(struct disk_util_stat *dus,
				struct disk_util_agg *agg, struct json_array *parent);
extern struct io_log *agg_io_log[DDIR_RWDIR_CNT];
extern bool write_bw_log;
/*
 * Scale nanosecond latency stats down to microseconds, in place, but
 * only when every value is large enough that the truncation costs no
 * meaningful precision. Returns true if the conversion was performed,
 * false if the values were left untouched.
 */
static inline bool nsec_to_usec(unsigned long long *min,
				unsigned long long *max, double *mean,
				double *dev)
{
	/* Guard: any value too small -> leave everything in nsec */
	if (*min <= 2000 || *max <= 99999 || *dev <= 1000.0)
		return false;

	*min /= 1000;
	*max /= 1000;
	*mean /= 1000.0;
	*dev /= 1000.0;
	return true;
}
/*
 * Scale nanosecond latency stats down to milliseconds, in place, but
 * only when every value is large enough that the truncation costs no
 * meaningful precision. Returns true if the conversion was performed,
 * false if the values were left untouched.
 */
static inline bool nsec_to_msec(unsigned long long *min,
				unsigned long long *max, double *mean,
				double *dev)
{
	/* Guard: any value too small -> leave everything in nsec */
	if (*min <= 2000000 || *max <= 99999999ULL || *dev <= 1000000.0)
		return false;

	*min /= 1000000;
	*max /= 1000000;
	*mean /= 1000000.0;
	*dev /= 1000000.0;
	return true;
}
/*
 * Worst level condensing would be 1:5, so allow enough room for that
 */
#define __THREAD_RUNSTR_SZ(nr) ((nr) * 5)
#define THREAD_RUNSTR_SZ __THREAD_RUNSTR_SZ(thread_number)
/*
 * Map an io_u to its slot in the block_infos tracking array.
 * NOTE(review): semantics inferred from the name — confirm in stat.c.
 */
uint32_t *io_u_block_info(struct thread_data *td, struct io_u *io_u);
#endif