[go: up one dir, main page]

zng-app 0.19.1

Part of the zng project.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
#![cfg(all(
    feature = "trace_recorder",
    not(any(target_arch = "wasm32", target_os = "android", target_os = "ios"))
))]

//! Trace recording and data model.
//!
//! All tracing instrumentation in Zng projects is done using the `tracing` crate, this module uses the `tracing-chrome` crate
//! to record traces that can be viewed in `chrome://tracing` or `ui.perfetto.dev` and can be parsed to the [`Trace`] data model.

use std::{
    collections::HashMap,
    fmt::{self, Write as _},
    io::{self, Read},
    path::{Path, PathBuf},
    time::{Duration, SystemTime},
};

use parking_lot::Mutex;
use serde::Deserialize as _;
use tracing_subscriber::{filter::EnvFilter, layer::SubscriberExt as _, util::SubscriberInitExt as _};
use zng_txt::{ToTxt as _, Txt};

/// Represents a recorded trace.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Trace {
    /// Traced app processes.
    pub processes: Vec<ProcessTrace>,
}

/// Represents a single app process in a recorded trace.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct ProcessTrace {
    /// System process ID.
    pub id: u64,

    /// Process name.
    pub name: Txt,

    /// Traced threads on the process.
    pub threads: Vec<ThreadTrace>,

    /// Process start instant.
    ///
    /// This time stamp is system dependent, if the system time changes before a second app process starts it can show as starting first.
    ///
    /// If [`SystemTime::UNIX_EPOCH`] if the recorder does not support time.
    pub start: SystemTime,
}

/// Represents a single thread in an app process in a recorded trace.
#[derive(Clone)]
#[non_exhaustive]
pub struct ThreadTrace {
    /// Thread name.
    pub name: Txt,

    /// Events that happened on the thread.
    pub events: Vec<EventTrace>,
    /// Spans started and ended on the thread.
    pub spans: Vec<SpanTrace>,
}
impl fmt::Debug for ThreadTrace {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("ThreadTrace")
            .field("name", &self.name)
            .field("events.len()", &self.events.len())
            .field("spans.len()", &self.spans.len())
            .finish()
    }
}

/// Represents a traced event.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct EventTrace {
    /// Event info.
    pub info: Info,
    /// Moment from the recording start when this event happened.
    pub instant: Duration,
}

/// Represents a traced span.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct SpanTrace {
    /// Span info.
    pub info: Info,

    /// Moment from the recording start when this span started.
    pub start: Duration,
    /// Moment from the recording start when this span ended.
    pub end: Duration,
}

/// Common info traced about events and spans.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Info {
    /// Event or span name.
    pub name: Txt,
    /// Categories.
    ///
    /// Zng recordings usually write two categories, "target" and "level".
    pub categories: Vec<Txt>,
    /// File where the event or span was traced.
    pub file: Txt,
    /// Code line where the event or span was traced.
    pub line: u32,
    /// Custom args traced with the event or span.
    pub args: HashMap<Txt, Txt>,
}

impl Trace {
    /// Read and parse a Chrome JSON Array format trace.
    ///
    /// See [`parse_chrome_trace`] for more details.
    ///
    /// [`parse_chrome_trace`]: Self::parse_chrome_trace
    pub fn read_chrome_trace(json_path: impl AsRef<Path>) -> io::Result<Self> {
        let json = std::fs::read_to_string(json_path)?;
        let trace = Self::parse_chrome_trace(&json)?;
        Ok(trace)
    }

    /// Parse a Chrome JSON Array format trace.
    ///
    /// Only supports the "phases" emitted by `tracing-chrome` in `TraceStyle::Threaded` mode, those are `B, E, i, M` for `M` only
    /// supports `thread_name` metadata. Also parses the custom messages that define the process name and start timestamp as defined
    /// by the `zng::app::trace_recorder` documentation.
    pub fn parse_chrome_trace(json: &str) -> io::Result<Self> {
        fn invalid_data(msg: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> io::Error {
            io::Error::new(io::ErrorKind::InvalidData, msg)
        }

        // skip the array opening
        let json = json.trim_start();
        if !json.starts_with('[') {
            return Err(invalid_data("expected JSON array"));
        }
        let json = &json[1..];

        enum Phase {
            Begin,
            End,
            Event,
        }
        struct Entry {
            phase: Phase,
            pid: u64,
            tid: u64,
            ts: Duration,
            name: Txt,
            cat: Vec<Txt>,
            file: Txt,
            line: u32,
            args: HashMap<Txt, Txt>,
        }
        let mut process_sys_pid = HashMap::new();
        let mut process_names = HashMap::new();
        let mut process_record_start = HashMap::new();
        let mut thread_names = HashMap::new();
        let mut entries = vec![];

        let mut reader = std::io::Cursor::new(json.as_bytes());
        loop {
            // skip white space and commas to the next object
            let mut pos = reader.position();
            let mut buf = [0u8];
            while reader.read(&mut buf).is_ok() {
                if !b" \r\n\t,".contains(&buf[0]) {
                    break;
                }
                pos = reader.position();
            }
            reader.set_position(pos);
            let mut de = serde_json::Deserializer::from_reader(&mut reader);
            match serde_json::Value::deserialize(&mut de) {
                Ok(entry) => match entry {
                    serde_json::Value::Object(map) => {
                        let phase_str = match map.get("ph") {
                            Some(serde_json::Value::String(ph)) => match ph.as_str() {
                                "B" | "E" | "i" | "M" => ph.as_str(),
                                u => {
                                    tracing::error!("ignoring unknown or unsupported phase `{u:?}`");
                                    continue;
                                }
                            },
                            _ => return Err(invalid_data("expected \"ph\"")),
                        };
                        let pid = match map.get("pid") {
                            Some(serde_json::Value::Number(n)) => match n.as_u64() {
                                Some(pid) => pid,
                                None => return Err(invalid_data("expected \"pid\"")),
                            },
                            _ => return Err(invalid_data("expected \"pid\"")),
                        };
                        let name = match map.get("name") {
                            Some(serde_json::Value::String(name)) => name.to_txt(),
                            _ => return Err(invalid_data("expected \"name\"")),
                        };

                        let args: HashMap<Txt, Txt> = match map.get("args") {
                            Some(a) => match serde_json::from_value(a.clone()) {
                                Ok(a) => a,
                                Err(e) => {
                                    tracing::error!("only simple text args are supported, {e}");
                                    continue;
                                }
                            },
                            _ => HashMap::new(),
                        };

                        if phase_str == "M" && name == "process_name" {
                            if let Some(n) = args.get("name") {
                                process_names.insert(pid, n.to_txt());
                            }
                            continue;
                        }

                        let tid = match map.get("tid") {
                            Some(serde_json::Value::Number(n)) => match n.as_u64() {
                                Some(tid) => tid,
                                None => return Err(invalid_data("expected \"tid\"")),
                            },
                            _ => return Err(invalid_data("expected \"tid\"")),
                        };

                        let phase = match phase_str {
                            "B" => Phase::Begin,
                            "E" => Phase::End,
                            "i" => Phase::Event,
                            "M" => {
                                if name == "thread_name"
                                    && let Some(n) = args.get("name")
                                {
                                    thread_names.insert(tid, n.to_txt());
                                }
                                continue;
                            }
                            _ => unreachable!(),
                        };

                        let ts = match map.get("ts") {
                            Some(serde_json::Value::Number(ts)) => match ts.as_f64() {
                                Some(ts) => Duration::from_nanos((ts * 1000.0).round() as u64),
                                None => return Err(invalid_data("expected \"ts\"")),
                            },
                            _ => return Err(invalid_data("expected \"ts\"")),
                        };
                        let cat = match map.get("cat") {
                            Some(serde_json::Value::String(cat)) => cat.split(',').map(|c| c.trim().to_txt()).collect(),
                            _ => vec![],
                        };
                        let file = match map.get(".file") {
                            Some(serde_json::Value::String(file)) => file.to_txt(),
                            _ => Txt::from_static(""),
                        };
                        let line = match map.get(".line") {
                            Some(serde_json::Value::Number(line)) => line.as_u64().unwrap_or(0) as u32,
                            _ => 0,
                        };

                        if let Some(msg) = args.get("message") {
                            if let Some(process_ts) = msg.strip_prefix("zng-record-start: ") {
                                if let Ok(process_ts) = process_ts.parse::<u64>() {
                                    process_record_start.insert(pid, SystemTime::UNIX_EPOCH + Duration::from_micros(process_ts));
                                }
                            } else if let Some(rest) = msg.strip_prefix("pid: ")
                                && let Some((sys_pid, p_name)) = rest.split_once(", name: ")
                                && let Ok(sys_pid) = sys_pid.parse::<u64>()
                            {
                                process_sys_pid.insert(pid, sys_pid);
                                process_names.insert(pid, p_name.to_txt());
                            }
                        }

                        entries.push(Entry {
                            phase,
                            pid,
                            tid,
                            ts,
                            name,
                            cat,
                            file,
                            line,
                            args,
                        });
                    }
                    _ => return Err(invalid_data("expected JSON array of objects")),
                },
                Err(_) => {
                    // EOF
                    break;
                }
            }
        }

        let mut out = Trace { processes: vec![] };

        for entry in entries {
            let sys_pid = *process_sys_pid.entry(entry.pid).or_insert(entry.pid);
            let process = if let Some(p) = out.processes.iter_mut().find(|p| p.id == sys_pid) {
                p
            } else {
                out.processes.push(ProcessTrace {
                    id: sys_pid,
                    name: process_names.entry(entry.pid).or_insert_with(|| sys_pid.to_txt()).clone(),
                    threads: vec![],
                    start: process_record_start.get(&entry.pid).copied().unwrap_or(SystemTime::UNIX_EPOCH),
                });
                out.processes.last_mut().unwrap()
            };

            let thread_name = thread_names.entry(entry.tid).or_insert_with(|| entry.tid.to_txt()).clone();
            let thread = if let Some(t) = process.threads.iter_mut().find(|t| t.name == thread_name) {
                t
            } else {
                process.threads.push(ThreadTrace {
                    name: thread_name,
                    events: vec![],
                    spans: vec![],
                });
                process.threads.last_mut().unwrap()
            };

            fn entry_to_info(entry: Entry) -> Info {
                Info {
                    name: entry.name,
                    categories: entry.cat,
                    file: entry.file,
                    line: entry.line,
                    args: entry.args,
                }
            }

            match entry.phase {
                Phase::Begin => thread.spans.push(SpanTrace {
                    start: entry.ts,
                    end: entry.ts,
                    info: entry_to_info(entry),
                }),
                Phase::End => {
                    let end = entry.ts;
                    let info = entry_to_info(entry);
                    // recording always closes inner first (example: [begin1,begin2,end2,end1])
                    // this will search [begin1,begin2] in reverse for end2 and close that first, even if begin1 has the same name
                    if let Some(open) = thread.spans.iter_mut().rev().find(|s| s.start == s.end && s.info.name == info.name) {
                        open.end = end;
                        if open.start == open.end {
                            // timing is in microseconds so we can use 1ns here to help the logic
                            open.end += Duration::from_nanos(1);
                        }
                        open.info.merge(info);
                    }
                }
                Phase::Event => thread.events.push(EventTrace {
                    instant: entry.ts,
                    info: entry_to_info(entry),
                }),
            }
        }

        Ok(out)
    }

    /// Convert the trace to Chrome JSON Array format.
    pub fn to_chrome_trace(&self) -> Txt {
        let mut out = String::new();

        let _ = writeln!(&mut out, "[");

        let mut sep = "";
        for p in &self.processes {
            let _ = write!(
                &mut out,
                r#"{sep}{{"ph":"M","pid":{},"name":"process_name","args":{{"name":"{}"}}}}"#,
                p.id, p.name
            );
            sep = ",\n";
            for (tid, t) in p.threads.iter().enumerate() {
                let _ = write!(
                    &mut out,
                    r#"{sep}{{"ph":"M","pid":{}, "tid":{tid},"name":"process_name","args":{{"name":"{}"}}}}"#,
                    p.id, t.name
                );

                let mut items = Vec::with_capacity(t.events.len() + t.spans.len() * 2);
                for ev in &t.events {
                    let obj = serde_json::json!({
                        "ph": "i",
                        "s": "t",
                        "ts": (ev.instant.as_nanos() as f64 / 1000.0),
                        "pid": p.id,
                        "tid": tid,
                        "name": ev.info.name,
                        "cat": ev.info.categories.iter().fold(String::new(), |a, b| format!("{a},{b}")),
                        "args": ev.info.args,
                        ".file": ev.info.file,
                        ".line": ev.info.line,
                    });
                    items.push((ev.instant, obj));
                }
                for sp in &t.spans {
                    let start = serde_json::json!({
                        "ph": "B",
                        "s": "t",
                        "ts": (sp.start.as_nanos() as f64 / 1000.0),
                        "pid": p.id,
                        "tid": tid,
                        "name": sp.info.name,
                        "cat": sp.info.categories.iter().fold(String::new(), |a, b| format!("{a},{b}")),
                        "args": sp.info.args,
                        ".file": sp.info.file,
                        ".line": sp.info.line,
                    });
                    items.push((sp.start, start));

                    let end = serde_json::json!({
                        "ph": "E",
                        "s": "t",
                        "ts": (sp.end.as_nanos() as f64 / 1000.0),
                        "pid": p.id,
                        "tid": tid,
                        "name": sp.info.name,
                        "cat": sp.info.categories.iter().fold(String::new(), |a, b| format!("{a},{b}")),
                        "args": sp.info.args,
                        ".file": sp.info.file,
                        ".line": sp.info.line,
                    });
                    items.push((sp.end, end));
                }

                items.sort_by(|a, b| a.0.cmp(&b.0));

                for (_, item) in items {
                    let item = serde_json::to_string(&item).unwrap();
                    let _ = write!(&mut out, "{sep}{item}");
                }
            }
        }

        let _ = writeln!(&mut out, "]");
        out.to_txt()
    }

    /// Convert and write the trace to Chrome JSON Array format.
    pub fn write_chrome_trace(&self, json_path: impl AsRef<Path>) -> io::Result<()> {
        std::fs::write(json_path, self.to_chrome_trace().as_str().as_bytes())
    }

    /// Merge `other` into this.
    pub fn merge(&mut self, other: Self) {
        for p in other.processes {
            if let Some(ep) = self.processes.iter_mut().find(|ep| ep.id == p.id && ep.name == p.name) {
                ep.merge(p);
            } else {
                self.processes.push(p);
            }
        }
    }

    /// Sort processes processes and threads by start time then name, events by instant and spans by start.
    pub fn sort(&mut self) {
        self.processes.sort_by(|a, b| a.start.cmp(&b.start).then(a.name.cmp(&b.name)));
        for p in &mut self.processes {
            p.sort();
        }
    }
}

impl ProcessTrace {
    /// Merge `other` into this.
    pub fn merge(&mut self, other: Self) {
        for t in other.threads {
            if let Some(et) = self.threads.iter_mut().find(|et| et.name == t.name) {
                et.merge(t);
            } else {
                self.threads.push(t);
            }
        }
    }

    /// Sort threads by name, events by instant and spans by start.
    pub fn sort(&mut self) {
        self.threads.sort_by(|a, b| a.start().cmp(&b.start()).then(a.name.cmp(&b.name)));
        for t in &mut self.threads {
            t.sort();
        }
    }
}

impl ThreadTrace {
    /// Gets the minimum event or span start in the thread.
    pub fn start(&self) -> Duration {
        self.events
            .iter()
            .map(|e| e.instant)
            .min()
            .unwrap_or(Duration::MAX)
            .min(self.spans.iter().map(|e| e.start).min().unwrap_or(Duration::MAX))
    }

    /// Merge `other` into this.
    pub fn merge(&mut self, mut other: Self) {
        self.events.append(&mut other.events);
        self.spans.append(&mut other.spans);
    }

    /// Sort events by instant and spans by start (then reverse end).
    ///
    /// After sorting if a span starts within the start..=end of the previous it is "inside" it.
    pub fn sort(&mut self) {
        self.events.sort_by(|a, b| a.instant.cmp(&b.instant));
        self.spans.sort_by(|a, b| a.start.cmp(&b.start).then(b.start.cmp(&a.start)));
    }
}

impl Info {
    /// Merge `other` into this.
    pub fn merge(&mut self, info: Info) {
        if !info.file.is_empty() {
            self.file = info.file;
            self.line = info.line;
        }
        self.args.extend(info.args);
    }
}

/// Starts recording, stops on process exit or on [`stop_recording`].
///
/// Note that this is called automatically on startup if the `"ZNG_RECORD_TRACE"` environment variable is set and that is
/// the recommended way of enabling recording as it record all processes not just the calling process.
///
/// # Config and Output
///
/// See the `zng::app::trace_recorder` module documentation for details on how to configure the recording and the output file structure.
///
/// # Panics
///
/// Panics if another `tracing` subscriber was already inited.
///
/// Note that this can cause panics on any subsequent attempt to init subscribers, no other log subscriber must run after recording starts,
/// including attempts to restart recording after stopping.
///
/// Panics cannot write to the output dir.
pub fn start_recording(output_dir: Option<PathBuf>) {
    let mut rec = recording();
    if rec.is_some() {
        // already recording
        return;
    }

    let process_start = std::time::SystemTime::now()
        .duration_since(std::time::SystemTime::UNIX_EPOCH)
        .expect("cannot define process start timestamp")
        .as_micros();

    let output_dir = output_dir.unwrap_or_else(|| std::env::current_dir().expect("`current_dir` error").join("zng-trace"));

    // first process sets the timestamp
    let timestamp = match std::env::var("ZNG_RECORD_TRACE_TIMESTAMP") {
        Ok(t) => t,
        Err(_) => {
            let t = process_start.to_string();
            // SAFETY: safe, only read by this pure Rust code in subsequent started processes.
            unsafe {
                std::env::set_var("ZNG_RECORD_TRACE_TIMESTAMP", t.clone());
            }
            t
        }
    };

    let output_dir = output_dir.join(timestamp);
    std::fs::create_dir_all(&output_dir).expect("cannot create `output_dir`");
    let output_file = output_dir.join(format!("{}.json", std::process::id()));

    let (chrome_layer, guard) = tracing_chrome::ChromeLayerBuilder::new()
        .include_args(true)
        .file(output_file)
        .category_fn(Box::new(|es| match es {
            tracing_chrome::EventOrSpan::Event(event) => format!("{},{}", event.metadata().target(), event.metadata().level()),
            tracing_chrome::EventOrSpan::Span(span_ref) => format!("{},{}", span_ref.metadata().target(), span_ref.metadata().level()),
        }))
        .build();
    *rec = Some(guard);

    let env_layer = EnvFilter::try_from_env("ZNG_RECORD_TRACE_FILTER")
        .or_else(|_| EnvFilter::try_from_default_env())
        .unwrap_or_else(|_| EnvFilter::new("trace"));

    tracing_subscriber::registry().with(env_layer).with(chrome_layer).init();
    zng_env::on_process_exit(|_| stop_recording());

    tracing::info!("zng-record-start: {process_start}");
}

/// Stops recording and flushes.
///
/// Note that this is called automatically on process exit.
pub fn stop_recording() {
    *recording() = None;
}

zng_env::on_process_start!(|_| {
    if std::env::var("ZNG_RECORD_TRACE").is_ok() {
        start_recording(std::env::var("ZNG_RECORD_TRACE_DIR").ok().map(PathBuf::from));
    }
});

zng_app_context::hot_static! {
    static RECORDING: Mutex<Option<tracing_chrome::FlushGuard>> = Mutex::new(None);
}
fn recording() -> parking_lot::MutexGuard<'static, Option<tracing_chrome::FlushGuard>> {
    zng_app_context::hot_static_ref!(RECORDING).lock()
}