[go: up one dir, main page]

re_tuid/
lib.rs

1//! TUID: Time-based Unique Identifiers.
2//!
3//! Time-ordered unique 128-bit identifiers.
4//!
5//! ## Format
6//! The default string format is big-endian hex, e.g. `182342300C5F8C327a7b4a6e5a379ac4`.
7//! This means the string representation sorts the same.
8//!
9//! ## Namespace prefix
10//! It is common to prefix an TUID with a _namespace_. This is done as:
11//! `{namespace}_{tuid}` where `namespace` can be anything but is _recommended_ to be:
12//! * Lowercase
13//! * ASCII
14//! * Short ("row", "user", "chunk", …)
15//!
16//! For instance, `user_182342300C5F8C327a7b4a6e5a379ac4`.
17//!
18//! The idiomatic way of implementing this is to wrap [`Tuid`] in a newtype struct
19//! (e.g. `struct UserId(Tuid)`) and implement the prefix there.
20//!
21//! It is recommended that
22//! * Finding the wrong prefix is an error
23//! * A missing prefix is NOT an error
24//!
25//! Thus, `user_182342300C5F8C327a7b4a6e5a379ac4` and `182342300C5F8C327a7b4a6e5a379ac4`
26//! are both valid `UserId`:s, but `chunk_182342300C5F8C327a7b4a6e5a379ac4` is NOT.
27//!
28//! The namespace if ONLY part of the _string_ representation, and is there to help
29//! a user identify what would otherwise be just random hex.
30//! In other words, it's mainly for _debugging_ purposes.
31//!
32//! When storing the TUID in e.g. an Arrow column, use 16 bytes for each id.
33//!
34//! ## Feature flags
35#![doc = document_features::document_features!()]
36//!
37
38/// TUID: Time-based Unique Identifier.
39///
40/// Time-ordered globally unique 128-bit identifiers.
41///
42/// The raw bytes of the `Tuid` sorts in time order as the `Tuid` itself,
43/// and the `Tuid` is byte-aligned so you can just transmute between `Tuid` and raw bytes.
44#[repr(C, align(1))]
45#[derive(Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
46#[cfg_attr(
47    feature = "bytemuck",
48    derive(bytemuck::AnyBitPattern, bytemuck::NoUninit)
49)]
50pub struct Tuid {
51    /// Approximate nanoseconds since epoch.
52    ///
53    /// A big-endian u64 encoded as bytes to keep the alignment of `Tuid` to 1.
54    ///
55    /// We use big-endian so that the raw bytes of the `Tuid` sorts in time order.
56    time_nanos: [u8; 8],
57
58    /// Initialized to something random on each thread,
59    /// then incremented for each new [`Tuid`] being allocated.
60    ///
61    /// Uses big-endian u64 encoded as bytes to keep the alignment of `Tuid` to 1.
62    ///
63    /// We use big-endian so that the raw bytes of the `Tuid` sorts in creation order.
64    inc: [u8; 8],
65}
66
67impl Tuid {
68    /// We give an actual name to [`Tuid`], and inject that name into the Arrow datatype extensions,
69    /// as a hack so that we can compactly format them when printing Arrow data to the terminal.
70    /// Check out `re_arrow_util::format` for context.
71    pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID";
72}
73
74/// Formats the [`Tuid`] as a hex string.
75///
76/// The format uses upper case for the first 16 hex digits, and lower case for the last 16 hex digits.
77/// This is to make it easily distinguished from other hex strings.
78///
79/// Example: `182342300C5F8C327a7b4a6e5a379ac4`
80impl std::fmt::Display for Tuid {
81    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82        write!(f, "{:016X}{:016x}", self.nanos_since_epoch(), self.inc())
83    }
84}
85
86impl std::str::FromStr for Tuid {
87    type Err = std::num::ParseIntError;
88
89    fn from_str(s: &str) -> Result<Self, Self::Err> {
90        u128::from_str_radix(s, 16).map(Self::from_u128)
91    }
92}
93
94impl std::fmt::Debug for Tuid {
95    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96        write!(f, "{self}")
97    }
98}
99
100impl From<Tuid> for std::borrow::Cow<'_, Tuid> {
101    #[inline]
102    fn from(value: Tuid) -> Self {
103        std::borrow::Cow::Owned(value)
104    }
105}
106
107impl<'a> From<&'a Tuid> for std::borrow::Cow<'a, Tuid> {
108    #[inline]
109    fn from(value: &'a Tuid) -> Self {
110        std::borrow::Cow::Borrowed(value)
111    }
112}
113
114impl Tuid {
115    /// All zeroes.
116    pub const ZERO: Self = Self {
117        time_nanos: [0; 8],
118        inc: [0; 8],
119    };
120
121    /// All ones.
122    pub const MAX: Self = Self {
123        time_nanos: u64::MAX.to_be_bytes(),
124        inc: u64::MAX.to_be_bytes(),
125    };
126
127    /// Create a new unique [`Tuid`] based on the current time.
128    #[expect(clippy::new_without_default)]
129    #[inline]
130    pub fn new() -> Self {
131        use std::cell::RefCell;
132
133        thread_local! {
134            pub static LATEST_TUID: RefCell<Tuid> = RefCell::new(Tuid::from_nanos_and_inc(
135                 monotonic_nanos_since_epoch(),
136
137                // Leave top bit at zero so we have plenty of room to grow.
138                 random_u64() & !(1_u64 << 63),
139            ));
140        }
141
142        LATEST_TUID.with(|latest_tuid| {
143            let mut latest = latest_tuid.borrow_mut();
144
145            let new = Self::from_nanos_and_inc(monotonic_nanos_since_epoch(), latest.inc() + 1);
146
147            debug_assert!(
148                latest.nanos_since_epoch() <= new.nanos_since_epoch(),
149                "Time should be monotonically increasing"
150            );
151
152            *latest = new;
153
154            new
155        })
156    }
157
158    /// Construct a [`Tuid`] from the upper and lower halves of a u128-bit.
159    /// The first should be nano-seconds since epoch.
160    #[inline]
161    pub fn from_nanos_and_inc(time_nanos: u64, inc: u64) -> Self {
162        Self {
163            time_nanos: time_nanos.to_be_bytes(),
164            inc: inc.to_be_bytes(),
165        }
166    }
167
168    #[inline]
169    pub fn from_u128(id: u128) -> Self {
170        Self::from_nanos_and_inc((id >> 64) as u64, (id & (!0 >> 64)) as u64)
171    }
172
173    #[inline]
174    pub fn as_u128(&self) -> u128 {
175        ((self.nanos_since_epoch() as u128) << 64) | (self.inc() as u128)
176    }
177
178    #[inline]
179    pub fn from_bytes(bytes: [u8; 16]) -> Self {
180        Self::from_u128(u128::from_be_bytes(bytes))
181    }
182
183    /// Returns most significant byte first (big endian).
184    #[inline]
185    pub fn as_bytes(&self) -> [u8; 16] {
186        self.as_u128().to_be_bytes()
187    }
188
189    /// Approximate nanoseconds since unix epoch.
190    ///
191    /// The upper 64 bits of the [`Tuid`].
192    #[inline]
193    pub fn nanos_since_epoch(&self) -> u64 {
194        u64::from_be_bytes(self.time_nanos)
195    }
196
197    /// The increment part of the [`Tuid`].
198    ///
199    /// The lower 64 bits of the [`Tuid`].
200    #[inline]
201    pub fn inc(&self) -> u64 {
202        u64::from_be_bytes(self.inc)
203    }
204
205    /// Returns the next logical [`Tuid`].
206    ///
207    /// Wraps the monotonically increasing back to zero on overflow.
208    ///
209    /// Beware: wrong usage can easily lead to conflicts.
210    /// Prefer [`Tuid::new`] when unsure.
211    #[must_use]
212    #[inline]
213    pub fn next(&self) -> Self {
214        let Self { time_nanos, inc } = *self;
215
216        Self {
217            time_nanos,
218            inc: u64::from_be_bytes(inc).wrapping_add(1).to_be_bytes(),
219        }
220    }
221
222    /// Returns the `n`-next logical [`Tuid`].
223    ///
224    /// This is equivalent to calling [`Tuid::next`] `n` times.
225    /// Wraps the monotonically increasing back to zero on overflow.
226    ///
227    /// Beware: wrong usage can easily lead to conflicts.
228    /// Prefer [`Tuid::new`] when unsure.
229    #[must_use]
230    #[inline]
231    pub fn incremented_by(&self, n: u64) -> Self {
232        let Self { time_nanos, inc } = *self;
233        Self {
234            time_nanos,
235            inc: u64::from_be_bytes(inc).wrapping_add(n).to_be_bytes(),
236        }
237    }
238
239    /// A shortened string representation of the `Tuid`.
240    #[inline]
241    pub fn short_string(&self) -> String {
242        // We still want this to look like a part of the full TUID (i.e. what is printed on
243        // `std::fmt::Display`).
244        // Per Thread randomness plus increment is in the last part, so show only that.
245        // (the first half is time in nanoseconds which for the _most part_ doesn't change that
246        // often)
247        let str = self.to_string();
248        str[(str.len() - 8)..].to_string()
249    }
250}
251
252/// Returns a high-precision, monotonically increasing count that approximates nanoseconds since unix epoch.
253#[inline]
254fn monotonic_nanos_since_epoch() -> u64 {
255    // This can maybe be optimized
256
257    use web_time::Instant;
258
259    static START_TIME: std::sync::LazyLock<(u64, Instant)> =
260        std::sync::LazyLock::new(|| (nanos_since_epoch(), Instant::now()));
261    START_TIME.0 + START_TIME.1.elapsed().as_nanos() as u64
262}
263
264fn nanos_since_epoch() -> u64 {
265    if let Ok(duration_since_epoch) = web_time::SystemTime::UNIX_EPOCH.elapsed() {
266        let mut nanos_since_epoch = duration_since_epoch.as_nanos() as u64;
267
268        if cfg!(target_arch = "wasm32") {
269            // Web notriously round to the nearest millisecond (because of spectre/meltdown)
270            // so we add a bit of extra randomenss here to increase our entropy and reduce the chance of collisions:
271            nanos_since_epoch += random_u64() % 1_000_000;
272        }
273
274        nanos_since_epoch
275    } else {
276        // system time is set before 1970. this should be quite rare.
277        0
278    }
279}
280
281#[inline]
282fn random_u64() -> u64 {
283    let mut bytes = [0_u8; 8];
284    getrandom::fill(&mut bytes).expect("Couldn't get random bytes");
285    u64::from_be_bytes(bytes)
286}
287
288impl re_byte_size::SizeBytes for Tuid {
289    #[inline]
290    fn heap_size_bytes(&self) -> u64 {
291        0
292    }
293}
294
295#[test]
296fn test_tuid() {
297    use std::collections::{BTreeSet, HashSet};
298
299    fn is_sorted<T>(data: &[T]) -> bool
300    where
301        T: Ord,
302    {
303        data.windows(2).all(|w| w[0] <= w[1])
304    }
305
306    let num = 100_000;
307    let mut ids = Vec::with_capacity(num);
308    ids.push(Tuid::ZERO);
309    ids.push(Tuid::from_nanos_and_inc(123_456, 789_123));
310    ids.push(Tuid::from_nanos_and_inc(123_456, u64::MAX));
311    ids.extend((0..num - 5).map(|_| Tuid::new()));
312    ids.push(Tuid::from_nanos_and_inc(u64::MAX, 1));
313    ids.push(Tuid::MAX);
314
315    assert!(is_sorted(&ids));
316    assert_eq!(ids.iter().copied().collect::<HashSet::<Tuid>>().len(), num);
317    assert_eq!(ids.iter().copied().collect::<BTreeSet::<Tuid>>().len(), num);
318
319    for &tuid in &ids {
320        assert_eq!(tuid, Tuid::from_u128(tuid.as_u128()));
321        assert_eq!(tuid, tuid.to_string().parse().unwrap());
322    }
323
324    let id_strings: Vec<String> = ids.iter().map(|id| id.to_string()).collect();
325    assert!(
326        is_sorted(&id_strings),
327        "Ids should sort the same when converted to strings"
328    );
329}
330
331#[test]
332fn test_tuid_size_and_alignment() {
333    assert_eq!(std::mem::size_of::<Tuid>(), 16);
334    assert_eq!(std::mem::align_of::<Tuid>(), 1);
335}
336
337#[test]
338fn test_tuid_formatting() {
339    assert_eq!(
340        Tuid::from_u128(0x182342300c5f8c327a7b4a6e5a379ac4).to_string(),
341        "182342300C5F8C327a7b4a6e5a379ac4"
342    );
343}
344
345// -------------------------------------------------------------------------------
346
347// For backwards compatibility with our MsgPack encoder/decoder
348#[cfg(feature = "serde")]
349#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
350struct LegacyTuid {
351    time_nanos: u64,
352    inc: u64,
353}
354
355#[cfg(feature = "serde")]
356impl serde::Serialize for Tuid {
357    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
358    where
359        S: serde::Serializer,
360    {
361        LegacyTuid {
362            time_nanos: self.nanos_since_epoch(),
363            inc: self.inc(),
364        }
365        .serialize(serializer)
366    }
367}
368
369#[cfg(feature = "serde")]
370impl<'de> serde::Deserialize<'de> for Tuid {
371    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
372    where
373        D: serde::Deserializer<'de>,
374    {
375        let LegacyTuid { time_nanos, inc } = serde::Deserialize::deserialize(deserializer)?;
376        Ok(Self::from_nanos_and_inc(time_nanos, inc))
377    }
378}