Skip to main content

cadmus_core/
logging.rs

1//! Structured logging infrastructure with JSON output and OpenTelemetry integration.
2//!
3//! This module provides logging functionality for Cadmus, including:
4//! - JSON-structured logs written to rotating files
5//! - Configurable log levels and filtering
6//! - Automatic log file cleanup based on retention policies
7//! - Optional OpenTelemetry export (when `otel` feature is enabled)
8//! - Unique run ID for correlating logs across a session
9//!
10//! # Architecture
11//!
12//! The logging system is built on the `tracing` crate ecosystem:
13//! - `tracing_subscriber` for composable logging layers
14//! - `tracing_appender` for non-blocking file I/O
15//! - JSON formatting for structured, machine-readable logs
16//! - `EnvFilter` for flexible log level control
17//!
18//! Each application run generates a unique Run ID (UUID v7) that appears in:
19//! - The log filename: `cadmus-<run_id>.json`
20//! - OpenTelemetry resource attributes
21//! - All log entries for correlation
22//!
23//! # Log File Management
24//!
25//! Log files are automatically managed:
26//! - Files are named with the run ID: `cadmus-<run_id>.json`
27//! - Older files are deleted when `max_files` limit is exceeded
28//! - Cleanup happens at initialization, keeping only the most recent files
29//!
30//! # Configuration
31//!
32//! Logging is configured via `LoggingSettings`:
33//!
34//! ```toml
35//! [logging]
36//! enabled = true
37//! level = "info"
38//! max-files = 3
39//! directory = "logs"
40//! otlp-endpoint = "http://localhost:4318"  # Optional
41//! ```
42//!
43//! The log level can be overridden with the `RUST_LOG` environment variable:
44//!
45//! ```bash
46//! RUST_LOG=debug ./cadmus
47//! RUST_LOG=cadmus::view=trace,info ./cadmus
48//! ```
49//!
50//! # Example Usage
51//!
52//! ```rust
53//! use cadmus_core::settings::LoggingSettings;
54//! use cadmus_core::logging::{init_logging, shutdown_logging, get_run_id};
55//!
56//! let settings = LoggingSettings {
57//!     enabled: true,
58//!     level: "info".to_string(),
59//!     max_files: 3,
60//!     directory: "logs".into(),
61//!     otlp_endpoint: None,
62//!     pyroscope_endpoint: None,
63//!     enable_kern_log: false,
64//!     enable_dbus_log: false,
65//! };
66//!
67//! // Initialize at application startup
68//! init_logging(&settings)?;
69//! eprintln!("Started with run ID: {}", get_run_id());
70//!
71//! // Use tracing macros throughout the application
72//! tracing::info!("Application started");
73//!
74//! // Shutdown at application exit (flushes buffers)
75//! shutdown_logging();
76//! # Ok::<(), anyhow::Error>(())
77//! ```
78
79use crate::settings::LoggingSettings;
80#[cfg(feature = "tracing")]
81use crate::telemetry;
82use crate::version::get_current_version;
83use anyhow::{Context, Error};
84use arc_swap::ArcSwap;
85use std::fs;
86use std::fs::DirEntry;
87use std::path::Path;
88use std::sync::mpsc;
89use std::sync::{Arc, Mutex, OnceLock};
90use std::thread;
91use std::time::Duration;
92use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
93use tracing_subscriber::prelude::*;
94use tracing_subscriber::EnvFilter;
95use uuid::Uuid;
96
97mod kern;
98
99const LOG_FILE_PREFIX: &str = "cadmus-";
100const LOG_FILE_SUFFIX: &str = "json";
101
102static LOG_GUARD: OnceLock<Mutex<Option<WorkerGuard>>> = OnceLock::new();
103static RUN_ID: OnceLock<String> = OnceLock::new();
104static WRITER_INNER: OnceLock<ArcSwap<NonBlocking>> = OnceLock::new();
105
106struct SwappableWriter {
107    inner: &'static ArcSwap<NonBlocking>,
108}
109
110impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for SwappableWriter {
111    type Writer = NonBlocking;
112
113    /// Performs a lock-free atomic load of the current writer.
114    fn make_writer(&'a self) -> Self::Writer {
115        self.inner.load().as_ref().clone()
116    }
117}
118
119/// Returns the unique run ID for this application session.
120///
121/// The run ID is a UUID v7 generated at first access and remains constant
122/// for the lifetime of the process. It is used to:
123/// - Name the log file: `cadmus-<run_id>.json`
124/// - Tag OpenTelemetry telemetry exports
125/// - Correlate all operations within a single run
126///
127/// # Returns
128///
129/// A string slice containing the run ID, valid for the program's lifetime.
130///
131/// # Example
132///
133/// ```
134/// use cadmus_core::logging::get_run_id;
135///
136/// let run_id = get_run_id();
137/// eprintln!("Application run ID: {}", run_id);
138/// assert_eq!(get_run_id(), run_id); // Consistent across calls
139/// ```
140pub fn get_run_id() -> &'static str {
141    RUN_ID.get_or_init(|| Uuid::now_v7().to_string()).as_str()
142}
143
144/// Removes old log files to maintain the configured retention limit.
145///
146/// This function scans the log directory for files matching the pattern
147/// `cadmus-*.json` and deletes the oldest files if the count exceeds `max_files`.
148///
149/// Note: this relies on the run ID being a UUID v7 (time-ordered). Filenames are
150/// `cadmus-<run_id>.json` where `<run_id>` is generated with `Uuid::now_v7()`,
151/// so lexicographic sorting of the filenames corresponds to chronological order.
152/// Sorting by file name therefore yields oldest-first ordering for removal.
153///
154/// # Arguments
155///
156/// * `log_dir` - Path to the directory containing log files
157/// * `max_files` - Maximum number of log files to retain (0 = keep all)
158///
159/// # Returns
160///
161/// Returns `Ok(())` on success.
162///
163/// # Errors
164///
165/// Returns an error if:
166/// - The log directory cannot be read
167/// - Individual directory entries cannot be read
168/// - Old log files cannot be deleted
169fn cleanup_run_logs(log_dir: &std::path::Path, max_files: usize) -> Result<(), Error> {
170    if max_files == 0 {
171        return Ok(());
172    }
173
174    let mut entries = collect_run_log_entries(log_dir)?;
175    if entries.len() <= max_files {
176        return Ok(());
177    }
178
179    entries.sort_by_key(|entry| entry.file_name());
180    let remove_count = entries.len().saturating_sub(max_files);
181    for entry in entries.into_iter().take(remove_count) {
182        fs::remove_file(entry.path())
183            .with_context(|| format!("can't remove old log file {}", entry.path().display()))?;
184    }
185
186    Ok(())
187}
188
189/// Collects all Cadmus log file entries from the specified directory.
190///
191/// Only files matching the pattern `cadmus-*.json` are collected.
192///
193/// # Arguments
194///
195/// * `log_dir` - Path to the directory to scan
196///
197/// # Returns
198///
199/// Returns a vector of directory entries representing log files.
200///
201/// # Errors
202///
203/// Returns an error if the directory cannot be read or entries are inaccessible.
204fn collect_run_log_entries(log_dir: &std::path::Path) -> Result<Vec<DirEntry>, Error> {
205    let mut entries = Vec::new();
206    for entry in fs::read_dir(log_dir)
207        .with_context(|| format!("can't read log directory {}", log_dir.display()))?
208    {
209        let entry = entry.context("can't read log directory entry")?;
210        if is_run_log_entry(&entry) {
211            entries.push(entry);
212        }
213    }
214
215    Ok(entries)
216}
217
218/// Determines whether a directory entry is a Cadmus log file.
219///
220/// Returns `true` if the filename starts with `cadmus-` and ends with `.json`.
221///
222/// # Arguments
223///
224/// * `entry` - Directory entry to check
225///
226/// # Returns
227///
228/// `true` if the entry is a log file, `false` otherwise.
229fn is_run_log_entry(entry: &DirEntry) -> bool {
230    let file_name = entry.file_name();
231    let file_name = file_name.to_string_lossy();
232    if !file_name.starts_with(LOG_FILE_PREFIX) {
233        return false;
234    }
235
236    file_name.ends_with(LOG_FILE_SUFFIX)
237}
238
239/// Initializes the logging system with JSON output and optional OpenTelemetry export.
240///
241/// This function sets up the complete logging infrastructure:
242/// - Creates the log directory if it doesn't exist
243/// - Cleans up old log files based on retention policy
244/// - Configures a rolling file appender with non-blocking I/O
245/// - Applies log level filtering from settings or environment
246/// - Sets up JSON formatting for structured logs
247/// - Initializes tracing export if the `tracing` feature is enabled
248/// - Bridges `log::` records (e.g. from pyroscope-rs) into the tracing pipeline
249///   so they appear in Loki alongside tracing events via the tracing-log
250///   layer automatically enabled by tracing-subscriber.
251///
252/// The function should only be called once at application startup.
253/// The logging system remains active until `shutdown_logging()` is called.
254///
255/// # Arguments
256///
257/// * `settings` - Logging configuration including level, directory, and retention
258///
259/// # Returns
260///
261/// Returns `Ok(())` on successful initialization.
262///
263/// # Errors
264///
265/// Returns an error if:
266/// - The current working directory cannot be determined
267/// - The log directory cannot be created
268/// - Log file cleanup fails
269/// - The rolling file appender cannot be initialized
270/// - The log filter configuration is invalid
271/// - The tracing subscriber cannot be initialized
272/// - OpenTelemetry initialization fails (when `otel` feature is enabled)
273///
274/// # Example
275///
276/// ```
277/// use cadmus_core::settings::LoggingSettings;
278/// use cadmus_core::logging::init_logging;
279///
280/// let settings = LoggingSettings {
281///     enabled: true,
282///     level: "debug".to_string(),
283///     max_files: 5,
284///     directory: "logs".into(),
285///     otlp_endpoint: Some("http://localhost:4318".to_string()),
286///     pyroscope_endpoint: None,
287///     enable_kern_log: false,
288///     enable_dbus_log: false,
289/// };
290///
291/// init_logging(&settings)?;
292/// # Ok::<(), anyhow::Error>(())
293/// ```
294pub fn init_logging(settings: &LoggingSettings) -> Result<(), Error> {
295    if !settings.enabled {
296        return Ok(());
297    }
298
299    let current_working_dir =
300        std::env::current_dir().context("can't get current working directory")?;
301    let log_dir = current_working_dir.join(&settings.directory);
302    fs::create_dir_all(&log_dir)
303        .with_context(|| format!("can't create log directory {}", &log_dir.display()))?;
304
305    cleanup_run_logs(&log_dir, settings.max_files)?;
306
307    let appender = tracing_appender::rolling::Builder::new()
308        .rotation(tracing_appender::rolling::Rotation::NEVER)
309        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
310        .filename_suffix(LOG_FILE_SUFFIX)
311        .max_log_files(settings.max_files)
312        .build(&log_dir)
313        .context("can't initialize rolling log file appender")?;
314
315    let (non_blocking, guard) = tracing_appender::non_blocking(appender);
316    let _ = LOG_GUARD.set(Mutex::new(Some(guard)));
317    let _ = WRITER_INNER.set(ArcSwap::new(Arc::new(non_blocking)));
318
319    let swappable = SwappableWriter {
320        inner: WRITER_INNER.get().expect("WRITER_INNER just set"),
321    };
322
323    let filter = build_filter(settings)?;
324
325    let fmt_layer = tracing_subscriber::fmt::layer()
326        .json()
327        .with_ansi(false)
328        .with_writer(swappable)
329        .with_current_span(true);
330
331    #[cfg(feature = "tracing")]
332    {
333        let subscriber = tracing_subscriber::registry()
334            .with(filter)
335            .with(telemetry::tracing::init_telemetry(settings, get_run_id())?)
336            .with(fmt_layer);
337
338        subscriber
339            .try_init()
340            .context("can't initialize tracing subscriber")?;
341    }
342
343    #[cfg(not(feature = "tracing"))]
344    {
345        let subscriber = tracing_subscriber::registry().with(filter).with(fmt_layer);
346
347        subscriber
348            .try_init()
349            .context("can't initialize tracing subscriber")?;
350    }
351
352    eprintln!(
353        "Cadmus run started with ID: {} (version {})",
354        get_run_id(),
355        get_current_version()
356    );
357
358    #[cfg(feature = "test")]
359    if settings.enable_kern_log {
360        kern::spawn_kern_log_thread();
361    }
362
363    Ok(())
364}
365
366/// Gracefully shuts down the logging system and flushes buffered data.
367///
368/// This function ensures all buffered log data is written to disk and, if enabled,
369/// exported to OpenTelemetry endpoints before the application exits. It:
370/// - Flushes the file appender buffer (happens automatically via `LOG_GUARD` drop)
371/// - Shuts down OpenTelemetry providers (when `otel` feature is enabled)
372/// - Ensures no log data is lost on exit
373///
374/// This function should be called once at application shutdown.
375///
376/// # Example
377///
378/// ```no_run
379/// use cadmus_core::logging::{init_logging, shutdown_logging};
380/// use cadmus_core::settings::LoggingSettings;
381///
382/// // At application start
383/// let settings = LoggingSettings::default();
384/// init_logging(&settings)?;
385///
386/// // ... application runs ...
387///
388/// // At application exit
389/// shutdown_logging();
390/// # Ok::<(), anyhow::Error>(())
391/// ```
392pub fn shutdown_logging() {
393    if let Some(mutex) = LOG_GUARD.get() {
394        if let Ok(mut guard_opt) = mutex.lock() {
395            if let Some(guard) = guard_opt.take() {
396                let (tx, rx) = mpsc::channel();
397
398                thread::spawn(move || {
399                    drop(guard);
400                    let _ = tx.send(());
401                });
402
403                let _ = rx.recv_timeout(Duration::from_secs(5));
404                eprintln!("Logging shutdown complete.");
405            }
406        }
407    }
408
409    #[cfg(feature = "tracing")]
410    telemetry::tracing::shutdown_telemetry();
411}
412
413/// Redirects log output to `dir`, flushing the current file first.
414///
415/// Used to keep logging alive across USB share when /mnt/onboard is unmounted.
416///
417/// This function creates a new rolling file appender in the specified directory and updates the
418/// logging system to use it. The old appender is dropped, which flushes any buffered data to disk
419/// after the new appender is in place to avoid log loss.
420pub fn redirect_log_to_dir(dir: &Path, settings: &LoggingSettings) -> Result<(), Error> {
421    let (Some(writer_swap), Some(guard_mutex)) = (WRITER_INNER.get(), LOG_GUARD.get()) else {
422        return Ok(());
423    };
424
425    fs::create_dir_all(dir)
426        .with_context(|| format!("can't create log directory {}", dir.display()))?;
427
428    let appender = tracing_appender::rolling::Builder::new()
429        .rotation(tracing_appender::rolling::Rotation::NEVER)
430        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
431        .filename_suffix(LOG_FILE_SUFFIX)
432        .max_log_files(settings.max_files)
433        .build(dir)
434        .context("can't build log appender for redirect")?;
435
436    let (non_blocking, new_guard) = tracing_appender::non_blocking(appender);
437
438    writer_swap.store(Arc::new(non_blocking));
439
440    let old_guard = {
441        let mut guard_opt = guard_mutex
442            .lock()
443            .map_err(|e| anyhow::anyhow!("failed to get lock for guard during redirect: {e}"))?;
444        let old = guard_opt.take();
445        *guard_opt = Some(new_guard);
446        old
447    };
448
449    drop(old_guard);
450
451    Ok(())
452}
453
454/// Builds an `EnvFilter` from settings or environment variables.
455///
456/// The function checks for the `RUST_LOG` environment variable first, which
457/// overrides the `level` setting. If `RUST_LOG` is not set, it uses the
458/// level from `LoggingSettings` (defaulting to "info" if empty).
459///
460/// # Arguments
461///
462/// * `settings` - Logging settings containing the default level
463///
464/// # Returns
465///
466/// Returns a configured `EnvFilter` instance.
467///
468/// # Errors
469///
470/// Returns an error if the log level string cannot be parsed.
471///
472/// # Example Filter Syntax
473///
474/// ```bash
475/// # Global level
476/// RUST_LOG=debug
477///
478/// # Per-module levels
479/// RUST_LOG=cadmus::view=trace,info
480///
481/// # Complex filtering
482/// RUST_LOG=warn,cadmus::document=debug,cadmus::sync=trace
483/// ```
484fn build_filter(settings: &LoggingSettings) -> Result<EnvFilter, Error> {
485    if let Ok(filter) = EnvFilter::try_from_default_env() {
486        return Ok(filter);
487    }
488
489    let level = settings.level.trim();
490    let level = if level.is_empty() { "info" } else { level };
491
492    EnvFilter::builder()
493        .parse(level)
494        .context("invalid logging level")
495}
496
497#[cfg(test)]
498mod tests {
499    use super::*;
500    use std::sync::OnceLock;
501    use tempfile::TempDir;
502
503    /// Guard that ensures `init_logging` is called at most once per test binary.
504    ///
505    /// `init_logging` registers a global tracing subscriber via `try_init()`, which
506    /// panics (or returns an error) on a second call within the same process. All
507    /// tests that need the logging statics populated must go through this helper.
508    static LOGGING_INIT: OnceLock<TempDir> = OnceLock::new();
509
510    /// Initialise logging once for the whole test binary and return the log dir.
511    ///
512    /// Subsequent calls return the already-initialised directory, so the test can
513    /// be run together with other tests without conflicts.
514    fn ensure_logging_init() -> &'static std::path::Path {
515        LOGGING_INIT
516            .get_or_init(|| {
517                let dir = TempDir::new().expect("failed to create temp dir for logging init");
518                let settings = LoggingSettings {
519                    enabled: true,
520                    level: "info".to_string(),
521                    max_files: 5,
522                    directory: dir.path().to_path_buf(),
523                    otlp_endpoint: None,
524                    pyroscope_endpoint: None,
525                    enable_kern_log: false,
526                    enable_dbus_log: false,
527                };
528                init_logging(&settings).expect("failed to initialize logging for tests");
529                dir
530            })
531            .path()
532    }
533
534    fn create_log_file(dir: &std::path::Path, index: usize) -> Result<(), Error> {
535        let file_name = format!("{}{:04}.{}", LOG_FILE_PREFIX, index, LOG_FILE_SUFFIX);
536        fs::write(dir.join(file_name), b"{}")?;
537        Ok(())
538    }
539
540    fn collect_log_file_names(dir: &std::path::Path) -> Result<Vec<String>, Error> {
541        let mut entries = collect_run_log_entries(dir)?;
542        entries.sort_by_key(|entry| entry.file_name());
543        Ok(entries
544            .into_iter()
545            .map(|entry| entry.file_name().to_string_lossy().into_owned())
546            .collect())
547    }
548
549    #[test]
550    fn test_cleanup_run_logs_removes_oldest_entries() -> Result<(), Error> {
551        let temp_dir = TempDir::new()?;
552        for index in 1..=5 {
553            create_log_file(temp_dir.path(), index)?;
554        }
555
556        cleanup_run_logs(temp_dir.path(), 3)?;
557
558        let remaining = collect_log_file_names(temp_dir.path())?;
559        assert_eq!(remaining.len(), 3);
560        assert_eq!(
561            remaining,
562            vec![
563                format!("{}0003.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
564                format!("{}0004.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
565                format!("{}0005.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
566            ]
567        );
568
569        Ok(())
570    }
571
572    #[test]
573    fn test_cleanup_run_logs_max_files_zero_keeps_all() -> Result<(), Error> {
574        let temp_dir = TempDir::new()?;
575        for index in 1..=3 {
576            create_log_file(temp_dir.path(), index)?;
577        }
578
579        cleanup_run_logs(temp_dir.path(), 0)?;
580
581        let remaining = collect_log_file_names(temp_dir.path())?;
582        assert_eq!(remaining.len(), 3);
583
584        Ok(())
585    }
586
587    /// Verify that `redirect_log_to_dir` creates a log file in the new target
588    /// directory and that the file name matches the expected `cadmus-<run_id>.json`
589    /// pattern.
590    ///
591    /// # Why this test needs `init_logging`
592    ///
593    /// `redirect_log_to_dir` is a no-op when the global `WRITER_INNER` / `LOG_GUARD`
594    /// statics are unset. Those statics are populated only by `init_logging`, so we
595    /// must call it (once, via `ensure_logging_init`) before exercising the redirect
596    /// path. The `ensure_logging_init` helper uses a `OnceLock` so that the global
597    /// tracing subscriber is registered at most once per test binary, avoiding the
598    /// "subscriber already set" error that `try_init()` would otherwise produce.
599    #[test]
600    fn test_redirect_log_to_dir_creates_log_file_in_new_dir() -> Result<(), Error> {
601        ensure_logging_init();
602
603        let redirect_dir = TempDir::new()?;
604
605        let settings = LoggingSettings {
606            enabled: true,
607            level: "info".to_string(),
608            max_files: 5,
609            directory: redirect_dir.path().to_path_buf(),
610            otlp_endpoint: None,
611            pyroscope_endpoint: None,
612            enable_kern_log: false,
613            enable_dbus_log: false,
614        };
615
616        redirect_log_to_dir(redirect_dir.path(), &settings)?;
617
618        // After redirect a non-blocking appender is set up for `redirect_dir`.
619        // The underlying file is created lazily on the first write; emit one log
620        // event so the file is flushed to disk before we inspect the directory.
621        tracing::info!("test redirect log event");
622
623        let expected_file_name = format!("{}{}.{}", LOG_FILE_PREFIX, get_run_id(), LOG_FILE_SUFFIX);
624
625        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
626        #[allow(unused_assignments)]
627        let mut log_files = Vec::new();
628
629        loop {
630            log_files = collect_log_file_names(redirect_dir.path())?;
631
632            if !log_files.is_empty() && log_files.iter().any(|name| name == &expected_file_name) {
633                break;
634            }
635
636            if std::time::Instant::now() >= deadline {
637                break;
638            }
639
640            std::thread::sleep(std::time::Duration::from_millis(50));
641        }
642
643        assert!(
644            !log_files.is_empty(),
645            "expected at least one log file in the redirect directory, but found none"
646        );
647
648        assert!(
649            log_files.iter().any(|name| name == &expected_file_name),
650            "expected a log file named '{}' in redirect dir, but found: {:?}",
651            expected_file_name,
652            log_files,
653        );
654
655        Ok(())
656    }
657}