cadmus_core/
logging.rs

1//! Structured logging infrastructure with JSON output and OpenTelemetry integration.
2//!
3//! This module provides logging functionality for Cadmus, including:
4//! - JSON-structured logs written to rotating files
5//! - Configurable log levels and filtering
6//! - Automatic log file cleanup based on retention policies
7//! - Optional OpenTelemetry export (when `otel` feature is enabled)
8//! - Unique run ID for correlating logs across a session
9//!
10//! # Architecture
11//!
12//! The logging system is built on the `tracing` crate ecosystem:
13//! - `tracing_subscriber` for composable logging layers
14//! - `tracing_appender` for non-blocking file I/O
15//! - JSON formatting for structured, machine-readable logs
16//! - `EnvFilter` for flexible log level control
17//!
18//! Each application run generates a unique Run ID (UUID v7) that appears in:
19//! - The log filename: `cadmus-<run_id>.json`
20//! - OpenTelemetry resource attributes
21//! - All log entries for correlation
22//!
23//! # Log File Management
24//!
25//! Log files are automatically managed:
26//! - Files are named with the run ID: `cadmus-<run_id>.json`
27//! - Older files are deleted when `max_files` limit is exceeded
28//! - Cleanup happens at initialization, keeping only the most recent files
29//!
30//! # Configuration
31//!
32//! Logging is configured via `LoggingSettings`:
33//!
34//! ```toml
35//! [logging]
36//! enabled = true
37//! level = "info"
38//! max-files = 3
39//! directory = "logs"
40//! otlp-endpoint = "http://localhost:4318"  # Optional
41//! ```
42//!
43//! The log level can be overridden with the `RUST_LOG` environment variable:
44//!
45//! ```bash
46//! RUST_LOG=debug ./cadmus
47//! RUST_LOG=cadmus::view=trace,info ./cadmus
48//! ```
49//!
50//! # Example Usage
51//!
52//! ```rust
53//! use cadmus_core::settings::LoggingSettings;
54//! use cadmus_core::logging::{init_logging, shutdown_logging, get_run_id};
55//!
56//! let settings = LoggingSettings {
57//!     enabled: true,
58//!     level: "info".to_string(),
59//!     max_files: 3,
60//!     directory: "logs".into(),
61//!     otlp_endpoint: None,
62//!     enable_kern_log: false,
63//!     enable_dbus_log: false,
64//! };
65//!
66//! // Initialize at application startup
67//! init_logging(&settings)?;
68//! eprintln!("Started with run ID: {}", get_run_id());
69//!
70//! // Use tracing macros throughout the application
71//! tracing::info!("Application started");
72//!
73//! // Shutdown at application exit (flushes buffers)
74//! shutdown_logging();
75//! # Ok::<(), anyhow::Error>(())
76//! ```
77
78use crate::settings::LoggingSettings;
79#[cfg(feature = "otel")]
80use crate::telemetry;
81use crate::version::get_current_version;
82use anyhow::{Context, Error};
83use arc_swap::ArcSwap;
84use std::fs;
85use std::fs::DirEntry;
86use std::path::Path;
87use std::sync::mpsc;
88use std::sync::{Arc, Mutex, OnceLock};
89use std::thread;
90use std::time::Duration;
91use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
92use tracing_subscriber::prelude::*;
93use tracing_subscriber::EnvFilter;
94use uuid::Uuid;
95
96mod kern;
97
98const LOG_FILE_PREFIX: &str = "cadmus-";
99const LOG_FILE_SUFFIX: &str = "json";
100
101static LOG_GUARD: OnceLock<Mutex<Option<WorkerGuard>>> = OnceLock::new();
102static RUN_ID: OnceLock<String> = OnceLock::new();
103static WRITER_INNER: OnceLock<ArcSwap<NonBlocking>> = OnceLock::new();
104
105struct SwappableWriter {
106    inner: &'static ArcSwap<NonBlocking>,
107}
108
109impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for SwappableWriter {
110    type Writer = NonBlocking;
111
112    /// Performs a lock-free atomic load of the current writer.
113    fn make_writer(&'a self) -> Self::Writer {
114        self.inner.load().as_ref().clone()
115    }
116}
117
118/// Returns the unique run ID for this application session.
119///
120/// The run ID is a UUID v7 generated at first access and remains constant
121/// for the lifetime of the process. It is used to:
122/// - Name the log file: `cadmus-<run_id>.json`
123/// - Tag OpenTelemetry telemetry exports
124/// - Correlate all operations within a single run
125///
126/// # Returns
127///
128/// A string slice containing the run ID, valid for the program's lifetime.
129///
130/// # Example
131///
132/// ```
133/// use cadmus_core::logging::get_run_id;
134///
135/// let run_id = get_run_id();
136/// eprintln!("Application run ID: {}", run_id);
137/// assert_eq!(get_run_id(), run_id); // Consistent across calls
138/// ```
139pub fn get_run_id() -> &'static str {
140    RUN_ID.get_or_init(|| Uuid::now_v7().to_string()).as_str()
141}
142
143/// Removes old log files to maintain the configured retention limit.
144///
145/// This function scans the log directory for files matching the pattern
146/// `cadmus-*.json` and deletes the oldest files if the count exceeds `max_files`.
147///
148/// Note: this relies on the run ID being a UUID v7 (time-ordered). Filenames are
149/// `cadmus-<run_id>.json` where `<run_id>` is generated with `Uuid::now_v7()`,
150/// so lexicographic sorting of the filenames corresponds to chronological order.
151/// Sorting by file name therefore yields oldest-first ordering for removal.
152///
153/// # Arguments
154///
155/// * `log_dir` - Path to the directory containing log files
156/// * `max_files` - Maximum number of log files to retain (0 = keep all)
157///
158/// # Returns
159///
160/// Returns `Ok(())` on success.
161///
162/// # Errors
163///
164/// Returns an error if:
165/// - The log directory cannot be read
166/// - Individual directory entries cannot be read
167/// - Old log files cannot be deleted
168fn cleanup_run_logs(log_dir: &std::path::Path, max_files: usize) -> Result<(), Error> {
169    if max_files == 0 {
170        return Ok(());
171    }
172
173    let mut entries = collect_run_log_entries(log_dir)?;
174    if entries.len() <= max_files {
175        return Ok(());
176    }
177
178    entries.sort_by_key(|entry| entry.file_name());
179    let remove_count = entries.len().saturating_sub(max_files);
180    for entry in entries.into_iter().take(remove_count) {
181        fs::remove_file(entry.path())
182            .with_context(|| format!("can't remove old log file {}", entry.path().display()))?;
183    }
184
185    Ok(())
186}
187
188/// Collects all Cadmus log file entries from the specified directory.
189///
190/// Only files matching the pattern `cadmus-*.json` are collected.
191///
192/// # Arguments
193///
194/// * `log_dir` - Path to the directory to scan
195///
196/// # Returns
197///
198/// Returns a vector of directory entries representing log files.
199///
200/// # Errors
201///
202/// Returns an error if the directory cannot be read or entries are inaccessible.
203fn collect_run_log_entries(log_dir: &std::path::Path) -> Result<Vec<DirEntry>, Error> {
204    let mut entries = Vec::new();
205    for entry in fs::read_dir(log_dir)
206        .with_context(|| format!("can't read log directory {}", log_dir.display()))?
207    {
208        let entry = entry.context("can't read log directory entry")?;
209        if is_run_log_entry(&entry) {
210            entries.push(entry);
211        }
212    }
213
214    Ok(entries)
215}
216
217/// Determines whether a directory entry is a Cadmus log file.
218///
219/// Returns `true` if the filename starts with `cadmus-` and ends with `.json`.
220///
221/// # Arguments
222///
223/// * `entry` - Directory entry to check
224///
225/// # Returns
226///
227/// `true` if the entry is a log file, `false` otherwise.
228fn is_run_log_entry(entry: &DirEntry) -> bool {
229    let file_name = entry.file_name();
230    let file_name = file_name.to_string_lossy();
231    if !file_name.starts_with(LOG_FILE_PREFIX) {
232        return false;
233    }
234
235    file_name.ends_with(LOG_FILE_SUFFIX)
236}
237
238/// Initializes the logging system with JSON output and optional OpenTelemetry export.
239///
240/// This function sets up the complete logging infrastructure:
241/// - Creates the log directory if it doesn't exist
242/// - Cleans up old log files based on retention policy
243/// - Configures a rolling file appender with non-blocking I/O
244/// - Applies log level filtering from settings or environment
245/// - Sets up JSON formatting for structured logs
246/// - Initializes OpenTelemetry export if the `otel` feature is enabled
247///
248/// The function should only be called once at application startup.
249/// The logging system remains active until `shutdown_logging()` is called.
250///
251/// # Arguments
252///
253/// * `settings` - Logging configuration including level, directory, and retention
254///
255/// # Returns
256///
257/// Returns `Ok(())` on successful initialization.
258///
259/// # Errors
260///
261/// Returns an error if:
262/// - The current working directory cannot be determined
263/// - The log directory cannot be created
264/// - Log file cleanup fails
265/// - The rolling file appender cannot be initialized
266/// - The log filter configuration is invalid
267/// - The tracing subscriber cannot be initialized
268/// - OpenTelemetry initialization fails (when `otel` feature is enabled)
269///
270/// # Example
271///
272/// ```
273/// use cadmus_core::settings::LoggingSettings;
274/// use cadmus_core::logging::init_logging;
275///
276/// let settings = LoggingSettings {
277///     enabled: true,
278///     level: "debug".to_string(),
279///     max_files: 5,
280///     directory: "logs".into(),
281///     otlp_endpoint: Some("http://localhost:4318".to_string()),
282///     enable_kern_log: false,
283///     enable_dbus_log: false,
284/// };
285///
286/// init_logging(&settings)?;
287/// # Ok::<(), anyhow::Error>(())
288/// ```
289pub fn init_logging(settings: &LoggingSettings) -> Result<(), Error> {
290    if !settings.enabled {
291        return Ok(());
292    }
293
294    let current_working_dir =
295        std::env::current_dir().context("can't get current working directory")?;
296    let log_dir = current_working_dir.join(&settings.directory);
297    fs::create_dir_all(&log_dir)
298        .with_context(|| format!("can't create log directory {}", &log_dir.display()))?;
299
300    cleanup_run_logs(&log_dir, settings.max_files)?;
301
302    let appender = tracing_appender::rolling::Builder::new()
303        .rotation(tracing_appender::rolling::Rotation::NEVER)
304        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
305        .filename_suffix(LOG_FILE_SUFFIX)
306        .max_log_files(settings.max_files)
307        .build(&log_dir)
308        .context("can't initialize rolling log file appender")?;
309
310    let (non_blocking, guard) = tracing_appender::non_blocking(appender);
311    let _ = LOG_GUARD.set(Mutex::new(Some(guard)));
312    let _ = WRITER_INNER.set(ArcSwap::new(Arc::new(non_blocking)));
313
314    let swappable = SwappableWriter {
315        inner: WRITER_INNER.get().expect("WRITER_INNER just set"),
316    };
317
318    let filter = build_filter(settings)?;
319
320    let fmt_layer = tracing_subscriber::fmt::layer()
321        .json()
322        .with_ansi(false)
323        .with_writer(swappable)
324        .with_current_span(true);
325
326    #[cfg(feature = "otel")]
327    {
328        let subscriber = tracing_subscriber::registry()
329            .with(filter)
330            .with(telemetry::init_telemetry(settings, get_run_id())?)
331            .with(fmt_layer);
332
333        subscriber
334            .try_init()
335            .context("can't initialize tracing subscriber")?;
336    }
337
338    #[cfg(not(feature = "otel"))]
339    {
340        let subscriber = tracing_subscriber::registry().with(filter).with(fmt_layer);
341
342        subscriber
343            .try_init()
344            .context("can't initialize tracing subscriber")?;
345    }
346
347    eprintln!(
348        "Cadmus run started with ID: {} (version {})",
349        get_run_id(),
350        get_current_version()
351    );
352
353    #[cfg(feature = "test")]
354    if settings.enable_kern_log {
355        kern::spawn_kern_log_thread();
356    }
357
358    Ok(())
359}
360
361/// Gracefully shuts down the logging system and flushes buffered data.
362///
363/// This function ensures all buffered log data is written to disk and, if enabled,
364/// exported to OpenTelemetry endpoints before the application exits. It:
365/// - Flushes the file appender buffer (happens automatically via `LOG_GUARD` drop)
366/// - Shuts down OpenTelemetry providers (when `otel` feature is enabled)
367/// - Ensures no log data is lost on exit
368///
369/// This function should be called once at application shutdown.
370///
371/// # Example
372///
373/// ```no_run
374/// use cadmus_core::logging::{init_logging, shutdown_logging};
375/// use cadmus_core::settings::LoggingSettings;
376///
377/// // At application start
378/// let settings = LoggingSettings::default();
379/// init_logging(&settings)?;
380///
381/// // ... application runs ...
382///
383/// // At application exit
384/// shutdown_logging();
385/// # Ok::<(), anyhow::Error>(())
386/// ```
387pub fn shutdown_logging() {
388    if let Some(mutex) = LOG_GUARD.get() {
389        if let Ok(mut guard_opt) = mutex.lock() {
390            if let Some(guard) = guard_opt.take() {
391                let (tx, rx) = mpsc::channel();
392
393                thread::spawn(move || {
394                    drop(guard);
395                    let _ = tx.send(());
396                });
397
398                let _ = rx.recv_timeout(Duration::from_secs(5));
399                eprintln!("Logging shutdown complete.");
400            }
401        }
402    }
403
404    #[cfg(feature = "otel")]
405    telemetry::shutdown_telemetry();
406}
407
408/// Redirects log output to `dir`, flushing the current file first.
409///
410/// Used to keep logging alive across USB share when /mnt/onboard is unmounted.
411///
412/// This function creates a new rolling file appender in the specified directory and updates the
413/// logging system to use it. The old appender is dropped, which flushes any buffered data to disk
414/// after the new appender is in place to avoid log loss.
415pub fn redirect_log_to_dir(dir: &Path, settings: &LoggingSettings) -> Result<(), Error> {
416    let (Some(writer_swap), Some(guard_mutex)) = (WRITER_INNER.get(), LOG_GUARD.get()) else {
417        return Ok(());
418    };
419
420    fs::create_dir_all(dir)
421        .with_context(|| format!("can't create log directory {}", dir.display()))?;
422
423    let appender = tracing_appender::rolling::Builder::new()
424        .rotation(tracing_appender::rolling::Rotation::NEVER)
425        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
426        .filename_suffix(LOG_FILE_SUFFIX)
427        .max_log_files(settings.max_files)
428        .build(dir)
429        .context("can't build log appender for redirect")?;
430
431    let (non_blocking, new_guard) = tracing_appender::non_blocking(appender);
432
433    writer_swap.store(Arc::new(non_blocking));
434
435    let old_guard = {
436        let mut guard_opt = guard_mutex
437            .lock()
438            .map_err(|e| anyhow::anyhow!("failed to get lock for guard during redirect: {e}"))?;
439        let old = guard_opt.take();
440        *guard_opt = Some(new_guard);
441        old
442    };
443
444    drop(old_guard);
445
446    Ok(())
447}
448
449/// Builds an `EnvFilter` from settings or environment variables.
450///
451/// The function checks for the `RUST_LOG` environment variable first, which
452/// overrides the `level` setting. If `RUST_LOG` is not set, it uses the
453/// level from `LoggingSettings` (defaulting to "info" if empty).
454///
455/// # Arguments
456///
457/// * `settings` - Logging settings containing the default level
458///
459/// # Returns
460///
461/// Returns a configured `EnvFilter` instance.
462///
463/// # Errors
464///
465/// Returns an error if the log level string cannot be parsed.
466///
467/// # Example Filter Syntax
468///
469/// ```bash
470/// # Global level
471/// RUST_LOG=debug
472///
473/// # Per-module levels
474/// RUST_LOG=cadmus::view=trace,info
475///
476/// # Complex filtering
477/// RUST_LOG=warn,cadmus::document=debug,cadmus::sync=trace
478/// ```
479fn build_filter(settings: &LoggingSettings) -> Result<EnvFilter, Error> {
480    if let Ok(filter) = EnvFilter::try_from_default_env() {
481        return Ok(filter);
482    }
483
484    let level = settings.level.trim();
485    let level = if level.is_empty() { "info" } else { level };
486
487    EnvFilter::builder()
488        .parse(level)
489        .context("invalid logging level")
490}
491
492#[cfg(test)]
493mod tests {
494    use super::*;
495    use std::sync::OnceLock;
496    use tempfile::TempDir;
497
498    /// Guard that ensures `init_logging` is called at most once per test binary.
499    ///
500    /// `init_logging` registers a global tracing subscriber via `try_init()`, which
501    /// panics (or returns an error) on a second call within the same process. All
502    /// tests that need the logging statics populated must go through this helper.
503    static LOGGING_INIT: OnceLock<TempDir> = OnceLock::new();
504
505    /// Initialise logging once for the whole test binary and return the log dir.
506    ///
507    /// Subsequent calls return the already-initialised directory, so the test can
508    /// be run together with other tests without conflicts.
509    fn ensure_logging_init() -> &'static std::path::Path {
510        LOGGING_INIT
511            .get_or_init(|| {
512                let dir = TempDir::new().expect("failed to create temp dir for logging init");
513                let settings = LoggingSettings {
514                    enabled: true,
515                    level: "info".to_string(),
516                    max_files: 5,
517                    directory: dir.path().to_path_buf(),
518                    otlp_endpoint: None,
519                    enable_kern_log: false,
520                    enable_dbus_log: false,
521                };
522                init_logging(&settings).expect("failed to initialize logging for tests");
523                dir
524            })
525            .path()
526    }
527
528    fn create_log_file(dir: &std::path::Path, index: usize) -> Result<(), Error> {
529        let file_name = format!("{}{:04}.{}", LOG_FILE_PREFIX, index, LOG_FILE_SUFFIX);
530        fs::write(dir.join(file_name), b"{}")?;
531        Ok(())
532    }
533
534    fn collect_log_file_names(dir: &std::path::Path) -> Result<Vec<String>, Error> {
535        let mut entries = collect_run_log_entries(dir)?;
536        entries.sort_by_key(|entry| entry.file_name());
537        Ok(entries
538            .into_iter()
539            .map(|entry| entry.file_name().to_string_lossy().into_owned())
540            .collect())
541    }
542
543    #[test]
544    fn test_cleanup_run_logs_removes_oldest_entries() -> Result<(), Error> {
545        let temp_dir = TempDir::new()?;
546        for index in 1..=5 {
547            create_log_file(temp_dir.path(), index)?;
548        }
549
550        cleanup_run_logs(temp_dir.path(), 3)?;
551
552        let remaining = collect_log_file_names(temp_dir.path())?;
553        assert_eq!(remaining.len(), 3);
554        assert_eq!(
555            remaining,
556            vec![
557                format!("{}0003.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
558                format!("{}0004.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
559                format!("{}0005.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
560            ]
561        );
562
563        Ok(())
564    }
565
566    #[test]
567    fn test_cleanup_run_logs_max_files_zero_keeps_all() -> Result<(), Error> {
568        let temp_dir = TempDir::new()?;
569        for index in 1..=3 {
570            create_log_file(temp_dir.path(), index)?;
571        }
572
573        cleanup_run_logs(temp_dir.path(), 0)?;
574
575        let remaining = collect_log_file_names(temp_dir.path())?;
576        assert_eq!(remaining.len(), 3);
577
578        Ok(())
579    }
580
581    /// Verify that `redirect_log_to_dir` creates a log file in the new target
582    /// directory and that the file name matches the expected `cadmus-<run_id>.json`
583    /// pattern.
584    ///
585    /// # Why this test needs `init_logging`
586    ///
587    /// `redirect_log_to_dir` is a no-op when the global `WRITER_INNER` / `LOG_GUARD`
588    /// statics are unset. Those statics are populated only by `init_logging`, so we
589    /// must call it (once, via `ensure_logging_init`) before exercising the redirect
590    /// path. The `ensure_logging_init` helper uses a `OnceLock` so that the global
591    /// tracing subscriber is registered at most once per test binary, avoiding the
592    /// "subscriber already set" error that `try_init()` would otherwise produce.
593    #[test]
594    fn test_redirect_log_to_dir_creates_log_file_in_new_dir() -> Result<(), Error> {
595        ensure_logging_init();
596
597        let redirect_dir = TempDir::new()?;
598
599        let settings = LoggingSettings {
600            enabled: true,
601            level: "info".to_string(),
602            max_files: 5,
603            directory: redirect_dir.path().to_path_buf(),
604            otlp_endpoint: None,
605            enable_kern_log: false,
606            enable_dbus_log: false,
607        };
608
609        redirect_log_to_dir(redirect_dir.path(), &settings)?;
610
611        // After redirect a non-blocking appender is set up for `redirect_dir`.
612        // The underlying file is created lazily on the first write; emit one log
613        // event so the file is flushed to disk before we inspect the directory.
614        tracing::info!("test redirect log event");
615
616        let expected_file_name = format!("{}{}.{}", LOG_FILE_PREFIX, get_run_id(), LOG_FILE_SUFFIX);
617
618        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
619        #[allow(unused_assignments)]
620        let mut log_files = Vec::new();
621
622        loop {
623            log_files = collect_log_file_names(redirect_dir.path())?;
624
625            if !log_files.is_empty() && log_files.iter().any(|name| name == &expected_file_name) {
626                break;
627            }
628
629            if std::time::Instant::now() >= deadline {
630                break;
631            }
632
633            std::thread::sleep(std::time::Duration::from_millis(50));
634        }
635
636        assert!(
637            !log_files.is_empty(),
638            "expected at least one log file in the redirect directory, but found none"
639        );
640
641        assert!(
642            log_files.iter().any(|name| name == &expected_file_name),
643            "expected a log file named '{}' in redirect dir, but found: {:?}",
644            expected_file_name,
645            log_files,
646        );
647
648        Ok(())
649    }
650}