cadmus_core/
logging.rs

1//! Structured logging infrastructure with JSON output and OpenTelemetry integration.
2//!
3//! This module provides logging functionality for Cadmus, including:
4//! - JSON-structured logs written to rotating files
5//! - Configurable log levels and filtering  
6//! - Automatic log file cleanup based on retention policies
7//! - Optional OpenTelemetry export (when `otel` feature is enabled)
8//! - Unique run ID for correlating logs across a session
9//!
10//! # Architecture
11//!
12//! The logging system is built on the `tracing` crate ecosystem:
13//! - `tracing_subscriber` for composable logging layers
14//! - `tracing_appender` for non-blocking file I/O
15//! - JSON formatting for structured, machine-readable logs
16//! - `EnvFilter` for flexible log level control
17//!
18//! Each application run generates a unique Run ID (UUID v7) that appears in:
19//! - The log filename: `cadmus-<run_id>.json`
20//! - OpenTelemetry resource attributes
21//! - All log entries for correlation
22//!
23//! # Log File Management
24//!
25//! Log files are automatically managed:
26//! - Files are named with the run ID: `cadmus-<run_id>.json`
27//! - Older files are deleted when `max_files` limit is exceeded
28//! - Cleanup happens at initialization, keeping only the most recent files
29//!
30//! # Configuration
31//!
32//! Logging is configured via `LoggingSettings`:
33//!
34//! ```toml
35//! [logging]
36//! enabled = true
37//! level = "info"
38//! max-files = 3
39//! directory = "logs"
40//! otlp-endpoint = "http://localhost:4318"  # Optional
41//! ```
42//!
43//! The log level can be overridden with the `RUST_LOG` environment variable:
44//!
45//! ```bash
46//! RUST_LOG=debug ./cadmus
47//! RUST_LOG=cadmus::view=trace,info ./cadmus
48//! ```
49//!
50//! # Example Usage
51//!
52//! ```rust
53//! use cadmus_core::settings::LoggingSettings;
54//! use cadmus_core::logging::{init_logging, shutdown_logging, get_run_id};
55//!
56//! let settings = LoggingSettings {
57//!     enabled: true,
58//!     level: "info".to_string(),
59//!     max_files: 3,
60//!     directory: "logs".into(),
61//!     otlp_endpoint: None,
62//! };
63//!
64//! // Initialize at application startup
65//! init_logging(&settings)?;
66//! eprintln!("Started with run ID: {}", get_run_id());
67//!
68//! // Use tracing macros throughout the application
69//! tracing::info!("Application started");
70//!
71//! // Shutdown at application exit (flushes buffers)
72//! shutdown_logging();
73//! # Ok::<(), anyhow::Error>(())
74//! ```
75
76use crate::settings::LoggingSettings;
77#[cfg(feature = "otel")]
78use crate::telemetry;
79use anyhow::{Context, Error};
80use std::fs;
81use std::fs::DirEntry;
82use std::sync::mpsc;
83use std::sync::{Mutex, OnceLock};
84use std::thread;
85use std::time::Duration;
86use tracing_appender::non_blocking::WorkerGuard;
87use tracing_subscriber::prelude::*;
88use tracing_subscriber::EnvFilter;
89use uuid::Uuid;
90
/// Version string baked in at compile time from the `GIT_VERSION` environment variable.
const GIT_VERSION: &str = env!("GIT_VERSION");
/// Prefix shared by every run log file name; the run ID is appended to it.
const LOG_FILE_PREFIX: &str = "cadmus-";
/// File name suffix given to the appender and matched when scanning for log files
/// (note: no leading dot).
const LOG_FILE_SUFFIX: &str = "json";

/// Slot holding the non-blocking writer's guard. `init_logging` fills it and
/// `shutdown_logging` takes the guard out and drops it.
static LOG_GUARD: OnceLock<Mutex<Option<WorkerGuard>>> = OnceLock::new();
/// Lazily generated run ID; populated by the first call to `get_run_id`.
static RUN_ID: OnceLock<String> = OnceLock::new();
97
98/// Returns the unique run ID for this application session.
99///
100/// The run ID is a UUID v7 generated at first access and remains constant
101/// for the lifetime of the process. It is used to:
102/// - Name the log file: `cadmus-<run_id>.json`
103/// - Tag OpenTelemetry telemetry exports
104/// - Correlate all operations within a single run
105///
106/// # Returns
107///
108/// A string slice containing the run ID, valid for the program's lifetime.
109///
110/// # Example
111///
112/// ```
113/// use cadmus_core::logging::get_run_id;
114///
115/// let run_id = get_run_id();
116/// eprintln!("Application run ID: {}", run_id);
117/// assert_eq!(get_run_id(), run_id); // Consistent across calls
118/// ```
119pub fn get_run_id() -> &'static str {
120    RUN_ID.get_or_init(|| Uuid::now_v7().to_string()).as_str()
121}
122
123/// Removes old log files to maintain the configured retention limit.
124///
125/// This function scans the log directory for files matching the pattern
126/// `cadmus-*.json` and deletes the oldest files if the count exceeds `max_files`.
127///
128/// Note: this relies on the run ID being a UUID v7 (time-ordered). Filenames are
129/// `cadmus-<run_id>.json` where `<run_id>` is generated with `Uuid::now_v7()`,
130/// so lexicographic sorting of the filenames corresponds to chronological order.
131/// Sorting by file name therefore yields oldest-first ordering for removal.
132///
133/// # Arguments
134///
135/// * `log_dir` - Path to the directory containing log files
136/// * `max_files` - Maximum number of log files to retain (0 = keep all)
137///
138/// # Returns
139///
140/// Returns `Ok(())` on success.
141///
142/// # Errors
143///
144/// Returns an error if:
145/// - The log directory cannot be read
146/// - Individual directory entries cannot be read
147/// - Old log files cannot be deleted
148fn cleanup_run_logs(log_dir: &std::path::Path, max_files: usize) -> Result<(), Error> {
149    if max_files == 0 {
150        return Ok(());
151    }
152
153    let mut entries = collect_run_log_entries(log_dir)?;
154    if entries.len() <= max_files {
155        return Ok(());
156    }
157
158    entries.sort_by_key(|entry| entry.file_name());
159    let remove_count = entries.len().saturating_sub(max_files);
160    for entry in entries.into_iter().take(remove_count) {
161        fs::remove_file(entry.path())
162            .with_context(|| format!("can't remove old log file {}", entry.path().display()))?;
163    }
164
165    Ok(())
166}
167
168/// Collects all Cadmus log file entries from the specified directory.
169///
170/// Only files matching the pattern `cadmus-*.json` are collected.
171///
172/// # Arguments
173///
174/// * `log_dir` - Path to the directory to scan
175///
176/// # Returns
177///
178/// Returns a vector of directory entries representing log files.
179///
180/// # Errors
181///
182/// Returns an error if the directory cannot be read or entries are inaccessible.
183fn collect_run_log_entries(log_dir: &std::path::Path) -> Result<Vec<DirEntry>, Error> {
184    let mut entries = Vec::new();
185    for entry in fs::read_dir(log_dir)
186        .with_context(|| format!("can't read log directory {}", log_dir.display()))?
187    {
188        let entry = entry.context("can't read log directory entry")?;
189        if is_run_log_entry(&entry) {
190            entries.push(entry);
191        }
192    }
193
194    Ok(entries)
195}
196
197/// Determines whether a directory entry is a Cadmus log file.
198///
199/// Returns `true` if the filename starts with `cadmus-` and ends with `.json`.
200///
201/// # Arguments
202///
203/// * `entry` - Directory entry to check
204///
205/// # Returns
206///
207/// `true` if the entry is a log file, `false` otherwise.
208fn is_run_log_entry(entry: &DirEntry) -> bool {
209    let file_name = entry.file_name();
210    let file_name = file_name.to_string_lossy();
211    if !file_name.starts_with(LOG_FILE_PREFIX) {
212        return false;
213    }
214
215    file_name.ends_with(LOG_FILE_SUFFIX)
216}
217
218/// Initializes the logging system with JSON output and optional OpenTelemetry export.
219///
220/// This function sets up the complete logging infrastructure:
221/// - Creates the log directory if it doesn't exist
222/// - Cleans up old log files based on retention policy
223/// - Configures a rolling file appender with non-blocking I/O
224/// - Applies log level filtering from settings or environment
225/// - Sets up JSON formatting for structured logs
226/// - Initializes OpenTelemetry export if the `otel` feature is enabled
227///
228/// The function should only be called once at application startup.
229/// The logging system remains active until `shutdown_logging()` is called.
230///
231/// # Arguments
232///
233/// * `settings` - Logging configuration including level, directory, and retention
234///
235/// # Returns
236///
237/// Returns `Ok(())` on successful initialization.
238///
239/// # Errors
240///
241/// Returns an error if:
242/// - The current working directory cannot be determined
243/// - The log directory cannot be created
244/// - Log file cleanup fails
245/// - The rolling file appender cannot be initialized
246/// - The log filter configuration is invalid
247/// - The tracing subscriber cannot be initialized
248/// - OpenTelemetry initialization fails (when `otel` feature is enabled)
249///
250/// # Example
251///
252/// ```
253/// use cadmus_core::settings::LoggingSettings;
254/// use cadmus_core::logging::init_logging;
255///
256/// let settings = LoggingSettings {
257///     enabled: true,
258///     level: "debug".to_string(),
259///     max_files: 5,
260///     directory: "logs".into(),
261///     otlp_endpoint: Some("http://localhost:4318".to_string()),
262/// };
263///
264/// init_logging(&settings)?;
265/// # Ok::<(), anyhow::Error>(())
266/// ```
267pub fn init_logging(settings: &LoggingSettings) -> Result<(), Error> {
268    if !settings.enabled {
269        return Ok(());
270    }
271
272    let current_working_dir =
273        std::env::current_dir().context("can't get current working directory")?;
274    let log_dir = current_working_dir.join(&settings.directory);
275    fs::create_dir_all(&log_dir)
276        .with_context(|| format!("can't create log directory {}", &log_dir.display()))?;
277
278    cleanup_run_logs(&log_dir, settings.max_files)?;
279
280    let appender = tracing_appender::rolling::Builder::new()
281        .rotation(tracing_appender::rolling::Rotation::NEVER)
282        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
283        .filename_suffix(LOG_FILE_SUFFIX)
284        .max_log_files(settings.max_files)
285        .build(&log_dir)
286        .context("can't initialize rolling log file appender")?;
287
288    let (non_blocking, guard) = tracing_appender::non_blocking(appender);
289    let _ = LOG_GUARD.set(Mutex::new(Some(guard)));
290
291    let filter = build_filter(settings)?;
292
293    let fmt_layer = tracing_subscriber::fmt::layer()
294        .json()
295        .with_ansi(false)
296        .with_writer(non_blocking)
297        .with_current_span(true);
298
299    #[cfg(feature = "otel")]
300    {
301        let subscriber = tracing_subscriber::registry()
302            .with(filter)
303            .with(telemetry::init_telemetry(settings, get_run_id())?)
304            .with(fmt_layer);
305
306        subscriber
307            .try_init()
308            .context("can't initialize tracing subscriber")?;
309    }
310
311    #[cfg(not(feature = "otel"))]
312    {
313        let subscriber = tracing_subscriber::registry().with(filter).with(fmt_layer);
314
315        subscriber
316            .try_init()
317            .context("can't initialize tracing subscriber")?;
318    }
319
320    eprintln!(
321        "Cadmus run started with ID: {} (version {})",
322        get_run_id(),
323        GIT_VERSION
324    );
325
326    Ok(())
327}
328
329/// Gracefully shuts down the logging system and flushes buffered data.
330///
331/// This function ensures all buffered log data is written to disk and, if enabled,
332/// exported to OpenTelemetry endpoints before the application exits. It:
333/// - Flushes the file appender buffer (happens automatically via `LOG_GUARD` drop)
334/// - Shuts down OpenTelemetry providers (when `otel` feature is enabled)
335/// - Ensures no log data is lost on exit
336///
337/// This function should be called once at application shutdown.
338///
339/// # Example
340///
341/// ```no_run
342/// use cadmus_core::logging::{init_logging, shutdown_logging};
343/// use cadmus_core::settings::LoggingSettings;
344///
345/// // At application start
346/// let settings = LoggingSettings::default();
347/// init_logging(&settings)?;
348///
349/// // ... application runs ...
350///
351/// // At application exit
352/// shutdown_logging();
353/// # Ok::<(), anyhow::Error>(())
354/// ```
355pub fn shutdown_logging() {
356    if let Some(mutex) = LOG_GUARD.get() {
357        if let Ok(mut guard_opt) = mutex.lock() {
358            if let Some(guard) = guard_opt.take() {
359                let (tx, rx) = mpsc::channel();
360
361                thread::spawn(move || {
362                    drop(guard);
363                    let _ = tx.send(());
364                });
365
366                let _ = rx.recv_timeout(Duration::from_secs(5));
367                eprintln!("Logging shutdown complete.");
368            }
369        }
370    }
371
372    #[cfg(feature = "otel")]
373    telemetry::shutdown_telemetry();
374}
375
376/// Builds an `EnvFilter` from settings or environment variables.
377///
378/// The function checks for the `RUST_LOG` environment variable first, which
379/// overrides the `level` setting. If `RUST_LOG` is not set, it uses the
380/// level from `LoggingSettings` (defaulting to "info" if empty).
381///
382/// # Arguments
383///
384/// * `settings` - Logging settings containing the default level
385///
386/// # Returns
387///
388/// Returns a configured `EnvFilter` instance.
389///
390/// # Errors
391///
392/// Returns an error if the log level string cannot be parsed.
393///
394/// # Example Filter Syntax
395///
396/// ```bash
397/// # Global level
398/// RUST_LOG=debug
399///
400/// # Per-module levels
401/// RUST_LOG=cadmus::view=trace,info
402///
403/// # Complex filtering
404/// RUST_LOG=warn,cadmus::document=debug,cadmus::sync=trace
405/// ```
406fn build_filter(settings: &LoggingSettings) -> Result<EnvFilter, Error> {
407    if let Ok(filter) = EnvFilter::try_from_default_env() {
408        return Ok(filter);
409    }
410
411    let level = settings.level.trim();
412    let level = if level.is_empty() { "info" } else { level };
413
414    EnvFilter::builder()
415        .parse(level)
416        .context("invalid logging level")
417}
418
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Name of the fake run log with the given zero-padded index.
    fn numbered_log_name(index: usize) -> String {
        format!("{}{:04}.{}", LOG_FILE_PREFIX, index, LOG_FILE_SUFFIX)
    }

    /// Writes a minimal JSON log file for `index` into `dir`.
    fn create_log_file(dir: &std::path::Path, index: usize) -> Result<(), Error> {
        let target = dir.join(numbered_log_name(index));
        fs::write(target, b"{}")?;
        Ok(())
    }

    /// Returns the names of all run log files in `dir`, sorted ascending.
    fn collect_log_file_names(dir: &std::path::Path) -> Result<Vec<String>, Error> {
        let mut names: Vec<String> = collect_run_log_entries(dir)?
            .iter()
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect();
        names.sort();
        Ok(names)
    }

    /// With five logs and a limit of three, the two lowest-named files must go.
    #[test]
    fn test_cleanup_run_logs_removes_oldest_entries() -> Result<(), Error> {
        let temp_dir = TempDir::new()?;
        (1..=5).try_for_each(|index| create_log_file(temp_dir.path(), index))?;

        cleanup_run_logs(temp_dir.path(), 3)?;

        let remaining = collect_log_file_names(temp_dir.path())?;
        let expected: Vec<String> = (3..=5).map(numbered_log_name).collect();
        assert_eq!(remaining.len(), 3);
        assert_eq!(remaining, expected);

        Ok(())
    }

    /// A limit of zero means "no retention limit": nothing is deleted.
    #[test]
    fn test_cleanup_run_logs_max_files_zero_keeps_all() -> Result<(), Error> {
        let temp_dir = TempDir::new()?;
        (1..=3).try_for_each(|index| create_log_file(temp_dir.path(), index))?;

        cleanup_run_logs(temp_dir.path(), 0)?;

        let remaining = collect_log_file_names(temp_dir.path())?;
        assert_eq!(remaining.len(), 3);

        Ok(())
    }
}
476}