vault_audit_tools/commands/
system_overview.rs

1//! System-wide audit log overview.
2//!
3//! Provides high-level statistics and insights about Vault usage
4//! across the entire audit log. Supports analyzing multiple log files
5//! (compressed or uncompressed) for long-term trend analysis.
6//!
7//! # Usage
8//!
9//! ```bash
10//! # Single file (plain or compressed)
11//! vault-audit system-overview audit.log
12//! vault-audit system-overview audit.log.gz
13//!
14//! # Multiple files for week-long analysis
15//! vault-audit system-overview day1.log day2.log day3.log
16//!
17//! # Using shell globbing with compressed files
18//! vault-audit system-overview logs/vault_audit.2025-10-*.log.gz
19//! ```
20//!
21//! **Compressed File Support**: Automatically detects and decompresses `.gz` (gzip)
22//! and `.zst` (zstandard) files with streaming processing - no temp files needed.
23//!
24//! # Output
25//!
//! Prints a report with the following sections:
//! - Operation types with counts and percentages
//! - Top path prefixes (first two path components)
//! - Top individual paths (operations, distinct entities, most frequent operation)
//! - Top entities by total operations
//! - Potential stress points (entity/path pairs meeting the minimum operation count)
//! - Total lines processed and total operations counted
36//!
37//! Useful for:
38//! - Understanding overall Vault usage
39//! - Capacity planning
40//! - Identifying hotspots
41//! - Security audits
42
43use crate::audit::types::AuditEntry;
44use crate::utils::progress::ProgressBar;
45use crate::utils::reader::open_file;
46use anyhow::Result;
47use std::collections::{HashMap, HashSet};
48use std::io::{BufRead, BufReader};
49
/// Aggregated access statistics for a single request path.
#[derive(Debug)]
struct PathData {
    /// Number of requests recorded against the path.
    count: usize,
    /// Number of requests per operation name.
    operations: HashMap<String, usize>,
    /// Distinct entity IDs recorded for the path.
    entities: HashSet<String>,
}

impl PathData {
    /// Creates an empty record: zero count, no operations, no entities.
    fn new() -> Self {
        let operations = HashMap::default();
        let entities = HashSet::default();
        PathData {
            count: 0,
            operations,
            entities,
        }
    }
}
67
/// Formats `n` in decimal with a comma between each group of three digits,
/// e.g. `1234567` becomes `"1,234,567"`.
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);
    for (idx, digit) in digits.chars().enumerate() {
        // A separator precedes every digit (except the first) whose distance
        // from the end of the number is a multiple of three.
        let remaining = len - idx;
        if idx != 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }
    grouped
}
79
80pub fn run(log_files: &[String], top: usize, min_operations: usize) -> Result<()> {
81    let mut path_operations: HashMap<String, PathData> = HashMap::new();
82    let mut operation_types: HashMap<String, usize> = HashMap::new();
83    let mut path_prefixes: HashMap<String, usize> = HashMap::new();
84    let mut entity_paths: HashMap<String, HashMap<String, usize>> = HashMap::new();
85    let mut entity_names: HashMap<String, String> = HashMap::new();
86    let mut total_lines = 0;
87
88    // Process each log file sequentially
89    for (file_idx, log_file) in log_files.iter().enumerate() {
90        eprintln!(
91            "[{}/{}] Processing: {}",
92            file_idx + 1,
93            log_files.len(),
94            log_file
95        );
96
97        // Get file size for progress tracking
98        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
99        let mut progress = if let Some(size) = file_size {
100            ProgressBar::new(size, "Processing")
101        } else {
102            ProgressBar::new_spinner("Processing")
103        };
104
105        let file = open_file(log_file)?;
106        let reader = BufReader::new(file);
107
108        let mut file_lines = 0;
109        let mut bytes_read = 0;
110
111        for line in reader.lines() {
112            file_lines += 1;
113            total_lines += 1;
114            let line = line?;
115            bytes_read += line.len() + 1; // +1 for newline
116
117            // Update progress every 10k lines for smooth animation
118            if file_lines % 10_000 == 0 {
119                if let Some(size) = file_size {
120                    progress.update(bytes_read.min(size)); // Cap at file size
121                } else {
122                    progress.update(file_lines);
123                }
124            }
125
126            let entry: AuditEntry = match serde_json::from_str(&line) {
127                Ok(e) => e,
128                Err(_) => continue,
129            };
130
131            let request = match &entry.request {
132                Some(r) => r,
133                None => continue,
134            };
135
136            let path = match &request.path {
137                Some(p) => p.as_str(),
138                None => continue,
139            };
140
141            let operation = match &request.operation {
142                Some(o) => o.as_str(),
143                None => continue,
144            };
145
146            let entity_id = entry
147                .auth
148                .as_ref()
149                .and_then(|a| a.entity_id.as_deref())
150                .unwrap_or("no-entity");
151
152            let display_name = entry
153                .auth
154                .as_ref()
155                .and_then(|a| a.display_name.as_deref())
156                .unwrap_or("N/A");
157
158            if path.is_empty() || operation.is_empty() {
159                continue;
160            }
161
162            // Track by full path
163            let path_data = path_operations
164                .entry(path.to_string())
165                .or_insert_with(PathData::new);
166            path_data.count += 1;
167            *path_data
168                .operations
169                .entry(operation.to_string())
170                .or_insert(0) += 1;
171            // Track all entities including "no-entity" to match Python behavior
172            path_data.entities.insert(entity_id.to_string());
173
174            // Track by operation type
175            *operation_types.entry(operation.to_string()).or_insert(0) += 1;
176
177            // Track by path prefix
178            let parts: Vec<&str> = path.trim_matches('/').split('/').collect();
179            let prefix = if parts.len() >= 2 {
180                format!("{}/{}", parts[0], parts[1])
181            } else if !parts.is_empty() {
182                parts[0].to_string()
183            } else {
184                "root".to_string()
185            };
186            *path_prefixes.entry(prefix).or_insert(0) += 1;
187
188            // Track entity usage for all entities (including "no-entity")
189            let entity_map = entity_paths.entry(entity_id.to_string()).or_default();
190            *entity_map.entry(path.to_string()).or_insert(0) += 1;
191            entity_names
192                .entry(entity_id.to_string())
193                .or_insert_with(|| display_name.to_string());
194        }
195
196        // Ensure 100% progress for this file
197        if let Some(size) = file_size {
198            progress.update(size);
199        }
200
201        progress.finish_with_message(&format!(
202            "Processed {} lines from this file",
203            format_number(file_lines)
204        ));
205    }
206
207    eprintln!("\nTotal: Processed {} lines", format_number(total_lines));
208
209    let total_operations: usize = operation_types.values().sum();
210
211    // Print results
212    println!("\n{}", "=".repeat(100));
213    println!("High-Volume Vault Operations Analysis");
214    println!("{}", "=".repeat(100));
215
216    // 1. Operation Types Summary
217    println!("\n1. Operation Types (Overall)");
218    println!("{}", "-".repeat(100));
219    println!("{:<20} {:>15} {:>12}", "Operation", "Count", "Percentage");
220    println!("{}", "-".repeat(100));
221
222    let mut sorted_ops: Vec<_> = operation_types.iter().collect();
223    sorted_ops.sort_by(|a, b| b.1.cmp(a.1));
224
225    for (op, count) in sorted_ops {
226        let pct = if total_operations > 0 {
227            (*count as f64 / total_operations as f64) * 100.0
228        } else {
229            0.0
230        };
231        println!("{:<20} {:>15} {:>11.2}%", op, format_number(*count), pct);
232    }
233
234    println!("{}", "-".repeat(100));
235    println!(
236        "{:<20} {:>15} {:>11.2}%",
237        "TOTAL",
238        format_number(total_operations),
239        100.0
240    );
241
242    // 2. Top Path Prefixes
243    println!("\n2. Top Path Prefixes (First 2 components)");
244    println!("{}", "-".repeat(100));
245    println!(
246        "{:<40} {:>15} {:>12}",
247        "Path Prefix", "Operations", "Percentage"
248    );
249    println!("{}", "-".repeat(100));
250
251    let mut sorted_prefixes: Vec<_> = path_prefixes.iter().collect();
252    sorted_prefixes.sort_by(|a, b| b.1.cmp(a.1));
253
254    for (prefix, count) in sorted_prefixes.iter().take(top) {
255        let pct = if total_operations > 0 {
256            (**count as f64 / total_operations as f64) * 100.0
257        } else {
258            0.0
259        };
260        println!(
261            "{:<40} {:>15} {:>11.2}%",
262            prefix,
263            format_number(**count),
264            pct
265        );
266    }
267
268    // 3. Top Individual Paths
269    println!("\n3. Top {} Individual Paths (Highest Volume)", top);
270    println!("{}", "-".repeat(100));
271    println!(
272        "{:<60} {:>10} {:>10} {:>15}",
273        "Path", "Ops", "Entities", "Top Op"
274    );
275    println!("{}", "-".repeat(100));
276
277    let mut sorted_paths: Vec<_> = path_operations.iter().collect();
278    sorted_paths.sort_by(|a, b| b.1.count.cmp(&a.1.count));
279
280    for (path, data) in sorted_paths.iter().take(top) {
281        if data.count < min_operations {
282            break;
283        }
284        let top_op = data
285            .operations
286            .iter()
287            .max_by_key(|x| x.1)
288            .map(|x| x.0.as_str())
289            .unwrap_or("N/A");
290        let path_display = if path.len() > 60 {
291            format!("{}...", &path[..58])
292        } else {
293            path.to_string()
294        };
295        println!(
296            "{:<60} {:>10} {:>10} {:>15}",
297            path_display,
298            format_number(data.count),
299            format_number(data.entities.len()),
300            top_op
301        );
302    }
303
304    // 4. Top Entities by Total Operations
305    println!("\n4. Top {} Entities by Total Operations", top);
306    println!("{}", "-".repeat(100));
307    println!(
308        "{:<50} {:<38} {:>10}",
309        "Display Name", "Entity ID", "Total Ops"
310    );
311    println!("{}", "-".repeat(100));
312
313    let mut entity_totals: HashMap<String, usize> = HashMap::new();
314    for (entity_id, paths) in &entity_paths {
315        let total: usize = paths.values().sum();
316        entity_totals.insert(entity_id.clone(), total);
317    }
318
319    let mut sorted_entities: Vec<_> = entity_totals.iter().collect();
320    sorted_entities.sort_by(|a, b| b.1.cmp(a.1));
321
322    for (entity_id, total) in sorted_entities.iter().take(top) {
323        let name = entity_names
324            .get(*entity_id)
325            .map(|s| s.as_str())
326            .unwrap_or("N/A");
327        let name_display = if name.len() > 48 { &name[..48] } else { name };
328        let entity_short = if entity_id.len() > 36 {
329            &entity_id[..36]
330        } else {
331            entity_id
332        };
333        println!(
334            "{:<50} {:<38} {:>10}",
335            name_display,
336            entity_short,
337            format_number(**total)
338        );
339    }
340
341    // 5. Potential Stress Points
342    println!("\n5. Potential System Stress Points");
343    println!("{}", "-".repeat(100));
344
345    #[derive(Debug)]
346    struct StressPoint {
347        path: String,
348        entity_name: String,
349        operations: usize,
350    }
351
352    let mut stress_points = Vec::new();
353
354    for (path, data) in &path_operations {
355        if data.count >= min_operations {
356            for entity_id in &data.entities {
357                if let Some(entity_ops_map) = entity_paths.get(entity_id) {
358                    if let Some(&entity_ops) = entity_ops_map.get(path) {
359                        if entity_ops >= min_operations {
360                            stress_points.push(StressPoint {
361                                path: path.clone(),
362                                entity_name: entity_names
363                                    .get(entity_id)
364                                    .cloned()
365                                    .unwrap_or_else(|| "N/A".to_string()),
366                                operations: entity_ops,
367                            });
368                        }
369                    }
370                }
371            }
372        }
373    }
374
375    stress_points.sort_by(|a, b| b.operations.cmp(&a.operations));
376
377    println!("{:<40} {:<40} {:>10}", "Entity", "Path", "Ops");
378    println!("{}", "-".repeat(100));
379
380    for sp in stress_points.iter().take(top) {
381        let entity_display = if sp.entity_name.len() > 38 {
382            &sp.entity_name[..38]
383        } else {
384            &sp.entity_name
385        };
386        let path_display = if sp.path.len() > 38 {
387            &sp.path[..38]
388        } else {
389            &sp.path
390        };
391        println!(
392            "{:<40} {:<40} {:>10}",
393            entity_display,
394            path_display,
395            format_number(sp.operations)
396        );
397    }
398
399    println!("{}", "=".repeat(100));
400    println!("\nTotal Lines Processed: {}", format_number(total_lines));
401    println!("Total Operations: {}", format_number(total_operations));
402    println!("{}", "=".repeat(100));
403
404    Ok(())
405}