vault_audit_tools/commands/
entity_timeline.rs

1//! Entity timeline visualization command.
2//!
3//! ⚠️ **DEPRECATED**: Use `entity-analysis timeline` instead.
4//!
5//! ```bash
6//! # Old (deprecated):
7//! vault-audit entity-timeline logs/*.log --entity-id abc-123-def
8//!
9//! # New (recommended):
10//! vault-audit entity-analysis timeline --entity-id abc-123-def logs/*.log
11//! ```
12//!
13//! See [`entity_analysis`](crate::commands::entity_analysis) for the unified command.
14//!
15//! ---
16//!
17//! Generates a detailed timeline of all operations performed by a specific entity,
18//! useful for understanding entity behavior and troubleshooting issues.
19//! Supports multi-file analysis to track entities across multiple days.
20//!
21//! # Usage
22//!
23//! ```bash
24//! # Single file
25//! vault-audit entity-timeline audit.log --entity-id abc-123-def
26//!
27//! # Multi-day timeline
28//! vault-audit entity-timeline day1.log day2.log day3.log --entity-id abc-123-def
29//! ```
30//!
31//! # Output
32//!
//! Prints a multi-section report of the entity's activity (timestamps are shown in UTC):
//! - Summary statistics (total operations, time span, average rate,
//!   first and last seen timestamps)
//! - Activity summary (operations by type)
//! - Top 30 paths accessed
//!
//! Also provides:
//! - Time-based patterns (busiest hours, hourly distribution, peak 5-minute windows)
//! - Behavioral pattern warnings (high request frequency, heavy token lookups,
//!   concentration on a single path, round-the-clock activity)
44
45use crate::audit::types::AuditEntry;
46use crate::utils::format::format_number;
47use crate::utils::progress::ProgressBar;
48use crate::utils::reader::open_file;
49use anyhow::Result;
50use chrono::{DateTime, Timelike, Utc};
51use std::collections::HashMap;
52use std::io::{BufRead, BufReader};
53
54#[derive(Clone)]
55#[allow(dead_code)]
56struct Operation {
57    timestamp: DateTime<Utc>,
58    path: String,
59    op: String,
60}
61
62pub fn run(log_files: &[String], entity_id: &str, display_name: Option<&String>) -> Result<()> {
63    println!("Analyzing timeline for entity: {}", entity_id);
64    if let Some(name) = display_name {
65        println!("Display name: {}", name);
66    }
67    println!();
68
69    let mut operations_by_hour: HashMap<String, HashMap<String, usize>> = HashMap::new();
70    let mut operations_by_type: HashMap<String, usize> = HashMap::new();
71    let mut paths_accessed: HashMap<String, usize> = HashMap::new();
72    let mut operations_timeline: Vec<Operation> = Vec::new();
73    let mut total_lines = 0;
74    let mut entity_operations = 0;
75
76    // Process each log file sequentially
77    for (file_idx, log_file) in log_files.iter().enumerate() {
78        eprintln!(
79            "[{}/{}] Processing: {}",
80            file_idx + 1,
81            log_files.len(),
82            log_file
83        );
84
85        // Get file size for progress tracking
86        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
87        let mut progress = if let Some(size) = file_size {
88            ProgressBar::new(size, "Processing")
89        } else {
90            ProgressBar::new_spinner("Processing")
91        };
92
93        let file = open_file(log_file)?;
94        let reader = BufReader::new(file);
95
96        let mut file_lines = 0;
97        let mut bytes_read = 0;
98
99        for line in reader.lines() {
100            file_lines += 1;
101            total_lines += 1;
102            let line = line?;
103            bytes_read += line.len() + 1; // +1 for newline
104
105            if file_lines % 10_000 == 0 {
106                if let Some(size) = file_size {
107                    progress.update(bytes_read.min(size));
108                } else {
109                    progress.update(file_lines);
110                }
111            }
112
113            let entry: AuditEntry = match serde_json::from_str(&line) {
114                Ok(e) => e,
115                Err(_) => continue,
116            };
117
118            // Check if this is our entity
119            let entry_entity_id = match &entry.auth {
120                Some(auth) => match &auth.entity_id {
121                    Some(id) => id.as_str(),
122                    None => continue,
123                },
124                None => continue,
125            };
126
127            if entry_entity_id != entity_id {
128                continue;
129            }
130
131            entity_operations += 1;
132
133            let path = entry
134                .request
135                .as_ref()
136                .and_then(|r| r.path.as_deref())
137                .unwrap_or("")
138                .to_string();
139            let operation = entry
140                .request
141                .as_ref()
142                .and_then(|r| r.operation.as_deref())
143                .unwrap_or("")
144                .to_string();
145
146            if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&entry.time) {
147                let ts_utc = ts.with_timezone(&Utc);
148
149                // Track by hour
150                let hour_key = ts_utc.format("%Y-%m-%d %H:00").to_string();
151                let hour_ops = operations_by_hour.entry(hour_key).or_default();
152                *hour_ops.entry("total".to_string()).or_insert(0) += 1;
153                *hour_ops.entry(operation.clone()).or_insert(0) += 1;
154
155                // Store operation for timeline
156                operations_timeline.push(Operation {
157                    timestamp: ts_utc,
158                    path: path.clone(),
159                    op: operation.clone(),
160                });
161            }
162
163            // Track operation types
164            *operations_by_type.entry(operation).or_insert(0) += 1;
165
166            // Track paths
167            *paths_accessed.entry(path).or_insert(0) += 1;
168        }
169
170        // Ensure 100% progress for this file
171        if let Some(size) = file_size {
172            progress.update(size);
173        }
174
175        progress.finish_with_message(&format!(
176            "Processed {} lines from this file",
177            format_number(file_lines)
178        ));
179    }
180
181    eprintln!(
182        "\nTotal: Processed {} lines, found {} operations for entity: {}",
183        format_number(total_lines),
184        format_number(entity_operations),
185        entity_id
186    );
187
188    if entity_operations == 0 {
189        println!("\nNo operations found for this entity!");
190        return Ok(());
191    }
192
193    // Sort timeline
194    operations_timeline.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
195
196    // Calculate time span
197    let (first_op, last_op, time_span_hours) = if operations_timeline.is_empty() {
198        return Ok(());
199    } else {
200        let first = operations_timeline.first().unwrap().timestamp;
201        let last = operations_timeline.last().unwrap().timestamp;
202        let span = (last - first).num_seconds() as f64 / 3600.0;
203        (first, last, span)
204    };
205
206    // Analysis and reporting
207    println!("\n{}", "=".repeat(100));
208    println!("TIMELINE ANALYSIS FOR: {}", entity_id);
209    println!("{}", "=".repeat(100));
210
211    // 1. Summary statistics
212    println!("\n1. SUMMARY STATISTICS");
213    println!("{}", "-".repeat(100));
214    println!("Total operations: {}", format_number(entity_operations));
215    println!(
216        "Time span: {:.2} hours ({:.2} days)",
217        time_span_hours,
218        time_span_hours / 24.0
219    );
220    println!(
221        "Average rate: {:.1} operations/hour ({:.2}/minute)",
222        entity_operations as f64 / time_span_hours,
223        entity_operations as f64 / time_span_hours / 60.0
224    );
225    println!("First operation: {}", first_op.format("%Y-%m-%d %H:%M:%S"));
226    println!("Last operation: {}", last_op.format("%Y-%m-%d %H:%M:%S"));
227
228    // 2. Operation type distribution
229    println!("\n2. OPERATION TYPE DISTRIBUTION");
230    println!("{}", "-".repeat(100));
231    println!("{:<30} {:<15} {:<15}", "Operation", "Count", "Percentage");
232    println!("{}", "-".repeat(100));
233
234    let mut sorted_ops: Vec<_> = operations_by_type.iter().collect();
235    sorted_ops.sort_by(|a, b| b.1.cmp(a.1));
236
237    for (op, count) in sorted_ops {
238        let percentage = (*count as f64 / entity_operations as f64) * 100.0;
239        println!(
240            "{:<30} {:<15} {:<15.2}%",
241            op,
242            format_number(*count),
243            percentage
244        );
245    }
246
247    // 3. Top paths accessed
248    println!("\n3. TOP 30 PATHS ACCESSED");
249    println!("{}", "-".repeat(100));
250    println!("{:<70} {:<15} {:<15}", "Path", "Count", "Percentage");
251    println!("{}", "-".repeat(100));
252
253    let mut sorted_paths: Vec<_> = paths_accessed.iter().collect();
254    sorted_paths.sort_by(|a, b| b.1.cmp(a.1));
255
256    for (path, count) in sorted_paths.iter().take(30) {
257        let percentage = (**count as f64 / entity_operations as f64) * 100.0;
258        let display_path = if path.len() > 68 {
259            format!("{}...", &path[..65])
260        } else {
261            (*path).to_string()
262        };
263        println!(
264            "{:<70} {:<15} {:<15.2}%",
265            display_path,
266            format_number(**count),
267            percentage
268        );
269    }
270
271    // 4. Hourly activity pattern
272    println!("\n4. HOURLY ACTIVITY PATTERN (Top 30 Hours)");
273    println!("{}", "-".repeat(100));
274    println!(
275        "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
276        "Hour", "Total Ops", "read", "update", "list", "Other"
277    );
278    println!("{}", "-".repeat(100));
279
280    let mut sorted_hours: Vec<_> = operations_by_hour.iter().collect();
281    sorted_hours.sort_by(|a, b| {
282        let a_total = a.1.get("total").unwrap_or(&0);
283        let b_total = b.1.get("total").unwrap_or(&0);
284        b_total.cmp(a_total)
285    });
286
287    for (hour, ops) in sorted_hours.iter().take(30) {
288        let total = *ops.get("total").unwrap_or(&0);
289        let read = *ops.get("read").unwrap_or(&0);
290        let update = *ops.get("update").unwrap_or(&0);
291        let list_operations = *ops.get("list").unwrap_or(&0);
292        let other = total - read - update - list_operations;
293
294        println!(
295            "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
296            hour,
297            format_number(total),
298            format_number(read),
299            format_number(update),
300            format_number(list_operations),
301            format_number(other)
302        );
303    }
304
305    // 5. Activity distribution by hour of day
306    println!("\n5. ACTIVITY DISTRIBUTION BY HOUR OF DAY");
307    println!("{}", "-".repeat(100));
308
309    let mut hour_of_day_stats: HashMap<u32, usize> = HashMap::new();
310    for op in &operations_timeline {
311        let hour = op.timestamp.hour();
312        *hour_of_day_stats.entry(hour).or_insert(0) += 1;
313    }
314
315    println!("{:<10} {:<15} {:<50}", "Hour", "Operations", "Bar Chart");
316    println!("{}", "-".repeat(100));
317
318    let max_ops_in_hour = hour_of_day_stats.values().max().copied().unwrap_or(1);
319
320    for hour in 0..24 {
321        let ops = *hour_of_day_stats.get(&hour).unwrap_or(&0);
322        let bar_length = if max_ops_in_hour > 0 {
323            (ops * 50) / max_ops_in_hour
324        } else {
325            0
326        };
327        let bar = "█".repeat(bar_length);
328        println!("{:02}:00     {:<15} {}", hour, format_number(ops), bar);
329    }
330
331    // 6. Peak activity analysis
332    println!("\n6. PEAK ACTIVITY WINDOWS");
333    println!("{}", "-".repeat(100));
334
335    let mut window_counts: HashMap<DateTime<Utc>, usize> = HashMap::new();
336
337    for op in &operations_timeline {
338        // Round to 5-minute window
339        let minute = (op.timestamp.minute() / 5) * 5;
340        let window_start = op
341            .timestamp
342            .with_minute(minute)
343            .unwrap()
344            .with_second(0)
345            .unwrap()
346            .with_nanosecond(0)
347            .unwrap();
348        *window_counts.entry(window_start).or_insert(0) += 1;
349    }
350
351    let mut sorted_windows: Vec<_> = window_counts.iter().collect();
352    sorted_windows.sort_by(|a, b| b.1.cmp(a.1));
353
354    println!(
355        "{:<25} {:<15} {:<20}",
356        "5-Minute Window", "Operations", "Rate (ops/sec)"
357    );
358    println!("{}", "-".repeat(100));
359
360    for (window, count) in sorted_windows.iter().take(20) {
361        let rate = **count as f64 / 300.0;
362        println!(
363            "{:<25} {:<15} {:<20.3}",
364            window.format("%Y-%m-%d %H:%M"),
365            format_number(**count),
366            rate
367        );
368    }
369
370    // 7. Behavioral patterns
371    println!("\n7. BEHAVIORAL PATTERNS");
372    println!("{}", "-".repeat(100));
373
374    if time_span_hours > 1.0 {
375        let ops_per_hour = entity_operations as f64 / time_span_hours;
376        if ops_per_hour > 100.0 {
377            println!(
378                "⚠️  HIGH FREQUENCY: {:.0} operations/hour suggests automated polling",
379                ops_per_hour
380            );
381            println!("   Recommended action: Implement caching or increase polling interval");
382        }
383
384        // Check for token lookup abuse
385        let token_lookup_paths: Vec<_> = paths_accessed
386            .keys()
387            .filter(|p| p.contains("token/lookup"))
388            .collect();
389        let total_token_lookups: usize = token_lookup_paths
390            .iter()
391            .map(|p| paths_accessed.get(*p).unwrap_or(&0))
392            .sum();
393
394        if total_token_lookups > 1000 {
395            println!(
396                "⚠️  TOKEN LOOKUP ABUSE: {} token lookups detected",
397                format_number(total_token_lookups)
398            );
399            println!(
400                "   Rate: {:.1} lookups/hour = {:.2} lookups/second",
401                total_token_lookups as f64 / time_span_hours,
402                total_token_lookups as f64 / time_span_hours / 3600.0
403            );
404            println!("   Recommended action: Implement client-side token TTL tracking");
405        }
406
407        // Check for path concentration
408        if let Some((top_path, top_count)) = sorted_paths.first() {
409            let top_path_pct = (**top_count as f64 / entity_operations as f64) * 100.0;
410            if top_path_pct > 30.0 {
411                println!(
412                    "⚠️  PATH CONCENTRATION: {:.1}% of operations on single path",
413                    top_path_pct
414                );
415                println!("   Path: {}", top_path);
416                println!(
417                    "   Recommended action: Review why this path is accessed {} times",
418                    format_number(**top_count)
419                );
420            }
421        }
422
423        // Check for 24/7 activity
424        let hours_with_activity = (0..24)
425            .filter(|h| hour_of_day_stats.contains_key(h))
426            .count();
427        if hours_with_activity >= 20 {
428            println!(
429                "⚠️  24/7 ACTIVITY: Active in {}/24 hours",
430                hours_with_activity
431            );
432            println!("   Suggests automated system or background process");
433        }
434    }
435
436    println!("\n{}", "=".repeat(100));
437
438    Ok(())
439}