vault_audit_tools/commands/
entity_timeline.rs

1//! Entity timeline visualization command.
2//!
3//! ⚠️ **DEPRECATED**: Use `entity-analysis timeline` instead.
4//!
5//! ```bash
6//! # Old (deprecated):
7//! vault-audit entity-timeline logs/*.log --entity-id abc-123-def
8//!
9//! # New (recommended):
10//! vault-audit entity-analysis timeline --entity-id abc-123-def logs/*.log
11//! ```
12//!
13//! See [`entity_analysis`](crate::commands::entity_analysis) for the unified command.
14//!
15//! ---
16//!
17//! Generates a detailed timeline of all operations performed by a specific entity,
18//! useful for understanding entity behavior and troubleshooting issues.
19//! Supports multi-file analysis to track entities across multiple days.
20//!
21//! # Usage
22//!
23//! ```bash
24//! # Single file
25//! vault-audit entity-timeline audit.log --entity-id abc-123-def
26//!
27//! # Multi-day timeline
28//! vault-audit entity-timeline day1.log day2.log day3.log --entity-id abc-123-def
29//! ```
30//!
31//! # Output
32//!
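//! Prints a report for the selected entity, split into numbered sections:
//! 1. Summary statistics (total operations, time span, average rate,
//!    first and last operation timestamps)
//! 2. Operation type distribution (read, update, list, etc.)
//! 3. Top 30 paths accessed
//! 4. Hourly activity pattern (the 30 busiest hours)
//! 5. Activity distribution by hour of day (bar chart)
//! 6. Peak activity windows (the 20 busiest 5-minute windows)
//! 7. Behavioral patterns (high-frequency polling, token lookup abuse,
//!    path concentration, 24/7 activity)
//!
//! Progress and per-file line counts are written to stderr.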
44
45use crate::audit::types::AuditEntry;
46use crate::utils::progress::ProgressBar;
47use crate::utils::reader::open_file;
48use anyhow::Result;
49use chrono::{DateTime, Timelike, Utc};
50use std::collections::HashMap;
51use std::io::{BufRead, BufReader};
52
/// Renders `n` in decimal with a comma between every group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: usize) -> String {
    let digits = n.to_string();

    // `rchunks` groups from the right-hand side, so only the leading group can
    // be shorter than three digits; reversing restores most-significant-first order.
    let groups: Vec<&str> = digits
        .as_bytes()
        .rchunks(3)
        .rev()
        .map(|group| std::str::from_utf8(group).expect("decimal digits are ASCII"))
        .collect();

    groups.join(",")
}
64
65#[derive(Clone)]
66#[allow(dead_code)]
67struct Operation {
68    timestamp: DateTime<Utc>,
69    path: String,
70    operation: String,
71}
72
73pub fn run(log_files: &[String], entity_id: &str, display_name: &Option<String>) -> Result<()> {
74    println!("Analyzing timeline for entity: {}", entity_id);
75    if let Some(name) = display_name {
76        println!("Display name: {}", name);
77    }
78    println!();
79
80    let mut operations_by_hour: HashMap<String, HashMap<String, usize>> = HashMap::new();
81    let mut operations_by_type: HashMap<String, usize> = HashMap::new();
82    let mut paths_accessed: HashMap<String, usize> = HashMap::new();
83    let mut operations_timeline: Vec<Operation> = Vec::new();
84    let mut total_lines = 0;
85    let mut entity_operations = 0;
86
87    // Process each log file sequentially
88    for (file_idx, log_file) in log_files.iter().enumerate() {
89        eprintln!(
90            "[{}/{}] Processing: {}",
91            file_idx + 1,
92            log_files.len(),
93            log_file
94        );
95
96        // Get file size for progress tracking
97        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
98        let mut progress = if let Some(size) = file_size {
99            ProgressBar::new(size, "Processing")
100        } else {
101            ProgressBar::new_spinner("Processing")
102        };
103
104        let file = open_file(log_file)?;
105        let reader = BufReader::new(file);
106
107        let mut file_lines = 0;
108        let mut bytes_read = 0;
109
110        for line in reader.lines() {
111            file_lines += 1;
112            total_lines += 1;
113            let line = line?;
114            bytes_read += line.len() + 1; // +1 for newline
115
116            if file_lines % 10_000 == 0 {
117                if let Some(size) = file_size {
118                    progress.update(bytes_read.min(size));
119                } else {
120                    progress.update(file_lines);
121                }
122            }
123
124            let entry: AuditEntry = match serde_json::from_str(&line) {
125                Ok(e) => e,
126                Err(_) => continue,
127            };
128
129            // Check if this is our entity
130            let entry_entity_id = match &entry.auth {
131                Some(auth) => match &auth.entity_id {
132                    Some(id) => id.as_str(),
133                    None => continue,
134                },
135                None => continue,
136            };
137
138            if entry_entity_id != entity_id {
139                continue;
140            }
141
142            entity_operations += 1;
143
144            let path = entry
145                .request
146                .as_ref()
147                .and_then(|r| r.path.as_deref())
148                .unwrap_or("")
149                .to_string();
150            let operation = entry
151                .request
152                .as_ref()
153                .and_then(|r| r.operation.as_deref())
154                .unwrap_or("")
155                .to_string();
156
157            if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&entry.time) {
158                let ts_utc = ts.with_timezone(&Utc);
159
160                // Track by hour
161                let hour_key = ts_utc.format("%Y-%m-%d %H:00").to_string();
162                let hour_ops = operations_by_hour.entry(hour_key).or_default();
163                *hour_ops.entry("total".to_string()).or_insert(0) += 1;
164                *hour_ops.entry(operation.clone()).or_insert(0) += 1;
165
166                // Store operation for timeline
167                operations_timeline.push(Operation {
168                    timestamp: ts_utc,
169                    path: path.clone(),
170                    operation: operation.clone(),
171                });
172            }
173
174            // Track operation types
175            *operations_by_type.entry(operation).or_insert(0) += 1;
176
177            // Track paths
178            *paths_accessed.entry(path).or_insert(0) += 1;
179        }
180
181        // Ensure 100% progress for this file
182        if let Some(size) = file_size {
183            progress.update(size);
184        }
185
186        progress.finish_with_message(&format!(
187            "Processed {} lines from this file",
188            format_number(file_lines)
189        ));
190    }
191
192    eprintln!(
193        "\nTotal: Processed {} lines, found {} operations for entity: {}",
194        format_number(total_lines),
195        format_number(entity_operations),
196        entity_id
197    );
198
199    if entity_operations == 0 {
200        println!("\nNo operations found for this entity!");
201        return Ok(());
202    }
203
204    // Sort timeline
205    operations_timeline.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
206
207    // Calculate time span
208    let (first_op, last_op, time_span_hours) = if !operations_timeline.is_empty() {
209        let first = operations_timeline.first().unwrap().timestamp;
210        let last = operations_timeline.last().unwrap().timestamp;
211        let span = (last - first).num_seconds() as f64 / 3600.0;
212        (first, last, span)
213    } else {
214        return Ok(());
215    };
216
217    // Analysis and reporting
218    println!("\n{}", "=".repeat(100));
219    println!("TIMELINE ANALYSIS FOR: {}", entity_id);
220    println!("{}", "=".repeat(100));
221
222    // 1. Summary statistics
223    println!("\n1. SUMMARY STATISTICS");
224    println!("{}", "-".repeat(100));
225    println!("Total operations: {}", format_number(entity_operations));
226    println!(
227        "Time span: {:.2} hours ({:.2} days)",
228        time_span_hours,
229        time_span_hours / 24.0
230    );
231    println!(
232        "Average rate: {:.1} operations/hour ({:.2}/minute)",
233        entity_operations as f64 / time_span_hours,
234        entity_operations as f64 / time_span_hours / 60.0
235    );
236    println!("First operation: {}", first_op.format("%Y-%m-%d %H:%M:%S"));
237    println!("Last operation: {}", last_op.format("%Y-%m-%d %H:%M:%S"));
238
239    // 2. Operation type distribution
240    println!("\n2. OPERATION TYPE DISTRIBUTION");
241    println!("{}", "-".repeat(100));
242    println!("{:<30} {:<15} {:<15}", "Operation", "Count", "Percentage");
243    println!("{}", "-".repeat(100));
244
245    let mut sorted_ops: Vec<_> = operations_by_type.iter().collect();
246    sorted_ops.sort_by(|a, b| b.1.cmp(a.1));
247
248    for (op, count) in sorted_ops {
249        let percentage = (*count as f64 / entity_operations as f64) * 100.0;
250        println!(
251            "{:<30} {:<15} {:<15.2}%",
252            op,
253            format_number(*count),
254            percentage
255        );
256    }
257
258    // 3. Top paths accessed
259    println!("\n3. TOP 30 PATHS ACCESSED");
260    println!("{}", "-".repeat(100));
261    println!("{:<70} {:<15} {:<15}", "Path", "Count", "Percentage");
262    println!("{}", "-".repeat(100));
263
264    let mut sorted_paths: Vec<_> = paths_accessed.iter().collect();
265    sorted_paths.sort_by(|a, b| b.1.cmp(a.1));
266
267    for (path, count) in sorted_paths.iter().take(30) {
268        let percentage = (**count as f64 / entity_operations as f64) * 100.0;
269        let display_path = if path.len() > 68 {
270            format!("{}...", &path[..65])
271        } else {
272            path.to_string()
273        };
274        println!(
275            "{:<70} {:<15} {:<15.2}%",
276            display_path,
277            format_number(**count),
278            percentage
279        );
280    }
281
282    // 4. Hourly activity pattern
283    println!("\n4. HOURLY ACTIVITY PATTERN (Top 30 Hours)");
284    println!("{}", "-".repeat(100));
285    println!(
286        "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
287        "Hour", "Total Ops", "read", "update", "list", "Other"
288    );
289    println!("{}", "-".repeat(100));
290
291    let mut sorted_hours: Vec<_> = operations_by_hour.iter().collect();
292    sorted_hours.sort_by(|a, b| {
293        let a_total = a.1.get("total").unwrap_or(&0);
294        let b_total = b.1.get("total").unwrap_or(&0);
295        b_total.cmp(a_total)
296    });
297
298    for (hour, ops) in sorted_hours.iter().take(30) {
299        let total = *ops.get("total").unwrap_or(&0);
300        let read = *ops.get("read").unwrap_or(&0);
301        let update = *ops.get("update").unwrap_or(&0);
302        let list_op = *ops.get("list").unwrap_or(&0);
303        let other = total - read - update - list_op;
304
305        println!(
306            "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
307            hour,
308            format_number(total),
309            format_number(read),
310            format_number(update),
311            format_number(list_op),
312            format_number(other)
313        );
314    }
315
316    // 5. Activity distribution by hour of day
317    println!("\n5. ACTIVITY DISTRIBUTION BY HOUR OF DAY");
318    println!("{}", "-".repeat(100));
319
320    let mut hour_of_day_stats: HashMap<u32, usize> = HashMap::new();
321    for op in &operations_timeline {
322        let hour = op.timestamp.hour();
323        *hour_of_day_stats.entry(hour).or_insert(0) += 1;
324    }
325
326    println!("{:<10} {:<15} {:<50}", "Hour", "Operations", "Bar Chart");
327    println!("{}", "-".repeat(100));
328
329    let max_ops_in_hour = hour_of_day_stats.values().max().copied().unwrap_or(1);
330
331    for hour in 0..24 {
332        let ops = *hour_of_day_stats.get(&hour).unwrap_or(&0);
333        let bar_length = if max_ops_in_hour > 0 {
334            (ops * 50) / max_ops_in_hour
335        } else {
336            0
337        };
338        let bar = "█".repeat(bar_length);
339        println!("{:02}:00     {:<15} {}", hour, format_number(ops), bar);
340    }
341
342    // 6. Peak activity analysis
343    println!("\n6. PEAK ACTIVITY WINDOWS");
344    println!("{}", "-".repeat(100));
345
346    let mut window_counts: HashMap<DateTime<Utc>, usize> = HashMap::new();
347
348    for op in &operations_timeline {
349        // Round to 5-minute window
350        let minute = (op.timestamp.minute() / 5) * 5;
351        let window_start = op
352            .timestamp
353            .with_minute(minute)
354            .unwrap()
355            .with_second(0)
356            .unwrap()
357            .with_nanosecond(0)
358            .unwrap();
359        *window_counts.entry(window_start).or_insert(0) += 1;
360    }
361
362    let mut sorted_windows: Vec<_> = window_counts.iter().collect();
363    sorted_windows.sort_by(|a, b| b.1.cmp(a.1));
364
365    println!(
366        "{:<25} {:<15} {:<20}",
367        "5-Minute Window", "Operations", "Rate (ops/sec)"
368    );
369    println!("{}", "-".repeat(100));
370
371    for (window, count) in sorted_windows.iter().take(20) {
372        let rate = **count as f64 / 300.0;
373        println!(
374            "{:<25} {:<15} {:<20.3}",
375            window.format("%Y-%m-%d %H:%M"),
376            format_number(**count),
377            rate
378        );
379    }
380
381    // 7. Behavioral patterns
382    println!("\n7. BEHAVIORAL PATTERNS");
383    println!("{}", "-".repeat(100));
384
385    if time_span_hours > 1.0 {
386        let ops_per_hour = entity_operations as f64 / time_span_hours;
387        if ops_per_hour > 100.0 {
388            println!(
389                "⚠️  HIGH FREQUENCY: {:.0} operations/hour suggests automated polling",
390                ops_per_hour
391            );
392            println!("   Recommended action: Implement caching or increase polling interval");
393        }
394
395        // Check for token lookup abuse
396        let token_lookup_paths: Vec<_> = paths_accessed
397            .keys()
398            .filter(|p| p.contains("token/lookup"))
399            .collect();
400        let total_token_lookups: usize = token_lookup_paths
401            .iter()
402            .map(|p| paths_accessed.get(*p).unwrap_or(&0))
403            .sum();
404
405        if total_token_lookups > 1000 {
406            println!(
407                "⚠️  TOKEN LOOKUP ABUSE: {} token lookups detected",
408                format_number(total_token_lookups)
409            );
410            println!(
411                "   Rate: {:.1} lookups/hour = {:.2} lookups/second",
412                total_token_lookups as f64 / time_span_hours,
413                total_token_lookups as f64 / time_span_hours / 3600.0
414            );
415            println!("   Recommended action: Implement client-side token TTL tracking");
416        }
417
418        // Check for path concentration
419        if let Some((top_path, top_count)) = sorted_paths.first() {
420            let top_path_pct = (**top_count as f64 / entity_operations as f64) * 100.0;
421            if top_path_pct > 30.0 {
422                println!(
423                    "⚠️  PATH CONCENTRATION: {:.1}% of operations on single path",
424                    top_path_pct
425                );
426                println!("   Path: {}", top_path);
427                println!(
428                    "   Recommended action: Review why this path is accessed {} times",
429                    format_number(**top_count)
430                );
431            }
432        }
433
434        // Check for 24/7 activity
435        let hours_with_activity = (0..24)
436            .filter(|h| hour_of_day_stats.contains_key(h))
437            .count();
438        if hours_with_activity >= 20 {
439            println!(
440                "⚠️  24/7 ACTIVITY: Active in {}/24 hours",
441                hours_with_activity
442            );
443            println!("   Suggests automated system or background process");
444        }
445    }
446
447    println!("\n{}", "=".repeat(100));
448
449    Ok(())
450}