vault_audit_tools/commands/
entity_timeline.rs

1//! Entity timeline visualization command.
2//!
3//! ⚠️ **DEPRECATED**: Use `entity-analysis timeline` instead.
4//!
5//! ```bash
6//! # Old (deprecated):
7//! vault-audit entity-timeline logs/*.log --entity-id abc-123-def
8//!
9//! # New (recommended):
10//! vault-audit entity-analysis timeline --entity-id abc-123-def logs/*.log
11//! ```
12//!
13//! See [`entity_analysis`](crate::commands::entity_analysis) for the unified command.
14//!
15//! ---
16//!
17//! Generates a detailed timeline of all operations performed by a specific entity,
18//! useful for understanding entity behavior and troubleshooting issues.
19//! Supports multi-file analysis to track entities across multiple days.
20//!
21//! # Usage
22//!
23//! ```bash
24//! # Single file
25//! vault-audit entity-timeline audit.log --entity-id abc-123-def
26//!
27//! # Multi-day timeline
28//! vault-audit entity-timeline day1.log day2.log day3.log --entity-id abc-123-def
29//! ```
30//!
31//! # Output
32//!
//! Prints an aggregate report of the entity's activity (timestamps are in UTC):
//! - Summary statistics (total operations, time span, average rate,
//!   first and last operation timestamps)
//! - Operation type distribution (read, update, list, etc.)
//! - Top 30 paths accessed
//! - Hourly activity pattern (the 30 busiest hours)
//! - Activity distribution by hour of day
//! - Peak 5-minute activity windows
//! - Behavioral pattern warnings (high frequency, token lookup abuse,
//!   path concentration, 24/7 activity)
44
45use crate::audit::types::AuditEntry;
46use crate::utils::format::format_number;
47use crate::utils::progress::ProgressBar;
48use crate::utils::reader::open_file;
49use anyhow::Result;
50use chrono::{DateTime, Timelike, Utc};
51use std::collections::HashMap;
52use std::io::{BufRead, BufReader};
53
54#[derive(Clone)]
55#[allow(dead_code)]
56struct Operation {
57    timestamp: DateTime<Utc>,
58    path: String,
59    op: String,
60}
61
62pub fn run(log_files: &[String], entity_id: &str, display_name: Option<&String>) -> Result<()> {
63    println!("Analyzing timeline for entity: {}", entity_id);
64    if let Some(name) = display_name {
65        println!("Display name: {}", name);
66    }
67    println!();
68
69    let mut operations_by_hour: HashMap<String, HashMap<String, usize>> = HashMap::new();
70    let mut operations_by_type: HashMap<String, usize> = HashMap::new();
71    let mut paths_accessed: HashMap<String, usize> = HashMap::new();
72    let mut operations_timeline: Vec<Operation> = Vec::new();
73    let mut total_lines = 0;
74    let mut entity_operations = 0;
75
76    // Process each log file sequentially
77    for (file_idx, log_file) in log_files.iter().enumerate() {
78        eprintln!(
79            "[{}/{}] Processing: {}",
80            file_idx + 1,
81            log_files.len(),
82            log_file
83        );
84
85        // Count lines in file first for accurate progress tracking
86        eprintln!("Scanning file to determine total lines...");
87        let total_file_lines = crate::utils::parallel::count_file_lines(log_file)?;
88
89        let progress = ProgressBar::new(total_file_lines, "Processing");
90
91        let file = open_file(log_file)?;
92        let reader = BufReader::new(file);
93
94        let mut file_lines = 0;
95
96        for line in reader.lines() {
97            file_lines += 1;
98            total_lines += 1;
99            let line = line?;
100
101            if file_lines % 10_000 == 0 {
102                progress.update(file_lines);
103            }
104
105            let entry: AuditEntry = match serde_json::from_str(&line) {
106                Ok(e) => e,
107                Err(_) => continue,
108            };
109
110            // Check if this is our entity
111            let entry_entity_id = match &entry.auth {
112                Some(auth) => match &auth.entity_id {
113                    Some(id) => id.as_str(),
114                    None => continue,
115                },
116                None => continue,
117            };
118
119            if entry_entity_id != entity_id {
120                continue;
121            }
122
123            entity_operations += 1;
124
125            let path = entry
126                .request
127                .as_ref()
128                .and_then(|r| r.path.as_deref())
129                .unwrap_or("")
130                .to_string();
131            let operation = entry
132                .request
133                .as_ref()
134                .and_then(|r| r.operation.as_deref())
135                .unwrap_or("")
136                .to_string();
137
138            if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&entry.time) {
139                let ts_utc = ts.with_timezone(&Utc);
140
141                // Track by hour
142                let hour_key = ts_utc.format("%Y-%m-%d %H:00").to_string();
143                let hour_ops = operations_by_hour.entry(hour_key).or_default();
144                *hour_ops.entry("total".to_string()).or_insert(0) += 1;
145                *hour_ops.entry(operation.clone()).or_insert(0) += 1;
146
147                // Store operation for timeline
148                operations_timeline.push(Operation {
149                    timestamp: ts_utc,
150                    path: path.clone(),
151                    op: operation.clone(),
152                });
153            }
154
155            // Track operation types
156            *operations_by_type.entry(operation).or_insert(0) += 1;
157
158            // Track paths
159            *paths_accessed.entry(path).or_insert(0) += 1;
160        }
161
162        // Ensure 100% progress for this file
163        progress.update(total_file_lines);
164
165        progress.finish_with_message(&format!(
166            "Processed {} lines from this file",
167            format_number(file_lines)
168        ));
169    }
170
171    eprintln!(
172        "\nTotal: Processed {} lines, found {} operations for entity: {}",
173        format_number(total_lines),
174        format_number(entity_operations),
175        entity_id
176    );
177
178    if entity_operations == 0 {
179        println!("\nNo operations found for this entity!");
180        return Ok(());
181    }
182
183    // Sort timeline
184    operations_timeline.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
185
186    // Calculate time span
187    let (first_op, last_op, time_span_hours) = if operations_timeline.is_empty() {
188        return Ok(());
189    } else {
190        let first = operations_timeline.first().unwrap().timestamp;
191        let last = operations_timeline.last().unwrap().timestamp;
192        let span = (last - first).num_seconds() as f64 / 3600.0;
193        (first, last, span)
194    };
195
196    // Analysis and reporting
197    println!("\n{}", "=".repeat(100));
198    println!("TIMELINE ANALYSIS FOR: {}", entity_id);
199    println!("{}", "=".repeat(100));
200
201    // 1. Summary statistics
202    println!("\n1. SUMMARY STATISTICS");
203    println!("{}", "-".repeat(100));
204    println!("Total operations: {}", format_number(entity_operations));
205    println!(
206        "Time span: {:.2} hours ({:.2} days)",
207        time_span_hours,
208        time_span_hours / 24.0
209    );
210    println!(
211        "Average rate: {:.1} operations/hour ({:.2}/minute)",
212        entity_operations as f64 / time_span_hours,
213        entity_operations as f64 / time_span_hours / 60.0
214    );
215    println!("First operation: {}", first_op.format("%Y-%m-%d %H:%M:%S"));
216    println!("Last operation: {}", last_op.format("%Y-%m-%d %H:%M:%S"));
217
218    // 2. Operation type distribution
219    println!("\n2. OPERATION TYPE DISTRIBUTION");
220    println!("{}", "-".repeat(100));
221    println!("{:<30} {:<15} {:<15}", "Operation", "Count", "Percentage");
222    println!("{}", "-".repeat(100));
223
224    let mut sorted_ops: Vec<_> = operations_by_type.iter().collect();
225    sorted_ops.sort_by(|a, b| b.1.cmp(a.1));
226
227    for (op, count) in sorted_ops {
228        let percentage = (*count as f64 / entity_operations as f64) * 100.0;
229        println!(
230            "{:<30} {:<15} {:<15.2}%",
231            op,
232            format_number(*count),
233            percentage
234        );
235    }
236
237    // 3. Top paths accessed
238    println!("\n3. TOP 30 PATHS ACCESSED");
239    println!("{}", "-".repeat(100));
240    println!("{:<70} {:<15} {:<15}", "Path", "Count", "Percentage");
241    println!("{}", "-".repeat(100));
242
243    let mut sorted_paths: Vec<_> = paths_accessed.iter().collect();
244    sorted_paths.sort_by(|a, b| b.1.cmp(a.1));
245
246    for (path, count) in sorted_paths.iter().take(30) {
247        let percentage = (**count as f64 / entity_operations as f64) * 100.0;
248        let display_path = if path.len() > 68 {
249            format!("{}...", &path[..65])
250        } else {
251            (*path).to_string()
252        };
253        println!(
254            "{:<70} {:<15} {:<15.2}%",
255            display_path,
256            format_number(**count),
257            percentage
258        );
259    }
260
261    // 4. Hourly activity pattern
262    println!("\n4. HOURLY ACTIVITY PATTERN (Top 30 Hours)");
263    println!("{}", "-".repeat(100));
264    println!(
265        "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
266        "Hour", "Total Ops", "read", "update", "list", "Other"
267    );
268    println!("{}", "-".repeat(100));
269
270    let mut sorted_hours: Vec<_> = operations_by_hour.iter().collect();
271    sorted_hours.sort_by(|a, b| {
272        let a_total = a.1.get("total").unwrap_or(&0);
273        let b_total = b.1.get("total").unwrap_or(&0);
274        b_total.cmp(a_total)
275    });
276
277    for (hour, ops) in sorted_hours.iter().take(30) {
278        let total = *ops.get("total").unwrap_or(&0);
279        let read = *ops.get("read").unwrap_or(&0);
280        let update = *ops.get("update").unwrap_or(&0);
281        let list_operations = *ops.get("list").unwrap_or(&0);
282        let other = total - read - update - list_operations;
283
284        println!(
285            "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
286            hour,
287            format_number(total),
288            format_number(read),
289            format_number(update),
290            format_number(list_operations),
291            format_number(other)
292        );
293    }
294
295    // 5. Activity distribution by hour of day
296    println!("\n5. ACTIVITY DISTRIBUTION BY HOUR OF DAY");
297    println!("{}", "-".repeat(100));
298
299    let mut hour_of_day_stats: HashMap<u32, usize> = HashMap::new();
300    for op in &operations_timeline {
301        let hour = op.timestamp.hour();
302        *hour_of_day_stats.entry(hour).or_insert(0) += 1;
303    }
304
305    println!("{:<10} {:<15} {:<50}", "Hour", "Operations", "Bar Chart");
306    println!("{}", "-".repeat(100));
307
308    let max_ops_in_hour = hour_of_day_stats.values().max().copied().unwrap_or(1);
309
310    for hour in 0..24 {
311        let ops = *hour_of_day_stats.get(&hour).unwrap_or(&0);
312        let bar_length = if max_ops_in_hour > 0 {
313            (ops * 50) / max_ops_in_hour
314        } else {
315            0
316        };
317        let bar = "█".repeat(bar_length);
318        println!("{:02}:00     {:<15} {}", hour, format_number(ops), bar);
319    }
320
321    // 6. Peak activity analysis
322    println!("\n6. PEAK ACTIVITY WINDOWS");
323    println!("{}", "-".repeat(100));
324
325    let mut window_counts: HashMap<DateTime<Utc>, usize> = HashMap::new();
326
327    for op in &operations_timeline {
328        // Round to 5-minute window
329        let minute = (op.timestamp.minute() / 5) * 5;
330        let window_start = op
331            .timestamp
332            .with_minute(minute)
333            .unwrap()
334            .with_second(0)
335            .unwrap()
336            .with_nanosecond(0)
337            .unwrap();
338        *window_counts.entry(window_start).or_insert(0) += 1;
339    }
340
341    let mut sorted_windows: Vec<_> = window_counts.iter().collect();
342    sorted_windows.sort_by(|a, b| b.1.cmp(a.1));
343
344    println!(
345        "{:<25} {:<15} {:<20}",
346        "5-Minute Window", "Operations", "Rate (ops/sec)"
347    );
348    println!("{}", "-".repeat(100));
349
350    for (window, count) in sorted_windows.iter().take(20) {
351        let rate = **count as f64 / 300.0;
352        println!(
353            "{:<25} {:<15} {:<20.3}",
354            window.format("%Y-%m-%d %H:%M"),
355            format_number(**count),
356            rate
357        );
358    }
359
360    // 7. Behavioral patterns
361    println!("\n7. BEHAVIORAL PATTERNS");
362    println!("{}", "-".repeat(100));
363
364    if time_span_hours > 1.0 {
365        let ops_per_hour = entity_operations as f64 / time_span_hours;
366        if ops_per_hour > 100.0 {
367            println!(
368                "⚠️  HIGH FREQUENCY: {:.0} operations/hour suggests automated polling",
369                ops_per_hour
370            );
371            println!("   Recommended action: Implement caching or increase polling interval");
372        }
373
374        // Check for token lookup abuse
375        let token_lookup_paths: Vec<_> = paths_accessed
376            .keys()
377            .filter(|p| p.contains("token/lookup"))
378            .collect();
379        let total_token_lookups: usize = token_lookup_paths
380            .iter()
381            .map(|p| paths_accessed.get(*p).unwrap_or(&0))
382            .sum();
383
384        if total_token_lookups > 1000 {
385            println!(
386                "⚠️  TOKEN LOOKUP ABUSE: {} token lookups detected",
387                format_number(total_token_lookups)
388            );
389            println!(
390                "   Rate: {:.1} lookups/hour = {:.2} lookups/second",
391                total_token_lookups as f64 / time_span_hours,
392                total_token_lookups as f64 / time_span_hours / 3600.0
393            );
394            println!("   Recommended action: Implement client-side token TTL tracking");
395        }
396
397        // Check for path concentration
398        if let Some((top_path, top_count)) = sorted_paths.first() {
399            let top_path_pct = (**top_count as f64 / entity_operations as f64) * 100.0;
400            if top_path_pct > 30.0 {
401                println!(
402                    "⚠️  PATH CONCENTRATION: {:.1}% of operations on single path",
403                    top_path_pct
404                );
405                println!("   Path: {}", top_path);
406                println!(
407                    "   Recommended action: Review why this path is accessed {} times",
408                    format_number(**top_count)
409                );
410            }
411        }
412
413        // Check for 24/7 activity
414        let hours_with_activity = (0..24)
415            .filter(|h| hour_of_day_stats.contains_key(h))
416            .count();
417        if hours_with_activity >= 20 {
418            println!(
419                "⚠️  24/7 ACTIVITY: Active in {}/24 hours",
420                hours_with_activity
421            );
422            println!("   Suggests automated system or background process");
423        }
424    }
425
426    println!("\n{}", "=".repeat(100));
427
428    Ok(())
429}