vault_audit_tools/commands/
entity_timeline.rs

1//! Entity timeline visualization command.
2//!
3//! Generates a detailed timeline of all operations performed by a specific entity,
4//! useful for understanding entity behavior and troubleshooting issues.
5//! Supports multi-file analysis to track entities across multiple days.
6//!
7//! # Usage
8//!
9//! ```bash
10//! # Single file
11//! vault-audit entity-timeline audit.log --entity-id abc-123-def
12//!
13//! # Multi-day timeline
14//! vault-audit entity-timeline day1.log day2.log day3.log --entity-id abc-123-def
15//! ```
16//!
//! # Output
//!
//! Prints a report for the selected entity with these sections:
//! - Summary statistics (total operations, time span, average rate,
//!   first and last operation timestamps)
//! - Operation type distribution (read, update, list, etc.)
//! - Top 30 paths accessed
//! - Hourly activity pattern (the 30 busiest hours)
//! - Activity distribution by hour of day
//! - Peak 5-minute activity windows
//! - Behavioral pattern warnings (high request frequency, heavy token
//!   lookups, concentration on a single path, round-the-clock activity)
30
31use crate::audit::types::AuditEntry;
32use crate::utils::progress::ProgressBar;
33use crate::utils::reader::open_file;
34use anyhow::Result;
35use chrono::{DateTime, Timelike, Utc};
36use std::collections::HashMap;
37use std::io::{BufRead, BufReader};
38
/// Renders `n` in decimal with a comma between every group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    // One separator for every full group of three after the leading group.
    let mut grouped = String::with_capacity(len + (len - 1) / 3);

    for (pos, digit) in digits.chars().enumerate() {
        // A comma goes before any digit that starts a complete trailing group.
        let remaining = len - pos;
        if pos != 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
50
/// One operation performed by the entity under analysis, kept for the
/// time-based sections of the report.
///
/// Only `timestamp` is read by the reporting code in this file; `path` and
/// `operation` are stored but never read back, which is why the dead-code
/// lint is silenced on the struct.
#[derive(Clone)]
#[allow(dead_code)]
struct Operation {
    /// Time of the operation, converted to UTC.
    timestamp: DateTime<Utc>,
    /// Request path; empty when the audit entry has no request path.
    path: String,
    /// Request operation name; empty when the audit entry has none.
    operation: String,
}
58
59pub fn run(log_files: &[String], entity_id: &str, display_name: &Option<String>) -> Result<()> {
60    println!("Analyzing timeline for entity: {}", entity_id);
61    if let Some(name) = display_name {
62        println!("Display name: {}", name);
63    }
64    println!();
65
66    let mut operations_by_hour: HashMap<String, HashMap<String, usize>> = HashMap::new();
67    let mut operations_by_type: HashMap<String, usize> = HashMap::new();
68    let mut paths_accessed: HashMap<String, usize> = HashMap::new();
69    let mut operations_timeline: Vec<Operation> = Vec::new();
70    let mut total_lines = 0;
71    let mut entity_operations = 0;
72
73    // Process each log file sequentially
74    for (file_idx, log_file) in log_files.iter().enumerate() {
75        eprintln!(
76            "[{}/{}] Processing: {}",
77            file_idx + 1,
78            log_files.len(),
79            log_file
80        );
81
82        // Get file size for progress tracking
83        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
84        let mut progress = if let Some(size) = file_size {
85            ProgressBar::new(size, "Processing")
86        } else {
87            ProgressBar::new_spinner("Processing")
88        };
89
90        let file = open_file(log_file)?;
91        let reader = BufReader::new(file);
92
93        let mut file_lines = 0;
94        let mut bytes_read = 0;
95
96        for line in reader.lines() {
97            file_lines += 1;
98            total_lines += 1;
99            let line = line?;
100            bytes_read += line.len() + 1; // +1 for newline
101
102            if file_lines % 10_000 == 0 {
103                if let Some(size) = file_size {
104                    progress.update(bytes_read.min(size));
105                } else {
106                    progress.update(file_lines);
107                }
108            }
109
110            let entry: AuditEntry = match serde_json::from_str(&line) {
111                Ok(e) => e,
112                Err(_) => continue,
113            };
114
115            // Check if this is our entity
116            let entry_entity_id = match &entry.auth {
117                Some(auth) => match &auth.entity_id {
118                    Some(id) => id.as_str(),
119                    None => continue,
120                },
121                None => continue,
122            };
123
124            if entry_entity_id != entity_id {
125                continue;
126            }
127
128            entity_operations += 1;
129
130            let path = entry
131                .request
132                .as_ref()
133                .and_then(|r| r.path.as_deref())
134                .unwrap_or("")
135                .to_string();
136            let operation = entry
137                .request
138                .as_ref()
139                .and_then(|r| r.operation.as_deref())
140                .unwrap_or("")
141                .to_string();
142
143            if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&entry.time) {
144                let ts_utc = ts.with_timezone(&Utc);
145
146                // Track by hour
147                let hour_key = ts_utc.format("%Y-%m-%d %H:00").to_string();
148                let hour_ops = operations_by_hour.entry(hour_key).or_default();
149                *hour_ops.entry("total".to_string()).or_insert(0) += 1;
150                *hour_ops.entry(operation.clone()).or_insert(0) += 1;
151
152                // Store operation for timeline
153                operations_timeline.push(Operation {
154                    timestamp: ts_utc,
155                    path: path.clone(),
156                    operation: operation.clone(),
157                });
158            }
159
160            // Track operation types
161            *operations_by_type.entry(operation).or_insert(0) += 1;
162
163            // Track paths
164            *paths_accessed.entry(path).or_insert(0) += 1;
165        }
166
167        // Ensure 100% progress for this file
168        if let Some(size) = file_size {
169            progress.update(size);
170        }
171
172        progress.finish_with_message(&format!(
173            "Processed {} lines from this file",
174            format_number(file_lines)
175        ));
176    }
177
178    eprintln!(
179        "\nTotal: Processed {} lines, found {} operations for entity: {}",
180        format_number(total_lines),
181        format_number(entity_operations),
182        entity_id
183    );
184
185    if entity_operations == 0 {
186        println!("\nNo operations found for this entity!");
187        return Ok(());
188    }
189
190    // Sort timeline
191    operations_timeline.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
192
193    // Calculate time span
194    let (first_op, last_op, time_span_hours) = if !operations_timeline.is_empty() {
195        let first = operations_timeline.first().unwrap().timestamp;
196        let last = operations_timeline.last().unwrap().timestamp;
197        let span = (last - first).num_seconds() as f64 / 3600.0;
198        (first, last, span)
199    } else {
200        return Ok(());
201    };
202
203    // Analysis and reporting
204    println!("\n{}", "=".repeat(100));
205    println!("TIMELINE ANALYSIS FOR: {}", entity_id);
206    println!("{}", "=".repeat(100));
207
208    // 1. Summary statistics
209    println!("\n1. SUMMARY STATISTICS");
210    println!("{}", "-".repeat(100));
211    println!("Total operations: {}", format_number(entity_operations));
212    println!(
213        "Time span: {:.2} hours ({:.2} days)",
214        time_span_hours,
215        time_span_hours / 24.0
216    );
217    println!(
218        "Average rate: {:.1} operations/hour ({:.2}/minute)",
219        entity_operations as f64 / time_span_hours,
220        entity_operations as f64 / time_span_hours / 60.0
221    );
222    println!("First operation: {}", first_op.format("%Y-%m-%d %H:%M:%S"));
223    println!("Last operation: {}", last_op.format("%Y-%m-%d %H:%M:%S"));
224
225    // 2. Operation type distribution
226    println!("\n2. OPERATION TYPE DISTRIBUTION");
227    println!("{}", "-".repeat(100));
228    println!("{:<30} {:<15} {:<15}", "Operation", "Count", "Percentage");
229    println!("{}", "-".repeat(100));
230
231    let mut sorted_ops: Vec<_> = operations_by_type.iter().collect();
232    sorted_ops.sort_by(|a, b| b.1.cmp(a.1));
233
234    for (op, count) in sorted_ops {
235        let percentage = (*count as f64 / entity_operations as f64) * 100.0;
236        println!(
237            "{:<30} {:<15} {:<15.2}%",
238            op,
239            format_number(*count),
240            percentage
241        );
242    }
243
244    // 3. Top paths accessed
245    println!("\n3. TOP 30 PATHS ACCESSED");
246    println!("{}", "-".repeat(100));
247    println!("{:<70} {:<15} {:<15}", "Path", "Count", "Percentage");
248    println!("{}", "-".repeat(100));
249
250    let mut sorted_paths: Vec<_> = paths_accessed.iter().collect();
251    sorted_paths.sort_by(|a, b| b.1.cmp(a.1));
252
253    for (path, count) in sorted_paths.iter().take(30) {
254        let percentage = (**count as f64 / entity_operations as f64) * 100.0;
255        let display_path = if path.len() > 68 {
256            format!("{}...", &path[..65])
257        } else {
258            path.to_string()
259        };
260        println!(
261            "{:<70} {:<15} {:<15.2}%",
262            display_path,
263            format_number(**count),
264            percentage
265        );
266    }
267
268    // 4. Hourly activity pattern
269    println!("\n4. HOURLY ACTIVITY PATTERN (Top 30 Hours)");
270    println!("{}", "-".repeat(100));
271    println!(
272        "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
273        "Hour", "Total Ops", "read", "update", "list", "Other"
274    );
275    println!("{}", "-".repeat(100));
276
277    let mut sorted_hours: Vec<_> = operations_by_hour.iter().collect();
278    sorted_hours.sort_by(|a, b| {
279        let a_total = a.1.get("total").unwrap_or(&0);
280        let b_total = b.1.get("total").unwrap_or(&0);
281        b_total.cmp(a_total)
282    });
283
284    for (hour, ops) in sorted_hours.iter().take(30) {
285        let total = *ops.get("total").unwrap_or(&0);
286        let read = *ops.get("read").unwrap_or(&0);
287        let update = *ops.get("update").unwrap_or(&0);
288        let list_op = *ops.get("list").unwrap_or(&0);
289        let other = total - read - update - list_op;
290
291        println!(
292            "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
293            hour,
294            format_number(total),
295            format_number(read),
296            format_number(update),
297            format_number(list_op),
298            format_number(other)
299        );
300    }
301
302    // 5. Activity distribution by hour of day
303    println!("\n5. ACTIVITY DISTRIBUTION BY HOUR OF DAY");
304    println!("{}", "-".repeat(100));
305
306    let mut hour_of_day_stats: HashMap<u32, usize> = HashMap::new();
307    for op in &operations_timeline {
308        let hour = op.timestamp.hour();
309        *hour_of_day_stats.entry(hour).or_insert(0) += 1;
310    }
311
312    println!("{:<10} {:<15} {:<50}", "Hour", "Operations", "Bar Chart");
313    println!("{}", "-".repeat(100));
314
315    let max_ops_in_hour = hour_of_day_stats.values().max().copied().unwrap_or(1);
316
317    for hour in 0..24 {
318        let ops = *hour_of_day_stats.get(&hour).unwrap_or(&0);
319        let bar_length = if max_ops_in_hour > 0 {
320            (ops * 50) / max_ops_in_hour
321        } else {
322            0
323        };
324        let bar = "█".repeat(bar_length);
325        println!("{:02}:00     {:<15} {}", hour, format_number(ops), bar);
326    }
327
328    // 6. Peak activity analysis
329    println!("\n6. PEAK ACTIVITY WINDOWS");
330    println!("{}", "-".repeat(100));
331
332    let mut window_counts: HashMap<DateTime<Utc>, usize> = HashMap::new();
333
334    for op in &operations_timeline {
335        // Round to 5-minute window
336        let minute = (op.timestamp.minute() / 5) * 5;
337        let window_start = op
338            .timestamp
339            .with_minute(minute)
340            .unwrap()
341            .with_second(0)
342            .unwrap()
343            .with_nanosecond(0)
344            .unwrap();
345        *window_counts.entry(window_start).or_insert(0) += 1;
346    }
347
348    let mut sorted_windows: Vec<_> = window_counts.iter().collect();
349    sorted_windows.sort_by(|a, b| b.1.cmp(a.1));
350
351    println!(
352        "{:<25} {:<15} {:<20}",
353        "5-Minute Window", "Operations", "Rate (ops/sec)"
354    );
355    println!("{}", "-".repeat(100));
356
357    for (window, count) in sorted_windows.iter().take(20) {
358        let rate = **count as f64 / 300.0;
359        println!(
360            "{:<25} {:<15} {:<20.3}",
361            window.format("%Y-%m-%d %H:%M"),
362            format_number(**count),
363            rate
364        );
365    }
366
367    // 7. Behavioral patterns
368    println!("\n7. BEHAVIORAL PATTERNS");
369    println!("{}", "-".repeat(100));
370
371    if time_span_hours > 1.0 {
372        let ops_per_hour = entity_operations as f64 / time_span_hours;
373        if ops_per_hour > 100.0 {
374            println!(
375                "⚠️  HIGH FREQUENCY: {:.0} operations/hour suggests automated polling",
376                ops_per_hour
377            );
378            println!("   Recommended action: Implement caching or increase polling interval");
379        }
380
381        // Check for token lookup abuse
382        let token_lookup_paths: Vec<_> = paths_accessed
383            .keys()
384            .filter(|p| p.contains("token/lookup"))
385            .collect();
386        let total_token_lookups: usize = token_lookup_paths
387            .iter()
388            .map(|p| paths_accessed.get(*p).unwrap_or(&0))
389            .sum();
390
391        if total_token_lookups > 1000 {
392            println!(
393                "⚠️  TOKEN LOOKUP ABUSE: {} token lookups detected",
394                format_number(total_token_lookups)
395            );
396            println!(
397                "   Rate: {:.1} lookups/hour = {:.2} lookups/second",
398                total_token_lookups as f64 / time_span_hours,
399                total_token_lookups as f64 / time_span_hours / 3600.0
400            );
401            println!("   Recommended action: Implement client-side token TTL tracking");
402        }
403
404        // Check for path concentration
405        if let Some((top_path, top_count)) = sorted_paths.first() {
406            let top_path_pct = (**top_count as f64 / entity_operations as f64) * 100.0;
407            if top_path_pct > 30.0 {
408                println!(
409                    "⚠️  PATH CONCENTRATION: {:.1}% of operations on single path",
410                    top_path_pct
411                );
412                println!("   Path: {}", top_path);
413                println!(
414                    "   Recommended action: Review why this path is accessed {} times",
415                    format_number(**top_count)
416                );
417            }
418        }
419
420        // Check for 24/7 activity
421        let hours_with_activity = (0..24)
422            .filter(|h| hour_of_day_stats.contains_key(h))
423            .count();
424        if hours_with_activity >= 20 {
425            println!(
426                "⚠️  24/7 ACTIVITY: Active in {}/24 hours",
427                hours_with_activity
428            );
429            println!("   Suggests automated system or background process");
430        }
431    }
432
433    println!("\n{}", "=".repeat(100));
434
435    Ok(())
436}