vault_audit_tools/commands/
entity_timeline.rs1use crate::audit::types::AuditEntry;
32use crate::utils::progress::ProgressBar;
33use crate::utils::reader::open_file;
34use anyhow::Result;
35use chrono::{DateTime, Timelike, Utc};
36use std::collections::HashMap;
37use std::io::{BufRead, BufReader};
38
/// Renders `n` in decimal with a comma between each group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    // Decimal digits are ASCII, so byte length equals digit count.
    let digit_count = digits.len();
    let mut grouped = String::with_capacity(digit_count + digit_count / 3);

    for (position, digit) in digits.chars().enumerate() {
        // A separator goes before any digit (except the first) that has a
        // multiple of three digits remaining, itself included.
        let remaining = digit_count - position;
        if position > 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
50
51#[derive(Clone)]
52#[allow(dead_code)]
53struct Operation {
54 timestamp: DateTime<Utc>,
55 path: String,
56 operation: String,
57}
58
59pub fn run(log_files: &[String], entity_id: &str, display_name: &Option<String>) -> Result<()> {
60 println!("Analyzing timeline for entity: {}", entity_id);
61 if let Some(name) = display_name {
62 println!("Display name: {}", name);
63 }
64 println!();
65
66 let mut operations_by_hour: HashMap<String, HashMap<String, usize>> = HashMap::new();
67 let mut operations_by_type: HashMap<String, usize> = HashMap::new();
68 let mut paths_accessed: HashMap<String, usize> = HashMap::new();
69 let mut operations_timeline: Vec<Operation> = Vec::new();
70 let mut total_lines = 0;
71 let mut entity_operations = 0;
72
73 for (file_idx, log_file) in log_files.iter().enumerate() {
75 eprintln!(
76 "[{}/{}] Processing: {}",
77 file_idx + 1,
78 log_files.len(),
79 log_file
80 );
81
82 let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
84 let mut progress = if let Some(size) = file_size {
85 ProgressBar::new(size, "Processing")
86 } else {
87 ProgressBar::new_spinner("Processing")
88 };
89
90 let file = open_file(log_file)?;
91 let reader = BufReader::new(file);
92
93 let mut file_lines = 0;
94 let mut bytes_read = 0;
95
96 for line in reader.lines() {
97 file_lines += 1;
98 total_lines += 1;
99 let line = line?;
100 bytes_read += line.len() + 1; if file_lines % 10_000 == 0 {
103 if let Some(size) = file_size {
104 progress.update(bytes_read.min(size));
105 } else {
106 progress.update(file_lines);
107 }
108 }
109
110 let entry: AuditEntry = match serde_json::from_str(&line) {
111 Ok(e) => e,
112 Err(_) => continue,
113 };
114
115 let entry_entity_id = match &entry.auth {
117 Some(auth) => match &auth.entity_id {
118 Some(id) => id.as_str(),
119 None => continue,
120 },
121 None => continue,
122 };
123
124 if entry_entity_id != entity_id {
125 continue;
126 }
127
128 entity_operations += 1;
129
130 let path = entry
131 .request
132 .as_ref()
133 .and_then(|r| r.path.as_deref())
134 .unwrap_or("")
135 .to_string();
136 let operation = entry
137 .request
138 .as_ref()
139 .and_then(|r| r.operation.as_deref())
140 .unwrap_or("")
141 .to_string();
142
143 if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&entry.time) {
144 let ts_utc = ts.with_timezone(&Utc);
145
146 let hour_key = ts_utc.format("%Y-%m-%d %H:00").to_string();
148 let hour_ops = operations_by_hour.entry(hour_key).or_default();
149 *hour_ops.entry("total".to_string()).or_insert(0) += 1;
150 *hour_ops.entry(operation.clone()).or_insert(0) += 1;
151
152 operations_timeline.push(Operation {
154 timestamp: ts_utc,
155 path: path.clone(),
156 operation: operation.clone(),
157 });
158 }
159
160 *operations_by_type.entry(operation).or_insert(0) += 1;
162
163 *paths_accessed.entry(path).or_insert(0) += 1;
165 }
166
167 if let Some(size) = file_size {
169 progress.update(size);
170 }
171
172 progress.finish_with_message(&format!(
173 "Processed {} lines from this file",
174 format_number(file_lines)
175 ));
176 }
177
178 eprintln!(
179 "\nTotal: Processed {} lines, found {} operations for entity: {}",
180 format_number(total_lines),
181 format_number(entity_operations),
182 entity_id
183 );
184
185 if entity_operations == 0 {
186 println!("\nNo operations found for this entity!");
187 return Ok(());
188 }
189
190 operations_timeline.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
192
193 let (first_op, last_op, time_span_hours) = if !operations_timeline.is_empty() {
195 let first = operations_timeline.first().unwrap().timestamp;
196 let last = operations_timeline.last().unwrap().timestamp;
197 let span = (last - first).num_seconds() as f64 / 3600.0;
198 (first, last, span)
199 } else {
200 return Ok(());
201 };
202
203 println!("\n{}", "=".repeat(100));
205 println!("TIMELINE ANALYSIS FOR: {}", entity_id);
206 println!("{}", "=".repeat(100));
207
208 println!("\n1. SUMMARY STATISTICS");
210 println!("{}", "-".repeat(100));
211 println!("Total operations: {}", format_number(entity_operations));
212 println!(
213 "Time span: {:.2} hours ({:.2} days)",
214 time_span_hours,
215 time_span_hours / 24.0
216 );
217 println!(
218 "Average rate: {:.1} operations/hour ({:.2}/minute)",
219 entity_operations as f64 / time_span_hours,
220 entity_operations as f64 / time_span_hours / 60.0
221 );
222 println!("First operation: {}", first_op.format("%Y-%m-%d %H:%M:%S"));
223 println!("Last operation: {}", last_op.format("%Y-%m-%d %H:%M:%S"));
224
225 println!("\n2. OPERATION TYPE DISTRIBUTION");
227 println!("{}", "-".repeat(100));
228 println!("{:<30} {:<15} {:<15}", "Operation", "Count", "Percentage");
229 println!("{}", "-".repeat(100));
230
231 let mut sorted_ops: Vec<_> = operations_by_type.iter().collect();
232 sorted_ops.sort_by(|a, b| b.1.cmp(a.1));
233
234 for (op, count) in sorted_ops {
235 let percentage = (*count as f64 / entity_operations as f64) * 100.0;
236 println!(
237 "{:<30} {:<15} {:<15.2}%",
238 op,
239 format_number(*count),
240 percentage
241 );
242 }
243
244 println!("\n3. TOP 30 PATHS ACCESSED");
246 println!("{}", "-".repeat(100));
247 println!("{:<70} {:<15} {:<15}", "Path", "Count", "Percentage");
248 println!("{}", "-".repeat(100));
249
250 let mut sorted_paths: Vec<_> = paths_accessed.iter().collect();
251 sorted_paths.sort_by(|a, b| b.1.cmp(a.1));
252
253 for (path, count) in sorted_paths.iter().take(30) {
254 let percentage = (**count as f64 / entity_operations as f64) * 100.0;
255 let display_path = if path.len() > 68 {
256 format!("{}...", &path[..65])
257 } else {
258 path.to_string()
259 };
260 println!(
261 "{:<70} {:<15} {:<15.2}%",
262 display_path,
263 format_number(**count),
264 percentage
265 );
266 }
267
268 println!("\n4. HOURLY ACTIVITY PATTERN (Top 30 Hours)");
270 println!("{}", "-".repeat(100));
271 println!(
272 "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
273 "Hour", "Total Ops", "read", "update", "list", "Other"
274 );
275 println!("{}", "-".repeat(100));
276
277 let mut sorted_hours: Vec<_> = operations_by_hour.iter().collect();
278 sorted_hours.sort_by(|a, b| {
279 let a_total = a.1.get("total").unwrap_or(&0);
280 let b_total = b.1.get("total").unwrap_or(&0);
281 b_total.cmp(a_total)
282 });
283
284 for (hour, ops) in sorted_hours.iter().take(30) {
285 let total = *ops.get("total").unwrap_or(&0);
286 let read = *ops.get("read").unwrap_or(&0);
287 let update = *ops.get("update").unwrap_or(&0);
288 let list_op = *ops.get("list").unwrap_or(&0);
289 let other = total - read - update - list_op;
290
291 println!(
292 "{:<20} {:<12} {:<10} {:<10} {:<10} {:<10}",
293 hour,
294 format_number(total),
295 format_number(read),
296 format_number(update),
297 format_number(list_op),
298 format_number(other)
299 );
300 }
301
302 println!("\n5. ACTIVITY DISTRIBUTION BY HOUR OF DAY");
304 println!("{}", "-".repeat(100));
305
306 let mut hour_of_day_stats: HashMap<u32, usize> = HashMap::new();
307 for op in &operations_timeline {
308 let hour = op.timestamp.hour();
309 *hour_of_day_stats.entry(hour).or_insert(0) += 1;
310 }
311
312 println!("{:<10} {:<15} {:<50}", "Hour", "Operations", "Bar Chart");
313 println!("{}", "-".repeat(100));
314
315 let max_ops_in_hour = hour_of_day_stats.values().max().copied().unwrap_or(1);
316
317 for hour in 0..24 {
318 let ops = *hour_of_day_stats.get(&hour).unwrap_or(&0);
319 let bar_length = if max_ops_in_hour > 0 {
320 (ops * 50) / max_ops_in_hour
321 } else {
322 0
323 };
324 let bar = "█".repeat(bar_length);
325 println!("{:02}:00 {:<15} {}", hour, format_number(ops), bar);
326 }
327
328 println!("\n6. PEAK ACTIVITY WINDOWS");
330 println!("{}", "-".repeat(100));
331
332 let mut window_counts: HashMap<DateTime<Utc>, usize> = HashMap::new();
333
334 for op in &operations_timeline {
335 let minute = (op.timestamp.minute() / 5) * 5;
337 let window_start = op
338 .timestamp
339 .with_minute(minute)
340 .unwrap()
341 .with_second(0)
342 .unwrap()
343 .with_nanosecond(0)
344 .unwrap();
345 *window_counts.entry(window_start).or_insert(0) += 1;
346 }
347
348 let mut sorted_windows: Vec<_> = window_counts.iter().collect();
349 sorted_windows.sort_by(|a, b| b.1.cmp(a.1));
350
351 println!(
352 "{:<25} {:<15} {:<20}",
353 "5-Minute Window", "Operations", "Rate (ops/sec)"
354 );
355 println!("{}", "-".repeat(100));
356
357 for (window, count) in sorted_windows.iter().take(20) {
358 let rate = **count as f64 / 300.0;
359 println!(
360 "{:<25} {:<15} {:<20.3}",
361 window.format("%Y-%m-%d %H:%M"),
362 format_number(**count),
363 rate
364 );
365 }
366
367 println!("\n7. BEHAVIORAL PATTERNS");
369 println!("{}", "-".repeat(100));
370
371 if time_span_hours > 1.0 {
372 let ops_per_hour = entity_operations as f64 / time_span_hours;
373 if ops_per_hour > 100.0 {
374 println!(
375 "⚠️ HIGH FREQUENCY: {:.0} operations/hour suggests automated polling",
376 ops_per_hour
377 );
378 println!(" Recommended action: Implement caching or increase polling interval");
379 }
380
381 let token_lookup_paths: Vec<_> = paths_accessed
383 .keys()
384 .filter(|p| p.contains("token/lookup"))
385 .collect();
386 let total_token_lookups: usize = token_lookup_paths
387 .iter()
388 .map(|p| paths_accessed.get(*p).unwrap_or(&0))
389 .sum();
390
391 if total_token_lookups > 1000 {
392 println!(
393 "⚠️ TOKEN LOOKUP ABUSE: {} token lookups detected",
394 format_number(total_token_lookups)
395 );
396 println!(
397 " Rate: {:.1} lookups/hour = {:.2} lookups/second",
398 total_token_lookups as f64 / time_span_hours,
399 total_token_lookups as f64 / time_span_hours / 3600.0
400 );
401 println!(" Recommended action: Implement client-side token TTL tracking");
402 }
403
404 if let Some((top_path, top_count)) = sorted_paths.first() {
406 let top_path_pct = (**top_count as f64 / entity_operations as f64) * 100.0;
407 if top_path_pct > 30.0 {
408 println!(
409 "⚠️ PATH CONCENTRATION: {:.1}% of operations on single path",
410 top_path_pct
411 );
412 println!(" Path: {}", top_path);
413 println!(
414 " Recommended action: Review why this path is accessed {} times",
415 format_number(**top_count)
416 );
417 }
418 }
419
420 let hours_with_activity = (0..24)
422 .filter(|h| hour_of_day_stats.contains_key(h))
423 .count();
424 if hours_with_activity >= 20 {
425 println!(
426 "⚠️ 24/7 ACTIVITY: Active in {}/24 hours",
427 hours_with_activity
428 );
429 println!(" Suggests automated system or background process");
430 }
431 }
432
433 println!("\n{}", "=".repeat(100));
434
435 Ok(())
436}