vault_audit_tools/commands/
token_analysis.rs

1//! Unified token analysis command.
2//!
3//! Consolidates token operations tracking, abuse detection, and data export
4//! into a single powerful command. Supports multi-file analysis (compressed
5//! or uncompressed) for comprehensive token usage analysis.
6//!
7//! # Usage
8//!
9//! ```bash
10//! # Overview of all token operations by entity
11//! vault-audit token-analysis logs/*.log
12//! vault-audit token-analysis logs/*.log.gz
13//!
14//! # Detect token lookup abuse (default threshold: 1000)
15//! vault-audit token-analysis logs/*.log --abuse-threshold 1000
16//!
17//! # Filter specific operation types
18//! vault-audit token-analysis logs/*.log --filter lookup
19//! vault-audit token-analysis logs/*.log --filter create,renew
20//!
21//! # Export to CSV for further analysis
22//! vault-audit token-analysis logs/*.log --export token_data.csv
23//!
24//! # Export only high-volume token accessors (individual tokens)
25//! vault-audit token-analysis logs/*.log --min-operations 1000 --export high_volume_tokens.csv
26//!
27//! # Combine abuse detection with export
28//! vault-audit token-analysis logs/*.log --abuse-threshold 500 --export abuse_patterns.csv
29//! ```
30//!
31//! **Compressed File Support**: Automatically handles `.gz` and `.zst` files.
32//!
33//! # Understanding Entities vs Accessors
34//!
35//! - **Entity**: A user or service identity (e.g., "fg-PIOP0SRVDEVOPS")
36//!   - One entity can have multiple tokens over time
37//!   - Summary view shows per-entity totals
38//!
39//! - **Accessor**: A unique token identifier (individual token)
40//!   - Each accessor belongs to one entity
41//!   - CSV export shows per-accessor detail with timestamps
42//!   - Example: An entity with 100k operations might have 3 accessors with 50k, 30k, 20k operations each
43//!
44//! # Output Modes
45//!
46//! ## Default: Operations Summary (Per-Entity)
47//! Displays aggregated breakdown of all token operations by entity:
48//! - lookup-self, renew-self, revoke-self, create, login, other
49//! - Shows top 50 entities sorted by total operations
50//! - One row per entity (combines all tokens for that entity)
51//!
52//! ## Abuse Detection Mode (--abuse-threshold)
//! Identifies entities whose lookup-self count meets or exceeds the threshold:
54//! - Entity details and lookup count
55//! - Time range and rate (lookups/hour)
56//! - Helps find misconfigured apps or compromised credentials
57//!
58//! ## Export Mode (--export) - Per-Accessor Detail
59//! Generates CSV with per-token accessor granularity:
60//! - entity_id, display_name, accessor (token identifier)
61//! - operations, first_seen, last_seen, duration_hours
62//! - Shows individual token lifecycle and usage patterns
63//! - Use --min-operations to filter low-activity tokens
64//! - First/last seen timestamps
65//! - Duration
66
67use crate::audit::types::AuditEntry;
68use crate::utils::progress::ProgressBar;
69use crate::utils::reader::open_file;
70use crate::utils::time::parse_timestamp;
71use anyhow::{Context, Result};
72use std::collections::HashMap;
73use std::fs::File;
74use std::io::{BufRead, BufReader, Write};
75
76/// Type alias for the complex return type from process_logs
77type ProcessLogsResult = (
78    HashMap<String, TokenOps>,
79    HashMap<String, EntityAccessors>,
80    usize,
81);
82
/// Per-entity tally of token operations, plus identifying metadata.
#[derive(Debug, Default)]
struct TokenOps {
    /// Requests to `auth/token/lookup-self`.
    lookup_self: usize,
    /// Requests to `auth/token/renew-self`.
    renew_self: usize,
    /// Requests to `auth/token/revoke-self`.
    revoke_self: usize,
    /// Requests to `auth/token/create`.
    create: usize,
    /// Requests classified as logins.
    login: usize,
    /// Token requests that fit none of the categories above.
    other: usize,
    /// Display name taken from the first counted entry for this entity.
    display_name: Option<String>,
    /// `username` metadata value, when an entry provided one.
    username: Option<String>,
    /// Timestamp passed to the first `update_timestamps` call.
    first_seen: Option<String>,
    /// Timestamp passed to the most recent `update_timestamps` call.
    last_seen: Option<String>,
}

impl TokenOps {
    /// Sum of all operation counters.
    fn total(&self) -> usize {
        [
            self.lookup_self,
            self.renew_self,
            self.revoke_self,
            self.create,
            self.login,
            self.other,
        ]
        .iter()
        .sum()
    }

    /// Record that an operation was observed at `timestamp`.
    ///
    /// `first_seen` is set only once; `last_seen` is overwritten on every call.
    /// No ordering comparison is made, so both values reflect the order in
    /// which entries were fed in rather than chronological order.
    fn update_timestamps(&mut self, timestamp: &str) {
        self.first_seen.get_or_insert_with(|| timestamp.to_owned());
        self.last_seen = Some(timestamp.to_owned());
    }
}
115
/// Activity recorded for one token accessor (an individual token).
#[derive(Debug, Default)]
struct AccessorData {
    /// Number of counted operations performed with this accessor.
    operations: usize,
    /// Timestamp string of the first counted entry for this accessor.
    first_seen: String,
    /// Timestamp string of the most recently counted entry for this accessor.
    last_seen: String,
}
123
124/// Tracks per-accessor token activity for an entity
125#[derive(Debug, Default)]
126struct EntityAccessors {
127    accessors: HashMap<String, AccessorData>,
128    display_name: Option<String>,
129}
130
/// Render `n` in decimal with a comma between each group of three digits
/// (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);

    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that starts a full group of
        // three when counted from the right, except at the very front.
        if idx > 0 && (len - idx) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
142
143fn calculate_time_span_hours(first_seen: &str, last_seen: &str) -> f64 {
144    match (parse_timestamp(first_seen), parse_timestamp(last_seen)) {
145        (Ok(first), Ok(last)) => {
146            let duration = last.signed_duration_since(first);
147            duration.num_seconds() as f64 / 3600.0
148        }
149        _ => 0.0,
150    }
151}
152
153/// Process audit logs and collect token operation data
154fn process_logs(
155    log_files: &[String],
156    operation_filter: Option<&Vec<String>>,
157) -> Result<ProcessLogsResult> {
158    let mut token_ops: HashMap<String, TokenOps> = HashMap::new();
159    let mut accessor_data: HashMap<String, EntityAccessors> = HashMap::new();
160    let mut total_lines = 0;
161
162    for (file_idx, log_file) in log_files.iter().enumerate() {
163        eprintln!(
164            "[{}/{}] Processing: {}",
165            file_idx + 1,
166            log_files.len(),
167            log_file
168        );
169
170        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
171        let mut progress = if let Some(size) = file_size {
172            ProgressBar::new(size, "Processing")
173        } else {
174            ProgressBar::new_spinner("Processing")
175        };
176
177        let mut file_lines = 0;
178        let mut bytes_read = 0;
179
180        let file = open_file(log_file)?;
181        let reader = BufReader::new(file);
182
183        for line in reader.lines() {
184            file_lines += 1;
185            total_lines += 1;
186            let line = line?;
187            bytes_read += line.len() + 1;
188
189            if file_lines % 10_000 == 0 {
190                if let Some(size) = file_size {
191                    progress.update(bytes_read.min(size));
192                } else {
193                    progress.update(file_lines);
194                }
195            }
196
197            let entry: AuditEntry = match serde_json::from_str(&line) {
198                Ok(e) => e,
199                Err(_) => continue,
200            };
201
202            // Skip if no request or auth info
203            let request = match entry.request {
204                Some(r) => r,
205                None => continue,
206            };
207
208            let auth = match entry.auth {
209                Some(a) => a,
210                None => continue,
211            };
212
213            let entity_id = match auth.entity_id {
214                Some(ref id) if !id.is_empty() => id.clone(),
215                _ => continue,
216            };
217
218            // Determine operation type
219            let path = request.path.as_deref().unwrap_or("");
220            let operation = request.operation.as_deref().unwrap_or("");
221
222            let op_type = if path == "auth/token/lookup-self" {
223                "lookup"
224            } else if path == "auth/token/renew-self" {
225                "renew"
226            } else if path == "auth/token/revoke-self" {
227                "revoke"
228            } else if path == "auth/token/create" {
229                "create"
230            } else if path.starts_with("auth/") && operation == "update" {
231                "login"
232            } else if path.starts_with("auth/token/") {
233                "other"
234            } else {
235                continue; // Not a token operation
236            };
237
238            // Apply operation filter if specified
239            if let Some(filters) = operation_filter {
240                if !filters.iter().any(|f| op_type.contains(f.as_str())) {
241                    continue;
242                }
243            }
244
245            // Update token operations summary
246            let ops = token_ops.entry(entity_id.clone()).or_default();
247            match op_type {
248                "lookup" => ops.lookup_self += 1,
249                "renew" => ops.renew_self += 1,
250                "revoke" => ops.revoke_self += 1,
251                "create" => ops.create += 1,
252                "login" => ops.login += 1,
253                _ => ops.other += 1,
254            }
255
256            if ops.display_name.is_none() {
257                ops.display_name = auth.display_name.clone();
258            }
259            if ops.username.is_none() {
260                ops.username = auth.metadata.as_ref().and_then(|m| {
261                    m.get("username")
262                        .and_then(|v| v.as_str())
263                        .map(|s| s.to_string())
264                });
265            }
266            ops.update_timestamps(&entry.time);
267
268            // Track accessor-level data for detailed analysis
269            if let Some(accessor) = auth.accessor {
270                let entity_acc = accessor_data.entry(entity_id.clone()).or_default();
271                if entity_acc.display_name.is_none() {
272                    entity_acc.display_name = auth.display_name.clone();
273                }
274
275                let acc_data =
276                    entity_acc
277                        .accessors
278                        .entry(accessor)
279                        .or_insert_with(|| AccessorData {
280                            operations: 0,
281                            first_seen: entry.time.clone(),
282                            last_seen: entry.time.clone(),
283                        });
284                acc_data.operations += 1;
285                acc_data.last_seen = entry.time;
286            }
287        }
288
289        progress.finish();
290        eprintln!("  Processed {} lines", format_number(file_lines));
291    }
292
293    Ok((token_ops, accessor_data, total_lines))
294}
295
296/// Display operations summary
297fn display_summary(token_ops: &HashMap<String, TokenOps>, total_lines: usize) {
298    let mut ops_vec: Vec<_> = token_ops.iter().collect();
299    ops_vec.sort_by(|a, b| b.1.total().cmp(&a.1.total()));
300
301    // Calculate totals
302    let total_ops: usize = ops_vec.iter().map(|(_, ops)| ops.total()).sum();
303    let total_lookup: usize = ops_vec.iter().map(|(_, ops)| ops.lookup_self).sum();
304    let total_renew: usize = ops_vec.iter().map(|(_, ops)| ops.renew_self).sum();
305    let total_revoke: usize = ops_vec.iter().map(|(_, ops)| ops.revoke_self).sum();
306    let total_create: usize = ops_vec.iter().map(|(_, ops)| ops.create).sum();
307    let total_login: usize = ops_vec.iter().map(|(_, ops)| ops.login).sum();
308    let total_other: usize = ops_vec.iter().map(|(_, ops)| ops.other).sum();
309
310    println!("Total: Processed {} lines\n", format_number(total_lines));
311    println!("{}", "=".repeat(150));
312    println!(
313        "{:<30} {:<25} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}",
314        "Display Name",
315        "Username",
316        "Total",
317        "Lookup",
318        "Renew",
319        "Revoke",
320        "Create",
321        "Login",
322        "Other"
323    );
324    println!("{}", "=".repeat(150));
325
326    // Show top 50
327    for (_, ops) in ops_vec.iter().take(50) {
328        let display = ops.display_name.as_deref().unwrap_or("");
329        let username = ops.username.as_deref().unwrap_or("");
330
331        println!(
332            "{:<30} {:<25} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}",
333            if display.len() > 30 {
334                &display[..30]
335            } else {
336                display
337            },
338            if username.len() > 25 {
339                &username[..25]
340            } else {
341                username
342            },
343            format_number(ops.total()),
344            format_number(ops.lookup_self),
345            format_number(ops.renew_self),
346            format_number(ops.revoke_self),
347            format_number(ops.create),
348            format_number(ops.login),
349            format_number(ops.other)
350        );
351    }
352
353    println!("{}", "=".repeat(150));
354    println!(
355        "TOTAL (top 50)                                                       {:>10}",
356        format_number(total_ops)
357    );
358    println!(
359        "TOTAL ENTITIES                                                       {:>10}",
360        format_number(token_ops.len())
361    );
362    println!("{}", "=".repeat(150));
363    println!();
364    println!("Operation Type Breakdown:");
365    println!("{}", "-".repeat(60));
366    println!(
367        "Lookup (lookup-self):   {:>12}  ({:>5.1}%)",
368        format_number(total_lookup),
369        (total_lookup as f64 / total_ops as f64) * 100.0
370    );
371    println!(
372        "Renew (renew-self):     {:>12}  ({:>5.1}%)",
373        format_number(total_renew),
374        (total_renew as f64 / total_ops as f64) * 100.0
375    );
376    println!(
377        "Revoke (revoke-self):   {:>12}  ({:>5.1}%)",
378        format_number(total_revoke),
379        (total_revoke as f64 / total_ops as f64) * 100.0
380    );
381    println!(
382        "Create (child token):   {:>12}  ({:>5.1}%)",
383        format_number(total_create),
384        (total_create as f64 / total_ops as f64) * 100.0
385    );
386    println!(
387        "Login (auth token):     {:>12}  ({:>5.1}%)",
388        format_number(total_login),
389        (total_login as f64 / total_ops as f64) * 100.0
390    );
391    println!(
392        "Other:                  {:>12}  ({:>5.1}%)",
393        format_number(total_other),
394        (total_other as f64 / total_ops as f64) * 100.0
395    );
396    println!("{}", "-".repeat(60));
397    println!("TOTAL:              {:>16}", format_number(total_ops));
398}
399
400/// Display abuse detection results
401fn display_abuse(token_ops: &HashMap<String, TokenOps>, threshold: usize) {
402    let mut abusers: Vec<_> = token_ops
403        .iter()
404        .filter(|(_, ops)| ops.lookup_self >= threshold)
405        .collect();
406
407    abusers.sort_by(|a, b| b.1.lookup_self.cmp(&a.1.lookup_self));
408
409    if abusers.is_empty() {
410        println!(
411            "\n No entities found exceeding threshold of {} lookup operations",
412            format_number(threshold)
413        );
414        return;
415    }
416
417    println!(
418        "\n Found {} entities exceeding {} lookup operations:\n",
419        abusers.len(),
420        format_number(threshold)
421    );
422
423    println!(
424        "{:<50} {:>12} {:>20} {:>12}",
425        "Entity", "Lookups", "Time Span", "Rate/Hour"
426    );
427    println!("{}", "=".repeat(106));
428
429    for (entity_id, ops) in abusers {
430        let display = ops
431            .display_name
432            .as_deref()
433            .or(ops.username.as_deref())
434            .unwrap_or(entity_id);
435
436        let time_span = if let (Some(first), Some(last)) = (&ops.first_seen, &ops.last_seen) {
437            calculate_time_span_hours(first, last)
438        } else {
439            0.0
440        };
441
442        let rate = if time_span > 0.0 {
443            ops.lookup_self as f64 / time_span
444        } else {
445            0.0
446        };
447
448        println!(
449            "{:<50} {:>12} {:>17.1}h {:>12.1}",
450            if display.len() > 50 {
451                format!("{}...", &display[..47])
452            } else {
453                display.to_string()
454            },
455            format_number(ops.lookup_self),
456            time_span,
457            rate
458        );
459    }
460}
461
462/// Export data to CSV
463fn export_csv(
464    accessor_data: &HashMap<String, EntityAccessors>,
465    output: &str,
466    min_operations: usize,
467) -> Result<()> {
468    let mut file = File::create(output)
469        .with_context(|| format!("Failed to create output file: {}", output))?;
470
471    writeln!(
472        file,
473        "entity_id,display_name,accessor,operations,first_seen,last_seen,duration_hours"
474    )?;
475
476    let mut rows: Vec<_> = accessor_data
477        .iter()
478        .flat_map(|(entity_id, entity_data)| {
479            entity_data
480                .accessors
481                .iter()
482                .map(move |(accessor, data)| (entity_id, &entity_data.display_name, accessor, data))
483        })
484        .filter(|(_, _, _, data)| data.operations >= min_operations)
485        .collect();
486
487    rows.sort_by(|a, b| b.3.operations.cmp(&a.3.operations));
488
489    for (entity_id, display_name, accessor, data) in rows {
490        let duration = calculate_time_span_hours(&data.first_seen, &data.last_seen);
491        let display = display_name.as_deref().unwrap_or(entity_id);
492
493        writeln!(
494            file,
495            "{},{},{},{},{},{},{:.2}",
496            entity_id,
497            display,
498            accessor,
499            data.operations,
500            data.first_seen,
501            data.last_seen,
502            duration
503        )?;
504    }
505
506    Ok(())
507}
508
509/// Main entry point for token analysis command
510pub fn run(
511    log_files: &[String],
512    abuse_threshold: Option<usize>,
513    operation_filter: Option<Vec<String>>,
514    export_path: Option<&str>,
515    min_operations: usize,
516) -> Result<()> {
517    eprintln!("Token Analysis");
518    eprintln!("   Files: {}", log_files.len());
519    if let Some(ref filters) = operation_filter {
520        eprintln!("   Filter: {}", filters.join(", "));
521    }
522    if let Some(threshold) = abuse_threshold {
523        eprintln!("   Abuse threshold: {}", format_number(threshold));
524    }
525    if let Some(output) = export_path {
526        eprintln!("   Export: {}", output);
527    }
528    eprintln!();
529
530    let (token_ops, accessor_data, total_lines) =
531        process_logs(log_files, operation_filter.as_ref())?;
532
533    eprintln!("\n Processed {} total lines", format_number(total_lines));
534    eprintln!(
535        "  {} unique entities with token operations",
536        format_number(token_ops.len())
537    );
538
539    // Display based on mode
540    if let Some(threshold) = abuse_threshold {
541        display_abuse(&token_ops, threshold);
542    } else {
543        display_summary(&token_ops, total_lines);
544    }
545
546    // Export if requested
547    if let Some(output) = export_path {
548        export_csv(&accessor_data, output, min_operations)?;
549        eprintln!("\n Exported data to: {}", output);
550    }
551
552    Ok(())
553}