vault_audit_tools/commands/
token_lookup_abuse.rs

//! Token lookup abuse detection.
//!
//! **⚠️ DEPRECATED**: Use `token-analysis --abuse-threshold` instead.
//!
//! This command has been consolidated into the unified `token-analysis` command.
//! Use the `--abuse-threshold` flag for abuse detection.
//!
//! ```bash
//! # Old command (deprecated)
//! vault-audit token-lookup-abuse audit.log --threshold 500
//!
//! # New command (recommended)
//! vault-audit token-analysis audit.log --abuse-threshold 500
//! ```
//!
//! See the [`token_analysis`](crate::commands::token_analysis) module for full documentation.
//!
//! ---
//!
//! Identifies entities performing excessive token lookup operations,
//! which can indicate misconfigured applications or potential security issues.
//! Supports multi-file analysis (compressed or uncompressed) for pattern detection over time.
//!
//! # Usage
//!
//! ```bash
//! # Single file with default threshold (100 lookups per token)
//! vault-audit token-lookup-abuse audit.log
//! vault-audit token-lookup-abuse audit.log.gz
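//! vault-audit token-lookup-abuse audit.log.zst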
//!
//! # Multi-day analysis with custom threshold (compressed files)
//! vault-audit token-lookup-abuse logs/*.log.gz --threshold 500
//! ```
//!
//! **Compressed File Support**: Analyzes `.gz` and `.zst` files directly.
//!
//! # Output
//!
//! Displays entity/token pairs at or above the lookup threshold, with:
//! - Entity ID and token accessor
//! - Total lookup operations
//! - Time span in hours (first seen to last seen)
//! - Rate (lookups per hour)
//!
//! Helps identify:
//! - Applications polling tokens too frequently
//! - Misconfigured token renewal logic
//! - Potential reconnaissance activity

use crate::audit::types::AuditEntry;
use crate::utils::format::format_number;
use crate::utils::progress::ProgressBar;
use crate::utils::reader::open_file;
use crate::utils::time::parse_timestamp;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::io::{BufRead, BufReader};

/// Tracks token lookup statistics for an entity
#[derive(Debug)]
struct TokenData {
    lookups: usize,
    first_seen: String,
    last_seen: String,
}

impl TokenData {
    fn new(timestamp: String) -> Self {
        Self {
            lookups: 1,
            first_seen: timestamp.clone(),
            last_seen: timestamp,
        }
    }
}

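/// Returns the elapsed time in hours between `first_seen` and `last_seen`.
///
/// Timestamps are recorded in file-processing order, so feeding files out of
/// chronological order can produce a negative span; the caller treats any
/// non-positive span as "rate unknown" (0.0 lookups/hour).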
fn calculate_time_span_hours(first_seen: &str, last_seen: &str) -> Result<f64> {
    let first = parse_timestamp(first_seen)
        .with_context(|| format!("Failed to parse first timestamp: {}", first_seen))?;
    let last = parse_timestamp(last_seen)
        .with_context(|| format!("Failed to parse last timestamp: {}", last_seen))?;

    let duration = last.signed_duration_since(first);
    Ok(duration.num_seconds() as f64 / 3600.0)
}

pub fn run(log_files: &[String], threshold: usize) -> Result<()> {
    // entity_id -> accessor -> TokenData
    let mut patterns: HashMap<String, HashMap<String, TokenData>> = HashMap::new();
    let mut total_lines = 0;
    let mut lookup_lines = 0;

    // Process each log file sequentially
    for (file_idx, log_file) in log_files.iter().enumerate() {
        eprintln!(
            "[{}/{}] Processing: {}",
            file_idx + 1,
            log_files.len(),
            log_file
        );

        // Count lines in file first for accurate progress tracking
        eprintln!("Scanning file to determine total lines...");
        let total_file_lines = crate::utils::parallel::count_file_lines(log_file)?;

        let progress = ProgressBar::new(total_file_lines, "Processing");

        let file = open_file(log_file)?;
        let reader = BufReader::new(file);

        let mut file_lines = 0;

        for line in reader.lines() {
            file_lines += 1;
            total_lines += 1;
            let line = line?;

            // Update progress every 10k lines for smooth animation
            if file_lines % 10_000 == 0 {
                progress.update(file_lines);
            }

            let entry: AuditEntry = match serde_json::from_str(&line) {
                Ok(e) => e,
                Err(_) => continue,
            };

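            // A qualifying entry looks roughly like this (abridged; the exact
            // shape is defined by `AuditEntry`, and the values are illustrative):
            //   {"time": "2024-01-01T00:00:00Z",
            //    "request": {"path": "auth/token/lookup-self"},
            //    "auth": {"entity_id": "...", "accessor": "..."}}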
            // Filter for token lookup-self operations
            let Some(request) = &entry.request else {
                continue;
            };

            let path = match &request.path {
                Some(p) => p.as_str(),
                None => continue,
            };

            if path != "auth/token/lookup-self" {
                continue;
            }

            let Some(auth) = &entry.auth else { continue };

            let entity_id = match &auth.entity_id {
                Some(id) => id.as_str(),
                None => continue,
            };

            let accessor = match &auth.accessor {
                Some(a) => a.clone(),
                None => continue,
            };

            lookup_lines += 1;

            let entity_map = patterns.entry(entity_id.to_string()).or_default();

            entity_map
                .entry(accessor)
                .and_modify(|data| {
                    data.lookups += 1;
                    data.last_seen.clone_from(&entry.time);
                })
                .or_insert_with(|| TokenData::new(entry.time.clone()));
        }

        // Ensure 100% progress for this file
        progress.update(total_file_lines);

        progress.finish_with_message(&format!(
            "Processed {} lines from this file",
            format_number(file_lines)
        ));
    }

    eprintln!(
        "\nTotal: Processed {} lines, found {} lookup-self operations",
        format_number(total_lines),
        format_number(lookup_lines)
    );

    // Find entity/token pairs with excessive lookups
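    // Each element is (entity_id, truncated accessor, lookup count, time span
    // in hours, lookups per hour, first_seen, last_seen); the numeric indices
    // used below (.2 = lookups, .4 = rate) refer to this layout.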
    let mut excessive_patterns = Vec::new();

    for (entity_id, tokens) in &patterns {
        for (accessor, data) in tokens {
            if data.lookups >= threshold {
                let time_span = calculate_time_span_hours(&data.first_seen, &data.last_seen)
                    .unwrap_or_else(|err| {
                        eprintln!(
                            "Warning: Failed to calculate time span for accessor {}: {}",
                            accessor, err
                        );
                        0.0
                    });
                let lookups_per_hour = if time_span > 0.0 {
                    data.lookups as f64 / time_span
                } else {
                    0.0
                };

                // Truncate accessor for display
                let accessor_display = if accessor.len() > 23 {
                    format!("{}...", &accessor[..20])
                } else {
                    accessor.clone()
                };

                excessive_patterns.push((
                    entity_id.clone(),
                    accessor_display,
                    data.lookups,
                    time_span,
                    lookups_per_hour,
                    data.first_seen.clone(),
                    data.last_seen.clone(),
                ));
            }
        }
    }

    // Sort by number of lookups (descending)
    excessive_patterns.sort_by(|a, b| b.2.cmp(&a.2));

    // Print summary
    println!("\n{}", "=".repeat(120));
    println!("Token Lookup Pattern Analysis");
    println!("{}", "=".repeat(120));
    println!("\nTotal Entities: {}", format_number(patterns.len()));
    println!(
        "Entity/token pairs with ≥{} lookups: {}",
        threshold,
        format_number(excessive_patterns.len())
    );

    if !excessive_patterns.is_empty() {
        let top = 20;
        println!("\nTop {} Entity/Token Pairs with Excessive Lookups:", top);
        println!("{}", "-".repeat(120));
        println!(
            "{:<40} {:<25} {:>10} {:>12} {:>15}",
            "Entity ID", "Token Accessor", "Lookups", "Time Span", "Rate"
        );
        println!(
            "{:<40} {:<25} {:>10} {:>12} {:>15}",
            "", "", "", "(hours)", "(lookups/hr)"
        );
        println!("{}", "-".repeat(120));

        for (entity_id, accessor, lookups, time_span, rate, _first, _last) in
            excessive_patterns.iter().take(top)
        {
            println!(
                "{:<40} {:<25} {:>10} {:>12.1} {:>15.1}",
                entity_id,
                accessor,
                format_number(*lookups),
                time_span,
                rate
            );
        }

        // Statistics
        let total_excessive_lookups: usize = excessive_patterns.iter().map(|p| p.2).sum();
        let avg_lookups = total_excessive_lookups as f64 / excessive_patterns.len() as f64;
        let max_lookups = excessive_patterns[0].2;

        println!("\n{}", "-".repeat(120));
        println!(
            "Total Excessive Lookups: {}",
            format_number(total_excessive_lookups)
        );
        println!("Average Lookups per Token: {:.1}", avg_lookups);
        println!(
            "Maximum Lookups (single token): {}",
            format_number(max_lookups)
        );

        // Find highest rate
        let mut by_rate = excessive_patterns.clone();
        by_rate.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap_or(std::cmp::Ordering::Equal));

        if by_rate[0].4 > 0.0 {
            println!("\nHighest Rate: {:.1} lookups/hour", by_rate[0].4);
            println!("  Entity: {}", by_rate[0].0);
            println!("  Lookups: {}", format_number(by_rate[0].2));
        }
    }

    println!("{}", "=".repeat(120));

    Ok(())
}
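
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch: these tests assume `parse_timestamp` accepts RFC 3339
    // strings, the format Vault audit logs use for the `time` field.
    #[test]
    fn time_span_is_reported_in_hours() {
        let hours =
            calculate_time_span_hours("2024-01-01T00:00:00Z", "2024-01-01T02:30:00Z").unwrap();
        assert!((hours - 2.5).abs() < 1e-9);
    }

    #[test]
    fn reversed_timestamps_yield_a_negative_span() {
        let hours =
            calculate_time_span_hours("2024-01-01T02:00:00Z", "2024-01-01T00:00:00Z").unwrap();
        assert!(hours < 0.0);
    }
}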