vault_audit_tools/commands/
token_lookup_abuse.rs

1//! Token lookup abuse detection.
2//!
3//! **⚠️ DEPRECATED**: Use `token-analysis --abuse-threshold` instead.
4//!
5//! This command has been consolidated into the unified `token-analysis` command.
6//! Use the `--abuse-threshold` flag for abuse detection.
7//!
8//! ```bash
9//! # Old command (deprecated)
10//! vault-audit token-lookup-abuse audit.log --threshold 500
11//!
12//! # New command (recommended)
13//! vault-audit token-analysis audit.log --abuse-threshold 500
14//! ```
15//!
16//! See [`token_analysis`](crate::commands::token_analysis) module for full documentation.
17//!
18//! ---
19//!
20//! Identifies entities performing excessive token lookup operations,
21//! which can indicate misconfigured applications or potential security issues.
22//! Supports multi-file analysis (compressed or uncompressed) for pattern detection over time.
23//!
24//! # Usage
25//!
26//! ```bash
27//! # Single file with default threshold (100 lookups per entity)
28//! vault-audit token-lookup-abuse audit.log
29//! vault-audit token-lookup-abuse audit.log.gz
30//!
31//! # Multi-day analysis with custom threshold (compressed files)
32//! vault-audit token-lookup-abuse logs/*.log.gz --threshold 500
33//! ```
34//!
35//! **Compressed File Support**: Analyzes `.gz` and `.zst` files directly.
36//!
37//! # Output
38//!
39//! Displays entities exceeding the lookup threshold with:
40//! - Entity ID and display name
41//! - Total lookup operations
42//! - Time range (first seen to last seen)
43//! - Rate (lookups per hour)
44//!
45//! Helps identify:
46//! - Applications polling tokens too frequently
47//! - Misconfigured token renewal logic
48//! - Potential reconnaissance activity
49
50use crate::audit::types::AuditEntry;
51use crate::utils::progress::ProgressBar;
52use crate::utils::reader::open_file;
53use crate::utils::time::parse_timestamp;
54use anyhow::Result;
55use std::collections::HashMap;
56use std::io::{BufRead, BufReader};
57
58/// Tracks token lookup statistics for an entity
/// Per-(entity, token accessor) lookup statistics accumulated from audit entries.
#[derive(Debug)]
struct TokenData {
    lookups: usize,     // total lookup-self operations observed
    first_seen: String, // timestamp of the earliest observed lookup
    last_seen: String,  // timestamp of the most recent observed lookup
}

impl TokenData {
    /// Builds a record for the first observed lookup at `timestamp`.
    fn new(timestamp: String) -> Self {
        let last_seen = timestamp.clone();
        Self {
            lookups: 1,
            first_seen: timestamp,
            last_seen,
        }
    }
}
75
76fn calculate_time_span_hours(first_seen: &str, last_seen: &str) -> f64 {
77    match (parse_timestamp(first_seen), parse_timestamp(last_seen)) {
78        (Ok(first), Ok(last)) => {
79            let duration = last.signed_duration_since(first);
80            duration.num_seconds() as f64 / 3600.0
81        }
82        _ => 0.0,
83    }
84}
85
/// Formats an integer with comma thousands separators (e.g. 1234567 -> "1,234,567").
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    // Leading group may be 1-2 digits; the rest are exact groups of three.
    let head = digits.len() % 3;
    let mut groups: Vec<&str> = Vec::with_capacity(digits.len() / 3 + 1);
    if head > 0 {
        groups.push(&digits[..head]);
    }
    let mut start = head;
    while start < digits.len() {
        // Safe byte slicing: the string contains only ASCII digits.
        groups.push(&digits[start..start + 3]);
        start += 3;
    }
    groups.join(",")
}
97
98pub fn run(log_files: &[String], threshold: usize) -> Result<()> {
99    // entity_id -> accessor -> TokenData
100    let mut patterns: HashMap<String, HashMap<String, TokenData>> = HashMap::new();
101    let mut total_lines = 0;
102    let mut lookup_lines = 0;
103
104    // Process each log file sequentially
105    for (file_idx, log_file) in log_files.iter().enumerate() {
106        eprintln!(
107            "[{}/{}] Processing: {}",
108            file_idx + 1,
109            log_files.len(),
110            log_file
111        );
112
113        // Get file size for progress tracking
114        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
115        let mut progress = if let Some(size) = file_size {
116            ProgressBar::new(size, "Processing")
117        } else {
118            ProgressBar::new_spinner("Processing")
119        };
120
121        let file = open_file(log_file)?;
122        let reader = BufReader::new(file);
123
124        let mut file_lines = 0;
125        let mut bytes_read = 0;
126
127        for line in reader.lines() {
128            file_lines += 1;
129            total_lines += 1;
130            let line = line?;
131            bytes_read += line.len() + 1; // +1 for newline
132
133            // Update progress every 10k lines for smooth animation
134            if file_lines % 10_000 == 0 {
135                if let Some(size) = file_size {
136                    progress.update(bytes_read.min(size)); // Cap at file size
137                } else {
138                    progress.update(file_lines);
139                }
140            }
141
142            let entry: AuditEntry = match serde_json::from_str(&line) {
143                Ok(e) => e,
144                Err(_) => continue,
145            };
146
147            // Filter for token lookup-self operations
148            let request = match &entry.request {
149                Some(r) => r,
150                None => continue,
151            };
152
153            let path = match &request.path {
154                Some(p) => p.as_str(),
155                None => continue,
156            };
157
158            if path != "auth/token/lookup-self" {
159                continue;
160            }
161
162            let auth = match &entry.auth {
163                Some(a) => a,
164                None => continue,
165            };
166
167            let entity_id = match &auth.entity_id {
168                Some(id) => id.as_str(),
169                None => continue,
170            };
171
172            let accessor = match &auth.accessor {
173                Some(a) => a.clone(),
174                None => continue,
175            };
176
177            lookup_lines += 1;
178
179            let entity_map = patterns.entry(entity_id.to_string()).or_default();
180
181            entity_map
182                .entry(accessor)
183                .and_modify(|data| {
184                    data.lookups += 1;
185                    data.last_seen = entry.time.clone();
186                })
187                .or_insert_with(|| TokenData::new(entry.time.clone()));
188        }
189
190        // Ensure 100% progress for this file
191        if let Some(size) = file_size {
192            progress.update(size);
193        }
194
195        progress.finish_with_message(&format!(
196            "Processed {} lines from this file",
197            format_number(file_lines)
198        ));
199    }
200
201    eprintln!(
202        "\nTotal: Processed {} lines, found {} lookup-self operations",
203        format_number(total_lines),
204        format_number(lookup_lines)
205    );
206
207    // Find entities with excessive lookups
208    let mut excessive_patterns = Vec::new();
209
210    for (entity_id, tokens) in &patterns {
211        for (accessor, data) in tokens {
212            if data.lookups >= threshold {
213                let time_span = calculate_time_span_hours(&data.first_seen, &data.last_seen);
214                let lookups_per_hour = if time_span > 0.0 {
215                    data.lookups as f64 / time_span
216                } else {
217                    0.0
218                };
219
220                // Truncate accessor for display
221                let accessor_display = if accessor.len() > 23 {
222                    format!("{}...", &accessor[..20])
223                } else {
224                    accessor.clone()
225                };
226
227                excessive_patterns.push((
228                    entity_id.clone(),
229                    accessor_display,
230                    data.lookups,
231                    time_span,
232                    lookups_per_hour,
233                    data.first_seen.clone(),
234                    data.last_seen.clone(),
235                ));
236            }
237        }
238    }
239
240    // Sort by number of lookups (descending)
241    excessive_patterns.sort_by(|a, b| b.2.cmp(&a.2));
242
243    // Print summary
244    println!("\n{}", "=".repeat(120));
245    println!("Token Lookup Pattern Analysis");
246    println!("{}", "=".repeat(120));
247    println!("\nTotal Entities: {}", format_number(patterns.len()));
248    println!(
249        "Entities with ≥{} lookups on same token: {}",
250        threshold,
251        format_number(excessive_patterns.len())
252    );
253
254    if !excessive_patterns.is_empty() {
255        let top = 20;
256        println!("\nTop {} Entities with Excessive Token Lookups:", top);
257        println!("{}", "-".repeat(120));
258        println!(
259            "{:<40} {:<25} {:>10} {:>12} {:>15}",
260            "Entity ID", "Token Accessor", "Lookups", "Time Span", "Rate"
261        );
262        println!(
263            "{:<40} {:<25} {:>10} {:>12} {:>15}",
264            "", "", "", "(hours)", "(lookups/hr)"
265        );
266        println!("{}", "-".repeat(120));
267
268        for (entity_id, accessor, lookups, time_span, rate, _first, _last) in
269            excessive_patterns.iter().take(top)
270        {
271            println!(
272                "{:<40} {:<25} {:>10} {:>12.1} {:>15.1}",
273                entity_id,
274                accessor,
275                format_number(*lookups),
276                time_span,
277                rate
278            );
279        }
280
281        // Statistics
282        let total_excessive_lookups: usize = excessive_patterns.iter().map(|p| p.2).sum();
283        let avg_lookups = total_excessive_lookups as f64 / excessive_patterns.len() as f64;
284        let max_lookups = excessive_patterns[0].2;
285
286        println!("\n{}", "-".repeat(120));
287        println!(
288            "Total Excessive Lookups: {}",
289            format_number(total_excessive_lookups)
290        );
291        println!("Average Lookups per Entity: {:.1}", avg_lookups);
292        println!(
293            "Maximum Lookups (single token): {}",
294            format_number(max_lookups)
295        );
296
297        // Find highest rate
298        let mut by_rate = excessive_patterns.clone();
299        by_rate.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap_or(std::cmp::Ordering::Equal));
300
301        if by_rate[0].4 > 0.0 {
302            println!("\nHighest Rate: {:.1} lookups/hour", by_rate[0].4);
303            println!("  Entity: {}", by_rate[0].0);
304            println!("  Lookups: {}", format_number(by_rate[0].2));
305        }
306    }
307
308    println!("{}", "=".repeat(120));
309
310    Ok(())
311}