vault_audit_tools/commands/token_export.rs

//! Token lookup pattern exporter.
//!
//! Exports token lookup patterns to CSV for further analysis.
//! Identifies entities with high token lookup volumes and their patterns.
//! Supports multi-file analysis for historical trending.
//!
//! # Usage
//!
//! ```bash
//! # Single file export
//! vault-audit token-export audit.log --output lookups.csv
//!
//! # Multi-day export with filtering
//! vault-audit token-export *.log --output lookups.csv --min-lookups 100
//! ```
//!
//! # Output
//!
//! Generates a CSV file with columns:
//! - Entity ID
//! - Display name
//! - Token accessor
//! - Total lookup count
//! - Time span in hours (between first and last seen)
//! - Lookups per hour
//! - First seen timestamp
//! - Last seen timestamp
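//!
//! The exact header row written by [`run`]:
//!
//! ```csv
//! entity_id,display_name,token_accessor,total_lookups,time_span_hours,lookups_per_hour,first_seen,last_seen
//! ```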
//!
//! Useful for:
//! - Token usage trending
//! - Token lifetime analysis
//! - Identifying long-lived vs short-lived tokens

use crate::audit::types::AuditEntry;
use crate::utils::progress::ProgressBar;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

/// Token activity statistics
#[derive(Debug, Default)]
struct TokenData {
    lookups: usize,
    first_seen: String,
    last_seen: String,
}

/// Entity with associated token data
#[derive(Debug)]
struct EntityData {
    display_name: String,
    tokens: HashMap<String, TokenData>,
}

/// Formats a count with comma thousands separators
/// (e.g. 1234567 becomes "1,234,567").
pub fn format_number(n: usize) -> String {
    let s = n.to_string();
    let mut result = String::new();
    // Walk the digits right-to-left, inserting a comma every three digits,
    // then reverse back into reading order.
    for (i, c) in s.chars().rev().enumerate() {
        if i > 0 && i % 3 == 0 {
            result.push(',');
        }
        result.push(c);
    }
    result.chars().rev().collect()
}

/// Returns the number of hours between two RFC 3339 timestamps, or 0.0
/// if either timestamp fails to parse.
fn calculate_time_span_hours(first: &str, last: &str) -> f64 {
    use chrono::DateTime;

    let first_dt = DateTime::parse_from_rfc3339(first).ok();
    let last_dt = DateTime::parse_from_rfc3339(last).ok();

    if let (Some(first), Some(last)) = (first_dt, last_dt) {
        let duration = last.signed_duration_since(first);
        duration.num_seconds() as f64 / 3600.0
    } else {
        0.0
    }
}

/// Parses the given audit log files, aggregates token lookup activity per
/// entity and token accessor, and exports rows with at least `min_lookups`
/// lookups to a CSV file at `output`.
pub fn run(log_files: &[String], output: &str, min_lookups: usize) -> Result<()> {
    let mut entities: HashMap<String, EntityData> = HashMap::new();
    let mut total_lines = 0;
    let mut lookup_count = 0;

    // Process each log file sequentially
    for (file_idx, log_file) in log_files.iter().enumerate() {
        eprintln!(
            "[{}/{}] Processing: {}",
            file_idx + 1,
            log_files.len(),
            log_file
        );

        // Get file size for progress tracking
        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
        let mut progress = if let Some(size) = file_size {
            ProgressBar::new(size, "Processing")
        } else {
            ProgressBar::new_spinner("Processing")
        };

        let file = File::open(log_file)
            .with_context(|| format!("Failed to open {}", log_file))?;
        let reader = BufReader::new(file);

        let mut file_lines = 0;
        let mut bytes_read = 0;

        for line in reader.lines() {
            file_lines += 1;
            total_lines += 1;
            let line = line?;
            bytes_read += line.len() + 1; // +1 for newline

            // Update progress every 10k lines for smooth animation
            if file_lines % 10_000 == 0 {
                if let Some(size) = file_size {
                    progress.update(bytes_read.min(size)); // Cap at file size
                } else {
                    progress.update(file_lines);
                }
            }

            // Skip lines that do not parse as audit entries
            let entry: AuditEntry = match serde_json::from_str(&line) {
                Ok(e) => e,
                Err(_) => continue,
            };

            // Filter for token lookup operations; the prefix match also
            // covers the lookup-self and lookup-accessor endpoints
            let request = match &entry.request {
                Some(r) => r,
                None => continue,
            };

            let path = match &request.path {
                Some(p) => p.as_str(),
                None => continue,
            };

            if !path.starts_with("auth/token/lookup") {
                continue;
            }

            // Only entries attributable to an entity are counted
            let entity_id = match entry.auth.as_ref().and_then(|a| a.entity_id.as_deref()) {
                Some(id) => id,
                None => continue,
            };

            lookup_count += 1;

            let display_name = entry
                .auth
                .as_ref()
                .and_then(|a| a.display_name.as_deref())
                .unwrap_or("N/A");

            let entity_data = entities
                .entry(entity_id.to_string())
                .or_insert_with(|| EntityData {
                    display_name: display_name.to_string(),
                    tokens: HashMap::new(),
                });

            let accessor = entry
                .auth
                .as_ref()
                .and_then(|a| a.accessor.as_deref())
                .unwrap_or("unknown")
                .to_string();

            let timestamp = entry.time.clone();

            let token_data = entity_data.tokens.entry(accessor).or_default();
            token_data.lookups += 1;

            if token_data.first_seen.is_empty() {
                token_data.first_seen = timestamp.clone();
            }
            token_data.last_seen = timestamp;
        }

        // Ensure 100% progress for this file
        if let Some(size) = file_size {
            progress.update(size);
        }

        progress.finish_with_message(&format!(
            "Processed {} lines from this file",
            format_number(file_lines)
        ));
    }

    eprintln!(
        "\nTotal: Processed {} lines, found {} token lookups from {} entities",
        format_number(total_lines),
        format_number(lookup_count),
        format_number(entities.len())
    );

    // Prepare one CSV row per (entity, token accessor) pair
    let mut rows: Vec<_> = entities
        .iter()
        .flat_map(|(entity_id, entity_data)| {
            entity_data
                .tokens
                .iter()
                .map(move |(accessor, token_data)| {
                    let time_span =
                        calculate_time_span_hours(&token_data.first_seen, &token_data.last_seen);
                    let lookups_per_hour = if time_span > 0.0 {
                        token_data.lookups as f64 / time_span
                    } else {
                        0.0
                    };

                    (
                        entity_id.clone(),
                        entity_data.display_name.clone(),
                        accessor.clone(),
                        token_data.lookups,
                        time_span,
                        lookups_per_hour,
                        token_data.first_seen.clone(),
                        token_data.last_seen.clone(),
                    )
                })
        })
        .collect();

    // Drop rows below the minimum lookup threshold, then sort the
    // survivors by total lookups descending
    rows.retain(|row| row.3 >= min_lookups);
    rows.sort_by(|a, b| b.3.cmp(&a.3));

    // Create output directory if needed
    if let Some(parent) = std::path::Path::new(output).parent() {
        std::fs::create_dir_all(parent).context("Failed to create output directory")?;
    }

    // Write CSV
    let file = File::create(output).context("Failed to create output file")?;
    let mut writer = csv::Writer::from_writer(file);

    writer.write_record([
        "entity_id",
        "display_name",
        "token_accessor",
        "total_lookups",
        "time_span_hours",
        "lookups_per_hour",
        "first_seen",
        "last_seen",
    ])?;

    for (entity_id, display_name, accessor, lookups, time_span, rate, first, last) in &rows {
        writer.write_record([
            entity_id,
            display_name,
            accessor,
            &lookups.to_string(),
            &format!("{:.2}", time_span),
            &format!("{:.2}", rate),
            first,
            last,
        ])?;
    }

    writer.flush()?;

    eprintln!(
        "\n[SUCCESS] Exported {} token lookup records to: {}",
        format_number(rows.len()),
        output
    );

    // Print summary statistics for the exported rows
    let total_lookups: usize = rows.iter().map(|r| r.3).sum();
    // Count entities present in the export rather than all entities seen,
    // so the summary matches the CSV contents
    let unique_entities = rows
        .iter()
        .map(|r| r.0.as_str())
        .collect::<std::collections::HashSet<_>>()
        .len();
    let unique_tokens = rows.len();

    eprintln!("\n{}", "=".repeat(80));
    eprintln!("Summary Statistics:");
    eprintln!("{}", "-".repeat(80));
    eprintln!(
        "Total Token Lookup Operations: {}",
        format_number(total_lookups)
    );
    eprintln!("Unique Entities: {}", format_number(unique_entities));
    eprintln!("Unique Token Accessors: {}", format_number(unique_tokens));
    // Avoid printing NaN when every row was filtered out
    if unique_tokens > 0 {
        eprintln!(
            "Average Lookups per Token: {:.1}",
            total_lookups as f64 / unique_tokens as f64
        );
    }

    // Top 5 entities by lookup count
    let mut entity_totals: HashMap<String, usize> = HashMap::new();
    let mut entity_names: HashMap<String, String> = HashMap::new();
    for (entity_id, display_name, _, lookups, _, _, _, _) in &rows {
        *entity_totals.entry(entity_id.clone()).or_insert(0) += lookups;
        entity_names.insert(entity_id.clone(), display_name.clone());
    }

    let mut top_entities: Vec<_> = entity_totals.into_iter().collect();
    top_entities.sort_by(|a, b| b.1.cmp(&a.1));

    eprintln!("\nTop 5 Entities by Lookup Count:");
    eprintln!("{}", "-".repeat(80));
    for (i, (entity_id, count)) in top_entities.iter().take(5).enumerate() {
        let name = entity_names.get(entity_id).unwrap();
        eprintln!(
            "{}. {} ({}): {} lookups",
            i + 1,
            name,
            entity_id,
            format_number(*count)
        );
    }

    eprintln!("{}", "=".repeat(80));
    eprintln!("\n✓ Token lookup data exported to: {}", output);

    Ok(())
}
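
// A minimal test sketch for the two pure helpers above; the timestamp
// values are illustrative, not taken from real audit logs.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn format_number_inserts_thousands_separators() {
        assert_eq!(format_number(0), "0");
        assert_eq!(format_number(999), "999");
        assert_eq!(format_number(1_000), "1,000");
        assert_eq!(format_number(1_234_567), "1,234,567");
    }

    #[test]
    fn time_span_handles_valid_and_invalid_timestamps() {
        // Two RFC 3339 timestamps 90 minutes apart.
        let hours = calculate_time_span_hours("2024-01-01T00:00:00Z", "2024-01-01T01:30:00Z");
        assert!((hours - 1.5).abs() < f64::EPSILON);

        // Unparseable timestamps fall back to 0.0.
        assert_eq!(calculate_time_span_hours("not-a-time", "also-not-a-time"), 0.0);
    }
}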