vault_audit_tools/commands/token_export.rs

//! Token lookup pattern exporter.
//!
//! **⚠️ DEPRECATED**: Use `token-analysis --export` instead.
//!
//! This command has been consolidated into the unified `token-analysis` command.
//! Use the `--export` flag for CSV export with per-accessor detail.
//!
//! ```bash
//! # Old command (deprecated)
//! vault-audit token-export audit.log --output lookups.csv --min-lookups 100
//!
//! # New command (recommended)
//! vault-audit token-analysis audit.log --export lookups.csv --min-operations 100
//! ```
//!
//! See [`token_analysis`](crate::commands::token_analysis) module for full documentation.
//!
//! ---
//!
//! Exports token lookup patterns to CSV for further analysis.
//! Identifies entities with high token lookup volumes and their patterns.
//! Supports multi-file analysis for historical trending.
//!
//! # Usage
//!
//! ```bash
//! # Single file export
//! vault-audit token-export audit.log --output lookups.csv
//!
//! # Multi-day export with filtering
//! vault-audit token-export *.log --output lookups.csv --min-lookups 100
//! ```
//!
//! # Output
//!
//! Generates a CSV file with columns:
//! - Entity ID
//! - Display name
//! - Token accessor
//! - Total lookup count
//! - Time span in hours (between first and last seen)
//! - Lookups per hour
//! - First seen timestamp
//! - Last seen timestamp
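//!
//! For example, an exported row might look like this (illustrative values only):
//!
//! ```text
//! entity_id,display_name,token_accessor,total_lookups,time_span_hours,lookups_per_hour,first_seen,last_seen
//! entity-1234,ci-runner,accessor-abcd,1520,24.00,63.33,2024-06-01T00:00:00Z,2024-06-02T00:00:00Z
//! ```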
//!
//! Useful for:
//! - Token usage trending
//! - Token lifetime analysis
//! - Identifying long-lived vs short-lived tokens

use crate::audit::types::AuditEntry;
use crate::utils::progress::ProgressBar;
use crate::utils::reader::open_file;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

/// Token activity statistics
#[derive(Debug, Default)]
struct TokenData {
    lookups: usize,
    first_seen: String,
    last_seen: String,
}

/// Entity with associated token data
#[derive(Debug)]
struct EntityData {
    display_name: String,
    tokens: HashMap<String, TokenData>,
}

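/// Formats `n` with thousands separators, e.g. `1234567` becomes `"1,234,567"`.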
pub fn format_number(n: usize) -> String {
    let s = n.to_string();
    let mut result = String::new();
    for (i, c) in s.chars().rev().enumerate() {
        if i > 0 && i % 3 == 0 {
            result.push(',');
        }
        result.push(c);
    }
    result.chars().rev().collect()
}

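/// Returns the number of hours between two RFC 3339 timestamps,
/// or `0.0` when either timestamp fails to parse.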
fn calculate_time_span_hours(first: &str, last: &str) -> f64 {
    use chrono::DateTime;

    let first_dt = DateTime::parse_from_rfc3339(first).ok();
    let last_dt = DateTime::parse_from_rfc3339(last).ok();

    if let (Some(first), Some(last)) = (first_dt, last_dt) {
        let duration = last.signed_duration_since(first);
        duration.num_seconds() as f64 / 3600.0
    } else {
        0.0
    }
}

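/// Parses the given audit log files, aggregates token lookup activity per
/// entity and token accessor, and exports the result to `output` as CSV,
/// keeping only accessors with at least `min_lookups` lookups.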
pub fn run(log_files: &[String], output: &str, min_lookups: usize) -> Result<()> {
    let mut entities: HashMap<String, EntityData> = HashMap::new();
    let mut total_lines = 0;
    let mut lookup_count = 0;

    // Process each log file sequentially
    for (file_idx, log_file) in log_files.iter().enumerate() {
        eprintln!(
            "[{}/{}] Processing: {}",
            file_idx + 1,
            log_files.len(),
            log_file
        );

        // Get file size for progress tracking
        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
        let mut progress = if let Some(size) = file_size {
            ProgressBar::new(size, "Processing")
        } else {
            ProgressBar::new_spinner("Processing")
        };

        let file = open_file(log_file)?;
        let reader = BufReader::new(file);

        let mut file_lines = 0;
        let mut bytes_read = 0;

        for line in reader.lines() {
            file_lines += 1;
            total_lines += 1;
            let line = line?;
            bytes_read += line.len() + 1; // +1 for newline

            // Update progress every 10k lines for smooth animation
            if file_lines % 10_000 == 0 {
                if let Some(size) = file_size {
                    progress.update(bytes_read.min(size)); // Cap at file size
                } else {
                    progress.update(file_lines);
                }
            }

            let entry: AuditEntry = match serde_json::from_str(&line) {
                Ok(e) => e,
                Err(_) => continue,
            };

            // Filter for token lookup operations
            let request = match &entry.request {
                Some(r) => r,
                None => continue,
            };

            let path = match &request.path {
                Some(p) => p.as_str(),
                None => continue,
            };

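            // Prefix match also covers the lookup-self and lookup-accessor variants.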
            if !path.starts_with("auth/token/lookup") {
                continue;
            }

            let entity_id = match entry.auth.as_ref().and_then(|a| a.entity_id.as_deref()) {
                Some(id) => id,
                None => continue,
            };

            lookup_count += 1;

            let display_name = entry
                .auth
                .as_ref()
                .and_then(|a| a.display_name.as_deref())
                .unwrap_or("N/A");

            let entity_data = entities
                .entry(entity_id.to_string())
                .or_insert_with(|| EntityData {
                    display_name: display_name.to_string(),
                    tokens: HashMap::new(),
                });

            let accessor = entry
                .auth
                .as_ref()
                .and_then(|a| a.accessor.as_deref())
                .unwrap_or("unknown")
                .to_string();

            let timestamp = entry.time.clone();

            let token_data = entity_data.tokens.entry(accessor).or_default();
            token_data.lookups += 1;

            if token_data.first_seen.is_empty() {
                token_data.first_seen = timestamp.clone();
            }
            token_data.last_seen = timestamp;
        }

        // Ensure 100% progress for this file
        if let Some(size) = file_size {
            progress.update(size);
        }

        progress.finish_with_message(&format!(
            "Processed {} lines from this file",
            format_number(file_lines)
        ));
    }

    eprintln!(
        "\nTotal: Processed {} lines, found {} token lookups from {} entities",
        format_number(total_lines),
        format_number(lookup_count),
        format_number(entities.len())
    );

    // Prepare CSV rows
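    // Row tuple layout: (entity_id, display_name, accessor, lookups, time_span_hours,
    // lookups_per_hour, first_seen, last_seen); field 3 (lookups) drives the sort
    // and the --min-lookups filter below.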
    let mut rows: Vec<_> = entities
        .iter()
        .flat_map(|(entity_id, entity_data)| {
            entity_data
                .tokens
                .iter()
                .map(move |(accessor, token_data)| {
                    let time_span =
                        calculate_time_span_hours(&token_data.first_seen, &token_data.last_seen);
                    let lookups_per_hour = if time_span > 0.0 {
                        token_data.lookups as f64 / time_span
                    } else {
                        0.0
                    };

                    (
                        entity_id.clone(),
                        entity_data.display_name.clone(),
                        accessor.clone(),
                        token_data.lookups,
                        time_span,
                        lookups_per_hour,
                        token_data.first_seen.clone(),
                        token_data.last_seen.clone(),
                    )
                })
        })
        .collect();

    // Sort by total lookups descending
    rows.sort_by(|a, b| b.3.cmp(&a.3));

    // Filter by minimum lookups
    rows.retain(|row| row.3 >= min_lookups);

    // Create output directory if needed
    if let Some(parent) = std::path::Path::new(output).parent() {
        std::fs::create_dir_all(parent).context("Failed to create output directory")?;
    }

    // Write CSV
    let file = File::create(output).context("Failed to create output file")?;
    let mut writer = csv::Writer::from_writer(file);

    writer.write_record([
        "entity_id",
        "display_name",
        "token_accessor",
        "total_lookups",
        "time_span_hours",
        "lookups_per_hour",
        "first_seen",
        "last_seen",
    ])?;

    for (entity_id, display_name, accessor, lookups, time_span, rate, first, last) in &rows {
        writer.write_record([
            entity_id,
            display_name,
            accessor,
            &lookups.to_string(),
            &format!("{:.2}", time_span),
            &format!("{:.2}", rate),
            first,
            last,
        ])?;
    }

    writer.flush()?;

    eprintln!(
        "\n[SUCCESS] Exported {} token lookup records to: {}",
        format_number(rows.len()),
        output
    );

    // Print summary
    let total_lookups: usize = rows.iter().map(|r| r.3).sum();
    let unique_entities = entities.len();
    let unique_tokens = rows.len();

    eprintln!("\n{}", "=".repeat(80));
    eprintln!("Summary Statistics:");
    eprintln!("{}", "-".repeat(80));
    eprintln!(
        "Total Token Lookup Operations: {}",
        format_number(total_lookups)
    );
    eprintln!("Unique Entities: {}", format_number(unique_entities));
    eprintln!("Unique Token Accessors: {}", format_number(unique_tokens));
    eprintln!(
        "Average Lookups per Token: {:.1}",
        total_lookups as f64 / unique_tokens as f64
    );

    // Top 5 entities by lookup count
    let mut entity_totals: HashMap<String, usize> = HashMap::new();
    let mut entity_names: HashMap<String, String> = HashMap::new();
    for (entity_id, display_name, _, lookups, _, _, _, _) in &rows {
        *entity_totals.entry(entity_id.clone()).or_insert(0) += lookups;
        entity_names.insert(entity_id.clone(), display_name.clone());
    }

    let mut top_entities: Vec<_> = entity_totals.into_iter().collect();
    top_entities.sort_by(|a, b| b.1.cmp(&a.1));

    eprintln!("\nTop 5 Entities by Lookup Count:");
    eprintln!("{}", "-".repeat(80));
    for (i, (entity_id, count)) in top_entities.iter().take(5).enumerate() {
        let name = entity_names.get(entity_id).unwrap();
        eprintln!(
            "{}. {} ({}): {} lookups",
            i + 1,
            name,
            entity_id,
            format_number(*count)
        );
    }

    eprintln!("{}", "=".repeat(80));
    eprintln!("\n✓ Token lookup data exported to: {}", output);

    Ok(())
}
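
// Minimal test sketch for the two pure helpers above; assumes only chrono's
// RFC 3339 parsing behaviour.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn format_number_inserts_thousands_separators() {
        assert_eq!(format_number(0), "0");
        assert_eq!(format_number(1_234), "1,234");
        assert_eq!(format_number(1_234_567), "1,234,567");
    }

    #[test]
    fn time_span_is_reported_in_hours() {
        let hours = calculate_time_span_hours("2024-06-01T00:00:00Z", "2024-06-01T06:00:00Z");
        assert!((hours - 6.0).abs() < 1e-9);
    }

    #[test]
    fn unparseable_timestamps_yield_zero_span() {
        assert_eq!(calculate_time_span_hours("not-a-time", "also-not-a-time"), 0.0);
    }
}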