vault_audit_tools/commands/token_export.rs

//! Token lookup pattern exporter.
//!
//! Exports token lookup patterns to CSV for further analysis.
//! Identifies entities with high token lookup volumes and their patterns.
//! Supports multi-file analysis for historical trending.
//!
//! # Usage
//!
//! ```bash
//! # Single file export
//! vault-audit token-export audit.log --output lookups.csv
//!
//! # Multi-day export with filtering
//! vault-audit token-export *.log --output lookups.csv --min-lookups 100
//! ```
//!
//! # Output
//!
//! Generates a CSV file with columns:
//! - Entity ID
//! - Display name
//! - Token accessor
//! - Total lookup count
//! - Time span in hours (between first and last seen)
//! - Lookups per hour
//! - First seen timestamp
//! - Last seen timestamp
//!
//! Useful for:
//! - Token usage trending
//! - Token lifetime analysis
//! - Identifying long-lived vs short-lived tokens
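//!
//! # Example output
//!
//! The header row below matches what the exporter writes; the data row is
//! purely illustrative:
//!
//! ```text
//! entity_id,display_name,token_accessor,total_lookups,time_span_hours,lookups_per_hour,first_seen,last_seen
//! 3a9c1e7f,app-backend,hmac-sha256:4f2e,1250,48.00,26.04,2024-01-01T00:00:00Z,2024-01-03T00:00:00Z
//! ```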

use crate::audit::types::AuditEntry;
use crate::utils::progress::ProgressBar;
use crate::utils::reader::open_file;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

/// Token activity statistics
#[derive(Debug, Default)]
struct TokenData {
    /// Number of lookup operations observed for this token accessor
    lookups: usize,
    /// Timestamp of the first lookup seen (RFC 3339, as recorded in the log)
    first_seen: String,
    /// Timestamp of the most recent lookup seen
    last_seen: String,
}

/// Entity with associated token data
#[derive(Debug)]
struct EntityData {
    /// Display name taken from the first lookup seen for this entity
    display_name: String,
    /// Per-token statistics, keyed by token accessor
    tokens: HashMap<String, TokenData>,
}

/// Formats a number with comma thousands separators (e.g. 1234567 -> "1,234,567").
pub fn format_number(n: usize) -> String {
    let s = n.to_string();
    let mut result = String::new();
    for (i, c) in s.chars().rev().enumerate() {
        if i > 0 && i % 3 == 0 {
            result.push(',');
        }
        result.push(c);
    }
    result.chars().rev().collect()
}

/// Returns the span between two RFC 3339 timestamps in hours, or 0.0 if
/// either timestamp fails to parse.
fn calculate_time_span_hours(first: &str, last: &str) -> f64 {
    use chrono::DateTime;

    let first_dt = DateTime::parse_from_rfc3339(first).ok();
    let last_dt = DateTime::parse_from_rfc3339(last).ok();

    if let (Some(first), Some(last)) = (first_dt, last_dt) {
        let duration = last.signed_duration_since(first);
        duration.num_seconds() as f64 / 3600.0
    } else {
        0.0
    }
}

/// Runs the token-export command: parses the given audit logs, aggregates
/// lookup activity per entity and token accessor, and writes the result to CSV.
pub fn run(log_files: &[String], output: &str, min_lookups: usize) -> Result<()> {
    let mut entities: HashMap<String, EntityData> = HashMap::new();
    let mut total_lines = 0;
    let mut lookup_count = 0;

    // Process each log file sequentially
    for (file_idx, log_file) in log_files.iter().enumerate() {
        eprintln!(
            "[{}/{}] Processing: {}",
            file_idx + 1,
            log_files.len(),
            log_file
        );

        // Get file size for progress tracking
        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
        let mut progress = if let Some(size) = file_size {
            ProgressBar::new(size, "Processing")
        } else {
            ProgressBar::new_spinner("Processing")
        };

        let file = open_file(log_file)?;
        let reader = BufReader::new(file);

        let mut file_lines = 0;
        let mut bytes_read = 0;

        for line in reader.lines() {
            file_lines += 1;
            total_lines += 1;
            let line = line?;
            bytes_read += line.len() + 1; // +1 for newline

            // Update progress every 10k lines for smooth animation
            if file_lines % 10_000 == 0 {
                if let Some(size) = file_size {
                    progress.update(bytes_read.min(size)); // Cap at file size
                } else {
                    progress.update(file_lines);
                }
            }

            // Skip lines that aren't valid JSON audit entries
            let entry: AuditEntry = match serde_json::from_str(&line) {
                Ok(e) => e,
                Err(_) => continue,
            };

            // Filter for token lookup operations
            let request = match &entry.request {
                Some(r) => r,
                None => continue,
            };

            let path = match &request.path {
                Some(p) => p.as_str(),
                None => continue,
            };

            // Prefix match also covers auth/token/lookup-self and
            // auth/token/lookup-accessor
            if !path.starts_with("auth/token/lookup") {
                continue;
            }

            // Skip entries with no entity ID attached
            let entity_id = match entry.auth.as_ref().and_then(|a| a.entity_id.as_deref()) {
                Some(id) => id,
                None => continue,
            };

            lookup_count += 1;

            let display_name = entry
                .auth
                .as_ref()
                .and_then(|a| a.display_name.as_deref())
                .unwrap_or("N/A");

            let entity_data = entities
                .entry(entity_id.to_string())
                .or_insert_with(|| EntityData {
                    display_name: display_name.to_string(),
                    tokens: HashMap::new(),
                });

            let accessor = entry
                .auth
                .as_ref()
                .and_then(|a| a.accessor.as_deref())
                .unwrap_or("unknown")
                .to_string();

            let timestamp = entry.time.clone();

            let token_data = entity_data.tokens.entry(accessor).or_default();
            token_data.lookups += 1;

            if token_data.first_seen.is_empty() {
                token_data.first_seen = timestamp.clone();
            }
            token_data.last_seen = timestamp;
        }

        // Ensure 100% progress for this file
        if let Some(size) = file_size {
            progress.update(size);
        }

        progress.finish_with_message(&format!(
            "Processed {} lines from this file",
            format_number(file_lines)
        ));
    }

    eprintln!(
        "\nTotal: Processed {} lines, found {} token lookups from {} entities",
        format_number(total_lines),
        format_number(lookup_count),
        format_number(entities.len())
    );

    // Prepare CSV rows
    let mut rows: Vec<_> = entities
        .iter()
        .flat_map(|(entity_id, entity_data)| {
            entity_data
                .tokens
                .iter()
                .map(move |(accessor, token_data)| {
                    let time_span =
                        calculate_time_span_hours(&token_data.first_seen, &token_data.last_seen);
                    let lookups_per_hour = if time_span > 0.0 {
                        token_data.lookups as f64 / time_span
                    } else {
                        0.0
                    };

                    (
                        entity_id.clone(),
                        entity_data.display_name.clone(),
                        accessor.clone(),
                        token_data.lookups,
                        time_span,
                        lookups_per_hour,
                        token_data.first_seen.clone(),
                        token_data.last_seen.clone(),
                    )
                })
        })
        .collect();

    // Sort by total lookups descending
    rows.sort_by(|a, b| b.3.cmp(&a.3));

    // Filter by minimum lookups
    rows.retain(|row| row.3 >= min_lookups);

    // Create output directory if needed
    if let Some(parent) = std::path::Path::new(output).parent() {
        std::fs::create_dir_all(parent).context("Failed to create output directory")?;
    }

    // Write CSV
    let file = File::create(output).context("Failed to create output file")?;
    let mut writer = csv::Writer::from_writer(file);

    writer.write_record([
        "entity_id",
        "display_name",
        "token_accessor",
        "total_lookups",
        "time_span_hours",
        "lookups_per_hour",
        "first_seen",
        "last_seen",
    ])?;

    for (entity_id, display_name, accessor, lookups, time_span, rate, first, last) in &rows {
        writer.write_record([
            entity_id,
            display_name,
            accessor,
            &lookups.to_string(),
            &format!("{:.2}", time_span),
            &format!("{:.2}", rate),
            first,
            last,
        ])?;
    }

    writer.flush()?;

    eprintln!(
        "\n[SUCCESS] Exported {} token lookup records to: {}",
        format_number(rows.len()),
        output
    );

    // Print summary
    let total_lookups: usize = rows.iter().map(|r| r.3).sum();
    let unique_entities = entities.len();
    let unique_tokens = rows.len();

    eprintln!("\n{}", "=".repeat(80));
    eprintln!("Summary Statistics:");
    eprintln!("{}", "-".repeat(80));
    eprintln!(
        "Total Token Lookup Operations: {}",
        format_number(total_lookups)
    );
    eprintln!("Unique Entities: {}", format_number(unique_entities));
    eprintln!("Unique Token Accessors: {}", format_number(unique_tokens));
    // Guard against a divide-by-zero (NaN) when no rows survive the filter
    if unique_tokens > 0 {
        eprintln!(
            "Average Lookups per Token: {:.1}",
            total_lookups as f64 / unique_tokens as f64
        );
    }

    // Top 5 entities by lookup count
    let mut entity_totals: HashMap<String, usize> = HashMap::new();
    let mut entity_names: HashMap<String, String> = HashMap::new();
    for (entity_id, display_name, _, lookups, _, _, _, _) in &rows {
        *entity_totals.entry(entity_id.clone()).or_insert(0) += lookups;
        entity_names.insert(entity_id.clone(), display_name.clone());
    }

    let mut top_entities: Vec<_> = entity_totals.into_iter().collect();
    top_entities.sort_by(|a, b| b.1.cmp(&a.1));

    eprintln!("\nTop 5 Entities by Lookup Count:");
    eprintln!("{}", "-".repeat(80));
    for (i, (entity_id, count)) in top_entities.iter().take(5).enumerate() {
        let name = entity_names.get(entity_id).unwrap();
        eprintln!(
            "{}. {} ({}): {} lookups",
            i + 1,
            name,
            entity_id,
            format_number(*count)
        );
    }

    eprintln!("{}", "=".repeat(80));
    eprintln!("\n✓ Token lookup data exported to: {}", output);

    Ok(())
}
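
// Minimal sanity checks for the helpers above. The numbers and timestamps are
// made up for illustration; they don't come from any real audit log.
#[cfg(test)]
mod tests {
    use super::{calculate_time_span_hours, format_number};

    #[test]
    fn format_number_groups_digits() {
        assert_eq!(format_number(0), "0");
        assert_eq!(format_number(999), "999");
        assert_eq!(format_number(1_234_567), "1,234,567");
    }

    #[test]
    fn time_span_is_reported_in_hours() {
        let hours = calculate_time_span_hours("2024-01-01T00:00:00Z", "2024-01-01T06:00:00Z");
        assert!((hours - 6.0).abs() < 1e-9);
    }

    #[test]
    fn unparseable_timestamps_yield_zero() {
        assert_eq!(calculate_time_span_hours("not-a-timestamp", "also-bad"), 0.0);
    }
}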