vault_audit_tools/commands/token_export.rs

//! Token lookup pattern exporter.
//!
//! **⚠️ DEPRECATED**: Use `token-analysis --export` instead.
//!
//! This command has been consolidated into the unified `token-analysis` command.
//! Use the `--export` flag for CSV export with per-accessor detail.
//!
//! ```bash
//! # Old command (deprecated)
//! vault-audit token-export audit.log --output lookups.csv --min-lookups 100
//!
//! # New command (recommended)
//! vault-audit token-analysis audit.log --export lookups.csv --min-operations 100
//! ```
//!
//! See the [`token_analysis`](crate::commands::token_analysis) module for full documentation.
//!
//! ---
//!
//! Exports token lookup patterns to CSV for further analysis.
//! Identifies entities with high token lookup volumes and their patterns.
//! Supports multi-file analysis for historical trending.
//!
//! # Usage
//!
//! ```bash
//! # Single file export
//! vault-audit token-export audit.log --output lookups.csv
//!
//! # Multi-day export with filtering
//! vault-audit token-export *.log --output lookups.csv --min-lookups 100
//! ```
//!
//! # Output
//!
//! Generates a CSV file with columns:
//! - Entity ID
//! - Display name
//! - Token accessor
//! - Total lookups
//! - Time span in hours (between first and last seen)
//! - Lookups per hour
//! - First seen timestamp
//! - Last seen timestamp
//!
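//! A sample of the exported CSV (values are illustrative, not taken from a real log):
//!
//! ```text
//! entity_id,display_name,token_accessor,total_lookups,time_span_hours,lookups_per_hour,first_seen,last_seen
//! 5d1f3c2a-...,ci-runner,hmac-sha256:9a1b...,1440,24.00,60.00,2024-01-01T00:00:00Z,2024-01-02T00:00:00Z
//! ```
//!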
//! Useful for:
//! - Token usage trending
//! - Token lifetime analysis
//! - Identifying long-lived vs short-lived tokens

use crate::audit::types::AuditEntry;
use crate::utils::format::format_number;
use crate::utils::progress::ProgressBar;
use crate::utils::reader::open_file;
use crate::utils::time::parse_timestamp;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

/// Token activity statistics
#[derive(Debug, Default)]
struct TokenData {
    lookups: usize,
    first_seen: String,
    last_seen: String,
}

/// Entity with associated token data
#[derive(Debug)]
struct EntityData {
    display_name: String,
    tokens: HashMap<String, TokenData>,
}

fn calculate_time_span_hours(first: &str, last: &str) -> Result<f64> {
    let first_dt = parse_timestamp(first)
        .with_context(|| format!("Failed to parse first timestamp: {}", first))?;
    let last_dt = parse_timestamp(last)
        .with_context(|| format!("Failed to parse last timestamp: {}", last))?;

    let duration = last_dt.signed_duration_since(first_dt);
    Ok(duration.num_seconds() as f64 / 3600.0)
}

pub fn run(log_files: &[String], output: &str, min_lookups: usize) -> Result<()> {
    let mut entities: HashMap<String, EntityData> = HashMap::new();
    let mut total_lines = 0;
    let mut lookup_count = 0;

    // Process each log file sequentially
    for (file_idx, log_file) in log_files.iter().enumerate() {
        eprintln!(
            "[{}/{}] Processing: {}",
            file_idx + 1,
            log_files.len(),
            log_file
        );

        // Count lines in file first for accurate progress tracking
        eprintln!("Scanning file to determine total lines...");
        let total_file_lines = crate::utils::parallel::count_file_lines(log_file)?;

        let progress = ProgressBar::new(total_file_lines, "Processing");

        let file = open_file(log_file)?;
        let reader = BufReader::new(file);

        let mut file_lines = 0;

        for line in reader.lines() {
            file_lines += 1;
            total_lines += 1;
            let line = line?;

            // Update progress every 10k lines for smooth animation
            if file_lines % 10_000 == 0 {
                progress.update(file_lines);
            }

            let entry: AuditEntry = match serde_json::from_str(&line) {
                Ok(e) => e,
                Err(_) => continue,
            };

            // Filter for token lookup operations
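            // Illustrative shape only (fields vary by Vault version and audit
            // device config): each matching line is one JSON object, roughly
            //   {"time":"...","request":{"path":"auth/token/lookup-self",...},
            //    "auth":{"entity_id":"...","display_name":"...","accessor":"...",...}}
            // using the field names read below; real entries carry more fields.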
            let Some(request) = &entry.request else {
                continue;
            };

            let path = match &request.path {
                Some(p) => p.as_str(),
                None => continue,
            };

            if !path.starts_with("auth/token/lookup") {
                continue;
            }

            let Some(entity_id) = entry.auth.as_ref().and_then(|a| a.entity_id.as_deref()) else {
                continue;
            };

            lookup_count += 1;

            let display_name = entry
                .auth
                .as_ref()
                .and_then(|a| a.display_name.as_deref())
                .unwrap_or("N/A");

            let entity_data = entities
                .entry(entity_id.to_string())
                .or_insert_with(|| EntityData {
                    display_name: display_name.to_string(),
                    tokens: HashMap::new(),
                });

            let accessor = entry
                .auth
                .as_ref()
                .and_then(|a| a.accessor.as_deref())
                .unwrap_or("unknown")
                .to_string();

            let timestamp = entry.time.clone();

            let token_data = entity_data.tokens.entry(accessor).or_default();
            token_data.lookups += 1;

            if token_data.first_seen.is_empty() {
                token_data.first_seen.clone_from(&timestamp);
            }
            token_data.last_seen = timestamp;
        }

        // Ensure 100% progress for this file
        progress.update(total_file_lines);

        progress.finish_with_message(&format!(
            "Processed {} lines from this file",
            format_number(file_lines)
        ));
    }

    eprintln!(
        "\nTotal: Processed {} lines, found {} token lookups from {} entities",
        format_number(total_lines),
        format_number(lookup_count),
        format_number(entities.len())
    );

    // Prepare CSV rows
    let mut rows: Vec<_> = entities
        .iter()
        .flat_map(|(entity_id, entity_data)| {
            entity_data
                .tokens
                .iter()
                .map(move |(accessor, token_data)| {
                    let time_span =
                        calculate_time_span_hours(&token_data.first_seen, &token_data.last_seen)
                            .unwrap_or_else(|err| {
                                eprintln!(
                                    "Warning: Failed to calculate time span for accessor {}: {}",
                                    accessor, err
                                );
                                0.0
                            });
                    let lookups_per_hour = if time_span > 0.0 {
                        token_data.lookups as f64 / time_span
                    } else {
                        0.0
                    };

                    (
                        entity_id.clone(),
                        entity_data.display_name.clone(),
                        accessor.clone(),
                        token_data.lookups,
                        time_span,
                        lookups_per_hour,
                        token_data.first_seen.clone(),
                        token_data.last_seen.clone(),
                    )
                })
        })
        .collect();

    // Sort by total lookups descending
    rows.sort_by(|a, b| b.3.cmp(&a.3));

    // Filter by minimum lookups
    rows.retain(|row| row.3 >= min_lookups);

    // Create output directory if needed
    if let Some(parent) = std::path::Path::new(output).parent() {
        std::fs::create_dir_all(parent).context("Failed to create output directory")?;
    }

    // Write CSV
    let file = File::create(output).context("Failed to create output file")?;
    let mut writer = csv::Writer::from_writer(file);

    writer.write_record([
        "entity_id",
        "display_name",
        "token_accessor",
        "total_lookups",
        "time_span_hours",
        "lookups_per_hour",
        "first_seen",
        "last_seen",
    ])?;

    for (entity_id, display_name, accessor, lookups, time_span, rate, first, last) in &rows {
        writer.write_record([
            entity_id,
            display_name,
            accessor,
            &lookups.to_string(),
            &format!("{:.2}", time_span),
            &format!("{:.2}", rate),
            first,
            last,
        ])?;
    }

    writer.flush()?;

    eprintln!(
        "\n[SUCCESS] Exported {} token lookup records to: {}",
        format_number(rows.len()),
        output
    );

    // Print summary
    let total_lookups: usize = rows.iter().map(|r| r.3).sum();
    let unique_entities = entities.len();
    let unique_tokens = rows.len();

    eprintln!("\n{}", "=".repeat(80));
    eprintln!("Summary Statistics:");
    eprintln!("{}", "-".repeat(80));
    eprintln!(
        "Total Token Lookup Operations: {}",
        format_number(total_lookups)
    );
    eprintln!("Unique Entities: {}", format_number(unique_entities));
    eprintln!("Unique Token Accessors: {}", format_number(unique_tokens));
    // Guard against division by zero (prints NaN) when no tokens met the threshold
    if unique_tokens > 0 {
        eprintln!(
            "Average Lookups per Token: {:.1}",
            total_lookups as f64 / unique_tokens as f64
        );
    }

    // Top 5 entities by lookup count
    let mut entity_totals: HashMap<String, usize> = HashMap::new();
    let mut entity_names: HashMap<String, String> = HashMap::new();
    for (entity_id, display_name, _, lookups, _, _, _, _) in &rows {
        *entity_totals.entry(entity_id.clone()).or_insert(0) += lookups;
        entity_names.insert(entity_id.clone(), display_name.clone());
    }

    let mut top_entities: Vec<_> = entity_totals.into_iter().collect();
    top_entities.sort_by(|a, b| b.1.cmp(&a.1));

    eprintln!("\nTop 5 Entities by Lookup Count:");
    eprintln!("{}", "-".repeat(80));
    for (i, (entity_id, count)) in top_entities.iter().take(5).enumerate() {
        let name = entity_names.get(entity_id).unwrap();
        eprintln!(
            "{}. {} ({}): {} lookups",
            i + 1,
            name,
            entity_id,
            format_number(*count)
        );
    }

    eprintln!("{}", "=".repeat(80));
    eprintln!("\n✓ Token lookup data exported to: {}", output);

    Ok(())
}
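
// A minimal test sketch for `calculate_time_span_hours`. It assumes that
// `crate::utils::time::parse_timestamp` accepts RFC 3339 strings (the format
// Vault audit logs use for the `time` field); adjust the fixtures if the
// helper expects a different format.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn time_span_of_one_hour_is_one() {
        let hours = calculate_time_span_hours("2024-01-01T00:00:00Z", "2024-01-01T01:00:00Z")
            .expect("timestamps should parse");
        assert!((hours - 1.0).abs() < 1e-9);
    }

    #[test]
    fn identical_timestamps_give_zero_span() {
        let hours = calculate_time_span_hours("2024-01-01T00:00:00Z", "2024-01-01T00:00:00Z")
            .expect("timestamps should parse");
        assert!(hours.abs() < 1e-9);
    }
}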