vault_audit_tools/commands/
token_export.rs

//! Token lookup pattern exporter.
//!
//! **⚠️ DEPRECATED**: Use `token-analysis --export` instead.
//!
//! This command has been consolidated into the unified `token-analysis` command.
//! Use the `--export` flag for CSV export with per-accessor detail.
//!
//! ```bash
//! # Old command (deprecated)
//! vault-audit token-export audit.log --output lookups.csv --min-lookups 100
//!
//! # New command (recommended)
//! vault-audit token-analysis audit.log --export lookups.csv --min-operations 100
//! ```
//!
//! See the [`token_analysis`](crate::commands::token_analysis) module for full documentation.
//!
//! ---
//!
//! Exports token lookup patterns to CSV for further analysis.
//! Identifies entities with high token lookup volumes and their patterns.
//! Supports multi-file analysis for historical trending.
//!
//! # Usage
//!
//! ```bash
//! # Single file export
//! vault-audit token-export audit.log --output lookups.csv
//!
//! # Multi-day export with filtering
//! vault-audit token-export *.log --output lookups.csv --min-lookups 100
//! ```
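//!
//! The same export can also be driven from Rust by calling this module's `run`
//! function directly. A minimal sketch; the `vault_audit` crate name and re-export
//! path are assumptions, so adjust them to the actual crate layout, and call it from
//! a context that returns `anyhow::Result<()>` (or handle the error):
//!
//! ```ignore
//! // Hypothetical call: log files to scan, output CSV path, minimum lookup count.
//! vault_audit::commands::token_export::run(&["audit.log".to_string()], "lookups.csv", 100)?;
//! ```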
//!
//! # Output
//!
//! Generates a CSV file with columns:
//! - Entity ID
//! - Display name
//! - Token accessor
//! - Total lookup count
//! - Time span in hours (between first and last seen)
//! - Lookups per hour
//! - First seen timestamp
//! - Last seen timestamp
//!
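//! An illustrative export is shown below; the header row matches the columns this
//! command writes, while the data values are made up for the example:
//!
//! ```text
//! entity_id,display_name,token_accessor,total_lookups,time_span_hours,lookups_per_hour,first_seen,last_seen
//! example-entity-id,example-app,example-accessor,1200,24.00,50.00,2024-01-01T00:00:00Z,2024-01-02T00:00:00Z
//! ```
//!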
//! Useful for:
//! - Token usage trending
//! - Token lifetime analysis
//! - Identifying long-lived vs short-lived tokens

use crate::audit::types::AuditEntry;
use crate::utils::format::format_number;
use crate::utils::progress::ProgressBar;
use crate::utils::reader::open_file;
use crate::utils::time::parse_timestamp;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

/// Token activity statistics
#[derive(Debug, Default)]
struct TokenData {
    lookups: usize,
    first_seen: String,
    last_seen: String,
}

/// Entity with associated token data
#[derive(Debug)]
struct EntityData {
    display_name: String,
    tokens: HashMap<String, TokenData>,
}

/// Returns the elapsed time between two audit-log timestamps in fractional hours.
fn calculate_time_span_hours(first: &str, last: &str) -> Result<f64> {
    let first_dt = parse_timestamp(first)
        .with_context(|| format!("Failed to parse first timestamp: {}", first))?;
    let last_dt = parse_timestamp(last)
        .with_context(|| format!("Failed to parse last timestamp: {}", last))?;

    let duration = last_dt.signed_duration_since(first_dt);
    Ok(duration.num_seconds() as f64 / 3600.0)
}

pub fn run(log_files: &[String], output: &str, min_lookups: usize) -> Result<()> {
    let mut entities: HashMap<String, EntityData> = HashMap::new();
    let mut total_lines = 0;
    let mut lookup_count = 0;

    // Process each log file sequentially
    for (file_idx, log_file) in log_files.iter().enumerate() {
        eprintln!(
            "[{}/{}] Processing: {}",
            file_idx + 1,
            log_files.len(),
            log_file
        );

        // Get file size for progress tracking
        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
        let mut progress = if let Some(size) = file_size {
            ProgressBar::new(size, "Processing")
        } else {
            ProgressBar::new_spinner("Processing")
        };

        let file = open_file(log_file)?;
        let reader = BufReader::new(file);

        let mut file_lines = 0;
        let mut bytes_read = 0;

        for line in reader.lines() {
            file_lines += 1;
            total_lines += 1;
            let line = line?;
            bytes_read += line.len() + 1; // +1 for newline

            // Update progress every 10k lines for smooth animation
            if file_lines % 10_000 == 0 {
                if let Some(size) = file_size {
                    progress.update(bytes_read.min(size)); // Cap at file size
                } else {
                    progress.update(file_lines);
                }
            }

            // Skip lines that are not valid JSON audit entries
            let entry: AuditEntry = match serde_json::from_str(&line) {
                Ok(e) => e,
                Err(_) => continue,
            };

            // Filter for token lookup operations
            let Some(request) = &entry.request else {
                continue;
            };

            let path = match &request.path {
                Some(p) => p.as_str(),
                None => continue,
            };

            if !path.starts_with("auth/token/lookup") {
                continue;
            }

            let Some(entity_id) = entry.auth.as_ref().and_then(|a| a.entity_id.as_deref()) else {
                continue;
            };

            lookup_count += 1;

            let display_name = entry
                .auth
                .as_ref()
                .and_then(|a| a.display_name.as_deref())
                .unwrap_or("N/A");

            let entity_data = entities
                .entry(entity_id.to_string())
                .or_insert_with(|| EntityData {
                    display_name: display_name.to_string(),
                    tokens: HashMap::new(),
                });

            let accessor = entry
                .auth
                .as_ref()
                .and_then(|a| a.accessor.as_deref())
                .unwrap_or("unknown")
                .to_string();

            let timestamp = entry.time.clone();

            // Track per-accessor lookup counts and first/last observation times
            let token_data = entity_data.tokens.entry(accessor).or_default();
            token_data.lookups += 1;

            if token_data.first_seen.is_empty() {
                token_data.first_seen.clone_from(&timestamp);
            }
            token_data.last_seen = timestamp;
        }

        // Ensure 100% progress for this file
        if let Some(size) = file_size {
            progress.update(size);
        }

        progress.finish_with_message(&format!(
            "Processed {} lines from this file",
            format_number(file_lines)
        ));
    }

    eprintln!(
        "\nTotal: Processed {} lines, found {} token lookups from {} entities",
        format_number(total_lines),
        format_number(lookup_count),
        format_number(entities.len())
    );

    // Prepare CSV rows: one tuple per (entity, accessor) pair, in the column order
    // (entity_id, display_name, accessor, total_lookups, time_span_hours,
    // lookups_per_hour, first_seen, last_seen)
    let mut rows: Vec<_> = entities
        .iter()
        .flat_map(|(entity_id, entity_data)| {
            entity_data
                .tokens
                .iter()
                .map(move |(accessor, token_data)| {
                    let time_span =
                        calculate_time_span_hours(&token_data.first_seen, &token_data.last_seen)
                            .unwrap_or_else(|err| {
                                eprintln!(
                                    "Warning: Failed to calculate time span for accessor {}: {}",
                                    accessor, err
                                );
                                0.0
                            });
                    let lookups_per_hour = if time_span > 0.0 {
                        token_data.lookups as f64 / time_span
                    } else {
                        0.0
                    };

                    (
                        entity_id.clone(),
                        entity_data.display_name.clone(),
                        accessor.clone(),
                        token_data.lookups,
                        time_span,
                        lookups_per_hour,
                        token_data.first_seen.clone(),
                        token_data.last_seen.clone(),
                    )
                })
        })
        .collect();

    // Sort by total lookups descending
    rows.sort_by(|a, b| b.3.cmp(&a.3));

    // Filter by minimum lookups
    rows.retain(|row| row.3 >= min_lookups);

    // Create output directory if needed
    if let Some(parent) = std::path::Path::new(output).parent() {
        std::fs::create_dir_all(parent).context("Failed to create output directory")?;
    }

    // Write CSV
    let file = File::create(output).context("Failed to create output file")?;
    let mut writer = csv::Writer::from_writer(file);

    writer.write_record([
        "entity_id",
        "display_name",
        "token_accessor",
        "total_lookups",
        "time_span_hours",
        "lookups_per_hour",
        "first_seen",
        "last_seen",
    ])?;

    for (entity_id, display_name, accessor, lookups, time_span, rate, first, last) in &rows {
        writer.write_record([
            entity_id,
            display_name,
            accessor,
            &lookups.to_string(),
            &format!("{:.2}", time_span),
            &format!("{:.2}", rate),
            first,
            last,
        ])?;
    }

    writer.flush()?;

    eprintln!(
        "\n[SUCCESS] Exported {} token lookup records to: {}",
        format_number(rows.len()),
        output
    );

    // Print summary
    let total_lookups: usize = rows.iter().map(|r| r.3).sum();
    // Entities observed across all logs (not limited by the minimum-lookups filter)
    let unique_entities = entities.len();
    let unique_tokens = rows.len();
    // Avoid a NaN average when no rows pass the minimum-lookups filter
    let avg_lookups_per_token = if unique_tokens > 0 {
        total_lookups as f64 / unique_tokens as f64
    } else {
        0.0
    };

    eprintln!("\n{}", "=".repeat(80));
    eprintln!("Summary Statistics:");
    eprintln!("{}", "-".repeat(80));
    eprintln!(
        "Total Token Lookup Operations: {}",
        format_number(total_lookups)
    );
    eprintln!("Unique Entities: {}", format_number(unique_entities));
    eprintln!("Unique Token Accessors: {}", format_number(unique_tokens));
    eprintln!("Average Lookups per Token: {:.1}", avg_lookups_per_token);

    // Top 5 entities by lookup count
    let mut entity_totals: HashMap<String, usize> = HashMap::new();
    let mut entity_names: HashMap<String, String> = HashMap::new();
    for (entity_id, display_name, _, lookups, _, _, _, _) in &rows {
        *entity_totals.entry(entity_id.clone()).or_insert(0) += lookups;
        entity_names.insert(entity_id.clone(), display_name.clone());
    }

    let mut top_entities: Vec<_> = entity_totals.into_iter().collect();
    top_entities.sort_by(|a, b| b.1.cmp(&a.1));

    eprintln!("\nTop 5 Entities by Lookup Count:");
    eprintln!("{}", "-".repeat(80));
    for (i, (entity_id, count)) in top_entities.iter().take(5).enumerate() {
        let name = entity_names.get(entity_id).unwrap();
        eprintln!(
            "{}. {} ({}): {} lookups",
            i + 1,
            name,
            entity_id,
            format_number(*count)
        );
    }

    eprintln!("{}", "=".repeat(80));
    eprintln!("\n✓ Token lookup data exported to: {}", output);

    Ok(())
}