vault_audit_tools/commands/
preprocess_entities.rs

1//! Entity mapping preprocessor.
2//!
3//! Extracts entity-to-alias mappings from audit logs and exports to JSON or CSV,
4//! creating a baseline for subsequent entity analysis.
5//! Supports multi-file processing for comprehensive entity mapping.
6//!
7//! # Usage
8//!
9//! ```bash
10//! # Single file preprocessing (JSON default)
11//! vault-audit preprocess-entities audit.log --output entity-mappings.json
12//!
13//! # Multi-day comprehensive mapping (CSV)
14//! vault-audit preprocess-entities logs/*.log --output entity-mappings.csv --format csv
15//!
16//! # JSON format for entity-creation command
17//! vault-audit preprocess-entities logs/*.log --output entity-mappings.json --format json
18//! ```
19//!
20//! # Output
21//!
22//! Generates JSON or CSV containing:
23//! - Entity ID
24//! - Display name
25//! - Mount path and accessor
26//! - Username (if available)
27//! - Login count
28//! - First and last seen timestamps
29//!
30//! This output can be used as a baseline for:
31//! - `entity-creation` command (accepts both CSV and JSON)
32//! - `client-activity` command (JSON format)
33//! - External analysis tools
34//! - Historical trending
35
36use crate::audit::types::AuditEntry;
37use crate::utils::progress::ProgressBar;
38use anyhow::{Context, Result};
39use serde::{Deserialize, Serialize};
40use std::collections::HashMap;
41use std::fs::File;
42use std::io::{BufRead, BufReader, Write};
43
/// Entity mapping with login statistics.
///
/// One record is kept per entity ID; the entity ID itself is the key of the
/// map built by `run` and is therefore not repeated inside the record.
/// The field names and their order define the serialized JSON layout.
#[derive(Debug, Serialize, Deserialize)]
struct EntityMapping {
    /// Display name taken from the `auth` section of a login event.
    display_name: String,
    /// Auth mount path derived from the login request path.
    mount_path: String,
    /// Accessor from the `auth` section; empty when the event carried none.
    mount_accessor: String,
    /// `username` entry of the auth metadata, when present.
    /// Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    username: Option<String>,
    /// Number of login events recorded for this entity.
    login_count: usize,
    /// `time` value of the first login event recorded for this entity.
    first_seen: String,
    /// `time` value of the last login event recorded for this entity.
    last_seen: String,
}
56
57pub fn run(log_files: &[String], output: &str, format: &str) -> Result<()> {
58    eprintln!("Preprocessing audit logs...");
59    eprintln!("Extracting entity → display_name mappings from login events...\n");
60
61    let mut entity_map: HashMap<String, EntityMapping> = HashMap::new();
62    let mut login_events = 0;
63    let mut lines_processed = 0;
64
65    // Process each log file sequentially
66    for (file_idx, log_file) in log_files.iter().enumerate() {
67        eprintln!(
68            "[{}/{}] Processing: {}",
69            file_idx + 1,
70            log_files.len(),
71            log_file
72        );
73
74        // Get file size for progress tracking
75        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
76
77        let file = File::open(log_file)
78            .with_context(|| format!("Failed to open audit log file: {}", log_file))?;
79        let reader = BufReader::new(file);
80
81        let mut progress = if let Some(size) = file_size {
82            ProgressBar::new(size, "Processing")
83        } else {
84            ProgressBar::new_spinner("Processing")
85        };
86        let mut bytes_read = 0;
87        let mut file_lines = 0;
88
89        for line in reader.lines() {
90            file_lines += 1;
91            lines_processed += 1;
92            let line = line?;
93            bytes_read += line.len() + 1; // +1 for newline
94
95            // Update progress every 10k lines for smooth animation
96            if file_lines % 10_000 == 0 {
97                if let Some(size) = file_size {
98                    progress.update(bytes_read.min(size)); // Cap at file size
99                } else {
100                    progress.update(file_lines);
101                }
102            }
103            let entry: AuditEntry = match serde_json::from_str(&line) {
104                Ok(e) => e,
105                Err(_) => continue,
106            };
107
108            // Look for login events in auth paths
109            let request = match &entry.request {
110                Some(r) => r,
111                None => continue,
112            };
113
114            let path = match &request.path {
115                Some(p) => p,
116                None => continue,
117            };
118
119            if !path.starts_with("auth/") {
120                continue;
121            }
122
123            if !path.contains("/login") {
124                continue;
125            }
126
127            // Skip if no auth info
128            let auth = match &entry.auth {
129                Some(a) => a,
130                None => continue,
131            };
132
133            // Skip if no entity_id or display_name
134            let entity_id = match &auth.entity_id {
135                Some(id) if !id.is_empty() => id.clone(),
136                _ => continue,
137            };
138
139            let display_name = match &auth.display_name {
140                Some(name) if !name.is_empty() => name.clone(),
141                _ => continue,
142            };
143
144            login_events += 1;
145
146            // Extract mount path from the auth path (e.g., "auth/github/login" -> "auth/github")
147            let mount_path = path
148                .trim_end_matches("/login")
149                .trim_end_matches(&format!("/{}", display_name))
150                .to_string();
151
152            let mount_accessor = auth.accessor.clone().unwrap_or_default();
153            let username = auth
154                .metadata
155                .as_ref()
156                .and_then(|m| m.get("username"))
157                .and_then(|v| v.as_str())
158                .map(|s| s.to_string());
159
160            // Update or insert entity mapping
161            entity_map
162                .entry(entity_id)
163                .and_modify(|mapping| {
164                    mapping.login_count += 1;
165                    mapping.last_seen = entry.time.clone();
166                    // Update display_name if it's newer (handle case variations)
167                    if entry.time > mapping.last_seen {
168                        mapping.display_name = display_name.clone();
169                    }
170                })
171                .or_insert_with(|| EntityMapping {
172                    display_name,
173                    mount_path,
174                    mount_accessor,
175                    username,
176                    login_count: 1,
177                    first_seen: entry.time.clone(),
178                    last_seen: entry.time.clone(),
179                });
180        }
181
182        // Ensure we show 100% complete for this file
183        if let Some(size) = file_size {
184            progress.update(size);
185        } else {
186            progress.update(file_lines);
187        }
188
189        progress.finish_with_message(&format!("Processed {} lines from this file", file_lines));
190    }
191
192    eprintln!(
193        "\nTotal: Processed {} lines, found {} login events, tracked {} entities",
194        lines_processed,
195        login_events,
196        entity_map.len()
197    );
198
199    // Write output based on format
200    eprintln!("\nWriting entity mappings to: {}", output);
201
202    match format.to_lowercase().as_str() {
203        "json" => {
204            let output_file = File::create(output)
205                .with_context(|| format!("Failed to create output file: {}", output))?;
206            let mut writer = std::io::BufWriter::new(output_file);
207
208            // Write as pretty JSON for readability
209            let json = serde_json::to_string_pretty(&entity_map)
210                .context("Failed to serialize entity mappings")?;
211            writer.write_all(json.as_bytes())?;
212            writer.flush()?;
213
214            eprintln!("✓ JSON entity mapping file created successfully!\n");
215        }
216        "csv" => {
217            let output_file = File::create(output)
218                .with_context(|| format!("Failed to create output file: {}", output))?;
219            let mut csv_writer = csv::Writer::from_writer(output_file);
220
221            // Write CSV header
222            csv_writer.write_record([
223                "entity_id",
224                "display_name",
225                "mount_path",
226                "mount_accessor",
227                "username",
228                "login_count",
229                "first_seen",
230                "last_seen",
231            ])?;
232
233            // Write entity data
234            for (entity_id, mapping) in &entity_map {
235                csv_writer.write_record([
236                    entity_id,
237                    &mapping.display_name,
238                    &mapping.mount_path,
239                    &mapping.mount_accessor,
240                    mapping.username.as_deref().unwrap_or(""),
241                    &mapping.login_count.to_string(),
242                    &mapping.first_seen,
243                    &mapping.last_seen,
244                ])?;
245            }
246
247            csv_writer.flush()?;
248            eprintln!("✓ CSV entity mapping file created successfully!\n");
249        }
250        _ => {
251            anyhow::bail!("Invalid format '{}'. Use 'csv' or 'json'", format);
252        }
253    }
254
255    eprintln!("Usage with client-activity command:");
256    eprintln!(
257        "  vault-audit client-activity --start <START> --end <END> --entity-map {}",
258        output
259    );
260
261    Ok(())
262}