vault_audit_tools/commands/
preprocess_entities.rs

1//! Entity mapping preprocessor.
2//!
3//! ⚠️ **DEPRECATED**: Use `entity-analysis preprocess` instead.
4//!
5//! ```bash
6//! # Old (deprecated):
7//! vault-audit preprocess-entities logs/*.log --output mappings.json
8//!
9//! # New (recommended):
10//! vault-audit entity-analysis preprocess logs/*.log --output mappings.json
11//! ```
12//!
13//! **Note**: Most commands now auto-preprocess entity mappings, so this is rarely needed!
14//!
15//! See [`entity_analysis`](crate::commands::entity_analysis) for the unified command.
16//!
17//! ---
18//!
19//! Extracts entity-to-alias mappings from audit logs and exports to JSON or CSV,
20//! creating a baseline for subsequent entity analysis.
21//! Supports multi-file processing for comprehensive entity mapping.
22//!
23//! # Usage
24//!
25//! ```bash
26//! # Single file preprocessing (JSON default)
27//! vault-audit preprocess-entities audit.log --output entity-mappings.json
28//!
29//! # Multi-day comprehensive mapping (CSV)
30//! vault-audit preprocess-entities logs/*.log --output entity-mappings.csv --format csv
31//!
32//! # JSON format for entity-creation command
33//! vault-audit preprocess-entities logs/*.log --output entity-mappings.json --format json
34//! ```
35//!
36//! # Output
37//!
38//! Generates JSON or CSV containing:
39//! - Entity ID
40//! - Display name
41//! - Mount path and accessor
42//! - Username (if available)
43//! - Login count
44//! - First and last seen timestamps
45//!
46//! This output can be used as a baseline for:
47//! - `entity-creation` command (accepts both CSV and JSON)
48//! - `client-activity` command (JSON format)
49//! - External analysis tools
50//! - Historical trending
51
52use crate::audit::types::AuditEntry;
53use crate::utils::progress::ProgressBar;
54use crate::utils::reader::open_file;
55use anyhow::{Context, Result};
56use serde::{Deserialize, Serialize};
57use std::collections::HashMap;
58use std::fs::File;
59use std::io::{BufRead, BufReader, Write};
60
61/// Entity mapping with login statistics
62#[derive(Debug, Serialize, Deserialize)]
63pub struct EntityMapping {
64    pub display_name: String,
65    pub mount_path: String,
66    pub mount_accessor: String,
67    #[serde(skip_serializing_if = "Option::is_none")]
68    pub username: Option<String>,
69    pub login_count: usize,
70    pub first_seen: String,
71    pub last_seen: String,
72}
73
74/// Build entity mappings from audit logs without writing to file.
75/// Returns `HashMap` of `entity_id` -> `EntityMapping` for reuse by other commands.
76pub fn build_entity_map(log_files: &[String]) -> Result<HashMap<String, EntityMapping>> {
77    let mut entity_map: HashMap<String, EntityMapping> = HashMap::new();
78    let mut login_events = 0;
79    let mut lines_processed = 0;
80
81    // Process each log file sequentially
82    for (file_idx, log_file) in log_files.iter().enumerate() {
83        eprintln!(
84            "[{}/{}] Processing: {}",
85            file_idx + 1,
86            log_files.len(),
87            log_file
88        );
89
90        // Get file size for progress tracking
91        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
92
93        let file = open_file(log_file)
94            .with_context(|| format!("Failed to open audit log file: {}", log_file))?;
95        let reader = BufReader::new(file);
96
97        let mut progress = if let Some(size) = file_size {
98            ProgressBar::new(size, "Processing")
99        } else {
100            ProgressBar::new_spinner("Processing")
101        };
102        let mut bytes_read = 0;
103        let mut file_lines = 0;
104
105        for line in reader.lines() {
106            file_lines += 1;
107            lines_processed += 1;
108            let line = line?;
109            bytes_read += line.len() + 1; // +1 for newline
110
111            // Update progress every 10k lines for smooth animation
112            if file_lines % 10_000 == 0 {
113                if let Some(size) = file_size {
114                    progress.update(bytes_read.min(size)); // Cap at file size
115                } else {
116                    progress.update(file_lines);
117                }
118            }
119            let entry: AuditEntry = match serde_json::from_str(&line) {
120                Ok(e) => e,
121                Err(_) => continue,
122            };
123
124            // Look for login events in auth paths
125            let Some(request) = &entry.request else {
126                continue;
127            };
128
129            let Some(path) = &request.path else {
130                continue;
131            };
132
133            if !path.starts_with("auth/") {
134                continue;
135            }
136
137            if !path.contains("/login") {
138                continue;
139            }
140
141            // Skip if no auth info
142            let Some(auth) = &entry.auth else {
143                continue;
144            };
145
146            // Skip if no entity_id or display_name
147            let entity_id = match &auth.entity_id {
148                Some(id) if !id.is_empty() => id.clone(),
149                _ => continue,
150            };
151
152            let display_name = match &auth.display_name {
153                Some(name) if !name.is_empty() => name.clone(),
154                _ => continue,
155            };
156
157            login_events += 1;
158
159            // Extract mount path from the auth path (e.g., "auth/github/login" -> "auth/github")
160            let mount_path = path
161                .trim_end_matches("/login")
162                .trim_end_matches(&format!("/{}", display_name))
163                .to_string();
164
165            let mount_accessor = auth.accessor.clone().unwrap_or_default();
166            let username = auth
167                .metadata
168                .as_ref()
169                .and_then(|m| m.get("username"))
170                .and_then(|v| v.as_str())
171                .map(std::string::ToString::to_string);
172
173            // Update or insert entity mapping
174            entity_map
175                .entry(entity_id)
176                .and_modify(|mapping| {
177                    mapping.login_count += 1;
178                    mapping.last_seen.clone_from(&entry.time);
179                    // Update display_name if it's newer (handle case variations)
180                    if entry.time > mapping.last_seen {
181                        mapping.display_name.clone_from(&display_name);
182                    }
183                })
184                .or_insert_with(|| EntityMapping {
185                    display_name,
186                    mount_path,
187                    mount_accessor,
188                    username,
189                    login_count: 1,
190                    first_seen: entry.time.clone(),
191                    last_seen: entry.time.clone(),
192                });
193        }
194
195        // Ensure we show 100% complete for this file
196        if let Some(size) = file_size {
197            progress.update(size);
198        } else {
199            progress.update(file_lines);
200        }
201
202        progress.finish_with_message(&format!("Processed {} lines from this file", file_lines));
203    }
204
205    eprintln!(
206        "\nTotal: Processed {} lines, found {} login events, tracked {} entities\n",
207        lines_processed,
208        login_events,
209        entity_map.len()
210    );
211
212    Ok(entity_map)
213}
214
215pub fn run(log_files: &[String], output: &str, format: &str) -> Result<()> {
216    eprintln!("Preprocessing audit logs...");
217    eprintln!("Extracting entity → display_name mappings from login events...\n");
218
219    let entity_map = build_entity_map(log_files)?;
220
221    // Write output based on format
222    eprintln!("\nWriting entity mappings to: {}", output);
223
224    match format.to_lowercase().as_str() {
225        "json" => {
226            let output_file = File::create(output)
227                .with_context(|| format!("Failed to create output file: {}", output))?;
228            let mut writer = std::io::BufWriter::new(output_file);
229
230            // Write as pretty JSON for readability
231            let json = serde_json::to_string_pretty(&entity_map)
232                .context("Failed to serialize entity mappings")?;
233            writer.write_all(json.as_bytes())?;
234            writer.flush()?;
235
236            eprintln!("JSON entity mapping file created successfully!\n");
237        }
238        "csv" => {
239            let output_file = File::create(output)
240                .with_context(|| format!("Failed to create output file: {}", output))?;
241            let mut csv_writer = csv::Writer::from_writer(output_file);
242
243            // Write CSV header
244            csv_writer.write_record([
245                "entity_id",
246                "display_name",
247                "mount_path",
248                "mount_accessor",
249                "username",
250                "login_count",
251                "first_seen",
252                "last_seen",
253            ])?;
254
255            // Write entity data
256            for (entity_id, mapping) in &entity_map {
257                csv_writer.write_record([
258                    entity_id,
259                    &mapping.display_name,
260                    &mapping.mount_path,
261                    &mapping.mount_accessor,
262                    mapping.username.as_deref().unwrap_or(""),
263                    &mapping.login_count.to_string(),
264                    &mapping.first_seen,
265                    &mapping.last_seen,
266                ])?;
267            }
268
269            csv_writer.flush()?;
270            eprintln!("✓ CSV entity mapping file created successfully!\n");
271        }
272        _ => {
273            anyhow::bail!("Invalid format '{}'. Use 'csv' or 'json'", format);
274        }
275    }
276
277    eprintln!("Usage with client-activity command:");
278    eprintln!(
279        "  vault-audit client-activity --start <START> --end <END> --entity-map {}",
280        output
281    );
282
283    Ok(())
284}