vault_audit_tools/commands/
preprocess_entities.rs

1//! Entity mapping preprocessor.
2//!
3//! Extracts entity-to-alias mappings from audit logs and exports to JSON or CSV,
4//! creating a baseline for subsequent entity analysis.
5//! Supports multi-file processing for comprehensive entity mapping.
6//!
7//! # Usage
8//!
9//! ```bash
10//! # Single file preprocessing (JSON default)
11//! vault-audit preprocess-entities audit.log --output entity-mappings.json
12//!
13//! # Multi-day comprehensive mapping (CSV)
14//! vault-audit preprocess-entities logs/*.log --output entity-mappings.csv --format csv
15//!
16//! # JSON format for entity-creation command
17//! vault-audit preprocess-entities logs/*.log --output entity-mappings.json --format json
18//! ```
19//!
20//! # Output
21//!
22//! Generates JSON or CSV containing:
23//! - Entity ID
24//! - Display name
25//! - Mount path and accessor
26//! - Username (if available)
27//! - Login count
28//! - First and last seen timestamps
29//!
30//! This output can be used as a baseline for:
31//! - `entity-creation` command (accepts both CSV and JSON)
32//! - `client-activity` command (JSON format)
33//! - External analysis tools
34//! - Historical trending
35
36use crate::audit::types::AuditEntry;
37use crate::utils::progress::ProgressBar;
38use crate::utils::reader::open_file;
39use anyhow::{Context, Result};
40use serde::{Deserialize, Serialize};
41use std::collections::HashMap;
42use std::fs::File;
43use std::io::{BufRead, BufReader, Write};
44
45/// Entity mapping with login statistics
46#[derive(Debug, Serialize, Deserialize)]
47struct EntityMapping {
48    display_name: String,
49    mount_path: String,
50    mount_accessor: String,
51    #[serde(skip_serializing_if = "Option::is_none")]
52    username: Option<String>,
53    login_count: usize,
54    first_seen: String,
55    last_seen: String,
56}
57
58pub fn run(log_files: &[String], output: &str, format: &str) -> Result<()> {
59    eprintln!("Preprocessing audit logs...");
60    eprintln!("Extracting entity → display_name mappings from login events...\n");
61
62    let mut entity_map: HashMap<String, EntityMapping> = HashMap::new();
63    let mut login_events = 0;
64    let mut lines_processed = 0;
65
66    // Process each log file sequentially
67    for (file_idx, log_file) in log_files.iter().enumerate() {
68        eprintln!(
69            "[{}/{}] Processing: {}",
70            file_idx + 1,
71            log_files.len(),
72            log_file
73        );
74
75        // Get file size for progress tracking
76        let file_size = std::fs::metadata(log_file).ok().map(|m| m.len() as usize);
77
78        let file = open_file(log_file)
79            .with_context(|| format!("Failed to open audit log file: {}", log_file))?;
80        let reader = BufReader::new(file);
81
82        let mut progress = if let Some(size) = file_size {
83            ProgressBar::new(size, "Processing")
84        } else {
85            ProgressBar::new_spinner("Processing")
86        };
87        let mut bytes_read = 0;
88        let mut file_lines = 0;
89
90        for line in reader.lines() {
91            file_lines += 1;
92            lines_processed += 1;
93            let line = line?;
94            bytes_read += line.len() + 1; // +1 for newline
95
96            // Update progress every 10k lines for smooth animation
97            if file_lines % 10_000 == 0 {
98                if let Some(size) = file_size {
99                    progress.update(bytes_read.min(size)); // Cap at file size
100                } else {
101                    progress.update(file_lines);
102                }
103            }
104            let entry: AuditEntry = match serde_json::from_str(&line) {
105                Ok(e) => e,
106                Err(_) => continue,
107            };
108
109            // Look for login events in auth paths
110            let request = match &entry.request {
111                Some(r) => r,
112                None => continue,
113            };
114
115            let path = match &request.path {
116                Some(p) => p,
117                None => continue,
118            };
119
120            if !path.starts_with("auth/") {
121                continue;
122            }
123
124            if !path.contains("/login") {
125                continue;
126            }
127
128            // Skip if no auth info
129            let auth = match &entry.auth {
130                Some(a) => a,
131                None => continue,
132            };
133
134            // Skip if no entity_id or display_name
135            let entity_id = match &auth.entity_id {
136                Some(id) if !id.is_empty() => id.clone(),
137                _ => continue,
138            };
139
140            let display_name = match &auth.display_name {
141                Some(name) if !name.is_empty() => name.clone(),
142                _ => continue,
143            };
144
145            login_events += 1;
146
147            // Extract mount path from the auth path (e.g., "auth/github/login" -> "auth/github")
148            let mount_path = path
149                .trim_end_matches("/login")
150                .trim_end_matches(&format!("/{}", display_name))
151                .to_string();
152
153            let mount_accessor = auth.accessor.clone().unwrap_or_default();
154            let username = auth
155                .metadata
156                .as_ref()
157                .and_then(|m| m.get("username"))
158                .and_then(|v| v.as_str())
159                .map(|s| s.to_string());
160
161            // Update or insert entity mapping
162            entity_map
163                .entry(entity_id)
164                .and_modify(|mapping| {
165                    mapping.login_count += 1;
166                    mapping.last_seen = entry.time.clone();
167                    // Update display_name if it's newer (handle case variations)
168                    if entry.time > mapping.last_seen {
169                        mapping.display_name = display_name.clone();
170                    }
171                })
172                .or_insert_with(|| EntityMapping {
173                    display_name,
174                    mount_path,
175                    mount_accessor,
176                    username,
177                    login_count: 1,
178                    first_seen: entry.time.clone(),
179                    last_seen: entry.time.clone(),
180                });
181        }
182
183        // Ensure we show 100% complete for this file
184        if let Some(size) = file_size {
185            progress.update(size);
186        } else {
187            progress.update(file_lines);
188        }
189
190        progress.finish_with_message(&format!("Processed {} lines from this file", file_lines));
191    }
192
193    eprintln!(
194        "\nTotal: Processed {} lines, found {} login events, tracked {} entities",
195        lines_processed,
196        login_events,
197        entity_map.len()
198    );
199
200    // Write output based on format
201    eprintln!("\nWriting entity mappings to: {}", output);
202
203    match format.to_lowercase().as_str() {
204        "json" => {
205            let output_file = File::create(output)
206                .with_context(|| format!("Failed to create output file: {}", output))?;
207            let mut writer = std::io::BufWriter::new(output_file);
208
209            // Write as pretty JSON for readability
210            let json = serde_json::to_string_pretty(&entity_map)
211                .context("Failed to serialize entity mappings")?;
212            writer.write_all(json.as_bytes())?;
213            writer.flush()?;
214
215            eprintln!("✓ JSON entity mapping file created successfully!\n");
216        }
217        "csv" => {
218            let output_file = File::create(output)
219                .with_context(|| format!("Failed to create output file: {}", output))?;
220            let mut csv_writer = csv::Writer::from_writer(output_file);
221
222            // Write CSV header
223            csv_writer.write_record([
224                "entity_id",
225                "display_name",
226                "mount_path",
227                "mount_accessor",
228                "username",
229                "login_count",
230                "first_seen",
231                "last_seen",
232            ])?;
233
234            // Write entity data
235            for (entity_id, mapping) in &entity_map {
236                csv_writer.write_record([
237                    entity_id,
238                    &mapping.display_name,
239                    &mapping.mount_path,
240                    &mapping.mount_accessor,
241                    mapping.username.as_deref().unwrap_or(""),
242                    &mapping.login_count.to_string(),
243                    &mapping.first_seen,
244                    &mapping.last_seen,
245                ])?;
246            }
247
248            csv_writer.flush()?;
249            eprintln!("✓ CSV entity mapping file created successfully!\n");
250        }
251        _ => {
252            anyhow::bail!("Invalid format '{}'. Use 'csv' or 'json'", format);
253        }
254    }
255
256    eprintln!("Usage with client-activity command:");
257    eprintln!(
258        "  vault-audit client-activity --start <START> --end <END> --entity-map {}",
259        output
260    );
261
262    Ok(())
263}