vault_audit_tools/commands/
entity_creation.rs

1//! Entity creation analysis command.
2//!
3//! ⚠️ **DEPRECATED**: Use `entity-analysis creation` instead.
4//!
5//! ```bash
6//! # Old (deprecated):
7//! vault-audit entity-creation logs/*.log
8//!
9//! # New (recommended):
10//! vault-audit entity-analysis creation logs/*.log
11//! ```
12//!
13//! See [`entity_analysis`](crate::commands::entity_analysis) for the unified command.
14//!
15//! ---
16//!
17//! Identifies when entities first appear in audit logs, grouped by
18//! authentication method and mount path.
19//! Supports multi-file analysis for tracking entity creation over time.
20//!
21//! # Usage
22//!
23//! ```bash
24//! # Single file analysis
25//! vault-audit entity-creation audit.log
26//!
27//! # Multi-day analysis
28//! vault-audit entity-creation logs/*.log --output entity-creation.json
29//!
30//! # With entity mappings from entity-list (CSV or JSON)
31//! vault-audit entity-creation logs/*.log --entity-map entities.csv
32//! vault-audit entity-creation logs/*.log --entity-map entities.json
33//! ```
34//!
35//! # Output
36//!
37//! Displays entity creation events grouped by authentication path:
38//! - Entity ID
39//! - Display name
40//! - Mount path (authentication method)
41//! - First seen timestamp
42//! - Creation count by auth method
43//!
//! Use `--output <file>` to write structured JSON data for further processing.
45
46use crate::audit::types::AuditEntry;
47use crate::utils::format::format_number;
48use crate::utils::progress::ProgressBar;
49use crate::utils::reader::open_file;
50use anyhow::{Context, Result};
51use chrono::{DateTime, Utc};
52use serde::{Deserialize, Serialize};
53use std::collections::{HashMap, HashSet};
54use std::fs::File;
55use std::io::{BufRead, BufReader};
56
/// Per-entity metadata loaded from an entity map file.
///
/// Values are keyed by entity ID in the map returned by `load_entity_mappings`.
/// The struct is deserialized directly when the map file is JSON; when the map
/// file is CSV only `display_name`, `mount_path` and `mount_accessor` are
/// populated and the remaining fields receive placeholder values.
#[derive(Debug, Serialize, Deserialize)]
pub struct EntityMapping {
    /// Human-readable entity name; used as a fallback display name when an
    /// audit entry's auth block carries none.
    pub display_name: String,
    /// Mount path recorded for the entity in the map file.
    pub mount_path: String,
    /// Mount accessor recorded for the entity in the map file.
    /// Not read anywhere in this file, hence the lint allowance.
    #[allow(dead_code)]
    pub mount_accessor: String,
    /// Optional username; the CSV loader always sets this to `None`.
    #[allow(dead_code)]
    pub username: Option<String>,
    /// Login count carried by the map file; the CSV loader sets this to `0`.
    #[allow(dead_code)]
    pub login_count: usize,
    /// First-seen value carried by the map file; empty when loaded from CSV.
    #[allow(dead_code)]
    pub first_seen: String,
    /// Last-seen value carried by the map file; empty when loaded from CSV.
    #[allow(dead_code)]
    pub last_seen: String,
}
73
/// Represents a single entity creation event, i.e. the first login event in
/// the processed logs that carried a given entity ID.
#[derive(Debug)]
struct EntityCreation {
    /// Entity ID taken from the audit entry's auth block.
    entity_id: String,
    /// Display name from the auth block, else from the entity map, else `"unknown"`.
    display_name: String,
    /// Request path with a trailing `/login` and then a trailing
    /// `/<display_name>` stripped.
    mount_path: String,
    /// Mount type reported on the request, or `"unknown"` when absent.
    mount_type: String,
    /// Timestamp of the first login event seen for this entity.
    first_seen: DateTime<Utc>,
    /// Number of login events seen for this entity, including the first.
    login_count: usize,
}
84
/// Aggregated entity-creation statistics for one authentication mount path.
#[derive(Debug)]
struct MountStats {
    /// Mount path shared by every entity counted in this record.
    mount_path: String,
    /// Mount type of the first entity aggregated under this path.
    mount_type: String,
    /// Number of distinct entities first seen on this path.
    entities_created: usize,
    /// Sum of the login counts of those entities.
    total_logins: usize,
    /// Up to 5 sample display names of entities created on this path.
    sample_entities: Vec<String>,
}
93
94/// Load entity mappings from either JSON or CSV format
95pub fn load_entity_mappings(path: &str) -> Result<HashMap<String, EntityMapping>> {
96    let file =
97        File::open(path).with_context(|| format!("Failed to open entity map file: {}", path))?;
98
99    // Auto-detect format based on file extension or content
100    let path_lower = path.to_lowercase();
101    if std::path::Path::new(&path_lower)
102        .extension()
103        .is_some_and(|ext| ext.eq_ignore_ascii_case("json"))
104    {
105        // JSON format (from preprocess-entities)
106        serde_json::from_reader(file)
107            .with_context(|| format!("Failed to parse entity map JSON: {}", path))
108    } else if std::path::Path::new(&path_lower)
109        .extension()
110        .is_some_and(|ext| ext.eq_ignore_ascii_case("csv"))
111    {
112        // CSV format (from entity-list)
113        let mut reader = csv::Reader::from_reader(file);
114        let mut mappings = HashMap::new();
115
116        for result in reader.records() {
117            let record = result?;
118            if record.len() < 8 {
119                continue; // Skip malformed rows
120            }
121
122            let entity_id = record.get(0).unwrap_or("").to_string();
123            let display_name = record.get(1).unwrap_or("").to_string();
124            let mount_path = record.get(7).unwrap_or("").to_string(); // mount_path column
125            let mount_accessor = record.get(9).unwrap_or("").to_string(); // mount_accessor column
126
127            if !entity_id.is_empty() {
128                mappings.insert(
129                    entity_id,
130                    EntityMapping {
131                        display_name,
132                        mount_path,
133                        mount_accessor,
134                        username: None,
135                        login_count: 0,
136                        first_seen: String::new(),
137                        last_seen: String::new(),
138                    },
139                );
140            }
141        }
142
143        Ok(mappings)
144    } else {
145        // Try JSON first, fall back to CSV
146        let file = File::open(path)?;
147        if let Ok(mappings) = serde_json::from_reader::<_, HashMap<String, EntityMapping>>(file) {
148            Ok(mappings)
149        } else {
150            // Try CSV
151            let file = File::open(path)?;
152            let mut reader = csv::Reader::from_reader(file);
153            let mut mappings = HashMap::new();
154
155            for result in reader.records() {
156                let record = result?;
157                if record.len() < 8 {
158                    continue;
159                }
160
161                let entity_id = record.get(0).unwrap_or("").to_string();
162                let display_name = record.get(1).unwrap_or("").to_string();
163                let mount_path = record.get(7).unwrap_or("").to_string();
164                let mount_accessor = record.get(9).unwrap_or("").to_string();
165
166                if !entity_id.is_empty() {
167                    mappings.insert(
168                        entity_id,
169                        EntityMapping {
170                            display_name,
171                            mount_path,
172                            mount_accessor,
173                            username: None,
174                            login_count: 0,
175                            first_seen: String::new(),
176                            last_seen: String::new(),
177                        },
178                    );
179                }
180            }
181
182            Ok(mappings)
183        }
184    }
185}
186
187pub fn run(
188    log_files: &[String],
189    entity_map_file: Option<&str>,
190    output: Option<&str>,
191) -> Result<()> {
192    eprintln!("Analyzing entity creation by authentication path...\n");
193
194    // Load entity mappings if provided (supports both JSON and CSV)
195    let entity_mappings: HashMap<String, EntityMapping> = if let Some(map_file) = entity_map_file {
196        eprintln!("Loading entity mappings from: {}", map_file);
197        load_entity_mappings(map_file)?
198    } else {
199        HashMap::new()
200    };
201
202    if !entity_mappings.is_empty() {
203        eprintln!(
204            "Loaded {} entity mappings for display name enrichment\n",
205            format_number(entity_mappings.len())
206        );
207    }
208
209    let mut entity_creations: HashMap<String, EntityCreation> = HashMap::new();
210    let mut seen_entities: HashSet<String> = HashSet::new();
211    let mut lines_processed = 0;
212    let mut login_events = 0;
213    let mut new_entities_found = 0;
214
215    // Process each log file sequentially
216    for (file_idx, log_file) in log_files.iter().enumerate() {
217        eprintln!(
218            "[{}/{}] Processing: {}",
219            file_idx + 1,
220            log_files.len(),
221            log_file
222        );
223
224        // Count lines in file first for accurate progress tracking
225        eprintln!("Scanning file to determine total lines...");
226        let total_file_lines = crate::utils::parallel::count_file_lines(log_file)?;
227
228        let file = open_file(log_file)
229            .with_context(|| format!("Failed to open audit log file: {}", log_file))?;
230        let reader = BufReader::new(file);
231
232        let progress = ProgressBar::new(total_file_lines, "Processing");
233        let mut file_lines = 0;
234
235        for line in reader.lines() {
236            file_lines += 1;
237            lines_processed += 1;
238            let line = line?;
239
240            if file_lines % 10_000 == 0 {
241                progress.update(file_lines);
242            }
243
244            let entry: AuditEntry = match serde_json::from_str(&line) {
245                Ok(e) => e,
246                Err(_) => continue,
247            };
248
249            // Look for login events in auth paths
250            let Some(request) = &entry.request else {
251                continue;
252            };
253
254            let path = match &request.path {
255                Some(p) => p.as_str(),
256                None => continue,
257            };
258
259            if !path.starts_with("auth/") || !path.contains("/login") {
260                continue;
261            }
262
263            let Some(auth) = &entry.auth else { continue };
264
265            let entity_id = match &auth.entity_id {
266                Some(id) if !id.is_empty() => id.clone(),
267                _ => continue,
268            };
269
270            login_events += 1;
271
272            // Check if this is the first time we've seen this entity
273            let is_new_entity = seen_entities.insert(entity_id.clone());
274
275            if is_new_entity {
276                new_entities_found += 1;
277
278                let display_name = auth
279                    .display_name
280                    .clone()
281                    .or_else(|| {
282                        entity_mappings
283                            .get(&entity_id)
284                            .map(|m| m.display_name.clone())
285                    })
286                    .unwrap_or_else(|| "unknown".to_string());
287
288                let mount_path = path
289                    .trim_end_matches("/login")
290                    .trim_end_matches(&format!("/{}", display_name))
291                    .to_string();
292
293                let mount_type = request
294                    .mount_type
295                    .clone()
296                    .unwrap_or_else(|| "unknown".to_string());
297
298                let first_seen = match chrono::DateTime::parse_from_rfc3339(&entry.time) {
299                    Ok(dt) => dt.with_timezone(&Utc),
300                    Err(_) => Utc::now(),
301                };
302
303                entity_creations.insert(
304                    entity_id.clone(),
305                    EntityCreation {
306                        entity_id,
307                        display_name,
308                        mount_path,
309                        mount_type,
310                        first_seen,
311                        login_count: 1,
312                    },
313                );
314            } else {
315                // Increment login count for existing entity
316                if let Some(creation) = entity_creations.get_mut(&entity_id) {
317                    creation.login_count += 1;
318                }
319            }
320        }
321
322        progress.update(total_file_lines);
323
324        progress.finish_with_message(&format!("Processed {} lines from this file", file_lines));
325    }
326
327    eprintln!(
328        "\nTotal: Processed {} lines, {} login events, {} new entities created",
329        format_number(lines_processed),
330        format_number(login_events),
331        format_number(new_entities_found)
332    );
333
334    // Aggregate by mount path
335    let mut mount_stats: HashMap<String, MountStats> = HashMap::new();
336
337    for creation in entity_creations.values() {
338        let key = creation.mount_path.clone();
339        mount_stats
340            .entry(key.clone())
341            .and_modify(|stats| {
342                stats.entities_created += 1;
343                stats.total_logins += creation.login_count;
344                if stats.sample_entities.len() < 5 {
345                    stats.sample_entities.push(creation.display_name.clone());
346                }
347            })
348            .or_insert_with(|| MountStats {
349                mount_path: creation.mount_path.clone(),
350                mount_type: creation.mount_type.clone(),
351                entities_created: 1,
352                total_logins: creation.login_count,
353                sample_entities: vec![creation.display_name.clone()],
354            });
355    }
356
357    // Sort by entities created
358    let mut sorted_mounts: Vec<_> = mount_stats.values().collect();
359    sorted_mounts.sort_by(|a, b| b.entities_created.cmp(&a.entities_created));
360
361    // Print report
362    eprintln!("\n{}", "=".repeat(100));
363    eprintln!("ENTITY CREATION ANALYSIS BY AUTHENTICATION PATH");
364    eprintln!("{}", "=".repeat(100));
365    eprintln!();
366    eprintln!("Summary:");
367    eprintln!("  Total login events: {}", format_number(login_events));
368    eprintln!(
369        "  Unique entities discovered: {}",
370        format_number(new_entities_found)
371    );
372    eprintln!(
373        "  Authentication methods: {}",
374        format_number(mount_stats.len())
375    );
376    eprintln!();
377    eprintln!("{}", "-".repeat(100));
378    eprintln!(
379        "{:<50} {:<15} {:>15} {:>15}",
380        "Authentication Path", "Mount Type", "Entities", "Total Logins"
381    );
382    eprintln!("{}", "-".repeat(100));
383
384    for stats in &sorted_mounts {
385        eprintln!(
386            "{:<50} {:<15} {:>15} {:>15}",
387            if stats.mount_path.len() > 49 {
388                format!("{}...", &stats.mount_path[..46])
389            } else {
390                stats.mount_path.clone()
391            },
392            if stats.mount_type.len() > 14 {
393                format!("{}...", &stats.mount_type[..11])
394            } else {
395                stats.mount_type.clone()
396            },
397            format_number(stats.entities_created),
398            format_number(stats.total_logins)
399        );
400    }
401
402    eprintln!("{}", "-".repeat(100));
403    eprintln!();
404
405    // Show top 10 with sample entities
406    eprintln!("Top 10 Authentication Paths with Sample Entities:");
407    eprintln!("{}", "=".repeat(100));
408    for (i, stats) in sorted_mounts.iter().take(10).enumerate() {
409        eprintln!();
410        eprintln!("{}. {} ({})", i + 1, stats.mount_path, stats.mount_type);
411        eprintln!(
412            "   Entities created: {} | Total logins: {}",
413            format_number(stats.entities_created),
414            format_number(stats.total_logins)
415        );
416        eprintln!("   Sample entities:");
417        for (j, name) in stats.sample_entities.iter().enumerate() {
418            eprintln!("      {}. {}", j + 1, name);
419        }
420    }
421    eprintln!();
422    eprintln!("{}", "=".repeat(100));
423
424    // Write detailed output if requested
425    if let Some(output_file) = output {
426        eprintln!(
427            "\nWriting detailed entity creation data to: {}",
428            output_file
429        );
430
431        let mut entities: Vec<_> = entity_creations.values().collect();
432        entities.sort_by(|a, b| a.first_seen.cmp(&b.first_seen));
433
434        #[derive(Serialize)]
435        struct EntityCreationOutput {
436            entity_id: String,
437            display_name: String,
438            mount_path: String,
439            mount_type: String,
440            first_seen: String,
441            login_count: usize,
442        }
443
444        let output_data: Vec<EntityCreationOutput> = entities
445            .into_iter()
446            .map(|e| EntityCreationOutput {
447                entity_id: e.entity_id.clone(),
448                display_name: e.display_name.clone(),
449                mount_path: e.mount_path.clone(),
450                mount_type: e.mount_type.clone(),
451                first_seen: e.first_seen.to_rfc3339(),
452                login_count: e.login_count,
453            })
454            .collect();
455
456        let output_file_handle = File::create(output_file)
457            .with_context(|| format!("Failed to create output file: {}", output_file))?;
458        serde_json::to_writer_pretty(output_file_handle, &output_data)
459            .with_context(|| format!("Failed to write JSON output: {}", output_file))?;
460
461        eprintln!(
462            "✓ Wrote {} entity records to {}",
463            format_number(output_data.len()),
464            output_file
465        );
466    }
467
468    Ok(())
469}