1use crate::audit::types::AuditEntry;
84use crate::utils::progress::ProgressBar;
85use anyhow::{Context, Result};
86use chrono::{DateTime, Utc};
87use serde::{Deserialize, Serialize};
88use std::collections::{HashMap, HashSet};
89use std::fs::File;
90use std::io::{BufRead, BufReader};
91use std::path::Path;
92
/// One record from the historical entity-map JSON (audit-log enrichment).
/// The file is a JSON object keyed by entity ID; see `load_entity_mappings`.
#[derive(Debug, Serialize, Deserialize)]
struct EntityMapping {
    /// Display name recorded for the entity in historical audit logs.
    display_name: String,
    /// Auth mount (login path) the entity authenticated through.
    mount_path: String,
    #[allow(dead_code)]
    mount_accessor: String,
    // Historical stats below are copied into `EntityChurnRecord` enrichment
    // fields when an entity from the logs matches this map.
    #[allow(dead_code)]
    login_count: usize,
    #[allow(dead_code)]
    first_seen: String,
    #[allow(dead_code)]
    last_seen: String,
}
107
108#[derive(Debug, Serialize, Clone)]
110struct EntityChurnRecord {
111 entity_id: String,
112 display_name: String,
113 mount_path: String,
114 mount_type: String,
115 token_type: String,
116 first_seen_file: String,
117 first_seen_time: DateTime<Utc>,
118 last_seen_file: String,
119 last_seen_time: DateTime<Utc>,
120 files_appeared: Vec<String>,
121 total_logins: usize,
122 lifecycle: String, activity_pattern: String, is_ephemeral_pattern: bool,
125 ephemeral_confidence: f32, ephemeral_reasons: Vec<String>,
127 #[serde(skip_serializing_if = "Option::is_none")]
129 baseline_entity_name: Option<String>,
130 #[serde(skip_serializing_if = "Option::is_none")]
131 baseline_created: Option<String>,
132 #[serde(skip_serializing_if = "Option::is_none")]
133 baseline_alias_name: Option<String>,
134 #[serde(skip_serializing_if = "Option::is_none")]
135 baseline_mount_path: Option<String>,
136 #[serde(skip_serializing_if = "Option::is_none")]
138 historical_display_name: Option<String>,
139 #[serde(skip_serializing_if = "Option::is_none")]
140 historical_first_seen: Option<String>,
141 #[serde(skip_serializing_if = "Option::is_none")]
142 historical_last_seen: Option<String>,
143 #[serde(skip_serializing_if = "Option::is_none")]
144 historical_login_count: Option<usize>,
145}
146
147#[derive(Debug, Serialize)]
149struct EntityChurnRecordCsv {
150 entity_id: String,
151 display_name: String,
152 mount_path: String,
153 mount_type: String,
154 token_type: String,
155 first_seen_file: String,
156 first_seen_time: String,
157 last_seen_file: String,
158 last_seen_time: String,
159 files_appeared: String, days_active: usize,
161 total_logins: usize,
162 lifecycle: String,
163 activity_pattern: String,
164 is_ephemeral_pattern: bool,
165 ephemeral_confidence: f32,
166 ephemeral_reasons: String, baseline_entity_name: String,
168 baseline_created: String,
169 baseline_alias_name: String,
170 baseline_mount_path: String,
171 historical_display_name: String,
172 historical_first_seen: String,
173 historical_last_seen: String,
174 historical_login_count: String,
175}
176
impl From<EntityChurnRecord> for EntityChurnRecordCsv {
    /// Flattens a churn record for CSV export: timestamps become RFC 3339
    /// strings, vectors are joined into single cells, and `Option` enrichment
    /// fields become empty strings when absent.
    fn from(record: EntityChurnRecord) -> Self {
        EntityChurnRecordCsv {
            entity_id: record.entity_id,
            display_name: record.display_name,
            mount_path: record.mount_path,
            mount_type: record.mount_type,
            token_type: record.token_type,
            first_seen_file: record.first_seen_file,
            first_seen_time: record.first_seen_time.to_rfc3339(),
            last_seen_file: record.last_seen_file,
            last_seen_time: record.last_seen_time.to_rfc3339(),
            files_appeared: record.files_appeared.join(", "),
            // One "day" per distinct file the entity appeared in.
            days_active: record.files_appeared.len(),
            total_logins: record.total_logins,
            lifecycle: record.lifecycle,
            activity_pattern: record.activity_pattern,
            is_ephemeral_pattern: record.is_ephemeral_pattern,
            ephemeral_confidence: record.ephemeral_confidence,
            ephemeral_reasons: record.ephemeral_reasons.join("; "),
            // Missing enrichment serializes as an empty CSV cell.
            baseline_entity_name: record.baseline_entity_name.unwrap_or_default(),
            baseline_created: record.baseline_created.unwrap_or_default(),
            baseline_alias_name: record.baseline_alias_name.unwrap_or_default(),
            baseline_mount_path: record.baseline_mount_path.unwrap_or_default(),
            historical_display_name: record.historical_display_name.unwrap_or_default(),
            historical_first_seen: record.historical_first_seen.unwrap_or_default(),
            historical_last_seen: record.historical_last_seen.unwrap_or_default(),
            historical_login_count: record
                .historical_login_count
                .map(|n| n.to_string())
                .unwrap_or_default(),
        }
    }
}
211
/// Per-file (per-day) summary counters gathered during the scan.
#[derive(Debug)]
struct DailyStats {
    #[allow(dead_code)]
    file_name: String,
    /// Entities first seen in this file.
    new_entities: usize,
    /// Distinct entities seen in an earlier file that reappeared here.
    returning_entities: usize,
    /// Login requests counted in this file (incremented before the entity-ID
    /// check, so it includes logins without an entity ID).
    total_logins: usize,
}
220
/// Heuristic detector for short-lived ("ephemeral") entities: learns which
/// entities were active on at most 2 days, then scores each entity against
/// those patterns (see `analyze_entity`).
#[derive(Debug)]
struct EphemeralPatternAnalyzer {
    /// Total number of log files (days) in this analysis run.
    total_files: usize,
    /// Patterns collected from entities active on at most 2 days.
    short_lived_patterns: Vec<ShortLivedPattern>,
}
227
/// Snapshot of one short-lived entity (active on at most 2 days), used for
/// similarity matching in `EphemeralPatternAnalyzer::analyze_entity`.
#[derive(Debug)]
struct ShortLivedPattern {
    days_active: usize,
    display_name: String,
    mount_path: String,
}
234
235impl EphemeralPatternAnalyzer {
236 fn new(total_files: usize) -> Self {
237 Self {
238 total_files,
239 short_lived_patterns: Vec::new(),
240 }
241 }
242
243 fn learn_from_entities(&mut self, entities: &HashMap<String, EntityChurnRecord>) {
245 for entity in entities.values() {
246 let days_active = entity.files_appeared.len();
247
248 if days_active <= 2 {
250 self.short_lived_patterns.push(ShortLivedPattern {
251 days_active,
252 display_name: entity.display_name.clone(),
253 mount_path: entity.mount_path.clone(),
254 });
255 }
256 }
257 }
258
259 fn analyze_entity(&self, entity: &EntityChurnRecord) -> (bool, f32, Vec<String>) {
261 let days_active = entity.files_appeared.len();
262 let mut confidence = 0.0;
263 let mut reasons = Vec::new();
264
265 if days_active == 1 {
267 confidence += 0.5;
268 reasons.push(format!("Appeared only 1 day ({})", entity.first_seen_file));
269 } else if days_active == 2 {
270 confidence += 0.3;
271 reasons.push(format!(
272 "Appeared only 2 days: {}, {}",
273 entity.files_appeared.first().unwrap_or(&String::new()),
274 entity.files_appeared.last().unwrap_or(&String::new())
275 ));
276 }
277
278 if days_active <= 2 {
280 let similar_count = self
282 .short_lived_patterns
283 .iter()
284 .filter(|p| {
285 if p.mount_path == entity.mount_path && p.days_active <= 2 {
287 return true;
288 }
289 if entity.display_name.contains(':') && p.display_name.contains(':') {
291 let entity_prefix = entity.display_name.split(':').next().unwrap_or("");
292 let pattern_prefix = p.display_name.split(':').next().unwrap_or("");
293 if entity_prefix == pattern_prefix && !entity_prefix.is_empty() {
294 return true;
295 }
296 }
297 false
298 })
299 .count();
300
301 if similar_count > 5 {
302 confidence += 0.2;
303 reasons.push(format!(
304 "Matches pattern seen in {} other short-lived entities",
305 similar_count
306 ));
307 } else if similar_count > 0 {
308 confidence += 0.1;
309 reasons.push(format!(
310 "Similar to {} other short-lived entities",
311 similar_count
312 ));
313 }
314 }
315
316 if entity.total_logins <= 5 && days_active <= 2 {
318 confidence += 0.1;
319 reasons.push(format!(
320 "Low activity: only {} login(s)",
321 entity.total_logins
322 ));
323 }
324
325 if days_active >= 2 {
327 let first_day_idx = entity.files_appeared.first().and_then(|f| {
328 f.split('_')
329 .next_back()
330 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
331 });
332 let last_day_idx = entity.files_appeared.last().and_then(|f| {
333 f.split('_')
334 .next_back()
335 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
336 });
337
338 if let (Some(first), Some(last)) = (first_day_idx, last_day_idx) {
339 let span = last - first + 1;
340 if span > days_active {
341 confidence *= 0.7;
343 reasons.push(
344 "Has gaps in activity (possibly sporadic access, not churned)".to_string(),
345 );
346 }
347 }
348 }
349
350 confidence = f32::min(confidence, 1.0);
352 let is_ephemeral = confidence >= 0.4; if is_ephemeral && days_active < self.total_files {
356 reasons.push(format!(
357 "Not seen in most recent {} file(s)",
358 self.total_files - days_active
359 ));
360 }
361
362 (is_ephemeral, confidence, reasons)
363 }
364
365 fn classify_activity_pattern(&self, entity: &EntityChurnRecord) -> String {
367 let days_active = entity.files_appeared.len();
368
369 if days_active == 1 {
370 return "single_burst".to_string();
371 }
372
373 if days_active == self.total_files {
374 return "consistent".to_string();
375 }
376
377 if days_active >= (self.total_files * 2) / 3 {
378 return "consistent".to_string();
379 }
380
381 if let (Some(_first_file), Some(last_file)) =
383 (entity.files_appeared.first(), entity.files_appeared.last())
384 {
385 let last_file_num = last_file
387 .split('_')
388 .next_back()
389 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
390 .unwrap_or(self.total_files);
391
392 if last_file_num < self.total_files / 2 {
393 return "declining".to_string();
394 }
395 }
396
397 if days_active <= 2 {
398 return "single_burst".to_string();
399 }
400
401 "sporadic".to_string()
402 }
403}
404
/// Formats `n` with comma thousands separators, e.g. 1234567 -> "1,234,567".
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let mut formatted = String::with_capacity(digits.len() + digits.len() / 3);
    // Position of the first comma depends on how many leading digits remain
    // before the first full group of three.
    let lead = digits.len() % 3;
    for (idx, ch) in digits.chars().enumerate() {
        if idx > 0 && (idx + 3 - lead) % 3 == 0 {
            formatted.push(',');
        }
        formatted.push(ch);
    }
    formatted
}
416
417fn get_file_size(path: &str) -> Result<u64> {
418 Ok(std::fs::metadata(path)?.len())
419}
420
/// Loads the historical entity map: a JSON object mapping entity ID ->
/// `EntityMapping`, produced by a prior audit-log enrichment run.
fn load_entity_mappings(path: &str) -> Result<HashMap<String, EntityMapping>> {
    let file = File::open(path).context("Failed to open entity map file")?;
    let mappings: HashMap<String, EntityMapping> =
        serde_json::from_reader(file).context("Failed to parse entity map JSON")?;
    Ok(mappings)
}
427
/// One entity from the Vault API baseline export (JSON array or CSV row);
/// see `load_baseline_entities`. Every field except `entity_id` defaults to
/// empty/false so sparse exports still parse.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct BaselineEntity {
    entity_id: String,
    #[serde(default)]
    entity_name: String,
    #[serde(default)]
    entity_disabled: bool,
    #[serde(default)]
    entity_created: String,
    #[serde(default)]
    entity_updated: String,
    #[serde(default)]
    alias_id: String,
    #[serde(default)]
    alias_name: String,
    #[serde(default)]
    mount_path: String,
    #[serde(default)]
    mount_type: String,
    #[serde(default)]
    mount_accessor: String,
    #[serde(default)]
    alias_created: String,
    #[serde(default)]
    alias_updated: String,
    #[serde(default)]
    alias_metadata: String,
}
458
459impl BaselineEntity {
460 fn get_name(&self) -> String {
462 if !self.entity_name.is_empty() {
463 self.entity_name.clone()
464 } else if !self.alias_name.is_empty() {
465 self.alias_name.clone()
466 } else {
467 String::new()
468 }
469 }
470
471 fn get_created(&self) -> String {
473 self.entity_created.clone()
474 }
475}
476
/// Loads the Vault API baseline entity export, keyed by entity ID.
///
/// Files whose (lowercased) path ends in `.json` are parsed as a JSON array
/// of `BaselineEntity`; anything else is treated as CSV with a header row.
fn load_baseline_entities(path: &str) -> Result<HashMap<String, BaselineEntity>> {
    let file = File::open(path).context("Failed to open baseline entities file")?;

    let path_lower = path.to_lowercase();
    if path_lower.ends_with(".json") {
        let entities: Vec<BaselineEntity> =
            serde_json::from_reader(file).context("Failed to parse baseline entities JSON")?;
        // NOTE(review): for duplicate entity IDs, `collect` keeps the LAST
        // occurrence here, while the CSV branch keeps the FIRST — confirm
        // which is intended and align the two.
        Ok(entities
            .into_iter()
            .map(|e| (e.entity_id.clone(), e))
            .collect())
    } else {
        let mut reader = csv::Reader::from_reader(file);
        let mut entities = HashMap::new();

        for result in reader.deserialize() {
            let entity: BaselineEntity = result.context("Failed to parse baseline CSV row")?;
            // First occurrence of an entity ID wins.
            entities.entry(entity.entity_id.clone()).or_insert(entity);
        }

        Ok(entities)
    }
}
504
505pub fn run(
506 log_files: &[String],
507 entity_map: Option<&str>,
508 baseline_entities: Option<&str>,
509 output: Option<&str>,
510 format: Option<&str>,
511) -> Result<()> {
512 println!("\n=== Multi-Day Entity Churn Analysis ===\n");
513 println!("Analyzing {} log files:", log_files.len());
514 for (i, file) in log_files.iter().enumerate() {
515 let size = get_file_size(file)?;
516 println!(
517 " Day {}: {} ({:.2} GB)",
518 i + 1,
519 file,
520 size as f64 / 1_000_000_000.0
521 );
522 }
523 println!();
524
525 let baseline = if let Some(path) = baseline_entities {
527 println!(
528 "Loading baseline entity list (Vault API metadata) from {}...",
529 path
530 );
531 let baseline_set = load_baseline_entities(path)?;
532 println!(
533 "Loaded {} pre-existing entities from Vault API baseline",
534 format_number(baseline_set.len())
535 );
536 println!();
537 Some(baseline_set)
538 } else {
539 println!("No baseline entity list provided. Cannot distinguish truly NEW entities from pre-existing.");
540 println!(" All Day 1 entities will be marked as 'pre_existing_or_new_day_1'.");
541 println!(" To get accurate results, run: ./vault-audit entity-list --output baseline_entities.json\n");
542 None
543 };
544
545 let entity_mappings = if let Some(path) = entity_map {
547 println!(
548 "Loading historical entity mappings (audit log enrichment) from {}...",
549 path
550 );
551 let mappings = load_entity_mappings(path)?;
552 println!(
553 "Loaded {} entity mappings with historical audit log data",
554 format_number(mappings.len())
555 );
556 println!();
557 Some(mappings)
558 } else {
559 None
560 };
561
562 let mut entities: HashMap<String, EntityChurnRecord> = HashMap::new();
564 let mut daily_stats: Vec<DailyStats> = Vec::new();
565
566 for (file_idx, log_file) in log_files.iter().enumerate() {
568 let file_name = Path::new(log_file)
569 .file_name()
570 .unwrap()
571 .to_string_lossy()
572 .to_string();
573
574 println!("\nProcessing Day {} ({})...", file_idx + 1, file_name);
575
576 let file = File::open(log_file)
577 .with_context(|| format!("Failed to open log file: {}", log_file))?;
578 let file_size = get_file_size(log_file)? as usize;
579
580 let reader = BufReader::new(file);
581 let mut progress = ProgressBar::new(file_size, "Processing");
582
583 let mut new_entities_this_file = 0;
584 let mut returning_entities_this_file = HashSet::new();
585 let mut logins_this_file = 0;
586 let mut bytes_processed = 0;
587
588 for line in reader.lines() {
589 let line = line.context("Failed to read line from log file")?;
590 bytes_processed += line.len() + 1; if bytes_processed % 10_000 == 0 {
594 progress.update(bytes_processed.min(file_size));
595 }
596
597 let trimmed = line.trim();
598 if trimmed.is_empty() {
599 continue;
600 }
601
602 let entry: AuditEntry = match serde_json::from_str(trimmed) {
603 Ok(e) => e,
604 Err(_) => continue,
605 };
606
607 let Some(ref request) = entry.request else {
609 continue;
610 };
611 let Some(ref path) = request.path else {
612 continue;
613 };
614 if !path.ends_with("/login") {
615 continue;
616 }
617
618 logins_this_file += 1;
619
620 let Some(ref auth) = entry.auth else {
622 continue;
623 };
624 let Some(ref entity_id) = auth.entity_id else {
625 continue;
626 };
627
628 let display_name = auth
629 .display_name
630 .clone()
631 .unwrap_or_else(|| entity_id.clone());
632 let mount_path = request.path.clone().unwrap_or_default();
633 let mount_type = request.mount_type.clone().unwrap_or_default();
634 let token_type = auth.token_type.clone().unwrap_or_default();
635
636 let first_seen_time = chrono::DateTime::parse_from_rfc3339(&entry.time)
638 .ok()
639 .map(|dt| dt.with_timezone(&Utc))
640 .unwrap_or_else(Utc::now);
641
642 if let Some(entity_record) = entities.get_mut(entity_id) {
644 entity_record.total_logins += 1;
646 entity_record.last_seen_file = file_name.clone();
647 entity_record.last_seen_time = first_seen_time;
648 if !entity_record.files_appeared.contains(&file_name) {
649 entity_record.files_appeared.push(file_name.clone());
650 }
651 returning_entities_this_file.insert(entity_id.clone());
652 } else {
653 new_entities_this_file += 1;
655
656 let lifecycle = if let Some(ref baseline_set) = baseline {
658 if baseline_set.contains_key(entity_id) {
659 "pre_existing_baseline".to_string()
660 } else {
661 match file_idx {
663 0 => "new_day_1".to_string(),
664 1 => "new_day_2".to_string(),
665 2 => "new_day_3".to_string(),
666 _ => format!("new_day_{}", file_idx + 1),
667 }
668 }
669 } else {
670 match file_idx {
672 0 => "pre_existing_or_new_day_1".to_string(),
673 1 => "new_day_2".to_string(),
674 2 => "new_day_3".to_string(),
675 _ => format!("new_day_{}", file_idx + 1),
676 }
677 };
678
679 let (
681 baseline_entity_name,
682 baseline_created,
683 baseline_alias_name,
684 baseline_mount_path,
685 ) = if let Some(ref baseline_map) = baseline {
686 if let Some(baseline_entity) = baseline_map.get(entity_id) {
687 let name = baseline_entity.get_name();
688 let created = baseline_entity.get_created();
689 (
690 if !name.is_empty() { Some(name) } else { None },
691 if !created.is_empty() {
692 Some(created)
693 } else {
694 None
695 },
696 if !baseline_entity.alias_name.is_empty() {
697 Some(baseline_entity.alias_name.clone())
698 } else {
699 None
700 },
701 if !baseline_entity.mount_path.is_empty() {
702 Some(baseline_entity.mount_path.clone())
703 } else {
704 None
705 },
706 )
707 } else {
708 (None, None, None, None)
709 }
710 } else {
711 (None, None, None, None)
712 };
713
714 let (
716 historical_display_name,
717 historical_first_seen,
718 historical_last_seen,
719 historical_login_count,
720 ) = if let Some(ref mappings) = entity_mappings {
721 if let Some(mapping) = mappings.get(entity_id) {
722 (
723 Some(mapping.display_name.clone()),
724 Some(mapping.first_seen.clone()),
725 Some(mapping.last_seen.clone()),
726 Some(mapping.login_count),
727 )
728 } else {
729 (None, None, None, None)
730 }
731 } else {
732 (None, None, None, None)
733 };
734
735 entities.insert(
736 entity_id.clone(),
737 EntityChurnRecord {
738 entity_id: entity_id.clone(),
739 display_name: display_name.clone(),
740 mount_path: mount_path.clone(),
741 mount_type: mount_type.clone(),
742 token_type: token_type.clone(),
743 first_seen_file: file_name.clone(),
744 first_seen_time,
745 last_seen_file: file_name.clone(),
746 last_seen_time: first_seen_time,
747 files_appeared: vec![file_name.clone()],
748 total_logins: 1,
749 lifecycle,
750 activity_pattern: "unknown".to_string(), is_ephemeral_pattern: false, ephemeral_confidence: 0.0, ephemeral_reasons: Vec::new(), baseline_entity_name,
755 baseline_created,
756 baseline_alias_name,
757 baseline_mount_path,
758 historical_display_name,
759 historical_first_seen,
760 historical_last_seen,
761 historical_login_count,
762 },
763 );
764 }
765 }
766
767 progress.finish();
768
769 daily_stats.push(DailyStats {
770 file_name,
771 new_entities: new_entities_this_file,
772 returning_entities: returning_entities_this_file.len(),
773 total_logins: logins_this_file,
774 });
775
776 println!(
777 "Day {} Summary: {} new entities, {} returning, {} logins",
778 file_idx + 1,
779 format_number(new_entities_this_file),
780 format_number(returning_entities_this_file.len()),
781 format_number(logins_this_file)
782 );
783 }
784
785 println!("\nAnalyzing entity behavior patterns...");
787
788 let mut analyzer = EphemeralPatternAnalyzer::new(log_files.len());
789
790 analyzer.learn_from_entities(&entities);
792 println!(
793 "Learned from {} short-lived entity patterns",
794 format_number(analyzer.short_lived_patterns.len())
795 );
796
797 let entity_ids: Vec<String> = entities.keys().cloned().collect();
799 for entity_id in entity_ids {
800 if let Some(entity) = entities.get_mut(&entity_id) {
801 entity.activity_pattern = analyzer.classify_activity_pattern(entity);
803
804 let (is_ephemeral, confidence, reasons) = analyzer.analyze_entity(entity);
806 entity.is_ephemeral_pattern = is_ephemeral;
807 entity.ephemeral_confidence = confidence;
808 entity.ephemeral_reasons = reasons;
809 }
810 }
811
812 println!("\n=== Entity Churn Analysis ===\n");
814
815 println!("Daily Breakdown:");
816 for (idx, stats) in daily_stats.iter().enumerate() {
817 println!(
818 " Day {}: {} new, {} returning, {} total logins",
819 idx + 1,
820 format_number(stats.new_entities),
821 format_number(stats.returning_entities),
822 format_number(stats.total_logins)
823 );
824 }
825
826 let mut lifecycle_counts: HashMap<String, usize> = HashMap::new();
828 let mut entities_by_file_count: HashMap<usize, usize> = HashMap::new();
829
830 for entity in entities.values() {
831 *lifecycle_counts
832 .entry(entity.lifecycle.clone())
833 .or_insert(0) += 1;
834 *entities_by_file_count
835 .entry(entity.files_appeared.len())
836 .or_insert(0) += 1;
837 }
838
839 println!("\nEntity Lifecycle Classification:");
840 let mut lifecycle_vec: Vec<_> = lifecycle_counts.iter().collect();
841 lifecycle_vec.sort_by_key(|(k, _)| *k);
842 for (lifecycle, count) in lifecycle_vec {
843 println!(" {}: {}", lifecycle, format_number(*count));
844 }
845
846 println!("\nEntity Persistence:");
847 for day_count in 1..=log_files.len() {
848 if let Some(count) = entities_by_file_count.get(&day_count) {
849 let label = if day_count == 1 {
850 "Appeared 1 day only"
851 } else if day_count == log_files.len() {
852 "Appeared all days (persistent)"
853 } else {
854 "Appeared some days"
855 };
856 println!(
857 " {} day(s): {} entities ({})",
858 day_count,
859 format_number(*count),
860 label
861 );
862 }
863 }
864
865 let mut activity_pattern_counts: HashMap<String, usize> = HashMap::new();
867 let mut ephemeral_entities = Vec::new();
868
869 for entity in entities.values() {
870 *activity_pattern_counts
871 .entry(entity.activity_pattern.clone())
872 .or_insert(0) += 1;
873
874 if entity.is_ephemeral_pattern {
875 ephemeral_entities.push(entity.clone());
876 }
877 }
878
879 println!("\nActivity Pattern Distribution:");
880 let mut pattern_vec: Vec<_> = activity_pattern_counts.iter().collect();
881 pattern_vec.sort_by(|a, b| b.1.cmp(a.1));
882 for (pattern, count) in pattern_vec {
883 println!(" {}: {}", pattern, format_number(*count));
884 }
885
886 println!("\nEphemeral Entity Detection:");
887 println!(
888 " Detected {} likely ephemeral entities (confidence ≥ 0.4)",
889 format_number(ephemeral_entities.len())
890 );
891
892 if !ephemeral_entities.is_empty() {
893 ephemeral_entities.sort_by(|a, b| {
895 b.ephemeral_confidence
896 .partial_cmp(&a.ephemeral_confidence)
897 .unwrap_or(std::cmp::Ordering::Equal)
898 });
899
900 println!(" Top 10 by confidence:");
901 for (idx, entity) in ephemeral_entities.iter().take(10).enumerate() {
902 println!(
903 " {}. {} (confidence: {:.1}%)",
904 idx + 1,
905 entity.display_name,
906 entity.ephemeral_confidence * 100.0
907 );
908 for reason in &entity.ephemeral_reasons {
909 println!(" - {}", reason);
910 }
911 }
912
913 let high_conf = ephemeral_entities
915 .iter()
916 .filter(|e| e.ephemeral_confidence >= 0.7)
917 .count();
918 let med_conf = ephemeral_entities
919 .iter()
920 .filter(|e| e.ephemeral_confidence >= 0.5 && e.ephemeral_confidence < 0.7)
921 .count();
922 let low_conf = ephemeral_entities
923 .iter()
924 .filter(|e| e.ephemeral_confidence >= 0.4 && e.ephemeral_confidence < 0.5)
925 .count();
926
927 println!("\n Confidence distribution:");
928 println!(" High (≥70%): {}", format_number(high_conf));
929 println!(" Medium (50-69%): {}", format_number(med_conf));
930 println!(" Low (40-49%): {}", format_number(low_conf));
931 }
932
933 let mut mount_stats: HashMap<String, (usize, String)> = HashMap::new();
935 for entity in entities.values() {
936 let entry = mount_stats
937 .entry(entity.mount_path.clone())
938 .or_insert((0, entity.mount_type.clone()));
939 entry.0 += 1;
940 }
941
942 println!("\nTop Authentication Methods (Total Entities):");
943 let mut mount_vec: Vec<_> = mount_stats.iter().collect();
944 mount_vec.sort_by(|a, b| b.1 .0.cmp(&a.1 .0));
945
946 for (idx, (path, (count, mount_type))) in mount_vec.iter().take(20).enumerate() {
947 println!(
948 " {}. {} ({}): {}",
949 idx + 1,
950 path,
951 mount_type,
952 format_number(*count)
953 );
954 }
955
956 let github_entities: Vec<_> = entities
958 .values()
959 .filter(|e| e.mount_path.contains("/github"))
960 .collect();
961
962 if !github_entities.is_empty() {
963 println!("\n=== GitHub Entity Analysis ===");
964 println!(
965 "Total GitHub entities: {}",
966 format_number(github_entities.len())
967 );
968
969 let mut repo_counts: HashMap<String, usize> = HashMap::new();
971 for entity in &github_entities {
972 if let Some(repo) = entity.display_name.split(':').nth(1) {
974 *repo_counts.entry(repo.to_string()).or_insert(0) += 1;
975 }
976 }
977
978 println!("Unique repositories: {}", format_number(repo_counts.len()));
979 println!("\nTop repositories by entity count:");
980 let mut repo_vec: Vec<_> = repo_counts.iter().collect();
981 repo_vec.sort_by(|a, b| b.1.cmp(a.1));
982
983 for (idx, (repo, count)) in repo_vec.iter().take(20).enumerate() {
984 if **count > 1 {
985 println!(
986 " {}. {}: {} entities",
987 idx + 1,
988 repo,
989 format_number(**count)
990 );
991 }
992 }
993 }
994
995 if let Some(output_path) = output {
997 let mut entities_vec: Vec<_> = entities.into_values().collect();
998 entities_vec.sort_by(|a, b| a.first_seen_time.cmp(&b.first_seen_time));
999
1000 let output_format = format.unwrap_or_else(|| {
1002 if output_path.ends_with(".csv") {
1003 "csv"
1004 } else {
1005 "json"
1006 }
1007 });
1008
1009 println!(
1010 "\nExporting detailed entity records to {} (format: {})...",
1011 output_path, output_format
1012 );
1013
1014 let output_file = File::create(output_path)
1015 .with_context(|| format!("Failed to create output file: {}", output_path))?;
1016
1017 match output_format {
1018 "csv" => {
1019 let mut writer = csv::Writer::from_writer(output_file);
1020 for entity in &entities_vec {
1021 let csv_record: EntityChurnRecordCsv = entity.clone().into();
1022 writer
1023 .serialize(&csv_record)
1024 .context("Failed to write CSV record")?;
1025 }
1026 writer.flush().context("Failed to flush CSV writer")?;
1027 }
1028 _ => {
1029 serde_json::to_writer_pretty(output_file, &entities_vec)
1031 .context("Failed to write JSON output")?;
1032 }
1033 }
1034
1035 println!(
1036 "Exported {} entity records",
1037 format_number(entities_vec.len())
1038 );
1039 }
1040
1041 println!("\n=== Analysis Complete ===\n");
1042 Ok(())
1043}