1use crate::audit::types::AuditEntry;
102use crate::utils::format::format_number;
103use crate::utils::progress::ProgressBar;
104use crate::utils::reader::open_file;
105use anyhow::{Context, Result};
106use chrono::{DateTime, Utc};
107use serde::{Deserialize, Serialize};
108use std::collections::{HashMap, HashSet};
109use std::fs::File;
110use std::io::{BufRead, BufReader};
111use std::path::Path;
112
/// Value type of the historical entity-map JSON loaded by
/// `load_entity_mappings` (the JSON object itself is keyed by entity ID).
#[derive(Debug, Serialize, Deserialize)]
struct EntityMapping {
    /// Human-readable name recorded for the entity in the historical data.
    display_name: String,
    /// Auth mount path the entity authenticated through.
    mount_path: String,
    // NOTE(review): these #[allow(dead_code)] markers look stale —
    // `first_seen`, `last_seen` and `login_count` are read when enriching
    // churn records in `run`; confirm before removing the attributes.
    #[allow(dead_code)]
    mount_accessor: String,
    #[allow(dead_code)]
    login_count: usize,
    #[allow(dead_code)]
    first_seen: String,
    #[allow(dead_code)]
    last_seen: String,
}
127
128#[derive(Debug, Serialize, Clone)]
130struct EntityChurnRecord {
131 entity_id: String,
132 display_name: String,
133 mount_path: String,
134 mount_type: String,
135 token_type: String,
136 first_seen_file: String,
137 first_seen_time: DateTime<Utc>,
138 last_seen_file: String,
139 last_seen_time: DateTime<Utc>,
140 files_appeared: Vec<String>,
141 total_logins: usize,
142 lifecycle: String, activity_pattern: String, is_ephemeral_pattern: bool,
145 ephemeral_confidence: f32, ephemeral_reasons: Vec<String>,
147 #[serde(skip_serializing_if = "Option::is_none")]
149 baseline_entity_name: Option<String>,
150 #[serde(skip_serializing_if = "Option::is_none")]
151 baseline_created: Option<String>,
152 #[serde(skip_serializing_if = "Option::is_none")]
153 baseline_alias_name: Option<String>,
154 #[serde(skip_serializing_if = "Option::is_none")]
155 baseline_mount_path: Option<String>,
156 #[serde(skip_serializing_if = "Option::is_none")]
158 historical_display_name: Option<String>,
159 #[serde(skip_serializing_if = "Option::is_none")]
160 historical_first_seen: Option<String>,
161 #[serde(skip_serializing_if = "Option::is_none")]
162 historical_last_seen: Option<String>,
163 #[serde(skip_serializing_if = "Option::is_none")]
164 historical_login_count: Option<usize>,
165}
166
167#[derive(Debug, Serialize)]
169struct EntityChurnRecordCsv {
170 entity_id: String,
171 display_name: String,
172 mount_path: String,
173 mount_type: String,
174 token_type: String,
175 first_seen_file: String,
176 first_seen_time: String,
177 last_seen_file: String,
178 last_seen_time: String,
179 files_appeared: String, days_active: usize,
181 total_logins: usize,
182 lifecycle: String,
183 activity_pattern: String,
184 is_ephemeral_pattern: bool,
185 ephemeral_confidence: f32,
186 ephemeral_reasons: String, baseline_entity_name: String,
188 baseline_created: String,
189 baseline_alias_name: String,
190 baseline_mount_path: String,
191 historical_display_name: String,
192 historical_first_seen: String,
193 historical_last_seen: String,
194 historical_login_count: String,
195}
196
197impl From<EntityChurnRecord> for EntityChurnRecordCsv {
198 fn from(record: EntityChurnRecord) -> Self {
199 Self {
200 entity_id: record.entity_id,
201 display_name: record.display_name,
202 mount_path: record.mount_path,
203 mount_type: record.mount_type,
204 token_type: record.token_type,
205 first_seen_file: record.first_seen_file,
206 first_seen_time: record.first_seen_time.to_rfc3339(),
207 last_seen_file: record.last_seen_file,
208 last_seen_time: record.last_seen_time.to_rfc3339(),
209 files_appeared: record.files_appeared.join(", "),
210 days_active: record.files_appeared.len(),
211 total_logins: record.total_logins,
212 lifecycle: record.lifecycle,
213 activity_pattern: record.activity_pattern,
214 is_ephemeral_pattern: record.is_ephemeral_pattern,
215 ephemeral_confidence: record.ephemeral_confidence,
216 ephemeral_reasons: record.ephemeral_reasons.join("; "),
217 baseline_entity_name: record.baseline_entity_name.unwrap_or_default(),
218 baseline_created: record.baseline_created.unwrap_or_default(),
219 baseline_alias_name: record.baseline_alias_name.unwrap_or_default(),
220 baseline_mount_path: record.baseline_mount_path.unwrap_or_default(),
221 historical_display_name: record.historical_display_name.unwrap_or_default(),
222 historical_first_seen: record.historical_first_seen.unwrap_or_default(),
223 historical_last_seen: record.historical_last_seen.unwrap_or_default(),
224 historical_login_count: record
225 .historical_login_count
226 .map(|n| n.to_string())
227 .unwrap_or_default(),
228 }
229 }
230}
231
/// Per-input-file (roughly per-day) counters gathered while streaming one log file.
#[derive(Debug, Clone)]
struct DailyStats {
    // Name of the log file these counters were collected from; currently
    // stored but not read back anywhere in this file.
    #[allow(dead_code)]
    file_name: String,
    // Entities first seen in this file.
    new_entities: usize,
    // Distinct entities also seen in an earlier file.
    returning_entities: usize,
    // Total login requests observed in this file.
    total_logins: usize,
}
240
/// Learns population-level short-lived-entity patterns across the analyzed
/// files, then scores individual entities against that population.
#[derive(Debug)]
struct EphemeralPatternAnalyzer {
    // Number of log files (approximately days) in the analysis window.
    total_files: usize,
    // One entry per entity that was active on at most two days.
    short_lived_patterns: Vec<ShortLivedPattern>,
}
247
/// Minimal fingerprint of one short-lived entity, used for similarity matching
/// in `EphemeralPatternAnalyzer::analyze_entity`.
#[derive(Debug)]
struct ShortLivedPattern {
    // Distinct files (days) the entity appeared in; 1 or 2 by construction.
    days_active: usize,
    display_name: String,
    mount_path: String,
}
254
255impl EphemeralPatternAnalyzer {
256 const fn new(total_files: usize) -> Self {
257 Self {
258 total_files,
259 short_lived_patterns: Vec::new(),
260 }
261 }
262
263 fn learn_from_entities(&mut self, entities: &HashMap<String, EntityChurnRecord>) {
265 for entity in entities.values() {
266 let days_active = entity.files_appeared.len();
267
268 if days_active <= 2 {
270 self.short_lived_patterns.push(ShortLivedPattern {
271 days_active,
272 display_name: entity.display_name.clone(),
273 mount_path: entity.mount_path.clone(),
274 });
275 }
276 }
277 }
278
279 fn analyze_entity(&self, entity: &EntityChurnRecord) -> (bool, f32, Vec<String>) {
281 let days_active = entity.files_appeared.len();
282 let mut confidence = 0.0;
283 let mut reasons = Vec::new();
284
285 if days_active == 1 {
287 confidence += 0.5;
288 reasons.push(format!("Appeared only 1 day ({})", entity.first_seen_file));
289 } else if days_active == 2 {
290 confidence += 0.3;
291 reasons.push(format!(
292 "Appeared only 2 days: {}, {}",
293 entity.files_appeared.first().unwrap_or(&String::new()),
294 entity.files_appeared.last().unwrap_or(&String::new())
295 ));
296 }
297
298 if days_active <= 2 {
300 let similar_count = self
302 .short_lived_patterns
303 .iter()
304 .filter(|p| {
305 if p.mount_path == entity.mount_path && p.days_active <= 2 {
307 return true;
308 }
309 if entity.display_name.contains(':') && p.display_name.contains(':') {
311 let entity_prefix = entity.display_name.split(':').next().unwrap_or("");
312 let pattern_prefix = p.display_name.split(':').next().unwrap_or("");
313 if entity_prefix == pattern_prefix && !entity_prefix.is_empty() {
314 return true;
315 }
316 }
317 false
318 })
319 .count();
320
321 if similar_count > 5 {
322 confidence += 0.2;
323 reasons.push(format!(
324 "Matches pattern seen in {} other short-lived entities",
325 similar_count
326 ));
327 } else if similar_count > 0 {
328 confidence += 0.1;
329 reasons.push(format!(
330 "Similar to {} other short-lived entities",
331 similar_count
332 ));
333 }
334 }
335
336 if entity.total_logins <= 5 && days_active <= 2 {
338 confidence += 0.1;
339 reasons.push(format!(
340 "Low activity: only {} login(s)",
341 entity.total_logins
342 ));
343 }
344
345 if days_active >= 2 {
347 let first_day_idx = entity.files_appeared.first().and_then(|f| {
348 f.split('_')
349 .next_back()
350 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
351 });
352 let last_day_idx = entity.files_appeared.last().and_then(|f| {
353 f.split('_')
354 .next_back()
355 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
356 });
357
358 if let (Some(first), Some(last)) = (first_day_idx, last_day_idx) {
359 let span = last - first + 1;
360 if span > days_active {
361 confidence *= 0.7;
363 reasons.push(
364 "Has gaps in activity (possibly sporadic access, not churned)".to_string(),
365 );
366 }
367 }
368 }
369
370 confidence = f32::min(confidence, 1.0);
372 let is_ephemeral = confidence >= 0.4; if is_ephemeral && days_active < self.total_files {
376 reasons.push(format!(
377 "Not seen in most recent {} file(s)",
378 self.total_files - days_active
379 ));
380 }
381
382 (is_ephemeral, confidence, reasons)
383 }
384
385 fn classify_activity_pattern(&self, entity: &EntityChurnRecord) -> String {
387 let days_active = entity.files_appeared.len();
388
389 if days_active == 1 {
390 return "single_burst".to_string();
391 }
392
393 if days_active == self.total_files {
394 return "consistent".to_string();
395 }
396
397 if days_active >= (self.total_files * 2) / 3 {
398 return "consistent".to_string();
399 }
400
401 if let (Some(_first_file), Some(last_file)) =
403 (entity.files_appeared.first(), entity.files_appeared.last())
404 {
405 let last_file_num = last_file
407 .split('_')
408 .next_back()
409 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
410 .unwrap_or(self.total_files);
411
412 if last_file_num < self.total_files / 2 {
413 return "declining".to_string();
414 }
415 }
416
417 if days_active <= 2 {
418 return "single_burst".to_string();
419 }
420
421 "sporadic".to_string()
422 }
423}
424
425fn get_file_size(path: &str) -> Result<u64> {
426 Ok(std::fs::metadata(path)?.len())
427}
428
429fn load_entity_mappings(path: &str) -> Result<HashMap<String, EntityMapping>> {
430 let file = File::open(path).context("Failed to open entity map file")?;
431 let mappings: HashMap<String, EntityMapping> =
432 serde_json::from_reader(file).context("Failed to parse entity map JSON")?;
433 Ok(mappings)
434}
435
/// One row/object from the Vault API baseline export (JSON array or CSV).
/// Every field except `entity_id` defaults when absent, so partial exports
/// still deserialize.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct BaselineEntity {
    entity_id: String,
    #[serde(default)]
    entity_name: String,
    #[serde(default)]
    entity_disabled: bool,
    #[serde(default)]
    entity_created: String,
    #[serde(default)]
    entity_updated: String,
    #[serde(default)]
    alias_id: String,
    #[serde(default)]
    alias_name: String,
    #[serde(default)]
    mount_path: String,
    #[serde(default)]
    mount_type: String,
    #[serde(default)]
    mount_accessor: String,
    #[serde(default)]
    alias_created: String,
    #[serde(default)]
    alias_updated: String,
    #[serde(default)]
    alias_metadata: String,
}
466
467impl BaselineEntity {
468 fn get_name(&self) -> String {
470 if !self.entity_name.is_empty() {
471 self.entity_name.clone()
472 } else if !self.alias_name.is_empty() {
473 self.alias_name.clone()
474 } else {
475 String::new()
476 }
477 }
478
479 fn get_created(&self) -> String {
481 self.entity_created.clone()
482 }
483}
484
485fn load_baseline_entities(path: &str) -> Result<HashMap<String, BaselineEntity>> {
486 let file = File::open(path).context("Failed to open baseline entities file")?;
487
488 let path_lower = path.to_lowercase();
490 if std::path::Path::new(&path_lower)
491 .extension()
492 .is_some_and(|ext| ext.eq_ignore_ascii_case("json"))
493 {
494 let entities: Vec<BaselineEntity> =
496 serde_json::from_reader(file).context("Failed to parse baseline entities JSON")?;
497 Ok(entities
498 .into_iter()
499 .map(|e| (e.entity_id.clone(), e))
500 .collect())
501 } else {
502 let mut reader = csv::Reader::from_reader(file);
504 let mut entities = HashMap::with_capacity(5000); for result in reader.deserialize() {
507 let entity: BaselineEntity = result.context("Failed to parse baseline CSV row")?;
508 entities.entry(entity.entity_id.clone()).or_insert(entity);
510 }
511
512 Ok(entities)
513 }
514}
515
516pub fn run(
517 log_files: &[String],
518 entity_map: Option<&str>,
519 baseline_entities: Option<&str>,
520 output: Option<&str>,
521 format: Option<&str>,
522) -> Result<()> {
523 println!("\n=== Multi-Day Entity Churn Analysis ===\n");
524 println!("Analyzing {} log files:", log_files.len());
525 for (i, file) in log_files.iter().enumerate() {
526 let size = get_file_size(file)?;
527 println!(
528 " Day {}: {} ({:.2} GB)",
529 i + 1,
530 file,
531 size as f64 / 1_000_000_000.0
532 );
533 }
534 println!();
535
536 let baseline = if let Some(path) = baseline_entities {
538 println!(
539 "Loading baseline entity list (Vault API metadata) from {}...",
540 path
541 );
542 let baseline_set = load_baseline_entities(path)?;
543 println!(
544 "Loaded {} pre-existing entities from Vault API baseline",
545 format_number(baseline_set.len())
546 );
547 println!();
548 Some(baseline_set)
549 } else {
550 println!("No baseline entity list provided. Cannot distinguish truly NEW entities from pre-existing.");
551 println!(" All Day 1 entities will be marked as 'pre_existing_or_new_day_1'.");
552 println!(" To get accurate results, run: ./vault-audit entity-list --output baseline_entities.json\n");
553 None
554 };
555
556 let entity_mappings = if let Some(path) = entity_map {
558 println!(
559 "Loading historical entity mappings (audit log enrichment) from {}...",
560 path
561 );
562 let mappings = load_entity_mappings(path)?;
563 println!(
564 "Loaded {} entity mappings with historical audit log data",
565 format_number(mappings.len())
566 );
567 println!();
568 Some(mappings)
569 } else {
570 None
571 };
572
573 let mut entities: HashMap<String, EntityChurnRecord> = HashMap::with_capacity(5000);
576 let mut daily_stats: Vec<DailyStats> = Vec::new();
577
578 for (file_idx, log_file) in log_files.iter().enumerate() {
580 let file_name = Path::new(log_file)
581 .file_name()
582 .unwrap()
583 .to_string_lossy()
584 .to_string();
585
586 println!("\nProcessing Day {} ({})...", file_idx + 1, file_name);
587
588 let file = open_file(log_file)
589 .with_context(|| format!("Failed to open log file: {}", log_file))?;
590 let file_size = get_file_size(log_file)? as usize;
591
592 let reader = BufReader::new(file);
593 let mut progress = ProgressBar::new(file_size, "Processing");
594
595 let mut new_entities_this_file = 0;
596 let mut returning_entities_this_file = HashSet::new();
597 let mut logins_this_file = 0;
598 let mut bytes_processed = 0;
599
600 for line in reader.lines() {
601 let line = line.context("Failed to read line from log file")?;
602 bytes_processed += line.len() + 1; if bytes_processed % 10_000 == 0 {
606 progress.update(bytes_processed.min(file_size));
607 }
608
609 let trimmed = line.trim();
610 if trimmed.is_empty() {
611 continue;
612 }
613
614 let entry: AuditEntry = match serde_json::from_str(trimmed) {
615 Ok(e) => e,
616 Err(_) => continue,
617 };
618
619 let Some(ref request) = entry.request else {
621 continue;
622 };
623 let Some(ref path) = request.path else {
624 continue;
625 };
626 if !path.ends_with("/login") {
627 continue;
628 }
629
630 logins_this_file += 1;
631
632 let Some(ref auth) = entry.auth else {
634 continue;
635 };
636 let Some(ref entity_id) = auth.entity_id else {
637 continue;
638 };
639
640 let display_name = auth
641 .display_name
642 .clone()
643 .unwrap_or_else(|| entity_id.clone());
644 let mount_path = request.path.clone().unwrap_or_default();
645 let mount_type = request.mount_type.clone().unwrap_or_default();
646 let token_type = auth.token_type.clone().unwrap_or_default();
647
648 let first_seen_time = chrono::DateTime::parse_from_rfc3339(&entry.time)
650 .ok()
651 .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
652
653 if let Some(entity_record) = entities.get_mut(entity_id) {
655 entity_record.total_logins += 1;
657 entity_record.last_seen_file.clone_from(&file_name);
658 entity_record.last_seen_time = first_seen_time;
659 if !entity_record.files_appeared.contains(&file_name) {
660 entity_record.files_appeared.push(file_name.clone());
661 }
662 returning_entities_this_file.insert(entity_id.clone());
663 } else {
664 new_entities_this_file += 1;
666
667 let lifecycle = if let Some(ref baseline_set) = baseline {
669 if baseline_set.contains_key(entity_id) {
670 "pre_existing_baseline".to_string()
671 } else {
672 match file_idx {
674 0 => "new_day_1".to_string(),
675 1 => "new_day_2".to_string(),
676 2 => "new_day_3".to_string(),
677 _ => format!("new_day_{}", file_idx + 1),
678 }
679 }
680 } else {
681 match file_idx {
683 0 => "pre_existing_or_new_day_1".to_string(),
684 1 => "new_day_2".to_string(),
685 2 => "new_day_3".to_string(),
686 _ => format!("new_day_{}", file_idx + 1),
687 }
688 };
689
690 let (
692 baseline_entity_name,
693 baseline_created,
694 baseline_alias_name,
695 baseline_mount_path,
696 ) = if let Some(ref baseline_map) = baseline {
697 if let Some(baseline_entity) = baseline_map.get(entity_id) {
698 let name = baseline_entity.get_name();
699 let created = baseline_entity.get_created();
700 (
701 if name.is_empty() { None } else { Some(name) },
702 if created.is_empty() {
703 None
704 } else {
705 Some(created)
706 },
707 if baseline_entity.alias_name.is_empty() {
708 None
709 } else {
710 Some(baseline_entity.alias_name.clone())
711 },
712 if baseline_entity.mount_path.is_empty() {
713 None
714 } else {
715 Some(baseline_entity.mount_path.clone())
716 },
717 )
718 } else {
719 (None, None, None, None)
720 }
721 } else {
722 (None, None, None, None)
723 };
724
725 let (
727 historical_display_name,
728 historical_first_seen,
729 historical_last_seen,
730 historical_login_count,
731 ) = if let Some(ref mappings) = entity_mappings {
732 if let Some(mapping) = mappings.get(entity_id) {
733 (
734 Some(mapping.display_name.clone()),
735 Some(mapping.first_seen.clone()),
736 Some(mapping.last_seen.clone()),
737 Some(mapping.login_count),
738 )
739 } else {
740 (None, None, None, None)
741 }
742 } else {
743 (None, None, None, None)
744 };
745
746 entities.insert(
747 entity_id.clone(),
748 EntityChurnRecord {
749 entity_id: entity_id.clone(),
750 display_name: display_name.clone(),
751 mount_path: mount_path.clone(),
752 mount_type: mount_type.clone(),
753 token_type: token_type.clone(),
754 first_seen_file: file_name.clone(),
755 first_seen_time,
756 last_seen_file: file_name.clone(),
757 last_seen_time: first_seen_time,
758 files_appeared: vec![file_name.clone()],
759 total_logins: 1,
760 lifecycle,
761 activity_pattern: "unknown".to_string(), is_ephemeral_pattern: false, ephemeral_confidence: 0.0, ephemeral_reasons: Vec::new(), baseline_entity_name,
766 baseline_created,
767 baseline_alias_name,
768 baseline_mount_path,
769 historical_display_name,
770 historical_first_seen,
771 historical_last_seen,
772 historical_login_count,
773 },
774 );
775 }
776 }
777
778 progress.finish();
779
780 daily_stats.push(DailyStats {
781 file_name,
782 new_entities: new_entities_this_file,
783 returning_entities: returning_entities_this_file.len(),
784 total_logins: logins_this_file,
785 });
786
787 println!(
788 "Day {} Summary: {} new entities, {} returning, {} logins",
789 file_idx + 1,
790 format_number(new_entities_this_file),
791 format_number(returning_entities_this_file.len()),
792 format_number(logins_this_file)
793 );
794 }
795
796 println!("\nAnalyzing entity behavior patterns...");
798
799 let mut analyzer = EphemeralPatternAnalyzer::new(log_files.len());
800
801 analyzer.learn_from_entities(&entities);
803 println!(
804 "Learned from {} short-lived entity patterns",
805 format_number(analyzer.short_lived_patterns.len())
806 );
807
808 let entity_ids: Vec<String> = entities.keys().cloned().collect();
810 for entity_id in entity_ids {
811 if let Some(entity) = entities.get_mut(&entity_id) {
812 entity.activity_pattern = analyzer.classify_activity_pattern(entity);
814
815 let (is_ephemeral, confidence, reasons) = analyzer.analyze_entity(entity);
817 entity.is_ephemeral_pattern = is_ephemeral;
818 entity.ephemeral_confidence = confidence;
819 entity.ephemeral_reasons = reasons;
820 }
821 }
822
823 println!("\n=== Entity Churn Analysis ===\n");
825
826 println!("Daily Breakdown:");
827 for (idx, stats) in daily_stats.iter().enumerate() {
828 println!(
829 " Day {}: {} new, {} returning, {} total logins",
830 idx + 1,
831 format_number(stats.new_entities),
832 format_number(stats.returning_entities),
833 format_number(stats.total_logins)
834 );
835 }
836
837 let mut lifecycle_counts: HashMap<String, usize> = HashMap::with_capacity(20); let mut entities_by_file_count: HashMap<usize, usize> = HashMap::with_capacity(log_files.len());
840
841 for entity in entities.values() {
842 *lifecycle_counts
843 .entry(entity.lifecycle.clone())
844 .or_insert(0) += 1;
845 *entities_by_file_count
846 .entry(entity.files_appeared.len())
847 .or_insert(0) += 1;
848 }
849
850 println!("\nEntity Lifecycle Classification:");
851 let mut lifecycle_vec: Vec<_> = lifecycle_counts.iter().collect();
852 lifecycle_vec.sort_by_key(|(k, _)| *k);
853 for (lifecycle, count) in lifecycle_vec {
854 println!(" {}: {}", lifecycle, format_number(*count));
855 }
856
857 println!("\nEntity Persistence:");
858 for day_count in 1..=log_files.len() {
859 if let Some(count) = entities_by_file_count.get(&day_count) {
860 let label = if day_count == 1 {
861 "Appeared 1 day only"
862 } else if day_count == log_files.len() {
863 "Appeared all days (persistent)"
864 } else {
865 "Appeared some days"
866 };
867 println!(
868 " {} day(s): {} entities ({})",
869 day_count,
870 format_number(*count),
871 label
872 );
873 }
874 }
875
876 let mut activity_pattern_counts: HashMap<String, usize> = HashMap::with_capacity(10); let mut ephemeral_entities = Vec::new();
879
880 for entity in entities.values() {
881 *activity_pattern_counts
882 .entry(entity.activity_pattern.clone())
883 .or_insert(0) += 1;
884
885 if entity.is_ephemeral_pattern {
886 ephemeral_entities.push(entity.clone());
887 }
888 }
889
890 println!("\nActivity Pattern Distribution:");
891 let mut pattern_vec: Vec<_> = activity_pattern_counts.iter().collect();
892 pattern_vec.sort_by(|a, b| b.1.cmp(a.1));
893 for (pattern, count) in pattern_vec {
894 println!(" {}: {}", pattern, format_number(*count));
895 }
896
897 println!("\nEphemeral Entity Detection:");
898 println!(
899 " Detected {} likely ephemeral entities (confidence ≥ 0.4)",
900 format_number(ephemeral_entities.len())
901 );
902
903 if !ephemeral_entities.is_empty() {
904 ephemeral_entities.sort_by(|a, b| {
906 b.ephemeral_confidence
907 .partial_cmp(&a.ephemeral_confidence)
908 .unwrap_or(std::cmp::Ordering::Equal)
909 });
910
911 println!(" Top 10 by confidence:");
912 for (idx, entity) in ephemeral_entities.iter().take(10).enumerate() {
913 println!(
914 " {}. {} (confidence: {:.1}%)",
915 idx + 1,
916 entity.display_name,
917 entity.ephemeral_confidence * 100.0
918 );
919 for reason in &entity.ephemeral_reasons {
920 println!(" - {}", reason);
921 }
922 }
923
924 let high_conf = ephemeral_entities
926 .iter()
927 .filter(|e| e.ephemeral_confidence >= 0.7)
928 .count();
929 let med_conf = ephemeral_entities
930 .iter()
931 .filter(|e| e.ephemeral_confidence >= 0.5 && e.ephemeral_confidence < 0.7)
932 .count();
933 let low_conf = ephemeral_entities
934 .iter()
935 .filter(|e| e.ephemeral_confidence >= 0.4 && e.ephemeral_confidence < 0.5)
936 .count();
937
938 println!("\n Confidence distribution:");
939 println!(" High (≥70%): {}", format_number(high_conf));
940 println!(" Medium (50-69%): {}", format_number(med_conf));
941 println!(" Low (40-49%): {}", format_number(low_conf));
942 }
943
944 let mut mount_stats: HashMap<String, (usize, String)> = HashMap::with_capacity(100); for entity in entities.values() {
947 let entry = mount_stats
948 .entry(entity.mount_path.clone())
949 .or_insert_with(|| (0, entity.mount_type.clone()));
950 entry.0 += 1;
951 }
952
953 println!("\nTop Authentication Methods (Total Entities):");
954 let mut mount_vec: Vec<_> = mount_stats.iter().collect();
955 mount_vec.sort_by(|a, b| b.1 .0.cmp(&a.1 .0));
956
957 for (idx, (path, (count, mount_type))) in mount_vec.iter().take(20).enumerate() {
958 println!(
959 " {}. {} ({}): {}",
960 idx + 1,
961 path,
962 mount_type,
963 format_number(*count)
964 );
965 }
966
967 let github_entities: Vec<_> = entities
969 .values()
970 .filter(|e| e.mount_path.contains("/github"))
971 .collect();
972
973 if !github_entities.is_empty() {
974 println!("\n=== GitHub Entity Analysis ===");
975 println!(
976 "Total GitHub entities: {}",
977 format_number(github_entities.len())
978 );
979
980 let mut repo_counts: HashMap<String, usize> = HashMap::new();
982 for entity in &github_entities {
983 if let Some(repo) = entity.display_name.split(':').nth(1) {
985 *repo_counts.entry(repo.to_string()).or_insert(0) += 1;
986 }
987 }
988
989 println!("Unique repositories: {}", format_number(repo_counts.len()));
990 println!("\nTop repositories by entity count:");
991 let mut repo_vec: Vec<_> = repo_counts.iter().collect();
992 repo_vec.sort_by(|a, b| b.1.cmp(a.1));
993
994 for (idx, (repo, count)) in repo_vec.iter().take(20).enumerate() {
995 if **count > 1 {
996 println!(
997 " {}. {}: {} entities",
998 idx + 1,
999 repo,
1000 format_number(**count)
1001 );
1002 }
1003 }
1004 }
1005
1006 if let Some(output_path) = output {
1008 let mut entities_vec: Vec<_> = entities.into_values().collect();
1009 entities_vec.sort_by(|a, b| a.first_seen_time.cmp(&b.first_seen_time));
1010
1011 let output_format = format.unwrap_or_else(|| {
1013 if std::path::Path::new(output_path)
1014 .extension()
1015 .is_some_and(|ext| ext.eq_ignore_ascii_case("csv"))
1016 {
1017 "csv"
1018 } else {
1019 "json"
1020 }
1021 });
1022
1023 println!(
1024 "\nExporting detailed entity records to {} (format: {})...",
1025 output_path, output_format
1026 );
1027
1028 let output_file = File::create(output_path)
1029 .with_context(|| format!("Failed to create output file: {}", output_path))?;
1030
1031 match output_format {
1032 "csv" => {
1033 let mut writer = csv::Writer::from_writer(output_file);
1034 for entity in &entities_vec {
1035 let csv_record: EntityChurnRecordCsv = entity.clone().into();
1036 writer
1037 .serialize(&csv_record)
1038 .context("Failed to write CSV record")?;
1039 }
1040 writer.flush().context("Failed to flush CSV writer")?;
1041 }
1042 _ => {
1043 serde_json::to_writer_pretty(output_file, &entities_vec)
1045 .context("Failed to write JSON output")?;
1046 }
1047 }
1048
1049 println!(
1050 "Exported {} entity records",
1051 format_number(entities_vec.len())
1052 );
1053 }
1054
1055 println!("\n=== Analysis Complete ===\n");
1056 Ok(())
1057}