1use crate::audit::types::AuditEntry;
102use crate::utils::format::format_number;
103use crate::utils::progress::ProgressBar;
104use crate::utils::reader::open_file;
105use anyhow::{Context, Result};
106use chrono::{DateTime, Utc};
107use serde::{Deserialize, Serialize};
108use std::collections::{HashMap, HashSet};
109use std::fs::File;
110use std::io::{BufRead, BufReader};
111use std::path::Path;
112
/// Historical per-entity metadata loaded from the optional JSON "entity map"
/// file (see `load_entity_mappings`); the map itself is keyed by entity ID.
#[derive(Debug, Serialize, Deserialize)]
struct EntityMapping {
    /// Display name recorded for the entity in the historical data.
    display_name: String,
    /// Auth mount path recorded for the entity in the historical data.
    mount_path: String,
    /// Accessor of the auth mount; not read anywhere in this module.
    #[allow(dead_code)]
    mount_accessor: String,
    // NOTE(review): the three fields below carry `allow(dead_code)` but ARE
    // read by `run` when building historical enrichment — the attributes
    // look vestigial; confirm before removing.
    #[allow(dead_code)]
    login_count: usize,
    #[allow(dead_code)]
    first_seen: String,
    #[allow(dead_code)]
    last_seen: String,
}
127
/// Per-entity record accumulated while streaming the audit logs, then
/// enriched by the pattern analysis pass and optional baseline/historical
/// lookups. Serialized as-is for the JSON export.
#[derive(Debug, Serialize, Clone)]
struct EntityChurnRecord {
    entity_id: String,
    /// Display name from the auth block; falls back to the entity ID.
    display_name: String,
    /// Login request path the entity was first seen authenticating through.
    mount_path: String,
    mount_type: String,
    token_type: String,
    /// Log file name of the first observed login.
    first_seen_file: String,
    first_seen_time: DateTime<Utc>,
    /// Log file name of the most recent observed login.
    last_seen_file: String,
    last_seen_time: DateTime<Utc>,
    /// Distinct log files (days) the entity appeared in, in processing order.
    files_appeared: Vec<String>,
    total_logins: usize,
    /// Lifecycle label, e.g. "pre_existing_baseline", "new_day_N",
    /// or "pre_existing_or_new_day_1" when no baseline was provided.
    lifecycle: String,
    /// One of "single_burst", "consistent", "declining", "sporadic",
    /// or "unknown" before the analysis pass runs.
    activity_pattern: String,
    /// True when `ephemeral_confidence >= 0.4` (see `analyze_entity`).
    is_ephemeral_pattern: bool,
    /// Heuristic confidence, clamped to [0.0, 1.0].
    ephemeral_confidence: f32,
    /// Human-readable justifications for the ephemeral classification.
    ephemeral_reasons: Vec<String>,
    // Optional enrichment from the Vault API baseline export; omitted from
    // JSON output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_entity_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_created: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_alias_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_mount_path: Option<String>,
    // Optional enrichment from the historical entity-map file; omitted from
    // JSON output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_display_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_first_seen: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_last_seen: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_login_count: Option<usize>,
}
166
/// Flattened, string-only projection of `EntityChurnRecord` used for the CSV
/// export; built via the `From<EntityChurnRecord>` impl below.
#[derive(Debug, Serialize)]
struct EntityChurnRecordCsv {
    entity_id: String,
    display_name: String,
    mount_path: String,
    mount_type: String,
    token_type: String,
    first_seen_file: String,
    /// RFC 3339 rendering of the first-seen timestamp.
    first_seen_time: String,
    last_seen_file: String,
    /// RFC 3339 rendering of the last-seen timestamp.
    last_seen_time: String,
    /// Comma-joined list of files the entity appeared in.
    files_appeared: String,
    /// Number of distinct files (days) the entity appeared in.
    days_active: usize,
    total_logins: usize,
    lifecycle: String,
    activity_pattern: String,
    is_ephemeral_pattern: bool,
    ephemeral_confidence: f32,
    /// Semicolon-joined ephemeral justifications.
    ephemeral_reasons: String,
    // Baseline/historical enrichment cells; empty string when unavailable.
    baseline_entity_name: String,
    baseline_created: String,
    baseline_alias_name: String,
    baseline_mount_path: String,
    historical_display_name: String,
    historical_first_seen: String,
    historical_last_seen: String,
    historical_login_count: String,
}
196
197impl From<EntityChurnRecord> for EntityChurnRecordCsv {
198 fn from(record: EntityChurnRecord) -> Self {
199 Self {
200 entity_id: record.entity_id,
201 display_name: record.display_name,
202 mount_path: record.mount_path,
203 mount_type: record.mount_type,
204 token_type: record.token_type,
205 first_seen_file: record.first_seen_file,
206 first_seen_time: record.first_seen_time.to_rfc3339(),
207 last_seen_file: record.last_seen_file,
208 last_seen_time: record.last_seen_time.to_rfc3339(),
209 files_appeared: record.files_appeared.join(", "),
210 days_active: record.files_appeared.len(),
211 total_logins: record.total_logins,
212 lifecycle: record.lifecycle,
213 activity_pattern: record.activity_pattern,
214 is_ephemeral_pattern: record.is_ephemeral_pattern,
215 ephemeral_confidence: record.ephemeral_confidence,
216 ephemeral_reasons: record.ephemeral_reasons.join("; "),
217 baseline_entity_name: record.baseline_entity_name.unwrap_or_default(),
218 baseline_created: record.baseline_created.unwrap_or_default(),
219 baseline_alias_name: record.baseline_alias_name.unwrap_or_default(),
220 baseline_mount_path: record.baseline_mount_path.unwrap_or_default(),
221 historical_display_name: record.historical_display_name.unwrap_or_default(),
222 historical_first_seen: record.historical_first_seen.unwrap_or_default(),
223 historical_last_seen: record.historical_last_seen.unwrap_or_default(),
224 historical_login_count: record
225 .historical_login_count
226 .map(|n| n.to_string())
227 .unwrap_or_default(),
228 }
229 }
230}
231
/// Per-file (per-day) counters gathered during the streaming pass.
#[derive(Debug, Clone)]
struct DailyStats {
    // Stored for completeness; the report prints day numbers instead.
    #[allow(dead_code)]
    file_name: String,
    /// Entities first seen in this file.
    new_entities: usize,
    /// Distinct previously-seen entities that reappeared in this file.
    returning_entities: usize,
    /// Login events counted in this file.
    total_logins: usize,
}
240
/// Learns the population of short-lived entities across the whole run and
/// scores individual entities for "ephemeral" behavior.
#[derive(Debug)]
struct EphemeralPatternAnalyzer {
    /// Total number of log files (days) in this analysis run.
    total_files: usize,
    /// Fingerprints of entities active on at most 2 days, collected by
    /// `learn_from_entities`.
    short_lived_patterns: Vec<ShortLivedPattern>,
}
247
/// Minimal fingerprint of a short-lived (<= 2 active days) entity, used by
/// `analyze_entity` for similarity matching.
#[derive(Debug)]
struct ShortLivedPattern {
    days_active: usize,
    display_name: String,
    mount_path: String,
}
254
255impl EphemeralPatternAnalyzer {
256 const fn new(total_files: usize) -> Self {
257 Self {
258 total_files,
259 short_lived_patterns: Vec::new(),
260 }
261 }
262
263 fn learn_from_entities(&mut self, entities: &HashMap<String, EntityChurnRecord>) {
265 for entity in entities.values() {
266 let days_active = entity.files_appeared.len();
267
268 if days_active <= 2 {
270 self.short_lived_patterns.push(ShortLivedPattern {
271 days_active,
272 display_name: entity.display_name.clone(),
273 mount_path: entity.mount_path.clone(),
274 });
275 }
276 }
277 }
278
279 fn analyze_entity(&self, entity: &EntityChurnRecord) -> (bool, f32, Vec<String>) {
281 let days_active = entity.files_appeared.len();
282 let mut confidence = 0.0;
283 let mut reasons = Vec::new();
284
285 if days_active == 1 {
287 confidence += 0.5;
288 reasons.push(format!("Appeared only 1 day ({})", entity.first_seen_file));
289 } else if days_active == 2 {
290 confidence += 0.3;
291 reasons.push(format!(
292 "Appeared only 2 days: {}, {}",
293 entity.files_appeared.first().unwrap_or(&String::new()),
294 entity.files_appeared.last().unwrap_or(&String::new())
295 ));
296 }
297
298 if days_active <= 2 {
300 let similar_count = self
302 .short_lived_patterns
303 .iter()
304 .filter(|p| {
305 if p.mount_path == entity.mount_path && p.days_active <= 2 {
307 return true;
308 }
309 if entity.display_name.contains(':') && p.display_name.contains(':') {
311 let entity_prefix = entity.display_name.split(':').next().unwrap_or("");
312 let pattern_prefix = p.display_name.split(':').next().unwrap_or("");
313 if entity_prefix == pattern_prefix && !entity_prefix.is_empty() {
314 return true;
315 }
316 }
317 false
318 })
319 .count();
320
321 if similar_count > 5 {
322 confidence += 0.2;
323 reasons.push(format!(
324 "Matches pattern seen in {} other short-lived entities",
325 similar_count
326 ));
327 } else if similar_count > 0 {
328 confidence += 0.1;
329 reasons.push(format!(
330 "Similar to {} other short-lived entities",
331 similar_count
332 ));
333 }
334 }
335
336 if entity.total_logins <= 5 && days_active <= 2 {
338 confidence += 0.1;
339 reasons.push(format!(
340 "Low activity: only {} login(s)",
341 entity.total_logins
342 ));
343 }
344
345 if days_active >= 2 {
347 let first_day_idx = entity.files_appeared.first().and_then(|f| {
348 f.split('_')
349 .next_back()
350 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
351 });
352 let last_day_idx = entity.files_appeared.last().and_then(|f| {
353 f.split('_')
354 .next_back()
355 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
356 });
357
358 if let (Some(first), Some(last)) = (first_day_idx, last_day_idx) {
359 let span = last - first + 1;
360 if span > days_active {
361 confidence *= 0.7;
363 reasons.push(
364 "Has gaps in activity (possibly sporadic access, not churned)".to_string(),
365 );
366 }
367 }
368 }
369
370 confidence = f32::min(confidence, 1.0);
372 let is_ephemeral = confidence >= 0.4; if is_ephemeral && days_active < self.total_files {
376 reasons.push(format!(
377 "Not seen in most recent {} file(s)",
378 self.total_files - days_active
379 ));
380 }
381
382 (is_ephemeral, confidence, reasons)
383 }
384
385 fn classify_activity_pattern(&self, entity: &EntityChurnRecord) -> String {
387 let days_active = entity.files_appeared.len();
388
389 if days_active == 1 {
390 return "single_burst".to_string();
391 }
392
393 if days_active == self.total_files {
394 return "consistent".to_string();
395 }
396
397 if days_active >= (self.total_files * 2) / 3 {
398 return "consistent".to_string();
399 }
400
401 if let (Some(_first_file), Some(last_file)) =
403 (entity.files_appeared.first(), entity.files_appeared.last())
404 {
405 let last_file_num = last_file
407 .split('_')
408 .next_back()
409 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
410 .unwrap_or(self.total_files);
411
412 if last_file_num < self.total_files / 2 {
413 return "declining".to_string();
414 }
415 }
416
417 if days_active <= 2 {
418 return "single_burst".to_string();
419 }
420
421 "sporadic".to_string()
422 }
423}
424
425fn get_file_size(path: &str) -> Result<u64> {
426 Ok(std::fs::metadata(path)?.len())
427}
428
429fn load_entity_mappings(path: &str) -> Result<HashMap<String, EntityMapping>> {
430 let file = File::open(path).context("Failed to open entity map file")?;
431 let mappings: HashMap<String, EntityMapping> =
432 serde_json::from_reader(file).context("Failed to parse entity map JSON")?;
433 Ok(mappings)
434}
435
/// One row/element of the Vault API baseline export (JSON array or CSV).
/// All fields except `entity_id` default to empty when missing, so partial
/// exports still deserialize.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct BaselineEntity {
    entity_id: String,
    #[serde(default)]
    entity_name: String,
    #[serde(default)]
    entity_disabled: bool,
    #[serde(default)]
    entity_created: String,
    #[serde(default)]
    entity_updated: String,
    #[serde(default)]
    alias_id: String,
    /// Alias name; used as a fallback display name (see `get_name`).
    #[serde(default)]
    alias_name: String,
    #[serde(default)]
    mount_path: String,
    #[serde(default)]
    mount_type: String,
    #[serde(default)]
    mount_accessor: String,
    #[serde(default)]
    alias_created: String,
    #[serde(default)]
    alias_updated: String,
    #[serde(default)]
    alias_metadata: String,
}
466
467impl BaselineEntity {
468 fn get_name(&self) -> String {
470 if !self.entity_name.is_empty() {
471 self.entity_name.clone()
472 } else if !self.alias_name.is_empty() {
473 self.alias_name.clone()
474 } else {
475 String::new()
476 }
477 }
478
479 fn get_created(&self) -> String {
481 self.entity_created.clone()
482 }
483}
484
485fn load_baseline_entities(path: &str) -> Result<HashMap<String, BaselineEntity>> {
486 let file = File::open(path).context("Failed to open baseline entities file")?;
487
488 let path_lower = path.to_lowercase();
490 if std::path::Path::new(&path_lower)
491 .extension()
492 .is_some_and(|ext| ext.eq_ignore_ascii_case("json"))
493 {
494 let entities: Vec<BaselineEntity> =
496 serde_json::from_reader(file).context("Failed to parse baseline entities JSON")?;
497 Ok(entities
498 .into_iter()
499 .map(|e| (e.entity_id.clone(), e))
500 .collect())
501 } else {
502 let mut reader = csv::Reader::from_reader(file);
504 let mut entities = HashMap::with_capacity(5000); for result in reader.deserialize() {
507 let entity: BaselineEntity = result.context("Failed to parse baseline CSV row")?;
508 entities.entry(entity.entity_id.clone()).or_insert(entity);
510 }
511
512 Ok(entities)
513 }
514}
515
516pub fn run(
517 log_files: &[String],
518 entity_map: Option<&str>,
519 baseline_entities: Option<&str>,
520 output: Option<&str>,
521 format: Option<&str>,
522) -> Result<()> {
523 println!("\n=== Multi-Day Entity Churn Analysis ===\n");
524 println!("Analyzing {} log files:", log_files.len());
525 for (i, file) in log_files.iter().enumerate() {
526 let size = get_file_size(file)?;
527 println!(
528 " Day {}: {} ({:.2} GB)",
529 i + 1,
530 file,
531 size as f64 / 1_000_000_000.0
532 );
533 }
534 println!();
535
536 let baseline = if let Some(path) = baseline_entities {
538 println!(
539 "Loading baseline entity list (Vault API metadata) from {}...",
540 path
541 );
542 let baseline_set = load_baseline_entities(path)?;
543 println!(
544 "Loaded {} pre-existing entities from Vault API baseline",
545 format_number(baseline_set.len())
546 );
547 println!();
548 Some(baseline_set)
549 } else {
550 println!("No baseline entity list provided. Cannot distinguish truly NEW entities from pre-existing.");
551 println!(" All Day 1 entities will be marked as 'pre_existing_or_new_day_1'.");
552 println!(" To get accurate results, run: ./vault-audit entity-list --output baseline_entities.json\n");
553 None
554 };
555
556 let entity_mappings = if let Some(path) = entity_map {
558 println!(
559 "Loading historical entity mappings (audit log enrichment) from {}...",
560 path
561 );
562 let mappings = load_entity_mappings(path)?;
563 println!(
564 "Loaded {} entity mappings with historical audit log data",
565 format_number(mappings.len())
566 );
567 println!();
568 Some(mappings)
569 } else {
570 None
571 };
572
573 let mut entities: HashMap<String, EntityChurnRecord> = HashMap::with_capacity(5000);
576 let mut daily_stats: Vec<DailyStats> = Vec::new();
577
578 for (file_idx, log_file) in log_files.iter().enumerate() {
580 let file_name = Path::new(log_file)
581 .file_name()
582 .unwrap()
583 .to_string_lossy()
584 .to_string();
585
586 eprintln!("\nProcessing Day {} ({})...", file_name, file_idx + 1);
587
588 eprintln!("Scanning file to determine total lines...");
590 let total_file_lines = crate::utils::parallel::count_file_lines(log_file)?;
591
592 let file = open_file(log_file)
593 .with_context(|| format!("Failed to open log file: {}", log_file))?;
594
595 let reader = BufReader::new(file);
596 let progress = ProgressBar::new(total_file_lines, "Processing");
597
598 let mut new_entities_this_file = 0;
599 let mut returning_entities_this_file = HashSet::new();
600 let mut logins_this_file = 0;
601 let mut lines_processed = 0;
602
603 for line in reader.lines() {
604 let line = line.context("Failed to read line from log file")?;
605 lines_processed += 1;
606
607 if lines_processed % 10_000 == 0 {
609 progress.update(lines_processed);
610 }
611
612 let trimmed = line.trim();
613 if trimmed.is_empty() {
614 continue;
615 }
616
617 let entry: AuditEntry = match serde_json::from_str(trimmed) {
618 Ok(e) => e,
619 Err(_) => continue,
620 };
621
622 let Some(ref request) = entry.request else {
624 continue;
625 };
626 let Some(ref path) = request.path else {
627 continue;
628 };
629 if !path.ends_with("/login") {
630 continue;
631 }
632
633 logins_this_file += 1;
634
635 let Some(ref auth) = entry.auth else {
637 continue;
638 };
639 let Some(ref entity_id) = auth.entity_id else {
640 continue;
641 };
642
643 let display_name = auth
644 .display_name
645 .clone()
646 .unwrap_or_else(|| entity_id.clone());
647 let mount_path = request.path.clone().unwrap_or_default();
648 let mount_type = request.mount_type.clone().unwrap_or_default();
649 let token_type = auth.token_type.clone().unwrap_or_default();
650
651 let first_seen_time = chrono::DateTime::parse_from_rfc3339(&entry.time)
653 .ok()
654 .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
655
656 if let Some(entity_record) = entities.get_mut(entity_id) {
658 entity_record.total_logins += 1;
660 entity_record.last_seen_file.clone_from(&file_name);
661 entity_record.last_seen_time = first_seen_time;
662 if !entity_record.files_appeared.contains(&file_name) {
663 entity_record.files_appeared.push(file_name.clone());
664 }
665 returning_entities_this_file.insert(entity_id.clone());
666 } else {
667 new_entities_this_file += 1;
669
670 let lifecycle = if let Some(ref baseline_set) = baseline {
672 if baseline_set.contains_key(entity_id) {
673 "pre_existing_baseline".to_string()
674 } else {
675 match file_idx {
677 0 => "new_day_1".to_string(),
678 1 => "new_day_2".to_string(),
679 2 => "new_day_3".to_string(),
680 _ => format!("new_day_{}", file_idx + 1),
681 }
682 }
683 } else {
684 match file_idx {
686 0 => "pre_existing_or_new_day_1".to_string(),
687 1 => "new_day_2".to_string(),
688 2 => "new_day_3".to_string(),
689 _ => format!("new_day_{}", file_idx + 1),
690 }
691 };
692
693 let (
695 baseline_entity_name,
696 baseline_created,
697 baseline_alias_name,
698 baseline_mount_path,
699 ) = if let Some(ref baseline_map) = baseline {
700 if let Some(baseline_entity) = baseline_map.get(entity_id) {
701 let name = baseline_entity.get_name();
702 let created = baseline_entity.get_created();
703 (
704 if name.is_empty() { None } else { Some(name) },
705 if created.is_empty() {
706 None
707 } else {
708 Some(created)
709 },
710 if baseline_entity.alias_name.is_empty() {
711 None
712 } else {
713 Some(baseline_entity.alias_name.clone())
714 },
715 if baseline_entity.mount_path.is_empty() {
716 None
717 } else {
718 Some(baseline_entity.mount_path.clone())
719 },
720 )
721 } else {
722 (None, None, None, None)
723 }
724 } else {
725 (None, None, None, None)
726 };
727
728 let (
730 historical_display_name,
731 historical_first_seen,
732 historical_last_seen,
733 historical_login_count,
734 ) = if let Some(ref mappings) = entity_mappings {
735 if let Some(mapping) = mappings.get(entity_id) {
736 (
737 Some(mapping.display_name.clone()),
738 Some(mapping.first_seen.clone()),
739 Some(mapping.last_seen.clone()),
740 Some(mapping.login_count),
741 )
742 } else {
743 (None, None, None, None)
744 }
745 } else {
746 (None, None, None, None)
747 };
748
749 entities.insert(
750 entity_id.clone(),
751 EntityChurnRecord {
752 entity_id: entity_id.clone(),
753 display_name: display_name.clone(),
754 mount_path: mount_path.clone(),
755 mount_type: mount_type.clone(),
756 token_type: token_type.clone(),
757 first_seen_file: file_name.clone(),
758 first_seen_time,
759 last_seen_file: file_name.clone(),
760 last_seen_time: first_seen_time,
761 files_appeared: vec![file_name.clone()],
762 total_logins: 1,
763 lifecycle,
764 activity_pattern: "unknown".to_string(), is_ephemeral_pattern: false, ephemeral_confidence: 0.0, ephemeral_reasons: Vec::new(), baseline_entity_name,
769 baseline_created,
770 baseline_alias_name,
771 baseline_mount_path,
772 historical_display_name,
773 historical_first_seen,
774 historical_last_seen,
775 historical_login_count,
776 },
777 );
778 }
779 }
780
781 progress.update(total_file_lines);
783 progress.finish();
784
785 daily_stats.push(DailyStats {
786 file_name,
787 new_entities: new_entities_this_file,
788 returning_entities: returning_entities_this_file.len(),
789 total_logins: logins_this_file,
790 });
791
792 eprintln!(
793 "Day {} Summary: {} new entities, {} returning, {} logins",
794 file_idx + 1,
795 format_number(new_entities_this_file),
796 format_number(returning_entities_this_file.len()),
797 format_number(logins_this_file)
798 );
799 }
800
801 println!("\nAnalyzing entity behavior patterns...");
803
804 let mut analyzer = EphemeralPatternAnalyzer::new(log_files.len());
805
806 analyzer.learn_from_entities(&entities);
808 println!(
809 "Learned from {} short-lived entity patterns",
810 format_number(analyzer.short_lived_patterns.len())
811 );
812
813 let entity_ids: Vec<String> = entities.keys().cloned().collect();
815 for entity_id in entity_ids {
816 if let Some(entity) = entities.get_mut(&entity_id) {
817 entity.activity_pattern = analyzer.classify_activity_pattern(entity);
819
820 let (is_ephemeral, confidence, reasons) = analyzer.analyze_entity(entity);
822 entity.is_ephemeral_pattern = is_ephemeral;
823 entity.ephemeral_confidence = confidence;
824 entity.ephemeral_reasons = reasons;
825 }
826 }
827
828 println!("\n=== Entity Churn Analysis ===\n");
830
831 println!("Daily Breakdown:");
832 for (idx, stats) in daily_stats.iter().enumerate() {
833 println!(
834 " Day {}: {} new, {} returning, {} total logins",
835 idx + 1,
836 format_number(stats.new_entities),
837 format_number(stats.returning_entities),
838 format_number(stats.total_logins)
839 );
840 }
841
842 let mut lifecycle_counts: HashMap<String, usize> = HashMap::with_capacity(20); let mut entities_by_file_count: HashMap<usize, usize> = HashMap::with_capacity(log_files.len());
845
846 for entity in entities.values() {
847 *lifecycle_counts
848 .entry(entity.lifecycle.clone())
849 .or_insert(0) += 1;
850 *entities_by_file_count
851 .entry(entity.files_appeared.len())
852 .or_insert(0) += 1;
853 }
854
855 println!("\nEntity Lifecycle Classification:");
856 let mut lifecycle_vec: Vec<_> = lifecycle_counts.iter().collect();
857 lifecycle_vec.sort_by_key(|(k, _)| *k);
858 for (lifecycle, count) in lifecycle_vec {
859 println!(" {}: {}", lifecycle, format_number(*count));
860 }
861
862 println!("\nEntity Persistence:");
863 for day_count in 1..=log_files.len() {
864 if let Some(count) = entities_by_file_count.get(&day_count) {
865 let label = if day_count == 1 {
866 "Appeared 1 day only"
867 } else if day_count == log_files.len() {
868 "Appeared all days (persistent)"
869 } else {
870 "Appeared some days"
871 };
872 println!(
873 " {} day(s): {} entities ({})",
874 day_count,
875 format_number(*count),
876 label
877 );
878 }
879 }
880
881 let mut activity_pattern_counts: HashMap<String, usize> = HashMap::with_capacity(10); let mut ephemeral_entities = Vec::new();
884
885 for entity in entities.values() {
886 *activity_pattern_counts
887 .entry(entity.activity_pattern.clone())
888 .or_insert(0) += 1;
889
890 if entity.is_ephemeral_pattern {
891 ephemeral_entities.push(entity.clone());
892 }
893 }
894
895 println!("\nActivity Pattern Distribution:");
896 let mut pattern_vec: Vec<_> = activity_pattern_counts.iter().collect();
897 pattern_vec.sort_by(|a, b| b.1.cmp(a.1));
898 for (pattern, count) in pattern_vec {
899 println!(" {}: {}", pattern, format_number(*count));
900 }
901
902 println!("\nEphemeral Entity Detection:");
903 println!(
904 " Detected {} likely ephemeral entities (confidence ≥ 0.4)",
905 format_number(ephemeral_entities.len())
906 );
907
908 if !ephemeral_entities.is_empty() {
909 ephemeral_entities.sort_by(|a, b| {
911 b.ephemeral_confidence
912 .partial_cmp(&a.ephemeral_confidence)
913 .unwrap_or(std::cmp::Ordering::Equal)
914 });
915
916 println!(" Top 10 by confidence:");
917 for (idx, entity) in ephemeral_entities.iter().take(10).enumerate() {
918 println!(
919 " {}. {} (confidence: {:.1}%)",
920 idx + 1,
921 entity.display_name,
922 entity.ephemeral_confidence * 100.0
923 );
924 for reason in &entity.ephemeral_reasons {
925 println!(" - {}", reason);
926 }
927 }
928
929 let high_conf = ephemeral_entities
931 .iter()
932 .filter(|e| e.ephemeral_confidence >= 0.7)
933 .count();
934 let med_conf = ephemeral_entities
935 .iter()
936 .filter(|e| e.ephemeral_confidence >= 0.5 && e.ephemeral_confidence < 0.7)
937 .count();
938 let low_conf = ephemeral_entities
939 .iter()
940 .filter(|e| e.ephemeral_confidence >= 0.4 && e.ephemeral_confidence < 0.5)
941 .count();
942
943 println!("\n Confidence distribution:");
944 println!(" High (≥70%): {}", format_number(high_conf));
945 println!(" Medium (50-69%): {}", format_number(med_conf));
946 println!(" Low (40-49%): {}", format_number(low_conf));
947 }
948
949 let mut mount_stats: HashMap<String, (usize, String)> = HashMap::with_capacity(100); for entity in entities.values() {
952 let entry = mount_stats
953 .entry(entity.mount_path.clone())
954 .or_insert_with(|| (0, entity.mount_type.clone()));
955 entry.0 += 1;
956 }
957
958 println!("\nTop Authentication Methods (Total Entities):");
959 let mut mount_vec: Vec<_> = mount_stats.iter().collect();
960 mount_vec.sort_by(|a, b| b.1 .0.cmp(&a.1 .0));
961
962 for (idx, (path, (count, mount_type))) in mount_vec.iter().take(20).enumerate() {
963 println!(
964 " {}. {} ({}): {}",
965 idx + 1,
966 path,
967 mount_type,
968 format_number(*count)
969 );
970 }
971
972 let github_entities: Vec<_> = entities
974 .values()
975 .filter(|e| e.mount_path.contains("/github"))
976 .collect();
977
978 if !github_entities.is_empty() {
979 println!("\n=== GitHub Entity Analysis ===");
980 println!(
981 "Total GitHub entities: {}",
982 format_number(github_entities.len())
983 );
984
985 let mut repo_counts: HashMap<String, usize> = HashMap::new();
987 for entity in &github_entities {
988 if let Some(repo) = entity.display_name.split(':').nth(1) {
990 *repo_counts.entry(repo.to_string()).or_insert(0) += 1;
991 }
992 }
993
994 println!("Unique repositories: {}", format_number(repo_counts.len()));
995 println!("\nTop repositories by entity count:");
996 let mut repo_vec: Vec<_> = repo_counts.iter().collect();
997 repo_vec.sort_by(|a, b| b.1.cmp(a.1));
998
999 for (idx, (repo, count)) in repo_vec.iter().take(20).enumerate() {
1000 if **count > 1 {
1001 println!(
1002 " {}. {}: {} entities",
1003 idx + 1,
1004 repo,
1005 format_number(**count)
1006 );
1007 }
1008 }
1009 }
1010
1011 if let Some(output_path) = output {
1013 let mut entities_vec: Vec<_> = entities.into_values().collect();
1014 entities_vec.sort_by(|a, b| a.first_seen_time.cmp(&b.first_seen_time));
1015
1016 let output_format = format.unwrap_or_else(|| {
1018 if std::path::Path::new(output_path)
1019 .extension()
1020 .is_some_and(|ext| ext.eq_ignore_ascii_case("csv"))
1021 {
1022 "csv"
1023 } else {
1024 "json"
1025 }
1026 });
1027
1028 println!(
1029 "\nExporting detailed entity records to {} (format: {})...",
1030 output_path, output_format
1031 );
1032
1033 let output_file = File::create(output_path)
1034 .with_context(|| format!("Failed to create output file: {}", output_path))?;
1035
1036 match output_format {
1037 "csv" => {
1038 let mut writer = csv::Writer::from_writer(output_file);
1039 for entity in &entities_vec {
1040 let csv_record: EntityChurnRecordCsv = entity.clone().into();
1041 writer
1042 .serialize(&csv_record)
1043 .context("Failed to write CSV record")?;
1044 }
1045 writer.flush().context("Failed to flush CSV writer")?;
1046 }
1047 _ => {
1048 serde_json::to_writer_pretty(output_file, &entities_vec)
1050 .context("Failed to write JSON output")?;
1051 }
1052 }
1053
1054 println!(
1055 "Exported {} entity records",
1056 format_number(entities_vec.len())
1057 );
1058 }
1059
1060 println!("\n=== Analysis Complete ===\n");
1061 Ok(())
1062}