1use crate::audit::types::AuditEntry;
84use crate::utils::progress::ProgressBar;
85use crate::utils::reader::open_file;
86use anyhow::{Context, Result};
87use chrono::{DateTime, Utc};
88use serde::{Deserialize, Serialize};
89use std::collections::{HashMap, HashSet};
90use std::fs::File;
91use std::io::{BufRead, BufReader};
92use std::path::Path;
93
/// One entry from the historical entity-map JSON file (see
/// `load_entity_mappings`): per-entity metadata derived from earlier
/// audit-log runs, keyed externally by entity ID.
#[derive(Debug, Serialize, Deserialize)]
struct EntityMapping {
    display_name: String,
    mount_path: String,
    #[allow(dead_code)]
    mount_accessor: String,
    // NOTE(review): the #[allow(dead_code)] markers below look stale —
    // login_count/first_seen/last_seen are read during enrichment in `run`.
    #[allow(dead_code)]
    login_count: usize,
    #[allow(dead_code)]
    first_seen: String,
    #[allow(dead_code)]
    last_seen: String,
}
108
/// Per-entity state accumulated across all analyzed log files, plus the
/// classification results and optional baseline/historical enrichment.
#[derive(Debug, Serialize, Clone)]
struct EntityChurnRecord {
    entity_id: String,
    display_name: String,
    mount_path: String,
    mount_type: String,
    token_type: String,
    // First and last appearance: source file name plus parsed event time.
    first_seen_file: String,
    first_seen_time: DateTime<Utc>,
    last_seen_file: String,
    last_seen_time: DateTime<Utc>,
    // Distinct file names (one per analyzed "day") the entity logged in from.
    files_appeared: Vec<String>,
    total_logins: usize,
    // Lifecycle label assigned in `run`, e.g. "pre_existing_baseline",
    // "new_day_2", or "pre_existing_or_new_day_1" when no baseline exists.
    lifecycle: String,
    // "single_burst" / "consistent" / "declining" / "sporadic" / "unknown".
    activity_pattern: String,
    is_ephemeral_pattern: bool,
    // Score in 0.0..=1.0 produced by EphemeralPatternAnalyzer.
    ephemeral_confidence: f32,
    ephemeral_reasons: Vec<String>,
    // Enrichment from the Vault API baseline file, when provided.
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_entity_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_created: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_alias_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_mount_path: Option<String>,
    // Enrichment from the historical entity map, when provided.
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_display_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_first_seen: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_last_seen: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_login_count: Option<usize>,
}
147
/// Flat, CSV-friendly projection of `EntityChurnRecord`: timestamps as
/// RFC 3339 strings, list fields joined into single cells, and optional
/// enrichment rendered as empty strings when absent (see the `From` impl).
#[derive(Debug, Serialize)]
struct EntityChurnRecordCsv {
    entity_id: String,
    display_name: String,
    mount_path: String,
    mount_type: String,
    token_type: String,
    first_seen_file: String,
    first_seen_time: String,
    last_seen_file: String,
    last_seen_time: String,
    // ", "-joined file names; days_active is the count of those files.
    files_appeared: String,
    days_active: usize,
    total_logins: usize,
    lifecycle: String,
    activity_pattern: String,
    is_ephemeral_pattern: bool,
    ephemeral_confidence: f32,
    // "; "-joined analyzer reasons.
    ephemeral_reasons: String,
    baseline_entity_name: String,
    baseline_created: String,
    baseline_alias_name: String,
    baseline_mount_path: String,
    historical_display_name: String,
    historical_first_seen: String,
    historical_last_seen: String,
    historical_login_count: String,
}
177
178impl From<EntityChurnRecord> for EntityChurnRecordCsv {
179 fn from(record: EntityChurnRecord) -> Self {
180 EntityChurnRecordCsv {
181 entity_id: record.entity_id,
182 display_name: record.display_name,
183 mount_path: record.mount_path,
184 mount_type: record.mount_type,
185 token_type: record.token_type,
186 first_seen_file: record.first_seen_file,
187 first_seen_time: record.first_seen_time.to_rfc3339(),
188 last_seen_file: record.last_seen_file,
189 last_seen_time: record.last_seen_time.to_rfc3339(),
190 files_appeared: record.files_appeared.join(", "),
191 days_active: record.files_appeared.len(),
192 total_logins: record.total_logins,
193 lifecycle: record.lifecycle,
194 activity_pattern: record.activity_pattern,
195 is_ephemeral_pattern: record.is_ephemeral_pattern,
196 ephemeral_confidence: record.ephemeral_confidence,
197 ephemeral_reasons: record.ephemeral_reasons.join("; "),
198 baseline_entity_name: record.baseline_entity_name.unwrap_or_default(),
199 baseline_created: record.baseline_created.unwrap_or_default(),
200 baseline_alias_name: record.baseline_alias_name.unwrap_or_default(),
201 baseline_mount_path: record.baseline_mount_path.unwrap_or_default(),
202 historical_display_name: record.historical_display_name.unwrap_or_default(),
203 historical_first_seen: record.historical_first_seen.unwrap_or_default(),
204 historical_last_seen: record.historical_last_seen.unwrap_or_default(),
205 historical_login_count: record
206 .historical_login_count
207 .map(|n| n.to_string())
208 .unwrap_or_default(),
209 }
210 }
211}
212
/// Per-file (per-"day") summary counters shown in the daily breakdown.
#[derive(Debug)]
struct DailyStats {
    #[allow(dead_code)]
    file_name: String,
    // Entities first observed in this file.
    new_entities: usize,
    // Distinct previously-seen entities that logged in again in this file.
    returning_entities: usize,
    // All login requests in this file, including ones without entity data.
    total_logins: usize,
}
221
/// Heuristic detector for ephemeral (short-lived) entities: a first pass
/// collects short-lived patterns, then each entity is scored against them.
#[derive(Debug)]
struct EphemeralPatternAnalyzer {
    // Number of log files (days) covered by this analysis run.
    total_files: usize,
    // Entities that appeared on at most 2 days, recorded during learning.
    short_lived_patterns: Vec<ShortLivedPattern>,
}
228
/// Signature of one short-lived entity (≤ 2 active days), used to find
/// similar entities by mount path or display-name prefix.
#[derive(Debug)]
struct ShortLivedPattern {
    days_active: usize,
    display_name: String,
    mount_path: String,
}
235
236impl EphemeralPatternAnalyzer {
237 fn new(total_files: usize) -> Self {
238 Self {
239 total_files,
240 short_lived_patterns: Vec::new(),
241 }
242 }
243
244 fn learn_from_entities(&mut self, entities: &HashMap<String, EntityChurnRecord>) {
246 for entity in entities.values() {
247 let days_active = entity.files_appeared.len();
248
249 if days_active <= 2 {
251 self.short_lived_patterns.push(ShortLivedPattern {
252 days_active,
253 display_name: entity.display_name.clone(),
254 mount_path: entity.mount_path.clone(),
255 });
256 }
257 }
258 }
259
260 fn analyze_entity(&self, entity: &EntityChurnRecord) -> (bool, f32, Vec<String>) {
262 let days_active = entity.files_appeared.len();
263 let mut confidence = 0.0;
264 let mut reasons = Vec::new();
265
266 if days_active == 1 {
268 confidence += 0.5;
269 reasons.push(format!("Appeared only 1 day ({})", entity.first_seen_file));
270 } else if days_active == 2 {
271 confidence += 0.3;
272 reasons.push(format!(
273 "Appeared only 2 days: {}, {}",
274 entity.files_appeared.first().unwrap_or(&String::new()),
275 entity.files_appeared.last().unwrap_or(&String::new())
276 ));
277 }
278
279 if days_active <= 2 {
281 let similar_count = self
283 .short_lived_patterns
284 .iter()
285 .filter(|p| {
286 if p.mount_path == entity.mount_path && p.days_active <= 2 {
288 return true;
289 }
290 if entity.display_name.contains(':') && p.display_name.contains(':') {
292 let entity_prefix = entity.display_name.split(':').next().unwrap_or("");
293 let pattern_prefix = p.display_name.split(':').next().unwrap_or("");
294 if entity_prefix == pattern_prefix && !entity_prefix.is_empty() {
295 return true;
296 }
297 }
298 false
299 })
300 .count();
301
302 if similar_count > 5 {
303 confidence += 0.2;
304 reasons.push(format!(
305 "Matches pattern seen in {} other short-lived entities",
306 similar_count
307 ));
308 } else if similar_count > 0 {
309 confidence += 0.1;
310 reasons.push(format!(
311 "Similar to {} other short-lived entities",
312 similar_count
313 ));
314 }
315 }
316
317 if entity.total_logins <= 5 && days_active <= 2 {
319 confidence += 0.1;
320 reasons.push(format!(
321 "Low activity: only {} login(s)",
322 entity.total_logins
323 ));
324 }
325
326 if days_active >= 2 {
328 let first_day_idx = entity.files_appeared.first().and_then(|f| {
329 f.split('_')
330 .next_back()
331 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
332 });
333 let last_day_idx = entity.files_appeared.last().and_then(|f| {
334 f.split('_')
335 .next_back()
336 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
337 });
338
339 if let (Some(first), Some(last)) = (first_day_idx, last_day_idx) {
340 let span = last - first + 1;
341 if span > days_active {
342 confidence *= 0.7;
344 reasons.push(
345 "Has gaps in activity (possibly sporadic access, not churned)".to_string(),
346 );
347 }
348 }
349 }
350
351 confidence = f32::min(confidence, 1.0);
353 let is_ephemeral = confidence >= 0.4; if is_ephemeral && days_active < self.total_files {
357 reasons.push(format!(
358 "Not seen in most recent {} file(s)",
359 self.total_files - days_active
360 ));
361 }
362
363 (is_ephemeral, confidence, reasons)
364 }
365
366 fn classify_activity_pattern(&self, entity: &EntityChurnRecord) -> String {
368 let days_active = entity.files_appeared.len();
369
370 if days_active == 1 {
371 return "single_burst".to_string();
372 }
373
374 if days_active == self.total_files {
375 return "consistent".to_string();
376 }
377
378 if days_active >= (self.total_files * 2) / 3 {
379 return "consistent".to_string();
380 }
381
382 if let (Some(_first_file), Some(last_file)) =
384 (entity.files_appeared.first(), entity.files_appeared.last())
385 {
386 let last_file_num = last_file
388 .split('_')
389 .next_back()
390 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
391 .unwrap_or(self.total_files);
392
393 if last_file_num < self.total_files / 2 {
394 return "declining".to_string();
395 }
396 }
397
398 if days_active <= 2 {
399 return "single_burst".to_string();
400 }
401
402 "sporadic".to_string()
403 }
404}
405
/// Format `n` with commas as thousands separators, e.g. 1234567 -> "1,234,567".
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut out = String::with_capacity(len + len / 3);
    for (idx, ch) in digits.chars().enumerate() {
        // A separator belongs before any position with a multiple of three
        // digits remaining to its right.
        if idx > 0 && (len - idx) % 3 == 0 {
            out.push(',');
        }
        out.push(ch);
    }
    out
}
417
418fn get_file_size(path: &str) -> Result<u64> {
419 Ok(std::fs::metadata(path)?.len())
420}
421
422fn load_entity_mappings(path: &str) -> Result<HashMap<String, EntityMapping>> {
423 let file = File::open(path).context("Failed to open entity map file")?;
424 let mappings: HashMap<String, EntityMapping> =
425 serde_json::from_reader(file).context("Failed to parse entity map JSON")?;
426 Ok(mappings)
427}
428
/// One entity row from the Vault API baseline export (JSON array or CSV).
/// Every field except `entity_id` defaults to empty when missing.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct BaselineEntity {
    entity_id: String,
    #[serde(default)]
    entity_name: String,
    #[serde(default)]
    entity_disabled: bool,
    #[serde(default)]
    entity_created: String,
    #[serde(default)]
    entity_updated: String,
    #[serde(default)]
    alias_id: String,
    #[serde(default)]
    alias_name: String,
    #[serde(default)]
    mount_path: String,
    #[serde(default)]
    mount_type: String,
    #[serde(default)]
    mount_accessor: String,
    #[serde(default)]
    alias_created: String,
    #[serde(default)]
    alias_updated: String,
    #[serde(default)]
    alias_metadata: String,
}
459
460impl BaselineEntity {
461 fn get_name(&self) -> String {
463 if !self.entity_name.is_empty() {
464 self.entity_name.clone()
465 } else if !self.alias_name.is_empty() {
466 self.alias_name.clone()
467 } else {
468 String::new()
469 }
470 }
471
472 fn get_created(&self) -> String {
474 self.entity_created.clone()
475 }
476}
477
478fn load_baseline_entities(path: &str) -> Result<HashMap<String, BaselineEntity>> {
479 let file = File::open(path).context("Failed to open baseline entities file")?;
480
481 let path_lower = path.to_lowercase();
483 if path_lower.ends_with(".json") {
484 let entities: Vec<BaselineEntity> =
486 serde_json::from_reader(file).context("Failed to parse baseline entities JSON")?;
487 Ok(entities
488 .into_iter()
489 .map(|e| (e.entity_id.clone(), e))
490 .collect())
491 } else {
492 let mut reader = csv::Reader::from_reader(file);
494 let mut entities = HashMap::new();
495
496 for result in reader.deserialize() {
497 let entity: BaselineEntity = result.context("Failed to parse baseline CSV row")?;
498 entities.entry(entity.entity_id.clone()).or_insert(entity);
500 }
501
502 Ok(entities)
503 }
504}
505
506pub fn run(
507 log_files: &[String],
508 entity_map: Option<&str>,
509 baseline_entities: Option<&str>,
510 output: Option<&str>,
511 format: Option<&str>,
512) -> Result<()> {
513 println!("\n=== Multi-Day Entity Churn Analysis ===\n");
514 println!("Analyzing {} log files:", log_files.len());
515 for (i, file) in log_files.iter().enumerate() {
516 let size = get_file_size(file)?;
517 println!(
518 " Day {}: {} ({:.2} GB)",
519 i + 1,
520 file,
521 size as f64 / 1_000_000_000.0
522 );
523 }
524 println!();
525
526 let baseline = if let Some(path) = baseline_entities {
528 println!(
529 "Loading baseline entity list (Vault API metadata) from {}...",
530 path
531 );
532 let baseline_set = load_baseline_entities(path)?;
533 println!(
534 "Loaded {} pre-existing entities from Vault API baseline",
535 format_number(baseline_set.len())
536 );
537 println!();
538 Some(baseline_set)
539 } else {
540 println!("No baseline entity list provided. Cannot distinguish truly NEW entities from pre-existing.");
541 println!(" All Day 1 entities will be marked as 'pre_existing_or_new_day_1'.");
542 println!(" To get accurate results, run: ./vault-audit entity-list --output baseline_entities.json\n");
543 None
544 };
545
546 let entity_mappings = if let Some(path) = entity_map {
548 println!(
549 "Loading historical entity mappings (audit log enrichment) from {}...",
550 path
551 );
552 let mappings = load_entity_mappings(path)?;
553 println!(
554 "Loaded {} entity mappings with historical audit log data",
555 format_number(mappings.len())
556 );
557 println!();
558 Some(mappings)
559 } else {
560 None
561 };
562
563 let mut entities: HashMap<String, EntityChurnRecord> = HashMap::new();
565 let mut daily_stats: Vec<DailyStats> = Vec::new();
566
567 for (file_idx, log_file) in log_files.iter().enumerate() {
569 let file_name = Path::new(log_file)
570 .file_name()
571 .unwrap()
572 .to_string_lossy()
573 .to_string();
574
575 println!("\nProcessing Day {} ({})...", file_idx + 1, file_name);
576
577 let file = open_file(log_file)
578 .with_context(|| format!("Failed to open log file: {}", log_file))?;
579 let file_size = get_file_size(log_file)? as usize;
580
581 let reader = BufReader::new(file);
582 let mut progress = ProgressBar::new(file_size, "Processing");
583
584 let mut new_entities_this_file = 0;
585 let mut returning_entities_this_file = HashSet::new();
586 let mut logins_this_file = 0;
587 let mut bytes_processed = 0;
588
589 for line in reader.lines() {
590 let line = line.context("Failed to read line from log file")?;
591 bytes_processed += line.len() + 1; if bytes_processed % 10_000 == 0 {
595 progress.update(bytes_processed.min(file_size));
596 }
597
598 let trimmed = line.trim();
599 if trimmed.is_empty() {
600 continue;
601 }
602
603 let entry: AuditEntry = match serde_json::from_str(trimmed) {
604 Ok(e) => e,
605 Err(_) => continue,
606 };
607
608 let Some(ref request) = entry.request else {
610 continue;
611 };
612 let Some(ref path) = request.path else {
613 continue;
614 };
615 if !path.ends_with("/login") {
616 continue;
617 }
618
619 logins_this_file += 1;
620
621 let Some(ref auth) = entry.auth else {
623 continue;
624 };
625 let Some(ref entity_id) = auth.entity_id else {
626 continue;
627 };
628
629 let display_name = auth
630 .display_name
631 .clone()
632 .unwrap_or_else(|| entity_id.clone());
633 let mount_path = request.path.clone().unwrap_or_default();
634 let mount_type = request.mount_type.clone().unwrap_or_default();
635 let token_type = auth.token_type.clone().unwrap_or_default();
636
637 let first_seen_time = chrono::DateTime::parse_from_rfc3339(&entry.time)
639 .ok()
640 .map(|dt| dt.with_timezone(&Utc))
641 .unwrap_or_else(Utc::now);
642
643 if let Some(entity_record) = entities.get_mut(entity_id) {
645 entity_record.total_logins += 1;
647 entity_record.last_seen_file = file_name.clone();
648 entity_record.last_seen_time = first_seen_time;
649 if !entity_record.files_appeared.contains(&file_name) {
650 entity_record.files_appeared.push(file_name.clone());
651 }
652 returning_entities_this_file.insert(entity_id.clone());
653 } else {
654 new_entities_this_file += 1;
656
657 let lifecycle = if let Some(ref baseline_set) = baseline {
659 if baseline_set.contains_key(entity_id) {
660 "pre_existing_baseline".to_string()
661 } else {
662 match file_idx {
664 0 => "new_day_1".to_string(),
665 1 => "new_day_2".to_string(),
666 2 => "new_day_3".to_string(),
667 _ => format!("new_day_{}", file_idx + 1),
668 }
669 }
670 } else {
671 match file_idx {
673 0 => "pre_existing_or_new_day_1".to_string(),
674 1 => "new_day_2".to_string(),
675 2 => "new_day_3".to_string(),
676 _ => format!("new_day_{}", file_idx + 1),
677 }
678 };
679
680 let (
682 baseline_entity_name,
683 baseline_created,
684 baseline_alias_name,
685 baseline_mount_path,
686 ) = if let Some(ref baseline_map) = baseline {
687 if let Some(baseline_entity) = baseline_map.get(entity_id) {
688 let name = baseline_entity.get_name();
689 let created = baseline_entity.get_created();
690 (
691 if !name.is_empty() { Some(name) } else { None },
692 if !created.is_empty() {
693 Some(created)
694 } else {
695 None
696 },
697 if !baseline_entity.alias_name.is_empty() {
698 Some(baseline_entity.alias_name.clone())
699 } else {
700 None
701 },
702 if !baseline_entity.mount_path.is_empty() {
703 Some(baseline_entity.mount_path.clone())
704 } else {
705 None
706 },
707 )
708 } else {
709 (None, None, None, None)
710 }
711 } else {
712 (None, None, None, None)
713 };
714
715 let (
717 historical_display_name,
718 historical_first_seen,
719 historical_last_seen,
720 historical_login_count,
721 ) = if let Some(ref mappings) = entity_mappings {
722 if let Some(mapping) = mappings.get(entity_id) {
723 (
724 Some(mapping.display_name.clone()),
725 Some(mapping.first_seen.clone()),
726 Some(mapping.last_seen.clone()),
727 Some(mapping.login_count),
728 )
729 } else {
730 (None, None, None, None)
731 }
732 } else {
733 (None, None, None, None)
734 };
735
736 entities.insert(
737 entity_id.clone(),
738 EntityChurnRecord {
739 entity_id: entity_id.clone(),
740 display_name: display_name.clone(),
741 mount_path: mount_path.clone(),
742 mount_type: mount_type.clone(),
743 token_type: token_type.clone(),
744 first_seen_file: file_name.clone(),
745 first_seen_time,
746 last_seen_file: file_name.clone(),
747 last_seen_time: first_seen_time,
748 files_appeared: vec![file_name.clone()],
749 total_logins: 1,
750 lifecycle,
751 activity_pattern: "unknown".to_string(), is_ephemeral_pattern: false, ephemeral_confidence: 0.0, ephemeral_reasons: Vec::new(), baseline_entity_name,
756 baseline_created,
757 baseline_alias_name,
758 baseline_mount_path,
759 historical_display_name,
760 historical_first_seen,
761 historical_last_seen,
762 historical_login_count,
763 },
764 );
765 }
766 }
767
768 progress.finish();
769
770 daily_stats.push(DailyStats {
771 file_name,
772 new_entities: new_entities_this_file,
773 returning_entities: returning_entities_this_file.len(),
774 total_logins: logins_this_file,
775 });
776
777 println!(
778 "Day {} Summary: {} new entities, {} returning, {} logins",
779 file_idx + 1,
780 format_number(new_entities_this_file),
781 format_number(returning_entities_this_file.len()),
782 format_number(logins_this_file)
783 );
784 }
785
786 println!("\nAnalyzing entity behavior patterns...");
788
789 let mut analyzer = EphemeralPatternAnalyzer::new(log_files.len());
790
791 analyzer.learn_from_entities(&entities);
793 println!(
794 "Learned from {} short-lived entity patterns",
795 format_number(analyzer.short_lived_patterns.len())
796 );
797
798 let entity_ids: Vec<String> = entities.keys().cloned().collect();
800 for entity_id in entity_ids {
801 if let Some(entity) = entities.get_mut(&entity_id) {
802 entity.activity_pattern = analyzer.classify_activity_pattern(entity);
804
805 let (is_ephemeral, confidence, reasons) = analyzer.analyze_entity(entity);
807 entity.is_ephemeral_pattern = is_ephemeral;
808 entity.ephemeral_confidence = confidence;
809 entity.ephemeral_reasons = reasons;
810 }
811 }
812
813 println!("\n=== Entity Churn Analysis ===\n");
815
816 println!("Daily Breakdown:");
817 for (idx, stats) in daily_stats.iter().enumerate() {
818 println!(
819 " Day {}: {} new, {} returning, {} total logins",
820 idx + 1,
821 format_number(stats.new_entities),
822 format_number(stats.returning_entities),
823 format_number(stats.total_logins)
824 );
825 }
826
827 let mut lifecycle_counts: HashMap<String, usize> = HashMap::new();
829 let mut entities_by_file_count: HashMap<usize, usize> = HashMap::new();
830
831 for entity in entities.values() {
832 *lifecycle_counts
833 .entry(entity.lifecycle.clone())
834 .or_insert(0) += 1;
835 *entities_by_file_count
836 .entry(entity.files_appeared.len())
837 .or_insert(0) += 1;
838 }
839
840 println!("\nEntity Lifecycle Classification:");
841 let mut lifecycle_vec: Vec<_> = lifecycle_counts.iter().collect();
842 lifecycle_vec.sort_by_key(|(k, _)| *k);
843 for (lifecycle, count) in lifecycle_vec {
844 println!(" {}: {}", lifecycle, format_number(*count));
845 }
846
847 println!("\nEntity Persistence:");
848 for day_count in 1..=log_files.len() {
849 if let Some(count) = entities_by_file_count.get(&day_count) {
850 let label = if day_count == 1 {
851 "Appeared 1 day only"
852 } else if day_count == log_files.len() {
853 "Appeared all days (persistent)"
854 } else {
855 "Appeared some days"
856 };
857 println!(
858 " {} day(s): {} entities ({})",
859 day_count,
860 format_number(*count),
861 label
862 );
863 }
864 }
865
866 let mut activity_pattern_counts: HashMap<String, usize> = HashMap::new();
868 let mut ephemeral_entities = Vec::new();
869
870 for entity in entities.values() {
871 *activity_pattern_counts
872 .entry(entity.activity_pattern.clone())
873 .or_insert(0) += 1;
874
875 if entity.is_ephemeral_pattern {
876 ephemeral_entities.push(entity.clone());
877 }
878 }
879
880 println!("\nActivity Pattern Distribution:");
881 let mut pattern_vec: Vec<_> = activity_pattern_counts.iter().collect();
882 pattern_vec.sort_by(|a, b| b.1.cmp(a.1));
883 for (pattern, count) in pattern_vec {
884 println!(" {}: {}", pattern, format_number(*count));
885 }
886
887 println!("\nEphemeral Entity Detection:");
888 println!(
889 " Detected {} likely ephemeral entities (confidence ≥ 0.4)",
890 format_number(ephemeral_entities.len())
891 );
892
893 if !ephemeral_entities.is_empty() {
894 ephemeral_entities.sort_by(|a, b| {
896 b.ephemeral_confidence
897 .partial_cmp(&a.ephemeral_confidence)
898 .unwrap_or(std::cmp::Ordering::Equal)
899 });
900
901 println!(" Top 10 by confidence:");
902 for (idx, entity) in ephemeral_entities.iter().take(10).enumerate() {
903 println!(
904 " {}. {} (confidence: {:.1}%)",
905 idx + 1,
906 entity.display_name,
907 entity.ephemeral_confidence * 100.0
908 );
909 for reason in &entity.ephemeral_reasons {
910 println!(" - {}", reason);
911 }
912 }
913
914 let high_conf = ephemeral_entities
916 .iter()
917 .filter(|e| e.ephemeral_confidence >= 0.7)
918 .count();
919 let med_conf = ephemeral_entities
920 .iter()
921 .filter(|e| e.ephemeral_confidence >= 0.5 && e.ephemeral_confidence < 0.7)
922 .count();
923 let low_conf = ephemeral_entities
924 .iter()
925 .filter(|e| e.ephemeral_confidence >= 0.4 && e.ephemeral_confidence < 0.5)
926 .count();
927
928 println!("\n Confidence distribution:");
929 println!(" High (≥70%): {}", format_number(high_conf));
930 println!(" Medium (50-69%): {}", format_number(med_conf));
931 println!(" Low (40-49%): {}", format_number(low_conf));
932 }
933
934 let mut mount_stats: HashMap<String, (usize, String)> = HashMap::new();
936 for entity in entities.values() {
937 let entry = mount_stats
938 .entry(entity.mount_path.clone())
939 .or_insert((0, entity.mount_type.clone()));
940 entry.0 += 1;
941 }
942
943 println!("\nTop Authentication Methods (Total Entities):");
944 let mut mount_vec: Vec<_> = mount_stats.iter().collect();
945 mount_vec.sort_by(|a, b| b.1 .0.cmp(&a.1 .0));
946
947 for (idx, (path, (count, mount_type))) in mount_vec.iter().take(20).enumerate() {
948 println!(
949 " {}. {} ({}): {}",
950 idx + 1,
951 path,
952 mount_type,
953 format_number(*count)
954 );
955 }
956
957 let github_entities: Vec<_> = entities
959 .values()
960 .filter(|e| e.mount_path.contains("/github"))
961 .collect();
962
963 if !github_entities.is_empty() {
964 println!("\n=== GitHub Entity Analysis ===");
965 println!(
966 "Total GitHub entities: {}",
967 format_number(github_entities.len())
968 );
969
970 let mut repo_counts: HashMap<String, usize> = HashMap::new();
972 for entity in &github_entities {
973 if let Some(repo) = entity.display_name.split(':').nth(1) {
975 *repo_counts.entry(repo.to_string()).or_insert(0) += 1;
976 }
977 }
978
979 println!("Unique repositories: {}", format_number(repo_counts.len()));
980 println!("\nTop repositories by entity count:");
981 let mut repo_vec: Vec<_> = repo_counts.iter().collect();
982 repo_vec.sort_by(|a, b| b.1.cmp(a.1));
983
984 for (idx, (repo, count)) in repo_vec.iter().take(20).enumerate() {
985 if **count > 1 {
986 println!(
987 " {}. {}: {} entities",
988 idx + 1,
989 repo,
990 format_number(**count)
991 );
992 }
993 }
994 }
995
996 if let Some(output_path) = output {
998 let mut entities_vec: Vec<_> = entities.into_values().collect();
999 entities_vec.sort_by(|a, b| a.first_seen_time.cmp(&b.first_seen_time));
1000
1001 let output_format = format.unwrap_or_else(|| {
1003 if output_path.ends_with(".csv") {
1004 "csv"
1005 } else {
1006 "json"
1007 }
1008 });
1009
1010 println!(
1011 "\nExporting detailed entity records to {} (format: {})...",
1012 output_path, output_format
1013 );
1014
1015 let output_file = File::create(output_path)
1016 .with_context(|| format!("Failed to create output file: {}", output_path))?;
1017
1018 match output_format {
1019 "csv" => {
1020 let mut writer = csv::Writer::from_writer(output_file);
1021 for entity in &entities_vec {
1022 let csv_record: EntityChurnRecordCsv = entity.clone().into();
1023 writer
1024 .serialize(&csv_record)
1025 .context("Failed to write CSV record")?;
1026 }
1027 writer.flush().context("Failed to flush CSV writer")?;
1028 }
1029 _ => {
1030 serde_json::to_writer_pretty(output_file, &entities_vec)
1032 .context("Failed to write JSON output")?;
1033 }
1034 }
1035
1036 println!(
1037 "Exported {} entity records",
1038 format_number(entities_vec.len())
1039 );
1040 }
1041
1042 println!("\n=== Analysis Complete ===\n");
1043 Ok(())
1044}