1use crate::audit::types::AuditEntry;
102use crate::utils::progress::ProgressBar;
103use crate::utils::reader::open_file;
104use anyhow::{Context, Result};
105use chrono::{DateTime, Utc};
106use serde::{Deserialize, Serialize};
107use std::collections::{HashMap, HashSet};
108use std::fs::File;
109use std::io::{BufRead, BufReader};
110use std::path::Path;
111
/// Historical per-entity metadata loaded from an entity-map JSON file
/// (see `load_entity_mappings`); keyed by entity_id in the containing map.
#[derive(Debug, Serialize, Deserialize)]
struct EntityMapping {
    /// Display name recorded for the entity in earlier audit logs.
    display_name: String,
    /// Auth path the entity previously logged in through.
    mount_path: String,
    // NOTE(review): the `#[allow(dead_code)]` attributes below look stale —
    // `login_count`, `first_seen` and `last_seen` are read during historical
    // enrichment in `run`. Left in place to avoid changing code in a doc pass.
    #[allow(dead_code)]
    mount_accessor: String,
    #[allow(dead_code)]
    login_count: usize,
    #[allow(dead_code)]
    first_seen: String,
    #[allow(dead_code)]
    last_seen: String,
}
126
/// Per-entity accumulator built while streaming the audit logs, then
/// enriched by the pattern analyzer and by optional baseline / historical
/// metadata. This is the JSON export shape; `EntityChurnRecordCsv` is the
/// flattened CSV variant.
#[derive(Debug, Serialize, Clone)]
struct EntityChurnRecord {
    entity_id: String,
    /// Display name from the first login seen (falls back to entity_id).
    display_name: String,
    /// Login request path (e.g. "auth/github/login") from the first login.
    mount_path: String,
    mount_type: String,
    token_type: String,
    /// Log file (day) in which the entity first appeared.
    first_seen_file: String,
    first_seen_time: DateTime<Utc>,
    last_seen_file: String,
    last_seen_time: DateTime<Utc>,
    /// Distinct file names the entity appeared in; its length is the
    /// number of active days.
    files_appeared: Vec<String>,
    total_logins: usize,
    /// Lifecycle label such as "pre_existing_baseline" or "new_day_N".
    lifecycle: String,
    /// One of "single_burst" / "consistent" / "declining" / "sporadic"
    /// (set by the analyzer; "unknown" until then).
    activity_pattern: String,
    is_ephemeral_pattern: bool,
    /// Heuristic score in [0, 1]; >= 0.4 marks the entity ephemeral.
    ephemeral_confidence: f32,
    ephemeral_reasons: Vec<String>,
    // Optional enrichment from the Vault API baseline export.
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_entity_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_created: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_alias_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    baseline_mount_path: Option<String>,
    // Optional enrichment from the historical entity-map file.
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_display_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_first_seen: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_last_seen: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    historical_login_count: Option<usize>,
}
165
/// Flattened, CSV-friendly mirror of `EntityChurnRecord`: timestamps become
/// RFC 3339 strings, vectors are joined into single cells, and `Option`
/// fields collapse to empty strings (see the `From` impl below).
#[derive(Debug, Serialize)]
struct EntityChurnRecordCsv {
    entity_id: String,
    display_name: String,
    mount_path: String,
    mount_type: String,
    token_type: String,
    first_seen_file: String,
    first_seen_time: String,
    last_seen_file: String,
    last_seen_time: String,
    /// Comma-joined list of file names the entity appeared in.
    files_appeared: String,
    /// Derived column: number of distinct files (days) of activity.
    days_active: usize,
    total_logins: usize,
    lifecycle: String,
    activity_pattern: String,
    is_ephemeral_pattern: bool,
    ephemeral_confidence: f32,
    /// Semicolon-joined analyzer reasons.
    ephemeral_reasons: String,
    baseline_entity_name: String,
    baseline_created: String,
    baseline_alias_name: String,
    baseline_mount_path: String,
    historical_display_name: String,
    historical_first_seen: String,
    historical_last_seen: String,
    historical_login_count: String,
}
195
196impl From<EntityChurnRecord> for EntityChurnRecordCsv {
197 fn from(record: EntityChurnRecord) -> Self {
198 EntityChurnRecordCsv {
199 entity_id: record.entity_id,
200 display_name: record.display_name,
201 mount_path: record.mount_path,
202 mount_type: record.mount_type,
203 token_type: record.token_type,
204 first_seen_file: record.first_seen_file,
205 first_seen_time: record.first_seen_time.to_rfc3339(),
206 last_seen_file: record.last_seen_file,
207 last_seen_time: record.last_seen_time.to_rfc3339(),
208 files_appeared: record.files_appeared.join(", "),
209 days_active: record.files_appeared.len(),
210 total_logins: record.total_logins,
211 lifecycle: record.lifecycle,
212 activity_pattern: record.activity_pattern,
213 is_ephemeral_pattern: record.is_ephemeral_pattern,
214 ephemeral_confidence: record.ephemeral_confidence,
215 ephemeral_reasons: record.ephemeral_reasons.join("; "),
216 baseline_entity_name: record.baseline_entity_name.unwrap_or_default(),
217 baseline_created: record.baseline_created.unwrap_or_default(),
218 baseline_alias_name: record.baseline_alias_name.unwrap_or_default(),
219 baseline_mount_path: record.baseline_mount_path.unwrap_or_default(),
220 historical_display_name: record.historical_display_name.unwrap_or_default(),
221 historical_first_seen: record.historical_first_seen.unwrap_or_default(),
222 historical_last_seen: record.historical_last_seen.unwrap_or_default(),
223 historical_login_count: record
224 .historical_login_count
225 .map(|n| n.to_string())
226 .unwrap_or_default(),
227 }
228 }
229}
230
/// Per-file (per-day) counters collected during processing and printed in
/// the "Daily Breakdown" section of the report.
#[derive(Debug)]
struct DailyStats {
    #[allow(dead_code)]
    file_name: String,
    /// Entities first seen in this file.
    new_entities: usize,
    /// Distinct entities already seen in an earlier file.
    returning_entities: usize,
    /// Login requests counted in this file (including ones without an
    /// entity_id).
    total_logins: usize,
}
239
/// Learns short-lived entity patterns from one pass over all records
/// (`learn_from_entities`) and then scores individual entities against
/// that population (`analyze_entity`, `classify_activity_pattern`).
#[derive(Debug)]
struct EphemeralPatternAnalyzer {
    /// Number of log files (days) being analyzed.
    total_files: usize,
    /// Fingerprints of entities seen on at most 2 days.
    short_lived_patterns: Vec<ShortLivedPattern>,
}
246
/// Minimal fingerprint of a short-lived entity, kept for similarity
/// matching (same mount path, or same `prefix:` in the display name).
#[derive(Debug)]
struct ShortLivedPattern {
    days_active: usize,
    display_name: String,
    mount_path: String,
}
253
254impl EphemeralPatternAnalyzer {
255 fn new(total_files: usize) -> Self {
256 Self {
257 total_files,
258 short_lived_patterns: Vec::new(),
259 }
260 }
261
262 fn learn_from_entities(&mut self, entities: &HashMap<String, EntityChurnRecord>) {
264 for entity in entities.values() {
265 let days_active = entity.files_appeared.len();
266
267 if days_active <= 2 {
269 self.short_lived_patterns.push(ShortLivedPattern {
270 days_active,
271 display_name: entity.display_name.clone(),
272 mount_path: entity.mount_path.clone(),
273 });
274 }
275 }
276 }
277
278 fn analyze_entity(&self, entity: &EntityChurnRecord) -> (bool, f32, Vec<String>) {
280 let days_active = entity.files_appeared.len();
281 let mut confidence = 0.0;
282 let mut reasons = Vec::new();
283
284 if days_active == 1 {
286 confidence += 0.5;
287 reasons.push(format!("Appeared only 1 day ({})", entity.first_seen_file));
288 } else if days_active == 2 {
289 confidence += 0.3;
290 reasons.push(format!(
291 "Appeared only 2 days: {}, {}",
292 entity.files_appeared.first().unwrap_or(&String::new()),
293 entity.files_appeared.last().unwrap_or(&String::new())
294 ));
295 }
296
297 if days_active <= 2 {
299 let similar_count = self
301 .short_lived_patterns
302 .iter()
303 .filter(|p| {
304 if p.mount_path == entity.mount_path && p.days_active <= 2 {
306 return true;
307 }
308 if entity.display_name.contains(':') && p.display_name.contains(':') {
310 let entity_prefix = entity.display_name.split(':').next().unwrap_or("");
311 let pattern_prefix = p.display_name.split(':').next().unwrap_or("");
312 if entity_prefix == pattern_prefix && !entity_prefix.is_empty() {
313 return true;
314 }
315 }
316 false
317 })
318 .count();
319
320 if similar_count > 5 {
321 confidence += 0.2;
322 reasons.push(format!(
323 "Matches pattern seen in {} other short-lived entities",
324 similar_count
325 ));
326 } else if similar_count > 0 {
327 confidence += 0.1;
328 reasons.push(format!(
329 "Similar to {} other short-lived entities",
330 similar_count
331 ));
332 }
333 }
334
335 if entity.total_logins <= 5 && days_active <= 2 {
337 confidence += 0.1;
338 reasons.push(format!(
339 "Low activity: only {} login(s)",
340 entity.total_logins
341 ));
342 }
343
344 if days_active >= 2 {
346 let first_day_idx = entity.files_appeared.first().and_then(|f| {
347 f.split('_')
348 .next_back()
349 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
350 });
351 let last_day_idx = entity.files_appeared.last().and_then(|f| {
352 f.split('_')
353 .next_back()
354 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
355 });
356
357 if let (Some(first), Some(last)) = (first_day_idx, last_day_idx) {
358 let span = last - first + 1;
359 if span > days_active {
360 confidence *= 0.7;
362 reasons.push(
363 "Has gaps in activity (possibly sporadic access, not churned)".to_string(),
364 );
365 }
366 }
367 }
368
369 confidence = f32::min(confidence, 1.0);
371 let is_ephemeral = confidence >= 0.4; if is_ephemeral && days_active < self.total_files {
375 reasons.push(format!(
376 "Not seen in most recent {} file(s)",
377 self.total_files - days_active
378 ));
379 }
380
381 (is_ephemeral, confidence, reasons)
382 }
383
384 fn classify_activity_pattern(&self, entity: &EntityChurnRecord) -> String {
386 let days_active = entity.files_appeared.len();
387
388 if days_active == 1 {
389 return "single_burst".to_string();
390 }
391
392 if days_active == self.total_files {
393 return "consistent".to_string();
394 }
395
396 if days_active >= (self.total_files * 2) / 3 {
397 return "consistent".to_string();
398 }
399
400 if let (Some(_first_file), Some(last_file)) =
402 (entity.files_appeared.first(), entity.files_appeared.last())
403 {
404 let last_file_num = last_file
406 .split('_')
407 .next_back()
408 .and_then(|s| s.trim_end_matches(".log").parse::<usize>().ok())
409 .unwrap_or(self.total_files);
410
411 if last_file_num < self.total_files / 2 {
412 return "declining".to_string();
413 }
414 }
415
416 if days_active <= 2 {
417 return "single_burst".to_string();
418 }
419
420 "sporadic".to_string()
421 }
422}
423
/// Formats an integer with comma thousands separators,
/// e.g. `1234567` -> `"1,234,567"`.
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut out = String::with_capacity(len + len / 3);
    for (idx, ch) in digits.chars().enumerate() {
        // A comma goes before every group of three digits counted from
        // the right, but never before the very first digit.
        if idx > 0 && (len - idx) % 3 == 0 {
            out.push(',');
        }
        out.push(ch);
    }
    out
}
435
436fn get_file_size(path: &str) -> Result<u64> {
437 Ok(std::fs::metadata(path)?.len())
438}
439
440fn load_entity_mappings(path: &str) -> Result<HashMap<String, EntityMapping>> {
441 let file = File::open(path).context("Failed to open entity map file")?;
442 let mappings: HashMap<String, EntityMapping> =
443 serde_json::from_reader(file).context("Failed to parse entity map JSON")?;
444 Ok(mappings)
445}
446
/// One record from the Vault API baseline export (JSON array element or
/// CSV row). Every field except `entity_id` defaults to empty/false when
/// absent so partial exports still deserialize.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct BaselineEntity {
    entity_id: String,
    #[serde(default)]
    entity_name: String,
    #[serde(default)]
    entity_disabled: bool,
    #[serde(default)]
    entity_created: String,
    #[serde(default)]
    entity_updated: String,
    #[serde(default)]
    alias_id: String,
    #[serde(default)]
    alias_name: String,
    #[serde(default)]
    mount_path: String,
    #[serde(default)]
    mount_type: String,
    #[serde(default)]
    mount_accessor: String,
    #[serde(default)]
    alias_created: String,
    #[serde(default)]
    alias_updated: String,
    #[serde(default)]
    alias_metadata: String,
}
477
478impl BaselineEntity {
479 fn get_name(&self) -> String {
481 if !self.entity_name.is_empty() {
482 self.entity_name.clone()
483 } else if !self.alias_name.is_empty() {
484 self.alias_name.clone()
485 } else {
486 String::new()
487 }
488 }
489
490 fn get_created(&self) -> String {
492 self.entity_created.clone()
493 }
494}
495
496fn load_baseline_entities(path: &str) -> Result<HashMap<String, BaselineEntity>> {
497 let file = File::open(path).context("Failed to open baseline entities file")?;
498
499 let path_lower = path.to_lowercase();
501 if path_lower.ends_with(".json") {
502 let entities: Vec<BaselineEntity> =
504 serde_json::from_reader(file).context("Failed to parse baseline entities JSON")?;
505 Ok(entities
506 .into_iter()
507 .map(|e| (e.entity_id.clone(), e))
508 .collect())
509 } else {
510 let mut reader = csv::Reader::from_reader(file);
512 let mut entities = HashMap::new();
513
514 for result in reader.deserialize() {
515 let entity: BaselineEntity = result.context("Failed to parse baseline CSV row")?;
516 entities.entry(entity.entity_id.clone()).or_insert(entity);
518 }
519
520 Ok(entities)
521 }
522}
523
524pub fn run(
525 log_files: &[String],
526 entity_map: Option<&str>,
527 baseline_entities: Option<&str>,
528 output: Option<&str>,
529 format: Option<&str>,
530) -> Result<()> {
531 println!("\n=== Multi-Day Entity Churn Analysis ===\n");
532 println!("Analyzing {} log files:", log_files.len());
533 for (i, file) in log_files.iter().enumerate() {
534 let size = get_file_size(file)?;
535 println!(
536 " Day {}: {} ({:.2} GB)",
537 i + 1,
538 file,
539 size as f64 / 1_000_000_000.0
540 );
541 }
542 println!();
543
544 let baseline = if let Some(path) = baseline_entities {
546 println!(
547 "Loading baseline entity list (Vault API metadata) from {}...",
548 path
549 );
550 let baseline_set = load_baseline_entities(path)?;
551 println!(
552 "Loaded {} pre-existing entities from Vault API baseline",
553 format_number(baseline_set.len())
554 );
555 println!();
556 Some(baseline_set)
557 } else {
558 println!("No baseline entity list provided. Cannot distinguish truly NEW entities from pre-existing.");
559 println!(" All Day 1 entities will be marked as 'pre_existing_or_new_day_1'.");
560 println!(" To get accurate results, run: ./vault-audit entity-list --output baseline_entities.json\n");
561 None
562 };
563
564 let entity_mappings = if let Some(path) = entity_map {
566 println!(
567 "Loading historical entity mappings (audit log enrichment) from {}...",
568 path
569 );
570 let mappings = load_entity_mappings(path)?;
571 println!(
572 "Loaded {} entity mappings with historical audit log data",
573 format_number(mappings.len())
574 );
575 println!();
576 Some(mappings)
577 } else {
578 None
579 };
580
581 let mut entities: HashMap<String, EntityChurnRecord> = HashMap::new();
583 let mut daily_stats: Vec<DailyStats> = Vec::new();
584
585 for (file_idx, log_file) in log_files.iter().enumerate() {
587 let file_name = Path::new(log_file)
588 .file_name()
589 .unwrap()
590 .to_string_lossy()
591 .to_string();
592
593 println!("\nProcessing Day {} ({})...", file_idx + 1, file_name);
594
595 let file = open_file(log_file)
596 .with_context(|| format!("Failed to open log file: {}", log_file))?;
597 let file_size = get_file_size(log_file)? as usize;
598
599 let reader = BufReader::new(file);
600 let mut progress = ProgressBar::new(file_size, "Processing");
601
602 let mut new_entities_this_file = 0;
603 let mut returning_entities_this_file = HashSet::new();
604 let mut logins_this_file = 0;
605 let mut bytes_processed = 0;
606
607 for line in reader.lines() {
608 let line = line.context("Failed to read line from log file")?;
609 bytes_processed += line.len() + 1; if bytes_processed % 10_000 == 0 {
613 progress.update(bytes_processed.min(file_size));
614 }
615
616 let trimmed = line.trim();
617 if trimmed.is_empty() {
618 continue;
619 }
620
621 let entry: AuditEntry = match serde_json::from_str(trimmed) {
622 Ok(e) => e,
623 Err(_) => continue,
624 };
625
626 let Some(ref request) = entry.request else {
628 continue;
629 };
630 let Some(ref path) = request.path else {
631 continue;
632 };
633 if !path.ends_with("/login") {
634 continue;
635 }
636
637 logins_this_file += 1;
638
639 let Some(ref auth) = entry.auth else {
641 continue;
642 };
643 let Some(ref entity_id) = auth.entity_id else {
644 continue;
645 };
646
647 let display_name = auth
648 .display_name
649 .clone()
650 .unwrap_or_else(|| entity_id.clone());
651 let mount_path = request.path.clone().unwrap_or_default();
652 let mount_type = request.mount_type.clone().unwrap_or_default();
653 let token_type = auth.token_type.clone().unwrap_or_default();
654
655 let first_seen_time = chrono::DateTime::parse_from_rfc3339(&entry.time)
657 .ok()
658 .map(|dt| dt.with_timezone(&Utc))
659 .unwrap_or_else(Utc::now);
660
661 if let Some(entity_record) = entities.get_mut(entity_id) {
663 entity_record.total_logins += 1;
665 entity_record.last_seen_file = file_name.clone();
666 entity_record.last_seen_time = first_seen_time;
667 if !entity_record.files_appeared.contains(&file_name) {
668 entity_record.files_appeared.push(file_name.clone());
669 }
670 returning_entities_this_file.insert(entity_id.clone());
671 } else {
672 new_entities_this_file += 1;
674
675 let lifecycle = if let Some(ref baseline_set) = baseline {
677 if baseline_set.contains_key(entity_id) {
678 "pre_existing_baseline".to_string()
679 } else {
680 match file_idx {
682 0 => "new_day_1".to_string(),
683 1 => "new_day_2".to_string(),
684 2 => "new_day_3".to_string(),
685 _ => format!("new_day_{}", file_idx + 1),
686 }
687 }
688 } else {
689 match file_idx {
691 0 => "pre_existing_or_new_day_1".to_string(),
692 1 => "new_day_2".to_string(),
693 2 => "new_day_3".to_string(),
694 _ => format!("new_day_{}", file_idx + 1),
695 }
696 };
697
698 let (
700 baseline_entity_name,
701 baseline_created,
702 baseline_alias_name,
703 baseline_mount_path,
704 ) = if let Some(ref baseline_map) = baseline {
705 if let Some(baseline_entity) = baseline_map.get(entity_id) {
706 let name = baseline_entity.get_name();
707 let created = baseline_entity.get_created();
708 (
709 if !name.is_empty() { Some(name) } else { None },
710 if !created.is_empty() {
711 Some(created)
712 } else {
713 None
714 },
715 if !baseline_entity.alias_name.is_empty() {
716 Some(baseline_entity.alias_name.clone())
717 } else {
718 None
719 },
720 if !baseline_entity.mount_path.is_empty() {
721 Some(baseline_entity.mount_path.clone())
722 } else {
723 None
724 },
725 )
726 } else {
727 (None, None, None, None)
728 }
729 } else {
730 (None, None, None, None)
731 };
732
733 let (
735 historical_display_name,
736 historical_first_seen,
737 historical_last_seen,
738 historical_login_count,
739 ) = if let Some(ref mappings) = entity_mappings {
740 if let Some(mapping) = mappings.get(entity_id) {
741 (
742 Some(mapping.display_name.clone()),
743 Some(mapping.first_seen.clone()),
744 Some(mapping.last_seen.clone()),
745 Some(mapping.login_count),
746 )
747 } else {
748 (None, None, None, None)
749 }
750 } else {
751 (None, None, None, None)
752 };
753
754 entities.insert(
755 entity_id.clone(),
756 EntityChurnRecord {
757 entity_id: entity_id.clone(),
758 display_name: display_name.clone(),
759 mount_path: mount_path.clone(),
760 mount_type: mount_type.clone(),
761 token_type: token_type.clone(),
762 first_seen_file: file_name.clone(),
763 first_seen_time,
764 last_seen_file: file_name.clone(),
765 last_seen_time: first_seen_time,
766 files_appeared: vec![file_name.clone()],
767 total_logins: 1,
768 lifecycle,
769 activity_pattern: "unknown".to_string(), is_ephemeral_pattern: false, ephemeral_confidence: 0.0, ephemeral_reasons: Vec::new(), baseline_entity_name,
774 baseline_created,
775 baseline_alias_name,
776 baseline_mount_path,
777 historical_display_name,
778 historical_first_seen,
779 historical_last_seen,
780 historical_login_count,
781 },
782 );
783 }
784 }
785
786 progress.finish();
787
788 daily_stats.push(DailyStats {
789 file_name,
790 new_entities: new_entities_this_file,
791 returning_entities: returning_entities_this_file.len(),
792 total_logins: logins_this_file,
793 });
794
795 println!(
796 "Day {} Summary: {} new entities, {} returning, {} logins",
797 file_idx + 1,
798 format_number(new_entities_this_file),
799 format_number(returning_entities_this_file.len()),
800 format_number(logins_this_file)
801 );
802 }
803
804 println!("\nAnalyzing entity behavior patterns...");
806
807 let mut analyzer = EphemeralPatternAnalyzer::new(log_files.len());
808
809 analyzer.learn_from_entities(&entities);
811 println!(
812 "Learned from {} short-lived entity patterns",
813 format_number(analyzer.short_lived_patterns.len())
814 );
815
816 let entity_ids: Vec<String> = entities.keys().cloned().collect();
818 for entity_id in entity_ids {
819 if let Some(entity) = entities.get_mut(&entity_id) {
820 entity.activity_pattern = analyzer.classify_activity_pattern(entity);
822
823 let (is_ephemeral, confidence, reasons) = analyzer.analyze_entity(entity);
825 entity.is_ephemeral_pattern = is_ephemeral;
826 entity.ephemeral_confidence = confidence;
827 entity.ephemeral_reasons = reasons;
828 }
829 }
830
831 println!("\n=== Entity Churn Analysis ===\n");
833
834 println!("Daily Breakdown:");
835 for (idx, stats) in daily_stats.iter().enumerate() {
836 println!(
837 " Day {}: {} new, {} returning, {} total logins",
838 idx + 1,
839 format_number(stats.new_entities),
840 format_number(stats.returning_entities),
841 format_number(stats.total_logins)
842 );
843 }
844
845 let mut lifecycle_counts: HashMap<String, usize> = HashMap::new();
847 let mut entities_by_file_count: HashMap<usize, usize> = HashMap::new();
848
849 for entity in entities.values() {
850 *lifecycle_counts
851 .entry(entity.lifecycle.clone())
852 .or_insert(0) += 1;
853 *entities_by_file_count
854 .entry(entity.files_appeared.len())
855 .or_insert(0) += 1;
856 }
857
858 println!("\nEntity Lifecycle Classification:");
859 let mut lifecycle_vec: Vec<_> = lifecycle_counts.iter().collect();
860 lifecycle_vec.sort_by_key(|(k, _)| *k);
861 for (lifecycle, count) in lifecycle_vec {
862 println!(" {}: {}", lifecycle, format_number(*count));
863 }
864
865 println!("\nEntity Persistence:");
866 for day_count in 1..=log_files.len() {
867 if let Some(count) = entities_by_file_count.get(&day_count) {
868 let label = if day_count == 1 {
869 "Appeared 1 day only"
870 } else if day_count == log_files.len() {
871 "Appeared all days (persistent)"
872 } else {
873 "Appeared some days"
874 };
875 println!(
876 " {} day(s): {} entities ({})",
877 day_count,
878 format_number(*count),
879 label
880 );
881 }
882 }
883
884 let mut activity_pattern_counts: HashMap<String, usize> = HashMap::new();
886 let mut ephemeral_entities = Vec::new();
887
888 for entity in entities.values() {
889 *activity_pattern_counts
890 .entry(entity.activity_pattern.clone())
891 .or_insert(0) += 1;
892
893 if entity.is_ephemeral_pattern {
894 ephemeral_entities.push(entity.clone());
895 }
896 }
897
898 println!("\nActivity Pattern Distribution:");
899 let mut pattern_vec: Vec<_> = activity_pattern_counts.iter().collect();
900 pattern_vec.sort_by(|a, b| b.1.cmp(a.1));
901 for (pattern, count) in pattern_vec {
902 println!(" {}: {}", pattern, format_number(*count));
903 }
904
905 println!("\nEphemeral Entity Detection:");
906 println!(
907 " Detected {} likely ephemeral entities (confidence ≥ 0.4)",
908 format_number(ephemeral_entities.len())
909 );
910
911 if !ephemeral_entities.is_empty() {
912 ephemeral_entities.sort_by(|a, b| {
914 b.ephemeral_confidence
915 .partial_cmp(&a.ephemeral_confidence)
916 .unwrap_or(std::cmp::Ordering::Equal)
917 });
918
919 println!(" Top 10 by confidence:");
920 for (idx, entity) in ephemeral_entities.iter().take(10).enumerate() {
921 println!(
922 " {}. {} (confidence: {:.1}%)",
923 idx + 1,
924 entity.display_name,
925 entity.ephemeral_confidence * 100.0
926 );
927 for reason in &entity.ephemeral_reasons {
928 println!(" - {}", reason);
929 }
930 }
931
932 let high_conf = ephemeral_entities
934 .iter()
935 .filter(|e| e.ephemeral_confidence >= 0.7)
936 .count();
937 let med_conf = ephemeral_entities
938 .iter()
939 .filter(|e| e.ephemeral_confidence >= 0.5 && e.ephemeral_confidence < 0.7)
940 .count();
941 let low_conf = ephemeral_entities
942 .iter()
943 .filter(|e| e.ephemeral_confidence >= 0.4 && e.ephemeral_confidence < 0.5)
944 .count();
945
946 println!("\n Confidence distribution:");
947 println!(" High (≥70%): {}", format_number(high_conf));
948 println!(" Medium (50-69%): {}", format_number(med_conf));
949 println!(" Low (40-49%): {}", format_number(low_conf));
950 }
951
952 let mut mount_stats: HashMap<String, (usize, String)> = HashMap::new();
954 for entity in entities.values() {
955 let entry = mount_stats
956 .entry(entity.mount_path.clone())
957 .or_insert((0, entity.mount_type.clone()));
958 entry.0 += 1;
959 }
960
961 println!("\nTop Authentication Methods (Total Entities):");
962 let mut mount_vec: Vec<_> = mount_stats.iter().collect();
963 mount_vec.sort_by(|a, b| b.1 .0.cmp(&a.1 .0));
964
965 for (idx, (path, (count, mount_type))) in mount_vec.iter().take(20).enumerate() {
966 println!(
967 " {}. {} ({}): {}",
968 idx + 1,
969 path,
970 mount_type,
971 format_number(*count)
972 );
973 }
974
975 let github_entities: Vec<_> = entities
977 .values()
978 .filter(|e| e.mount_path.contains("/github"))
979 .collect();
980
981 if !github_entities.is_empty() {
982 println!("\n=== GitHub Entity Analysis ===");
983 println!(
984 "Total GitHub entities: {}",
985 format_number(github_entities.len())
986 );
987
988 let mut repo_counts: HashMap<String, usize> = HashMap::new();
990 for entity in &github_entities {
991 if let Some(repo) = entity.display_name.split(':').nth(1) {
993 *repo_counts.entry(repo.to_string()).or_insert(0) += 1;
994 }
995 }
996
997 println!("Unique repositories: {}", format_number(repo_counts.len()));
998 println!("\nTop repositories by entity count:");
999 let mut repo_vec: Vec<_> = repo_counts.iter().collect();
1000 repo_vec.sort_by(|a, b| b.1.cmp(a.1));
1001
1002 for (idx, (repo, count)) in repo_vec.iter().take(20).enumerate() {
1003 if **count > 1 {
1004 println!(
1005 " {}. {}: {} entities",
1006 idx + 1,
1007 repo,
1008 format_number(**count)
1009 );
1010 }
1011 }
1012 }
1013
1014 if let Some(output_path) = output {
1016 let mut entities_vec: Vec<_> = entities.into_values().collect();
1017 entities_vec.sort_by(|a, b| a.first_seen_time.cmp(&b.first_seen_time));
1018
1019 let output_format = format.unwrap_or_else(|| {
1021 if output_path.ends_with(".csv") {
1022 "csv"
1023 } else {
1024 "json"
1025 }
1026 });
1027
1028 println!(
1029 "\nExporting detailed entity records to {} (format: {})...",
1030 output_path, output_format
1031 );
1032
1033 let output_file = File::create(output_path)
1034 .with_context(|| format!("Failed to create output file: {}", output_path))?;
1035
1036 match output_format {
1037 "csv" => {
1038 let mut writer = csv::Writer::from_writer(output_file);
1039 for entity in &entities_vec {
1040 let csv_record: EntityChurnRecordCsv = entity.clone().into();
1041 writer
1042 .serialize(&csv_record)
1043 .context("Failed to write CSV record")?;
1044 }
1045 writer.flush().context("Failed to flush CSV writer")?;
1046 }
1047 _ => {
1048 serde_json::to_writer_pretty(output_file, &entities_vec)
1050 .context("Failed to write JSON output")?;
1051 }
1052 }
1053
1054 println!(
1055 "Exported {} entity records",
1056 format_number(entities_vec.len())
1057 );
1058 }
1059
1060 println!("\n=== Analysis Complete ===\n");
1061 Ok(())
1062}