spot_img

How Machine Learning and Semantic Embeddings Reorder CVE Vulnerabilities Beyond Raw CVSS Scores

Date:

- Advertisement -spot_img
- Advertisement -spot_img


def visualize_results(df, priority_scores, feature_importance):
    """Draw a 2x3 matplotlib dashboard summarising the analysis.

    Panels (row, column):
        (0, 0) histogram of ``priority_scores`` with a 75th-percentile marker
        (0, 1) scatter of ``df['cvss_score']`` against ``priority_scores``
        (0, 2) bar chart of ``df['severity']`` counts
        (1, 0) horizontal bar chart of the first 10 rows of
               ``feature_importance``
        (1, 1) bar chart of ``df['cluster']`` counts, drawn only when the
               column exists (the panel is left empty otherwise)
        (1, 2) pie chart of ``df['attack_vector']`` counts

    Args:
        df: DataFrame providing the 'cvss_score', 'severity' and
            'attack_vector' columns, and optionally 'cluster'.
        priority_scores: Numeric array-like of per-row priority scores; it is
            plotted against ``df['cvss_score']``, so it is expected to align
            row-for-row with ``df``.
        feature_importance: DataFrame with 'feature' and 'importance' columns.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    fig.suptitle('Vulnerability Scanner – ML Analysis Dashboard', fontsize=16, fontweight="bold")

    # Priority score distribution.
    axes[0, 0].hist(priority_scores, bins=30, color="crimson", alpha=0.7, edgecolor="black")
    axes[0, 0].set_xlabel('Priority Score')
    axes[0, 0].set_ylabel('Frequency')
    axes[0, 0].set_title('Priority Score Distribution')
    # "--" is matplotlib's dashed line style; the typographic en dash that
    # replaced it in the published listing is not a valid linestyle.
    axes[0, 0].axvline(np.percentile(priority_scores, 75), color="orange", linestyle="--", label="75th percentile")
    axes[0, 0].legend()

    # CVSS score versus ML priority.
    axes[0, 1].scatter(df['cvss_score'], priority_scores, alpha=0.6, c=priority_scores, cmap='RdYlGn_r', s=50)
    axes[0, 1].set_xlabel('CVSS Score')
    axes[0, 1].set_ylabel('ML Priority Score')
    axes[0, 1].set_title('CVSS vs ML Priority')
    # Faint dashed black reference line from (0, 0) to (10, 1).
    # NOTE(review): presumably the "priority proportional to CVSS" diagonal,
    # assuming CVSS spans 0-10 and priority spans 0-1 -- confirm.
    axes[0, 1].plot([0, 10], [0, 1], 'k--', alpha=0.3)

    # Severity distribution; labels missing from the palette fall back to gray.
    severity_counts = df['severity'].value_counts()
    colors = {'CRITICAL': 'darkred', 'HIGH': 'red', 'MEDIUM': 'orange', 'LOW': 'yellow'}
    axes[0, 2].bar(severity_counts.index, severity_counts.values, color=[colors.get(s, 'gray') for s in severity_counts.index])
    axes[0, 2].set_xlabel('Severity')
    axes[0, 2].set_ylabel('Count')
    axes[0, 2].set_title('Severity Distribution')
    axes[0, 2].tick_params(axis="x", rotation=45)

    # First ten rows of the feature-importance table.
    top_features = feature_importance.head(10)
    axes[1, 0].barh(top_features['feature'], top_features['importance'], color="steelblue")
    axes[1, 0].set_xlabel('Importance')
    axes[1, 0].set_title('Top 10 Feature Importance')
    # Flip the y axis so the first row is drawn at the top.
    axes[1, 0].invert_yaxis()

    # Cluster sizes, only when clustering results are present in df.
    if 'cluster' in df.columns:
        cluster_counts = df['cluster'].value_counts().sort_index()
        axes[1, 1].bar(cluster_counts.index, cluster_counts.values, color="teal", alpha=0.7)
        axes[1, 1].set_xlabel('Cluster')
        axes[1, 1].set_ylabel('Count')
        axes[1, 1].set_title('Vulnerability Clusters')

    # Attack vector share.
    attack_vector_counts = df['attack_vector'].value_counts()
    axes[1, 2].pie(attack_vector_counts.values, labels=attack_vector_counts.index, autopct="%1.1f%%", startangle=90)
    axes[1, 2].set_title('Attack Vector Distribution')

    plt.tight_layout()
    plt.show()

def main():
    """Run the full pipeline: fetch, featurise, train, score, cluster, report.

    Steps, in order:
        1. Fetch CVEs with ``CVEDataFetcher.fetch_recent_cves`` and print a
           dataset overview.
        2. Build semantic embeddings from the descriptions, then add keyword
           and encoded categorical features to the DataFrame.
        3. Train the prioritizer on severity labels and CVSS scores, then
           score the same rows.
        4. Cluster the embeddings and attach the cluster analysis to the
           DataFrame.
        5. Print feature importances and the ten highest-priority CVEs, show
           the dashboard, and print a priority-band summary.

    Returns:
        tuple: ``(df, prioritizer, analyzer)`` -- the result DataFrame
        (including the 'ml_priority_score' and 'predicted_score' columns),
        the ``VulnerabilityPrioritizer`` and the ``VulnerabilityAnalyzer``.
    """
    print("=" * 70)
    print("AI-ASSISTED VULNERABILITY SCANNER WITH ML PRIORITIZATION")
    print("=" * 70)
    print()

    fetcher = CVEDataFetcher()
    df = fetcher.fetch_recent_cves(days=30, max_results=50)

    print("Dataset Overview:")
    print(f" Total CVEs: {len(df)}")
    # 'published' is sliced to its first 10 characters.
    # NOTE(review): presumably an ISO timestamp string, so this keeps the
    # YYYY-MM-DD part -- confirm against the fetcher.
    print(f" Date Range: {df['published'].min()[:10]} to {df['published'].max()[:10]}")
    print(f" Severity Breakdown: {df['severity'].value_counts().to_dict()}")
    print()

    feature_extractor = VulnerabilityFeatureExtractor()
    embeddings = feature_extractor.extract_semantic_features(df['description'].tolist())
    df = feature_extractor.extract_keyword_features(df)
    df = feature_extractor.encode_categorical_features(df)

    prioritizer = VulnerabilityPrioritizer()
    X = prioritizer.prepare_features(df, embeddings)

    severity_map = {'LOW': 0, 'MEDIUM': 1, 'HIGH': 2, 'CRITICAL': 3, 'UNKNOWN': 1}
    # Series.map yields NaN for labels absent from the mapping; fold those
    # into the UNKNOWN class so the training labels stay integer-valued.
    y_severity = (
        df['severity']
        .map(severity_map)
        .fillna(severity_map['UNKNOWN'])
        .astype(int)
        .values
    )
    y_score = df['cvss_score'].values

    # The return value of train_models was bound to an unused local in the
    # original listing; only the call itself is needed here.
    prioritizer.train_models(X, y_severity, y_score)
    priority_scores, severity_probs, score_preds = prioritizer.predict_priority(X)
    df['ml_priority_score'] = priority_scores
    df['predicted_score'] = score_preds

    analyzer = VulnerabilityAnalyzer(n_clusters=5)
    clusters = analyzer.cluster_vulnerabilities(embeddings)
    df = analyzer.analyze_clusters(df, clusters)

    feature_imp, emb_imp = prioritizer.get_feature_importance()
    # The "\n" escapes below lost their backslash in the published listing,
    # which would have printed a literal "n".
    print("\n— Feature Importance —")
    print(feature_imp.head(10))
    print(f"\nAverage embedding importance: {emb_imp:.4f}")

    print("\n" + "=" * 70)
    print("TOP 10 PRIORITY VULNERABILITIES")
    print("=" * 70)
    top_vulns = df.nlargest(10, 'ml_priority_score')[
        ['cve_id', 'cvss_score', 'ml_priority_score', 'severity', 'description']
    ]
    for _, row in top_vulns.iterrows():
        print(f"\n{row['cve_id']} [Priority: {row['ml_priority_score']:.3f}]")
        print(f" CVSS: {row['cvss_score']:.1f} | Severity: {row['severity']}")
        print(f" {row['description'][:100]}…")

    print("\n\nGenerating visualizations…")
    visualize_results(df, priority_scores, feature_imp)

    print("\n" + "=" * 70)
    print("ANALYSIS COMPLETE")
    print("=" * 70)
    print("\nResults summary:")
    # Bands: high is > 0.7, medium is 0.4..0.7 inclusive, low is < 0.4.
    print(f" High Priority (>0.7): {(priority_scores > 0.7).sum()} vulnerabilities")
    print(f" Medium Priority (0.4-0.7): {((priority_scores >= 0.4) & (priority_scores <= 0.7)).sum()}")
    print(f" Low Priority (<0.4): {(priority_scores < 0.4).sum()}")
    return df, prioritizer, analyzer

# Script entry point: run the pipeline and keep its results bound at module
# level so they remain available after the run (e.g. in a notebook or REPL).
if __name__ == "__main__":
    results_df, prioritizer, analyzer = main()
    # The leading "\n" escapes lost their backslash in the published listing.
    print("\n✓ All analyses completed successfully!")
    print("\nYou can now:")
    print(" – Access results via 'results_df' DataFrame")
    print(" – Use 'prioritizer' to predict new vulnerabilities")
    print(" – Explore 'analyzer' for clustering insights")



Source link

- Advertisement -spot_img

LEAVE A REPLY

Please enter your comment!
Please enter your name here

66 − 65 =
Powered by MathCaptcha

Share post:

Subscribe

spot_img

Popular

More like this
Related

Crypto Victory Ahead? This Senator’s Decision Clears Path For Market Structure Bill Approval

Trusted Editorial content, reviewed by leading industry experts...

Russia Advances Crypto Seizure Bill In New Regulatory Push

Trusted Editorial content, reviewed by leading industry experts...

Enterprise AI adoption shifts to agentic systems

According to Databricks, enterprise AI adoption is shifting...

Stocks making the biggest moves after hours: TXN, STX, QRVO

Check out the companies making headlines in after-hours...