首篇文章 标题测试

猫萌 发布于 2025-04-09 最后更新于 2025-04-10 408 次阅读


副标题测试 定位标题1

图片测试

文章测试

txt文件测试

定位标题2

代码测试

import pyshark
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from tqdm import tqdm
# Configuration: path of the capture file to analyse.
PCAP_FILE = 'example.pcapng'  # replace with the path to your own PCAP


def _new_byte_counter():
    """Return a fresh domain -> byte-count mapping that starts every key at 0."""
    return defaultdict(int)


# Accumulators populated while the capture is parsed.
device_traffic = defaultdict(_new_byte_counter)  # device -> domain -> bytes
device_domains = defaultdict(set)  # device -> set of domains seen
# Load the capture and attribute TLS SNI traffic to source devices.
def _open_capture():
    """Open PCAP_FILE with the display filter shared by both passes.

    The filter keeps every IPv4 packet as well as every TLS handshake that
    carries a server_name (SNI) extension.
    """
    return pyshark.FileCapture(
        PCAP_FILE,
        display_filter='tls.handshake.extensions_server_name || ip',
        use_json=True,
        include_raw=False
    )


print("正在解析 PCAP 文件,请稍候...")

# First pass: count packets for the progress bar. It must use the same display
# filter as the parsing pass, otherwise the total does not match what the loop
# iterates. The capture is closed so it does not stay open behind the script.
counting_capture = _open_capture()
try:
    total_packets = sum(1 for _ in counting_capture)
finally:
    counting_capture.close()
print(f"总包数估计:{total_packets}")

# Second pass: collect SNI host names and packet sizes per source IP.
skipped_packets = 0  # packets dropped because a field could not be parsed
capture = _open_capture()
try:
    for pkt in tqdm(capture, total=total_packets):
        try:
            # Only IPv4 packets are attributed to a device.
            if 'IP' not in pkt:
                continue
            src_ip = pkt.ip.src
            dst_ip = pkt.ip.dst
            # Record the SNI host name when the packet carries one.
            if hasattr(pkt, 'tls') and hasattr(pkt.tls, 'handshake_extensions_server_name'):
                domain = pkt.tls.handshake_extensions_server_name
                size = int(pkt.length)
                device_domains[src_ip].add(domain)
                device_traffic[src_ip][domain] += size
        except Exception:
            # Best effort: one malformed packet must not abort the whole run,
            # but keep a count so the loss is reported.
            skipped_packets += 1
finally:
    # Close the capture even if the loop is interrupted.
    capture.close()
if skipped_packets:
    print(f"已跳过 {skipped_packets} 个无法解析的数据包。")
print("解析完成。")
# Flatten the per-device accumulators into one row per (device, domain) pair.
records = []
for device, domains in device_domains.items():
    # Sets iterate in arbitrary order; sorting keeps the CSV reproducible.
    for domain in sorted(domains):
        records.append({
            'Device_IP': device,
            'Domain': domain,
            'Traffic_Bytes': device_traffic[device][domain]
        })
# Name the columns explicitly so an empty capture still yields a frame (and a
# CSV header) with the expected schema instead of a column-less DataFrame.
df = pd.DataFrame(records, columns=['Device_IP', 'Domain', 'Traffic_Bytes'])
# utf-8-sig writes a BOM at the start of the file.
df.to_csv("sni_device_traffic.csv", index=False, encoding='utf-8-sig')
print("数据已保存为 sni_device_traffic.csv")
# Visual analysis.
# Apply the seaborn theme first: sns.set() rewrites font-related rcParams, so
# the CJK-capable font list has to be assigned afterwards or it is lost and
# the Chinese labels cannot be rendered.
sns.set(style="whitegrid")
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei']
# 1. Number of distinct domains contacted by each device.
if df.empty:
    # Nothing was collected; grouping a column-less frame would raise KeyError.
    print("没有可用的 SNI 记录,跳过设备域名数量图。")
else:
    fig = plt.figure(figsize=(10, 6))
    domain_counts = df.groupby('Device_IP')['Domain'].nunique().sort_values(ascending=False)
    # NOTE(review): seaborn >= 0.13 warns when `palette` is passed without
    # `hue`; left unchanged to stay compatible with older releases.
    sns.barplot(x=domain_counts.index, y=domain_counts.values, palette='viridis')
    plt.xticks(rotation=45)
    plt.ylabel("唯一域名数")
    plt.title("不同设备访问网站数")
    plt.tight_layout()
    plt.savefig("device_domain_counts.png")
    plt.show()
    # Release the figure once it has been saved and shown.
    plt.close(fig)
# 2. For every device, plot its five domains with the most traffic.
# An empty frame may have no 'Device_IP' column, so skip the loop entirely.
devices_to_plot = [] if df.empty else df['Device_IP'].unique()
for device in devices_to_plot:
    device_rows = df[df['Device_IP'] == device]
    sub_df = device_rows.sort_values(by='Traffic_Bytes', ascending=False).head(5)
    fig = plt.figure(figsize=(8, 5))
    sns.barplot(x='Traffic_Bytes', y='Domain', data=sub_df, palette='rocket')
    plt.title(f"设备 {device} 访问 Top 5 网站(按流量)")
    plt.xlabel("流量 (Bytes)")
    plt.tight_layout()
    plt.savefig(f"{device}_top5_sites.png")
    plt.show()
    # One figure is created per device; close it so figures do not pile up
    # in memory when the capture contains many devices.
    plt.close(fig)

以上代码使用 Python 对 Wireshark 的抓包文件(pcap)进行分类和可视化:通过读取 TLS 握手中的 SNI 字段,区分出各设备访问了哪些网站,并统计每个设备对各网站产生的流量大小。

www.castorice.online

管理员QQ 1059077940