副标题测试 定位标题1
图片测试

文章测试
txt文件测试
定位标题2
代码测试
import pyshark
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from tqdm import tqdm
# Configuration: path of the capture file to analyse.
PCAP_FILE = "example.pcapng"  # replace with the path to your own PCAP file


def _new_byte_counter():
    """Return an empty ``domain -> bytes`` counter for a single device."""
    return defaultdict(int)


# Accumulators filled while the capture is parsed:
#   device_traffic: device -> domain -> bytes
#   device_domains: device -> set of domains
device_traffic = defaultdict(_new_byte_counter)
device_domains = defaultdict(set)
# Parse the capture and record, per source IP, the TLS SNI host names seen and
# the size of the packets that carried them.
print("正在解析 PCAP 文件,请稍候...")

# NOTE: because of the "|| ip" clause this filter matches every IPv4 packet,
# not only TLS handshakes; packets without an SNI are skipped in the loop.
DISPLAY_FILTER = 'tls.handshake.extensions_server_name || ip'


def _open_capture():
    """Open PCAP_FILE with the shared display filter.

    ``keep_packets=False`` stops pyshark from caching every parsed packet in
    memory, which is unnecessary because each packet is visited only once.
    """
    return pyshark.FileCapture(
        PCAP_FILE,
        display_filter=DISPLAY_FILTER,
        use_json=True,
        include_raw=False,
        keep_packets=False,
    )


# First pass: count the packets so the progress bar has a total. The same
# display filter is used as in the second pass so that the total matches the
# number of packets actually iterated, and the capture is always closed so the
# underlying tshark process is not leaked.
counting_capture = _open_capture()
try:
    total_packets = sum(1 for _ in counting_capture)
finally:
    counting_capture.close()
print(f"总包数估计:{total_packets}")

# Second pass: extract the SNI of every packet that has one.
skipped_packets = 0
capture = _open_capture()
try:
    for pkt in tqdm(capture, total=total_packets):
        try:
            if 'IP' not in pkt:
                continue
            src_ip = pkt.ip.src
            # NOTE(review): with use_json=True pyshark may expose the SNI under
            # a nested path instead of this flat attribute name; confirm that
            # this check matches on a real capture.
            if hasattr(pkt, 'tls') and hasattr(pkt.tls, 'handshake_extensions_server_name'):
                domain = pkt.tls.handshake_extensions_server_name
                size = int(pkt.length)
                device_domains[src_ip].add(domain)
                device_traffic[src_ip][domain] += size
        except Exception:
            # Best effort: one malformed packet must not abort the whole run,
            # but keep a count so the failures are not silent.
            skipped_packets += 1
finally:
    capture.close()
print("解析完成。")
if skipped_packets:
    print(f"已跳过 {skipped_packets} 个无法解析的数据包")
# Flatten the per-device aggregates into one row per (device, domain) pair.
RECORD_COLUMNS = ['Device_IP', 'Domain', 'Traffic_Bytes']
records = []
for device, domains in device_domains.items():
    for domain in domains:
        records.append({
            'Device_IP': device,
            'Domain': domain,
            'Traffic_Bytes': device_traffic[device][domain],
        })

# Pass the columns explicitly: when no SNI was found, `records` is empty and
# pandas would otherwise build a frame with no columns at all, which makes any
# later column access (e.g. groupby('Device_IP')) raise KeyError and writes a
# CSV without a header row.
df = pd.DataFrame(records, columns=RECORD_COLUMNS)
# utf-8-sig writes a BOM at the start of the file.
df.to_csv("sni_device_traffic.csv", index=False, encoding='utf-8-sig')
print("数据已保存为 sni_device_traffic.csv")
# Visualisation.
# Apply the seaborn theme FIRST: sns.set() rewrites matplotlib's
# font.sans-serif list, so configuring the CJK-capable fonts before it (as the
# script used to) gets overwritten and the Chinese labels lose their font.
sns.set(style="whitegrid")
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei']

if df.empty:
    # Nothing was extracted from the capture; there is nothing to plot.
    print("未发现包含 SNI 的数据包,跳过可视化。")
else:
    # 1. Number of distinct domains contacted by each device.
    fig = plt.figure(figsize=(10, 6))
    domain_counts = df.groupby('Device_IP')['Domain'].nunique().sort_values(ascending=False)
    sns.barplot(x=domain_counts.index, y=domain_counts.values, palette='viridis')
    plt.xticks(rotation=45)
    plt.ylabel("唯一域名数")
    plt.title("不同设备访问网站数")
    plt.tight_layout()
    plt.savefig("device_domain_counts.png")
    plt.show()
    # Release the figure; otherwise every figure stays alive until exit.
    plt.close(fig)

    # 2. Per device: the five domains with the highest recorded byte count.
    for device in df['Device_IP'].unique():
        sub_df = df[df['Device_IP'] == device].sort_values(by='Traffic_Bytes', ascending=False).head(5)
        fig = plt.figure(figsize=(8, 5))
        sns.barplot(x='Traffic_Bytes', y='Domain', data=sub_df, palette='rocket')
        plt.title(f"设备 {device} 访问 Top 5 网站(按流量)")
        plt.xlabel("流量 (Bytes)")
        plt.tight_layout()
        plt.savefig(f"{device}_top5_sites.png")
        plt.show()
        # One figure is created per device, so close each one once it is saved.
        plt.close(fig)
以上代码使用 Python 对 Wireshark 的抓包文件(pcap)进行分类和可视化:通过读取 TLS 握手中的 SNI 字段,区分出各设备访问了哪些网站,并统计对应的流量。
www.castorice.online
管理员QQ 1059077940
Comments 4 条评论
您好,这是一条评论。若需要审核、编辑或删除评论,请访问仪表盘的评论界面。评论者头像来自 Gravatar。
喵喵喵喵
(•̀ω•́ 」∠)
hello