From 99eab512f610d396ddbd1f3cd3b8b5822a0718a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20De=20Keersmaeker?=
<francois.dekeersmaeker@uclouvain.be>
Date: Fri, 13 Dec 2024 15:11:06 +0100
Subject: [PATCH] MAC anon.: used SHA256 to ensure anonymization determinism
---
pcap_anonymize/layers/mac.py | 47 ++++++++++++++++++++++++++++++------
test/test_mac.py | 27 ++++++---------------
2 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/pcap_anonymize/layers/mac.py b/pcap_anonymize/layers/mac.py
index edf72e4..7193fde 100644
--- a/pcap_anonymize/layers/mac.py
+++ b/pcap_anonymize/layers/mac.py
@@ -2,11 +2,12 @@
Anonymize MAC addresses.
"""
-import random
-import secrets
+from hashlib import sha256
from scapy.layers.l2 import Ether, ARP
+from scapy.layers.dhcp import DHCP
BASE_HEX = 16
+BYTE_ORDER = "big"
# Special, well-known MAC addresses
special_macs = [
@@ -78,17 +79,33 @@ def anonymize_mac(mac: str) -> str:
ul_bit = get_ul_bit(mac)
is_local = bool(ul_bit) # True ==> LAA, False ==> UAA
- # Locally administered address
+ ## Locally administered address
if is_local:
bit_mask = ig_bit | ul_bit
- first_byte = (random.getrandbits(6) << 2) | bit_mask # Keep I/G and U/L bits
- return f"{first_byte:02x}:" + ':'.join(secrets.token_hex(1) for _ in range(5))
+
+ # Compute SHA-256 hash of the MAC address
+ mac_sha256 = sha256()
+ for byte in mac_split:
+ mac_sha256.update(int(byte, BASE_HEX).to_bytes(1, BYTE_ORDER))
+ digest = mac_sha256.digest()
+
+ first_byte = (digest[0] & 0b11111100) | bit_mask # Keep I/G and U/L bits
+ return f"{first_byte:02x}:" + ':'.join(f"{digest[i]:02x}" for i in range(1, 6))
+
+
+ ## Universally administered address
- # Universally administered address
+ # Compute SHA-256 hash based on the three least-significant bytes
+ mac_sha256 = sha256()
+ for byte in mac_split[3:]:
+ mac_sha256.update(int(byte, BASE_HEX).to_bytes(1, BYTE_ORDER))
+ digest = mac_sha256.digest()
+
+ # Keep OUI and anonymize the rest
return (
- ':'.join(mac_split[:3]) + # Keep OUI
+ ':'.join(mac_split[:3]) + # Keep OUI
':' +
- ':'.join(secrets.token_hex(1) for _ in range(3)) # Random last 3 bytes
+ ':'.join(f"{digest[i]:02x}" for i in range(0, 3)) # Hashed last 3 bytes
)
@@ -118,3 +135,17 @@ def anonymize_arp(arp: ARP) -> ARP:
arp.setfieldval("hwsrc", anonymize_mac(arp.getfieldval("hwsrc")))
arp.setfieldval("hwdst", anonymize_mac(arp.getfieldval("hwdst")))
return arp
+
+
+def anonymize_dhcp(dhcp: DHCP) -> DHCP:
+ """
+ Anonymize the client hardware address ("chaddr") read through a DHCP layer.
+
+ Args:
+ dhcp (scapy.DHCP): DHCP layer to anonymize; it is modified in place
+ Returns:
+ scapy.DHCP: the same layer object that was passed in
+ """
+ # NOTE(review): scapy defines "chaddr" on BOOTP as raw bytes, not on DHCP -- confirm this works with the str-based anonymize_mac
+ dhcp.setfieldval("chaddr", anonymize_mac(dhcp.getfieldval("chaddr")))
+ return dhcp
diff --git a/test/test_mac.py b/test/test_mac.py
index 13ae6a7..7e72333 100644
--- a/test/test_mac.py
+++ b/test/test_mac.py
@@ -1,8 +1,5 @@
from pcap_anonymize.layers.mac import get_ig_bit, get_ul_bit, anonymize_mac
-# Number of random MAC addresses to generate per unit test
-N_TESTS = 5
-
### TEST FUNCTIONS ###
@@ -41,15 +38,11 @@ def test_anonymize_mac_laa() -> None:
All bits should be anonymized except the I/G and U/L bits.
"""
mac_laa = "02:00:00:00:00:00"
-
- # Generate N anonymized MAC addresses,
- # and verify they are correct
- for _ in range(N_TESTS):
- mac_laa_anon = anonymize_mac(mac_laa)
- assert mac_laa_anon != mac_laa
- # Verify I/G and U/L bits
- assert get_ig_bit(mac_laa) == get_ig_bit(mac_laa_anon)
- assert get_ul_bit(mac_laa) == get_ul_bit(mac_laa_anon)
+ mac_laa_anon = anonymize_mac(mac_laa)
+ assert mac_laa_anon != mac_laa
+ # Verify I/G and U/L bits
+ assert get_ig_bit(mac_laa) == get_ig_bit(mac_laa_anon)
+ assert get_ul_bit(mac_laa) == get_ul_bit(mac_laa_anon)
def test_anonymize_mac_uaa() -> None:
@@ -60,10 +53,6 @@ def test_anonymize_mac_uaa() -> None:
and the 3 last bytes should be anonymized.
"""
mac_uaa = "00:11:22:33:44:55"
-
- # Generate N anonymized MAC addresses,
- # and verify they are correct
- for _ in range(N_TESTS):
- mac_uaa_anon = anonymize_mac(mac_uaa)
- assert mac_uaa_anon.startswith(mac_uaa[:8]) # Vendor's OUI is kept
- assert mac_uaa_anon[10:] != mac_uaa[10:] # Last 3 bytes are anonymized
+ mac_uaa_anon = anonymize_mac(mac_uaa)
+ assert mac_uaa_anon.startswith(mac_uaa[:8]) # Vendor's OUI is kept
+ assert mac_uaa_anon[10:] != mac_uaa[10:] # Last 3 bytes are anonymized
--
GitLab