diff --git a/pcap_anonymize/layers/mac.py b/pcap_anonymize/layers/mac.py
index edf72e4b4d313db60601004fbdd2c086de67ffad..7193fde6de116795720e3493cff8132776f57b57 100644
--- a/pcap_anonymize/layers/mac.py
+++ b/pcap_anonymize/layers/mac.py
@@ -2,11 +2,12 @@
 Anonymize MAC addresses.
 """
 
-import random
-import secrets
+from hashlib import sha256
 from scapy.layers.l2 import Ether, ARP
+from scapy.layers.dhcp import BOOTP, DHCP
 
 BASE_HEX = 16
+BYTE_ORDER = "big"
 
 # Special, well-known MAC addresses
 special_macs = [
@@ -78,17 +79,33 @@ def anonymize_mac(mac: str) -> str:
     ul_bit = get_ul_bit(mac)
     is_local = bool(ul_bit)  # True ==> LAA, False ==> UAA
 
-    # Locally administered address
+    ## Locally administered address
     if is_local:
         bit_mask = ig_bit | ul_bit
-        first_byte = (random.getrandbits(6) << 2) | bit_mask  # Keep I/G and U/L bits
-        return f"{first_byte:02x}:" + ':'.join(secrets.token_hex(1) for _ in range(5))
+
+        # Compute SHA-256 hash of the MAC address
+        mac_sha256 = sha256()
+        for byte in mac_split:
+            mac_sha256.update(int(byte, BASE_HEX).to_bytes(1, BYTE_ORDER))
+        digest = mac_sha256.digest()
+
+        first_byte = (digest[0] & 0b11111100) | bit_mask  # Keep I/G and U/L bits
+        return f"{first_byte:02x}:" + ':'.join(f"{digest[i]:02x}" for i in range(1, 6))
+
+
+    ## Universally administered address
 
-    # Universally administered address
+    # Compute SHA-256 hash based on the three least-significant bytes
+    mac_sha256 = sha256()
+    for byte in mac_split[3:]:
+        mac_sha256.update(int(byte, BASE_HEX).to_bytes(1, BYTE_ORDER))
+    digest = mac_sha256.digest()
+
+    # Keep OUI and anonymize the rest
     return (
-        ':'.join(mac_split[:3]) +  # Keep OUI
+        ':'.join(mac_split[:3]) +                          # Keep OUI
         ':' +
-        ':'.join(secrets.token_hex(1) for _ in range(3))  # Random last 3 bytes
+        ':'.join(f"{digest[i]:02x}" for i in range(0, 3))  # Hashed last 3 bytes
     )
 
 
@@ -118,3 +135,34 @@ def anonymize_arp(arp: ARP) -> ARP:
     arp.setfieldval("hwsrc", anonymize_mac(arp.getfieldval("hwsrc")))
     arp.setfieldval("hwdst", anonymize_mac(arp.getfieldval("hwdst")))
     return arp
+
+
+def anonymize_dhcp(dhcp: DHCP) -> DHCP:
+    """
+    Anonymize the client MAC address of a DHCP packet.
+
+    The client hardware address (chaddr) is a field of the BOOTP header,
+    not of the DHCP options layer, and scapy stores it as a raw,
+    zero-padded byte string instead of a colon-separated MAC string.
+
+    Args:
+        dhcp (scapy.DHCP or scapy.BOOTP): DHCP (or enclosing BOOTP) layer to anonymize
+    Returns:
+        scapy.DHCP or scapy.BOOTP: the layer given as argument, anonymized in place
+    """
+    n_mac_bytes = 6
+
+    # Find the BOOTP header, which is the layer actually holding chaddr
+    bootp = dhcp if isinstance(dhcp, BOOTP) else dhcp.underlayer
+    if not isinstance(bootp, BOOTP):
+        return dhcp
+
+    chaddr = bootp.getfieldval("chaddr")
+    if len(chaddr) < n_mac_bytes:
+        return dhcp  # No complete MAC address to anonymize
+
+    # Anonymize the MAC address (first 6 bytes), and keep the padding untouched
+    mac = ':'.join(f"{byte:02x}" for byte in chaddr[:n_mac_bytes])
+    mac_anon = bytes.fromhex(anonymize_mac(mac).replace(':', ''))
+    bootp.setfieldval("chaddr", mac_anon + chaddr[n_mac_bytes:])
+    return dhcp
diff --git a/test/test_mac.py b/test/test_mac.py
index 13ae6a7807f3f001883b710669220f502ca54ef2..7e72333be0cf6f5982f7610d9dc68037b16c9dc4 100644
--- a/test/test_mac.py
+++ b/test/test_mac.py
@@ -1,8 +1,5 @@
 from pcap_anonymize.layers.mac import get_ig_bit, get_ul_bit, anonymize_mac
 
-# Number of random MAC addresses to generate per unit test
-N_TESTS = 5
-
 
 ### TEST FUNCTIONS ###
 
@@ -41,15 +38,13 @@ def test_anonymize_mac_laa() -> None:
     All bits should be anonymized except the I/G and U/L bits.
     """
     mac_laa = "02:00:00:00:00:00"
-
-    # Generate N anonymized MAC addresses,
-    # and verify they are correct
-    for _ in range(N_TESTS):
-        mac_laa_anon = anonymize_mac(mac_laa)
-        assert mac_laa_anon != mac_laa
-        # Verify I/G and U/L bits
-        assert get_ig_bit(mac_laa) == get_ig_bit(mac_laa_anon)
-        assert get_ul_bit(mac_laa) == get_ul_bit(mac_laa_anon)
+    mac_laa_anon = anonymize_mac(mac_laa)
+    assert mac_laa_anon != mac_laa
+    # Anonymization is hash-based, hence deterministic
+    assert anonymize_mac(mac_laa) == mac_laa_anon
+    # Verify I/G and U/L bits
+    assert get_ig_bit(mac_laa) == get_ig_bit(mac_laa_anon)
+    assert get_ul_bit(mac_laa) == get_ul_bit(mac_laa_anon)
 
 
 def test_anonymize_mac_uaa() -> None:
@@ -60,10 +55,8 @@ def test_anonymize_mac_uaa() -> None:
     and the 3 last bytes should be anonymized.
     """
     mac_uaa = "00:11:22:33:44:55"
-
-    # Generate N anonymized MAC addresses,
-    # and verify they are correct
-    for _ in range(N_TESTS):
-        mac_uaa_anon = anonymize_mac(mac_uaa)
-        assert mac_uaa_anon.startswith(mac_uaa[:8])  # Vendor's OUI is kept
-        assert mac_uaa_anon[10:] != mac_uaa[10:]  # Last 3 bytes are anonymized
+    mac_uaa_anon = anonymize_mac(mac_uaa)
+    # Anonymization is hash-based, hence deterministic
+    assert anonymize_mac(mac_uaa) == mac_uaa_anon
+    assert mac_uaa_anon.startswith(mac_uaa[:8])  # Vendor's OUI is kept
+    assert mac_uaa_anon[9:] != mac_uaa[9:]  # Last 3 bytes are anonymized