From 23df2d1304ece169d7e0dfc843dfb8026b413d9f Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 28 Nov 2017 17:26:04 -0500 Subject: [PATCH] bpo-32107 - Improve MAC address calculation and fix test_uuid.py (#4600) ``uuid.getnode()`` now preferentially returns universally administered MAC addresses if available, over locally administered MAC addresses. This makes a better guarantee for global uniqueness of UUIDs returned from ``uuid.uuid1()``. If only locally administered MAC addresses are available, the first such one found is returned. Also improve internal code style by being explicit about ``return None`` rather than falling off the end of the function. Improve the test robustness. --- Doc/library/uuid.rst | 16 +++-- Lib/test/test_uuid.py | 26 ++++--- Lib/uuid.py | 71 ++++++++++++++++--- .../2017-11-26-18-48-17.bpo-32107.h2ph2K.rst | 5 ++ 4 files changed, 89 insertions(+), 29 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index ea9ea7dc7d..8ec75a79ac 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -156,10 +156,18 @@ The :mod:`uuid` module defines the following functions: Get the hardware address as a 48-bit positive integer. The first time this runs, it may launch a separate program, which could be quite slow. If all - attempts to obtain the hardware address fail, we choose a random 48-bit number - with its eighth bit set to 1 as recommended in RFC 4122. "Hardware address" - means the MAC address of a network interface, and on a machine with multiple - network interfaces the MAC address of any one of them may be returned. + attempts to obtain the hardware address fail, we choose a random 48-bit + number with the multicast bit (least significant bit of the first octet) + set to 1 as recommended in RFC 4122. "Hardware address" means the MAC + address of a network interface. On a machine with multiple network + interfaces, universally administered MAC addresses (i.e. where the second + least significant bit of the first octet is *unset*) will be preferred over + locally administered MAC addresses, but with no other ordering guarantees. + + .. versionchanged:: 3.7 + Universally administered MAC addresses are preferred over locally + administered MAC addresses, since the former are guaranteed to be + globally unique, while the latter are not. .. index:: single: getnode diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 083c2aa8aa..f113c55120 100644 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -512,59 +512,57 @@ eth0 Link encap:Ethernet HWaddr 12:34:56:78:90:ab self.assertEqual(mac, 0x1234567890ab) - def check_node(self, node, requires=None, network=False): + def check_node(self, node, requires=None): if requires and node is None: self.skipTest('requires ' + requires) hex = '%012x' % node if support.verbose >= 2: print(hex, end=' ') - if network: - # 47 bit will never be set in IEEE 802 addresses obtained - # from network cards. - self.assertFalse(node & 0x010000000000, hex) self.assertTrue(0 < node < (1 << 48), "%s is not an RFC 4122 node ID" % hex) @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_ifconfig_getnode(self): node = self.uuid._ifconfig_getnode() - self.check_node(node, 'ifconfig', True) + self.check_node(node, 'ifconfig') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_ip_getnode(self): node = self.uuid._ip_getnode() - self.check_node(node, 'ip', True) + self.check_node(node, 'ip') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_arp_getnode(self): node = self.uuid._arp_getnode() - self.check_node(node, 'arp', True) + self.check_node(node, 'arp') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_lanscan_getnode(self): node = self.uuid._lanscan_getnode() - self.check_node(node, 'lanscan', True) + self.check_node(node, 'lanscan') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_netstat_getnode(self): node = self.uuid._netstat_getnode() - self.check_node(node, 'netstat', True) + self.check_node(node, 'netstat') @unittest.skipUnless(os.name == 'nt', 'requires Windows') def test_ipconfig_getnode(self): node = self.uuid._ipconfig_getnode() - self.check_node(node, 'ipconfig', True) + self.check_node(node, 'ipconfig') @unittest.skipUnless(importable('win32wnet'), 'requires win32wnet') @unittest.skipUnless(importable('netbios'), 'requires netbios') def test_netbios_getnode(self): node = self.uuid._netbios_getnode() - self.check_node(node, network=True) + self.check_node(node) def test_random_getnode(self): node = self.uuid._random_getnode() - # Least significant bit of first octet must be set. - self.assertTrue(node & 0x010000000000, '%012x' % node) + # The multicast bit, i.e. the least significant bit of first octet, + # must be set for randomly generated MAC addresses. See RFC 4122, + # $4.1.6. + self.assertTrue(node & (1 << 40), '%012x' % node) self.check_node(node) @unittest.skipUnless(os.name == 'posix', 'requires Posix') diff --git a/Lib/uuid.py b/Lib/uuid.py index 020c6e73c8..cb2bc092bd 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -342,11 +342,30 @@ def _popen(command, *args): env=env) return proc +# For MAC (a.k.a. IEEE 802, or EUI-48) addresses, the second least significant +# bit of the first octet signifies whether the MAC address is universally (0) +# or locally (1) administered. Network cards from hardware manufacturers will +# always be universally administered to guarantee global uniqueness of the MAC +# address, but any particular machine may have other interfaces which are +# locally administered. An example of the latter is the bridge interface to +# the Touch Bar on MacBook Pros. +# +# This bit works out to be the 42nd bit counting from 1 being the least +# significant, or 1<<41. We'll prefer universally administered MAC addresses +# over locally administered ones since the former are globally unique, but +# we'll return the first of the latter found if that's all the machine has. +# +# See https://en.wikipedia.org/wiki/MAC_address#Universal_vs._local + +def _is_universal(mac): + return not (mac & (1 << 41)) + def _find_mac(command, args, hw_identifiers, get_index): + first_local_mac = None try: proc = _popen(command, *args.split()) if not proc: - return + return None with proc: for line in proc.stdout: words = line.lower().rstrip().split() @@ -355,8 +374,9 @@ def _find_mac(command, args, hw_identifiers, get_index): try: word = words[get_index(i)] mac = int(word.replace(b':', b''), 16) - if mac: + if _is_universal(mac): return mac + first_local_mac = first_local_mac or mac except (ValueError, IndexError): # Virtual interfaces, such as those provided by # VPNs, do not have a colon-delimited MAC address @@ -366,6 +386,7 @@ def _find_mac(command, args, hw_identifiers, get_index): pass except OSError: pass + return first_local_mac or None def _ifconfig_getnode(): """Get the hardware address on Unix by running ifconfig.""" @@ -375,6 +396,7 @@ def _ifconfig_getnode(): mac = _find_mac('ifconfig', args, keywords, lambda i: i+1) if mac: return mac + return None def _ip_getnode(): """Get the hardware address on Unix by running ip.""" @@ -382,6 +404,7 @@ def _ip_getnode(): mac = _find_mac('ip', 'link list', [b'link/ether'], lambda i: i+1) if mac: return mac + return None def _arp_getnode(): """Get the hardware address on Unix by running arp.""" @@ -404,8 +427,10 @@ def _arp_getnode(): # This works on Linux, FreeBSD and NetBSD mac = _find_mac('arp', '-an', [os.fsencode('(%s)' % ip_addr)], lambda i: i+2) + # Return None instead of 0. if mac: return mac + return None def _lanscan_getnode(): """Get the hardware address on Unix by running lanscan.""" @@ -415,32 +440,36 @@ def _lanscan_getnode(): def _netstat_getnode(): """Get the hardware address on Unix by running netstat.""" # This might work on AIX, Tru64 UNIX. + first_local_mac = None try: proc = _popen('netstat', '-ia') if not proc: - return + return None with proc: words = proc.stdout.readline().rstrip().split() try: i = words.index(b'Address') except ValueError: - return + return None for line in proc.stdout: try: words = line.rstrip().split() word = words[i] if len(word) == 17 and word.count(b':') == 5: mac = int(word.replace(b':', b''), 16) - if mac: + if _is_universal(mac): return mac + first_local_mac = first_local_mac or mac except (ValueError, IndexError): pass except OSError: pass + return first_local_mac or None def _ipconfig_getnode(): """Get the hardware address on Windows by running ipconfig.exe.""" import os, re + first_local_mac = None dirs = ['', r'c:\windows\system32', r'c:\winnt\system32'] try: import ctypes @@ -458,18 +487,23 @@ def _ipconfig_getnode(): for line in pipe: value = line.split(':')[-1].strip().lower() if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value): - return int(value.replace('-', ''), 16) + mac = int(value.replace('-', ''), 16) + if _is_universal(mac): + return mac + first_local_mac = first_local_mac or mac + return first_local_mac or None def _netbios_getnode(): """Get the hardware address on Windows using NetBIOS calls. See http://support.microsoft.com/kb/118623 for details.""" import win32wnet, netbios + first_local_mac = None ncb = netbios.NCB() ncb.Command = netbios.NCBENUM ncb.Buffer = adapters = netbios.LANA_ENUM() adapters._pack() if win32wnet.Netbios(ncb) != 0: - return + return None adapters._unpack() for i in range(adapters.length): ncb.Reset() @@ -488,7 +522,11 @@ def _netbios_getnode(): bytes = status.adapter_address[:6] if len(bytes) != 6: continue - return int.from_bytes(bytes, 'big') + mac = int.from_bytes(bytes, 'big') + if _is_universal(mac): + return mac + first_local_mac = first_local_mac or mac + return first_local_mac or None _generate_time_safe = _UuidCreate = None @@ -601,9 +639,19 @@ def _windll_getnode(): return UUID(bytes=bytes_(_buffer.raw)).node def _random_getnode(): - """Get a random node ID, with eighth bit set as suggested by RFC 4122.""" + """Get a random node ID.""" + # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or + # pseudo-randomly generated value may be used; see Section 4.5. The + # multicast bit must be set in such addresses, in order that they will + # never conflict with addresses obtained from network cards." + # + # The "multicast bit" of a MAC address is defined to be "the least + # significant bit of the first octet". This works out to be the 41st bit + # counting from 1 being the least significant bit, or 1<<40. + # + # See https://en.wikipedia.org/wiki/MAC_address#Unicast_vs._multicast import random - return random.getrandbits(48) | 0x010000000000 + return random.getrandbits(48) | (1 << 40) _node = None @@ -626,13 +674,14 @@ def getnode(): getters = [_unix_getnode, _ifconfig_getnode, _ip_getnode, _arp_getnode, _lanscan_getnode, _netstat_getnode] - for getter in getters + [_random_getnode]: + for getter in getters: try: _node = getter() except: continue if _node is not None: return _node + return _random_getnode() _last_timestamp = None diff --git a/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst b/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst new file mode 100644 index 0000000000..b26daa7b1b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst @@ -0,0 +1,5 @@ +``uuid.getnode()`` now preferentially returns universally administered MAC +addresses if available, over locally administered MAC addresses. This makes a +better guarantee for global uniqueness of UUIDs returned from +``uuid.uuid1()``. If only locally administered MAC addresses are available, +the first such one found is returned. -- 2.40.0