Linux的组播地址管理和网卡的组播混杂模式

July 05, 2024 | 7 Minute Read

linux内核源码

https://elixir.bootlin.com/linux/latest/source/net/ipv6/addrconf.c
https://elixir.bootlin.com/linux/latest/source/net/ipv6/mcast.c
https://elixir.bootlin.com/linux/latest/source/drivers/net/ethernet/intel/e1000e/netdev.c

从设置组播地址到设置网卡mac地址过滤白名单的流程

addrconf_notify
  ipv6_add_dev
    ipv6_dev_mc_inc
      ip6_mc_add_src
          dev_mc_add
             e1000e_set_rx_mode
               e1000e_write_mc_addr_list
           igmp6_join_group
           mld_ifc_event

mld_gq_work
    mld_send_report

ip6_mc_source
ip6_mc_add_src
/*
 * Abridged excerpt of ipv6_add_dev(): only the part that joins the default
 * IPv6 multicast groups on a device is shown. The opening brace, the local
 * declarations (including ndev) and the rest of the body are elided here.
 */
static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
	/* The default group joins are skipped for blackhole_netdev */
	if (dev != blackhole_netdev) {
		/* Join interface-local all-node multicast group */
		ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);

		/* Join all-node multicast group */
		ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);

		/* Join all-router multicast group if forwarding is set */
		/* NOTE(review): ndev is declared in the elided part — presumably
		 * the inet6_dev being set up for dev; confirm against the full source.
		 */
		if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
	}
}



/*
 * Add @addr to @dev's multicast list (dev->mc) and, when that succeeds,
 * re-run the device's RX-mode setup. Both steps happen between
 * netif_addr_lock_bh() and netif_addr_unlock_bh().
 *
 * @global is forwarded unchanged to __hw_addr_add_ex().
 *
 * Returns the result of __hw_addr_add_ex(); on a non-zero result the
 * RX mode is left untouched.
 */
static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
			bool global)
{
	int rc;

	netif_addr_lock_bh(dev);

	rc = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
			      NETDEV_HW_ADDR_T_MULTICAST, global, false,
			      0, false);
	if (rc == 0) {
		/* The list changed: let the RX-mode path pick it up */
		__dev_set_rx_mode(dev);
	}

	netif_addr_unlock_bh(dev);

	return rc;
}
/**
 *	dev_mc_add - register a multicast address on a device
 *	@dev: device whose multicast list is updated
 *	@addr: multicast hardware address to add
 *
 *	Adds @addr to the device's multicast list, or bumps its
 *	reference count when the address is already present.
 *	Thin wrapper around __dev_mc_add() with global = false;
 *	the return value is passed through unchanged.
 */
int dev_mc_add(struct net_device *dev, const unsigned char *addr)
{
	const bool global = false;

	return __dev_mc_add(dev, addr, global);
}
EXPORT_SYMBOL(dev_mc_add);






/**
 * e1000e_set_rx_mode - program unicast/multicast filtering and promiscuity
 * @netdev: network interface device structure
 *
 * This is the ndo_set_rx_mode entry point, invoked whenever the unicast or
 * multicast address list or the interface flags change.  It rebuilds the
 * UPE/MPE bits of RCTL from the current flags and address lists, toggles
 * hardware VLAN filtering accordingly, and finally syncs VLAN tag stripping
 * with the NETIF_F_HW_VLAN_CTAG_RX feature bit.
 *
 * Does nothing while the parent device is runtime-suspended.
 **/
static void e1000e_set_rx_mode(struct net_device *netdev)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	/* Not named below, but presumably consumed by the er32()/ew32()
	 * macros — keep the variable and its name.
	 */
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl;

	if (pm_runtime_suspended(netdev->dev.parent))
		return;

	/* Start from the current RCTL with both promiscuous bits cleared */
	rctl = er32(RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE);

	if (netdev->flags & IFF_PROMISC) {
		/* Full promiscuous: accept all unicast and all multicast, and
		 * stop filtering VLANs in hardware.
		 */
		rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE;
		e1000e_vlan_filter_disable(adapter);
	} else {
		/* Multicast: either the user asked for all-multi, or the MTA
		 * could not be written, in which case fall back to multicast
		 * promiscuous (MPE) so multicast traffic is still received.
		 */
		if (netdev->flags & IFF_ALLMULTI)
			rctl |= E1000_RCTL_MPE;
		else if (e1000e_write_mc_addr_list(netdev) < 0)
			rctl |= E1000_RCTL_MPE;

		e1000e_vlan_filter_enable(adapter);

		/* Unicast: if the addresses cannot all be written to the RAR
		 * registers, fall back to unicast promiscuous (UPE).
		 */
		if (e1000e_write_uc_addr_list(netdev) < 0)
			rctl |= E1000_RCTL_UPE;
	}

	ew32(RCTL, rctl);

	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
		e1000e_vlan_strip_enable(adapter);
	else
		e1000e_vlan_strip_disable(adapter);
}




/**
 * e1000e_write_mc_addr_list - write multicast addresses to MTA
 * @netdev: network interface device structure
 *
 * Writes multicast address list to the MTA hash table.
 * Returns: -ENOMEM on failure
 *                0 on no addresses written
 *                X on writing X addresses to MTA
 */
static int e1000e_write_mc_addr_list(struct net_device *netdev)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct netdev_hw_addr *ha;
	u8 *mta_list;
	int i;

	if (netdev_mc_empty(netdev)) {
		/* nothing to program, so clear mc list */
		hw->mac.ops.update_mc_addr_list(hw, NULL, 0);
		return 0;
	}

	mta_list = kcalloc(netdev_mc_count(netdev), ETH_ALEN, GFP_ATOMIC);
	if (!mta_list)
		return -ENOMEM;

	/* update_mc_addr_list expects a packed array of only addresses. */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
	    memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	hw->mac.ops.update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}

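驱动代码注释说得很明白了:组播 mac 白名单也是一个列表,配置在网卡的硬件里面(e1000e 里是 MTA 哈希表)。如果写入失败(从上面的代码看,e1000e_write_mc_addr_list 只在 kcalloc 分配内存失败时返回 -ENOMEM),驱动就直接置位 RCTL 寄存器的 E1000_RCTL_MPE,也就是和设置了 IFF_ALLMULTI 一样,把硬件的组播混杂模式打开。单播地址则是 RAR 寄存器装不下时,置位 E1000_RCTL_UPE 打开单播混杂模式。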

linux 查看向网卡注册的组播侦听地址

组播的目的 mac 地址都是根据组播 ip 地址映射出来的,所以本机监听某个组播地址时,需要把对应的 mac 地址下发到网卡的白名单里面去,网卡硬件才会把目的 mac 是它的包上传到内核层进行处理,不然网卡会直接丢掉这些不想要的包。linux 系统会根据上层应用的组播地址使用情况,自己管理网卡的 mac 白名单。另外还需要通过 IGMPv3(ipv4)和 MLDv2(ipv6)协议维护组播侦听状态,向交换机或者邻居组播路由器报告本机的组播地址。

cat /proc/net/igmp

cat /proc/net/igmp6  | grep eth0
2    eth0            ff0200000000000000000001ff000000     2 00000004 0    // 对方mac未知时的 "solicited-node multicast address"
2    eth0            ff0200000000000000000001ffe000b8     2 00000004 0    // addrconf 分配的ipv6地址 对应的  "solicited-node multicast address"
2    eth0            ff050000000000000000000000000002     1 00000004 0    // in6addr_sitelocal_allrouters
2    eth0            ff010000000000000000000000000002     1 00000008 0    //  in6addr_interfacelocal_allrouters
2    eth0            ff020000000000000000000000000002     1 00000004 0    //  in6addr_linklocal_allrouters
2    eth0            ff020000000000000000000000000001     1 0000000C 0    // in6addr_linklocal_allnodes
2    eth0            ff010000000000000000000000000001     1 00000008 0    // in6addr_interfacelocal_allnodes

本地 ipv6 地址增加时,本机要新增对应组播地址的侦听;添加 ndp 代理也一样,都会引起组播 mac 注册的变化吧。
ip  -6 neigh add proxy 2409:1a54:62d:ef10:9c6c:6a6:d94f:34bc dev eth0

cat /proc/net/igmp6 | grep eth0
2    eth0            ff0200000000000000000001ff4f34bc     1 00000004 0   // 新增加的proxy 对应的   "solicited-node multicast address"
2    eth0            ff0200000000000000000001ff000000     2 00000004 0
2    eth0            ff0200000000000000000001ffe000b8     2 00000004 0
2    eth0            ff050000000000000000000000000002     1 00000004 0
2    eth0            ff010000000000000000000000000002     1 00000008 0
2    eth0            ff020000000000000000000000000002     1 00000004 0
2    eth0            ff020000000000000000000000000001     1 0000000C 0
2    eth0            ff010000000000000000000000000001     1 00000008 0

刚发现 有一个 ip maddress 命令可以管理网卡上的组播地址的。

       ip-maddress - multicast addresses management

SYNOPSIS
       ip [ OPTIONS ] maddress  { COMMAND | help }

       ip maddress [ add | del ] MULTIADDR dev NAME

       ip maddress show [ dev NAME ]

 
# ip maddress show dev enp0s8
3:      enp0s8
        link  01:00:5e:00:00:01
        link  33:33:00:00:00:01
        link  33:33:ff:79:78:a2
        inet  224.0.0.1
        inet6 ff02::1:ff79:78a2 users 2
        inet6 ff02::1
        inet6 ff01::1

# ip maddress add 33:33:00:00:00:16 dev  enp0s8 
# ip maddress show dev enp0s8
3:      enp0s8
        link  01:00:5e:00:00:01
        link  33:33:00:00:00:01
        link  33:33:ff:79:78:a2
        link  33:33:00:00:00:16 static
        inet  224.0.0.1
        inet6 ff02::1:ff79:78a2 users 2
        inet6 ff02::1
        inet6 ff01::1

不过 ip maddress 只支持配置 mac 层的组播地址,不支持配置 ipv4/ipv6 组播地址;ipv4、ipv6 的组播地址只能由应用自己在程序里加入组播组来监听才行吧。

网卡的“组播混杂模式”设置

不过网卡有个 “组播混杂模式”, linux 里面对应 net_device的 IFF_ALLMULTI 属性。 可以用 ip 命令打开这个 “allmulticast” 开关。

       ip link set { DEVICE | group GROUP }
               [ multicast { on | off } ]
               [ allmulticast { on | off } ]
               [ promisc { on | off } ]

promisc: 是混杂模式,接收所有收到的包并上传到内核层处理,不再检查包的目的 mac 是不是本机需要的了。
allmulticast: 是组播混杂模式,接收所有的组播包。但不包含目的mac不是当前网卡的单播包吧。
multicast: 是否接收组播。以上开关设置完,可以在 ifconfig 命令输出里面看到属性变化。