diff -Nru linux/Documentation/filesystems/proc.txt linux-patched/Documentation/filesystems/proc.txt --- linux/Documentation/filesystems/proc.txt Fri Apr 6 18:42:48 2001 +++ linux-patched/Documentation/filesystems/proc.txt Wed Aug 27 16:21:27 2003 @@ -471,6 +471,11 @@ ...] 1375103 17405 0 0 0 0 0 0 ...] 1703981 5535 0 0 0 3 0 0 +In addition, each Channel Bond interface has its own directory. For +example, the bond0 device will have a directory called /proc/net/bond0/. +It will contain information that is specific to that bond, such as the +current slaves of the bond, the link status of the slaves, and how +many times each slave's link has failed. 1.5 SCSI info ------------- diff -Nru linux/Documentation/networking/bonding.txt linux-patched/Documentation/networking/bonding.txt --- linux/Documentation/networking/bonding.txt Thu Jan 1 01:00:00 1970 +++ linux-patched/Documentation/networking/bonding.txt Wed Aug 27 16:22:01 2003 @@ -0,0 +1,524 @@ + + Linux Ethernet Bonding Driver mini-howto + +Initial release : Thomas Davis +Corrections, HA extensions : 2000/10/03-15 : + - Willy Tarreau + - Constantine Gavrilov + - Chad N. Tindel + - Janice Girouard + +Note : +------ +The bonding driver originally came from Donald Becker's beowulf patches for +kernel 2.0. It has changed quite a bit since, and the original tools from +extreme-linux and beowulf sites will not work with this version of the driver. + +For new versions of the driver, patches for older kernels and the updated +userspace tools, please follow the links at the end of this file. + +Installation +============ + +1) Build kernel with the bonding driver +--------------------------------------- +For the latest version of the bonding driver, use kernel 2.4.12 or above +(otherwise you will need to apply a patch). + +Configure kernel with `make menuconfig/xconfig/config', and select +"Bonding driver support" in the "Network device support" section.
It is +recommended to configure the driver as a module since it is currently the only way +to pass parameters to the driver and configure more than one bonding device. + +Build and install the new kernel and modules. + +2) Get and install the userspace tools +-------------------------------------- +This version of the bonding driver requires an updated ifenslave program. The +original one from extreme-linux and beowulf will not work. Kernels 2.4.12 +and above include the updated version of ifenslave.c in Documentation/networking +directory. For older kernels, please follow the links at the end of this file. + +IMPORTANT!!! If you are running on Redhat 7.1 or greater, you need +to be careful because /usr/include/linux is no longer a symbolic link +to /usr/src/linux/include/linux. If you build ifenslave while this is +true, ifenslave will appear to succeed but your bond won't work. The purpose +of the -I option on the ifenslave compile line is to make sure it uses +/usr/src/linux/include/linux/if_bonding.h instead of the version from +/usr/include/linux. + +To install ifenslave.c, do: + # gcc -Wall -Wstrict-prototypes -O -I/usr/src/linux/include ifenslave.c -o ifenslave + # cp ifenslave /sbin/ifenslave + +3) Configure your system +------------------------ +Also see the following section on the module parameters. You will need to add +at least the following line to /etc/conf.modules (or /etc/modules.conf): + + alias bond0 bonding + +Use standard distribution techniques to define bond0 network interface. For +example, on modern RedHat distributions, create ifcfg-bond0 file in +/etc/sysconfig/network-scripts directory that looks like this: + +DEVICE=bond0 +IPADDR=192.168.1.1 +NETMASK=255.255.255.0 +NETWORK=192.168.1.0 +BROADCAST=192.168.1.255 +ONBOOT=yes +BOOTPROTO=none +USERCTL=no + +(put the appropriate values for your network instead of 192.168.1). + +All interfaces that are part of the trunk, should have SLAVE and MASTER +definitions.
For example, in the case of RedHat, if you wish to make eth0 and +eth1 (or other interfaces) a part of the bonding interface bond0, their config +files (ifcfg-eth0, ifcfg-eth1, etc.) should look like this: + +DEVICE=eth0 +USERCTL=no +ONBOOT=yes +MASTER=bond0 +SLAVE=yes +BOOTPROTO=none + +(use DEVICE=eth1 for eth1 and MASTER=bond1 for bond1 if you have configured +second bonding interface). + +Restart the networking subsystem or just bring up the bonding device if your +administration tools allow it. Otherwise, reboot. (For the case of RedHat +distros, you can do `ifup bond0' or `/etc/rc.d/init.d/network restart'.) + +If the administration tools of your distribution do not support master/slave +notation in configuration of network interfaces, you will need to configure +the bonding device with the following commands manually: + + # /sbin/ifconfig bond0 192.168.1.1 up + # /sbin/ifenslave bond0 eth0 + # /sbin/ifenslave bond0 eth1 + +(substitute 192.168.1.1 with your IP address and add custom network and custom +netmask to the arguments of ifconfig if required). + +You can then create a script with these commands and put it into the appropriate +rc directory. + +If you specifically need that all your network drivers are loaded before the +bonding driver, use one of modutils' powerful features : in your modules.conf, +tell that when asked for bond0, modprobe should first load all your interfaces : + +probeall bond0 eth0 eth1 bonding + +Be careful not to reference bond0 itself at the end of the line, or modprobe will +die in an endless recursive loop. + +4) Module parameters. +--------------------- +The following module parameters can be passed: + + mode= + +Possible values are 0 (round robin policy, default) and 1 (active backup +policy), and 2 (XOR). See question 9 and the HA section for additional info. + + miimon= + +Use integer value for the frequency (in ms) of MII link monitoring. Zero value +is default and means the link monitoring will be disabled. 
A good value is 100 +if you wish to use link monitoring. See HA section for additional info. + + downdelay= + +Use integer value for delaying disabling a link by this number (in ms) after +the link failure has been detected. Must be a multiple of miimon. Default +value is zero. See HA section for additional info. + + updelay= + +Use integer value for delaying enabling a link by this number (in ms) after +the "link up" status has been detected. Must be a multiple of miimon. Default +value is zero. See HA section for additional info. + + arp_interval= + +Use integer value for the frequency (in ms) of arp monitoring. Zero value +is default and means the arp monitoring will be disabled. See HA section +for additional info. This field is valid in active_backup mode only. + + arp_ip_target= + +An ip address to use when arp_interval is > 0. This is the target of the +arp request sent to determine the health of the link to the target. +Specify this value in ddd.ddd.ddd.ddd format. + +If you need to configure several bonding devices, the driver must be loaded +several times. I.e. for two bonding devices, your /etc/conf.modules must look +like this: + +alias bond0 bonding +alias bond1 bonding + +options bond0 miimon=100 +options bond1 -o bonding1 miimon=100 + +5) Testing configuration +------------------------ +You can test the configuration and transmit policy with ifconfig.
For example, +for round robin policy, you should get something like this: + +[root]# /sbin/ifconfig +bond0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 + inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0 + UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1 + RX packets:7224794 errors:0 dropped:0 overruns:0 frame:0 + TX packets:3286647 errors:1 dropped:0 overruns:1 carrier:0 + collisions:0 txqueuelen:0 + +eth0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 + inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0 + UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1 + RX packets:3573025 errors:0 dropped:0 overruns:0 frame:0 + TX packets:1643167 errors:1 dropped:0 overruns:1 carrier:0 + collisions:0 txqueuelen:100 + Interrupt:10 Base address:0x1080 + +eth1 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 + inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0 + UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1 + RX packets:3651769 errors:0 dropped:0 overruns:0 frame:0 + TX packets:1643480 errors:0 dropped:0 overruns:0 carrier:0 + collisions:0 txqueuelen:100 + Interrupt:9 Base address:0x1400 + +Questions : +=========== + +1. Is it SMP safe? + + Yes. The old 2.0.xx channel bonding patch was not SMP safe. + The new driver was designed to be SMP safe from the start. + +2. What type of cards will work with it? + + Any Ethernet type cards (you can even mix cards - a Intel + EtherExpress PRO/100 and a 3com 3c905b, for example). + You can even bond together Gigabit Ethernet cards! + +3. How many bonding devices can I have? + + One for each module you load. See section on module parameters for how + to accomplish this. + +4. How many slaves can a bonding device have? + + Limited by the number of network interfaces Linux supports and the + number of cards you can place in your system. + +5. What happens when a slave link dies? 
+ + If your ethernet cards support MII status monitoring and the MII + monitoring has been enabled in the driver (see description of module + parameters), there will be no adverse consequences. This release + of the bonding driver knows how to get the MII information and + enables or disables its slaves according to their link status. + See section on HA for additional information. + + For ethernet cards not supporting MII status, or if you wish to + verify that packets have been both sent and received, you may + configure the arp_interval and arp_ip_target. If packets have + not been sent or received during this interval, an arp request + is sent to the target to generate send and receive traffic. + If after this interval, either the successful send and/or + receive count has not incremented, the next slave in the sequence + will become the active slave. + + If neither miimon nor arp_interval is configured, the bonding + driver will not handle this situation very well. The driver will + continue to send packets but some packets will be lost. Retransmits + will cause serious degradation of performance (in the case when one + of two slave links fails, 50% of the packets will be lost, which is a serious + problem for both TCP and UDP). + +6. Can bonding be used for High Availability? + + Yes, if you use MII monitoring and ALL your cards support MII link + status reporting. See section on HA for more information. + +7. Which switches/systems does it work with? + + In round-robin mode, it works with systems that support trunking: + + * Cisco 5500 series (look for EtherChannel support). + * SunTrunking software. + * Alteon AceDirector switches / WebOS (use Trunks). + * BayStack Switches (trunks must be explicitly configured). Stackable + models (450) can define trunks between ports on different physical + units. + * Linux bonding, of course ! + + In Active-backup mode, it should work with any Layer-II switches. + +8. Where does a bonding device get its MAC address from?
+ + If not explicitly configured with ifconfig, the MAC address of the + bonding device is taken from its first slave device. This MAC address + is then passed to all following slaves and remains persistent (even if + the first slave is removed) until the bonding device is brought + down or reconfigured. + + If you wish to change the MAC address, you can set it with ifconfig: + + # ifconfig bond0 hw ether 00:11:22:33:44:55 + + The MAC address can also be changed by bringing down/up the device + and then changing its slaves (or their order): + + # ifconfig bond0 down ; modprobe -r bonding + # ifconfig bond0 .... up + # ifenslave bond0 eth... + + This method will automatically take the address from the next slave + that will be added. + + To restore your slaves' MAC addresses, you need to detach them + from the bond (`ifenslave -d bond0 eth0'), set them down + (`ifconfig eth0 down'), unload the drivers (`rmmod 3c59x', for + example) and reload them to get the MAC addresses from their + eeproms. If the driver is shared by several devices, you need + to turn them all down. Another solution is to look for the MAC + address at boot time (dmesg or tail /var/log/messages) and to + reset it by hand with ifconfig : + + # ifconfig eth0 down + # ifconfig eth0 hw ether 00:20:40:60:80:A0 + +9. Which transmit policies can be used? + + Round robin, based on the order of enslaving, the output device + is selected based on the next available slave. Regardless of + the source and/or destination of the packet. + + XOR, based on (src hw addr XOR dst hw addr) % slave cnt. This + selects the same slave for each destination hw address. + + Active-backup policy that ensures that one and only one device will + transmit at any given moment. Active-backup policy is useful for + implementing high availability solutions using two hubs (see + section on HA).
+ +High availability +================= + +To implement high availability using the bonding driver, you need to +compile the driver as a module because currently it is the only way to pass +parameters to the driver. This may change in the future. + +High availability is achieved by using MII status reporting. You need to +verify that all your interfaces support MII link status reporting. On Linux +kernel 2.2.17, all the 100 Mbps capable drivers and yellowfin gigabit driver +support it. If your system has an interface that does not support MII status +reporting, a failure of its link will not be detected! + +The bonding driver can regularly check all its slaves' links by checking the +MII status registers. The check interval is specified by the module argument +"miimon" (MII monitoring). It takes an integer that represents the +checking time in milliseconds. It should not come too close to (1000/HZ) +(10 ms on i386) because it may then reduce the system interactivity. 100 ms +seems to be a good value. It means that a dead link will be detected at most +100 ms after it goes down. + +Example: + + # modprobe bonding miimon=100 + +Or, put in your /etc/modules.conf : + + alias bond0 bonding + options bond0 miimon=100 + +There are currently two policies for high availability, depending on whether +a) hosts are connected to a single host or switch that supports trunking +b) hosts are connected to several different switches or a single switch that + does not support trunking. + +1) HA on a single switch or host - load balancing +------------------------------------------------- +It is the easiest to set up and to understand. Simply configure the +remote equipment (host or switch) to aggregate traffic over several +ports (Trunk, EtherChannel, etc.) and configure the bonding interfaces. +If the module has been loaded with the proper MII option, it will work +automatically. You can then try to remove and restore different links +and see in your logs what the driver detects.
When testing, you may +encounter problems on some buggy switches that disable the trunk for a +long time if all ports in a trunk go down. This is not Linux, but really +the switch (reboot it to ensure). + +Example 1 : host to host at double speed + + +----------+ +----------+ + | |eth0 eth0| | + | Host A +--------------------------+ Host B | + | +--------------------------+ | + | |eth1 eth1| | + +----------+ +----------+ + + On each host : + # modprobe bonding miimon=100 + # ifconfig bond0 addr + # ifenslave bond0 eth0 eth1 + +Example 2 : host to switch at double speed + + +----------+ +----------+ + | |eth0 port1| | + | Host A +--------------------------+ switch | + | +--------------------------+ | + | |eth1 port2| | + +----------+ +----------+ + + On host A : On the switch : + # modprobe bonding miimon=100 # set up a trunk on port1 + # ifconfig bond0 addr and port2 + # ifenslave bond0 eth0 eth1 + +2) HA on two or more switches (or a single switch without trunking support) +--------------------------------------------------------------------------- +This mode is more problematic because it relies on the fact that there +are multiple ports and the host's MAC address should be visible on one +port only to avoid confusing the switches. + +If you need to know which interface is the active one, and which ones are +backup, use ifconfig. All backup interfaces have the NOARP flag set. + +To use this mode, pass "mode=1" to the module at load time : + + # modprobe bonding miimon=100 mode=1 + +Or, put in your /etc/modules.conf : + + alias bond0 bonding + options bond0 miimon=100 mode=1 + +Example 1: Using multiple host and multiple switches to build a "no single +point of failure" solution. 
+ + + | | + |port3 port3| + +-----+----+ +-----+----+ + | |port7 ISL port7| | + | switch A +--------------------------+ switch B | + | +--------------------------+ | + | |port8 port8| | + +----++----+ +-----++---+ + port2||port1 port1||port2 + || +-------+ || + |+-------------+ host1 +---------------+| + | eth0 +-------+ eth1 | + | | + | +-------+ | + +--------------+ host2 +----------------+ + eth0 +-------+ eth1 + +In this configuration, there is an ISL - Inter Switch Link (could be a trunk), +several servers (host1, host2 ...) attached to both switches each, and one or +more ports to the outside world (port3...). One and only one slave on each host +is active at a time, while all links are still monitored (the system can +detect a failure of active and backup links). + +Each time a host changes its active interface, it sticks to the new one until +it goes down. In this example, the hosts are not too much affected by the +expiration time of the switches' forwarding tables. + +If host1 and host2 have the same functionality and are used in load balancing +by another external mechanism, it is good to have host1's active interface +connected to one switch and host2's to the other. Such a system will survive +a failure of a single host, cable, or switch. The worst thing that may happen +in the case of a switch failure is that half of the hosts will be temporarily +unreachable until the other switch expires its tables. + +Example 2: Using multiple ethernet cards connected to a switch to configure + NIC failover (switch is not required to support trunking).
+ + + +----------+ +----------+ + | |eth0 port1| | + | Host A +--------------------------+ switch | + | +--------------------------+ | + | |eth1 port2| | + +----------+ +----------+ + + On host A : On the switch : + # modprobe bonding miimon=100 mode=1 # (optional) minimize the time + # ifconfig bond0 addr # for table expiration + # ifenslave bond0 eth0 eth1 + +Each time the host changes its active interface, it sticks to the new one until +it goes down. In this example, the host is strongly affected by the expiration +time of the switch forwarding table. + +3) Adapting to your switches' timing +------------------------------------ +If your switches take a long time to go into backup mode, it may be +desirable not to activate a backup interface immediately after a link goes +down. It is possible to delay the moment at which a link will be +completely disabled by passing the module parameter "downdelay" (in +milliseconds, must be a multiple of miimon). + +When a switch reboots, it is possible that its ports report "link up" status +before they become usable. This could fool a bond device by causing it to +use some ports that are not ready yet. It is possible to delay the moment at +which an active link will be reused by passing the module parameter "updelay" +(in milliseconds, must be a multiple of miimon). + +A similar situation can occur when a host re-negotiates a lost link with the +switch (a case of cable replacement). + +A special case is when a bonding interface has lost all slave links. Then the +driver will immediately reuse the first link that goes up, even if updelay +parameter was specified. (If there are slave interfaces in the "updelay" state, +the interface that first went into that state will be immediately reused.) This +allows to reduce down-time if the value of updelay has been overestimated. 
+ +Examples : + + # modprobe bonding miimon=100 mode=1 downdelay=2000 updelay=5000 + # modprobe bonding miimon=100 mode=0 downdelay=0 updelay=5000 + +4) Limitations +-------------- +The main limitations are : + - only the link status is monitored. If the switch on the other side is + partially down (e.g. doesn't forward anymore, but the link is OK), the link + won't be disabled. Another way to check for a dead link could be to count + incoming frames on a heavily loaded host. This is not applicable to small + servers, but may be useful when the front switches send multicast + information on their links (e.g. VRRP), or even health-check the servers. + Use the arp_interval/arp_ip_target parameters to count incoming/outgoing + frames. + +Resources and links +=================== + +Current development on this driver is posted to: + - http://www.sourceforge.net/projects/bonding/ + +Donald Becker's Ethernet Drivers and diag programs may be found at : + - http://www.scyld.com/network/ + +You will also find a lot of information regarding Ethernet, NWay, MII, etc. at +www.scyld.com. + +For new versions of the driver, patches for older kernels and the updated +userspace tools, take a look at Willy Tarreau's site : + - http://wtarreau.free.fr/pub/bonding/ + - http://www-miaif.lip6.fr/willy/pub/bonding/ + +To get the latest information about Linux Kernel development, please consult +the Linux Kernel Mailing List Archives at : + http://boudicca.tux.org/hypermail/linux-kernel/latest/ + +-- END -- diff -Nru linux/Documentation/networking/ifenslave.c linux-patched/Documentation/networking/ifenslave.c --- linux/Documentation/networking/ifenslave.c Thu Jan 1 01:00:00 1970 +++ linux-patched/Documentation/networking/ifenslave.c Wed Aug 27 16:22:01 2003 @@ -0,0 +1,565 @@ +/* Mode: C; + * ifenslave.c: Configure network interfaces for parallel routing. + * + * This program controls the Linux implementation of running multiple + * network interfaces in parallel.
+ * + * Usage: ifenslave [-v] master-interface < slave-interface [metric ] > ... + * + * Author: Donald Becker + * Copyright 1994-1996 Donald Becker + * + * This program is free software; you can redistribute it + * and/or modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation. + * + * The author may be reached as becker@CESDIS.gsfc.nasa.gov, or C/O + * Center of Excellence in Space Data and Information Sciences + * Code 930.5, Goddard Space Flight Center, Greenbelt MD 20771 + * + * Changes : + * - 2000/10/02 Willy Tarreau : + * - few fixes. Master's MAC address is now correctly taken from + * the first device when not previously set ; + * - detach support : call BOND_RELEASE to detach an enslaved interface. + * - give a mini-howto from command-line help : # ifenslave -h + * + * - 2001/02/16 Chad N. Tindel : + * - Master is now brought down before setting the MAC address. In + * the 2.4 kernel you can't change the MAC address while the device is + * up because you get EBUSY. + * + * - 2001/09/13 Takao Indoh + * - Added the ability to change the active interface on a mode 1 bond + * at runtime. + * + * - 2001/10/23 Chad N. Tindel : + * - No longer set the MAC address of the master. The bond device will + * take care of this itself + * - Try the SIOC*** versions of the bonding ioctls before using the + * old versions + * - 2002/02/18 Erik Habbinga : + * - ifr2.ifr_flags was not initialized in the hwaddr_notset case, + * SIOCGIFFLAGS now called before hwaddr_notset test + */ + +static char *version = +"ifenslave.c:v0.07 9/9/97 Donald Becker (becker@cesdis.gsfc.nasa.gov).\n" +"detach support added on 2000/10/02 by Willy Tarreau (willy at meta-x.org).\n" +"2.4 kernel support added on 2001/02/16 by Chad N. 
Tindel (ctindel at ieee dot org.\n"; + +static const char *usage_msg = +"Usage: ifenslave [-adfrvVh] < [metric ] > ...\n" +" ifenslave -c master-interface slave-if\n"; + +static const char *howto_msg = +"Usage: ifenslave [-adfrvVh] < [metric ] > ...\n" +" ifenslave -c master-interface slave-if\n" +"\n" +" To create a bond device, simply follow these three steps :\n" +" - ensure that the required drivers are properly loaded :\n" +" # modprobe bonding ; modprobe <3c59x|eepro100|pcnet32|tulip|...>\n" +" - assign an IP address to the bond device :\n" +" # ifconfig bond0 netmask broadcast \n" +" - attach all the interfaces you need to the bond device :\n" +" # ifenslave bond0 eth0 eth1 eth2\n" +" If bond0 didn't have a MAC address, it will take eth0's. Then, all\n" +" interfaces attached AFTER this assignment will get the same MAC addr.\n" +"\n" +" To detach a dead interface without setting the bond device down :\n" +" # ifenslave -d bond0 eth1\n" +"\n" +" To set the bond device down and automatically release all the slaves :\n" +" # ifconfig bond0 down\n" +"\n" +" To change active slave :\n" +" # ifenslave -c bond0 eth0\n" +"\n"; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct option longopts[] = { + /* { name has_arg *flag val } */ + {"all-interfaces", 0, 0, 'a'}, /* Show all interfaces. */ + {"force", 0, 0, 'f'}, /* Force the operation. */ + {"help", 0, 0, '?'}, /* Give help */ + {"howto", 0, 0, 'h'}, /* Give some more help */ + {"receive-slave", 0, 0, 'r'}, /* Make a receive-only slave. */ + {"verbose", 0, 0, 'v'}, /* Report each action taken. */ + {"version", 0, 0, 'V'}, /* Emit version information. */ + {"detach", 0, 0, 'd'}, /* Detach a slave interface. */ + {"change-active", 0, 0, 'c'}, /* Change the active slave. */ + { 0, 0, 0, 0 } +}; + +/* Command-line flags. */ +unsigned int +opt_a = 0, /* Show-all-interfaces flag. 
*/ +opt_f = 0, /* Force the operation. */ +opt_r = 0, /* Set up a Rx-only slave. */ +opt_d = 0, /* detach a slave interface. */ +opt_c = 0, /* change-active-slave flag. */ +verbose = 0, /* Verbose flag. */ +opt_version = 0, +opt_howto = 0; +int skfd = -1; /* AF_INET socket for ioctl() calls. */ + +static void if_print(char *ifname); + +int +main(int argc, char **argv) +{ + struct ifreq ifr2, if_hwaddr, if_ipaddr, if_metric, if_mtu, if_dstaddr; + struct ifreq if_netmask, if_brdaddr, if_flags; + int goterr = 0; + int c, errflag = 0; + sa_family_t master_family; + char **spp, *master_ifname, *slave_ifname; + int hwaddr_notset; + + while ((c = getopt_long(argc, argv, "acdfrvV?h", longopts, 0)) != EOF) + switch (c) { + case 'a': opt_a++; break; + case 'f': opt_f++; break; + case 'r': opt_r++; break; + case 'd': opt_d++; break; + case 'c': opt_c++; break; + case 'v': verbose++; break; + case 'V': opt_version++; break; + case 'h': opt_howto++; break; + case '?': errflag++; + } + + /* option check */ + if (opt_c) + if(opt_a || opt_f || opt_r || opt_d || verbose || opt_version || + opt_howto || errflag ) { + fprintf(stderr, usage_msg); + return 2; + } + + if (errflag) { + fprintf(stderr, usage_msg); + return 2; + } + + if (opt_howto) { + fprintf(stderr, howto_msg); + return 0; + } + + if (verbose || opt_version) { + printf(version); + if (opt_version) + exit(0); + } + + /* Open a basic socket. */ + if ((skfd = socket(AF_INET, SOCK_DGRAM,0)) < 0) { + perror("socket"); + exit(-1); + } + + if (verbose) + fprintf(stderr, "DEBUG: argc=%d, optind=%d and argv[optind] is %s.\n", + argc, optind, argv[optind]); + + /* No remaining args means show all interfaces. */ + if (optind == argc) { + if_print((char *)NULL); + (void) close(skfd); + exit(0); + } + + /* Copy the interface name. */ + spp = argv + optind; + master_ifname = *spp++; + slave_ifname = *spp++; + + /* Check command line. 
*/ + if (opt_c) { + char **tempp = spp; + if ((master_ifname == NULL)||(slave_ifname == NULL)||(*tempp++ != NULL)) { + fprintf(stderr, usage_msg); + return 2; + } + } + + /* A single args means show the configuration for this interface. */ + if (slave_ifname == NULL) { + if_print(master_ifname); + (void) close(skfd); + exit(0); + } + + /* Get the vitals from the master interface. */ + { + struct ifreq *ifra[7] = { &if_ipaddr, &if_mtu, &if_dstaddr, + &if_brdaddr, &if_netmask, &if_flags, + &if_hwaddr }; + const char *req_name[7] = { + "IP address", "MTU", "destination address", + "broadcast address", "netmask", "status flags", + "hardware address" }; + const int ioctl_req_type[7] = { + SIOCGIFADDR, SIOCGIFMTU, SIOCGIFDSTADDR, + SIOCGIFBRDADDR, SIOCGIFNETMASK, SIOCGIFFLAGS, + SIOCGIFHWADDR }; + int i; + + for (i = 0; i < 7; i++) { + strncpy(ifra[i]->ifr_name, master_ifname, IFNAMSIZ); + if (ioctl(skfd, ioctl_req_type[i], ifra[i]) < 0) { + fprintf(stderr, + "Something broke getting the master's %s: %s.\n", + req_name[i], strerror(errno)); + } + } + + hwaddr_notset = 1; /* assume master's address not set yet */ + for (i = 0; hwaddr_notset && (i < 6); i++) { + hwaddr_notset &= ((unsigned char *)if_hwaddr.ifr_hwaddr.sa_data)[i] == 0; + } + + /* The family '1' is ARPHRD_ETHER for ethernet. 
*/ + if (if_hwaddr.ifr_hwaddr.sa_family != 1 && !opt_f) { + fprintf(stderr, "The specified master interface '%s' is not" + " ethernet-like.\n This program is designed to work" + " with ethernet-like network interfaces.\n" + " Use the '-f' option to force the operation.\n", + master_ifname); + + exit (1); + } + master_family = if_hwaddr.ifr_hwaddr.sa_family; + if (verbose) { + unsigned char *hwaddr = (unsigned char *)if_hwaddr.ifr_hwaddr.sa_data; + printf("The current hardware address (SIOCGIFHWADDR) of %s is type %d " + "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x.\n", master_ifname, + if_hwaddr.ifr_hwaddr.sa_family, hwaddr[0], hwaddr[1], + hwaddr[2], hwaddr[3], hwaddr[4], hwaddr[5]); + } + } + + + /* do this when enslaving interfaces */ + do { + if (opt_d) { /* detach a slave interface from the master */ + strncpy(if_flags.ifr_name, master_ifname, IFNAMSIZ); + strncpy(if_flags.ifr_slave, slave_ifname, IFNAMSIZ); + if ((ioctl(skfd, SIOCBONDRELEASE, &if_flags) < 0) && + (ioctl(skfd, BOND_RELEASE_OLD, &if_flags) < 0)) { + fprintf(stderr, "SIOCBONDRELEASE: cannot detach %s from %s. errno=%s.\n", + slave_ifname, master_ifname, strerror(errno)); + } + else { /* we'll set the interface down to avoid any conflicts due to + same IP/MAC */ + strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ); + if (ioctl(skfd, SIOCGIFFLAGS, &ifr2) < 0) { + int saved_errno = errno; + fprintf(stderr, "SIOCGIFFLAGS on %s failed: %s\n", slave_ifname, + strerror(saved_errno)); + } + else { + ifr2.ifr_flags &= ~(IFF_UP | IFF_RUNNING); + if (ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { + int saved_errno = errno; + fprintf(stderr, "Shutting down interface %s failed: %s\n", + slave_ifname, strerror(saved_errno)); + } + } + } + } + else { /* attach a slave interface to the master */ + /* two possibilities : + - if hwaddr_notset, do nothing. The bond will assign the + hwaddr from it's first slave. 
+ - if !hwaddr_notset, assign the master's hwaddr to each slave + */ + + strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ); + if (ioctl(skfd, SIOCGIFFLAGS, &ifr2) < 0) { + int saved_errno = errno; + fprintf(stderr, "SIOCGIFFLAGS on %s failed: %s\n", slave_ifname, + strerror(saved_errno)); + return 1; + } + + if (hwaddr_notset) { /* we do nothing */ + + } + else { /* we'll assign master's hwaddr to this slave */ + if (ifr2.ifr_flags & IFF_UP) { + ifr2.ifr_flags &= ~IFF_UP; + if (ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { + int saved_errno = errno; + fprintf(stderr, "Shutting down interface %s failed: %s\n", + slave_ifname, strerror(saved_errno)); + } + } + + strncpy(if_hwaddr.ifr_name, slave_ifname, IFNAMSIZ); + if (ioctl(skfd, SIOCSIFHWADDR, &if_hwaddr) < 0) { + int saved_errno = errno; + fprintf(stderr, "SIOCSIFHWADDR on %s failed: %s\n", if_hwaddr.ifr_name, + strerror(saved_errno)); + if (saved_errno == EBUSY) + fprintf(stderr, " The slave device %s is busy: it must be" + " idle before running this command.\n", slave_ifname); + else if (saved_errno == EOPNOTSUPP) + fprintf(stderr, " The slave device you specified does not support" + " setting the MAC address.\n Your kernel likely does not" + " support slave devices.\n"); + else if (saved_errno == EINVAL) + fprintf(stderr, " The slave device's address type does not match" + " the master's address type.\n"); + } else { + if (verbose) { + unsigned char *hwaddr = if_hwaddr.ifr_hwaddr.sa_data; + printf("Slave's (%s) hardware address set to " + "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x.\n", slave_ifname, + hwaddr[0], hwaddr[1], hwaddr[2], hwaddr[3], hwaddr[4], hwaddr[5]); + } + } + } + + if (*spp && !strcmp(*spp, "metric")) { + if (*++spp == NULL) { + fprintf(stderr, usage_msg); + exit(2); + } + if_metric.ifr_metric = atoi(*spp); + strncpy(if_metric.ifr_name, slave_ifname, IFNAMSIZ); + if (ioctl(skfd, SIOCSIFMETRIC, &if_metric) < 0) { + fprintf(stderr, "SIOCSIFMETRIC on %s: %s\n", slave_ifname, + strerror(errno)); + goterr = 1; + 
} + spp++; + } + + if (strncpy(if_ipaddr.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFADDR, &if_ipaddr) < 0) { + fprintf(stderr, + "Something broke setting the slave's address: %s.\n", + strerror(errno)); + } else { + if (verbose) { + unsigned char *ipaddr = if_ipaddr.ifr_addr.sa_data; + printf("Set the slave's (%s) IP address to %d.%d.%d.%d.\n", + slave_ifname, ipaddr[0], ipaddr[1], ipaddr[2], ipaddr[3]); + } + } + + if (strncpy(if_mtu.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFMTU, &if_mtu) < 0) { + fprintf(stderr, "Something broke setting the slave MTU: %s.\n", + strerror(errno)); + } else { + if (verbose) + printf("Set the slave's (%s) MTU to %d.\n", slave_ifname, if_mtu.ifr_mtu); + } + + if (strncpy(if_dstaddr.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFDSTADDR, &if_dstaddr) < 0) { + fprintf(stderr, "Error setting the slave (%s) with SIOCSIFDSTADDR: %s.\n", + slave_ifname, strerror(errno)); + } else { + if (verbose) { + unsigned char *ipaddr = if_dstaddr.ifr_dstaddr.sa_data; + printf("Set the slave's (%s) destination address to %d.%d.%d.%d.\n", + slave_ifname, ipaddr[0], ipaddr[1], ipaddr[2], ipaddr[3]); + } + } + + if (strncpy(if_brdaddr.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFBRDADDR, &if_brdaddr) < 0) { + fprintf(stderr, + "Something broke setting the slave (%s) broadcast address: %s.\n", + slave_ifname, strerror(errno)); + } else { + if (verbose) { + unsigned char *ipaddr = if_brdaddr.ifr_broadaddr.sa_data; + printf("Set the slave's (%s) broadcast address to %d.%d.%d.%d.\n", + slave_ifname, ipaddr[0], ipaddr[1], ipaddr[2], ipaddr[3]); + } + } + + if (strncpy(if_netmask.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFNETMASK, &if_netmask) < 0) { + fprintf(stderr, + "Something broke setting the slave (%s) netmask: %s.\n", + slave_ifname, strerror(errno)); + } else { + if (verbose) { + unsigned char *ipaddr = if_netmask.ifr_netmask.sa_data; + printf("Set the slave's 
(%s) netmask to %d.%d.%d.%d.\n", + slave_ifname, ipaddr[0], ipaddr[1], ipaddr[2], ipaddr[3]); + } + } + + ifr2.ifr_flags |= IFF_UP; /* the interface will need to be up to be bonded */ + if ((ifr2.ifr_flags &= ~(IFF_SLAVE | IFF_MASTER)) == 0 + || strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { + fprintf(stderr, + "Something broke setting the slave (%s) flags: %s.\n", + slave_ifname, strerror(errno)); + } else { + if (verbose) + printf("Set the slave's (%s) flags %4.4x.\n", slave_ifname, if_flags.ifr_flags); + } + + /* Do the real thing */ + if ( ! opt_r) { + strncpy(if_flags.ifr_name, master_ifname, IFNAMSIZ); + strncpy(if_flags.ifr_slave, slave_ifname, IFNAMSIZ); + if (!opt_c) { + if ((ioctl(skfd, SIOCBONDENSLAVE, &if_flags) < 0) && + (ioctl(skfd, BOND_ENSLAVE_OLD, &if_flags) < 0)) { + fprintf(stderr, "SIOCBONDENSLAVE: %s.\n", strerror(errno)); + } + } + else { + if ((ioctl(skfd, SIOCBONDCHANGEACTIVE, &if_flags) < 0) && + (ioctl(skfd, BOND_CHANGE_ACTIVE_OLD, &if_flags) < 0)) { + fprintf(stderr, "SIOCBONDCHANGEACTIVE: %s.\n", strerror(errno)); + } + } + } + } + } while ( (slave_ifname = *spp++) != NULL); + + /* Close the socket. */ + (void) close(skfd); + + return(goterr); +} + +static short mif_flags; +
+/*
+ * Get the interface configuration from the kernel: print the flags (also
+ * saved in mif_flags), protocol address and hardware address of IFNAME.
+ * Returns 0, or -1 if IFNAME is too long or one of those three queries fails.
+ */
+static int if_getconfig(char *ifname)
+{
+	struct ifreq ifr;
+	int metric, mtu;			/* Parameters of the master interface. */
+	struct sockaddr dstaddr, broadaddr, netmask;
+
+	/* ifr_name holds IFNAMSIZ bytes; reject names strcpy() would overflow. */
+	if (strlen(ifname) >= IFNAMSIZ)
+		return -1;
+
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(skfd, SIOCGIFFLAGS, &ifr) < 0)
+		return -1;
+	mif_flags = ifr.ifr_flags;
+	printf("The result of SIOCGIFFLAGS on %s is %x.\n",
+		   ifname, (unsigned short)ifr.ifr_flags);
+
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(skfd, SIOCGIFADDR, &ifr) < 0)
+		return -1;
+	/* sa_data is plain char: cast so bytes above 0x7f are not sign-extended. */
+	printf("The result of SIOCGIFADDR is %2.2x.%2.2x.%2.2x.%2.2x.\n",
+		   (unsigned char)ifr.ifr_addr.sa_data[0], (unsigned char)ifr.ifr_addr.sa_data[1],
+		   (unsigned char)ifr.ifr_addr.sa_data[2], (unsigned char)ifr.ifr_addr.sa_data[3]);
+
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(skfd, SIOCGIFHWADDR, &ifr) < 0)
+		return -1;
+
+	{
+		/* Gotta convert from 'char' to unsigned for printf(). */
+		unsigned char *hwaddr = (unsigned char *)ifr.ifr_hwaddr.sa_data;
+		printf("The result of SIOCGIFHWADDR is type %d "
+			   "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x.\n",
+			   ifr.ifr_hwaddr.sa_family, hwaddr[0], hwaddr[1],
+			   hwaddr[2], hwaddr[3], hwaddr[4], hwaddr[5]);
+	}
+
+	strcpy(ifr.ifr_name, ifname);
+	metric = (ioctl(skfd, SIOCGIFMETRIC, &ifr) < 0) ? 0 : ifr.ifr_metric;
+
+	strcpy(ifr.ifr_name, ifname);
+	mtu = (ioctl(skfd, SIOCGIFMTU, &ifr) < 0) ? 0 : ifr.ifr_mtu;
+
+	memset(&dstaddr, 0, sizeof(struct sockaddr));
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(skfd, SIOCGIFDSTADDR, &ifr) >= 0)
+		dstaddr = ifr.ifr_dstaddr;
+
+	memset(&broadaddr, 0, sizeof(struct sockaddr));
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(skfd, SIOCGIFBRDADDR, &ifr) >= 0)
+		broadaddr = ifr.ifr_broadaddr;
+
+	memset(&netmask, 0, sizeof(struct sockaddr));
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(skfd, SIOCGIFNETMASK, &ifr) >= 0)
+		netmask = ifr.ifr_netmask;
+
+	return(0);
+}
+ +static void if_print(char *ifname) +{ + char buff[1024]; + struct ifconf ifc; + struct ifreq *ifr; + int i; + + if (ifname == (char *)NULL) { + ifc.ifc_len = sizeof(buff); + ifc.ifc_buf = buff; + if (ioctl(skfd, SIOCGIFCONF, &ifc) < 0) { + fprintf(stderr, "SIOCGIFCONF: %s\n", strerror(errno)); +
return; + } + + ifr = ifc.ifc_req; + for (i = ifc.ifc_len / sizeof(struct ifreq); --i >= 0; ifr++) { + if (if_getconfig(ifr->ifr_name) < 0) { + fprintf(stderr, "%s: unknown interface.\n", + ifr->ifr_name); + continue; + } + + if (((mif_flags & IFF_UP) == 0) && !opt_a) continue; + /*ife_print(&ife);*/ + } + } else { + if (if_getconfig(ifname) < 0) + fprintf(stderr, "%s: unknown interface.\n", ifname); + } +} + + +/* + * Local variables: + * version-control: t + * kept-new-versions: 5 + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 4 + * compile-command: "gcc -Wall -Wstrict-prototypes -O -I/usr/src/linux/include ifenslave.c -o ifenslave" + * End: + */ diff -Nru linux/arch/s390x/kernel/ioctl32.c linux-patched/arch/s390x/kernel/ioctl32.c --- linux/arch/s390x/kernel/ioctl32.c Sun Aug 12 18:38:48 2001 +++ linux-patched/arch/s390x/kernel/ioctl32.c Wed Aug 27 16:21:27 2003 @@ -484,6 +484,13 @@ IOCTL32_HANDLER(SIOCADDRT, routing_ioctl), IOCTL32_HANDLER(SIOCDELRT, routing_ioctl), + IOCTL32_HANDLER(SIOCBONDENSLAVE, bond_ioctl), + IOCTL32_HANDLER(SIOCBONDRELEASE, bond_ioctl), + IOCTL32_HANDLER(SIOCBONDSETHWADDR, bond_ioctl), + IOCTL32_HANDLER(SIOCBONDSLAVEINFOQUERY, bond_ioctl), + IOCTL32_HANDLER(SIOCBONDINFOQUERY, bond_ioctl), + IOCTL32_HANDLER(SIOCBONDCHANGEACTIVE, bond_ioctl), + IOCTL32_HANDLER(EXT2_IOC32_GETFLAGS, do_ext2_ioctl), IOCTL32_HANDLER(EXT2_IOC32_SETFLAGS, do_ext2_ioctl), IOCTL32_HANDLER(EXT2_IOC32_GETVERSION, do_ext2_ioctl), diff -Nru linux/arch/sparc64/kernel/ioctl32.c linux-patched/arch/sparc64/kernel/ioctl32.c --- linux/arch/sparc64/kernel/ioctl32.c Wed Aug 27 15:52:47 2003 +++ linux-patched/arch/sparc64/kernel/ioctl32.c Wed Aug 27 16:21:27 2003 @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include @@ -629,6 +630,74 @@ return err; }
+/*
+ * 32-bit compatibility handler for the bonding ioctls.  The 32-bit ifreq
+ * carries a 32-bit user pointer in ifr_data, so the payload it points to is
+ * bounced through a kernel page: copied in, handed to sys_ioctl() under
+ * KERNEL_DS, and copied back to the caller when the ioctl succeeds.
+ *
+ * Returns the sys_ioctl() result, -EFAULT if a user copy fails, -EAGAIN if
+ * no page could be allocated, or -EINVAL for a command not handled here.
+ */
+static int bond_ioctl(unsigned long fd, unsigned int cmd, unsigned long arg)
+{
+	struct ifreq ifr;
+	mm_segment_t old_fs;
+	int err, len;
+	u32 data;
+
+	if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32)))
+		return -EFAULT;
+	ifr.ifr_data = (__kernel_caddr_t)get_free_page(GFP_KERNEL);
+	if (!ifr.ifr_data)
+		return -EAGAIN;
+
+	/* The ioctl number itself selects how much user data is bounced. */
+	switch (cmd) {
+	case SIOCBONDENSLAVE:
+	case SIOCBONDRELEASE:
+	case SIOCBONDSETHWADDR:
+	case SIOCBONDCHANGEACTIVE:
+		len = IFNAMSIZ * sizeof(char);
+		break;
+	case SIOCBONDSLAVEINFOQUERY:
+	case SIOCBONDINFOQUERY:
+		/*
+		 * NOTE(review): only a pointer's worth of bytes is bounced here;
+		 * the query ioctls presumably exchange whole ifslave/ifbond
+		 * structures - confirm the sizes against if_bonding.h.
+		 */
+		len = sizeof (char *);
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	__get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data));
+	if (copy_from_user(ifr.ifr_data, (char *)A(data), len)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	old_fs = get_fs();
+	set_fs (KERNEL_DS);
+	err = sys_ioctl (fd, cmd, (unsigned long)&ifr);
+	set_fs (old_fs);
+	if (!err) {
+		u32 data;
+
+		__get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data));
+		len = copy_to_user((char *)A(data), ifr.ifr_data, len);
+		if (len)
+			err = -EFAULT;
+	}
+
+out:
+	free_page((unsigned long)ifr.ifr_data);
+	return err;
+}
+ static inline int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg) { struct ifreq ifr; diff -Nru linux/drivers/net/bonding.c linux-patched/drivers/net/bonding.c --- linux/drivers/net/bonding.c Wed Aug 27 15:52:58 2003 +++ linux-patched/drivers/net/bonding.c Wed Aug 27 16:22:01 2003 @@ -4,11 +4,14 @@ * Copyright 1999, Thomas Davis, tadavis@lbl.gov. * Licensed under the GPL. Based on dummy.c, and eql.c devices. * - * bond.c: a bonding/etherchannel/sun trunking net driver + * bonding.c: an Ethernet Bonding driver * - * This is useful to talk to a Cisco 5500, running Etherchannel, aka: - * Linux Channel Bonding + * This is useful to talk to a Cisco EtherChannel compatible equipment: + * Cisco 5500 * Sun Trunking (Solaris) + * Alteon AceDirector Trunks + * Linux Bonding + * and probably many L2 switches ... * * How it works: * ifconfig bond0 ipaddress netmask up @@ -21,280 +24,2100 @@ * will release all slaves, marking them as down.
* * ifenslave bond0 eth0 - * will attache eth0 to bond0 as a slave. eth0 hw mac address will either + * will attach eth0 to bond0 as a slave. eth0 hw mac address will either * a: be used as initial mac address * b: if a hw mac address already is there, eth0's hw mac address - * will then be set from bond0. + * will then be set from bond0. * * v0.1 - first working version. * v0.2 - changed stats to be calculated by summing slaves stats. + * + * Changes: + * Arnaldo Carvalho de Melo + * - fix leaks on failure at bond_init + * + * 2000/09/30 - Willy Tarreau + * - added trivial code to release a slave device. + * - fixed security bug (CAP_NET_ADMIN not checked) + * - implemented MII link monitoring to disable dead links : + * All MII capable slaves are checked every milliseconds + * (100 ms seems good). This value can be changed by passing it to + * insmod. A value of zero disables the monitoring (default). + * - fixed an infinite loop in bond_xmit_roundrobin() when there's no + * good slave. + * - made the code hopefully SMP safe + * + * 2000/10/03 - Willy Tarreau + * - optimized slave lists based on relevant suggestions from Thomas Davis + * - implemented active-backup method to obtain HA with two switches: + * stay as long as possible on the same active interface, while we + * also monitor the backup one (MII link status) because we want to know + * if we are able to switch at any time. ( pass "mode=1" to insmod ) + * - lots of stress testings because we need it to be more robust than the + * wires ! :-> + * + * 2000/10/09 - Willy Tarreau + * - added up and down delays after link state change. + * - optimized the slaves chaining so that when we run forward, we never + * repass through the bond itself, but we can find it by searching + * backwards. Renders the deletion more difficult, but accelerates the + * scan. + * - smarter enslaving and releasing. 
+ * - finer and more robust SMP locking + * + * 2000/10/17 - Willy Tarreau + * - fixed two potential SMP race conditions + * + * 2000/10/18 - Willy Tarreau + * - small fixes to the monitoring FSM in case of zero delays + * 2000/11/01 - Willy Tarreau + * - fixed first slave not automatically used in trunk mode. + * 2000/11/10 : spelling of "EtherChannel" corrected. + * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). + * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). + * + * 2001/1/3 - Chad N. Tindel + * - The bonding driver now simulates MII status monitoring, just like + * a normal network device. It will show that the link is down iff + * every slave in the bond shows that their links are down. If at least + * one slave is up, the bond's MII status will appear as up. + * + * 2001/2/7 - Chad N. Tindel + * - Applications can now query the bond from user space to get + * information which may be useful. They do this by calling + * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves + * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to + * get slave specific information (# link failures, etc). See + * for more details. The structs of interest + * are ifbond and ifslave. + * + * 2001/4/5 - Chad N. Tindel + * - Ported to 2.4 Kernel * + * 2001/5/2 - Jeffrey E. Mast + * - When a device is detached from a bond, the slave device is no longer + * left thinking that is has a master. + * + * 2001/5/16 - Jeffrey E. Mast + * - memset did not appropriately initialized the bond rw_locks. Used + * rwlock_init to initialize to unlocked state to prevent deadlock when + * first attempting a lock + * - Called SET_MODULE_OWNER for bond device + * + * 2001/5/17 - Tim Anderson + * - 2 paths for releasing for slave release; 1 through ioctl + * and 2) through close. Both paths need to release the same way. + * - the free slave in bond release is changing slave status before + * the free. 
The netdev_set_master() is intended to change slave state + * so it should not be done as part of the release process. + * - Simple rule for slave state at release: only the active in A/B and + * only one in the trunked case. + * + * 2001/6/01 - Tim Anderson + * - Now call dev_close when releasing a slave so it doesn't screw up + * out routing table. + * + * 2001/6/01 - Chad N. Tindel + * - Added /proc support for getting bond and slave information. + * Information is in /proc/net//info. + * - Changed the locking when calling bond_close to prevent deadlock. + * + * 2001/8/05 - Janice Girouard + * - correct problem where refcnt of slave is not incremented in bond_ioctl + * so the system hangs when halting. + * - correct locking problem when unable to malloc in bond_enslave. + * - adding bond_xmit_xor logic. + * - adding multiple bond device support. + * + * 2001/8/13 - Erik Habbinga + * - correct locking problem with rtnl_exlock_nowait + * + * 2001/8/23 - Janice Girouard + * - bzero initial dev_bonds, to correct oops + * - convert SIOCDEVPRIVATE to new MII ioctl calls + * + * 2001/9/13 - Takao Indoh + * - Add the BOND_CHANGE_ACTIVE ioctl implementation + * + * 2001/9/14 - Mark Huth + * - Change MII_LINK_READY to not check for end of auto-negotiation, + * but only for an up link. + * + * 2001/9/20 - Chad N. Tindel + * - Add the device field to bonding_t. Previously the net_device + * corresponding to a bond wasn't available from the bonding_t + * structure. + * + * 2001/9/25 - Janice Girouard + * - add arp_monitor for active backup mode + * + * 2001/10/23 - Takao Indoh + * - Various memory leak fixes + * + * 2001/11/5 - Mark Huth + * - Don't take rtnl lock in bond_mii_monitor as it deadlocks under + * certain hotswap conditions. + * Note: this same change may be required in bond_arp_monitor ??? 
+ * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr + * - Handle hot swap ethernet interface deregistration events to remove + * kernel oops following hot swap of enslaved interface + * + * 2002/1/2 - Chad N. Tindel + * - Restore original slave flags at release time. + * + * 2002/02/18 - Erik Habbinga + * - bond_release(): calling kfree on our_slave after call to + * bond_restore_slave_flags, not before + * - bond_enslave(): saving slave flags into original_flags before + * call to netdev_set_master, so the IFF_SLAVE flag doesn't end + * up in original_flags */ -#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + #include +#include +#include +#include +#include + +/* monitor all links that often (in milliseconds). <=0 disables monitoring */ +#ifndef BOND_LINK_MON_INTERV +#define BOND_LINK_MON_INTERV 0 +#endif + +#undef MII_LINK_UP +#define MII_LINK_UP 0x04 + +#undef MII_ENDOF_NWAY +#define MII_ENDOF_NWAY 0x20 + +#undef MII_LINK_READY +#define MII_LINK_READY (MII_LINK_UP) + +#ifndef BOND_LINK_ARP_INTERV +#define BOND_LINK_ARP_INTERV 0 +#endif + +static int arp_interval = BOND_LINK_ARP_INTERV; +static char *arp_ip_target = NULL; +static unsigned long arp_target = 0; +static u32 my_ip = 0; +char *arp_target_hw_addr = NULL; + +static int max_bonds = BOND_DEFAULT_MAX_BONDS; +static int miimon = BOND_LINK_MON_INTERV; +static int mode = BOND_MODE_ROUNDROBIN; +static int updelay = 0; +static int downdelay = 0; + +static int first_pass = 1; +int bond_cnt; +static struct bonding *these_bonds = NULL; +static struct net_device *dev_bonds = NULL; + +MODULE_PARM(max_bonds, "i"); +MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); +MODULE_PARM(miimon, "i"); +MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 
+MODULE_PARM(mode, "i"); +MODULE_PARM(arp_interval, "i"); +MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); +MODULE_PARM(arp_ip_target, "1-12s"); +MODULE_PARM_DESC(arp_ip_target, "arp target in n.n.n.n form"); +MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); +MODULE_PARM(updelay, "i"); +MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); +MODULE_PARM(downdelay, "i"); +MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); + +extern void arp_send( int type, int ptype, u32 dest_ip, struct net_device *dev, + u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, + unsigned char *target_hw); + +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev); +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev); +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev); +static struct net_device_stats *bond_get_stats(struct net_device *dev); +static void bond_mii_monitor(struct net_device *dev); +static void bond_arp_monitor(struct net_device *dev); +static int bond_event(struct notifier_block *this, unsigned long event, void *ptr); +static void bond_restore_slave_flags(slave_t *slave); +static void bond_set_slave_inactive_flags(slave_t *slave); +static void bond_set_slave_active_flags(slave_t *slave); +static int bond_enslave(struct net_device *master, struct net_device *slave); +static int bond_release(struct net_device *master, struct net_device *slave); +static int bond_release_all(struct net_device *master); +static int bond_sethwaddr(struct net_device *master, struct net_device *slave); + +/* + * bond_get_info is the interface into the /proc filesystem. This is + * a different interface than the BOND_INFO_QUERY ioctl. That is done + * through the generic networking ioctl interface, and bond_info_query + * is the internal function which provides that information. 
+ */ +static int bond_get_info(char *buf, char **start, off_t offset, int length); + +/* #define BONDING_DEBUG 1 */ + +/* several macros */ + +#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ + (netif_running(dev) && netif_carrier_ok(dev))) -typedef struct slave +static void bond_restore_slave_flags(slave_t *slave) { - struct slave *next; - struct slave *prev; - struct net_device *dev; -} slave_t; + slave->dev->flags = slave->original_flags; +} -typedef struct bonding +static void bond_set_slave_inactive_flags(slave_t *slave) { - slave_t *next; - slave_t *prev; - struct net_device *master; + slave->state = BOND_STATE_BACKUP; + slave->dev->flags |= IFF_NOARP; +} - slave_t *current_slave; - struct net_device_stats stats; -} bonding_t; +static void bond_set_slave_active_flags(slave_t *slave) +{ + slave->state = BOND_STATE_ACTIVE; + slave->dev->flags &= ~IFF_NOARP; +}
+/*
+ * This function detaches the slave @slave from the list of @bond.
+ * WARNING: no check is made to verify if the slave effectively
+ * belongs to @bond. It returns @slave in case it's needed.
+ * Nothing is freed on return, structures are just unchained.
+ * If the bond->current_slave pointer was pointing to @slave,
+ * it's replaced with slave->next, or NULL if @slave was alone.
+ */
+static slave_t *bond_detach_slave(bonding_t *bond, slave_t *slave)
+{
+	if ((bond == NULL) || (slave == NULL) ||
+	   ((void *)bond == (void *)slave)) {
+		printk(KERN_ERR
+			"bond_detach_slave(): trying to detach "
+			"slave %p from bond %p\n", slave, bond);
+		return slave;
+	}
-static int bond_xmit(struct sk_buff *skb, struct net_device *dev);
-static struct net_device_stats *bond_get_stats(struct net_device *dev);
+	if (bond->next == slave) {  /* is the slave at the head ? */
+		if (bond->prev == slave) {  /* is the slave alone ? */
+			write_lock(&bond->ptrlock);
+			bond->current_slave = NULL; /* no slave anymore */
+			write_unlock(&bond->ptrlock);
+			bond->prev = bond->next = (slave_t *)bond;
+		} else { /* not alone */
+			bond->next = slave->next;
+			slave->next->prev = (slave_t *)bond;
+			bond->prev->next = slave->next;
+
+			write_lock(&bond->ptrlock);
+			if (bond->current_slave == slave) {
+				bond->current_slave = slave->next;
+			}
+			write_unlock(&bond->ptrlock);
+		}
+	}
+	else {
+		slave->prev->next = slave->next;
+		if (bond->prev == slave) {  /* is this slave the last one ? */
+			bond->prev = slave->prev;
+		} else {
+			slave->next->prev = slave->prev;
+		}
-static struct net_device *this_bond;
+		write_lock(&bond->ptrlock);
+		if (bond->current_slave == slave) {
+			bond->current_slave = slave->next;
+		}
+		write_unlock(&bond->ptrlock);
+	}
+
+	return slave;
+}
-static void release_one_slave(struct net_device *master, slave_t *slave) +/* + * if supports MII link status reporting, check its link + * and report it as a bit field in a short int : + * - 0x04 means link is up, + * - 0x20 means end of autonegociation + * If the device doesn't support MII, then we only report 0x24, + * meaning that the link is up and running since we can't check it.
+ */ +static u16 bond_check_dev_link(struct net_device *dev) { - bonding_t *bond = master->priv; + static int (* ioctl)(struct net_device *, struct ifreq *, int); + struct ifreq ifr; + u16 *data = (u16 *)&ifr.ifr_data; + + /* data[0] automagically filled by the ioctl */ + data[1] = 1; /* MII location 1 reports Link Status */ + + if (((ioctl = dev->do_ioctl) != NULL) && /* ioctl to access MII */ + (ioctl(dev, &ifr, SIOCGMIIPHY) == 0)) { + /* now, data[3] contains info about link status : + - data[3] & 0x04 means link up + - data[3] & 0x20 means end of auto-negociation + */ + return data[3]; + } else { + return MII_LINK_READY; /* spoof link up ( we can't check it) */ + } +} - spin_lock_bh(&master->xmit_lock); - if (bond->current_slave == slave) - bond->current_slave = slave->next; - slave->next->prev = slave->prev; - slave->prev->next = slave->next; - spin_unlock_bh(&master->xmit_lock); +static u16 bond_check_mii_link(bonding_t *bond) +{ + int has_active_interface = 0; + unsigned long flags; - netdev_set_master(slave->dev, NULL); + read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->ptrlock); + has_active_interface = (bond->current_slave != NULL); + read_unlock(&bond->ptrlock); + read_unlock_irqrestore(&bond->lock, flags); - dev_put(slave->dev); - kfree(slave); + return (has_active_interface ? MII_LINK_READY : 0); } -static int bond_close(struct net_device *master) +static int bond_open(struct net_device *dev) { - bonding_t *bond = master->priv; - slave_t *slave; - - while ((slave = bond->next) != (slave_t*)bond) - release_one_slave(master, slave); + struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer; + struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer; + MOD_INC_USE_COUNT; + + if (miimon > 0) { /* link check interval, in milliseconds. 
*/ + init_timer(timer); + timer->expires = jiffies + (miimon * HZ / 1000); + timer->data = (unsigned long)dev; + timer->function = (void *)&bond_mii_monitor; + add_timer(timer); + } + if (arp_interval> 0) { /* arp interval, in milliseconds. */ + init_timer(arp_timer); + arp_timer->expires = jiffies + (arp_interval * HZ / 1000); + arp_timer->data = (unsigned long)dev; + arp_timer->function = (void *)&bond_arp_monitor; + add_timer(arp_timer); + } return 0; } -static void bond_set_multicast_list(struct net_device *master) +static int bond_close(struct net_device *master) { + bonding_t *bond = (struct bonding *) master->priv; + //slave_t *slave; + unsigned long flags; + + write_lock_irqsave(&bond->lock, flags); + + if (miimon > 0) { /* link check interval, in milliseconds. */ + del_timer(&bond->mii_timer); + } + if (arp_interval> 0) { /* arp interval, in milliseconds. */ + del_timer(&bond->arp_timer); + } + + /* Release the bonded slaves */ + bond_release_all(master); + + write_unlock_irqrestore(&bond->lock, flags); + + MOD_DEC_USE_COUNT; + return 0; } -static int bond_enslave(struct net_device *master, struct net_device *dev) +static void set_multicast_list(struct net_device *master) { - int err; +/* bonding_t *bond = master->priv; slave_t *slave; - if (dev->type != master->type) + for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) { + slave->dev->mc_list = master->mc_list; + slave->dev->mc_count = master->mc_count; + slave->dev->flags = master->flags; + slave->dev->set_multicast_list(slave->dev); + } + */ +} + +/* + * This function counts the the number of attached + * slaves for use by bond_xmit_xor. 
+ */ +static void update_slave_cnt(bonding_t *bond) +{ + slave_t *slave = NULL; + + bond->slave_cnt = 0; + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) { + bond->slave_cnt++; + } +} + +/* enslave device to bond device */ +static int bond_enslave(struct net_device *master_dev, + struct net_device *slave_dev) +{ + bonding_t *bond = NULL; + slave_t *new_slave = NULL; + unsigned long flags = 0; + int ndx = 0; + int err = 0; + + if (master_dev == NULL || slave_dev == NULL) { return -ENODEV; + } + bond = (struct bonding *) master_dev->priv; + + if (slave_dev->do_ioctl == NULL) { + printk(KERN_DEBUG + "Warning : no link monitoring support for %s\n", + slave_dev->name); + } + write_lock_irqsave(&bond->lock, flags); + + /* not running. */ + if ((slave_dev->flags & IFF_UP) != IFF_UP) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, slave_dev is not running\n"); +#endif + write_unlock_irqrestore(&bond->lock, flags); + return -EINVAL; + } - if ((slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) + /* already enslaved */ + if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, Device was already enslaved\n"); +#endif + write_unlock_irqrestore(&bond->lock, flags); + return -EBUSY; + } + + if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) { + write_unlock_irqrestore(&bond->lock, flags); return -ENOMEM; + } + memset(new_slave, 0, sizeof(slave_t)); - memset(slave, 0, sizeof(slave_t)); + /* save flags before call to netdev_set_master */ + new_slave->original_flags = slave_dev->flags; + err = netdev_set_master(slave_dev, master_dev); - err = netdev_set_master(dev, master); if (err) { - kfree(slave); - return err; +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error %d calling netdev_set_master\n", err); +#endif + kfree(new_slave); + write_unlock_irqrestore(&bond->lock, flags); + return err; } - slave->dev = dev; + new_slave->dev = slave_dev; - spin_lock_bh(&master->xmit_lock); + /* 
+ * queue to the end of the slaves list, make the first element its + * successor, the last one its predecessor, and make it the bond's + * predecessor. + */ + new_slave->prev = bond->prev; + new_slave->prev->next = new_slave; + bond->prev = new_slave; + new_slave->next = bond->next; + + new_slave->delay = 0; + new_slave->link_failure_count = 0; + + /* check for initial state */ + if ((miimon <= 0) || ((bond_check_dev_link(slave_dev) & MII_LINK_READY) + == MII_LINK_READY)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n"); +#endif + new_slave->link = BOND_LINK_UP; + } + else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_DOWN\n"); +#endif + new_slave->link = BOND_LINK_DOWN; + } - dev_hold(dev); + /* if we're in active-backup mode, we need one and only one active + * interface. The backup interfaces will have their NOARP flag set + * because we need them to be completely deaf and not to respond to + * any ARP request on the network to avoid fooling a switch. Thus, + * since we guarantee that current_slave always point to the last + * usable interface, we just have to verify this interface's flag. 
+ */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + if (((bond->current_slave == NULL) + || (bond->current_slave->dev->flags & IFF_NOARP)) + && (new_slave->link == BOND_LINK_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This is the first active slave\n"); +#endif + /* first slave or no active slave yet, and this link + is OK, so make this interface the active one */ + bond->current_slave = new_slave; + bond_set_slave_active_flags(new_slave); + } + else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This is just a backup slave\n"); +#endif + bond_set_slave_inactive_flags(new_slave); + } + } else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This slave is always active in trunk mode\n"); +#endif + /* always active in trunk mode */ + new_slave->state = BOND_STATE_ACTIVE; + if (bond->current_slave == NULL) { + bond->current_slave = new_slave; + } + } - slave->prev = bond->prev; - slave->next = (slave_t*)bond; - slave->prev->next = slave; - slave->next->prev = slave; + update_slave_cnt(bond); - spin_unlock_bh(&master->xmit_lock); + write_unlock_irqrestore(&bond->lock, flags); + + /* + * !!! This is to support old versions of ifenslave. We can remove + * this in 2.5 because our ifenslave takes care of this for us. + * We check to see if the master has a mac address yet. If not, + * we'll give it the mac address of our slave device. + */ + for (ndx = 0; ndx < slave_dev->addr_len; ndx++) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Checking ndx=%d of master_dev->dev_addr\n", + ndx); +#endif + if (master_dev->dev_addr[ndx] != 0) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Found non-zero byte at ndx=%d\n", + ndx); +#endif + break; + } + } + if (ndx == slave_dev->addr_len) { + /* + * We got all the way through the address and it was + * all 0's. + */ +#ifdef BONDING_DEBUG + printk(KERN_CRIT "%s doesn't have a MAC address yet. 
", + master_dev->name); + printk(KERN_CRIT "Going to give assign it from %s.\n", + slave_dev->name); +#endif + bond_sethwaddr(master_dev, slave_dev); + } + + printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n", + master_dev->name, slave_dev->name, + new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", + new_slave->link == BOND_LINK_UP ? "n up" : " down"); return 0; } -static int bond_release(struct net_device *master, struct net_device *dev) +/* + * This function changes the active slave to slave . + * It returns -EINVAL in the following cases. + * - is not found in the list. + * - There is not active slave now. + * - is already active. + * - The link state of is not BOND_LINK_UP. + * - is not running. + * In these cases, this fuction does nothing. + * In the other cases, currnt_slave pointer is changed and 0 is returned. + */ +static int bond_change_active(struct net_device *master_dev, struct net_device *slave_dev) { - bonding_t *bond = master->priv; + bonding_t *bond; slave_t *slave; + slave_t *oldactive = NULL; + slave_t *newactive = NULL; + unsigned long flags; + int ret = 0; - if (dev->master != master) - return -EINVAL; + if (master_dev == NULL || slave_dev == NULL) { + return -ENODEV; + } - for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) { - if (slave->dev == dev) { - release_one_slave(master, slave); + bond = (struct bonding *) master_dev->priv; + write_lock_irqsave(&bond->lock, flags); + slave = (slave_t *)bond; + oldactive = bond->current_slave; + + while ((slave = slave->prev) != (slave_t *)bond) { + if(slave_dev == slave->dev) { + newactive = slave; break; } } + if ((newactive != NULL)&& + (oldactive != NULL)&& + (newactive != oldactive)&& + (newactive->link == BOND_LINK_UP)&& + IS_UP(newactive->dev)) { + bond_set_slave_inactive_flags(oldactive); + bond_set_slave_active_flags(newactive); + bond->current_slave = newactive; + printk("%s : activate %s(old : %s)\n", + master_dev->name, 
newactive->dev->name, + oldactive->dev->name); + } + else { + ret = -EINVAL; + } + write_unlock_irqrestore(&bond->lock, flags); + return ret; +} + +/* Choose a new valid interface from the pool, set it active + * and make it the current slave. If no valid interface is + * found, the oldest slave in BACK state is chosen and + * activated. If none is found, it's considered as no + * interfaces left so the current slave is set to NULL. + * The result is a pointer to the current slave. + * + * Since this function sends message tails through printk, the caller + * must have started something like `printk(KERN_INFO "xxxx ");'. + * + * Warning: must put locks around the call to this function if needed. + */ +slave_t *change_active_interface(bonding_t *bond) +{ + slave_t *newslave, *oldslave; + slave_t *bestslave = NULL; + int mintime; + + read_lock(&bond->ptrlock); + newslave = oldslave = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (newslave == NULL) { /* there were no active slaves left */ + if (bond->next != (slave_t *)bond) { /* found one slave */ + write_lock(&bond->ptrlock); + newslave = bond->current_slave = bond->next; + write_unlock(&bond->ptrlock); + } else { + printk (" but could not find any %s interface.\n", + (mode == BOND_MODE_ACTIVEBACKUP) ? 
"backup":"other"); + write_lock(&bond->ptrlock); + bond->current_slave = (slave_t *)NULL; + write_unlock(&bond->ptrlock); + return NULL; /* still no slave, return NULL */ + } + } + + mintime = updelay; + + do { + if (IS_UP(newslave->dev)) { + if (newslave->link == BOND_LINK_UP) { + /* this one is immediately usable */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_active_flags(newslave); + printk (" and making interface %s the active one.\n", + newslave->dev->name); + } + else { + printk (" and setting pointer to interface %s.\n", + newslave->dev->name); + } + + write_lock(&bond->ptrlock); + bond->current_slave = newslave; + write_unlock(&bond->ptrlock); + return newslave; + } + else if (newslave->link == BOND_LINK_BACK) { + /* link up, but waiting for stabilization */ + if (newslave->delay < mintime) { + mintime = newslave->delay; + bestslave = newslave; + } + } + } + } while ((newslave = newslave->next) != oldslave); + + /* no usable backup found, we'll see if we at least got a link that was + coming back for a long time, and could possibly already be usable. + */ + + if (bestslave != NULL) { + /* early take-over. */ + printk (" and making interface %s the active one %d ms earlier.\n", + bestslave->dev->name, + (updelay - bestslave->delay)*miimon); + + bestslave->delay = 0; + bestslave->link = BOND_LINK_UP; + bond_set_slave_active_flags(bestslave); + + write_lock(&bond->ptrlock); + bond->current_slave = bestslave; + write_unlock(&bond->ptrlock); + return bestslave; + } + + printk (" but could not find any %s interface.\n", + (mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); + + /* absolutely nothing found. let's return NULL */ + write_lock(&bond->ptrlock); + bond->current_slave = (slave_t *)NULL; + write_unlock(&bond->ptrlock); + return NULL; +} + +/* + * Try to release the slave device from the bond device + * It is legal to access current_slave without a lock because all the function + * is write-locked. 
+ * + * The rules for slave state should be: + * for Active/Backup: + * Active stays on all backups go down + * for Bonded connections: + * The first up interface should be left on and all others downed. + */ +static int bond_release(struct net_device *master, struct net_device *slave) +{ + bonding_t *bond; + slave_t *our_slave, *old_current; + unsigned long flags; + + if (master == NULL || slave == NULL) { + return -ENODEV; + } + + bond = (struct bonding *) master->priv; + + write_lock_irqsave(&bond->lock, flags); + + /* master already enslaved, or slave not enslaved, + or no slave for this master */ + if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) { + printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name); + write_unlock_irqrestore(&bond->lock, flags); + return -EINVAL; + } + + our_slave = (slave_t *)bond; + old_current = bond->current_slave; + while ((our_slave = our_slave->prev) != (slave_t *)bond) { + if (our_slave->dev == slave) { + bond_detach_slave(bond, our_slave); + + printk (KERN_INFO "%s: releasing %s interface %s", + master->name, + (our_slave->state == BOND_STATE_ACTIVE) ? 
"active" : "backup", + slave->name); + + if (our_slave == old_current) { + /* find a new interface and be verbose */ + change_active_interface(bond); + } else { + printk(".\n"); + } + + /* release the slave from its bond */ + + netdev_set_master(slave, NULL); + + /* only restore its RUNNING flag if monitoring set it down */ + if (slave->flags & IFF_UP) { + slave->flags |= IFF_RUNNING; + } + + if (slave->flags & IFF_NOARP || + bond->current_slave != NULL) { + dev_close(slave); + } + + bond_restore_slave_flags(our_slave); + kfree(our_slave); + + if (bond->current_slave == NULL) { + printk(KERN_INFO + "%s: now running without any active interface !\n", + master->name); + } + + update_slave_cnt(bond); + + write_unlock_irqrestore(&bond->lock, flags); + return 0; /* deletion OK */ + } + } + + /* if we get here, it's because the device was not found */ + write_unlock_irqrestore(&bond->lock, flags); + + printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name); + return -EINVAL; +} + +/* + * This function releases all slaves. + * Warning: must put write-locks around the call to this function. 
+ */ +static int bond_release_all(struct net_device *master) +{ + bonding_t *bond; + slave_t *our_slave; + struct net_device *slave_dev; + + if (master == NULL) { + return -ENODEV; + } + + if (master->flags & IFF_SLAVE) { + return -EINVAL; + } + + bond = (struct bonding *) master->priv; + bond->current_slave = NULL; + + while ((our_slave = bond->prev) != (slave_t *)bond) { + slave_dev = our_slave->dev; + bond->prev = our_slave->prev; + + kfree(our_slave); + + netdev_set_master(slave_dev, NULL); + + /* only restore its RUNNING flag if monitoring set it down */ + if (slave_dev->flags & IFF_UP) + slave_dev->flags |= IFF_RUNNING; + + if (slave_dev->flags & IFF_NOARP) + dev_close(slave_dev); + } + bond->next = (slave_t *)bond; + bond->slave_cnt = 0; + printk (KERN_INFO "%s: releases all slaves\n", master->name); + return 0; } -/* It is pretty silly, SIOCSIFHWADDR exists to make this. */ +/* this function is called regularly to monitor each slave's link. */ +static void bond_mii_monitor(struct net_device *master) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave, *bestslave, *oldcurrent; + unsigned long flags; + int slave_died = 0; + + read_lock_irqsave(&bond->lock, flags); + + /* we will try to read the link status of each of our slaves, and + * set their IFF_RUNNING flag appropriately. For each slave not + * supporting MII status, we won't do anything so that a user-space + * program could monitor the link itself if needed. 
+ */ + + bestslave = NULL; + slave = (slave_t *)bond; + + read_lock(&bond->ptrlock); + oldcurrent = bond->current_slave; + read_unlock(&bond->ptrlock); + + while ((slave = slave->prev) != (slave_t *)bond) { + /* use updelay+1 to match an UP slave even when updelay is 0 */ + int mindelay = updelay + 1; + struct net_device *dev = slave->dev; + u16 link_state; + + link_state = bond_check_dev_link(dev); + + switch (slave->link) { + case BOND_LINK_UP: /* the link was up */ + if ((link_state & MII_LINK_UP) == MII_LINK_UP) { + /* link stays up, tell that this one + is immediately available */ + if (IS_UP(dev) && (mindelay > -2)) { + /* -2 is the best case : + this slave was already up */ + mindelay = -2; + bestslave = slave; + } + break; + } + else { /* link going down */ + slave->link = BOND_LINK_FAIL; + slave->delay = downdelay; + if (slave->link_failure_count < UINT_MAX) { + slave->link_failure_count++; + } + if (downdelay > 0) { + printk (KERN_INFO + "%s: link status down for %sinterface " + "%s, disabling it in %d ms.\n", + master->name, + IS_UP(dev) + ? ((mode == BOND_MODE_ACTIVEBACKUP) + ? ((slave == oldcurrent) + ? "active " : "backup ") + : "") + : "idle ", + dev->name, + downdelay * miimon); + } + } + /* no break ! 
fall through the BOND_LINK_FAIL test to + ensure proper action to be taken + */ + case BOND_LINK_FAIL: /* the link has just gone down */ + if ((link_state & MII_LINK_UP) == 0) { + /* link stays down */ + if (slave->delay <= 0) { + /* link down for too long time */ + slave->link = BOND_LINK_DOWN; + /* in active/backup mode, we must + completely disable this interface */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_inactive_flags(slave); + } + printk(KERN_INFO + "%s: link status definitely down " + "for interface %s, disabling it", + master->name, + dev->name); + + read_lock(&bond->ptrlock); + if (slave == bond->current_slave) { + read_unlock(&bond->ptrlock); + /* find a new interface and be verbose */ + change_active_interface(bond); + } else { + read_unlock(&bond->ptrlock); + printk(".\n"); + } + slave_died = 1; + } else { + slave->delay--; + } + } else if ((link_state & MII_LINK_READY) == MII_LINK_READY) { + /* link up again */ + slave->link = BOND_LINK_UP; + printk(KERN_INFO + "%s: link status up again after %d ms " + "for interface %s.\n", + master->name, + (downdelay - slave->delay) * miimon, + dev->name); + + if (IS_UP(dev) && (mindelay > -1)) { + /* -1 is a good case : this slave went + down only for a short time */ + mindelay = -1; + bestslave = slave; + } + } + break; + case BOND_LINK_DOWN: /* the link was down */ + if ((link_state & MII_LINK_READY) != MII_LINK_READY) { + /* the link stays down, nothing more to do */ + break; + } else { /* link going up */ + slave->link = BOND_LINK_BACK; + slave->delay = updelay; + + if (updelay > 0) { + /* if updelay == 0, no need to + advertise about a 0 ms delay */ + printk (KERN_INFO + "%s: link status up for interface" + " %s, enabling it in %d ms.\n", + master->name, + dev->name, + updelay * miimon); + } + } + /* no break ! fall through the BOND_LINK_BACK state in + case there's something to do. 
+ */ + case BOND_LINK_BACK: /* the link has just come back */ + if ((link_state & MII_LINK_UP) == 0) { + /* link down again */ + slave->link = BOND_LINK_DOWN; + printk(KERN_INFO + "%s: link status down again after %d ms " + "for interface %s.\n", + master->name, + (updelay - slave->delay) * miimon, + dev->name); + } + else if ((link_state & MII_LINK_READY) == MII_LINK_READY) { + /* link stays up */ + if (slave->delay == 0) { + /* now the link has been up for long time enough */ + slave->link = BOND_LINK_UP; + + if (mode == BOND_MODE_ACTIVEBACKUP) { + /* prevent it from being the active one */ + slave->state = BOND_STATE_BACKUP; + } + else { + /* make it immediately active */ + slave->state = BOND_STATE_ACTIVE; + } + + printk(KERN_INFO + "%s: link status definitely up " + "for interface %s.\n", + master->name, + dev->name); + } + else + slave->delay--; + + /* we'll also look for the mostly eligible slave */ + if (IS_UP(dev) && (slave->delay < mindelay)) { + mindelay = slave->delay; + bestslave = slave; + } + } + break; + } /* end of switch */ + } /* end of while */ + + /* + * if there's no active interface and we discovered that one + * of the slaves could be activated earlier, so we do it. + */ + read_lock(&bond->ptrlock); + oldcurrent = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (oldcurrent == NULL) { /* no active interface at the moment */ + if (bestslave != NULL) { /* last chance to find one ? 
*/ + if (bestslave->link == BOND_LINK_UP) { + printk (KERN_INFO + "%s: making interface %s the new active one.\n", + master->name, bestslave->dev->name); + } else { + printk (KERN_INFO + "%s: making interface %s the new " + "active one %d ms earlier.\n", + master->name, bestslave->dev->name, + (updelay - bestslave->delay) * miimon); + + bestslave->delay= 0; + bestslave->link = BOND_LINK_UP; + } + + if (mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_active_flags(bestslave); + } else { + bestslave->state = BOND_STATE_ACTIVE; + } + write_lock(&bond->ptrlock); + bond->current_slave = bestslave; + write_unlock(&bond->ptrlock); + } else if (slave_died) { + /* print this message only once a slave has just died */ + printk(KERN_INFO + "%s: now running without any active interface !\n", + master->name); + } + } + + read_unlock_irqrestore(&bond->lock, flags); + /* re-arm the timer */ + mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000)); +} + +/* + * this function is called regularly to monitor each slave's link + * ensuring that traffic is being sent and received. If the adapter + * has been dormant, then an arp is transmitted to generate traffic + */ +static void bond_arp_monitor(struct net_device *master) +{ + bonding_t *bond; + unsigned long flags; + slave_t *slave; + int the_delta_in_ticks = arp_interval * HZ / 1000; + int next_timer = jiffies + (arp_interval * HZ / 1000); + + bond = (struct bonding *) master->priv; + if (master->priv == NULL) { + mod_timer(&bond->arp_timer, next_timer); + return; + } + + read_lock_irqsave(&bond->lock, flags); + + if (!IS_UP(master)) { + mod_timer(&bond->arp_timer, next_timer); + goto arp_monitor_out; + } + + + if (rtnl_shlock_nowait()) { + goto arp_monitor_out; + } + + if (rtnl_exlock_nowait()) { + rtnl_shunlock(); + goto arp_monitor_out; + } + + /* see if any of the previous devices are up now (i.e. they have seen a + * response from an arp request sent by another adapter, since they + * have the same hardware address). 
+ */ + + slave = (slave_t *)bond; + while ((slave = slave->prev) != (slave_t *)bond) { + + read_lock(&bond->ptrlock); + if ( (!(slave->link == BOND_LINK_UP)) + && (slave!= bond->current_slave) ) { + + read_unlock(&bond->ptrlock); + + if ( ((jiffies - slave->dev->trans_start) <= + the_delta_in_ticks) && + ((jiffies - slave->dev->last_rx) <= + the_delta_in_ticks) ) { + + slave->link = BOND_LINK_UP; + write_lock(&bond->ptrlock); + if (bond->current_slave == NULL) { + slave->state = BOND_STATE_ACTIVE; + bond->current_slave = slave; + } + if (slave!=bond->current_slave) { + slave->dev->flags |= IFF_NOARP; + } + write_unlock(&bond->ptrlock); + } else { + if ((jiffies - slave->dev->last_rx) <= + the_delta_in_ticks) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, + arp_target, slave->dev, + my_ip, arp_target_hw_addr, + slave->dev->dev_addr, + arp_target_hw_addr); + } + } + } else + read_unlock(&bond->ptrlock); + } + + read_lock(&bond->ptrlock); + slave = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (slave != 0) { + + /* see if you need to take down the current_slave, since + * you haven't seen an arp in 2*arp_intervals + */ + + if ( ((jiffies - slave->dev->trans_start) >= + (2*the_delta_in_ticks)) || + ((jiffies - slave->dev->last_rx) >= + (2*the_delta_in_ticks)) ) { + + if (slave->link == BOND_LINK_UP) { + slave->link = BOND_LINK_DOWN; + slave->state = BOND_STATE_BACKUP; + /* + * we want to see arps, otherwise we couldn't + * bring the adapter back online... 
+ */ + printk(KERN_INFO "%s: link status definitely " + "down for interface %s, " + "disabling it", + slave->dev->master->name, + slave->dev->name); + /* find a new interface and be verbose */ + change_active_interface(bond); + read_lock(&bond->ptrlock); + slave = bond->current_slave; + read_unlock(&bond->ptrlock); + } + } + + /* + * ok, we know up/down, so just send a arp out if there has + * been no activity for a while + */ + + if (slave != NULL ) { + if ( ((jiffies - slave->dev->trans_start) >= + the_delta_in_ticks) || + ((jiffies - slave->dev->last_rx) >= + the_delta_in_ticks) ) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, + arp_target, slave->dev, + my_ip, arp_target_hw_addr, + slave->dev->dev_addr, + arp_target_hw_addr); + } + } + + } + + /* if we have no current slave.. try sending + * an arp on all of the interfaces + */ + + read_lock(&bond->ptrlock); + if (bond->current_slave == NULL) { + read_unlock(&bond->ptrlock); + slave = (slave_t *)bond; + while ((slave = slave->prev) != (slave_t *)bond) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, arp_target, + slave->dev, my_ip, arp_target_hw_addr, + slave->dev->dev_addr, arp_target_hw_addr); + } + } + else { + read_unlock(&bond->ptrlock); + } + + rtnl_exunlock(); + rtnl_shunlock(); + +arp_monitor_out: + read_unlock_irqrestore(&bond->lock, flags); + + /* re-arm the timer */ + mod_timer(&bond->arp_timer, next_timer); +} + +#define isdigit(c) (c >= '0' && c <= '9') +__inline static int atoi( char **s) +{ +int i=0; +while (isdigit(**s)) + i = i*20 + *((*s)++) - '0'; +return i; +} + +#define isascii(c) (((unsigned char)(c))<=0x7f) +#define LF 0xA +#define isspace(c) (c==' ' || c==' '|| c==LF) +typedef uint32_t in_addr_t; + +int +my_inet_aton(char *cp, unsigned long *the_addr) { + static const in_addr_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; + in_addr_t val; + char c; + union iaddr { + uint8_t bytes[4]; + uint32_t word; + } res; + uint8_t *pp = res.bytes; + int digit,base; + + res.word = 0; + + c = *cp; + for (;;) { + /* 
+ * Collect number up to ``.''. + * Values are specified as for C: + * 0x=hex, 0=octal, isdigit=decimal. + */ + if (!isdigit(c)) goto ret_0; + val = 0; base = 10; digit = 0; + for (;;) { + if (isdigit(c)) { + val = (val * base) + (c - '0'); + c = *++cp; + digit = 1; + } else { + break; + } + } + if (c == '.') { + /* + * Internet format: + * a.b.c.d + * a.b.c (with c treated as 16 bits) + * a.b (with b treated as 24 bits) + */ + if (pp > res.bytes + 2 || val > 0xff) { + goto ret_0; + } + *pp++ = val; + c = *++cp; + } else + break; + } + /* + * Check for trailing characters. + */ + if (c != '\0' && (!isascii(c) || !isspace(c))) { + goto ret_0; + } + /* + * Did we get a valid digit? + */ + if (!digit) { + goto ret_0; + } + + /* Check whether the last part is in its limits depending on + the number of parts in total. */ + if (val > max[pp - res.bytes]) { + goto ret_0; + } + + if (the_addr!= NULL) { + *the_addr = res.word | htonl (val); + } + + return (1); + +ret_0: + return (0); +} static int bond_sethwaddr(struct net_device *master, struct net_device *slave) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "bond_sethwaddr: master=%x\n", (unsigned int)master); + printk(KERN_CRIT "bond_sethwaddr: slave=%x\n", (unsigned int)slave); + printk(KERN_CRIT "bond_sethwaddr: slave->addr_len=%d\n", slave->addr_len); +#endif memcpy(master->dev_addr, slave->dev_addr, slave->addr_len); return 0; } -static int bond_ioctl(struct net_device *master, struct ifreq *ifr, int cmd) +static int bond_info_query(struct net_device *master, struct ifbond *info) { - struct net_device *slave = __dev_get_by_name(ifr->ifr_slave); + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave; + unsigned long flags; - if (slave == NULL) + info->bond_mode = mode; + info->num_slaves = 0; + info->miimon = miimon; + + read_lock_irqsave(&bond->lock, flags); + for (slave = bond->prev; slave!=(slave_t *)bond; slave = slave->prev) { + info->num_slaves++; + } + read_unlock_irqrestore(&bond->lock, flags); + + 
return 0; +} + +static int bond_slave_info_query(struct net_device *master, + struct ifslave *info) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave; + int cur_ndx = 0; + unsigned long flags; + + if (info->slave_id < 0) { return -ENODEV; + } - switch (cmd) { - case BOND_ENSLAVE: - return bond_enslave(master, slave); - case BOND_RELEASE: - return bond_release(master, slave); - case BOND_SETHWADDR: - return bond_sethwaddr(master, slave); - default: - return -EOPNOTSUPP; + read_lock_irqsave(&bond->lock, flags); + for (slave = bond->prev; + slave != (slave_t *)bond && cur_ndx < info->slave_id; + slave = slave->prev) { + cur_ndx++; } + read_unlock_irqrestore(&bond->lock, flags); + + if (cur_ndx == info->slave_id) { + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = slave->state; + info->link_failure_count = slave->link_failure_count; + } else { + return -ENODEV; + } + + return 0; } -static int bond_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *slave = ptr; - - if (this_bond == NULL || - this_bond == slave || - this_bond != slave->master) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UNREGISTER: - bond_release(this_bond, slave); - break; +static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) +{ + struct net_device *slave_dev = NULL; + struct ifbond *u_binfo = NULL, k_binfo; + struct ifslave *u_sinfo = NULL, k_sinfo; + u16 *data = NULL; + int ret = 0; + +#ifdef BONDING_DEBUG + printk(KERN_INFO "bond_ioctl: master=%s, cmd=%d\n", + master_dev->name, cmd); +#endif + + switch (cmd) { + case SIOCGMIIPHY: + data = (u16 *)&ifr->ifr_data; + if (data == NULL) { + return -EINVAL; + } + data[0] = 0; + /* Fall Through */ + case SIOCGMIIREG: + /* + * We do this again just in case we were called by SIOCGMIIREG + * instead of SIOCGMIIPHY. 
+ */ + data = (u16 *)&ifr->ifr_data; + if (data == NULL) { + return -EINVAL; + } + if (data[1] == 1) { + data[3] = bond_check_mii_link( + (struct bonding *)master_dev->priv); + } + return 0; + case BOND_INFO_QUERY_OLD: + case SIOCBONDINFOQUERY: + u_binfo = (struct ifbond *)ifr->ifr_data; + if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { + return -EFAULT; + } + ret = bond_info_query(master_dev, &k_binfo); + if (ret == 0) { + if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { + return -EFAULT; + } + } + return ret; + case BOND_SLAVE_INFO_QUERY_OLD: + case SIOCBONDSLAVEINFOQUERY: + u_sinfo = (struct ifslave *)ifr->ifr_data; + if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { + return -EFAULT; + } + ret = bond_slave_info_query(master_dev, &k_sinfo); + if (ret == 0) { + if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { + return -EFAULT; + } + } + return ret; } - return NOTIFY_DONE; + if (!capable(CAP_NET_ADMIN)) { + return -EPERM; + } + + slave_dev = dev_get_by_name(ifr->ifr_slave); + +#ifdef BONDING_DEBUG + printk(KERN_INFO "slave_dev=%x: \n", (unsigned int)slave_dev); + printk(KERN_INFO "slave_dev->name=%s: \n", slave_dev->name); +#endif + + if (slave_dev == NULL) { + ret = -ENODEV; + } else { + switch (cmd) { + case BOND_ENSLAVE_OLD: + case SIOCBONDENSLAVE: + ret = bond_enslave(master_dev, slave_dev); + break; + case BOND_RELEASE_OLD: + case SIOCBONDRELEASE: + ret = bond_release(master_dev, slave_dev); + break; + case BOND_SETHWADDR_OLD: + case SIOCBONDSETHWADDR: + ret = bond_sethwaddr(master_dev, slave_dev); + break; + case BOND_CHANGE_ACTIVE_OLD: + case SIOCBONDCHANGEACTIVE: + if (mode == BOND_MODE_ACTIVEBACKUP) { + ret = bond_change_active(master_dev, slave_dev); + } + else { + ret = -EINVAL; + } + break; + default: + ret = -EOPNOTSUPP; + } + dev_put(slave_dev); + } + return ret; } -static struct notifier_block bond_netdev_notifier={ - notifier_call: bond_event -}; +#ifdef CONFIG_NET_FASTROUTE +static int bond_accept_fastpath(struct 
net_device *dev, struct dst_entry *dst) +{ + return -1; +} +#endif -static int __init bond_init(struct net_device *dev) +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) { - bonding_t *bond; + slave_t *slave, *start_at; + struct bonding *bond = (struct bonding *) dev->priv; + unsigned long flags; - bond = kmalloc(sizeof(struct bonding), GFP_KERNEL); - if (bond == NULL) - return -ENOMEM; + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } - memset(bond, 0, sizeof(struct bonding)); - bond->next = (slave_t*)bond; - bond->prev = (slave_t*)bond; - bond->master = dev; - bond->current_slave = (slave_t*)bond; - dev->priv = bond; + read_lock_irqsave(&bond->lock, flags); - /* Initialize the device structure. */ - dev->hard_start_xmit = bond_xmit; - dev->get_stats = bond_get_stats; - dev->stop = bond_close; - dev->set_multicast_list = bond_set_multicast_list; - dev->do_ioctl = bond_ioctl; + read_lock(&bond->ptrlock); + slave = start_at = bond->current_slave; + read_unlock(&bond->ptrlock); - /* Fill in the fields of the device structure with ethernet-generic - values. 
*/ - ether_setup(dev); - dev->tx_queue_len = 0; - dev->flags |= IFF_MASTER; + if (slave == NULL) { /* we're at the root, get the first slave */ + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } - this_bond = dev; + do { + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == BOND_STATE_ACTIVE)) { - register_netdevice_notifier(&bond_netdev_notifier); + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + + write_lock(&bond->ptrlock); + bond->current_slave = slave->next; + write_unlock(&bond->ptrlock); + + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } + } while ((slave = slave->next) != start_at); + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); return 0; } -static int bond_xmit(struct sk_buff *skb, struct net_device *dev) +/* + * in XOR mode, we determine the output device by performing xor on + * the source and destination hw addresses. If this device is not + * enabled, find the next slave following this xor slave. 
+ */ +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) { - bonding_t *bond = dev->priv; slave_t *slave, *start_at; - int pkt_len = skb->len; + struct bonding *bond = (struct bonding *) dev->priv; + unsigned long flags; + struct ethhdr *data = (struct ethhdr *)skb->data; + int slave_no; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } - slave = start_at = bond->current_slave; + read_lock_irqsave(&bond->lock, flags); + slave = bond->prev; + + /* we're at the root, get the first slave */ + if ((slave == NULL) || (slave->dev == NULL)) { + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } + + slave_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % bond->slave_cnt; + + while ( (slave_no > 0) && (slave != (slave_t *)bond) ) { + slave = slave->prev; + slave_no--; + } + start_at = slave; do { - if (slave == (slave_t*)bond) - continue; + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == BOND_STATE_ACTIVE)) { - if (netif_running(slave->dev) && netif_carrier_ok(slave->dev)) { - bond->current_slave = slave->next; skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); - if (dev_queue_xmit(skb)) { - bond->stats.tx_dropped++; - } else { - bond->stats.tx_packets++; - bond->stats.tx_bytes += pkt_len; - } + read_unlock_irqrestore(&bond->lock, flags); return 0; } } while ((slave = slave->next) != start_at); - bond->stats.tx_dropped++; - kfree_skb(skb); + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; +} + +/* + * in active-backup mode, we know that bond->current_slave is always valid if + * the bond has a usable interface. 
+ */ +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) +{ + struct bonding *bond = (struct bonding *) dev->priv; + unsigned long flags; + int ret; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + /* if we are sending arp packets, try to at least + identify our own ip address */ + if ( (arp_interval > 0) && (my_ip==0) && + (skb->protocol == __constant_htons(ETH_P_ARP) ) ) { + char *the_ip = (((char *)skb->data)) + + sizeof(struct ethhdr) + + sizeof(struct arphdr) + + ETH_ALEN; + memcpy(&my_ip, the_ip, 4); + } + + /* if we are sending arp packets and don't know + the target hw address, save it so we don't need + to use a broadcast address */ + if ( (arp_interval > 0) && (arp_target_hw_addr==NULL) && + (skb->protocol == __constant_htons(ETH_P_IP) ) ) { + struct ethhdr *eth_hdr = + (struct ethhdr *) (((char *)skb->data)); + arp_target_hw_addr = kmalloc(ETH_ALEN, GFP_KERNEL); + memcpy(arp_target_hw_addr, eth_hdr->h_dest, ETH_ALEN); + } + + read_lock_irqsave(&bond->lock, flags); + + read_lock(&bond->ptrlock); + if (bond->current_slave != NULL) { /* one usable interface */ + skb->dev = bond->current_slave->dev; + read_unlock(&bond->ptrlock); + skb->priority = 1; + ret = dev_queue_xmit(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } + else { + read_unlock(&bond->ptrlock); + } + + /* no suitable interface, frame not sent */ +#ifdef BONDING_DEBUG + printk(KERN_INFO "There was no suitable interface, so we don't transmit\n"); +#endif + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); return 0; } static struct net_device_stats *bond_get_stats(struct net_device *dev) { bonding_t *bond = dev->priv; + struct net_device_stats *stats = bond->stats, *sstats; + slave_t *slave; + unsigned long flags; - return &bond->stats; + memset(bond->stats, 0, sizeof(struct net_device_stats)); + + read_lock_irqsave(&bond->lock, flags); + + for (slave = bond->prev; slave!=(slave_t *)bond; slave = 
slave->prev) { + sstats = slave->dev->get_stats(slave->dev); + + stats->rx_packets += sstats->rx_packets; + stats->rx_bytes += sstats->rx_bytes; + stats->rx_errors += sstats->rx_errors; + stats->rx_dropped += sstats->rx_dropped; + + stats->tx_packets += sstats->tx_packets; + stats->tx_bytes += sstats->tx_bytes; + stats->tx_errors += sstats->tx_errors; + stats->tx_dropped += sstats->tx_dropped; + + stats->multicast += sstats->multicast; + stats->collisions += sstats->collisions; + + stats->rx_length_errors += sstats->rx_length_errors; + stats->rx_over_errors += sstats->rx_over_errors; + stats->rx_crc_errors += sstats->rx_crc_errors; + stats->rx_frame_errors += sstats->rx_frame_errors; + stats->rx_fifo_errors += sstats->rx_fifo_errors; + stats->rx_missed_errors += sstats->rx_missed_errors; + + stats->tx_aborted_errors += sstats->tx_aborted_errors; + stats->tx_carrier_errors += sstats->tx_carrier_errors; + stats->tx_fifo_errors += sstats->tx_fifo_errors; + stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; + stats->tx_window_errors += sstats->tx_window_errors; + + } + + read_unlock_irqrestore(&bond->lock, flags); + return stats; } -static struct net_device dev_bond; +static int bond_get_info(char *buf, char **start, off_t offset, int length) +{ + bonding_t *bond = these_bonds; + int len = 0; + off_t begin = 0; + u16 link; + slave_t *slave = NULL; + unsigned long flags; + + while (bond != NULL) { + /* + * This function locks the mutex, so we can't lock it until + * afterwards + */ + link = bond_check_mii_link(bond); + + len += sprintf(buf + len, "Bonding Mode: "); + len += sprintf(buf + len, "%s\n", mode ? 
"active-backup" : "load balancing"); + + if (mode == BOND_MODE_ACTIVEBACKUP) { + read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->ptrlock); + if (bond->current_slave != NULL) { + len += sprintf(buf + len, + "Currently Active Slave: %s\n", + bond->current_slave->dev->name); + } + read_unlock(&bond->ptrlock); + read_unlock_irqrestore(&bond->lock, flags); + } + + len += sprintf(buf + len, "MII Status: "); + len += sprintf(buf + len, + link == MII_LINK_READY ? "up\n" : "down\n"); + len += sprintf(buf + len, "MII Polling Interval (ms): %d\n", + miimon); + len += sprintf(buf + len, "Up Delay (ms): %d\n", updelay); + len += sprintf(buf + len, "Down Delay (ms): %d\n", downdelay); + + read_lock_irqsave(&bond->lock, flags); + for (slave = bond->prev; slave != (slave_t *)bond; + slave = slave->prev) { + len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name); + + len += sprintf(buf + len, "MII Status: "); + + len += sprintf(buf + len, + slave->link == BOND_LINK_UP ? + "up\n" : "down\n"); + len += sprintf(buf + len, "Link Failure Count: %d\n", + slave->link_failure_count); + } + read_unlock_irqrestore(&bond->lock, flags); + + /* + * Figure out the calcs for the /proc/net interface + */ + *start = buf + (offset - begin); + len -= (offset - begin); + if (len > length) { + len = length; + } + if (len < 0) { + len = 0; + } + + + bond = bond->next_bond; + } + return len; +} + +static int bond_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct bonding *this_bond=(struct bonding *)these_bonds; + struct bonding *last_bond; + struct net_device *event_dev = (struct net_device *)ptr; + + /* while there are bonds configured */ + while (this_bond != NULL) { + if (this_bond == event_dev->priv ) { + switch (event) { + case NETDEV_UNREGISTER: + /* + * remove this bond from a linked list of + * bonds + */ + if (this_bond == these_bonds) { + these_bonds = this_bond->next_bond; + } else { + for (last_bond = these_bonds; + last_bond != NULL; + 
last_bond = last_bond->next_bond) {
+						if (last_bond->next_bond ==
+						    this_bond) {
+							last_bond->next_bond =
+								this_bond->next_bond;
+						}
+					}
+				}
+				return NOTIFY_DONE;
+
+			default:
+				return NOTIFY_DONE;
+			}
+		} else if (this_bond->device == event_dev->master) {
+			switch (event) {
+			case NETDEV_UNREGISTER:
+				bond_release(this_bond->device, event_dev);
+				break;
+			}
+			return NOTIFY_DONE;
+		}
+		this_bond = this_bond->next_bond;
+	}
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that hands netdevice events to bond_event(). */
+static struct notifier_block bond_netdev_notifier={
+	bond_event,
+	NULL,
+	0
+};
+
+/*
+ * bond_init - set up the private state of one bond device.
+ *
+ * bonding_init() installs this function as dev->init before it calls
+ * register_netdev().  It allocates the bonding_t and its statistics block,
+ * starts with an empty slave ring (next/prev point back at the bond),
+ * selects the transmit routine for the module-wide 'mode', fills in the
+ * device methods, creates /proc/net/<name>/info when CONFIG_PROC_FS is set
+ * and appends the bond to the these_bonds list.  The netdevice notifier is
+ * registered together with the first bond.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation or a /proc entry cannot
+ * be created, -EINVAL for an unknown mode.  On failure everything allocated
+ * here is freed again and dev->priv is reset to NULL.
+ */
+static int __init bond_init(struct net_device *dev)
+{
+	bonding_t *bond, *this_bond, *last_bond;
+	int err;
+
+#ifdef BONDING_DEBUG
+	printk (KERN_INFO "Begin bond_init for %s\n", dev->name);
+#endif
+	bond = kmalloc(sizeof(struct bonding), GFP_KERNEL);
+	if (bond == NULL) {
+		return -ENOMEM;
+	}
+	memset(bond, 0, sizeof(struct bonding));
+
+	/* initialize rwlocks */
+	rwlock_init(&bond->lock);
+	rwlock_init(&bond->ptrlock);
+
+	bond->stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+	if (bond->stats == NULL) {
+		kfree(bond);
+		return -ENOMEM;
+	}
+	memset(bond->stats, 0, sizeof(struct net_device_stats));
+
+	bond->next = bond->prev = (slave_t *)bond;
+	bond->current_slave = NULL;
+	bond->device = dev;
+	dev->priv = bond;
+
+	/* Initialize the device structure. */
+	if (mode == BOND_MODE_ACTIVEBACKUP) {
+		dev->hard_start_xmit = bond_xmit_activebackup;
+	} else if (mode == BOND_MODE_ROUNDROBIN) {
+		dev->hard_start_xmit = bond_xmit_roundrobin;
+	} else if (mode == BOND_MODE_XOR) {
+		dev->hard_start_xmit = bond_xmit_xor;
+	} else {
+		printk(KERN_ERR "Unknown bonding mode %d\n", mode);
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	dev->get_stats = bond_get_stats;
+	dev->open = bond_open;
+	dev->stop = bond_close;
+	dev->set_multicast_list = set_multicast_list;
+	dev->do_ioctl = bond_ioctl;
+
+	/*
+	 * Fill in the fields of the device structure with ethernet-generic
+	 * values.
+	 */
+
+	ether_setup(dev);
+
+	dev->tx_queue_len = 0;
+	dev->flags |= IFF_MASTER|IFF_MULTICAST;
+#ifdef CONFIG_NET_FASTROUTE
+	dev->accept_fastpath = bond_accept_fastpath;
+#endif
+
+	printk(KERN_INFO "%s registered with", dev->name);
+	if (miimon > 0) {
+		printk(" MII link monitoring set to %d ms", miimon);
+		/*
+		 * updelay and downdelay are shared by all bonds.  Scale them
+		 * by the monitoring interval only while the first bond is
+		 * being set up; repeating the division for every device
+		 * would shrink them once per bond.
+		 */
+		if (first_pass == 1) {
+			updelay /= miimon;
+			downdelay /= miimon;
+		}
+	} else {
+		printk("out MII link monitoring");
+	}
+	/* name all three modes; a plain "mode ?" test mislabels xor */
+	printk(", in %s mode.\n",
+	       mode == BOND_MODE_ACTIVEBACKUP ? "active-backup" :
+	       mode == BOND_MODE_XOR ? "xor" : "bonding");
+
+#ifdef CONFIG_PROC_FS
+	bond->bond_proc_dir = proc_mkdir(dev->name, proc_net);
+	if (bond->bond_proc_dir == NULL) {
+		printk(KERN_ERR "%s: Cannot init /proc/net/%s/\n",
+			dev->name, dev->name);
+		err = -ENOMEM;
+		goto out_free;
+	}
+	bond->bond_proc_info_file =
+		create_proc_info_entry("info", 0, bond->bond_proc_dir,
+					bond_get_info);
+	if (bond->bond_proc_info_file == NULL) {
+		printk(KERN_ERR "%s: Cannot init /proc/net/%s/info\n",
+			dev->name, dev->name);
+		remove_proc_entry(dev->name, proc_net);
+		err = -ENOMEM;
+		goto out_free;
+	}
+#endif /* CONFIG_PROC_FS */
+
+	if (first_pass == 1) {
+		these_bonds = bond;
+		register_netdevice_notifier(&bond_netdev_notifier);
+		first_pass = 0;
+	} else {
+		last_bond = these_bonds;
+		this_bond = these_bonds->next_bond;
+		while (this_bond != NULL) {
+			last_bond = this_bond;
+			this_bond = this_bond->next_bond;
+		}
+		last_bond->next_bond = bond;
+	}
+
+	return 0;
+
+out_free:
+	/* do not leave dev->priv pointing at freed memory */
+	dev->priv = NULL;
+	kfree(bond->stats);
+	kfree(bond);
+	return err;
+}
+
+/*
+ * Take one fully registered bond device down again: remove its /proc
+ * entries, unregister the net_device and free what bond_init() allocated.
+ * Shared by the error path of bonding_init() and by bonding_exit().
+ */
+static void bond_teardown_dev(struct net_device *dev)
+{
+	struct bonding *bond = (struct bonding *) dev->priv;
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("info", bond->bond_proc_dir);
+	remove_proc_entry(dev->name, proc_net);
+#endif
+	unregister_netdev(dev);
+	kfree(bond->stats);
+	kfree(bond);
+
+	dev->priv = NULL;
+}
+
+/*
+static int __init bond_probe(struct net_device *dev)
+{
+	bond_init(dev);
+	return 0;
+}
+ */

 static int __init bonding_init(void)
 {
-	/* Find a name for this unit */
+	int no;
 	int err;
-
-	dev_bond.init = bond_init;
-	err = dev_alloc_name(&dev_bond,"bond%d");
-	if (err<0)
-		return err;
-
-	SET_MODULE_OWNER(&dev_bond);
-	if (register_netdev(&dev_bond) != 0)
-		return -EIO;
+	/* Find a name for this unit */
+	static struct net_device *dev_bond = NULL;
+	/* most devices whose array size in bytes still fits in an int */
+	const int bond_limit = (int)(INT_MAX / sizeof(struct net_device));
+
+	if (max_bonds < 1 || max_bonds > bond_limit) {
+		printk(KERN_WARNING
+		       "bonding_init(): max_bonds (%d) not in range %d-%d, "
+		       "so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
+		       max_bonds, 1, bond_limit, BOND_DEFAULT_MAX_BONDS);
+		max_bonds = BOND_DEFAULT_MAX_BONDS;
+	}
+	dev_bond = dev_bonds = kmalloc(max_bonds*sizeof(struct net_device),
+					GFP_KERNEL);
+	if (dev_bond == NULL) {
+		return -ENOMEM;
+	}
+	memset(dev_bonds, 0, max_bonds*sizeof(struct net_device));
+
+	if (arp_ip_target) {
+		if (my_inet_aton(arp_ip_target, &arp_target) == 0) {
+			arp_interval = 0;
+		}
+	}
+
+	for (no = 0; no < max_bonds; no++) {
+		dev_bond->init = bond_init;
+
+		err = dev_alloc_name(dev_bond,"bond%d");
+		if (err < 0) {
+			goto out_unwind;
+		}
+		SET_MODULE_OWNER(dev_bond);
+		if (register_netdev(dev_bond) != 0) {
+			err = -EIO;
+			goto out_unwind;
+		}
+		dev_bond++;
+	}
 	return 0;
+
+out_unwind:
+	/*
+	 * Devices 0 .. no-1 are registered and live inside dev_bonds, so
+	 * they must be taken down before the array is freed.  The notifier
+	 * was registered by the first successful bond_init().
+	 */
+	if (first_pass == 0) {
+		unregister_netdevice_notifier(&bond_netdev_notifier);
+	}
+	while (no-- > 0) {
+		bond_teardown_dev(&dev_bonds[no]);
+	}
+	kfree(dev_bonds);
+	return err;
 }

 static void __exit bonding_exit(void)
 {
-	unregister_netdevice_notifier(&bond_netdev_notifier);
+	int no;

-	unregister_netdev(&dev_bond);
+	unregister_netdevice_notifier(&bond_netdev_notifier);

-	kfree(dev_bond.priv);
+	for (no = 0; no < max_bonds; no++) {
+		bond_teardown_dev(&dev_bonds[no]);
+	}
+	kfree(dev_bonds);
 }

 module_init(bonding_init);
diff -Nru linux/drivers/net/bonding.c.orig linux-patched/drivers/net/bonding.c.orig
--- linux/drivers/net/bonding.c.orig	Thu Jan 1 01:00:00 1970
+++ linux-patched/drivers/net/bonding.c.orig	Wed Aug 27 16:21:27 2003
@@ -0,0 +1,2109 @@
+/*
+ * originally based on the dummy device.
+ *
+ * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
+ * Licensed under the GPL. Based on dummy.c, and eql.c devices.
+ * + * bonding.c: an Ethernet Bonding driver + * + * This is useful to talk to a Cisco EtherChannel compatible equipment: + * Cisco 5500 + * Sun Trunking (Solaris) + * Alteon AceDirector Trunks + * Linux Bonding + * and probably many L2 switches ... + * + * How it works: + * ifconfig bond0 ipaddress netmask up + * will setup a network device, with an ip address. No mac address + * will be assigned at this time. The hw mac address will come from + * the first slave bonded to the channel. All slaves will then use + * this hw mac address. + * + * ifconfig bond0 down + * will release all slaves, marking them as down. + * + * ifenslave bond0 eth0 + * will attach eth0 to bond0 as a slave. eth0 hw mac address will either + * a: be used as initial mac address + * b: if a hw mac address already is there, eth0's hw mac address + * will then be set from bond0. + * + * v0.1 - first working version. + * v0.2 - changed stats to be calculated by summing slaves stats. + * + * Changes: + * Arnaldo Carvalho de Melo + * - fix leaks on failure at bond_init + * + * 2000/09/30 - Willy Tarreau + * - added trivial code to release a slave device. + * - fixed security bug (CAP_NET_ADMIN not checked) + * - implemented MII link monitoring to disable dead links : + * All MII capable slaves are checked every milliseconds + * (100 ms seems good). This value can be changed by passing it to + * insmod. A value of zero disables the monitoring (default). + * - fixed an infinite loop in bond_xmit_roundrobin() when there's no + * good slave. + * - made the code hopefully SMP safe + * + * 2000/10/03 - Willy Tarreau + * - optimized slave lists based on relevant suggestions from Thomas Davis + * - implemented active-backup method to obtain HA with two switches: + * stay as long as possible on the same active interface, while we + * also monitor the backup one (MII link status) because we want to know + * if we are able to switch at any time. 
( pass "mode=1" to insmod ) + * - lots of stress testings because we need it to be more robust than the + * wires ! :-> + * + * 2000/10/09 - Willy Tarreau + * - added up and down delays after link state change. + * - optimized the slaves chaining so that when we run forward, we never + * repass through the bond itself, but we can find it by searching + * backwards. Renders the deletion more difficult, but accelerates the + * scan. + * - smarter enslaving and releasing. + * - finer and more robust SMP locking + * + * 2000/10/17 - Willy Tarreau + * - fixed two potential SMP race conditions + * + * 2000/10/18 - Willy Tarreau + * - small fixes to the monitoring FSM in case of zero delays + * 2000/11/01 - Willy Tarreau + * - fixed first slave not automatically used in trunk mode. + * 2000/11/10 : spelling of "EtherChannel" corrected. + * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). + * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). + * + * 2001/1/3 - Chad N. Tindel + * - The bonding driver now simulates MII status monitoring, just like + * a normal network device. It will show that the link is down iff + * every slave in the bond shows that their links are down. If at least + * one slave is up, the bond's MII status will appear as up. + * + * 2001/2/7 - Chad N. Tindel + * - Applications can now query the bond from user space to get + * information which may be useful. They do this by calling + * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves + * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to + * get slave specific information (# link failures, etc). See + * for more details. The structs of interest + * are ifbond and ifslave. + * + * 2001/4/5 - Chad N. Tindel + * - Ported to 2.4 Kernel + * + * 2001/5/2 - Jeffrey E. Mast + * - When a device is detached from a bond, the slave device is no longer + * left thinking that is has a master. + * + * 2001/5/16 - Jeffrey E. 
Mast + * - memset did not appropriately initialized the bond rw_locks. Used + * rwlock_init to initialize to unlocked state to prevent deadlock when + * first attempting a lock + * - Called SET_MODULE_OWNER for bond device + * + * 2001/5/17 - Tim Anderson + * - 2 paths for releasing for slave release; 1 through ioctl + * and 2) through close. Both paths need to release the same way. + * - the free slave in bond release is changing slave status before + * the free. The netdev_set_master() is intended to change slave state + * so it should not be done as part of the release process. + * - Simple rule for slave state at release: only the active in A/B and + * only one in the trunked case. + * + * 2001/6/01 - Tim Anderson + * - Now call dev_close when releasing a slave so it doesn't screw up + * out routing table. + * + * 2001/6/01 - Chad N. Tindel + * - Added /proc support for getting bond and slave information. + * Information is in /proc/net//info. + * - Changed the locking when calling bond_close to prevent deadlock. + * + * 2001/8/05 - Janice Girouard + * - correct problem where refcnt of slave is not incremented in bond_ioctl + * so the system hangs when halting. + * - correct locking problem when unable to malloc in bond_enslave. + * - adding bond_xmit_xor logic. + * - adding multiple bond device support. + * + * 2001/8/13 - Erik Habbinga + * - correct locking problem with rtnl_exlock_nowait + * + * 2001/8/23 - Janice Girouard + * - bzero initial dev_bonds, to correct oops + * - convert SIOCDEVPRIVATE to new MII ioctl calls + * + * 2001/9/13 - Takao Indoh + * - Add the BOND_CHANGE_ACTIVE ioctl implementation + * + * 2001/9/14 - Mark Huth + * - Change MII_LINK_READY to not check for end of auto-negotiation, + * but only for an up link. + * + * 2001/9/20 - Chad N. Tindel + * - Add the device field to bonding_t. Previously the net_device + * corresponding to a bond wasn't available from the bonding_t + * structure. 
+ *
+ * 2001/9/25 - Janice Girouard
+ *     - add arp_monitor for active backup mode
+ *
+ * 2001/10/23 - Takao Indoh
+ *     - Various memory leak fixes
+ *
+ * 2001/11/5 - Mark Huth
+ *     - Don't take rtnl lock in bond_mii_monitor as it deadlocks under
+ *       certain hotswap conditions.
+ *       Note:  this same change may be required in bond_arp_monitor ???
+ *     - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr
+ *     - Handle hot swap ethernet interface deregistration events to remove
+ *       kernel oops following hot swap of enslaved interface
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/* monitor all links that often (in milliseconds). <=0 disables monitoring */
+#ifndef BOND_LINK_MON_INTERV
+#define BOND_LINK_MON_INTERV	0
+#endif
+
+#undef  MII_LINK_UP
+#define MII_LINK_UP	0x04
+
+#undef  MII_ENDOF_NWAY
+#define MII_ENDOF_NWAY	0x20
+
+#undef  MII_LINK_READY
+/*#define MII_LINK_READY	(MII_LINK_UP | MII_ENDOF_NWAY)*/
+#define MII_LINK_READY	(MII_LINK_UP)
+
+#define MAX_BOND_ADDR	256
+
+#ifndef BOND_LINK_ARP_INTERV
+#define BOND_LINK_ARP_INTERV	0
+#endif
+
+/* ARP monitoring settings; interval in milliseconds, target as text and parsed */
+static int arp_interval = BOND_LINK_ARP_INTERV;
+static char *arp_ip_target = NULL;
+static unsigned long arp_target = 0;
+static u32 my_ip = 0;
+char *arp_target_hw_addr = NULL;
+
+/* module-wide settings, overridable through the MODULE_PARM entries below */
+static int max_bonds	= MAX_BONDS;
+static int miimon	= BOND_LINK_MON_INTERV;
+static int mode		= BOND_MODE_ROUNDROBIN;
+static int updelay	= 0;
+static int downdelay	= 0;
+
+static int first_pass	= 1;
+int bond_cnt;
+static struct bonding *these_bonds =  NULL;
+static struct net_device *dev_bonds = NULL;
+
+/*
+ * The optional "min-max" prefix of a MODULE_PARM type string is an array
+ * length, not a value range.  Every variable below is a scalar, so the
+ * type strings carry no length prefix.
+ */
+MODULE_PARM(max_bonds, "i");
+MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
+MODULE_PARM(miimon, "i");
+MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
+MODULE_PARM(mode, "i");
+MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor");
+MODULE_PARM(arp_interval, "i");
+MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
+MODULE_PARM(arp_ip_target, "s");
+MODULE_PARM_DESC(arp_ip_target, "arp target in n.n.n.n form");
+MODULE_PARM(updelay, "i");
+MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
+MODULE_PARM(downdelay, "i");
+MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds");
+
+extern void arp_send( int type, int ptype, u32 dest_ip, struct net_device *dev,
+	u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw,
+	unsigned char *target_hw);
+
+/* forward declarations for the routines defined further down */
+static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev);
+static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev);
+static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev);
+static struct net_device_stats *bond_get_stats(struct net_device *dev);
+static void bond_mii_monitor(struct net_device *dev);
+static void bond_arp_monitor(struct net_device *dev);
+static int bond_event(struct notifier_block *this, unsigned long event, void *ptr);
+static void bond_set_slave_inactive_flags(slave_t *slave);
+static void bond_set_slave_active_flags(slave_t *slave);
+static int bond_enslave(struct net_device *master, struct net_device *slave);
+static int bond_release(struct net_device *master, struct net_device *slave);
+static int bond_release_all(struct net_device *master);
+static int bond_sethwaddr(struct net_device *master, struct net_device *slave);
+
+/*
+ * bond_get_info is the interface into the /proc filesystem.  This is
+ * a different interface than the BOND_INFO_QUERY ioctl.  That is done
+ * through the generic networking ioctl interface, and bond_info_query
+ * is the internal function which provides that information.
+ */
+static int bond_get_info(char *buf, char **start, off_t offset, int length);
+
+/* #define BONDING_DEBUG 1 */
+
+/* several macros */
+
+/* true when the device is flagged IFF_UP, running and has carrier */
+#define IS_UP(dev)  ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \
+		     (netif_running(dev) && netif_carrier_ok(dev)))
+
+/* Mark a slave as backup and set IFF_NOARP on its device. */
+static void bond_set_slave_inactive_flags(slave_t *slave)
+{
+	slave->state = BOND_STATE_BACKUP;
+	slave->dev->flags |= IFF_NOARP;
+}
+
+/* Mark a slave as active and clear IFF_NOARP on its device. */
+static void bond_set_slave_active_flags(slave_t *slave)
+{
+	slave->state = BOND_STATE_ACTIVE;
+	slave->dev->flags &= ~IFF_NOARP;
+}
+
+/*
+ * This function detaches 'slave' from the slave list of 'bond'.
+ * WARNING: no check is made to verify if the slave effectively
+ * belongs to 'bond'. It returns 'slave' in case it's needed.
+ * Nothing is freed on return, structures are just unchained.
+ * If the bond->current_slave pointer was pointing to 'slave',
+ * it's replaced with slave->next, or with NULL when 'slave' was
+ * the only one left.
+ * A NULL argument, or bond == slave, is reported and 'slave' is
+ * returned with the list untouched.
+ */
+static slave_t *bond_detach_slave(bonding_t *bond, slave_t *slave)
+{
+	if ((bond == NULL) || (slave == NULL) ||
+	   ((void *)bond == (void *)slave)) {
+		/* arguments follow the order named in the format string */
+		printk(KERN_ERR
+			"bond_detach_slave(): trying to detach "
+			"slave %p from bond %p\n", slave, bond);
+		return slave;
+	}
+
+	if (bond->next == slave) {  /* is the slave at the head ? */
+		if (bond->prev == slave) {  /* is the slave alone ? */
+			write_lock(&bond->ptrlock);
+			bond->current_slave = NULL; /* no slave anymore */
+			write_unlock(&bond->ptrlock);
+			bond->prev = bond->next = (slave_t *)bond;
+		} else { /* not alone */
+			bond->next        = slave->next;
+			slave->next->prev = (slave_t *)bond;
+			bond->prev->next  = slave->next;
+
+			write_lock(&bond->ptrlock);
+			if (bond->current_slave == slave) {
+				bond->current_slave = slave->next;
+			}
+			write_unlock(&bond->ptrlock);
+		}
+	}
+	else {
+		slave->prev->next = slave->next;
+		if (bond->prev == slave) {  /* is this slave the last one ?
*/
+			bond->prev = slave->prev;
+		} else {
+			slave->next->prev = slave->prev;
+		}
+
+		write_lock(&bond->ptrlock);
+		if (bond->current_slave == slave) {
+			bond->current_slave = slave->next;
+		}
+		write_unlock(&bond->ptrlock);
+	}
+
+	return slave;
+}
+
+/*
+ * if 'dev' supports MII link status reporting, check its link
+ * and report it as a bit field in a short int :
+ *   - 0x04 means link is up,
+ *   - 0x20 means end of autonegotiation
+ * If the device has no do_ioctl method, or the MII ioctl fails, we
+ * report MII_LINK_READY, meaning that the link is assumed up and
+ * running since we can't check it.
+ */
+static u16 bond_check_dev_link(struct net_device *dev)
+{
+	/* plain local: a static pointer would be shared by all callers */
+	int (* ioctl)(struct net_device *, struct ifreq *, int);
+	struct ifreq ifr;
+	u16 *data = (u16 *)&ifr.ifr_data;
+
+	/* do not hand uninitialised stack contents to the driver */
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* data[0] automagically filled by the ioctl */
+	data[1] = 1; /* MII location 1 reports Link Status */
+
+	if (((ioctl = dev->do_ioctl) != NULL) &&  /* ioctl to access MII */
+	    (ioctl(dev, &ifr, SIOCGMIIPHY) == 0)) {
+		/* now, data[3] contains info about link status :
+		   - data[3] & 0x04 means link up
+		   - data[3] & 0x20 means end of auto-negotiation
+		*/
+		return data[3];
+	} else {
+		return MII_LINK_READY;  /* spoof link up ( we can't check it) */
+	}
+}
+
+/*
+ * Link state of the bond as a whole: MII_LINK_READY while the bond has a
+ * current slave, 0 otherwise.  current_slave is read under bond->lock and
+ * bond->ptrlock.
+ */
+static u16 bond_check_mii_link(bonding_t *bond)
+{
+	int has_active_interface = 0;
+	unsigned long flags;
+
+	read_lock_irqsave(&bond->lock, flags);
+	read_lock(&bond->ptrlock);
+	has_active_interface = (bond->current_slave != NULL);
+	read_unlock(&bond->ptrlock);
+	read_unlock_irqrestore(&bond->lock, flags);
+
+	return (has_active_interface ? MII_LINK_READY : 0);
+}
+
+static int bond_open(struct net_device *dev)
+{
+	struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer;
+	struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer;
+	MOD_INC_USE_COUNT;
+
+	if (miimon > 0) {  /* link check interval, in milliseconds.
*/
+		init_timer(timer);
+		timer->expires  = jiffies + (miimon * HZ / 1000);
+		timer->data     = (unsigned long)dev;
+		timer->function = (void *)&bond_mii_monitor;
+		add_timer(timer);
+	}
+
+	if (arp_interval> 0) {  /* arp interval, in milliseconds. */
+		init_timer(arp_timer);
+		arp_timer->expires  = jiffies + (arp_interval * HZ / 1000);
+		arp_timer->data     = (unsigned long)dev;
+		arp_timer->function = (void *)&bond_arp_monitor;
+		add_timer(arp_timer);
+	}
+	return 0;
+}
+
+/*
+ * Stop method of the bond device.  With bond->lock held for writing and
+ * interrupts saved, it deletes whichever monitor timers are enabled
+ * (miimon > 0, arp_interval > 0) and releases every slave through
+ * bond_release_all().  The module use count taken in bond_open() is
+ * dropped afterwards.  Always returns 0.
+ */
+static int bond_close(struct net_device *master)
+{
+	bonding_t *bond = (struct bonding *) master->priv;
+	//slave_t *slave;
+	unsigned long flags;
+
+	write_lock_irqsave(&bond->lock, flags);
+
+	if (miimon > 0) {  /* link check interval, in milliseconds. */
+		del_timer(&bond->mii_timer);
+	}
+	if (arp_interval> 0) {  /* arp interval, in milliseconds. */
+		del_timer(&bond->arp_timer);
+	}
+
+	/* Release the bonded slaves */
+	bond_release_all(master);
+
+	write_unlock_irqrestore(&bond->lock, flags);
+
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+/*
+ * Multicast-list method of the bond device.  Currently a no-op: the code
+ * that would copy the master's list and flags to each slave is commented
+ * out below.
+ */
+static void set_multicast_list(struct net_device *master)
+{
+/*
+	bonding_t *bond = master->priv;
+	slave_t *slave;
+
+	for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) {
+		slave->dev->mc_list = master->mc_list;
+		slave->dev->mc_count = master->mc_count;
+		slave->dev->flags = master->flags;
+		slave->dev->set_multicast_list(slave->dev);
+	}
+ */
+}
+
+/*
+ * This function counts the number of attached
+ * slaves for use by bond_xmit_xor.
+ */
+static void update_slave_cnt(bonding_t *bond)
+{
+	slave_t *cursor;
+
+	/* restart the count, then walk the ring backwards until it is back at the bond */
+	bond->slave_cnt = 0;
+	cursor = bond->prev;
+	while (cursor != (slave_t*)bond) {
+		bond->slave_cnt++;
+		cursor = cursor->prev;
+	}
+}
+
+/* enslave device  to bond device  */
+static int bond_enslave(struct net_device *master_dev,
+			struct net_device *slave_dev)
+{
+	bonding_t *bond = NULL;
+	slave_t *new_slave = NULL;
+	unsigned long flags = 0;
+	int ndx = 0;
+	int err = 0;
+
+	if (master_dev == NULL || slave_dev == NULL) {
+		return -ENODEV;
+	}
+	bond = (struct bonding *) master_dev->priv;
+
+	if (slave_dev->do_ioctl == NULL) {
+		printk(KERN_DEBUG
+			"Warning : no link monitoring support for %s\n",
+			slave_dev->name);
+	}
+	write_lock_irqsave(&bond->lock, flags);
+
+	/* not running. */
+	if ((slave_dev->flags & IFF_UP) != IFF_UP) {
+#ifdef BONDING_DEBUG
+		printk(KERN_CRIT "Error, slave_dev is not running\n");
+#endif
+		write_unlock_irqrestore(&bond->lock, flags);
+		return -EINVAL;
+	}
+
+	/* already enslaved */
+	if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) {
+#ifdef BONDING_DEBUG
+		printk(KERN_CRIT "Error, Device was already enslaved\n");
+#endif
+		write_unlock_irqrestore(&bond->lock, flags);
+		return -EBUSY;
+	}
+
+	if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) {
+		write_unlock_irqrestore(&bond->lock, flags);
+		return -ENOMEM;
+	}
+	memset(new_slave, 0, sizeof(slave_t));
+
+	err = netdev_set_master(slave_dev, master_dev);
+
+	if (err) {
+#ifdef BONDING_DEBUG
+		printk(KERN_CRIT "Error %d calling netdev_set_master\n", err);
+#endif
+		kfree(new_slave);
+		write_unlock_irqrestore(&bond->lock, flags);
+		return err;
+	}
+
+	new_slave->dev = slave_dev;
+
+	/*
+	 * queue to the end of the slaves list, make the first element its
+	 * successor, the last one its predecessor, and make it the bond's
+	 * predecessor.
+ */ + new_slave->prev = bond->prev; + new_slave->prev->next = new_slave; + bond->prev = new_slave; + new_slave->next = bond->next; + + new_slave->delay = 0; + new_slave->link_failure_count = 0; + + /* check for initial state */ + if ((miimon <= 0) || ((bo