`
`ock.c
`
`:ock.c
`
`ker-
`~ the
`call
`
`3coi,
`e all
`ages
`ated
`lily
`
`rreq.c
`
`20.8
`
`raw_input Function 663
`
`last = O;
`for (rp = rawcb.rcb_next; rp !: &rawcb; rp : rp->rcb_next) {
`if (rp->rcb_proto.sp_family [= proto >sp_family)
`continue;
`if (rp->rcb_proto.sp_protocol &&
`rp->rcb_proto.sp_protocol !_ proto->sp_protocol)
`continue;
`
`* We assume the lower level routines have
`* placed the address in a canonical format
`* suitable for a structure comparison.
`
`61
`62
`63
`64
`65
`66
`67
`68
`69
`70
`71
`72
`* Note that if the lengths are not the same
`73
`* the comparison will fail at the first byte.
`74
`*/
`75
`76 ~define equal(al, a2) \
`(bcmp((caddr_t) (al), (caddr_t) (a2), al->sa_len) == 0)
`77
`78
`if (rp->rcb_laddr && !equal(rp->rcb_laddr, dst))
`continue;
`79
`if (rp->rcb_faddr && !equal(rp->rcb_faddr, src))
`80
`continue;
`81
`82
`if (last) {
`struct mbuf *n;
`83
`if (n = m_copy(m, 0, (int) M_COPYALL)) {
`84
`if (sbappendaddr(&last->so_rcv, src,
`85
`n, (struct mbuf *) 0) -= 0)
`86
`/* should notify about lost packet */
`87
`m_freem(n);
`88
`else {
`89
`sorwakeup(last);
`90
`sockets++;
`91
`92
`93
`94
`95
`96
`97
`98
`99
`i00
`i01
`102
`103
`104
`105
`106
`107 }
`
`}
`
`}
`
`}
`last = rp->rcb_socket;
`
`}
`if (last) {
`if (sbappendaddr(&last->so_rcv, src,
`m, (struct mbuf *) 0) == 0)
`
`m_freem(m);
`else {
`sorwakeup(last);
`sockets++;
`
`}
`} else
`m_freem(m);
`
`
`
`Figure 20.17 raw_input function: pass routing messages to 0 or more processes.
`
`raw_usrreq.c
`
`Ex.1013.689
`
`DELL
`
`
`
`664
`
`Routing Sockets
`
`Chapter 20
`
`51--61
`
`62-67
`
`68--81
`
`82--107
`
`In all four calls to raw_input that we’ve seen, the proto, src, and dst arguments
`are pointers to the three globals route_proto, route_src, and route_dst, which
`are declared and initialized as shown with Figure 19.26.
`Compare address family and protocol
The for loop goes through every routing control block checking for a match. The
family in the control block (normally PF_ROUTE) must match the family in the
sockproto structure or the control block is skipped. Next, if the protocol in the control
block (the third argument to socket) is nonzero, it must match the protocol in the
sockproto structure, or the control block is skipped. Hence a process that creates a routing
socket with a protocol of 0 receives all routing messages.
Compare local and foreign addresses
`
These two tests compare the local address in the control block and the foreign
address in the control block, if specified. Currently the process is unable to set the
rcb_laddr or rcb_faddr members of the control block. Normally a process would
set the former with bind and the latter with connect, but that is not possible with
routing sockets in Net/3. Instead, we’ll see that route_usrreq permanently connects
the socket to the route_src socket address structure, which is OK since that is always
the src argument to this function.
Append message to socket receive buffer
If last is nonnull, it points to the most recently seen socket structure that should
receive this message. If this variable is nonnull, a copy of the message is appended to
that socket’s receive buffer by m_copy and sbappendaddr, and any processes waiting
on this receive buffer are awakened. Then last is set to point to this socket that just
matched the previous tests. The use of last is to avoid calling m_copy (an expensive
operation) if only one process is to receive the message.
`If N processes are to receive the message, the first N - 1 receive a copy and the final
`one receives the message itself.
`The variable sockets that is incremented within this function is not used. Since it
`is incremented only when a message is passed to a process, if it is 0 at the end of the
`function it indicates that no process received the message (but the value isn’t stored
`anywhere).
`
`20.9
`
`route_usrreq Function
`
`route_usrreq is the routing protocol’s user-request function. It is called for a variety
`of operations. Figure 20.18 shows the function.
`
`rtsock.c
`
`64 int
`65 route_usrreq(so, req, m, nam, control)
`66 struct socket *so;
`req:
`67 int
`68 struct mbuf *m, *nam, *control;
`69 {
`
`Ex.1013.690
`
`DELL
`
`
`
`route_usrreq Function 665
`
`error : 0;
`int
`struct rawcb *rp = sotorawcb(so);
`s;
`int
`
`if (req :: PRU_ATTACH) {
`MALLOC(rp, struct rawcb * sizeof(*rp), M_PCB, M_WAITOK);
`if (so->so_pcb = (caddr_t) rp)
`bzero(so->so~cb, sizeof(*rp));
`
`}±
`
`f (req == PRU_DETACH && rp) {
`int af : rp->rcb~roto.sp~rotocol;
`if (af == AF_INET)
`route_cb.ip_count--;
`else if (af == AF_NS)
`route_cb.ns_count--;
`else if (af == AF_ISO)
`route_cb.iso_count--;
`route_cb.any_count--;
`
`}s
`
` = splnet();
`error : raw_usrreq(so, req, m, nam, control);
`rp = sotorawcb(so);
`if (req == PRU_ATTACH && rp) {
`int af = rp->rcb~roto.sp_protocol;
`if (error) {
`free((caddr_t) rp, M_PCB);
`splx(s);
`return (error);
`
`}i
`
`f (af == AF_INET)
`route_cb.ip_count++;
`else if (af == AF_NS)
`route_cb.ns_count++;
`else if (af == AF_ISO)
`route_cb.iso_count++;
`route_cb.any_count++;
`
`70
`71
`72
`
`73
`74
`75
`76
`77
`78
`79
`80
`81
`82
`83
`84
`85
`86
`87
`88
`89
`90
`91
`92
`93
`94
`95
`96
`97
`98
`99
`i00
`i01
`102
`103
`104
`
`105
`106
`107
`108
`109
`Ii0
`ill )
`
`rp->rcb_faddr : &route_src;
`soisconnected(so);
`so->so_options I= SO_USELOOPBACK;
`
`}
`splx(s);
`return {error);
`
`Figure 20.18 route_usrreq function: process PRU_xxx
`
`requests.
`
`rtsock.c
`
`64--77
`
`PRU_ATTACH: allocate control block
`
The PRU_ATTACH request is issued when the process calls socket. Memory is allo-
cated for a routing control block. The pointer returned by MALLOC is stored in the
so_pcb member of the socket structure, and if the memory was allocated, the rawcb
structure is set to 0.
`
`Ex.1013.691
`
`2O
`
`nts
`ich
`
`~he
`the
`:rol
`the
`[ng
`
`ign
`the
`uld
`rith
`_~cts
`ays
`
`,uld
`] to
`Ling
`just
`~ive
`
`inal
`
`:e it
`the
`)red
`
`:iety
`
`;ock.c
`
`DELL
`
`
`
`666
`
`Routing Sockets
`
`Chapter 20
`
`The function raw_usrreq is called to process the PRU_xxx request further.
`
`PRU_DETACH: decrement counters
`The close system call issues the PRU_DETACH request. If the socket structure
`points to a protocol control block, two of the counters in the route_cb structure are
`decremented: one is the any_count and one is based on the protocol.
`Process request
`
`Increment counters
`If the request is PRU_ATTACH and the socket points to a routing control block, a
`check is made for an error from raw_usrreq. Two of the counters in the route_cb
`structure are then incremented: one is the any_count and one is based on the protocol.
`Connect socket
`The foreign address in the routing control block is set to route_src. This perma-
`nently connects the new socket to receive routing messages from the PF_ROUTE family.
`
`Enable SO_USELOOPBACK by default
The SO_USELOOPBACK socket option is enabled. This is a socket option that
defaults to being enabled--all others default to being disabled.
`raw_usrreq Function
`
`78-87
`
`88-90
`
`91-104
`
`105-106
`
`107-111
`
`20.10
`
raw_usrreq performs most of the processing for the user request in the routing
domain. It was called by route_usrreq in the previous section. The reason the user-
request processing is divided between these two functions is that other protocols (e.g.,
the OSI CLNP) call raw_usrreq but not route_usrreq. raw_usrreq is not
intended to be the pr_usrreq function for a protocol. Instead it is a common subrou-
tine called by the various pr_usrreq functions.
Figure 20.19 shows the beginning and end of the raw_usrreq function. The body
of the switch is discussed in separate figures following this figure.
`PRU_CONTROL requests invalid
`The PRU_CONTROL request is from the ioctl system call and is not supported in
`the routing domain.
`Control information invalid
`If control information was passed by the process (using the sendmsg system call)
`an error is returned, since the routing domain doesn’t use this optional information.
`Socket must have a control block
If the socket structure doesn’t point to a routing control block, an error is
returned. If a new socket is being created, it is the caller’s responsibility (i.e.,
route_usrreq) to allocate this control block and store the pointer in the so_pcb
member before calling this function.
`The default for this switch catches two requests that are not handled by case
`statements: PRU_BIND and PRU_CONNECT. The code for these two requests is present
`but commented out in Net/3. Therefore issuing the bind or connect system calls on a
`
`119-129
`
`130-133
`
`134-137
`
`262-269
`
`Ex.1013.692
`
`DELL
`
`
`
`Ex.1013.693
`
`~pter 20
`
`20.10
`
`raw_usrreq Function
`
`667
`
`raw_usrreq.c
`
`119 int
`120 raw_usrreq(so, req, m, nam, control)
`121 struct socket *so;
`req;
`122 int
`123 struct mbuf *m, *nam, *control;
`124 {
`125
`126
`127
`
`struct rawcb *rp = sotorawcb(so);
`error = 0;
`int
`len;
`int
`
`if (req == PRU_CONTROL)
`return (EOPNOTSUPP);
`if (control && control->m_len)
`error = EOPNOTSUPP;
`goto release;
`
`}i
`
`f (rp == 0) {
`error = EINVAL
`goto release;
`
`]s
`
`witch (req) {
`
`128
`129
`130
`131
`132
`133
`134
`135
`136
`137
`138
`
`switch cases */
`
`default:
`panic("raw_usrreq");
`
`}
`release:
`if (m [: NULL)
`m_freem(m);
`return (error);
`
`262
`263
`264
`265
`266
`267
`268
`269 }
`
`
`
`Figure 20.19 Body of raw_usrreq function.
`
`raw_usrreq.c
`
`routing socket causes a kernel panic. This is a bug. Fortunately it requires a superuser
`process to create this type of socket.
`
`We now discuss the individual case statements. Figure 20.20 shows the processing
`
`for the PRU_ATTACH and PRU_DETACH requests.
The PRU_ATTACH request is a result of the socket system call. A routing socket
must be created by a superuser process.
The function raw_attach (Figure 20.24) links the control block into the doubly
linked list. The nam argument is the third argument to socket and gets stored in the
control block.
The PRU_DETACH request is issued by the close system call. The test of a null rp pointer
is superfluous, since the test was already done before the switch statement.
raw_detach (Figure 20.25) removes the control block from the doubly linked list.
`
`139--148
`
`149--150
`
`151--159
`
`160--161
`
`ructure
`ure are
`
`~lock, a
`~te_cb
`rotocol.
`
`perma-
`family.
`
`.on that
`
`routing
`:he user-
`ols (e.g.,
`t is not
`subrou-
`
`"he body
`
`)orted in
`
`,tern call)
`tion.
`
`error is
`ility (i.e.,
`¯ so_pcb
`
`by case
`is present
`calls on a
`
`DELL
`
`
`
`668
`
`Routing Sockets
`
`Chapter 20
`
`Section 2(3
`
`S
`
`22222222222222222222 22222 22222
`
`e
`
`- raw_usrreq.c
`
`186--188
`
`189--196
`
`197--202
`
`* Allocate a raw control block and fill in the
`* necessary info to allow packets to be routed to
`* the appropriate raw interface routine.
`*/
`case PRU_ATTACH:
`if ((so->so_state & SS_PRIV) == 0) {
`error = EACCES;
`break;
`
`}e
`
`rror = raw_attach(so, (int) ham);
`break;
`
`/*
`* Destroy state just before socket deallocation.
`* Flush data or not depending on the options.
`*/
`case PRU_DETACH:
`if (rp == 0) {
`error = ENOTCONN;
`break;
`
`]
`raw_detach(rp);
`break;
`
`- raw_usrreq.c
`
`Figure 20.20
`
`raw usrreq function: PRU ATTACH and PRU DETACH requests.
`
`139
`140
`141
`142
`143
`144
`145
`146
`147
`148
`149
`150
`
`151
`152
`153
`154
`155
`156
`157
`158
`159
`160
`161
`
Figure 20.21 shows the processing of the PRU_CONNECT2, PRU_DISCONNECT, and
PRU_SHUTDOWN requests.
`
`raw_usrreq.c
`
`186
`187
`188
`
`189
`190
`191
`192
`193
`194
`195
`196
`
`197
`198
`199
`200
`201
`202
`
`case PRU_COIffNECT2 :
`error = EOPNOTSUPP;
`goto release;
`
`case PRU_DISCONNECT:
`if (rp->rcb_faddr == 0)
`error = ENOTCONN;
`break;
`
`raw_disconnect(rp);
`soisdisconnected(so);
`break;
`
`/*
`* Mark the connection as being incapable of further input.
`*/
`case PRU_SHUTDOWN:
`socantsendmore(so);
`break;
`
`~aw_usrreq.c
`
`203-21 7
`
Figure 20.21 raw_usrreq function: PRU_CONNECT2, PRU_DISCONNECT, and PRU_SHUTDOWN requests.
`
`Ex.1013.694
`
`DELL
`
`
`
`Lpter 20
`
`.usrreq.c
`
`_usrreq.c
`
`,~T, and
`
`_usrreq.c
`
`}.10
`
`raw_us rreq Function
`
`669
`
`186-188
`
`189-196
`
`197-202
`
The PRU_CONNECT2 request is from the socketpair system call and is not sup-
ported in the routing domain.
Since a routing socket is always connected (Figure 20.18), the PRU_DISCONNECT
request is issued by close before the PRU_DETACH request. The socket must already
be connected to a foreign address, which is always true for a routing socket.
raw_disconnect and soisdisconnected complete the processing.
The PRU_SHUTDOWN request is from the shutdown system call when the argument
specifies that no more writes will be performed on the socket. socantsendmore dis-
ables further writes.
`
`The most common request for a routing socket, PRU_SEND, and the PRU_ABORT
`and PRU_SENSE requests are shown in Figure 20.22.
`
`203
`204
`205
`206
`207
`208
`209
`210
`211
`212
`213
`214
`215
`216
`217
`218
`219
`220
`221
`222
`
`223
`224
`225
`226
`227
`
`228
`229
`230
`231
`232
`
`raw_usrreq.c
`
`/*
`* Ship a packet out. The appropriate raw output
`* routine handles any massaging necessary.
`*/
`case PRU_SEND:
`if (nam) {
`if (rp->rcb_faddr) {
`error = EISCONN;
`break;
`
`}
`rp->rcb_faddr = mtod(nam, struct sockaddr *);
`} else if (rp->rcb_faddr == 0) {
`error = ENOTCONN;
`break;
`
`}
`error = (*so->so_proto->pr_output) (m, so);
`m : NULL;
`if (nam)
`rp->rcb_faddr = 0;
`break;
`
`case PRU_ABORT:
`raw_disconnect(rp);
`sofree(so);
`soisdisconnected(so);
`break;
`
`case PRU_SENSE:
`/*
`* stat: don’t bother with a blocksize.
`*/
`return (0);
`
`mw_usr~q.c
Figure 20.22 raw_usrreq function: PRU_SEND, PRU_ABORT, and PRU_SENSE requests.
`
`_usrreq.c
`~uests.
`
`203-21 7
`
The PRU_SEND request is issued by sosend when the process writes to the socket.
If a nam argument is specified, that is, the process specified a destination address using
either sendto or sendmsg, an error is returned because route_usrreq always sets
rcb_faddr for a routing socket.
`
`Ex.1013.695
`
`DELL
`
`
`
`670
`
`Routing Sockets
`
`Chapter 20
`
The message in the mbuf chain pointed to by m is passed to the protocol’s
pr_output function, which is route_output.
`If a PRU_ABORT request is issued, the control block is disconnected, the socket is
`released, and the socket is disconnected.
`The PRU_SENSE request is issued by the fstat system call. The function returns
`
`218-222
`
`223-227
`
`228-232
`
`
`
`
`
Figure 20.23 shows the remaining PRU_xxx requests.
`
`¯ raw_usrreq.c
`
`/*
`* Not supported.
`*/
`case PRU_RCVOOB:
`case PRU_RCVD:
`return (EOPNOTSUPP);
`
`case PRU_LISTEN:
`case PRU_ACCEPT:
`case PRU_SENDOOB:
`error : EOPNOTSUPP;
`break;
`
`case PRU_SOCKADDR:
`if (rp->rcb_laddr :: 0) {
`error = EINVAL;
`break;
`
`}
`len = rp->rcb_laddr->sa_len;
`bcopy((caddr_t) rp->rcb_laddr, mtod(nam, caddr_t),
`nam->m_len : len;
`break;
`
`(unsigned) len);
`
`case PRU_PEERADDR:
`if (rp->rcb_faddr == 0)
`error = ENOTCONN;
`break;
`
`len = rp->rcb_faddr->sa_len;
`bcopy((caddr_t) rp->rcb_faddr, mtod(nam, caddr_t),
`nam->m_len = len;
`break;
`
`(unsigned) len);
`
`raw_usrreq.c
`
`Figure 20.23 raw usrreq function: final part.
`
`~;t<~
`
`244
`245
`246
`247
`248
`249
`250
`251
`252
`
`253
`254
`255
`256
`257
`258
`259
`260
`261
`
`233-243
`
`244-261
`
`These five requests are not supported.
The PRU_SOCKADDR and PRU_PEERADDR requests are from the getsockname and
getpeername system calls respectively. The former always returns an error, since the
bind system call, which sets the local address, is not supported in the routing domain.
The latter always returns the contents of the socket address structure route_src,
which was set by route_usrreq as the foreign address.
`
`OK.
`
`233
`234
`235
`236
`237
`238
`
`239
`240
`241
`242
`243
`
`Sectic
`
`20.1
`
`49-
`
`65-
`
`68-
`
`75-
`
`Ex.1013.696
`
`DELL
`
`
`
`pter 2(]
`
`Section 20.11
`
`raw_attach, raw_detach, and raw_disconnect Functions 671
`
`tocol’s
`
`cket is
`
`’eturns
`
`.usrreq.c
`
`20.11
`
`Functions
`raw_attach, raw_detach, and raw_disconnect
`
`The raw_attach function, shown in Figure 20.24, was called by
`
`processing the PRU_ATTACH request.
`
`raw_input
`
`raw_cb.c
`
`49 int
`50 raw_attach(so, proto)
`51 struct socket *so;
`proto;
`52 int
`53 {
`54
`55
`
`struct rawcb *rp = sotorawcb(so);
`int
`error;
`
`56
`57
`58
`59
`60
`61
`62
`63
`64
`65
`66
`67
`68
`69
`7O }
`
`/*
`* It is assumed that raw_attach is called
`* after space has been allocated for the
`* rawcb.
`*/
`if (rp :: 0)
`return (ENOBUFS);
`if (error = soreserve(so, raw_sendspace, raw_recvspace))
`return (error);
`rp->rcb_socket = so;
`rp->rcb_proto.sp_family = so->so_proto->pr_domain >dom_family;
`rp->rcb_proto.sp_protocol = proto;
`insque(rp, &rawcb};
`return (0);
`
`raw_cb.c
`
`fen);
`
`Figure 20.24 raw_attach function.
`
`49--64
`
`65--67
`
`68-70
`
The caller must have already allocated the raw protocol control block. soreserve
sets the high-water marks for the send and receive buffers to 8192. This should be more
than adequate for the routing messages.
A pointer to the socket structure is stored in the protocol control block along with
the dom_family (which is PF_ROUTE from Figure 20.1 for the routing domain) and the
proto argument (which is the third argument to socket).
insque adds the control block to the front of the doubly linked list headed by the
global rawcb.
`
`75-84
`
`The raw_detach function, shown in Figure 20.25, was called by
`ish processing the
`request.
`PRU_DETACH
`The so_pcb pointer in the socket structure is set to null and the socket is released.
`The control block is removed from the doubly linked list by remque and the memory
`used for the control block is released by free.
`
`raw_input
`
`function, shown in Figure 20.26, was called by
`The raw_disconnect
`to process the PRU_DI SCONNECT and PRU_ABORT requests.
`If the socket does not reference a descriptor, raw_detach releases the socket and
`control block.
`
`88-94
`
`raw_input
`
`fen);
`
`,_usrreq.c
`
`~me and
`~ince the
`domain.
`ze_src,
`
`Ex.1013.697
`
`DELL
`
`
`
`672
`
`Routing Sockets
`
`75 void
`76 raw_detach(rp)
`77 struct rawcb *rp;
`78 {
`79 struct socket *so = rp->rcb_socket;
`
`so->so_pcb : 0;
`sofree(so);
`remque(rp);
`free((caddr_t)
`
`80
`81
`82
`83
`84 }
`
`(rp), M_PCB);
`
`Figure 20.25 raw_detach function.
`
`88 void
`89 raw_disconnect(rp)
`90 struct rawcb *rp;
`91 {
`
`92
`93
`94 ]
`
`if (rp->rcb_socket->so_state & SS_NOFDREF)
`raw_detach(rp);
`
`Figure 20.26 raw_disconnect function.
`
`Chapter 20
`
`raw_cb.c
`
`raw_cb.c
`
`raw_cb.c
`
`raw_cb.c
`
`20.12 Summary
`
`A routing socket is a raw socket in the PF_ROUTE domain. Routing sockets can be cre-
`ated only by a superuser process. If a nonprivileged process wants to read the routing
`information contained in the kernel, the sysctl system call supported by the routing
`domain can be used (we described this in the previous chapter).
`This chapter was our first encounter with the protocol control blocks (PCBs) that are
`normally associated with each socket. In the routing domain a special rawcb contains
`information about the routing socket: the local and foreign addresses, the address fam-
`ily, and the protocol. We’ll see in Chapter 22 that the larger Internet protocol control
`block (inpcb) is used with UDP, TCP, and raw IP sockets. The concepts are the same,
`however: the socket structure is used by the socket layer, and the PCB, a rawcb or an
`inpcb, is used by the protocol layer. The socket structure points to the PCB and vice
`versa. ¯
The route_output function handles the five routing requests that can be issued
by a process. raw_input delivers a routing message to one or more routing sockets,
depending on the protocol and address family. The various PRU_xxx requests for a
routing socket are handled by raw_usrreq and route_usrreq. In later chapters
we’ll encounter additional xxx_usrreq functions, one per protocol (UDP, TCP, and raw
IP), each consisting of a switch statement to handle each request.
`
`Ex.1013.698
`
`DELL
`
`
`
`~ter 20
`
`Exercises
`
`Exercises 673
`
20.1 List two ways a process can receive the return value from route_output when the pro-
cess writes a message to a routing socket. Which method is more reliable?
`20.2 What happens when a process specifies a nonzero protocol argument to the socket system
`call, since the pr_protocol member of the routesw structure is 0?
`20.3 Routes in the routing table (other than ARP entries) never time out. Implement a timeout
`on routes.
`
`2O
`
`:b.c
`
`:b.c
`
`:b.c
`
`cb.c
`
`_’re-
`ing
`ing
`
`are
`tins
`irn-
`trol
`me,
`’ all
`rice
`
`~ed
`ets,
`Dr a
`ters
`?aw
`
`Ex.1013.699
`
`DELL
`
`
`
`
`
`
`
`
`
`21.1
`
`
`
`21.2
`
`
`
`
`
`
`Ex.1013.700
`DELL Ex.1013.700
`
`DELL
`
`
`
`21
`
`ARP: Address Resolution
`Protocol
`
`21.1
`
`Introduction
`
`ARP, the Address Resolution Protocol, handles the translation of 32-bit IP addresses into
`the corresponding hardware address. For an Ethernet, the hardware addresses are
`48-bit Ethernet addresses. In this chapter we only consider mapping IP addresses into
`48-bit Ethernet addresses, although ARP is more general and can work with other types
of data links. ARP is specified in RFC 826 [Plummer 1982].
When a host has an IP datagram to send to another host on a locally attached Ether-
net, the local host first looks up the destination host in the ARP cache, a table that maps a
`32-bit IP address into its corresponding 48-bit Ethernet address. If the entry is found for
`the destination, the corresponding Ethernet address is copied into the Ethernet header
`and the datagram is added to the appropriate interface’s output queue. If the entry is
`not found, the ARP functions hold onto the IP datagram, broadcast an ARP request ask-
`ing the destination host for its Ethernet address, and, when a reply is received, send the
`datagram to its destination.
`This simple overview handles the common case, but there are many details that we
`describe in this chapter as we examine the Net/3 implementation of ARP. Chapter 4 of
`Volume 1 contains additional ARP examples.
`
`21.2
`
`ARP and the Routing Table
`
`The Net/3 implementation of ARP is tied to the routing table, which is why we post-
`poned discussing ARP until we had described the structure of the Net/3 routing tables.
`Figure 21.1 shows an example that we use in this chapter when describing ARP.
`
`6?5
`
`Ex.1013.701
`
`DELL
`
`
`
`676
`
`ARP: Address Resolution Protocol
`
`llinfo_arp:
`
`Chapter 21
`
`llinfo ~rp{}
`la_next
`)rev
`la_rt
`hold
`la_asked
`
`llinfo_arp{}
`la_next
`3rev
`la_r/
`la_hold
`la_asked
`
`sockaddr_dl{}
`
`lIFT--ETHER ~Isdl_alen :0
`
`rtentrY{}
`rn_key =
`140.252.13.32
`
`~ rt_gateway
`rt_flags
`rt_ifP --
`~-rt_ifa
`
`sockaddr_dl {
`
`HER
`I sdl~alen = 6
`
`I~:0~20:3:f6:42
`rtentry{ }
`rn_key =
`140.252.13.33
`
`llinfo
`ateway
`rt_flags
`fP
`rt_ifa
`
`rmx_expire
`
`rmx_expire
`
`\
`\
`
`sockaddr_dl{}
`
`~:LINK
`
`ETHER
`alen =6
`~0:c2:9b:2~
`
`sockaddr_dl{}
`
`~_LINK
`
`T ETHER
`i alen =6
`0:~0:6f:2d:40
`
`rtentry{}
`rn_key =
`] 40.252.13.54
`
`rt_llinfo
`rt_gateway
`--~t_flags
`.rt_ifp
`rt_ifa
`
`rtentry{}
`rn_key =
`140.252.13.35
`
`-~_llinfo
`~t_gateway
`rt_flags
`~t_ifp
`~t_ifa
`
`rmx_exp i r e
`
`rmx_explre
`
`ifnet :
`
`~
`~
`
`~
`
`[
`
`softc [0]
`i f ne---~-{~-
`index=]
`
`
`arpcom{ } sl_softC (}
`
`sl_softc [0] :
`i fnet{ }
`index:2
`
`loif:
`ifnet{}
`index:3
`
`
`
`ifnet addrs:ifnet_a~rs :
`
`[
`
`le so
`
`ftc{ }
`
`~
`~~-~ AF LINK
`~_~~i le~
`
`i f addr { }
`
`i f addr { }
`AF_LINK
`slO
`
`ifaddr{}
`AF_LINK
`io0
`IFT_LOOP
`
`in ifaddr{}
`in i faddr{} ~ ~F3N
`1146-~52.13.66_
`1140.252.13.65_.
`
`~
`
`J---~1255"255"255"224
`
`-
`
`Figure 21.1 Relationship of ARP to routing table and interface structures.
`
The entire figure corresponds to the example network used throughout the text (Fig-
ure 1.17). It shows the ARP entries on the system bsdi. The ifnet, ifaddr, and
in_ifaddr structures are simplified from Figures 3.32 and 6.5. We have removed
some of the details from these three structures, which were covered in Chapters 3 and 6.
`
`Ex.1013.702
`
`DELL
`
`
`
`".hapter 21
`
`Section 21.2
`
`ARP and the Routing Table 677
`
For example, we don’t show the two sockaddr_dl structures that appear after each
ifaddr structure--instead we summarize the information contained in these two
structures. Similarly, we summarize the information contained in the three in_ifaddr
structures.
`We briefly summarize some relevant points from this figure, the details of which we
`cover as we proceed through the chapter.
`
A doubly linked list of llinfo_arp structures contains a minimal amount of
information for each hardware address known by ARP. The global
llinfo_arp is the head of this list. Not shown in this figure is that the
la_prev pointer of the first entry points to the last entry, and the la_next
pointer of the last entry points to the first entry. This linked list is processed by
the ARP timer function every 5 minutes.
`For each IP address with a known hardware address, a routing table entry exists
`(an rtentry structure). The llinfo_arp structure points to the correspond-
`ing rtentry structure, and vice versa, using the la_rt and rt_llinfo point-
`ers. The three routing table entries in this figure with an associated
`llinfo_arp structure are for the hosts sun (140.252.13.33), svr4
`(140.252.13.34), and bsdi itself (140.252.13.35). These three are also shown in
`Figure 18.2.
We show a fourth routing table entry on the left, without an llinfo_arp struc-
ture, which is the entry for the network route to the local Ethernet
(140.252.13.32). We show its rt_flags with the C bit on, since this entry is
cloned to form the other three routing table entries. This entry is created by the
call to rtinit when the IP address is assigned to the interface by in_ifinit
(Figure 6.19). The other three entries are host entries (the H flag) and are gener-
ated by ARP (the L flag) when a datagram is sent to that IP address.
`
`The rt_gateway member of the rtentry structure points to a sockaddr_dl
`structure. This data-link socket address structure contains the hardware address
`if the sdl_alen member equals 6.
5. The rt_ifp member of the routing table entry points to the ifnet structure of
the outgoing interface. Notice that the two routing table entries in the middle,
for other hosts on the local Ethernet, both point to le_softc[0], but the rout-
ing table entry on the right, for the host bsdi itself, points to the loopback struc-
ture. Since rt_ifp->if_output (Figure 8.25) points to the output routine,
packets sent to the local IP address are routed to the loopback interface.
`
6. Each routing table entry also points to the corresponding in_ifaddr structure.
(Actually the rt_ifa member points to an ifaddr structure, but recall from
Figure 6.8 that the first member of an in_ifaddr structure is an ifaddr struc-
ture.) We show only one of these pointers in the figure, although all four point
to the same structure. Remember that a single interface, say le0, can have mul-
tiple IP addresses, each with its own in_ifaddr structure, which is why the
rt_ifa pointer is required in addition to the rt_ifp pointer.
`
`ire
`
`t(}
`~=3
`
`~r{}
`
`)P
`
`ae text (Fig-
`faddr, and
`ve removed
`~ters 3 and 6.
`
`Ex.1013.703
`
`DELL
`
`
`
`678
`
`ARP: Address Resolution Protocol
`
`Chapter 21
`
7. The la_hold member is a pointer to an mbuf chain. An ARP request is broad-
cast because a datagram is sent to that IP address. While the kernel awaits the
ARP reply it holds onto the mbuf chain for the datagram by storing its address
in la_hold. When the ARP reply is received, the mbuf chain pointed to by
la_hold is sent.
8. Finally, we show the variable rmx_expire, which is in the rt_metrics struc-
ture within the routing table entry. This value is the timer associated with each
ARP entry. Some time after an ARP entry has been created (normally 20 min-
utes) the ARP entry is deleted.
`
`Even though major routing table changes took place with 4.3BSD Reno, the ARP cache was left
`alone with 4.3BSD Reno and Net/2. 4.4BSD, however, removed the stand-alone ARP cache
`and moved the ARP information into the routing table.
`
`The ARP table in Net/2 was an array of structures composed of the following members: an IP
`address, an Ethernet address, a timer, flags, and a pointer to an mbuf (similar to the la_hold
`member in Figure 21.1). We see with Net/3 that the same information is now spread through-
`out multiple structures, all of which are linked.
`
`21.3 Code Introduction
`There are nine ARP functions in a single C file and definitions in two headers, as shown
`in Figure 21.2.
`
`File
`
`Description
`
net/if_arp.h          arphdr structure definition
netinet/if_ether.h    various structure and constant definitions
netinet/if_ether.c    ARP functions
`
`Figure 21.2 Files discussed in this chapter.
`
`Figure 21.3 shows the relationship of the ARP functions to other kernel functions.
`In this figure we also show the relationship between the ARP functions and some of the
`routing functions from Chapter 19. We describe all these relationships as we proceed
`through the chapter.
`
`Global Variables
`
`Ten global variables are introduced in this chapter, which are shown in Figure 21.4.
`
`Ex.1013.704
`
`DELL
`
`
`
`~r 21
`
`)ad-
`the
`ross
`) by
`
`each
`min-
`
`as left
`cache
`
`anIP
`_hold
`"ough-
`
`~town
`
`_-tions.
`of the
`:oceed
`
`ion 21.3
`
`ifconfig
`
`Code Introduction 679
`
`arp program,
`[ routing~;ocket [
`
`~!
`
`....
`
`v.... ~
`
`~ .................
`
`kernel
`
`-/-
`
`~
`/m ~
`/~
`~
`~.0
`
`software interrupt
`when ARP request
`or reply received
`
`/ IRTM GET
`/ Ifor one ARP entry
`/ |
`
`~tnernet
`~ ~ ~
`.... .~ ~ ~
`oewce anvers R ~ ~
`
`xd~
`"9~ ~
`-~,~
`
`’
`
`L~
`~/~
`
`/
`
`5 minutes
`
`~ g~ ~
`
`~
`
`/
`
`~ /
`
`expiredl
`
`SOLVE
`
`~
`
`i fa_rtrequest function
`for all Ethernet devices
`
`Figure 21.3 Relationship of ARP functions to rest of kernel.
`
`Ex.1013.705
`
`DELL
`
`
`
`680
`
`ARP: Address Resolution Protocol
`
`Chapter 21
`
`Variable
`
`Datatype
`
`Description
`
`llinfo_arp
`
`arpintrq
`arpt_prune
`arpt_keep
`arpt_down
`arp_inuse
`arp_allocated
`arp_maxtries
`arpinit_done
`useloopback
`
`struct llinfo_arp
`
`struct i fqueue
`int
`int
`int
`int
`int
`int
`int
`int
`
head of llinfo_arp doubly linked list (Figure 21.1)
ARP input queue from Ethernet device drivers (Figure 4.9)

#seconds between checking ARP list (5 x 60)
#seconds ARP entry valid once resolved (20 x 60)
#seconds between ARP flooding algorithm (20)
#ARP entries currently in use
#ARP entries ever allocated
max #tries for an IP address before pausing (5)
initialization-performed flag
use loopback for local host (default true)
`
`Figure 21.4 Global variables introduced in this chapter.
`
`Statistics
`
The only statistics maintained by ARP are the two globals arp_inuse and
arp_allocated, from Figure 21.4. The former counts the number of ARP entries cur-
rently in use and the latter counts the total number of ARP entries allocated since the
system was initialized. Neither counter is output by the netstat program, but they
can be examined with a debugger.
The entire ARP cache can be listed using the arp -a command, which uses the
sysctl system call with the arguments shown in Figure 19.36. Figure 21.5 shows the
output from this command, for the entries shown in Figure 18.2.
`
`bsdi $ arp -a
`sun.tuc.noao.edu (140.252.13.33) at 8:0:20:3:f6:42
`svr4.tuc.noao.edu (140.252.13.34) at 0:0:c0:c2:9b:26
`bsdi.tuc.noao.edu (140.252.13.35) at 0:0:c0:6f:2d:40 permanent
`ALL-SYSTEMS.MCAST.NET (224.0.0.1) at (incomplete)
`
`Figure 21.5 arp -a output corresponding to Figure 18.2.
`
Since the multicast group 224.0.0.1 has the L flag set in Figure 18.2, and since the arp
program looks for entries with the RTF_LLINFO flag set, the multicast groups are out-
put by the program. Later in this chapter we’ll see why this entry is marked as "incom-
plete" and why the entry above it is "permanent."
`
`SNMP Variables
`
As described in Section 25.8 of Volume 1, the original SNMP MIB defined an address
translation group that was the system’s ARP cache. MIB-II deprecated this group and
instead each network protocol group (i.e., IP) contains its own address translation
tables. Notice that the change in Net/2 to Net/3 from a stand-alone ARP table to an
integration of the ARP information within the IP routing table parallels this SNMP
change.
`
`Ex.1013.706
`
`DELL
`
`
`
`hapter 21
`
`..1)
`gure 4.9)
`
`use and
`~tries cur-
`since the
`, but they
`
`~ uses the
`shows the
`
`ce the arp
`ps are out-
`as "ilacom-
`
`an address
`group and
`translation
`table to an
`this SNMP
`
`ARP Structures 681
`
Figure 21.6 shows the IP address translation table from MIB-II, named
ipNetToMediaTable. The values returned by SNMP for this table are taken from the
routing table entry and its corresponding ifnet structure.
`
IP address translation table, index = < ipNetToMediaIfIndex >.< ipNetToMediaNetAddress >
`
`Description
`Member
`Name
`corresponding interface:
`i fIndex
`if_index
`physical address
`rt_gateway
`IP address
`rt_key
`type of mapping: 1 = other, 2 = invalidated,
`rt_flags
`3 = dynamic, 4 = static (see text)
`
`ipNetToMediaIfIndex
`ipNetToMediaPhysAddress
`ipNetToMediaNetAddress
`ipNetToMediaType
`
`Figure 21.6 IP address translation table:
`ipNetToMediaTable.
`
`If the routing table entry has an expiration time of 0 it is considered permanent and
`hence "static." Otherwise the entry is considered "dynamic."
`
`21.4 ARP Structures
`
`Figure 21.7 shows the format of an ARP packet when transmitted on an Ethernet.
`
`-hardware type, ar_hrd (ARPHRD_ETHER)
`
`ether_type
`
`ether_dhost
`Ethernet
`d~tinafionaddr
`bytes
`
`ether_shost ~ ’
`Ethernet ~amJ
`sourceaddr ]typ~
`2 2
`6
`Ethernet header
`ether_header
`
`( }
`
`(- hardware length,
`ar_hln (6)
`
`iprotocol type, ar~oro (ETHERTYPE_IP)
`
`( (- protocol length, ar_p 1 n (4)
`~T ar_op arp_sha arp_spa arp_tha
`sender
`sender I
`
`I I I I I
`] I ] I
`2 1 1
`
`op
`
`I Ethernetaddr I IPaddr I Ethernetaddr
`4
`6
`6
`
`2
`
`arp_tpa
`
`I targettarget
`IPaddr
`4
`
`ARP header
`arphdr { }
`