diff -urNp linux-2.6.13/include/linux/tcp.h linux-2.6.13-sack-fastpath/include/linux/tcp.h --- linux-2.6.13/include/linux/tcp.h 2005-08-28 18:41:01.000000000 -0500 +++ linux-2.6.13-sack-fastpath/include/linux/tcp.h 2005-10-06 18:53:24.000000000 -0500 @@ -333,6 +333,22 @@ struct tcp_sock { __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; + /* from STCP, retrans queue hinting */ + struct sk_buff* mark_head_lost_skb_hint; + int mark_head_lost_cnt_hint; + + struct sk_buff* update_scoreboard_skb_hint; + + struct sk_buff* xmit_retransmit_queue_lost_skb_hint; + int xmit_retransmit_queue_lost_cnt_hint; + struct sk_buff* xmit_retransmit_queue_forward_skb_hint; + int xmit_retransmit_queue_forward_cnt_hint; + + /* from STCP, SACK fastpath */ + struct tcp_sack_block recv_sack_cache[4]; + int sackfastpath_facket_cnt_hint; + struct sk_buff* sackfastpath_skb_hint; + /* Two commonly used timers in both sender and receiver paths. */ unsigned long timeout; struct timer_list retransmit_timer; /* Resend (no ack) */ diff -urNp linux-2.6.13/include/net/sock.h linux-2.6.13-sack-fastpath/include/net/sock.h --- linux-2.6.13/include/net/sock.h 2005-08-28 18:41:01.000000000 -0500 +++ linux-2.6.13-sack-fastpath/include/net/sock.h 2005-10-06 18:54:48.000000000 -0500 @@ -1189,6 +1189,13 @@ static inline struct page *sk_stream_all (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ skb = skb->next) +/*from STCP for fast SACK Process*/ +#define sk_stream_for_retrans_queue_from(skb, skb_init, sk) \ + for (skb = (struct sk_buff *)(skb_init); \ + (skb != (sk)->sk_send_head) && \ + (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ + skb=skb->next) + /* * Default write policy as shown to user space via poll/select/SIGIO */ diff -urNp linux-2.6.13/include/net/tcp.h linux-2.6.13-sack-fastpath/include/net/tcp.h --- linux-2.6.13/include/net/tcp.h 2005-08-28 18:41:01.000000000 -0500 +++ linux-2.6.13-sack-fastpath/include/net/tcp.h 2005-10-06 18:56:25.000000000 -0500 @@ -1691,6 +1691,15 @@ static inline void 
tcp_mib_init(void) TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1); } +/*from STCP */ +static inline void clear_all_retrans_hints(struct tcp_sock *tp){ + tp->mark_head_lost_skb_hint = NULL; + tp->update_scoreboard_skb_hint = NULL; + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + tp->xmit_retransmit_queue_forward_skb_hint = NULL; + tp->sackfastpath_skb_hint = NULL; +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, diff -urNp linux-2.6.13/net/ipv4/tcp_input.c linux-2.6.13-sack-fastpath/net/ipv4/tcp_input.c --- linux-2.6.13/net/ipv4/tcp_input.c 2005-08-28 18:41:01.000000000 -0500 +++ linux-2.6.13-sack-fastpath/net/ipv4/tcp_input.c 2005-10-06 19:35:57.000000000 -0500 @@ -907,6 +907,7 @@ tcp_sacktag_write_queue(struct sock *sk, int prior_fackets; u32 lost_retrans = 0; int flag = 0; + int dup_sack = 0; int i; /* So, SACKs for already sent large segments will be lost. @@ -921,12 +922,25 @@ tcp_sacktag_write_queue(struct sock *sk, tp->fackets_out = 0; prior_fackets = tp->fackets_out; - for (i=0; i < num_sacks; i++, sp++) { - struct sk_buff *skb; - __u32 start_seq = ntohl(sp->start_seq); - __u32 end_seq = ntohl(sp->end_seq); - int fack_count = 0; - int dup_sack = 0; + /* SACK fastpath: + * if the only SACK change is the increase of the end_seq of + * the first block then only apply that SACK block + * and use retrans queue hinting otherwise slowpath */ + flag = 1; + for ( i=0; i < num_sacks; i++) { + __u32 start_seq = ntohl(sp[i].start_seq); + __u32 end_seq = ntohl(sp[i].end_seq); + + if (i == 0) { + if (tp->recv_sack_cache[i].start_seq != start_seq) + flag = 0; + }else { + if ((tp->recv_sack_cache[i].start_seq != start_seq) || + (tp->recv_sack_cache[i].end_seq != end_seq)) + flag = 0; + } + tp->recv_sack_cache[i].start_seq = start_seq; + tp->recv_sack_cache[i].end_seq = end_seq; /* Check for D-SACK. 
*/ if (i == 0) { @@ -958,15 +972,57 @@ tcp_sacktag_write_queue(struct sock *sk, if (before(ack, prior_snd_una - tp->max_window)) return 0; } + } + + if(flag) { + num_sacks=1; + }else{ + int j; + tp->sackfastpath_skb_hint = NULL; + + /* order SACK blocks to allow in order walk of the retrans queue */ + for(i=num_sacks-1; i > 0; i--){ + for(j=0; j < i; j++){ + if (after(ntohl(sp[j].start_seq), ntohl(sp[j+1].start_seq))){ + sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); + sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); + sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); + sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); + } + + } + } + } + + /* clear flag as used for different purpose in following code */ + flag = 0; + + for (i=0; i < num_sacks; i++, sp++) { + struct sk_buff *skb; + __u32 start_seq = ntohl(sp->start_seq); + __u32 end_seq = ntohl(sp->end_seq); + int fack_count; + + /* Use SACK fastpath hint if valid */ + if( tp->sackfastpath_skb_hint != NULL){ + skb = tp->sackfastpath_skb_hint; + fack_count = tp->sackfastpath_facket_cnt_hint; + }else{ + skb = sk->sk_write_queue.next; + fack_count = 0; + } /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) flag |= FLAG_DATA_LOST; - sk_stream_for_retrans_queue(skb, sk) { + sk_stream_for_retrans_queue_from(skb, skb, sk) { u8 sacked = TCP_SKB_CB(skb)->sacked; int in_sack; + tp->sackfastpath_skb_hint = skb; + tp->sackfastpath_facket_cnt_hint = fack_count; + /* The retransmission queue is always in order, so * we can short-circuit the walk early. */ @@ -1019,8 +1075,11 @@ tcp_sacktag_write_queue(struct sock *sk, TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= tcp_skb_pcount(skb); tp->retrans_out -= tcp_skb_pcount(skb); - } - } else { + + /* clear lost hint */ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + } + } else { /* New sack for not retransmitted frame, * which was in hole. It is reordering. 
*/ @@ -1031,6 +1090,9 @@ tcp_sacktag_write_queue(struct sock *sk, if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; tp->lost_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; } } @@ -1054,6 +1116,7 @@ tcp_sacktag_write_queue(struct sock *sk, (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); + tp->xmit_retransmit_queue_lost_skb_hint = NULL; } } } @@ -1081,6 +1144,9 @@ tcp_sacktag_write_queue(struct sock *sk, TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); + /* clear lost hint */ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -1187,6 +1253,8 @@ static void tcp_enter_frto_loss(struct s tcp_set_ca_state(tp, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); + + clear_all_retrans_hints(tp); } void tcp_clear_retrans(struct tcp_sock *tp) @@ -1251,6 +1319,8 @@ void tcp_enter_loss(struct sock *sk, int tcp_set_ca_state(tp, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); + + clear_all_retrans_hints(tp); } static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) @@ -1471,19 +1541,39 @@ static void tcp_mark_head_lost(struct so int packets, u32 high_seq) { struct sk_buff *skb; - int cnt = packets; + int cnt; - BUG_TRAP(cnt <= tp->packets_out); + BUG_TRAP(packets <= tp->packets_out); - sk_stream_for_retrans_queue(skb, sk) { - cnt -= tcp_skb_pcount(skb); - if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) - break; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - } - } + if ( tp->mark_head_lost_skb_hint != NULL ) { + skb = tp->mark_head_lost_skb_hint; + cnt = tp->mark_head_lost_cnt_hint; + 
}else{ + skb = sk->sk_write_queue.next; + cnt = 0; + } + + sk_stream_for_retrans_queue_from(skb, skb, sk) { + /* TODO: do this better */ + /* this is not the most efficient way to do this... */ + tp->mark_head_lost_skb_hint = skb; + tp->mark_head_lost_cnt_hint = cnt; + if (++cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + break; + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + + /* clear xmit_retransmit_queue hints + * if this is beyond hint */ + if(tp->xmit_retransmit_queue_lost_skb_hint != NULL && + before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->xmit_retransmit_queue_lost_skb_hint)->seq) ){ + + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + } + } + } + tcp_sync_left_out(tp); } @@ -1508,13 +1598,32 @@ static void tcp_update_scoreboard(struct if (tcp_head_timedout(sk, tp)) { struct sk_buff *skb; - sk_stream_for_retrans_queue(skb, sk) { - if (tcp_skb_timedout(tp, skb) && - !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - } - } + if( tp->update_scoreboard_skb_hint != NULL ) { + skb = tp->update_scoreboard_skb_hint; + }else{ + skb = sk->sk_write_queue.next; + } + + sk_stream_for_retrans_queue_from(skb, skb, sk) { + if(tcp_skb_timedout(tp, skb)){ + if(!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)){ + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + /* clear xmit_retrans hint */ + if(tp->xmit_retransmit_queue_lost_skb_hint != NULL && + before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->xmit_retransmit_queue_lost_skb_hint)->seq) ){ + + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + } + } + }else{ + break; + } + + } + + tp->update_scoreboard_skb_hint = skb; + tcp_sync_left_out(tp); } } @@ -1588,6 +1697,10 @@ static void tcp_undo_cwr(struct tcp_sock } tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; + + /* There is something screwy going on with the retrans hints after + an undo */ + 
clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -1671,6 +1784,9 @@ static int tcp_try_undo_loss(struct sock sk_stream_for_retrans_queue(skb, sk) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } + + clear_all_retrans_hints(tp); + DBGUNDO(sk, tp, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; @@ -2087,6 +2203,7 @@ static int tcp_clean_rtx_queue(struct so tcp_packets_out_dec(tp, skb); __skb_unlink(skb, skb->list); sk_stream_free_skb(sk, skb); + clear_all_retrans_hints(tp); } if (acked&FLAG_ACKED) { diff -urNp linux-2.6.13/net/ipv4/tcp_output.c linux-2.6.13-sack-fastpath/net/ipv4/tcp_output.c --- linux-2.6.13/net/ipv4/tcp_output.c 2005-08-28 18:41:01.000000000 -0500 +++ linux-2.6.13-sack-fastpath/net/ipv4/tcp_output.c 2005-10-06 19:28:32.000000000 -0500 @@ -434,6 +434,8 @@ static int tcp_fragment(struct sock *sk, int nsize; u16 flags; + clear_all_retrans_hints(tp); + nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -1237,6 +1239,9 @@ static void tcp_retrans_try_collapse(str BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); + /* changing transmit queue under us so clear hints */ + clear_all_retrans_hints(tp); + /* Ok. We will be able to collapse the packet. */ __skb_unlink(next_skb, next_skb->list); @@ -1306,6 +1311,8 @@ void tcp_simple_retransmit(struct sock * } } + clear_all_retrans_hints(tp); + if (!lost) return; @@ -1463,13 +1470,25 @@ void tcp_xmit_retransmit_queue(struct so { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int packet_cnt = tp->lost_out; + int packet_cnt; + if( tp->xmit_retransmit_queue_lost_skb_hint != NULL ){ + skb = tp->xmit_retransmit_queue_lost_skb_hint; + packet_cnt = tp->xmit_retransmit_queue_lost_cnt_hint; + }else{ + skb = sk->sk_write_queue.next; + packet_cnt = 0; + } + /* First pass: retransmit lost packets. 
*/ - if (packet_cnt) { - sk_stream_for_retrans_queue(skb, sk) { + if (tp->lost_out) { + sk_stream_for_retrans_queue_from(skb, skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + /* we could do better than to assign each time */ + tp->xmit_retransmit_queue_lost_skb_hint = skb; + tp->xmit_retransmit_queue_lost_cnt_hint = packet_cnt; + /* Assume this retransmit will generate * only one packet for congestion window * calculation purposes. This works because @@ -1482,8 +1501,10 @@ void tcp_xmit_retransmit_queue(struct so if (sacked&TCPCB_LOST) { if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { - if (tcp_retransmit_skb(sk, skb)) - return; + if (tcp_retransmit_skb(sk, skb)){ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + return; + } if (tp->ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else @@ -1494,8 +1515,8 @@ void tcp_xmit_retransmit_queue(struct so tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } - packet_cnt -= tcp_skb_pcount(skb); - if (packet_cnt <= 0) + packet_cnt += tcp_skb_pcount(skb); + if (packet_cnt >= tp->lost_out) break; } } @@ -1521,9 +1542,18 @@ void tcp_xmit_retransmit_queue(struct so if (tcp_may_send_now(sk, tp)) return; - packet_cnt = 0; - - sk_stream_for_retrans_queue(skb, sk) { + if ( tp->xmit_retransmit_queue_forward_skb_hint != NULL){ + skb = tp->xmit_retransmit_queue_forward_skb_hint; + packet_cnt = tp->xmit_retransmit_queue_forward_cnt_hint; + } else{ + skb = sk->sk_write_queue.next; + packet_cnt = 0; + } + + sk_stream_for_retrans_queue_from(skb, skb, sk) { + tp->xmit_retransmit_queue_forward_cnt_hint = packet_cnt; + tp->xmit_retransmit_queue_forward_skb_hint = skb; + /* Similar to the retransmit loop above we * can pretend that the retransmitted SKB * we send out here will be composed of one @@ -1540,8 +1570,10 @@ void tcp_xmit_retransmit_queue(struct so continue; /* Ok, retransmit it. 
*/ - if (tcp_retransmit_skb(sk, skb)) - break; + if(tcp_retransmit_skb(sk, skb)){ + tp->xmit_retransmit_queue_forward_skb_hint = NULL; + break; + } if (skb == skb_peek(&sk->sk_write_queue)) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);