diff -urNp linux-2.6.13/include/linux/tcp.h linux-2.6.13-sack-fastpath/include/linux/tcp.h
--- linux-2.6.13/include/linux/tcp.h	2005-08-28 18:41:01.000000000 -0500
+++ linux-2.6.13-sack-fastpath/include/linux/tcp.h	2005-10-06 18:53:24.000000000 -0500
@@ -333,6 +333,22 @@ struct tcp_sock {
 	__u32	snd_cwnd_used;
 	__u32	snd_cwnd_stamp;
 
+	/* from STCP: cached retransmit-queue positions; clear_all_retrans_hints() resets the skb ones */
+	struct sk_buff	*mark_head_lost_skb_hint;	/* where tcp_mark_head_lost() resumes */
+	int		mark_head_lost_cnt_hint;	/* running count saved with that skb */
+
+	struct sk_buff	*update_scoreboard_skb_hint;	/* where tcp_update_scoreboard() stopped */
+
+	struct sk_buff	*xmit_retransmit_queue_lost_skb_hint;	/* first pass of tcp_xmit_retransmit_queue() */
+	int		xmit_retransmit_queue_lost_cnt_hint;	/* packet_cnt saved with that skb */
+	struct sk_buff	*xmit_retransmit_queue_forward_skb_hint;	/* loop after the first pass */
+	int		xmit_retransmit_queue_forward_cnt_hint;	/* packet_cnt saved with that skb */
+
+	/* from STCP: SACK fastpath state used by tcp_sacktag_write_queue() */
+	struct tcp_sack_block recv_sack_cache[4];	/* last SACK blocks seen, host byte order */
+	int		sackfastpath_facket_cnt_hint;	/* fack_count saved with the skb below */
+	struct sk_buff	*sackfastpath_skb_hint;		/* skb the last SACK walk reached */
+
 	/* Two commonly used timers in both sender and receiver paths. */
 	unsigned long		timeout;
  	struct timer_list	retransmit_timer;	/* Resend (no ack)	*/
diff -urNp linux-2.6.13/include/net/sock.h linux-2.6.13-sack-fastpath/include/net/sock.h
--- linux-2.6.13/include/net/sock.h	2005-08-28 18:41:01.000000000 -0500
+++ linux-2.6.13-sack-fastpath/include/net/sock.h	2005-10-06 18:54:48.000000000 -0500
@@ -1189,6 +1189,13 @@ static inline struct page *sk_stream_all
 		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
 		     skb = skb->next)
 
+/* From STCP (fast SACK processing): walk skb from skb_init until sk_send_head or the write-queue list head. */
+#define sk_stream_for_retrans_queue_from(skb, skb_init, sk)		\
+		for (skb = (struct sk_buff *)(skb_init);		\
+		     (skb != (sk)->sk_send_head) &&			\
+		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
+		     skb = skb->next)
+
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff -urNp linux-2.6.13/include/net/tcp.h linux-2.6.13-sack-fastpath/include/net/tcp.h
--- linux-2.6.13/include/net/tcp.h	2005-08-28 18:41:01.000000000 -0500
+++ linux-2.6.13-sack-fastpath/include/net/tcp.h	2005-10-06 18:56:25.000000000 -0500
@@ -1691,6 +1691,15 @@ static inline void tcp_mib_init(void)
 	TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
 }
 
+/* From STCP: forget every cached skb position so each hinted walk restarts at the write-queue head. */
+static inline void clear_all_retrans_hints(struct tcp_sock *tp) {
+	tp->sackfastpath_skb_hint = NULL;
+	tp->xmit_retransmit_queue_forward_skb_hint = NULL;
+	tp->xmit_retransmit_queue_lost_skb_hint = NULL;
+	tp->update_scoreboard_skb_hint = NULL;
+	tp->mark_head_lost_skb_hint = NULL;
+}
+
 /* /proc */
 enum tcp_seq_states {
 	TCP_SEQ_STATE_LISTENING,
diff -urNp linux-2.6.13/net/ipv4/tcp_input.c linux-2.6.13-sack-fastpath/net/ipv4/tcp_input.c
--- linux-2.6.13/net/ipv4/tcp_input.c	2005-08-28 18:41:01.000000000 -0500
+++ linux-2.6.13-sack-fastpath/net/ipv4/tcp_input.c	2005-10-06 19:35:57.000000000 -0500
@@ -907,6 +907,7 @@ tcp_sacktag_write_queue(struct sock *sk,
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
+	int dup_sack = 0;
 	int i;
 
 	/* So, SACKs for already sent large segments will be lost.
@@ -921,12 +922,25 @@ tcp_sacktag_write_queue(struct sock *sk,
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
-	for (i=0; i<num_sacks; i++, sp++) {
-		struct sk_buff *skb;
-		__u32 start_seq = ntohl(sp->start_seq);
-		__u32 end_seq = ntohl(sp->end_seq);
-		int fack_count = 0;
-		int dup_sack = 0;
+        /* SACK fastpath:
+         * if the only SACK change is the increase of the end_seq of 
+         * the first block then only apply that SACK block 
+         * and use retrans queue hinting otherwise slowpath */
+        flag = 1;
+        for ( i=0; i<num_sacks; i++) {
+                __u32 start_seq = ntohl(sp[i].start_seq);
+                __u32 end_seq =  ntohl(sp[i].end_seq);
+                        
+                if(i == 0){
+			if (tp->recv_sack_cache[i].start_seq != start_seq)
+				flag = 0;                           
+                }else {
+			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
+                            (tp->recv_sack_cache[i].end_seq != end_seq))
+                        	flag = 0;
+                }
+                tp->recv_sack_cache[i].start_seq = start_seq;
+                tp->recv_sack_cache[i].end_seq = end_seq;
 
 		/* Check for D-SACK. */
 		if (i == 0) {
@@ -958,15 +972,57 @@ tcp_sacktag_write_queue(struct sock *sk,
 			if (before(ack, prior_snd_una - tp->max_window))
 				return 0;
 		}
+        }
+ 
+	if (flag) {
+		num_sacks = 1;
+	} else {
+		int j;
+		tp->sackfastpath_skb_hint = NULL;
+		/* Sort blocks by start_seq so the retransmit queue is walked in order.
+		 * Swap sp[] itself: recv_sack_cache[] keeps the order they arrived in. */
+		for (i = num_sacks - 1; i > 0; i--) {
+			for (j = 0; j < i; j++) {
+				if (after(ntohl(sp[j].start_seq), ntohl(sp[j+1].start_seq))) {
+					__u32 start = sp[j].start_seq;
+					__u32 end = sp[j].end_seq;
+					sp[j] = sp[j+1];
+					sp[j+1].start_seq = start;
+					sp[j+1].end_seq = end;
+				}
+			}
+		}
+	}
+
+        /* clear flag as used for different purpose in following code */
+        flag = 0;
+
+        for (i=0; i<num_sacks; i++, sp++) {
+                struct sk_buff *skb;
+                __u32 start_seq = ntohl(sp->start_seq);
+                __u32 end_seq = ntohl(sp->end_seq);
+                int fack_count;
+
+                /* Use SACK fastpath hint if valid */
+                if( tp->sackfastpath_skb_hint != NULL){
+                        skb = tp->sackfastpath_skb_hint;
+                        fack_count = tp->sackfastpath_facket_cnt_hint;
+                }else{
+                        skb = sk->sk_write_queue.next;
+                        fack_count = 0;
+                }
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		sk_stream_for_retrans_queue_from(skb, skb, sk) {
 			u8 sacked = TCP_SKB_CB(skb)->sacked;
 			int in_sack;
 
+                        tp->sackfastpath_skb_hint = skb;
+                        tp->sackfastpath_facket_cnt_hint = fack_count;
+
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
@@ -1019,8 +1075,11 @@ tcp_sacktag_write_queue(struct sock *sk,
 						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
 						tp->lost_out -= tcp_skb_pcount(skb);
 						tp->retrans_out -= tcp_skb_pcount(skb);
-					}
-				} else {
+
+                                                /* clear lost hint */
+                                                tp->xmit_retransmit_queue_lost_skb_hint = NULL;
+                                        }
+                                } else {
 					/* New sack for not retransmitted frame,
 					 * which was in hole. It is reordering.
 					 */
@@ -1031,6 +1090,9 @@ tcp_sacktag_write_queue(struct sock *sk,
 					if (sacked & TCPCB_LOST) {
 						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 						tp->lost_out -= tcp_skb_pcount(skb);
+
+                                                /* clear lost hint */
+                                                tp->xmit_retransmit_queue_lost_skb_hint = NULL;
 					}
 				}
 
@@ -1054,6 +1116,7 @@ tcp_sacktag_write_queue(struct sock *sk,
 			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
+                                tp->xmit_retransmit_queue_lost_skb_hint = NULL;
 			}
 		}
 	}
@@ -1081,6 +1144,9 @@ tcp_sacktag_write_queue(struct sock *sk,
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 
+                                /* clear lost hint */
+                                tp->xmit_retransmit_queue_lost_skb_hint = NULL;
+
 				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 					tp->lost_out += tcp_skb_pcount(skb);
 					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1187,6 +1253,8 @@ static void tcp_enter_frto_loss(struct s
 	tcp_set_ca_state(tp, TCP_CA_Loss);
 	tp->high_seq = tp->frto_highmark;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1251,6 +1319,8 @@ void tcp_enter_loss(struct sock *sk, int
 	tcp_set_ca_state(tp, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp)
@@ -1471,19 +1541,39 @@ static void tcp_mark_head_lost(struct so
 			       int packets, u32 high_seq)
 {
 	struct sk_buff *skb;
-	int cnt = packets;
+	int cnt;
 
-	BUG_TRAP(cnt <= tp->packets_out);
+	BUG_TRAP(packets <= tp->packets_out);
 
-	sk_stream_for_retrans_queue(skb, sk) {
-		cnt -= tcp_skb_pcount(skb);
-		if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
-			break;
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
-			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out += tcp_skb_pcount(skb);
-		}
-	}
+	/* Resume from the cached position if there is one, else from the queue head. */
+	if (tp->mark_head_lost_skb_hint != NULL) {
+		skb = tp->mark_head_lost_skb_hint;
+		cnt = tp->mark_head_lost_cnt_hint;
+	} else {
+		skb = sk->sk_write_queue.next;
+		cnt = 0;
+	}
+
+	sk_stream_for_retrans_queue_from(skb, skb, sk) {
+		/* Save the position before counting this skb: a resumed walk
+		 * starts on it again and adds its packets to the saved count. */
+		tp->mark_head_lost_skb_hint = skb;
+		tp->mark_head_lost_cnt_hint = cnt;
+		/* Count packets via tcp_skb_pcount(), as the loop this replaces did. */
+		cnt += tcp_skb_pcount(skb);
+		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+			break;
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+			tp->lost_out += tcp_skb_pcount(skb);
+			/* This skb sits before the retransmit hint in sequence space,
+			 * so drop that hint and let the retransmit walk restart. */
+			if (tp->xmit_retransmit_queue_lost_skb_hint != NULL &&
+			    before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->xmit_retransmit_queue_lost_skb_hint)->seq))
+				tp->xmit_retransmit_queue_lost_skb_hint = NULL;
+		}
+	}
+ 
 	tcp_sync_left_out(tp);
 }
 
@@ -1508,13 +1598,32 @@ static void tcp_update_scoreboard(struct
 	if (tcp_head_timedout(sk, tp)) {
 		struct sk_buff *skb;
 
-		sk_stream_for_retrans_queue(skb, sk) {
-			if (tcp_skb_timedout(tp, skb) &&
-			    !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
-				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				tp->lost_out += tcp_skb_pcount(skb);
-			}
-		}
+		/* Resume from the skb where the previous scan stopped, if cached. */
+		if (tp->update_scoreboard_skb_hint != NULL)
+			skb = tp->update_scoreboard_skb_hint;
+		else
+			skb = sk->sk_write_queue.next;
+
+		sk_stream_for_retrans_queue_from(skb, skb, sk) {
+			/* Stop at the first skb that has not timed out. */
+			if (!tcp_skb_timedout(tp, skb))
+				break;
+
+			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+				/* Account in packets via tcp_skb_pcount(), as the loop this replaces did. */
+				tp->lost_out += tcp_skb_pcount(skb);
+
+				/* This skb sits before the retransmit hint in sequence space,
+				 * so drop that hint and let the retransmit walk restart. */
+				if (tp->xmit_retransmit_queue_lost_skb_hint != NULL &&
+				    before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->xmit_retransmit_queue_lost_skb_hint)->seq))
+					tp->xmit_retransmit_queue_lost_skb_hint = NULL;
+			}
+		}
+
+		tp->update_scoreboard_skb_hint = skb;
+
 		tcp_sync_left_out(tp);
 	}
 }
@@ -1588,6 +1697,10 @@ static void tcp_undo_cwr(struct tcp_sock
 	}
 	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+        /* There is something screwy going on with the retrans hints after
+           an undo */
+        clear_all_retrans_hints(tp);
 }
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1671,6 +1784,9 @@ static int tcp_try_undo_loss(struct sock
 		sk_stream_for_retrans_queue(skb, sk) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
+
+                clear_all_retrans_hints(tp);
+
 		DBGUNDO(sk, tp, "partial loss");
 		tp->lost_out = 0;
 		tp->left_out = tp->sacked_out;
@@ -2087,6 +2203,7 @@ static int tcp_clean_rtx_queue(struct so
 		tcp_packets_out_dec(tp, skb);
 		__skb_unlink(skb, skb->list);
 		sk_stream_free_skb(sk, skb);
+        	clear_all_retrans_hints(tp);
 	}
 
 	if (acked&FLAG_ACKED) {
diff -urNp linux-2.6.13/net/ipv4/tcp_output.c linux-2.6.13-sack-fastpath/net/ipv4/tcp_output.c
--- linux-2.6.13/net/ipv4/tcp_output.c	2005-08-28 18:41:01.000000000 -0500
+++ linux-2.6.13-sack-fastpath/net/ipv4/tcp_output.c	2005-10-06 19:28:32.000000000 -0500
@@ -434,6 +434,8 @@ static int tcp_fragment(struct sock *sk,
 	int nsize;
 	u16 flags;
 
+        clear_all_retrans_hints(tp);
+
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -1237,6 +1239,9 @@ static void tcp_retrans_try_collapse(str
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
+                /* changing transmit queue under us so clear hints */
+                clear_all_retrans_hints(tp);
+
 		/* Ok.  We will be able to collapse the packet. */
 		__skb_unlink(next_skb, next_skb->list);
 
@@ -1306,6 +1311,8 @@ void tcp_simple_retransmit(struct sock *
 		}
 	}
 
+        clear_all_retrans_hints(tp);
+
 	if (!lost)
 		return;
 
@@ -1463,13 +1470,25 @@ void tcp_xmit_retransmit_queue(struct so
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int packet_cnt = tp->lost_out;
+	int packet_cnt;
 
+        if( tp->xmit_retransmit_queue_lost_skb_hint != NULL ){
+                skb = tp->xmit_retransmit_queue_lost_skb_hint;
+                packet_cnt = tp->xmit_retransmit_queue_lost_cnt_hint;
+        }else{
+                skb = sk->sk_write_queue.next;
+                packet_cnt = 0;
+        }
+ 
 	/* First pass: retransmit lost packets. */
-	if (packet_cnt) {
-		sk_stream_for_retrans_queue(skb, sk) {
+       if (tp->lost_out) {
+                sk_stream_for_retrans_queue_from(skb, skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+                        /* we could do better than to assign each time */
+                        tp->xmit_retransmit_queue_lost_skb_hint = skb;
+                        tp->xmit_retransmit_queue_lost_cnt_hint = packet_cnt;
+
 			/* Assume this retransmit will generate
 			 * only one packet for congestion window
 			 * calculation purposes.  This works because
@@ -1482,8 +1501,10 @@ void tcp_xmit_retransmit_queue(struct so
 
 			if (sacked&TCPCB_LOST) {
 				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-					if (tcp_retransmit_skb(sk, skb))
-						return;
+                                        if (tcp_retransmit_skb(sk, skb)){
+                                                tp->xmit_retransmit_queue_lost_skb_hint = NULL;
+                                                return;
+                                        }
 					if (tp->ca_state != TCP_CA_Loss)
 						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
 					else
@@ -1494,8 +1515,8 @@ void tcp_xmit_retransmit_queue(struct so
 						tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
 				}
 
-				packet_cnt -= tcp_skb_pcount(skb);
-				if (packet_cnt <= 0)
+				packet_cnt += tcp_skb_pcount(skb);
+				if (packet_cnt >= tp->lost_out)
 					break;
 			}
 		}
@@ -1521,9 +1542,18 @@ void tcp_xmit_retransmit_queue(struct so
 	if (tcp_may_send_now(sk, tp))
 		return;
 
-	packet_cnt = 0;
-
-	sk_stream_for_retrans_queue(skb, sk) {
+        if ( tp->xmit_retransmit_queue_forward_skb_hint != NULL){
+                skb = tp->xmit_retransmit_queue_forward_skb_hint;
+                packet_cnt = tp->xmit_retransmit_queue_forward_cnt_hint;
+        } else{
+                skb = sk->sk_write_queue.next;
+                packet_cnt = 0;
+        }
+
+        sk_stream_for_retrans_queue_from(skb, skb, sk) {
+                tp->xmit_retransmit_queue_forward_cnt_hint = packet_cnt;
+                tp->xmit_retransmit_queue_forward_skb_hint = skb;
+ 
 		/* Similar to the retransmit loop above we
 		 * can pretend that the retransmitted SKB
 		 * we send out here will be composed of one
@@ -1540,8 +1570,10 @@ void tcp_xmit_retransmit_queue(struct so
 			continue;
 
 		/* Ok, retransmit it. */
-		if (tcp_retransmit_skb(sk, skb))
-			break;
+                if(tcp_retransmit_skb(sk, skb)){
+                        tp->xmit_retransmit_queue_forward_skb_hint = NULL;
+                        break;
+                }
 
 		if (skb == skb_peek(&sk->sk_write_queue))
 			tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
