TCP的TSO处理（一） - JAVA

/* Fill in the GSO bookkeeping fields (gso_segs, gso_size, gso_type) in the
 * shared info of skb, based on skb->len and mss_now.
 *
 * NOTE(review): the beginning of this signature appears to have been cut off
 * in this listing (no return type, name starts mid-identifier) - confirm
 * against the original source.
 */
p_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
/* No segmentation is needed in any of the following cases:
* 1. The data length does not exceed the maximum allowed length (MSS).
* 2. GSO is not usable for this socket (sk_can_gso() fails), i.e. the NIC
*    does not support GSO.
* 3. skb->ip_summed is CHECKSUM_NONE (per the original note: the NIC cannot
*    recompute the checksum).
*/
if (skb->len <= mss_now || ! sk_can_gso(sk) ||
skb->ip_summed == CHECKSUM_NONE) {

/* Avoid the costly divide in the normal non-TSO case.*/
skb_shinfo(skb)->gso_segs = 1;
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
} else {

/* Compute how many segments the data has to be split into. */
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);/* rounded up */
skb_shinfo(skb)->gso_size = mss_now; /* size of each segment */
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
}
}

/* With TSO, one SKB may stand for several actual packets.
 * This accessor reports the segment count recorded for the SKB (gso_segs)
 * so those packets can be tracked properly.
 */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	const int segs = skb_shinfo(skb)->gso_segs;

	return segs;
}

/* Per-segment size recorded for the SKB (gso_size).
 * Only meaningful when tcp_skb_pcount() > 1.
 */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
	const int seg_size = skb_shinfo(skb)->gso_size;

	return seg_size;
}

/* Report whether GSO can be used on this socket: checks the socket's GSO
 * type against its route capabilities via net_gso_ok().
 */
static inline int sk_can_gso(const struct sock *sk)
{
	/* sk_route_caps carries the feature flags of the NIC driver;
	 * sk_gso_type is the GSO type, set to SKB_GSO_TCPV4.
	 */
	const int caps = sk->sk_route_caps;
	const int type = sk->sk_gso_type;

	return net_gso_ok(caps, type);
}

/* Return 1 when every bit of gso_type, shifted left by NETIF_F_GSO_SHIFT,
 * is also set in features; return 0 otherwise.
 */
static inline int net_gso_ok(int features, int gso_type)
{
	const int wanted = gso_type << NETIF_F_GSO_SHIFT;

	if ((features & wanted) != wanted)
		return 0;

	return 1;
}

sk_gso_max_size

The NIC also specifies the maximum segment size it can handle, in the sk_gso_max_size field.
Usually it is set to 64k. A value of 64k means that if TCP has more than 64k of data,
TCP itself must first split it into chunks of at most 64k before pushing them to the interface.
相关变量，sock中：unsigned int sk_gso_max_size.
[java]
/* RFC2861: check whether we are limited by the application or by the
 * congestion window.
 *
 * This is the inverse of the cwnd check in tcp_tso_should_defer.
 * Returns 1 when sending is limited by the congestion window, so the
 * congestion window should be grown.
 * Returns 0 when sending is limited by the application, so the congestion
 * window should not be grown.
 */
int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 room;

	/* Everything the congestion window allows is already in flight. */
	if (tp->snd_cwnd <= in_flight)
		return 1;

	/* room is the amount that may still be sent within the window. */
	room = tp->snd_cwnd - in_flight;

	/* When GSO is in use and both conditions below hold, treat the
	 * connection as congestion-window limited, so the window may grow.
	 */
	if (sk_can_gso(sk)) {
		if (room * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
		    room * tp->mss_cache < sk->sk_gso_max_size)
			return 1;
	}

	/* If room exceeds the permitted burst, the window is already growing
	 * fast enough and must not be increased any further.
	 */
	return room <= tcp_max_burst(tp) ? 1 : 0;
}

TSO Nagle

GSO, Generic Segmentation Offload，是协议栈提高效率的一个策略。
它尽可能地推迟分段(segmentation)，最理想的情况是在网卡驱动里才进行分段：在网卡驱动里把
大包(super-packet)拆开，组成SG list，或在一块预先分配好的内存中重组各段，然后交给
网卡。
The idea behind GSO seems to be that many of the performance benefits of LSO (TSO/UFO)
can be obtained in a hardware-independent way, by passing large "superpackets" around for
as long as possible, and deferring segmentation to the last possible moment - for devices
without hardware segmentation/fragmentation support, this would be when data is actually
handed to the device driver; for devices with hardware support, it could even be done in hardware.

Try to defer sending, if possible, in order to minimize the amount of TSO splitting we do.
View it as a kind of TSO Nagle test.
通过延迟数据包的发送，来减少TSO分段的次数，达到减小CPU负载的目的。
[java]
struct tcp_sock {
...
u32 tso_deferred; /* timestamp of the last TSO deferral */
...
};
[java]
/** This algorithm is from John Heffner.
* 0: send now ; 1: deferred
*/
static int tcp_tso_

TCP的TSO处理（一）(二)