/* bnx2x_cmn.c: Broadcom Everest network driver.
*
* Copyright (c) 2007-2013 Broadcom Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation.
*
* Maintained by: Ariel Elior <ariel.elior@qlogic.com>
* Written by: Eliezer Tamir
* Based on code from Michael Chan's bnx2 driver
* UDP CSUM errata workaround by Arik Gendelman
* Slowpath and fastpath rework by Vladislav Zolotarov
* Statistics and Link management by Yitchak Gertner
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/busy_poll.h>
#include <linux/prefetch.h>
#include "bnx2x_cmn.h"
#include "bnx2x_init.h"
#include "bnx2x_sp.h"
static void bnx2x_free_fp_mem_cnic(struct bnx2x *bp);
static int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp);
static int bnx2x_alloc_fp_mem(struct bnx2x *bp);
static int bnx2x_poll(struct napi_struct *napi, int budget);
static void bnx2x_add_all_napi_cnic(struct bnx2x *bp)
{
int i;
/* Add NAPI objects */
for_each_rx_queue_cnic(bp, i) {
netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
bnx2x_poll, NAPI_POLL_WEIGHT);
napi_hash_add(&bnx2x_fp(bp, i, napi));
}
}
static void bnx2x_add_all_napi(struct bnx2x *bp)
{
int i;
/* Add NAPI objects */
for_each_eth_queue(bp, i) {
netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
bnx2x_poll, NAPI_POLL_WEIGHT);
napi_hash_add(&bnx2x_fp(bp, i, napi));
}
}
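/* Choose the number of RX/TX queues: the bnx2x_num_queues module parameter
 * if set, otherwise the kernel's default RSS queue count, reduced to a
 * single queue under kdump and clamped to the per-chip maximum.
 */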
static int bnx2x_calc_num_queues(struct bnx2x *bp)
{
int nq = bnx2x_num_queues ? : netif_get_num_default_rss_queues();
/* Reduce memory usage in kdump environment by using only one queue */
if (reset_devices)
nq = 1;
nq = clamp(nq, 1, BNX2X_MAX_QUEUES(bp));
return nq;
}
/**
* bnx2x_move_fp - move content of the fastpath structure.
*
* @bp: driver handle
* @from: source FP index
* @to: destination FP index
*
* Makes sure the contents of the bp->fp[to].napi is kept
* intact. This is done by first copying the napi struct from
* the target to the source, and then mem copying the entire
* source onto the target. Update txdata pointers and related
* content.
*/
static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
{
struct bnx2x_fastpath *from_fp = &bp->fp[from];
struct bnx2x_fastpath *to_fp = &bp->fp[to];
struct bnx2x_sp_objs *from_sp_objs = &bp->sp_objs[from];
struct bnx2x_sp_objs *to_sp_objs = &bp->sp_objs[to];
struct bnx2x_fp_stats *from_fp_stats = &bp->fp_stats[from];
struct bnx2x_fp_stats *to_fp_stats = &bp->fp_stats[to];
int old_max_eth_txqs, new_max_eth_txqs;
int old_txdata_index = 0, new_txdata_index = 0;
struct bnx2x_agg_info *old_tpa_info = to_fp->tpa_info;
/* Copy the NAPI object as it has been already initialized */
from_fp->napi = to_fp->napi;
/* Move bnx2x_fastpath contents */
memcpy(to_fp, from_fp, sizeof(*to_fp));
to_fp->index = to;
/* Retain the tpa_info of the original `to' version as we don't want
* 2 FPs to contain the same tpa_info pointer.
*/
to_fp->tpa_info = old_tpa_info;
/* move sp_objs contents as well, as their indices match fp ones */
memcpy(to_sp_objs, from_sp_objs, sizeof(*to_sp_objs));
/* move fp_stats contents as well, as their indices match fp ones */
memcpy(to_fp_stats, from_fp_stats, sizeof(*to_fp_stats));
/* Update txdata pointers in fp and move txdata content accordingly:
* Each fp consumes 'max_cos' txdata structures, so the index should be
* decremented by max_cos x delta.
*/
old_max_eth_txqs = BNX2X_NUM_ETH_QUEUES(bp) * (bp)->max_cos;
new_max_eth_txqs = (BNX2X_NUM_ETH_QUEUES(bp) - from + to) *
(bp)->max_cos;
if (from == FCOE_IDX(bp)) {
old_txdata_index = old_max_eth_txqs + FCOE_TXQ_IDX_OFFSET;
new_txdata_index = new_max_eth_txqs + FCOE_TXQ_IDX_OFFSET;
}
memcpy(&bp->bnx2x_txq[new_txdata_index],
&bp->bnx2x_txq[old_txdata_index],
sizeof(struct bnx2x_fp_txdata));
to_fp->txdata_ptr[0] = &bp->bnx2x_txq[new_txdata_index];
}
/**
* bnx2x_fill_fw_str - Fill buffer with FW version string.
*
* @bp: driver handle
* @buf: character buffer to fill with the fw name
* @buf_len: length of the above buffer
*
*/
void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len)
{
if (IS_PF(bp)) {
u8 phy_fw_ver[PHY_FW_VER_LEN];
phy_fw_ver[0] = '\0';
bnx2x_get_ext_phy_fw_version(&bp->link_params,
phy_fw_ver, PHY_FW_VER_LEN);
strlcpy(buf, bp->fw_ver, buf_len);
snprintf(buf + strlen(bp->fw_ver), 32 - strlen(bp->fw_ver),
"bc %d.%d.%d%s%s",
(bp->common.bc_ver & 0xff0000) >> 16,
(bp->common.bc_ver & 0xff00) >> 8,
(bp->common.bc_ver & 0xff),
((phy_fw_ver[0] != '\0') ? " phy " : ""), phy_fw_ver);
} else {
bnx2x_vf_fill_fw_str(bp, buf, buf_len);
}
}
/**
* bnx2x_shrink_eth_fp - guarantees fastpath structures stay intact
*
* @bp: driver handle
* @delta: number of eth queues which were not allocated
*/
static void bnx2x_shrink_eth_fp(struct bnx2x *bp, int delta)
{
int i, cos, old_eth_num = BNX2X_NUM_ETH_QUEUES(bp);
/* Queue pointer cannot be re-set on an fp-basis, as moving pointer
* backward along the array could cause memory to be overridden
*/
for (cos = 1; cos < bp->max_cos; cos++) {
for (i = 0; i < old_eth_num - delta; i++) {
struct bnx2x_fastpath *fp = &bp->fp[i];
int new_idx = cos * (old_eth_num - delta) + i;
memcpy(&bp->bnx2x_txq[new_idx], fp->txdata_ptr[cos],
sizeof(struct bnx2x_fp_txdata));
fp->txdata_ptr[cos] = &bp->bnx2x_txq[new_idx];
}
}
}
int bnx2x_load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
/* free skb in the packet ring at pos idx
* return idx of last bd freed
*/
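/* A transmitted packet occupies a chain of BDs: the start BD, a parse BD,
 * an optional split BD holding TSO header data (BNX2X_TSO_SPLIT_BD) and one
 * BD per SKB fragment. The start BD's nbd field counts the whole chain,
 * which is walked and unmapped below.
 */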
static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
u16 idx, unsigned int *pkts_compl,
unsigned int *bytes_compl)
{
struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
struct eth_tx_start_bd *tx_start_bd;
struct eth_tx_bd *tx_data_bd;
struct sk_buff *skb = tx_buf->skb;
u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
int nbd;
u16 split_bd_len = 0;
/* prefetch skb end pointer to speedup dev_kfree_skb() */
prefetch(&skb->end);
DP(NETIF_MSG_TX_DONE, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n",
txdata->txq_index, idx, tx_buf, skb);
tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
#ifdef BNX2X_STOP_ON_ERROR
if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
BNX2X_ERR("BAD nbd!\n");
bnx2x_panic();
}
#endif
new_cons = nbd + tx_buf->first_bd;
/* Get the next bd */
bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
/* Skip a parse bd... */
--nbd;
bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
/* TSO headers+data bds share a common mapping. See bnx2x_tx_split() */
if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
split_bd_len = BD_UNMAP_LEN(tx_data_bd);
--nbd;
bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
}
/* unmap first bd */
dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
BD_UNMAP_LEN(tx_start_bd) + split_bd_len,
DMA_TO_DEVICE);
/* now free frags */
while (nbd > 0) {
tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
if (--nbd)
bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
}
/* release skb */
WARN_ON(!skb);
if (likely(skb)) {
(*pkts_compl)++;
(*bytes_compl) += skb->len;
}
dev_kfree_skb_any(skb);
tx_buf->first_bd = 0;
tx_buf->skb = NULL;
return new_cons;
}
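/* bnx2x_tx_int - process TX completions up to the consumer index reported
 * by the chip in the status block, freeing completed packets and waking the
 * queue if it was stopped and enough descriptors are free again.
 */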
int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
{
struct netdev_queue *txq;
u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
unsigned int pkts_compl = 0, bytes_compl = 0;
#ifdef BNX2X_STOP_ON_ERROR
if (unlikely(bp->panic))
return -1;
#endif
txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
sw_cons = txdata->tx_pkt_cons;
while (sw_cons != hw_cons) {
u16 pkt_cons;
pkt_cons = TX_BD(sw_cons);
DP(NETIF_MSG_TX_DONE,
"queue[%d]: hw_cons %u sw_cons %u pkt_cons %u\n",
txdata->txq_index, hw_cons, sw_cons, pkt_cons);
bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons,
&pkts_compl, &bytes_compl);
sw_cons++;
}
netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
txdata->tx_pkt_cons = sw_cons;
txdata->tx_bd_cons = bd_cons;
/* Need to make the tx_bd_cons update visible to start_xmit()
* before checking for netif_tx_queue_stopped(). Without the
* memory barrier, there is a small possibility that
* start_xmit() will miss it and cause the queue to be stopped
* forever.
* On the other hand we need an rmb() here to ensure the proper
* ordering of bit testing in the following
* netif_tx_queue_stopped(txq) call.
*/
smp_mb();
if (unlikely(netif_tx_queue_stopped(txq))) {
/* Taking tx_lock() is needed to prevent re-enabling the queue
* while it's empty. This could have happen if rx_action() gets
* suspended in bnx2x_tx_int() after the condition before
* netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
*
* stops the queue->sees fresh tx_bd_cons->releases the queue->
* sends some packets consuming the whole queue again->
* stops the queue
*/
__netif_tx_lock(txq, smp_processor_id());
if ((netif_tx_queue_stopped(txq)) &&
(bp->state == BNX2X_STATE_OPEN) &&
(bnx2x_tx_avail(bp, txdata) >= MAX_DESC_PER_TX_PKT))
netif_tx_wake_queue(txq);
__netif_tx_unlock(txq);
}
return 0;
}
static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
u16 idx)
{
u16 last_max = fp->last_max_sge;
if (SUB_S16(idx, last_max) > 0)
fp->last_max_sge = idx;
}
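/* The SGE ring is tracked with a bitmask (fp->sge_mask): a bit is cleared
 * for every page the FW has consumed. The SGE producer is only advanced
 * over mask elements that are fully consumed, so partially used elements
 * keep their pages posted to the hardware.
 */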
static inline void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
u16 sge_len,
struct eth_end_agg_rx_cqe *cqe)
{
struct bnx2x *bp = fp->bp;
u16 last_max, last_elem, first_elem;
u16 delta = 0;
u16 i;
if (!sge_len)
return;
/* First mark all used pages */
for (i = 0; i < sge_len; i++)
BIT_VEC64_CLEAR_BIT(fp->sge_mask,
RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[i])));
DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
sge_len - 1, le16_to_cpu(cqe->sgl_or_raw_data.sgl[sge_len - 1]));
/* Here we assume that the last SGE index is the biggest */
prefetch((void *)(fp->sge_mask));
last_max = RX_SGE(fp->last_max_sge);
last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;
/* If ring is not full */
if (last_elem + 1 != first_elem)
last_elem++;
/* Now update the prod */
for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
if (likely(fp->sge_mask[i]))
break;
fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
delta += BIT_VEC64_ELEM_SZ;
}
if (delta > 0) {
fp->rx_sge_prod += delta;
/* clear page-end entries */
bnx2x_clear_sge_mask_next_elems(fp);
}
DP(NETIF_MSG_RX_STATUS,
"fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
fp->last_max_sge, fp->rx_sge_prod);
}
/* Get Toeplitz hash value in the skb using the value from the
* CQE (calculated by HW).
*/
static u32 bnx2x_get_rxhash(const struct bnx2x *bp,
const struct eth_fast_path_rx_cqe *cqe,
enum pkt_hash_types *rxhash_type)
{
if ((bp->dev->features & NETIF_F_RXHASH) &&
(cqe->status_flags & ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG)) {
enum eth_rss_hash_type htype;
htype = cqe->status_flags & ETH_FAST_PATH_RX_CQE_RSS_HASH_TYPE;
*rxhash_type = ((htype == TCP_IPV4_HASH_TYPE) ||
(htype == TCP_IPV6_HASH_TYPE)) ?
PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3;
return le32_to_cpu(cqe->rss_hash_result);
}
*rxhash_type = PKT_HASH_TYPE_NONE;
return 0;
}
static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
u16 cons, u16 prod,
struct eth_fast_path_rx_cqe *cqe)
{
struct bnx2x *bp = fp->bp;
struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
dma_addr_t mapping;
struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
struct sw_rx_bd *first_buf = &tpa_info->first_buf;
/* print error if current state != stop */
if (tpa_info->tpa_state != BNX2X_TPA_STOP)
BNX2X_ERR("start of bin not in stop [%d]\n", queue);
/* Try to map an empty data buffer from the aggregation info */
mapping = dma_map_single(&bp->pdev->dev,
first_buf->data + NET_SKB_PAD,
fp->rx_buf_size, DMA_FROM_DEVICE);
/*
* ...if it fails - move the skb from the consumer to the producer
* and set the current aggregation state as ERROR to drop it
* when TPA_STOP arrives.
*/
if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
/* Move the BD from the consumer to the producer */
bnx2x_reuse_rx_data(fp, cons, prod);
tpa_info->tpa_state = BNX2X_TPA_ERROR;
return;
}
/* move empty data from pool to prod */
prod_rx_buf->data = first_buf->data;
dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
/* move partial skb from cons to pool (don't unmap yet) */
*first_buf = *cons_rx_buf;
/* mark bin state as START */
tpa_info->parsing_flags =
le16_to_cpu(cqe->pars_flags.flags);
tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
tpa_info->tpa_state = BNX2X_TPA_START;
tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
tpa_info->placement_offset = cqe->placement_offset;
tpa_info->rxhash = bnx2x_get_rxhash(bp, cqe, &tpa_info->rxhash_type);
if (fp->mode == TPA_MODE_GRO) {
u16 gro_size = le16_to_cpu(cqe->pkt_len_or_gro_seg_len);
tpa_info->full_page = SGE_PAGES / gro_size * gro_size;
tpa_info->gro_size = gro_size;
}
#ifdef BNX2X_STOP_ON_ERROR
fp->tpa_queue_used |= (1 << queue);
#ifdef _ASM_GENERIC_INT_L64_H
DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
#else
DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
#endif
fp->tpa_queue_used);
#endif
}
/* Timestamp option length allowed for TPA aggregation:
*
* nop nop kind length echo val
*/
#define TPA_TSTAMP_OPT_LEN 12
/**
* bnx2x_set_gro_params - compute GRO values
*
* @skb: packet skb
* @parsing_flags: parsing flags from the START CQE
* @len_on_bd: total length of the first packet for the
* aggregation.
* @pkt_len: length of all segments
*
* Approximate value of the MSS for this aggregation calculated using
* the first packet of it.
* Compute number of aggregated segments, and gso_type.
*/
static void bnx2x_set_gro_params(struct sk_buff *skb, u16 parsing_flags,
u16 len_on_bd, unsigned int pkt_len,
u16 num_of_coalesced_segs)
{
/* TPA aggregation won't have either IP options or TCP options
* other than timestamp or IPv6 extension headers.
*/
u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
PRS_FLAG_OVERETH_IPV6) {
hdrs_len += sizeof(struct ipv6hdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
} else {
hdrs_len += sizeof(struct iphdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
}
/* Check if there was a TCP timestamp; if there is, it will
* always be 12 bytes long: nop nop kind length echo val.
*
* Otherwise FW would close the aggregation.
*/
if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
hdrs_len += TPA_TSTAMP_OPT_LEN;
skb_shinfo(skb)->gso_size = len_on_bd - hdrs_len;
/* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
* to skb_shinfo(skb)->gso_segs
*/
NAPI_GRO_CB(skb)->count = num_of_coalesced_segs;
}
static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp,
u16 index, gfp_t gfp_mask)
{
struct page *page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT);
struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
dma_addr_t mapping;
if (unlikely(page == NULL)) {
BNX2X_ERR("Can't alloc sge\n");
return -ENOMEM;
}
mapping = dma_map_page(&bp->pdev->dev, page, 0,
SGE_PAGES, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
__free_pages(page, PAGES_PER_SGE_SHIFT);
BNX2X_ERR("Can't map sge\n");
return -ENOMEM;
}
sw_buf->page = page;
dma_unmap_addr_set(sw_buf, mapping, mapping);
sge->addr_hi = cpu_to_le32(U64_HI(mapping));
sge->addr_lo = cpu_to_le32(U64_LO(mapping));
return 0;
}
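/* bnx2x_fill_frag_skb - attach the aggregation's SGE pages to the skb.
 * In GRO mode each page is split into gro_size-sized frags so the stack
 * sees MSS-sized fragments; in LRO mode whole pages are attached.
 */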
static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
struct bnx2x_agg_info *tpa_info,
u16 pages,
struct sk_buff *skb,
struct eth_end_agg_rx_cqe *cqe,
u16 cqe_idx)
{
struct sw_rx_page *rx_pg, old_rx_pg;
u16 len_on_bd = tpa_info->len_on_bd;
u32 i, frag_len, frag_size;
int err, j, frag_id = 0;
u16 full_page = 0, gro_size = 0;
frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
if (fp->mode == TPA_MODE_GRO) {
gro_size = tpa_info->gro_size;
full_page = tpa_info->full_page;
}
/* This is needed in order to enable forwarding support */
if (frag_size)
bnx2x_set_gro_params(skb, tpa_info->parsing_flags, len_on_bd,
le16_to_cpu(cqe->pkt_len),
le16_to_cpu(cqe->num_of_coalesced_segs));
#ifdef BNX2X_STOP_ON_ERROR
if (pages > min_t(u32, 8, MAX_SKB_FRAGS) * SGE_PAGES) {
BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
pages, cqe_idx);
BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
bnx2x_panic();
return -EINVAL;
}
#endif
/* Run through the SGL and compose the fragmented skb */
for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
/* FW gives the indices of the SGE as if the ring is an array
(meaning that "next" element will consume 2 indices) */
if (fp->mode == TPA_MODE_GRO)
frag_len = min_t(u32, frag_size, (u32)full_page);
else /* LRO */
frag_len = min_t(u32, frag_size, (u32)SGE_PAGES);
rx_pg = &fp->rx_page_ring[sge_idx];
old_rx_pg = *rx_pg;
/* If we fail to allocate a substitute page, we simply stop
where we are and drop the whole packet */
err = bnx2x_alloc_rx_sge(bp, fp, sge_idx, GFP_ATOMIC);
if (unlikely(err)) {
bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++;
return err;
}
/* Unmap the page as we're going to pass it to the stack */
dma_unmap_page(&bp->pdev->dev,
dma_unmap_addr(&old_rx_pg, mapping),
SGE_PAGES, DMA_FROM_DEVICE);
/* Add one frag and update the appropriate fields in the skb */
if (fp->mode == TPA_MODE_LRO)
skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
else { /* GRO */
int rem;
int offset = 0;
for (rem = frag_len; rem > 0; rem -= gro_size) {
int len = rem > gro_size ? gro_size : rem;
skb_fill_page_desc(skb, frag_id++,
old_rx_pg.page, offset, len);
if (offset)
get_page(old_rx_pg.page);
offset += len;
}
}
skb->len += frag_len;
skb->data_len += frag_len;
skb->truesize += frag_len;
frag_size -= frag_len;
}
return 0;
}
static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data)
{
if (fp->rx_frag_size)
put_page(virt_to_head_page(data));
else
kfree(data);
}
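/* When fp->rx_frag_size is non-zero, RX buffers come from the page-fragment
 * allocator and are freed with put_page(); otherwise plain kmalloc()/kfree()
 * is used.
 */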
static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
{
if (fp->rx_frag_size) {
/* GFP_KERNEL allocations are used only during initialization */
if (unlikely(gfp_mask & __GFP_WAIT))
return (void *)__get_free_page(gfp_mask);
return netdev_alloc_frag(fp->rx_frag_size);
}
return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask);
}
#ifdef CONFIG_INET
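/* The helpers below compute the TCP pseudo-header checksum into th->check;
 * bnx2x_gro_csum() then calls tcp_gro_complete() on the aggregated packet.
 */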
static void bnx2x_gro_ip_csum(struct bnx2x *bp, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th;
skb_set_transport_header(skb, sizeof(struct iphdr));
th = tcp_hdr(skb);
th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
iph->saddr, iph->daddr, 0);
}
static void bnx2x_gro_ipv6_csum(struct bnx2x *bp, struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th;
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
th = tcp_hdr(skb);
th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
&iph->saddr, &iph->daddr, 0);
}
static void bnx2x_gro_csum(struct bnx2x *bp, struct sk_buff *skb,
void (*gro_func)(struct bnx2x*, struct sk_buff*))
{
skb_set_network_header(skb, 0);
gro_func(bp, skb);
tcp_gro_complete(skb);
}
#endif
static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp,
struct sk_buff *skb)
{
#ifdef CONFIG_INET
if (skb_shinfo(skb)->gso_size) {
switch (be16_to_cpu(skb->protocol)) {
case ETH_P_IP:
bnx2x_gro_csum(bp, skb, bnx2x_gro_ip_csum);
break;
case ETH_P_IPV6:
bnx2x_gro_csum(bp, skb, bnx2x_gro_ipv6_csum);
break;
default:
BNX2X_ERR("Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n",
be16_to_cpu(skb->protocol));
}
}
#endif
skb_record_rx_queue(skb, fp->rx_queue);
napi_gro_receive(&fp->napi, skb);
}
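/* bnx2x_tpa_stop - close a TPA aggregation: build an skb around the first
 * buffer, attach the SGE pages via bnx2x_fill_frag_skb() and hand the result
 * to GRO; on any allocation failure the packet is dropped and the buffer
 * stays in the bin.
 */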
static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
struct bnx2x_agg_info *tpa_info,
u16 pages,
struct eth_end_agg_rx_cqe *cqe,
u16 cqe_idx)
{
struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
u8 pad = tpa_info->placement_offset;
u16 len = tpa_info->len_on_bd;
struct sk_buff *skb = NULL;
u8 *new_data, *data = rx_buf->data;
u8 old_tpa_state = tpa_info->tpa_state;
tpa_info->tpa_state = BNX2X_TPA_STOP;
/* If there was an error during the handling of the TPA_START -
* drop this aggregation.
*/
if (old_tpa_state == BNX2X_TPA_ERROR)
goto drop;
/* Try to allocate the new data */
new_data = bnx2x_frag_alloc(fp, GFP_ATOMIC);
/* Unmap skb in the pool anyway, as we are going to change
pool entry status to BNX2X_TPA_STOP even if new skb allocation
fails. */
dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
fp->rx_buf_size, DMA_FROM_DEVICE);
if (likely(new_data))
skb = build_skb(data, fp->rx_frag_size);
if (likely(skb)) {
#ifdef BNX2X_STOP_ON_ERROR
if (pad + len > fp->rx_buf_size) {
BNX2X_ERR("skb_put is about to fail... pad %d len %d rx_buf_size %d\n",
pad, len, fp->rx_buf_size);
bnx2x_panic();
return;
}
#endif
skb_reserve(skb, pad + NET_SKB_PAD);
skb_put(skb, len);
skb_set_hash(skb, tpa_info->rxhash, tpa_info->rxhash_type);
skb->protocol = eth_type_trans(skb, bp->dev);
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!bnx2x_fill_frag_skb(bp, fp, tpa_info, pages,
skb, cqe, cqe_idx)) {
if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tpa_info->vlan_tag);
bnx2x_gro_receive(bp, fp, skb);
} else {
DP(NETIF_MSG_RX_STATUS,
"Failed to allocate new pages - dropping packet!\n");
dev_kfree_skb_any(skb);
}
/* put new data in bin */
rx_buf->data = new_data;
return;
}
if (new_data)
bnx2x_frag_free(fp, new_data);
drop:
/* drop the packet and keep the buffer in the bin */
DP(NETIF_MSG_RX_STATUS,
"Failed to allocate or map a new skb - dropping packet!\n");
bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed++;
}
static int bnx2x_alloc_rx_data(struct bnx2x *bp, struct bnx2x_fastpath *fp,
u16 index, gfp_t gfp_mask)
{
u8 *data;
struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index];
struct eth_rx_bd *rx_bd = &fp->rx_desc_ring[index];
dma_addr_t mapping;
data = bnx2x_frag_alloc(fp, gfp_mask);
if (unlikely(data == NULL))
return -ENOMEM;
mapping = dma_map_single(&bp->pdev->dev, data + NET_SKB_PAD,
fp->rx_buf_size,
DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
BNX2X_ERR("Can't map rx data\n");
return -ENOMEM;
}
rx_buf->data = data;
dma_unmap_addr_set(rx_buf, mapping, mapping);
rx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
rx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
return 0;
}
static
void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
struct bnx2x_fastpath *fp,
struct bnx2x_eth_q_stats *qstats)
{
/* Do nothing if no L4 csum validation was done.
* We do not check whether IP csum was validated. For IPv4 we assume
* that if the card got as far as validating the L4 csum, it also
* validated the IP csum. IPv6 has no IP csum.
*/
if (cqe->fast_path_cqe.status_flags &
ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
return;
/* If L4 validation was done, check if an error was found. */
if (cqe->fast_path_cqe.type_error_flags &
(ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
qstats->hw_csum_err++;
else
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
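/* bnx2x_rx_int - main RX polling loop: walk the completion queue, handing
 * slow-path CQEs to bnx2x_sp_event(), TPA start/stop CQEs to the TPA state
 * machine and regular fast-path CQEs to the networking stack, up to the
 * NAPI budget.
 */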
static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
{
struct bnx2x *bp = fp->bp;
u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
u16 sw_comp_cons, sw_comp_prod;
int rx_pkt = 0;
union eth_rx_cqe *cqe;
struct eth_fast_path_rx_cqe *cqe_fp;
#ifdef BNX2X_STOP_ON_ERROR
if (unlikely(bp->panic))
return 0;
#endif
if (budget <= 0)
return rx_pkt;
bd_cons = fp->rx_bd_cons;
bd_prod = fp->rx_bd_prod;
bd_prod_fw = bd_prod;
sw_comp_cons = fp->rx_comp_cons;
sw_comp_prod = fp->rx_comp_prod;
comp_ring_cons = RCQ_BD(sw_comp_cons);
cqe = &fp->rx_comp_ring[comp_ring_cons];
cqe_fp = &cqe->fast_path_cqe;
DP(NETIF_MSG_RX_STATUS,
"queue[%d]: sw_comp_cons %u\n", fp->index, sw_comp_cons);
while (BNX2X_IS_CQE_COMPLETED(cqe_fp)) {
struct sw_rx_bd *rx_buf = NULL;
struct sk_buff *skb;
u8 cqe_fp_flags;
enum eth_rx_cqe_type cqe_fp_type;
u16 len, pad, queue;
u8 *data;
u32 rxhash;
enum pkt_hash_types rxhash_type;
#ifdef BNX2X_STOP_ON_ERROR
if (unlikely(bp->panic))
return 0;
#endif
bd_prod = RX_BD(bd_prod);
bd_cons = RX_BD(bd_cons);
/* A rmb() is required to ensure that the CQE is not read
* before it is written by the adapter DMA. PCI ordering
* rules will make sure the other fields are written before
* the marker at the end of struct eth_fast_path_rx_cqe
* but without rmb() a weakly ordered processor can process
* stale data. Without the barrier TPA state-machine might
* enter inconsistent state and kernel stack might be
* provided with incorrect packet description - these lead
* to various kernel crashed.
*/
rmb();
cqe_fp_flags = cqe_fp->type_error_flags;
cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
DP(NETIF_MSG_RX_STATUS,
"CQE type %x err %x status %x queue %x vlan %x len %u\n",
CQE_TYPE(cqe_fp_flags),
cqe_fp_flags, cqe_fp->status_flags,
le32_to_cpu(cqe_fp->rss_hash_result),
le16_to_cpu(cqe_fp->vlan_tag),
le16_to_cpu(cqe_fp->pkt_len_or_gro_seg_len));
if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
bnx2x_sp_event(fp, cqe);
goto next_cqe;
}
rx_buf = &fp->rx_buf_ring[bd_cons];
data = rx_buf->data;
if (!CQE_TYPE_FAST(cqe_fp_type)) {
struct bnx2x_agg_info *tpa_info;
u16 frag_size, pages;
#ifdef BNX2X_STOP_ON_ERROR
/* sanity check */
if (fp->disable_tpa &&
(CQE_TYPE_START(cqe_fp_type) ||
CQE_TYPE_STOP(cqe_fp_type)))
BNX2X_ERR("START/STOP packet while disable_tpa type %x\n",
CQE_TYPE(cqe_fp_type));
#endif
if (CQE_TYPE_START(cqe_fp_type)) {
u16 queue = cqe_fp->queue_index;
DP(NETIF_MSG_RX_STATUS,
"calling tpa_start on queue %d\n",
queue);
bnx2x_tpa_start(fp, queue,
bd_cons, bd_prod,
cqe_fp);
goto next_rx;
}
queue = cqe->end_agg_cqe.queue_index;
tpa_info = &fp->tpa_info[queue];
DP(NETIF_MSG_RX_STATUS,
"calling tpa_stop on queue %d\n",
queue);
frag_size = le16_to_cpu(cqe->end_agg_cqe.pkt_len) -
tpa_info->len_on_bd;
if (fp->mode == TPA_MODE_GRO)
pages = (frag_size + tpa_info->full_page - 1) /
tpa_info->full_page;
else
pages = SGE_PAGE_ALIGN(frag_size) >>
SGE_PAGE_SHIFT;
bnx2x_tpa_stop(bp, fp, tpa_info, pages,
&cqe->end_agg_cqe, comp_ring_cons);
bnx2x_update_sge_prod(fp, pages, &cqe->end_agg_cqe);
goto next_cqe;
}
/* non TPA */
len = le16_to_cpu(cqe_fp->pkt_len_or_gro_seg_len);
pad = cqe_fp->placement_offset;
dma_sync_single_for_cpu(&bp->pdev->dev,
dma_unmap_addr(rx_buf, mapping),
pad + RX_COPY_THRESH,
DMA_FROM_DEVICE);
pad += NET_SKB_PAD;
prefetch(data + pad); /* speedup eth_type_trans() */