Got cmm working

2026-05-11 07:01:39 +08:00
parent 35f2edc0b5
commit 7411e1994b
23 changed files with 213 additions and 40 deletions


@@ -0,0 +1 @@
This is for when the vendor is already patching the upstream source and we are patching on top of it.


@@ -0,0 +1,71 @@
diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
index 12a409d..740793d 100644
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
@@ -37,6 +37,7 @@
#include <linux/fsl_qman.h> /* struct qman_fq */
#include "fm_ext.h"
+#include "fm_ehash.h"
#include "dpaa_eth_trace.h"
extern int dpa_rx_extra_headroom;
@@ -88,8 +89,13 @@ static inline void DPA_BUG_ON(bool cond)
(FM_PORT_FRM_ERR_UNSUPPORTED_FORMAT | \
FM_PORT_FRM_ERR_LENGTH | FM_PORT_FRM_ERR_DMA)
-/* The raw buffer size must be cacheline aligned. */
-#define DPA_BP_RAW_SIZE 2048
+/* The raw buffer size must be cacheline aligned.
+ * 1518-byte packets arrive from DPAA in scatter-gather buffers, but these
+ * buffers are consumed by Wi-Fi, which requires contiguous buffers, so
+ * increase the raw buffer size from 2048 to 2176 to fit such frames in a
+ * single contiguous FD.
+ */
+#define DPA_BP_RAW_SIZE 2176
/* This is what FMan is ever allowed to use.
* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is
@@ -174,6 +180,7 @@ static inline void DPA_BUG_ON(bool cond)
#endif
#define DPAA_ETH_RX_QUEUES 128
+#define DPAA_IP_VERSION_4 4
/* Convenience macros for storing/retrieving the skb back-pointers. They must
* accommodate both recycling and confirmation paths - i.e. cases when the buf
@@ -304,6 +311,10 @@ struct dpa_percpu_priv_s {
u64 tx_frag_skbuffs;
/* number of S/G frames received */
u64 rx_sg;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) && defined(CONFIG_CPE_FAST_PATH)
+ u64 tx_caam_enc;
+ u64 tx_caam_dec;
+#endif
struct rtnl_link_stats64 stats;
struct dpa_rx_errors rx_errors;
@@ -375,9 +386,13 @@ struct dpa_priv_s {
int loop_id;
int loop_to;
#endif
-#ifdef CONFIG_FSL_DPAA_CEETM
+#if defined(CONFIG_FSL_DPAA_CEETM) || defined(CONFIG_CPE_FAST_PATH)
bool ceetm_en; /* CEETM QoS enabled */
+#ifdef CONFIG_CPE_FAST_PATH
+ void *qm_ctx; /* CEETM context */
+#endif
#endif
+ void *ifinfo;
};
struct fm_port_fqs {
@@ -392,7 +407,7 @@ struct fm_port_fqs {
extern struct net_device *dpa_loop_netdevs[20];
#endif
-int dpaa_eth_refill_bpools(struct dpa_bp *dpa_bp, int *count_ptr);
+int dpaa_eth_refill_bpools(struct dpa_bp *dpa_bp, int *count_ptr, int threshold);
void __hot _dpa_rx(struct net_device *net_dev,
struct qman_portal *portal,
const struct dpa_priv_s *priv,
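A quick sanity check of the sizing arithmetic behind the 2048 -> 2176 change. This is a sketch only: the 128-byte cacheline and 256-byte overhead figures are assumptions for illustration, not values taken from the driver.

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES  128    /* assumed cacheline size, not from the driver */
#define MAX_FRAME_BYTES  1518   /* largest frame cited in the comment above */
#define ASSUMED_OVERHEAD 256    /* hypothetical Rx headroom + metadata */
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int need = ALIGN_UP(MAX_FRAME_BYTES + ASSUMED_OVERHEAD, CACHELINE_BYTES);

	assert(2176 % CACHELINE_BYTES == 0);  /* new DPA_BP_RAW_SIZE stays cacheline aligned */
	assert(need <= 2176);                 /* a full 1518-byte frame now fits in one contiguous buffer */
	printf("need %u bytes, raw buffer size 2176 (was 2048)\n", need);
	return 0;
}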


@@ -0,0 +1,30 @@
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index f9f0f16..1f94967 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -55,6 +55,10 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#if defined(CONFIG_CPE_FAST_PATH)
+#include <linux/jiffies.h>
+#endif
+
#define PPP_VERSION "2.4.2"
/*
@@ -3535,6 +3539,14 @@ ppp_connect_channel(struct channel *pch, int unit)
outl:
spin_unlock(&pch->upl);
+#if defined(CONFIG_CPE_FAST_PATH)
+ if ((ppp->dev) && (!ppp->closing)) {
+ rtnl_lock();
+ rtmsg_ifinfo(RTM_NEWLINK, ppp->dev, 0, GFP_KERNEL, 0, NULL);
+ rtnl_unlock();
+ }
+#endif
+
out:
mutex_unlock(&pn->all_ppp_mutex);
return ret;
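The rtmsg_ifinfo() call added above broadcasts an RTM_NEWLINK message once the PPP channel is attached to a unit, so a fast-path daemon can pick up the new interface without polling. A minimal userspace listener sketch using the standard rtnetlink API (the filtering and output are illustrative only):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK, .nl_groups = RTMGRP_LINK };
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0 || bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	while (1) {
		ssize_t len = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nh;

		if (len <= 0)
			continue;
		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) {
			if (nh->nlmsg_type == RTM_NEWLINK) {
				struct ifinfomsg *ifi = NLMSG_DATA(nh);

				printf("RTM_NEWLINK, ifindex %d\n", ifi->ifi_index);
			}
		}
	}
	return 0;
}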


@@ -0,0 +1,371 @@
diff --git a/drivers/staging/fsl_qbman/qman_high.c b/drivers/staging/fsl_qbman/qman_high.c
index 4085aa9a2dcb..96c3122e665b 100644
--- a/drivers/staging/fsl_qbman/qman_high.c
+++ b/drivers/staging/fsl_qbman/qman_high.c
@@ -34,6 +34,9 @@
#include "qman_low.h"
+#include <linux/net.h>
+#include <linux/netdevice.h>
+
/* Compilation constants */
#define DQRR_MAXFILL 15
#define EQCR_ITHRESH 4 /* if EQCR congests, interrupt threshold */
@@ -69,6 +72,33 @@
spin_unlock(&__fq478->fqlock); \
} while (0)
+#if 1
+#define display_ceetm_cmd(a,b,c)
+#else
+#define display_ceetm_cmd(a, b, c) _display_ceetm_cmd((char *)__func__, a, b, c)
+static void _display_ceetm_cmd(char *func, uint32_t verb, void *buf, uint32_t size)
+{
+ uint8_t *ptr;
+ uint32_t ii,jj=0;
+ uint8_t buff[200];
+
+ ptr = buf;
+ jj = sprintf(buff, "%s::\n%02x ", func, verb);
+ for (ii = 1; ii <= size; ii++) {
+ if (ii && ((ii % 16) == 0))
+ {
+ buff[jj] = 0;
+ printk("%s\n", buff);
+ jj = 0;
+ }
+ jj += sprintf(buff+jj, "%02x ", *ptr);
+ ptr++;
+ }
+ buff[jj] = 0;
+ printk("%s\n\n", buff);
+}
+#endif
+
static inline void fq_set(struct qman_fq *fq, u32 mask)
{
set_bits(mask, &fq->flags);
@@ -128,6 +158,10 @@ struct qman_portal {
u8 alloced;
/* power management data */
u32 save_isdr;
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+ struct net_device *dummy_dev;
+ struct napi_struct napi;
+#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* Keep a shadow copy of the DQRR on LE systems as the SW needs to
* do byte swaps of DQRR read only memory. First entry must be aligned
@@ -473,7 +507,16 @@ static irqreturn_t portal_isr(__always_unused int irq, void *ptr)
/* DQRR-handling if it's interrupt-driven */
if (is & QM_PIRQ_DQRI) {
+#ifndef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
__poll_portal_fast(p, CONFIG_FSL_QMAN_POLL_LIMIT);
+#else
+ /* Disable QMan IRQ and invoke NAPI */
+ qman_p_irqsource_remove(p, QM_PIRQ_DQRI);
+ if (napi_schedule_prep(&p->napi))
+ {
+ __napi_schedule(&p->napi);
+ }
+#endif
clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI;
}
@@ -575,6 +618,27 @@ struct dev_pm_domain qman_portal_device_pm_domain = {
}
};
+
+
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+static int qman_portal_dqrr_poll(struct napi_struct *napi, int budget)
+{
+ struct qman_portal *portal = container_of(napi, struct qman_portal, napi);
+
+ int cleaned = qman_p_poll_dqrr(portal, budget);
+
+ if (cleaned < budget) {
+ int tmp;
+ napi_complete(napi);
+ tmp = qman_p_irqsource_add(portal, QM_PIRQ_DQRI);
+ // DPA_BUG_ON(tmp);
+ }
+
+ return cleaned;
+}
+#endif
+
+
struct qman_portal *qman_create_portal(
struct qman_portal *portal,
const struct qm_portal_config *config,
@@ -737,6 +801,15 @@ struct qman_portal *qman_create_portal(
goto fail_dqrr_mr_empty;
}
}
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+ /* Initialize NAPI for Rx processing */
+ portal->dummy_dev = alloc_netdev_dummy(0);
+ if (!portal->dummy_dev)
+ goto fail_dqrr_mr_empty;
+
+ netif_napi_add(portal->dummy_dev, &portal->napi, qman_portal_dqrr_poll);
+ napi_enable(&portal->napi);
+#endif
/* Success */
portal->config = config;
/*
@@ -832,6 +902,15 @@ void qman_destroy_portal(struct qman_portal *qm)
const struct qm_portal_config *pcfg;
int i;
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+ if (qm->dummy_dev) {
+ napi_disable(&qm->napi);
+ netif_napi_del(&qm->napi);
+ free_netdev(qm->dummy_dev);
+ qm->dummy_dev = NULL;
+ }
+#endif
+
/* Stop dequeues on the portal */
qm_dqrr_sdqcr_set(&qm->p, 0);
@@ -3170,6 +3245,7 @@ static int qman_ceetm_configure_lfqmt(struct qm_mcc_ceetm_lfqmt_config *opts)
p = get_affine_portal();
PORTAL_IRQ_LOCK(p, irqflags);
+ display_ceetm_cmd(QM_CEETM_VERB_LFQMT_CONFIG, opts, sizeof(struct qm_mcc_ceetm_lfqmt_config));
mcc = qm_mc_start(&p->p);
mcc->lfqmt_config = *opts;
qm_mc_commit(&p->p, QM_CEETM_VERB_LFQMT_CONFIG);
@@ -3233,6 +3309,7 @@ static int qman_ceetm_configure_cq(struct qm_mcc_ceetm_cq_config *opts)
mcc = qm_mc_start(&p->p);
mcc->cq_config = *opts;
+ display_ceetm_cmd(QM_CEETM_VERB_CQ_CONFIG, opts, sizeof(struct qm_mcc_ceetm_cq_config));
qm_mc_commit(&p->p, QM_CEETM_VERB_CQ_CONFIG);
while (!(mcr = qm_mc_result(&p->p)))
cpu_relax();
@@ -3296,7 +3373,7 @@ static int qman_ceetm_configure_dct(struct qm_mcc_ceetm_dct_config *opts)
p = get_affine_portal();
PORTAL_IRQ_LOCK(p, irqflags);
-
+ display_ceetm_cmd(QM_CEETM_VERB_DCT_CONFIG, opts, sizeof(struct qm_mcc_ceetm_dct_config));
mcc = qm_mc_start(&p->p);
mcc->dct_config = *opts;
qm_mc_commit(&p->p, QM_CEETM_VERB_DCT_CONFIG);
@@ -3360,6 +3437,8 @@ static int qman_ceetm_configure_class_scheduler(
mcc = qm_mc_start(&p->p);
mcc->csch_config = *opts;
+ display_ceetm_cmd(QM_CEETM_VERB_CLASS_SCHEDULER_CONFIG, opts,
+ sizeof(struct qm_mcc_ceetm_class_scheduler_config));
qm_mc_commit(&p->p, QM_CEETM_VERB_CLASS_SCHEDULER_CONFIG);
while (!(mcr = qm_mc_result(&p->p)))
cpu_relax();
@@ -3410,7 +3489,7 @@ static int qman_ceetm_query_class_scheduler(struct qm_ceetm_channel *channel,
return 0;
}
-static int qman_ceetm_configure_mapping_shaper_tcfc(
+int qman_ceetm_configure_mapping_shaper_tcfc(
struct qm_mcc_ceetm_mapping_shaper_tcfc_config *opts)
{
struct qm_mc_command *mcc;
@@ -3423,6 +3502,8 @@ static int qman_ceetm_configure_mapping_shaper_tcfc(
PORTAL_IRQ_LOCK(p, irqflags);
mcc = qm_mc_start(&p->p);
+ display_ceetm_cmd(QM_CEETM_VERB_MAPPING_SHAPER_TCFC_CONFIG, opts,
+ sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
mcc->mst_config = *opts;
qm_mc_commit(&p->p, QM_CEETM_VERB_MAPPING_SHAPER_TCFC_CONFIG);
while (!(mcr = qm_mc_result(&p->p)))
@@ -3440,6 +3521,7 @@ static int qman_ceetm_configure_mapping_shaper_tcfc(
}
return 0;
}
+EXPORT_SYMBOL(qman_ceetm_configure_mapping_shaper_tcfc);
static int qman_ceetm_query_mapping_shaper_tcfc(
struct qm_mcc_ceetm_mapping_shaper_tcfc_query *opts,
@@ -3485,7 +3567,7 @@ static int qman_ceetm_configure_ccgr(struct qm_mcc_ceetm_ccgr_config *opts)
p = get_affine_portal();
PORTAL_IRQ_LOCK(p, irqflags);
-
+ display_ceetm_cmd(QM_CEETM_VERB_CCGR_CONFIG, opts, sizeof(struct qm_mcc_ceetm_ccgr_config));
mcc = qm_mc_start(&p->p);
mcc->ccgr_config = *opts;
@@ -3903,6 +3985,7 @@ int qman_ceetm_lni_enable_shaper(struct qm_ceetm_lni *lni, int coupled,
lni->shaper_couple = coupled;
lni->oal = oal;
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_LNI_SHAPER | lni->idx);
config_opts.dcpid = lni->dcp_idx;
config_opts.shaper_config.cpl = coupled;
@@ -3936,7 +4019,8 @@ int qman_ceetm_lni_disable_shaper(struct qm_ceetm_lni *lni)
pr_err("The shaper has been disabled\n");
return -EINVAL;
}
-
+
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_LNI_SHAPER | lni->idx);
config_opts.dcpid = lni->dcp_idx;
config_opts.shaper_config.cpl = lni->shaper_couple;
@@ -4173,6 +4257,7 @@ int qman_ceetm_lni_set_tcfcc(struct qm_ceetm_lni *lni,
return -EINVAL;
}
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
query_opts.cid = cpu_to_be16(CEETM_COMMAND_TCFC | lni->idx);
query_opts.dcpid = lni->dcp_idx;
if (qman_ceetm_query_mapping_shaper_tcfc(&query_opts, &query_result)) {
@@ -4254,6 +4339,7 @@ int qman_ceetm_channel_claim(struct qm_ceetm_channel **channel,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
p->idx = channel_idx;
p->dcp_idx = lni->dcp_idx;
p->lni_idx = lni->idx;
@@ -4264,7 +4350,7 @@ int qman_ceetm_channel_claim(struct qm_ceetm_channel **channel,
channel_idx);
config_opts.dcpid = lni->dcp_idx;
config_opts.channel_mapping.map_lni_id = lni->idx;
- config_opts.channel_mapping.map_shaped = 0;
+ config_opts.channel_mapping.map_shaped = 1;
if (qman_ceetm_configure_mapping_shaper_tcfc(&config_opts)) {
pr_err("Can't map channel#%d for LNI#%d\n",
channel_idx, lni->idx);
@@ -4296,7 +4382,7 @@ int qman_ceetm_channel_release(struct qm_ceetm_channel *channel)
channel->dcp_idx);
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_CHANNEL_SHAPER |
channel->idx);
config_opts.dcpid = channel->dcp_idx;
@@ -4334,7 +4420,7 @@ int qman_ceetm_channel_enable_shaper(struct qm_ceetm_channel *channel,
pr_err("This channel shaper has been enabled!\n");
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->shaper_enable = 1;
channel->shaper_couple = coupled;
@@ -4347,6 +4433,7 @@ int qman_ceetm_channel_enable_shaper(struct qm_ceetm_channel *channel,
return -EINVAL;
}
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_CHANNEL_MAPPING |
channel->idx);
config_opts.dcpid = channel->dcp_idx;
@@ -4441,7 +4528,7 @@ int qman_ceetm_channel_set_commit_rate(struct qm_ceetm_channel *channel,
pr_err("Fail to get the current channel shaper setting\n");
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->cr_token_rate.whole = token_rate->whole;
channel->cr_token_rate.fraction = token_rate->fraction;
channel->cr_token_bucket_limit = token_limit;
@@ -4534,7 +4621,8 @@ int qman_ceetm_channel_set_excess_rate(struct qm_ceetm_channel *channel,
pr_err("Fail to get the current channel shaper setting\n");
return -EINVAL;
}
-
+
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->er_token_rate.whole = token_rate->whole;
channel->er_token_rate.fraction = token_rate->fraction;
channel->er_token_bucket_limit = token_limit;
@@ -4618,7 +4706,7 @@ int qman_ceetm_channel_set_weight(struct qm_ceetm_channel *channel,
pr_err("This channel is a shaped one\n");
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->cr_token_bucket_limit = token_limit;
config_opts.cid = cpu_to_be16(CEETM_COMMAND_CHANNEL_SHAPER |
channel->idx);
@@ -4668,7 +4756,7 @@ int qman_ceetm_channel_set_group(struct qm_ceetm_channel *channel, int group_b,
pr_err("Can't query channel#%d's scheduler!\n", channel->idx);
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cqcid = cpu_to_be16(channel->idx);
config_opts.dcpid = channel->dcp_idx;
config_opts.gpc_combine_flag = !group_b;
@@ -4759,6 +4847,7 @@ int qman_ceetm_channel_set_group_er_eligibility(struct qm_ceetm_channel
channel->idx);
return -EINVAL;
}
+ memset(&csch_config, 0, sizeof(struct qm_mcc_ceetm_class_scheduler_config));
csch_config.cqcid = cpu_to_be16(channel->idx);
csch_config.dcpid = channel->dcp_idx;
csch_config.gpc_combine_flag = csch_query.gpc_combine_flag;
@@ -4806,6 +4895,7 @@ int qman_ceetm_channel_set_cq_cr_eligibility(struct qm_ceetm_channel *channel,
channel->idx);
return -EINVAL;
}
+ memset(&csch_config, 0, sizeof(struct qm_mcc_ceetm_class_scheduler_config));
csch_config.cqcid = cpu_to_be16(channel->idx);
csch_config.dcpid = channel->dcp_idx;
csch_config.gpc_combine_flag = csch_query.gpc_combine_flag;
@@ -4889,7 +4979,7 @@ int qman_ceetm_cq_claim(struct qm_ceetm_cq **cq,
pr_err("Can't allocate memory for CQ#%d!\n", idx);
return -ENOMEM;
}
-
+ memset(&cq_config, 0, sizeof(struct qm_mcc_ceetm_cq_config));
list_add_tail(&p->node, &channel->class_queues);
p->idx = idx;
p->is_claimed = 1;
@@ -4938,7 +5028,7 @@ int qman_ceetm_cq_claim_A(struct qm_ceetm_cq **cq,
pr_err("Can't allocate memory for CQ#%d!\n", idx);
return -ENOMEM;
}
-
+ memset(&cq_config, 0, sizeof(struct qm_mcc_ceetm_cq_config));
list_add_tail(&p->node, &channel->class_queues);
p->idx = idx;
p->is_claimed = 1;
@@ -4986,7 +5076,7 @@ int qman_ceetm_cq_claim_B(struct qm_ceetm_cq **cq,
pr_err("Can't allocate memory for CQ#%d!\n", idx);
return -ENOMEM;
}
-
+ memset(&cq_config, 0, sizeof(struct qm_mcc_ceetm_cq_config));
list_add_tail(&p->node, &channel->class_queues);
p->idx = idx;
p->is_claimed = 1;
@@ -5040,7 +5130,7 @@ int qman_ceetm_set_queue_weight(struct qm_ceetm_cq *cq,
cq->parent->idx);
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_class_scheduler_config));
config_opts.cqcid = cpu_to_be16(cq->parent->idx);
config_opts.dcpid = cq->parent->dcp_idx;
config_opts.crem = query_result.crem;
@@ -5257,6 +5347,7 @@ int qman_ceetm_lfq_claim(struct qm_ceetm_lfq **lfq,
p = kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
+ memset(&lfqmt_config, 0, sizeof(struct qm_mcc_ceetm_lfqmt_config));
p->idx = lfqid;
p->dctidx = (u16)(lfqid & CEETM_LFQMT_LFQID_LSB);
p->parent = cq->parent;
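The DQRR interrupt path above masks the DQRI source and defers ring processing to NAPI; the poll callback re-arms the interrupt only when it consumes less than its budget. A condensed sketch of that contract, with the example_* names as illustrative stand-ins rather than functions from this driver:

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct example_portal {
	struct napi_struct napi;
	/* ... hardware state ... */
};

/* hypothetical hardware helpers */
static void example_irq_disable(struct example_portal *p);
static void example_irq_enable(struct example_portal *p);
static int example_process_ring(struct example_portal *p, int budget);

static irqreturn_t example_isr(int irq, void *ptr)
{
	struct example_portal *p = ptr;

	example_irq_disable(p);			/* plays the role of qman_p_irqsource_remove() */
	if (napi_schedule_prep(&p->napi))
		__napi_schedule(&p->napi);
	return IRQ_HANDLED;
}

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_portal *p = container_of(napi, struct example_portal, napi);
	int done = example_process_ring(p, budget);	/* plays the role of qman_p_poll_dqrr() */

	if (done < budget) {
		napi_complete(napi);
		example_irq_enable(p);		/* plays the role of qman_p_irqsource_add() */
	}
	return done;
}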


@@ -0,0 +1,28 @@
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 6d4e3d1b1111..7c0d4c2e2222 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -65,6 +65,23 @@ void brioctl_set(int (*hook)(struct net *net, unsigned int cmd,
void __user *uarg));
int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg);
+#if defined(CONFIG_CPE_FAST_PATH)
+struct brevent_fdb_update {
+ char *mac_addr;
+ struct net_device *dev;
+ struct net_device *brdev;
+};
+
+enum brevent_notif_type {
+ BREVENT_PORT_DOWN = 1, /* arg is struct net_device ptr */
+ BREVENT_FDB_UPDATE /* arg is struct brevent_fdb_update ptr */
+};
+
+int register_brevent_notifier(struct notifier_block *nb);
+int unregister_brevent_notifier(struct notifier_block *nb);
+int call_brevent_notifiers(unsigned long val, void *v);
+#endif
+
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
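The brevent notifier chain added here lets an out-of-tree fast-path module react to bridge events without patching the bridge itself. A minimal consumer sketch; the handler bodies are hypothetical and only follow the argument types stated in the enum comments:

#include <linux/if_bridge.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int fp_brevent_cb(struct notifier_block *nb, unsigned long event, void *arg)
{
	switch (event) {
	case BREVENT_PORT_DOWN: {
		struct net_device *dev = arg;		/* per the enum comment */

		pr_info("bridge port %s went down\n", dev->name);
		break;
	}
	case BREVENT_FDB_UPDATE: {
		struct brevent_fdb_update *upd = arg;	/* per the enum comment */

		pr_info("fdb update %pM on %s (bridge %s)\n",
			upd->mac_addr, upd->dev->name, upd->brdev->name);
		break;
	}
	}
	return NOTIFY_OK;
}

static struct notifier_block fp_brevent_nb = { .notifier_call = fp_brevent_cb };

/* register_brevent_notifier(&fp_brevent_nb) at module init,
 * unregister_brevent_notifier(&fp_brevent_nb) at module exit.
 */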


@@ -0,0 +1,76 @@
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0a14daa..ff8a1ad 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -196,6 +196,12 @@ struct xfrm_state {
struct hlist_node bysrc;
};
struct hlist_node byspi;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ struct hlist_node byh;
+ u16 handle;
+ u16 in_byh_hash;
+ u16 parent_sa_handle; /* handle of the old SA from which this SA was created by rekeying */
+#endif
struct hlist_node byseq;
struct hlist_node state_cache;
struct hlist_node state_cache_input;
@@ -314,6 +320,11 @@ struct xfrm_state {
/* Private data of this transformer, format is opaque,
* interpreted by xfrm_type methods. */
void *data;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* Intended direction of this state, used for offloading */
+ int offloaded;
+ u64 curr_time;
+#endif
u8 dir;
const struct xfrm_mode_cbs *mode_cbs;
@@ -337,6 +348,13 @@ enum {
XFRM_STATE_EXPIRED,
XFRM_STATE_DEAD
};
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+enum {
+ XFRM_STATE_DIR_UNKNOWN,
+ XFRM_STATE_DIR_IN,
+ XFRM_STATE_DIR_OUT,
+};
+#endif
/* callback structure passed from either netlink or pfkey */
struct km_event {
@@ -1173,6 +1191,32 @@ struct sec_path {
struct sec_path *secpath_set(struct sk_buff *skb);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+struct xfrm_input_shared {
+ struct sk_buff *skb;
+ int xfrm_nr, first, xfrm_encap;
+ struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
+ __u16 encap_type;
+ int decaps;
+ u32 seq, spi;
+ unsigned int nhoff;
+ int nexthdr;
+ int (*callback)(struct xfrm_input_shared *sh);
+ atomic_t refcnt;
+};
+
+static inline void xfrm_shared_get(struct xfrm_input_shared *sh)
+{
+ atomic_inc(&sh->refcnt);
+}
+
+static inline void xfrm_shared_put(struct xfrm_input_shared *sh)
+{
+ if (atomic_dec_and_test(&sh->refcnt))
+ kfree(sh);
+}
+#endif
+
static inline void
secpath_reset(struct sk_buff *skb)
{
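struct xfrm_input_shared carries the partially processed state of one inbound packet across the offload path; the refcount helpers let the last user free it. A minimal usage sketch, assuming kzalloc/GFP_ATOMIC allocation and a hypothetical async hand-off (neither is shown in this header):

#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/xfrm.h>

static void example_queue_async(struct xfrm_input_shared *sh);	/* hypothetical hand-off */

static int example_share_skb(struct sk_buff *skb,
			     int (*cb)(struct xfrm_input_shared *sh))
{
	struct xfrm_input_shared *sh = kzalloc(sizeof(*sh), GFP_ATOMIC);

	if (!sh)
		return -ENOMEM;

	sh->skb = skb;
	sh->callback = cb;
	atomic_set(&sh->refcnt, 1);	/* creator's reference */

	xfrm_shared_get(sh);		/* reference handed to the async consumer */
	example_queue_async(sh);

	xfrm_shared_put(sh);		/* drop creator's ref; the last put kfree()s */
	return 0;
}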


@@ -0,0 +1,36 @@
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 43233af..2e401ae 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -58,6 +58,10 @@ enum ctattr_type {
CTA_FILTER,
CTA_STATUS_MASK,
CTA_TIMESTAMP_EVENT,
+ CTA_LAYERSCAPE_FP_ORIG,
+ CTA_LAYERSCAPE_FP_REPLY,
+ CTA_QOSCONNMARK,
+ CTA_QOSCONNMARK_PAD,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
@@ -243,6 +247,20 @@ enum ctattr_secctx {
};
#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1)
+enum ctattr_comcerto_fp {
+ CTA_COMCERTO_FP_UNSPEC,
+ CTA_COMCERTO_FP_MARK,
+ CTA_COMCERTO_FP_IFINDEX,
+ CTA_COMCERTO_FP_IIF,
+ CTA_COMCERTO_FP_UNDERLYING_IIF,
+ CTA_COMCERTO_FP_UNDERLYING_VID,
+#ifndef IPSEC_FLOW_CACHE
+ CTA_COMCERTO_FP_XFRM_HANDLE,
+#endif
+ __CTA_COMCERTO_FP_MAX
+};
+#define CTA_COMCERTO_FP_MAX (__CTA_COMCERTO_FP_MAX - 1)
+
enum ctattr_stats_cpu {
CTA_STATS_UNSPEC,
CTA_STATS_SEARCHED, /* no longer used */
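The ctnetlink changes later in this commit dump CTA_QOSCONNMARK with nla_put_be64(), i.e. as a 64-bit big-endian attribute. A userspace parsing sketch (error handling kept minimal; the attribute constant comes from the header patched above):

#include <endian.h>
#include <stdint.h>
#include <string.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

/* Walk a conntrack message's attribute area ('attr', 'len' bytes) and return
 * CTA_QOSCONNMARK, or 0 if it is absent.
 */
static uint64_t get_qosconnmark(const struct nlattr *attr, int len)
{
	while (len >= (int)NLA_HDRLEN && attr->nla_len >= NLA_HDRLEN) {
		int step = NLA_ALIGN(attr->nla_len);

		if ((attr->nla_type & NLA_TYPE_MASK) == CTA_QOSCONNMARK) {
			uint64_t v;

			memcpy(&v, (const char *)attr + NLA_HDRLEN, sizeof(v));
			return be64toh(v);
		}
		attr = (const struct nlattr *)((const char *)attr + step);
		len -= step;
	}
	return 0;
}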


@@ -0,0 +1,28 @@
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9a3d6f2b8c1e..b4f7b2c1d9aa 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -620,6 +620,11 @@ struct br_input_skb_cb {
#endif
u32 backup_nhid;
+
+#ifdef CONFIG_CPE_FAST_PATH
+ u16 vid;
+ u8 untagged:1;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -859,6 +864,11 @@ int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags);
+#if defined(CONFIG_CPE_FAST_PATH)
+extern void br_fdb_register_can_expire_cb(int(*cb)(unsigned char *mac_addr, struct net_device *dev));
+extern void br_fdb_deregister_can_expire_cb(void);
+#endif
+
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid,
bool *notified, struct netlink_ext_ack *extack);
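br_fdb_register_can_expire_cb() lets an external fast path veto ageing of FDB entries it is still forwarding traffic for. A consumer sketch; the return convention (non-zero meaning "the entry may expire") and the activity lookup are assumptions, since this header only declares the hook:

#include <linux/netdevice.h>

static bool fp_mac_still_active(const unsigned char *mac_addr,
				const struct net_device *dev);	/* hypothetical helper */

static int fp_fdb_can_expire(unsigned char *mac_addr, struct net_device *dev)
{
	if (fp_mac_still_active(mac_addr, dev))
		return 0;	/* fast path still sees traffic: keep the entry */
	return 1;		/* idle: let the bridge age the entry out (assumed convention) */
}

/* br_fdb_register_can_expire_cb(fp_fdb_can_expire) at module init,
 * br_fdb_deregister_can_expire_cb() at module exit.
 */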


@@ -0,0 +1,131 @@
diff --git a/net/core/dev.c b/net/core/dev.c
index 2acfa44..02e304b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,6 +176,10 @@ static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
+#if defined(CONFIG_CPE_FAST_PATH)
+static fp_iface_stats_get fast_path_stats_get;
+#endif
+
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
@@ -4002,9 +4006,15 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
skb = segs;
}
} else {
- if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb))
- goto out_kfree_skb;
+ /* Linearize only when the skb is not marked for IPsec offload. */
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (!skb->ipsec_offload)
+#endif
+ {
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto out_kfree_skb;
+ }
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
@@ -4792,6 +4802,40 @@ out:
}
EXPORT_SYMBOL(__dev_queue_xmit);
+#if defined(CONFIG_CPE_FAST_PATH)
+/* WiFi IPsec offload hook - allows cdx to intercept packets for IPsec
+ * processing when the packet is transmitted on a wifi interface.
+ */
+dpaa_wifi_xmit_local_hook_t dpaa_wifi_xmit_local_ipsec_handler;
+EXPORT_SYMBOL(dpaa_wifi_xmit_local_ipsec_handler);
+
+/* Register a hook function for IPsec offload on wifi interfaces. */
+int dpa_register_wifi_xmit_local_hook(dpaa_wifi_xmit_local_hook_t hookfn)
+{
+ if (dpaa_wifi_xmit_local_ipsec_handler) {
+ pr_warn("%s: hook already registered\n", __func__);
+ return -1;
+ }
+ dpaa_wifi_xmit_local_ipsec_handler = hookfn;
+ return 0;
+}
+EXPORT_SYMBOL(dpa_register_wifi_xmit_local_hook);
+
+/* Unregister the IPsec offload hook. */
+void dpa_unregister_wifi_xmit_local_hook(void)
+{
+ dpaa_wifi_xmit_local_ipsec_handler = NULL;
+}
+EXPORT_SYMBOL(dpa_unregister_wifi_xmit_local_hook);
+
+/* Original dev_queue_xmit - called when wifi hook is not applicable. */
+int original_dev_queue_xmit(struct sk_buff *skb)
+{
+ return __dev_queue_xmit(skb, NULL);
+}
+EXPORT_SYMBOL(original_dev_queue_xmit);
+#endif
+
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
struct net_device *dev = skb->dev;
@@ -5862,6 +5906,15 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
trace_netif_receive_skb(skb);
+#ifdef CONFIG_CPE_FAST_PATH
+ /* Record the ifindex of the device this packet arrived on, since
+ * skb->skb_iif itself always tracks skb->dev and may change later.
+ */
+ if (!skb->iif_index)
+ skb->iif_index = skb->dev->ifindex;
+ if (!skb->underlying_iif)
+ skb->underlying_iif = skb->dev->ifindex;
+#endif
orig_dev = skb->dev;
skb_reset_network_header(skb);
@@ -7627,9 +7680,9 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
return work;
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush_normal(&n->gro, HZ >= 1000);
+ gro_flush(&n->gro, HZ >= 1000);
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
*/
@@ -11762,10 +11812,28 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
}
}
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (fast_path_stats_get)
+ fast_path_stats_get(dev, storage);
+#endif
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
+#if defined(CONFIG_CPE_FAST_PATH)
+void dev_fp_stats_get_register(fp_iface_stats_get func)
+{
+ fast_path_stats_get = func;
+}
+EXPORT_SYMBOL(dev_fp_stats_get_register);
+
+void dev_fp_stats_get_deregister(void)
+{
+ fast_path_stats_get = NULL;
+}
+EXPORT_SYMBOL(dev_fp_stats_get_deregister);
+#endif
+
/**
* dev_fetch_sw_netstats - get per-cpu network device statistics
* @s: place to store stats
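dev_fp_stats_get_register() installs a hook that dev_get_stats() invokes, so traffic forwarded entirely in the fast path still shows up in the normal interface counters (ip -s link, /proc/net/dev). A consumer sketch; the per-interface lookup is hypothetical and the void return type is assumed from the call site above:

#include <linux/netdevice.h>

struct fp_iface_counters {		/* hypothetical fast-path counters */
	u64 rx_packets, tx_packets, rx_bytes, tx_bytes;
};

static int fp_counters_lookup(int ifindex, struct fp_iface_counters *c);	/* hypothetical */

static void fp_iface_stats(struct net_device *dev, struct rtnl_link_stats64 *storage)
{
	struct fp_iface_counters c;

	if (fp_counters_lookup(dev->ifindex, &c))
		return;

	storage->rx_packets += c.rx_packets;
	storage->tx_packets += c.tx_packets;
	storage->rx_bytes   += c.rx_bytes;
	storage->tx_bytes   += c.tx_bytes;
}

/* dev_fp_stats_get_register(fp_iface_stats) at module init,
 * dev_fp_stats_get_deregister() at module exit.
 */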


@@ -0,0 +1,79 @@
diff -uNr a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
--- a/net/ipv4/ip_output.c 2026-05-08 18:06:42.017639432 +0000
+++ b/net/ipv4/ip_output.c 2026-05-08 18:06:42.100831810 +0000
@@ -103,6 +103,17 @@
{
struct iphdr *iph = ip_hdr(skb);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ /*
+ * The tunnel header is not added in the slow path, so for an IPv6 over
+ * IPv4 IPsec tunnel the skb may still carry an IPv6 packet here.
+ * Treating it as IPv4 and dereferencing the IP header would cause
+ * invalid accesses and a kernel panic, so skip the IPv4 header checks
+ * in that case.
+ */
+ if ((skb->ipsec_offload) && (iph->version == 6))
+ goto sendout;
+#endif /* endif for CONFIG_INET_IPSEC_OFFLOAD */
IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
iph_set_totlen(iph, skb->len);
@@ -115,8 +126,17 @@
if (unlikely(!skb))
return 0;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+sendout:
+#endif
skb->protocol = htons(ETH_P_IP);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ if (skb->ipsec_offload) {
+ dst_output(net, sk, skb);
+ return 0;
+ } else
+#endif /* endif for CONFIG_INET_IPSEC_OFFLOAD */
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst_dev(skb),
dst_output);
@@ -309,7 +329,11 @@
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu || IPCB(skb)->frag_max_size)
+ if (
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ (skb->ipsec_offload == 0) &&
+#endif
+ (skb->len > mtu || IPCB(skb)->frag_max_size))
return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(net, sk, skb);
@@ -435,6 +459,16 @@
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ /* Bypass the POST_ROUTING hooks, since tunnel header and ESP
+ * processing are not done in the slow path for IPsec-offloaded packets.
+ */
+ if (skb->ipsec_offload) {
+ ret_val = ip_finish_output(net, sk, skb);
+ rcu_read_unlock();
+ return ret_val;
+ }
+#endif
ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, skb, indev, dev,
ip_finish_output,
@@ -559,6 +593,9 @@
skb_dst_copy(to, from);
to->dev = from->dev;
to->mark = from->mark;
+#if defined(CONFIG_CPE_FAST_PATH)
+ to->qosmark = from->qosmark;
+#endif
skb_copy_hash(to, from);


@@ -0,0 +1,73 @@
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f904739..e26e743 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -80,6 +80,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
hdr = ipv6_hdr(skb);
daddr = &hdr->daddr;
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* For IPv4 over IPv6 IPsec tunnel cases, just send the packet out
+ * since the packet is IPv4
+ */
+ if((skb->ipsec_offload) && (hdr->version == 4))
+ goto sendout;
+#endif /* endif for CONFIG_INET6_IPSEC_OFFLOAD */
if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
@@ -111,6 +118,9 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
}
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+sendout:
+#endif /* endif for CONFIG_INET6_IPSEC_OFFLOAD */
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
@@ -202,8 +212,15 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
if (skb_is_gso(skb))
return ip6_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu ||
- (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
+ if (
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* Do not fragment IPsec-offloaded packets: when offload is enabled
+ * the skb goes straight to ip6_finish_output2().
+ */
+ (skb->ipsec_offload == 0) &&
+#endif
+ ((skb->len > mtu) ||
+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
return ip6_finish_output2(net, sk, skb);
@@ -244,6 +261,16 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return 0;
}
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* Bypass the POST_ROUTING hooks, since tunnel header and ESP
+ * processing are not done in the slow path for IPsec-offloaded packets.
+ */
+ if (skb->ipsec_offload) {
+ ret = ip6_finish_output(net, sk, skb);
+ rcu_read_unlock();
+ return ret;
+ }
+#endif
ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
net, sk, skb, indev, dev,
ip6_finish_output,
@@ -697,6 +724,9 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_dst_set(to, dst_clone(skb_dst(from)));
to->dev = from->dev;
to->mark = from->mark;
+#if defined(CONFIG_CPE_FAST_PATH)
+ to->qosmark = from->qosmark;
+#endif
skb_copy_hash(to, from);


@@ -0,0 +1,38 @@
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 1c9b283..5682505 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -130,7 +130,19 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
len = skb->len - sizeof(struct ipv6hdr);
if (len > IPV6_MAXPLEN)
len = 0;
+
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /*
+ * Since tunnel header is not added in slow path and route cache entry
+ * lookup is changed to perform lookup on tunnel header, dst_output()
+ * may reach IPv6 output for IPv4-over-IPv6 IPsec tunnel packets. Only
+ * update the IPv6 payload length when the skb really carries IPv6 here.
+ */
+ if (ipv6_hdr(skb)->version == 6)
+ ipv6_hdr(skb)->payload_len = htons(len);
+#else
ipv6_hdr(skb)->payload_len = htons(len);
+#endif
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
/* if egress device is enslaved to an L3 master device pass the
@@ -142,6 +154,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (skb->ipsec_offload) {
+ dst_output(net, sk, skb);
+ return 0;
+ }
+#endif
+
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst_dev(skb),
dst_output);


@@ -0,0 +1,949 @@
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c56bb4f451e6..7225be6880e1 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -26,8 +26,184 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/xfrm.h>
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)|| defined(CONFIG_INET6_IPSEC_OFFLOAD)
+#include <net/netlink.h>
+#endif
#include <net/sock.h>
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)|| defined(CONFIG_INET6_IPSEC_OFFLOAD)
+#include <net/ip6_route.h>
+#define NLKEY_SUPPORT 1
+#else
+#undef NLKEY_SUPPORT
+#endif
+
+#ifdef NLKEY_SUPPORT
+#include <net/dsfield.h>
+#include <net/inet_dscp.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+
+
+extern int xfrm_get_tos(struct flowi *fl, int family);
+
+
+#define NLKEY_SA_CREATE 0x0A01
+#define NLKEY_SA_DELETE 0x0A02
+#define NLKEY_SA_FLUSH 0x0A03
+#define NLKEY_SA_SET_KEYS 0x0A04
+#define NLKEY_SA_SET_TUNNEL 0x0A05
+#define NLKEY_SA_SET_NATT 0x0A06
+#define NLKEY_SA_SET_STATE 0x0A07
+#define NLKEY_SA_SET_LIFETIME 0x0A08
+#define NLKEY_SA_NOTIFY 0x0A09
+#define NLKEY_SA_INFO_UPDATE 0x0A0C
+#define NLKEY_SA_SET_OFFLOAD 0x0A0D
+#define NLKEY_FLOW_ADD 0x0A11
+#define NLKEY_FLOW_REMOVE 0x0A12
+#define NLKEY_FLOW_NOTIFY 0x0A13
+#define NLKEY_NULL_MSG 0x0000
+
+#define NLKEY_HDR_LEN 4
+#define NLKEY_MSG_LEN 256
+
+#define NLKEY_MAX_NUM_KEYS 2
+#define NLKEY_MAX_KEY_LEN (512 / 8)
+
+struct nlkey_msg {
+ /* message data */
+ unsigned short fcode;
+ unsigned short length;
+ unsigned short payload[(NLKEY_MSG_LEN /sizeof(unsigned short))];
+};
+/* sizeof(nlkey_msg) = 4 + 256 */
+
+struct nlkey_sa_id {
+ unsigned int spi;
+ unsigned char sa_type;
+ unsigned char proto_family;
+ unsigned char replay_window;
+#define NLKEY_SAFLAGS_ESN 0x1
+#define NLKEY_SAFLAGS_INBOUND 0x2
+ unsigned char flags;
+ unsigned int dst_ip[4];
+ unsigned int src_ip[4];
+ unsigned short mtu;
+ unsigned short dev_mtu;
+
+};
+/* sizeof(nlkey_sa_id) = 44 */
+
+struct nlkey_sa_create {
+ unsigned short sagd;
+ unsigned short parent_sa_sagd; /*sagd value of old SA from which this SA is rekeyed.*/
+ struct nlkey_sa_id said;
+};
+/* sizeof(nlkey_sa_create) = 48 */
+
+struct nlkey_sa_delete {
+ unsigned short sagd;
+ unsigned short rsvd;
+};
+/* sizeof(nlkey_sa_delete) = 4 */
+
+struct nlkey_sa_set_tunnel {
+ unsigned short sagd;
+ unsigned char rsvd;
+ unsigned char proto_family;
+ union {
+ struct iphdr ipv4h;
+ struct ipv6hdr ipv6h;
+ } h;
+};
+/* sizeof(nlkey_sa_set_tunnel) = 44 */
+
+struct nlkey_sa_set_natt {
+ unsigned short sagd;
+ unsigned short sport;
+ unsigned short dport;
+ unsigned short rsvd;
+};
+/* sizeof(nlkey_sa_set_natt) = 8 */
+
+struct nlkey_sa_set_state {
+ unsigned short sagd;
+ unsigned short parent_sa_sagd;
+ unsigned short state;
+ unsigned short rsvd2;
+};
+/* sizeof(nlkey_sa_set_state) = 8 */
+
+struct nlkey_key_desc {
+ unsigned short key_bits;
+ unsigned char key_alg;
+ unsigned char key_type;
+ unsigned char key[NLKEY_MAX_KEY_LEN];
+};
+/* sizeof(nlkey_key_desc) = 68 */
+
+struct nlkey_sa_set_keys {
+ unsigned short sagd;
+ unsigned short rsvd;
+ unsigned short num_keys;
+ unsigned short rsvd2;
+ struct nlkey_key_desc keys[NLKEY_MAX_NUM_KEYS];
+};
+/* sizeof(nlkey_sa_set_keys) = 144 */
+
+struct nlkey_lifetime_desc {
+ unsigned int allocations;
+ unsigned int bytes[2];
+};
+/* sizeof(nlkey_lifetime_desc) = 12 */
+
+struct nlkey_sa_set_lifetime {
+ unsigned short sagd;
+ unsigned short rsvd;
+ struct nlkey_lifetime_desc hard_time;
+ struct nlkey_lifetime_desc soft_time;
+ struct nlkey_lifetime_desc current_time;
+};
+/* sizeof(nlkey_sa_set_lifetime) = 40 */
+
+/* SA notifications */
+#define IPSEC_SOFT_EXPIRE 0
+#define IPSEC_HARD_EXPIRE 1
+
+struct nlkey_sa_notify {
+ unsigned short sagd;
+ unsigned short rsvd;
+ unsigned int action;
+};
+/* sizeof(nlkey_sa_notify) = 8 */
+
+/* SA Info update */
+
+struct nlkey_sa_info {
+ unsigned short sagd;
+ unsigned short rsvd;
+ unsigned long long bytes;
+ unsigned long long packets;
+};
+/* sizeof(nlkey_sa_info) = */
+
+
+static int ipsec_nlkey_send(struct net *net, struct xfrm_state *x, const struct km_event *c);
+static void ipsec_nlkey_rcv(struct sk_buff *skb);
+static void ipsec_nlkey_init(void);
+static unsigned short ipsec_sacode_to_nlkeycode(unsigned short sa_code);
+static struct sk_buff * ipsec_xfrm2nlkey (struct net *net, struct xfrm_state *x,
+ const struct km_event *c, unsigned short *msg_id);
+static int ipsec_nlkey_set_said(struct net *net, struct xfrm_state *x, const struct km_event *c, struct nlkey_sa_id *said);
+
+void flow_cache_remove(const struct flowi *fl, unsigned short family,
+ unsigned short dir);
+/* netlink NETLINK_KEY socket */
+struct sock *nlkey_socket = NULL;
+
+#endif
+/************************************************************************************/
+
#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
@@ -876,6 +1051,10 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP;
if (x->props.flags & XFRM_STATE_NOPMTUDISC)
sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC;
+#ifdef NLKEY_SUPPORT
+ if (x->props.flags & XFRM_STATE_ESN)
+ sa->sadb_sa_flags |= SADB_SAFLAGS_ESN;
+#endif
/* hard time */
if (hsc & 2) {
@@ -908,6 +1087,11 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
lifetime->sadb_lifetime_bytes = x->curlft.bytes;
lifetime->sadb_lifetime_addtime = x->curlft.add_time;
lifetime->sadb_lifetime_usetime = x->curlft.use_time;
+
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)|| defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ lifetime->sadb_lifetime_usetime = x->curr_time;
+#endif
+
/* src address */
addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
addr->sadb_address_len =
@@ -1133,6 +1317,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
x->props.flags |= XFRM_STATE_DECAP_DSCP;
if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
x->props.flags |= XFRM_STATE_NOPMTUDISC;
+#ifdef NLKEY_SUPPORT
+ if (sa->sadb_sa_flags & SADB_SAFLAGS_ESN)
+ x->props.flags |= XFRM_STATE_ESN;
+#endif
lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1];
if (lifetime != NULL) {
@@ -3076,6 +3264,12 @@ static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c)
struct net *net = x ? xs_net(x) : c->net;
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
+
+#ifdef NLKEY_SUPPORT
+ /* send message to the user space through NETLINK_KEY socket*/
+ ipsec_nlkey_send(net, x, c);
+#endif
+
if (atomic_read(&net_pfkey->socks_nr) == 0)
return 0;
@@ -3863,6 +4057,687 @@ static struct xfrm_mgr pfkeyv2_mgr =
.is_alive = pfkey_is_alive,
};
+
+#ifdef NLKEY_SUPPORT
+extern struct xfrm_state *xfrm_state_lookup_byhandle(struct net *net, u16 handle);
+
+static unsigned short ipsec_sacode_to_nlkeycode(unsigned short sa_code)
+{
+ unsigned nlkey_code;
+
+ switch (sa_code)
+ {
+ case XFRM_MSG_DELSA:
+ nlkey_code = NLKEY_SA_DELETE;
+ break;
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_UPDSA:
+ nlkey_code = NLKEY_SA_CREATE;
+ break;
+ case XFRM_MSG_FLUSHSA:
+ nlkey_code = NLKEY_SA_FLUSH;
+ break;
+ case XFRM_MSG_EXPIRE:
+ nlkey_code = NLKEY_SA_SET_STATE;
+ break;
+ default:
+ nlkey_code = NLKEY_NULL_MSG;
+ break;
+ }
+
+ return nlkey_code;
+}
+
+static void ipsec_nlkey_rcv(struct sk_buff *skb)
+{
+ struct nlmsghdr *nlh = NULL;
+ struct nlkey_msg *msg = NULL;
+ struct flowi flow;
+ unsigned short *p;
+ unsigned short family, dir;
+ struct xfrm_state *x;
+ struct nlkey_sa_notify sa_notify_msg;
+ struct nlkey_sa_info sa_info_msg;
+
+ /* extract message from skb */
+ nlh = (struct nlmsghdr *)skb->data;
+
+ msg = (struct nlkey_msg *)NLMSG_DATA(nlh);
+
+ //printk(KERN_INFO "ipsec_nlkey_rcv fcode: 0x%x length: %d bytes\n",msg->fcode,msg->length);
+
+ /* process command received from user space */
+ switch(msg->fcode)
+ {
+ case NLKEY_FLOW_REMOVE:
+ //printk(KERN_INFO "ipsec_nlkey_rcv NLKEY_FLOW_REMOVE\n");
+ p = msg->payload;
+ memcpy(&flow, p, sizeof(struct flowi)); p += sizeof(struct flowi)/2;
+ family = *p; p++;
+ dir = *p; p++;
+ flow_cache_remove(&flow, family, dir);
+ break;
+
+ case NLKEY_SA_NOTIFY:
+ //printk(KERN_INFO "ipsec_nlkey_rcv NLKEY_SA_NOTIFY\n");
+ memcpy(&sa_notify_msg, msg->payload, sizeof(struct nlkey_sa_notify));
+ x = xfrm_state_lookup_byhandle(&init_net, sa_notify_msg.sagd);
+ if (x) {
+ spin_lock(&x->lock);
+
+ if (sa_notify_msg.action) {
+ // hard expired
+ x->km.state = XFRM_STATE_EXPIRED;
+ hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL_SOFT);
+ }
+ else if (!x->km.dying) {
+ x->km.dying = 1;
+ km_state_expired(x, 0, 0);
+ }
+
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ }
+ break;
+
+ case NLKEY_SA_INFO_UPDATE:
+ memcpy(&sa_info_msg, msg->payload, sizeof(struct nlkey_sa_info));
+
+ x = xfrm_state_lookup_byhandle(&init_net,sa_info_msg.sagd);
+ if (x) {
+ spin_lock(&x->lock);
+
+ if (x->curlft.bytes != sa_info_msg.bytes)
+ x->curr_time = ktime_get_real_seconds();
+
+ x->curlft.bytes = sa_info_msg.bytes;
+ x->curlft.packets = sa_info_msg.packets;
+
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ }
+ break;
+
+ case NLKEY_SA_SET_OFFLOAD:
+ memcpy(&sa_notify_msg, msg->payload, sizeof(struct nlkey_sa_notify));
+ x = xfrm_state_lookup_byhandle(&init_net,sa_notify_msg.sagd);
+ if (x) {
+ spin_lock(&x->lock);
+ if(sa_notify_msg.action)
+ x->offloaded = 1;
+ else
+ x->offloaded = 0;
+ spin_unlock(&x->lock);
+ xfrm_state_put(x);
+ }
+ break;
+ default:
+ //printk(KERN_INFO "ipsec_nlkey_rcv fcode 0x%x not supported\n", msg->fcode);
+ break;
+ }
+
+}
+static int ipsec_nlkey_set_said(struct net *net, struct xfrm_state *x,
+ const struct km_event *c, struct nlkey_sa_id *said)
+{
+
+ struct flowi fl;
+ int tos;
+ xfrm_address_t saddr, daddr;
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ int rc = 0;
+ int oif = 0;
+
+ memset(&fl, 0, sizeof(struct flowi));
+
+ /* SPI */
+ said->spi = x->id.spi;
+ /* SA Type (AH or ESP) */
+ said->sa_type = x->id.proto;
+ /* Protocol Family (IPv4 or IPv6) */
+ said->proto_family = x->props.family;
+ /* Replay window */
+ said->replay_window = x->props.replay_window;
+ /* Destination IP Address */
+ if(x->props.family == AF_INET6) {
+ memcpy(&said->dst_ip, x->id.daddr.a6, sizeof(struct in6_addr));
+ fl.u.ip6.daddr = *(struct in6_addr *)x->id.daddr.a6;
+ memcpy(&said->src_ip, x->props.saddr.a6, sizeof(struct in6_addr));
+ }
+ else {
+ said->dst_ip[0] = x->id.daddr.a4;
+ fl.u.ip4.daddr = x->id.daddr.a4;
+ said->src_ip[0] = x->props.saddr.a4;
+ }
+ said->mtu = 0;
+
+ if(x->props.flags & XFRM_STATE_ESN)
+ said->flags = NLKEY_SAFLAGS_ESN;
+ xfrm_flowi_addr_get(&fl, &saddr, &daddr, x->props.family);
+
+ tos = xfrm_get_tos(&fl, x->props.family);
+ if (tos < 0) {
+ printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+ rc = -1;
+ goto error;
+ }
+
+ switch (x->props.family)
+ {
+ case AF_INET:
+ if (!__ip_route_output_key(net, &(fl.u.ip4)))
+ {
+ printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+ rc = -1;
+ goto error;
+ }
+ oif = fl.u.ip4.flowi4_oif;
+ break;
+
+ case AF_INET6:
+ rt = rt6_lookup(net, &fl.u.ip6.daddr, NULL, 0, NULL, 0);
+ if ((!rt) || (!rt->dst.dev))
+ {
+ printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+ rc = -1;
+ goto error;
+ }
+ oif = rt->dst.dev->ifindex;
+ break;
+ }
+
+ {
+ struct xfrm_dst_lookup_params params = {
+ .net = net,
+ .dscp = inet_dsfield_to_dscp(tos),
+ .oif = oif,
+ .saddr = NULL,
+ .daddr = &daddr,
+ .mark = xfrm_smark_get(0, x),
+ };
+ dst = __xfrm_dst_lookup(x->props.family, &params);
+ }
+ if (IS_ERR(dst)) {
+ printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+ rc = -1;
+ goto error;
+ }
+
+ if (strcmp(dst->dev->name, "lo") == 0)
+ said->flags |= NLKEY_SAFLAGS_INBOUND;
+
+ said->dev_mtu = dst_mtu(dst);
+ said->mtu = xfrm_state_mtu(x,dst_mtu(dst));
+
+ dst_release(dst);
+error:
+ return rc;
+}
+
+static struct sk_buff * ipsec_xfrm2nlkey (struct net *net, struct xfrm_state *x,
+ const struct km_event *c, unsigned short *msg_id)
+{
+ struct nlkey_sa_id sa_id_msg;
+ struct nlkey_sa_create sa_create_msg;
+ struct nlkey_sa_delete sa_delete_msg;
+ struct nlkey_sa_set_keys sa_set_keys_msg;
+ struct nlkey_sa_set_tunnel sa_set_tunnel_msg;
+ struct nlkey_sa_set_natt sa_set_natt_msg;
+ struct nlkey_sa_set_state sa_set_state_msg;
+ struct nlkey_sa_set_lifetime sa_set_lifetime_msg;
+ struct nlkey_msg msg;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh = NULL;
+ gfp_t allocation = GFP_ATOMIC; // This may be called from atomic context
+ unsigned char tunnel, keys, natt, state, lifetime;
+
+ /* supported SA informations */
+ keys = 1; state = 1; tunnel = 1; lifetime = 1; natt = 1;
+
+ /* next message to build */
+ memset(&msg, 0, sizeof(struct nlkey_msg));
+ msg.fcode = *msg_id;
+
+ //printk(KERN_INFO "\n\nipsec_xfrm2nlkey: processing event 0x%x\n", msg.fcode);
+
+ switch (msg.fcode)
+ {
+ case NLKEY_SA_CREATE:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_CREATE\n");
+ if(x) {
+ /* some check before builing message */
+ if(x->id.proto != IPPROTO_ESP) {
+ printk(KERN_ERR "protocol %d not supported in fast path.\n", x->id.proto);
+ *msg_id = NLKEY_NULL_MSG;
+ goto exit;
+ }
+
+ memset(&sa_create_msg, 0, sizeof(struct nlkey_sa_create));
+
+ /* SA global handler */
+ sa_create_msg.sagd = x->handle;
+
+ sa_create_msg.parent_sa_sagd = x->parent_sa_handle;
+
+ /* SA identifier */
+ if(ipsec_nlkey_set_said(net, x, c, &sa_create_msg.said) < 0)
+ {
+ printk(KERN_ERR "%s: set sa ID failed\n", __func__);
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ goto exit;
+ }
+ memcpy(msg.payload, &sa_create_msg, sizeof(struct nlkey_sa_create));
+ msg.length = sizeof(struct nlkey_sa_create);
+ *msg_id = NLKEY_SA_SET_KEYS; /* next message */
+ } else {
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ goto exit;
+ }
+
+ break;
+
+ case NLKEY_SA_SET_KEYS:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_SET_KEYS\n");
+ if(keys) {
+ memset(&sa_set_keys_msg, 0, sizeof(struct nlkey_sa_set_keys));
+
+ /* SA global handler */
+ sa_set_keys_msg.sagd = x->handle;
+
+ /* auth key */
+ if(x->aalg) {
+ if (x->aalg->alg_key_len) {
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits = x->aalg->alg_key_len;
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = x->props.aalgo;
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_type = 0;
+ memcpy(sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key, x->aalg->alg_key,(sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits / 8));
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: AUTH - algo %d key %d bits\n", sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg, sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits);
+ sa_set_keys_msg.num_keys++;
+ }
+ }
+ /* encrypt key */
+ if(x->ealg) {
+ if (x->ealg->alg_key_len) {
+
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits = x->ealg->alg_key_len;
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = x->props.ealgo;
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_type = 1;
+ memcpy(sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key, x->ealg->alg_key,(sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits / 8));
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: ENCRYPT - algo %d key %d bits\n", sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg, sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits);
+ sa_set_keys_msg.num_keys++;
+ }
+ }
+ /* combined key */
+ if (x->aead) {
+ if (x->aead->alg_key_len) {
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits = x->aead->alg_key_len;
+ if (strstr(x->aead->alg_name, "rfc4106(gcm")) /* AES GCM support */
+ {
+ if (x->aead->alg_icv_len == 64)
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_AES_GCM_ICV8;
+ else if (x->aead->alg_icv_len == 96)
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_AES_GCM_ICV12;
+ else if (x->aead->alg_icv_len == 128)
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_AES_GCM_ICV16;
+ }
+ else if (strstr(x->aead->alg_name, "ccm")) /* AES CCM */
+ {
+ if (x->aead->alg_icv_len == 64)
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_AES_CCM_ICV8;
+ else if (x->aead->alg_icv_len == 96)
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_AES_CCM_ICV12;
+ else if (x->aead->alg_icv_len == 128)
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_AES_CCM_ICV16;
+ }
+ else if (strstr(x->aead->alg_name, "rfc4543(gcm")) /* AES GMAC defined in RFC 4543 derived from AES GCM */
+ {
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg = SADB_X_EALG_NULL_AES_GMAC;
+ }
+
+ sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_type = 1;
+ memcpy(sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key, x->aead->alg_key,(sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits/ 8));
+ /*
+ printk(KERN_INFO "ipsec_xfrm2nlkey: ENCRYPT -alg name %s algo %d key %d bits\n",
+ x->aead->alg_name, sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_alg, sa_set_keys_msg.keys[sa_set_keys_msg.num_keys].key_bits);
+ */
+ sa_set_keys_msg.num_keys++;
+ }
+ }
+
+ memcpy(msg.payload, &sa_set_keys_msg, sizeof(struct nlkey_sa_set_keys));
+ msg.length = sizeof(struct nlkey_sa_set_keys);
+ *msg_id = NLKEY_SA_SET_TUNNEL; /* next message */
+ } else {
+ *msg_id = NLKEY_SA_SET_TUNNEL; /* next message */
+ goto exit;
+ }
+ break;
+
+ case NLKEY_SA_SET_TUNNEL:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_SET_TUNNEL\n");
+ if(tunnel && (x->props.mode == XFRM_MODE_TUNNEL)) {
+ memset(&sa_set_tunnel_msg, 0, sizeof(struct nlkey_sa_set_tunnel));
+
+ /* SA global handler */
+ sa_set_tunnel_msg.sagd = x->handle;
+
+ /* Tunnel */
+ sa_set_tunnel_msg.proto_family = x->props.family;
+ if(x->props.family == AF_INET6) {
+ struct ipv6hdr *top_iph = &sa_set_tunnel_msg.h.ipv6h;
+ int dsfield;
+ top_iph->version = 6;
+ top_iph->priority = 0;
+ top_iph->flow_lbl[0] = 0;
+ top_iph->flow_lbl[1] = 0;
+ top_iph->flow_lbl[2] = 0;
+ top_iph->nexthdr = IPPROTO_IPIP;
+ dsfield = ipv6_get_dsfield(top_iph);
+ dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+ if (x->props.flags & XFRM_STATE_NOECN)
+ dsfield &= ~INET_ECN_MASK;
+ ipv6_change_dsfield(top_iph, 0, dsfield);
+ top_iph->hop_limit = 64;
+ memcpy(&top_iph->daddr, x->id.daddr.a6, sizeof(struct in6_addr));
+ memcpy(&top_iph->saddr, x->props.saddr.a6, sizeof(struct in6_addr));
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: IPv6 tunnel\n");
+ //printk(KERN_INFO "dst: %x %x %x %x\n", x->id.daddr.a6[0], x->id.daddr.a6[1], x->id.daddr.a6[2], x->id.daddr.a6[3]);
+ //(KERN_INFO "src: %x %x %x %x\n", x->props.saddr.a6[0], x->props.saddr.a6[1], x->props.saddr.a6[2], x->props.saddr.a6[3]);
+ }
+ else {
+ struct iphdr *top_iph = &sa_set_tunnel_msg.h.ipv4h;
+ top_iph->ihl = 5;
+ top_iph->version = 4;
+ top_iph->tos = 0;
+ top_iph->frag_off = 0;
+ top_iph->ttl = 64;
+ top_iph->saddr = x->props.saddr.a4;
+ top_iph->daddr = x->id.daddr.a4;
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: IPv4 tunnel dst:%x - src:%x \n", x->id.daddr.a4, x->props.saddr.a4);
+ }
+ memcpy(msg.payload, &sa_set_tunnel_msg, sizeof(struct nlkey_sa_set_tunnel));
+ msg.length = sizeof(struct nlkey_sa_set_tunnel);
+ *msg_id = NLKEY_SA_SET_NATT; /* next message */
+ } else {
+ *msg_id = NLKEY_SA_SET_NATT; /* next message */
+ goto exit;
+ }
+ break;
+
+ case NLKEY_SA_SET_NATT:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_SET_NATT\n");
+ if((natt) && (x->encap)){
+ memset(&sa_set_natt_msg, 0, sizeof(struct nlkey_sa_set_natt));
+
+ /* SA global handler */
+ sa_set_natt_msg.sagd = x->handle;
+ sa_set_natt_msg.sport = x->encap->encap_sport;
+ sa_set_natt_msg.dport = x->encap->encap_dport;
+ //printk(KERN_INFO "src port: %d dst port: %d \n", ntohs(sa_set_natt_msg.sport), ntohs( sa_set_natt_msg.dport));
+ memcpy(msg.payload, &sa_set_natt_msg, sizeof(struct nlkey_sa_set_natt));
+ msg.length = sizeof(struct nlkey_sa_set_natt);
+ *msg_id = NLKEY_SA_SET_LIFETIME; /* next message */
+ } else {
+ *msg_id = NLKEY_SA_SET_LIFETIME; /* next message */
+ goto exit;
+ }
+ break;
+
+ case NLKEY_SA_SET_LIFETIME:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_SET_LIFETIME\n");
+ if(lifetime) {
+ memset(&sa_set_lifetime_msg, 0, sizeof(struct nlkey_sa_set_lifetime));
+
+ /* SA global handler */
+ sa_set_lifetime_msg.sagd = x->handle;
+
+ /* hard time */
+ sa_set_lifetime_msg.hard_time.allocations = _X2KEY(x->lft.hard_packet_limit);
+ if(_X2KEY(x->lft.hard_byte_limit))
+ memcpy(sa_set_lifetime_msg.hard_time.bytes, &x->lft.hard_byte_limit, sizeof(uint64_t));
+
+ /* soft time */
+ sa_set_lifetime_msg.soft_time.allocations = _X2KEY(x->lft.soft_packet_limit);
+ if(_X2KEY(x->lft.soft_byte_limit))
+ memcpy(sa_set_lifetime_msg.soft_time.bytes, &x->lft.soft_byte_limit, sizeof(uint64_t));
+
+ /* current time */
+ sa_set_lifetime_msg.current_time.allocations = x->curlft.packets;
+ memcpy(sa_set_lifetime_msg.current_time.bytes, &x->curlft.bytes, sizeof(uint64_t));
+
+ memcpy(msg.payload, &sa_set_lifetime_msg, sizeof(struct nlkey_sa_set_lifetime));
+ msg.length = sizeof(struct nlkey_sa_set_lifetime);
+ *msg_id = NLKEY_SA_SET_STATE; /* next message */
+ } else {
+ *msg_id = NLKEY_SA_SET_STATE; /* next message */
+ goto exit;
+ }
+ break;
+
+ case NLKEY_SA_SET_STATE:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SET_STATE\n");
+ if(state) {
+ memset(&sa_set_state_msg, 0, sizeof(struct nlkey_sa_set_state));
+ memset(&sa_id_msg, 0, sizeof(struct nlkey_sa_id));
+
+ /* SA global handler */
+ sa_set_state_msg.sagd = x->handle;
+ sa_set_state_msg.parent_sa_sagd = x->parent_sa_handle;
+ /* State */
+ sa_set_state_msg.state = x->km.state;
+ // TODO: set the offloaded state once ack received !
+
+ memcpy(msg.payload, &sa_set_state_msg, sizeof(struct nlkey_sa_set_state));
+ msg.length = sizeof(struct nlkey_sa_set_state);
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ } else {
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ goto exit;
+ }
+ break;
+
+ case NLKEY_SA_DELETE:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_DELETE\n");
+ memset(&sa_delete_msg, 0, sizeof(struct nlkey_sa_delete));
+
+ /* SA global handler */
+ sa_delete_msg.sagd = x->handle;
+ memcpy(msg.payload, &sa_delete_msg, sizeof(struct nlkey_sa_delete));
+ msg.length = sizeof(struct nlkey_sa_delete);
+
+
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ break;
+
+ case NLKEY_SA_FLUSH:
+ //printk(KERN_INFO "ipsec_xfrm2nlkey: NLKEY_SA_FLUSH\n");
+ /* No data required for flush SA command */
+
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ break;
+
+ default:
+ printk(KERN_ERR "ipsec_xfrm2nlkey: event 0x%x not supported\n", c->event);
+ *msg_id = NLKEY_NULL_MSG; /* next message */
+ break;
+ }
+
+ /* prepare netlink message for kernel to user space direction */
+ if(msg.length > NLKEY_MSG_LEN)
+ {
+ printk(KERN_ERR "ipsec_xfrm2nlkey: maximum message size reached (%d bytes)\n", msg.length);
+ goto exit;
+ }
+
+ skb = alloc_skb(NLMSG_SPACE(NLKEY_MSG_LEN + NLKEY_HDR_LEN), allocation);
+ if (skb == NULL)
+ goto exit;
+
+ nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+ memcpy(NLMSG_DATA(nlh), (unsigned char *)&msg, (NLKEY_HDR_LEN + msg.length));
+
+ /* whole length of the message i.e. header + payload */
+ nlh->nlmsg_len = NLMSG_SPACE(NLKEY_HDR_LEN + msg.length);
+
+ /* from kernel */
+ nlh->nlmsg_pid = 0;
+ nlh->nlmsg_flags = 0;
+ nlh->nlmsg_type = 0;
+ NETLINK_CB(skb).portid = 0;
+ NETLINK_CB(skb).dst_group = 1;
+exit:
+ return skb;
+}
+
+static int ipsec_nlkey_send(struct net *net, struct xfrm_state *x, const struct km_event *c)
+{
+ struct sk_buff *skb;
+ unsigned short msg_type;
+ int rc = 0;
+
+ /* We may generate more than one message when adding new SA (sa_create + sa_set_state + sa_set_tunnel...) */
+ msg_type = ipsec_sacode_to_nlkeycode((unsigned short)c->event);
+
+ while(msg_type != NLKEY_NULL_MSG)
+ {
+ /* build nlkey message */
+ skb = ipsec_xfrm2nlkey(net, x, c, &msg_type);
+
+ if(skb != NULL)
+ if((rc = netlink_broadcast(nlkey_socket, skb, 0, 1, GFP_ATOMIC)) < 0)
+ return rc;
+ }
+
+ return rc;
+}
+
+
+int ipsec_nlkey_flow(u16 xfrm_nr, u16 *xfrm_handle, const struct flowi *fl, u16 family, u16 dir, u16 ignore_neigh)
+{
+ struct sk_buff *skb;
+ struct nlkey_msg msg;
+ struct nlmsghdr *nlh = NULL;
+ unsigned short *p;
+ gfp_t allocation = GFP_ATOMIC; // This may be called from atomic context
+
+ //printk(KERN_INFO "ipsec_nlkey_flow \n");
+
+ /* next message to build */
+ memset(&msg, 0, sizeof(struct nlkey_msg));
+ msg.fcode = NLKEY_FLOW_ADD;
+
+ // Number of SA for this flow
+ p = msg.payload;
+ *p++ = xfrm_nr;
+ msg.length += sizeof(unsigned short);
+ // SA handles list
+ memcpy(p, xfrm_handle, xfrm_nr*sizeof(unsigned short));
+ msg.length += xfrm_nr*sizeof(unsigned short);
+ p+=xfrm_nr;
+ // flow family
+ *p++ = family;
+ msg.length += sizeof(unsigned short);
+ // flow direction
+ *p++ = dir;
+ msg.length += sizeof(unsigned short);
+ // flow mode
+ *p++ = ignore_neigh;
+ msg.length += sizeof(unsigned short);
+ // flow descriptor
+ memcpy(p, fl, sizeof(struct flowi));
+ msg.length +=sizeof(struct flowi);
+ p+=sizeof(struct flowi) / sizeof(u16);
+
+ skb = alloc_skb(NLMSG_SPACE(NLKEY_MSG_LEN + NLKEY_HDR_LEN), allocation);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ /* prepare netlink message for kernel to user space direction */
+ nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+ memcpy(NLMSG_DATA(nlh), (unsigned char *)&msg, (NLKEY_HDR_LEN + msg.length));
+
+ /* whole length of the message i.e. header + payload */
+ nlh->nlmsg_len = NLMSG_SPACE(NLKEY_HDR_LEN + msg.length);
+
+ /* from kernel */
+ nlh->nlmsg_pid = 0;
+ nlh->nlmsg_flags = 0;
+ nlh->nlmsg_type = 0;
+ NETLINK_CB(skb).portid = 0;
+ NETLINK_CB(skb).dst_group = 1;
+
+ return(netlink_broadcast(nlkey_socket, skb, 0, 1, allocation));
+}
+EXPORT_SYMBOL(ipsec_nlkey_flow);
+
+
+int ipsec_nlkey_flow_remove(struct flowi *fl, u16 family, u16 dir)
+{
+ struct sk_buff *skb;
+ struct nlkey_msg msg;
+ struct nlmsghdr *nlh = NULL;
+ unsigned short *p;
+ gfp_t allocation = GFP_ATOMIC; // This may be called from atomic context
+
+
+ //printk(KERN_INFO "ipsec_nlkey_flow_remove\n");
+
+ /* next message to build */
+ memset(&msg, 0, sizeof(struct nlkey_msg));
+ msg.fcode = NLKEY_FLOW_REMOVE;
+
+ p = msg.payload;
+ // flow family
+ *p++ = family;
+ msg.length += sizeof(unsigned short);
+ // flow direction
+ *p++ = dir;
+ msg.length += sizeof(unsigned short);
+ // flow descriptor
+ memcpy(p, fl, sizeof(struct flowi));
+ msg.length +=sizeof(struct flowi);
+ p+=sizeof(struct flowi) / sizeof(u16);
+
+ skb = alloc_skb(NLMSG_SPACE(NLKEY_MSG_LEN + NLKEY_HDR_LEN), allocation);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ /* prepare netlink message for kernel to user space direction */
+ nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+ memcpy(NLMSG_DATA(nlh), (unsigned char *)&msg, (NLKEY_HDR_LEN + msg.length));
+
+ /* whole length of the message i.e. header + payload */
+ nlh->nlmsg_len = NLMSG_SPACE(NLKEY_HDR_LEN + msg.length);
+
+ /* from kernel */
+ nlh->nlmsg_pid = 0;
+ nlh->nlmsg_flags = 0;
+ nlh->nlmsg_type = 0;
+ NETLINK_CB(skb).portid = 0;
+ NETLINK_CB(skb).dst_group = 1;
+
+
+ return(netlink_broadcast(nlkey_socket, skb, 0, 1, allocation));
+
+
+}
+EXPORT_SYMBOL(ipsec_nlkey_flow_remove);
+
+
+
+static void ipsec_nlkey_init(void)
+{
+ struct netlink_kernel_cfg cfg = {
+ .groups = 1,
+ .input = ipsec_nlkey_rcv,
+ };
+ printk(KERN_INFO "Initializing NETLINK_KEY socket\n");
+ nlkey_socket = netlink_kernel_create(&init_net, NETLINK_KEY, &cfg);
+}
+#endif
+
+
static int __net_init pfkey_net_init(struct net *net)
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -3897,6 +4772,11 @@ static void __exit ipsec_pfkey_exit(void)
sock_unregister(PF_KEY);
unregister_pernet_subsys(&pfkey_net_ops);
proto_unregister(&key_proto);
+
+#ifdef NLKEY_SUPPORT
+ /* release NETLINK_KEY socket */
+ sock_release(nlkey_socket->sk_socket);
+#endif
}
static int __init ipsec_pfkey_init(void)
@@ -3913,6 +4793,12 @@ static int __init ipsec_pfkey_init(void)
if (err != 0)
goto out_unregister_pernet;
xfrm_register_km(&pfkeyv2_mgr);
+
+#ifdef NLKEY_SUPPORT
+ /* create NETLINK_KEY socket for IPSec offload on Comcerto */
+ ipsec_nlkey_init();
+#endif
+
out:
return err;
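
For reference, the userspace end of these NLKEY broadcasts is a plain netlink listener: a daemon such as CMM opens the NETLINK_KEY socket, joins multicast group 1 (the group netlink_broadcast() targets above), and pulls an nlkey_msg out of each frame. The sketch below is illustrative only; the NETLINK_KEY protocol number, NLKEY_MSG_LEN and the nlkey_msg layout are assumptions standing in for the vendor headers this patch relies on.

/* Minimal NLKEY listener sketch; the two defines are placeholders, not vendor values. */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#define NETLINK_KEY   25      /* placeholder: use the vendor kernel's protocol number */
#define NLKEY_MSG_LEN 1024    /* placeholder: use the vendor NLKEY header's value */

int main(void)
{
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK, .nl_groups = 1 };
	char buf[NLMSG_SPACE(NLKEY_MSG_LEN)];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_KEY);

	if (fd < 0 || bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	for (;;) {
		ssize_t len = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (len <= 0)
			break;
		/* NLMSG_DATA(nlh) points at the nlkey_msg header followed by its payload */
		printf("nlkey message: %u bytes\n", nlh->nlmsg_len);
	}

	close(fd);
	return 0;
}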

View File

@@ -0,0 +1,288 @@
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3a04665..7e7d13d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -28,6 +28,11 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
+#if defined(CONFIG_CPE_FAST_PATH)
+#ifndef IPSEC_FLOW_CACHE
+#include <net/xfrm.h>
+#endif
+#endif
#include <linux/slab.h>
#include <linux/siphash.h>
@@ -202,9 +207,16 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct,
struct nlattr *nest_proto;
int ret;
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_lock();
+#endif
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (!l4proto->to_nlattr)
+ if (!l4proto->to_nlattr) {
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_unlock();
+#endif
return 0;
+ }
nest_proto = nla_nest_start(skb, CTA_PROTOINFO);
if (!nest_proto)
@@ -214,9 +226,15 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct,
nla_nest_end(skb, nest_proto);
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_unlock();
+#endif
return ret;
nla_put_failure:
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_unlock();
+#endif
return -1;
}
@@ -353,6 +371,18 @@ nla_put_failure:
#define ctnetlink_dump_mark(a, b, c) (0)
#endif
+#if defined(CONFIG_CPE_FAST_PATH)
+static inline int
+ctnetlink_dump_qosconnmark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ nla_put_be64(skb, CTA_QOSCONNMARK, cpu_to_be64(ct->qosconnmark),
+ CTA_QOSCONNMARK_PAD);
+ return 0;
+}
+#else
+#define ctnetlink_dump_qosconnmark(a, b) (0)
+#endif
+
#ifdef CONFIG_NF_CONNTRACK_SECMARK
static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
{
@@ -430,6 +460,59 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
return 0;
}
+#if defined(CONFIG_CPE_FAST_PATH)
+static int
+ctnetlink_dump_comcerto_fp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nlattr *nest_count;
+
+ nest_count = nla_nest_start(skb, CTA_LAYERSCAPE_FP_ORIG | NLA_F_NESTED);
+ if (!nest_count)
+ goto nla_put_failure;
+
+ nla_put_u32(skb, CTA_COMCERTO_FP_MARK, ct->fp_info[IP_CT_DIR_ORIGINAL].mark);
+ nla_put_u32(skb, CTA_COMCERTO_FP_IFINDEX, ct->fp_info[IP_CT_DIR_ORIGINAL].ifindex);
+ nla_put_u32(skb, CTA_COMCERTO_FP_IIF, ct->fp_info[IP_CT_DIR_ORIGINAL].iif);
+ nla_put_u32(skb, CTA_COMCERTO_FP_UNDERLYING_IIF, ct->fp_info[IP_CT_DIR_ORIGINAL].underlying_iif);
+ nla_put_u16(skb, CTA_COMCERTO_FP_UNDERLYING_VID, ct->fp_info[IP_CT_DIR_ORIGINAL].underlying_vlan_id);
+#ifndef IPSEC_FLOW_CACHE
+ if ((ct->fp_info[IP_CT_DIR_ORIGINAL].xfrm_handle[0]) ||
+ (ct->fp_info[IP_CT_DIR_ORIGINAL].xfrm_handle[MAX_SUPPORTED_XFRMS_PER_DIR]))
+ {
+ nla_put(skb, CTA_COMCERTO_FP_XFRM_HANDLE, sizeof(ct->fp_info[IP_CT_DIR_ORIGINAL].xfrm_handle),
+ ct->fp_info[IP_CT_DIR_ORIGINAL].xfrm_handle);
+ }
+#endif /* IPSEC_FLOW_CACHE */
+ nla_nest_end(skb, nest_count);
+
+ nest_count = nla_nest_start(skb, CTA_LAYERSCAPE_FP_REPLY | NLA_F_NESTED);
+ if (!nest_count)
+ goto nla_put_failure;
+
+ nla_put_u32(skb, CTA_COMCERTO_FP_MARK, ct->fp_info[IP_CT_DIR_REPLY].mark);
+ nla_put_u32(skb, CTA_COMCERTO_FP_IFINDEX, ct->fp_info[IP_CT_DIR_REPLY].ifindex);
+ nla_put_u32(skb, CTA_COMCERTO_FP_IIF, ct->fp_info[IP_CT_DIR_REPLY].iif);
+ nla_put_u32(skb, CTA_COMCERTO_FP_UNDERLYING_IIF, ct->fp_info[IP_CT_DIR_REPLY].underlying_iif);
+ nla_put_u16(skb, CTA_COMCERTO_FP_UNDERLYING_VID, ct->fp_info[IP_CT_DIR_REPLY].underlying_vlan_id);
+#ifndef IPSEC_FLOW_CACHE
+ if ((ct->fp_info[IP_CT_DIR_REPLY].xfrm_handle[0]) ||
+ (ct->fp_info[IP_CT_DIR_REPLY].xfrm_handle[MAX_SUPPORTED_XFRMS_PER_DIR]))
+ {
+ nla_put(skb, CTA_COMCERTO_FP_XFRM_HANDLE, sizeof(ct->fp_info[IP_CT_DIR_REPLY].xfrm_handle),
+ ct->fp_info[IP_CT_DIR_REPLY].xfrm_handle);
+ }
+#endif /* IPSEC_FLOW_CACHE */
+ nla_nest_end(skb, nest_count);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+#else
+#define ctnetlink_dump_comcerto_fp(a, b) (0)
+#endif
+
#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
@@ -570,7 +653,11 @@ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
{
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_mark(skb, ct, true) < 0 ||
+ ctnetlink_dump_qosconnmark(skb, ct) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
+#ifdef CONFIG_CPE_FAST_PATH
+ ctnetlink_dump_comcerto_fp(skb, ct) < 0 ||
+#endif
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0)
@@ -722,6 +809,13 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
+#ifdef CONFIG_CPE_FAST_PATH
+ + 2 * nla_total_size(0) /* CTA_LAYERSCAPE_FP_ORIG|REPL */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_MARK */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_IFINDEX */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_IIF */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_UNDERLYING_IIF */
+#endif
#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@@ -729,6 +823,9 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#if defined(CONFIG_CPE_FAST_PATH)
+ + nla_total_size(sizeof(u_int64_t)) /* CTA_QOSCONNMARK */
+#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
+ nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
@@ -806,6 +903,11 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (ctnetlink_dump_comcerto_fp(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+
if (ctnetlink_dump_id(skb, ct) < 0)
goto nla_put_failure;
@@ -858,6 +960,11 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
goto nla_put_failure;
#endif
+#if defined(CONFIG_CPE_FAST_PATH)
+ if ((events & (1 << IPCT_QOSCONNMARK) || ct->qosconnmark) &&
+ ctnetlink_dump_qosconnmark(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
if (ctnetlink_dump_event_timestamp(skb, ct))
goto nla_put_failure;
@@ -1570,6 +1677,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_NAT_SRC] = { .type = NLA_NESTED },
[CTA_TIMEOUT] = { .type = NLA_U32 },
[CTA_MARK] = { .type = NLA_U32 },
+#if defined(CONFIG_CPE_FAST_PATH)
+ [CTA_QOSCONNMARK] = { .type = NLA_U64 },
+#endif
[CTA_ID] = { .type = NLA_U32 },
[CTA_NAT_DST] = { .type = NLA_NESTED },
[CTA_TUPLE_MASTER] = { .type = NLA_NESTED },
@@ -1906,6 +2016,48 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS])));
}
+#if defined(CONFIG_CPE_FAST_PATH)
+/*
+ * This function detects ctnetlink messages that require the
+ * conntrack status to be set to IPS_PERMANENT.
+ * It updates only this bit, regardless of any other
+ * requested changes.
+ * Returns 0 on success.
+ */
+static int
+ctnetlink_change_permanent(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+ unsigned int status;
+ u_int32_t id;
+ u_int32_t conntrack_id = ntohl((__force __be32)nf_ct_get_id(ct));
+
+ if (cda[CTA_STATUS] && cda[CTA_ID]) {
+ status = ntohl(nla_get_be32(cda[CTA_STATUS]));
+ id = ntohl(nla_get_be32(cda[CTA_ID]));
+
+ if (status & IPS_PERMANENT) {
+ if (conntrack_id == id) {
+ ct->status |= IPS_PERMANENT;
+ return 0;
+ }
+ else
+ return -ENOENT;
+ }
+ else if (nf_ct_is_permanent(ct))
+ {
+ /* Clear the PERMANENT bit. */
+ if (conntrack_id == id) {
+ clear_bit(IPS_PERMANENT_BIT, &ct->status);
+ return 0;
+ }
+ else
+ return -ENOENT;
+ }
+ }
+ return -1;
+}
+#endif
+
static int
ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -2209,6 +2361,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
return err;
}
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (cda[CTA_QOSCONNMARK])
+ ct->qosconnmark = be64_to_cpu(nla_get_be64(cda[CTA_QOSCONNMARK]));
+#endif
+
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ctnetlink_change_mark(ct, cda);
@@ -2347,6 +2504,11 @@ ctnetlink_create_conntrack(struct net *net,
goto err2;
}
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (cda[CTA_QOSCONNMARK])
+ ct->qosconnmark = be64_to_cpu(nla_get_be64(cda[CTA_QOSCONNMARK]));
+#endif
+
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ctnetlink_change_mark(ct, cda);
@@ -2473,6 +2635,15 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
+#if defined(CONFIG_CPE_FAST_PATH)
+ /* If the message only sets or clears the permanent status bit, it is
+ * a dedicated request: don't broadcast the event and don't update the ct */
+ err = ctnetlink_change_permanent(ct, cda);
+ if ((err == 0) || (err == -ENOENT)) {
+ nf_ct_put(ct);
+ return err;
+ }
+#endif
err = ctnetlink_change_conntrack(ct, cda);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |

View File

@@ -0,0 +1,21 @@
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 5d6a0b2b4f3a..7c3d10e2e8b1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config WIRELESS_EXT
- bool
+ def_bool y
config WEXT_CORE
def_bool y
@@ -11,7 +11,7 @@ config WEXT_PROC
depends on WEXT_CORE
config WEXT_PRIV
- bool
+ def_bool y
config CFG80211
tristate "cfg80211 - wireless configuration API"

View File

@@ -0,0 +1,9 @@
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 0a0a0a0..0b0b0b0 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
+obj-$(CONFIG_INET_IPSEC_OFFLOAD) += ipsec_flow.o

View File

@@ -0,0 +1,163 @@
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 62486f8..3cbe4f8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -48,6 +48,11 @@
#include <net/inet_dscp.h>
#include "xfrm_hash.h"
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+#include "ipsec_flow.h"
+#endif
+#endif
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
@@ -179,6 +184,15 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+extern int ipsec_nlkey_flow(u16 xfrm_nr, u16 *xfrm_handle,
+ const struct flowi *fl, u16 family, u16 dir, u16 ignore_neigh);
+int ipsec_flow_init(struct net *net);
+void ipsec_flow_fini(struct net *net);
+#endif
+#endif
+
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -2599,6 +2613,17 @@ static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
return 0;
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+int xfrm_get_tos(const struct flowi *fl, int family)
+{
+ if (family == AF_INET)
+ return inet_dscp_to_dsfield(fl->u.ip4.flowi4_dscp) & INET_DSCP_MASK;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_get_tos);
+#endif
+
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -3295,6 +3320,37 @@ no_transform:
dst = dst_orig;
}
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ {
+ struct dst_entry *dst1 = dst;
+ struct xfrm_state *x;
+ u16 xfrm_handle[XFRM_POLICY_TYPE_MAX];
+ u16 ignore_neigh = 0;
+
+ num_xfrms = 0;
+ memset(xfrm_handle, 0, XFRM_POLICY_TYPE_MAX * sizeof(u16));
+ while (((x = dst1->xfrm) != NULL) &&
+ (num_xfrms < XFRM_POLICY_TYPE_MAX)) {
+ xfrm_handle[num_xfrms++] = x->handle;
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ ignore_neigh = 1;
+ dst1 = xfrm_dst_child(dst1);
+
+ if (dst1 == NULL) {
+ err = -EHOSTUNREACH;
+ goto error;
+ }
+ }
+ if (ipsec_flow_add(net, fl, family, dir, xfrm_handle)) {
+ /* send a flow notification to cmm with the SA handles */
+ ipsec_nlkey_flow(num_xfrms, xfrm_handle, fl, family,
+ (unsigned short)dir, ignore_neigh);
+ }
+ }
+#endif
+#endif
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst->xfrm &&
@@ -3853,6 +3909,34 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ {
+ struct xfrm_state *x;
+ u16 xfrm_handle[XFRM_POLICY_TYPE_MAX];
+
+ xfrm_nr = 0;
+ memset(xfrm_handle, 0, XFRM_POLICY_TYPE_MAX * sizeof(u16));
+ for (i = sp->len - 1;
+ (i >= 0) && (xfrm_nr < XFRM_POLICY_TYPE_MAX); i--) {
+ x = sp->xvec[i];
+ xfrm_handle[xfrm_nr++] = x->handle;
+ }
+ if (ipsec_flow_add(net, (const struct flowi *)&fl, family, dir,
+ xfrm_handle)) {
+ /* send a flow notification to cmm with the SA handles */
+ ipsec_nlkey_flow(xfrm_nr, xfrm_handle,
+ (const struct flowi *)&fl, family, dir, 0);
+ }
+ }
+
+ /* Hub and spoke changes: Setting the POLICY_IN direction in the packet */
+ skb->ipsec_xfrm_dir |= (1 << XFRM_POLICY_IN);
+
+std_path:
+#endif
+#endif
+
xfrm_pols_put(pols, npols);
sp->verified_cnt = k;
@@ -4328,6 +4412,14 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out_sysctl;
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ rv = ipsec_flow_init(net);
+ if (rv < 0)
+ goto out_ipsec_flow;
+#endif
+#endif
+
rv = xfrm_nat_keepalive_net_init(net);
if (rv < 0)
goto out_nat_keepalive;
@@ -4335,6 +4427,12 @@ static int __net_init xfrm_net_init(struct net *net)
return 0;
out_nat_keepalive:
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ ipsec_flow_fini(net);
+out_ipsec_flow:
+#endif
+#endif
xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
@@ -4349,6 +4447,11 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
xfrm_nat_keepalive_net_fini(net);
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ ipsec_flow_fini(net);
+#endif
+#endif
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);

View File

@@ -0,0 +1,291 @@
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9e14e45..d685ed7 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -58,6 +58,10 @@ static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
return refcount_inc_not_zero(&x->refcnt);
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+static unsigned short xfrm_state_handle;
+#endif
+
static inline unsigned int xfrm_dst_hash(struct net *net,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -119,6 +123,9 @@ static void xfrm_hash_transfer(struct hlist_head *list,
struct hlist_head *nsrctable,
struct hlist_head *nspitable,
struct hlist_head *nseqtable,
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ struct hlist_head *nhtable,
+#endif
unsigned int nhashmask)
{
struct hlist_node *tmp;
@@ -150,6 +157,13 @@ static void xfrm_hash_transfer(struct hlist_head *list,
XFRM_STATE_INSERT(byseq, &x->byseq, nseqtable + h,
x->xso.type);
}
+
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && x->in_byh_hash) {
+ h = x->handle & nhashmask;
+ hlist_add_head_rcu(&x->byh, nhtable + h);
+ }
+#endif
}
}
@@ -162,6 +176,9 @@ static void xfrm_hash_resize(struct work_struct *work)
{
struct net *net = container_of(work, struct net, xfrm.state_hash_work);
struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ struct hlist_head *nh, *oh;
+#endif
unsigned long nsize, osize;
unsigned int nhashmask, ohashmask;
int i;
@@ -188,6 +205,16 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(nspi, nsize);
return;
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ nh = xfrm_hash_alloc(nsize);
+ if (!nh) {
+ xfrm_hash_free(ndst, nsize);
+ xfrm_hash_free(nsrc, nsize);
+ xfrm_hash_free(nspi, nsize);
+ xfrm_hash_free(nseq, nsize);
+ return;
+ }
+#endif
spin_lock_bh(&net->xfrm.xfrm_state_lock);
write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
@@ -195,17 +222,27 @@ static void xfrm_hash_resize(struct work_struct *work)
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--)
- xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
+ xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq,
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ nh,
+#endif
+ nhashmask);
osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ oh = xfrm_state_deref_prot(net->xfrm.state_byh, net);
+#endif
ohashmask = net->xfrm.state_hmask;
rcu_assign_pointer(net->xfrm.state_bydst, ndst);
rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
rcu_assign_pointer(net->xfrm.state_byseq, nseq);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ rcu_assign_pointer(net->xfrm.state_byh, nh);
+#endif
net->xfrm.state_hmask = nhashmask;
write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
@@ -219,6 +256,9 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
xfrm_hash_free(oseq, osize);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xfrm_hash_free(oh, osize);
+#endif
}
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
@@ -744,6 +784,9 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
INIT_HLIST_NODE(&x->byseq);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ INIT_HLIST_NODE(&x->byh);
+#endif
hrtimer_setup(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME,
HRTIMER_MODE_ABS_SOFT);
timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
@@ -754,6 +797,12 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ do {
+ x->handle = xfrm_state_handle++;
+ } while (x->handle == 0);
+ x->in_byh_hash = 0;
+#endif
x->pcpu_num = UINT_MAX;
spin_lock_init(&x->lock);
x->mode_data = NULL;
@@ -829,6 +878,12 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->id.spi)
hlist_del_rcu(&x->byspi);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && x->in_byh_hash) {
+ hlist_del_rcu(&x->byh);
+ x->in_byh_hash = 0;
+ }
+#endif
net->xfrm.state_num--;
xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
@@ -1582,6 +1637,13 @@ found:
net->xfrm.state_byseq + h,
x->xso.type);
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && !x->in_byh_hash) {
+ h = x->handle & net->xfrm.state_hmask;
+ hlist_add_head_rcu(&x->byh, net->xfrm.state_byh + h);
+ x->in_byh_hash = 1;
+ }
+#endif
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
hrtimer_start(&x->mtimer,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
@@ -1752,6 +1814,14 @@ static void __xfrm_state_insert(struct xfrm_state *x)
x->xso.type);
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && !x->in_byh_hash) {
+ h = x->handle & net->xfrm.state_hmask;
+ hlist_add_head_rcu(&x->byh, net->xfrm.state_byh + h);
+ x->in_byh_hash = 1;
+ }
+#endif
+
hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1773,6 +1843,9 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
u32 mark = xnew->mark.v & xnew->mark.m;
u32 if_id = xnew->if_id;
u32 cpu_id = xnew->pcpu_num;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ u16 parent_sa_handle = 0;
+#endif
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
@@ -1782,9 +1855,17 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
x->pcpu_num == cpu_id &&
(mark & x->mark.m) == x->mark.v &&
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
- xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
+ xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) {
x->genid++;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (!parent_sa_handle)
+ parent_sa_handle = x->handle;
+#endif
+ }
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xnew->parent_sa_handle = parent_sa_handle;
+#endif
}
void xfrm_state_insert(struct xfrm_state *x)
@@ -2352,6 +2433,37 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+struct xfrm_state *__xfrm_state_lookup_byhandle(struct net *net, u16 handle)
+{
+ unsigned int h = handle & net->xfrm.state_hmask;
+ struct xfrm_state *x;
+
+ hlist_for_each_entry(x, net->xfrm.state_byh + h, byh) {
+ if (x->handle != handle)
+ continue;
+
+ xfrm_state_hold(x);
+ return x;
+ }
+
+ return NULL;
+}
+
+struct xfrm_state *
+xfrm_state_lookup_byhandle(struct net *net, u16 handle)
+{
+ struct xfrm_state *x;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x = __xfrm_state_lookup_byhandle(net, handle);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup_byhandle);
+#endif
+
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
@@ -2603,6 +2715,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family);
XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && !x->in_byh_hash) {
+ h = x->handle & net->xfrm.state_hmask;
+ hlist_add_head_rcu(&x->byh, net->xfrm.state_byh + h);
+ x->in_byh_hash = 1;
+ }
+#endif
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
goto unlock;
@@ -3279,6 +3398,12 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ net->xfrm.state_byh = xfrm_hash_alloc(sz);
+ if (!net->xfrm.state_byh)
+ goto out_byh;
+ get_random_bytes(&xfrm_state_handle, sizeof(xfrm_state_handle));
+#endif
net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
if (!net->xfrm.state_cache_input)
@@ -3294,6 +3419,10 @@ int __net_init xfrm_state_init(struct net *net)
return 0;
out_state_cache_input:
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xfrm_hash_free(net->xfrm.state_byh, sz);
+out_byh:
+#endif
xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
@@ -3321,9 +3450,15 @@ void xfrm_state_fini(struct net *net)
WARN_ON(!hlist_empty(net->xfrm.state_byspi + i));
WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i));
WARN_ON(!hlist_empty(net->xfrm.state_bydst + i));
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ WARN_ON(!hlist_empty(net->xfrm.state_byh + i));
+#endif
}
sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xfrm_hash_free(net->xfrm.state_byh, sz);
+#endif
xfrm_hash_free(net->xfrm.state_byseq, sz);
xfrm_hash_free(net->xfrm.state_byspi, sz);
xfrm_hash_free(net->xfrm.state_bysrc, sz);

View File

@@ -0,0 +1,93 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Mono <dev@mono>
Date: Mon, 11 May 2026 00:00:00 +0900
Subject: [PATCH] libnetfilter_conntrack: do not abort on unusable NXP attrs
The NXP ASK extension patch teaches libnetfilter_conntrack about
Comcerto/Layerscape fast-path and QoS conntrack attributes, but it also
uses abi_breakage() when those attributes are present with a shape this
userspace does not expect.
That is too fragile for CMM. CMM dumps the global conntrack table, which
can contain ordinary Kubernetes/Cilium conntrack entries alongside entries
that are relevant to the NXP fast path. A single unexpected or
unrepresentable vendor attribute must not abort the entire dump before CMM
has a chance to ignore the entry.
Keep unsupported attribute IDs ignored as before. For NXP fast-path/QoS
attributes that fail validation or nested parsing, skip only that attribute
or fast-path block and continue parsing the rest of the conntrack object.
Signed-off-by: Mono <dev@mono>
---
src/conntrack/parse_mnl.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/src/conntrack/parse_mnl.c b/src/conntrack/parse_mnl.c
index 33f7824..0000000 100644
--- a/src/conntrack/parse_mnl.c
+++ b/src/conntrack/parse_mnl.c
@@ -873,16 +873,16 @@ nfct_parse_comcerto_fp_attr_cb(const struct nlattr *attr, void *data)
case CTA_COMCERTO_FP_IIF:
case CTA_COMCERTO_FP_UNDERLYING_IIF:
if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
case CTA_COMCERTO_FP_UNDERLYING_VID:
if (mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
case CTA_COMCERTO_FP_XFRM_HANDLE:
/* 4 x u32 = 16 bytes */
if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, 16) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
}
tb[type] = attr;
@@ -1024,11 +1024,11 @@ nfct_parse_conntrack_attr_cb(const struct nlattr *attr, void *data)
case CTA_LAYERSCAPE_FP_ORIG:
case CTA_LAYERSCAPE_FP_REPLY:
if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
case CTA_QOSCONNMARK:
if (mnl_attr_validate(attr, MNL_TYPE_U64) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
}
tb[type] = attr;
@@ -1164,18 +1164,21 @@ nfct_payload_parse(const void *payload, size_t payload_len,
/* NXP ASK: Comcerto fast path and QoS */
if (tb[CTA_LAYERSCAPE_FP_ORIG]) {
- if (nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_ORIG], ct,
- __DIR_ORIG) < 0)
- return -1;
+ /*
+ * Do not abort the entire conntrack dump if one fast-path
+ * extension block cannot be represented by this userspace.
+ */
+ nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_ORIG], ct,
+ __DIR_ORIG);
}
if (tb[CTA_LAYERSCAPE_FP_REPLY]) {
- if (nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_REPLY], ct,
- __DIR_REPL) < 0)
- return -1;
+ /* See CTA_LAYERSCAPE_FP_ORIG handling above. */
+ nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_REPLY], ct,
+ __DIR_REPL);
}
if (tb[CTA_QOSCONNMARK]) {
ct->qosconnmark = be64toh(mnl_attr_get_u64(tb[CTA_QOSCONNMARK]));
set_bit(ATTR_QOSCONNMARK, ct->head.set);
}
--
2.47.3
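
As a usage note, the point of the change is that a CMM-style dump can now walk a mixed conntrack table and simply ignore entries without the vendor attributes. A rough sketch with the stock libnetfilter_conntrack API follows; ATTR_QOSCONNMARK is the attribute id introduced by the NXP ASK extension patch, and the snippet is an illustration, not CMM's actual code.

/* Sketch: dump the conntrack table and only report entries carrying a QoS mark. */
#include <stdio.h>
#include <stdint.h>
#include <sys/socket.h>
#include <libnetfilter_conntrack/libnetfilter_conntrack.h>

static int dump_cb(enum nf_conntrack_msg_type type,
		   struct nf_conntrack *ct, void *data)
{
	/* Entries without the vendor attribute (e.g. plain Cilium flows) are skipped. */
	if (!nfct_attr_is_set(ct, ATTR_QOSCONNMARK))
		return NFCT_CB_CONTINUE;

	printf("qosconnmark=%llu\n",
	       (unsigned long long)nfct_get_attr_u64(ct, ATTR_QOSCONNMARK));
	return NFCT_CB_CONTINUE;
}

int main(void)
{
	uint32_t family = AF_INET;
	struct nfct_handle *h = nfct_open(CONNTRACK, 0);

	if (!h)
		return 1;

	nfct_callback_register(h, NFCT_T_ALL, dump_cb, NULL);
	/* With the change above, an unparsable vendor attribute no longer aborts this dump. */
	nfct_query(h, NFCT_Q_DUMP, &family);
	nfct_close(h);
	return 0;
}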