Merge branch 'ask'

This commit is contained in:
2026-05-11 07:58:14 +08:00
58 changed files with 6863 additions and 87 deletions

View File

@@ -143,6 +143,14 @@ Tested worker node upgrade chain:
---
## CMM
Based on vendor's ASK
See:
- [Running cmm](docs/cmm.md)
## Current status
This project is usable for experimenting with a single control-plane device image, but it is still a development project.

View File

@@ -9,9 +9,9 @@ apk add alpine-base \
# For diagnostics
apk add \
iproute2 iproute2-ss curl bind-tools procps strace tcpdump lsof jq binutils \
iproute2 iproute2-ss curl bind-tools procps strace tcpdump lsof jq gdb binutils \
openssl conntrack-tools ethtool findmnt kmod coreutils util-linux zstd libcap-utils \
iotop sysstat
iotop sysstat dtc
echo '[ -x /bin/bash ] && exec /bin/bash -l' >> "/root/.profile"
# Compat layer for kubelet for now. Will look into building it myself later, if needed.

View File

@@ -27,6 +27,7 @@ FUSE_OVERLAYFS="${FUSE_OVERLAYFS:-/usr/bin/fuse-overlayfs}"
EXTRA_IMAGES=(
"${EXTRA_IMAGES[@]:-}"
"docker-daemon:localhost/monok8s/node-control:$TAG"
"docker-daemon:localhost/monok8s/cmm:$TAG"
)
# Keep archive cache version/arch scoped so downloads do not get mixed.

View File

@@ -1,9 +1,10 @@
INTERVAL=10
DEVPATH=hwmon0=devices/platform/soc/2180000.i2c/i2c-0/i2c-7/7-002e hwmon1=devices/virtual/thermal/thermal_zone0
DEVNAME=hwmon0=emc2305 hwmon1=ddr_thermal
FCTEMPS=hwmon0/pwm2=hwmon1/temp1_input
FCFANS= hwmon0/pwm2=hwmon0/fan1_input
MINTEMP=hwmon0/pwm2=35
MAXTEMP=hwmon0/pwm2=60
MINSTART=hwmon0/pwm2=60
MINSTOP=hwmon0/pwm2=45
DEVPATH=hwmon0=devices/platform/soc/1a00000.fman/1afd000.mdio/mdio_bus/0x0000000001afd000/0x0000000001afd000:00 hwmon3=devices/platform/soc/2180000.i2c/i2c-0/i2c-7/7-002e
DEVNAME=hwmon0=0x0000000001afd000:00 hwmon3=emc2305
FCTEMPS=hwmon3/pwm1=hwmon0/temp1_input
FCFANS= hwmon3/pwm1=hwmon3/fan1_input
MINTEMP=hwmon3/pwm1=35
MAXTEMP=hwmon3/pwm1=60
MINSTART=hwmon3/pwm1=60
MINSTOP=hwmon3/pwm1=45
MINPWM=hwmon3/pwm1=0

View File

@@ -11,6 +11,19 @@ BOOT_STATE=/run/monok8s/boot-state.env
BOOTPART_FILE="$CONFIG_DIR/.bootpart"
MIGRATION_STATE_DIR="$CONFIG_DIR/migration-state"
# Try to load a kernel module that the boot sequence can live without.
#   $1  module name handed to modprobe
# A modprobe failure only prints a warning; the function still succeeds.
load_module_optional() {
    module="$1"
    modprobe "$module" || echo "WARNING: failed to load optional module: $module"
}
echo "Loading optional hardware offload modules..."
load_module_optional cdx
load_module_optional fci
load_module_optional auto_bridge
mkdir -p /dev/hugepages
mountpoint -q /dev/hugepages || mount -t hugetlbfs none /dev/hugepages
echo 256 > /proc/sys/vm/nr_hugepages

27
ask/cmm/cmm.conf Normal file
View File

@@ -0,0 +1,27 @@
# CMM Fast Forward configuration
# This file specifies traffic that should NOT be offloaded to the fast path
# Don't Fast Forward FTP traffic (needs ALG)
config fastforward ftp
option proto tcp
option port 21
# Don't Fast Forward SIP (needs ALG)
config fastforward sip
option proto udp
option port 5060
# Don't Fast Forward PPTP control
config fastforward pptp
option proto tcp
option port 1723
# Optional logging
# NOTE: "stdout" is only supported by our own patched version of cmm.
# Keep info disabled by default; CMM receives the whole conntrack stream.
config logging
option file stdout
option command 0
option error 1
option warning 1
option info 0

11
ask/cmm/entrypoint.sh Normal file
View File

@@ -0,0 +1,11 @@
#!/bin/sh
# Container entrypoint: resolve the cmm settings from the environment
# (falling back to defaults) and replace this shell with /bin/cmm.
# -e aborts on the first failing command, -u rejects unset variables.
set -eu
# Path of the configuration file handed to cmm via -f.
CMM_CONFIG="${CMM_CONFIG:-/etc/cmm/cmm.conf}"
# Vendor default from cmm.service:
# 131072 = 128 * 1024 max active conntrack/offload entries.
CMM_MAX_CONNECTIONS="${CMM_MAX_CONNECTIONS:-131072}"
echo "[ask] starting cmm"
# exec so that cmm takes over this process instead of running as a child of the shell.
exec /bin/cmm -D -f "$CMM_CONFIG" -n "$CMM_MAX_CONNECTIONS"

37
ask/cmm/init_dpa.sh Executable file
View File

@@ -0,0 +1,37 @@
#!/bin/sh
# Run /bin/dpa_app at most once per state directory, even when several
# containers start this script at the same time.
#
# State lives under $CMM_STATE_DIR (default /host/run/monok8s/cmm):
#   dpa_app.loaded  marker written after dpa_app exits successfully
#   dpa_app.lock    directory used as a mutex (mkdir is atomic)
set -eu

STATE_DIR="${CMM_STATE_DIR:-/host/run/monok8s/cmm}"
DONE_FILE="$STATE_DIR/dpa_app.loaded"
LOCK_DIR="$STATE_DIR/dpa_app.lock"

mkdir -p "$STATE_DIR"

if [ -e "$DONE_FILE" ]; then
    echo "[ask] dpa_app already loaded for this boot; skipping"
    exit 0
fi

# Keep trying to take the lock. If the current holder fails, its EXIT trap
# removes the lock without writing the marker; retrying mkdir lets this
# process take over instead of waiting forever for a marker that never comes.
announced=0
until mkdir "$LOCK_DIR" 2>/dev/null; do
    if [ "$announced" -eq 0 ]; then
        echo "[ask] another dpa_app initialization is running; waiting"
        announced=1
    fi
    if [ -e "$DONE_FILE" ]; then
        echo "[ask] dpa_app was loaded by another process; skipping"
        exit 0
    fi
    sleep 1
done

# We own the lock from here on; always release it, on success or failure.
trap 'rmdir "$LOCK_DIR" 2>/dev/null || true' EXIT

# Re-check under the lock: another process may have finished between the
# first check and the moment the lock was acquired.
if [ -e "$DONE_FILE" ]; then
    echo "[ask] dpa_app already loaded for this boot; skipping"
    exit 0
fi

echo "[ask] running dpa_app"
/bin/dpa_app
# Only reached when dpa_app succeeded (set -e); record the completion time.
date -u +"%Y-%m-%dT%H:%M:%SZ" > "$DONE_FILE"
echo "[ask] dpa_app loaded"

View File

@@ -11,7 +11,17 @@ FMC_VERSION=lf-$(LINUX_FACTORY)
DPDK_VERSION=lf-$(LINUX_FACTORY)
VPP_VERSION=lf-$(LINUX_FACTORY)
VPP_UPSTREAM_VERSION=23.10
MONO_ASK_VERSION=mt-$(LINUX_FACTORY)
# ASK's deps
MONO_ASK_VERSION=mt-6.12.49-2.2.0
LIBNFNETLINK_VERSION=1.0.2
LIBMNL_VERSION=1.0.5
LIBNFCT_VERSION=1.1.0
LIBCLI_VERSION=1.10.7
# Check the package versions shipped by Debian trixie (what ASK uses)
LIBXML2_VERSION=2.11.7
TCLAP_VERSION=1.2.5
LIBPCAP_VERSION=1.10.4
CRIO_VERSION=cri-o.arm64.v1.35.2
KUBE_VERSION=v1.35.3
@@ -47,3 +57,10 @@ APT_PROXY=
# remote image repository prefix to push to
# e.g. ghcr.io/monok8s
IMAGE_REPOSITORY=
# Mirror
# You can host a local mirror by running the commands below (the packages must be downloaded locally first)
# kubectl apply -f devtools/dep-pkg-mirror.yaml
# devtools/push-dep-pkg-mirror.sh
# e.g. http://dep-pkg-mirror.default.svc.cluster.local/monok8s
DEP_PKG_MIRROR=

View File

@@ -165,6 +165,47 @@ func NewCmdCreate(flags *genericclioptions.ConfigFlags) *cobra.Command {
cmd.AddCommand(&agentcmd)
cmmconf := render.CMMConf{}
cmmcmd := cobra.Command{
Use: "cmm",
Short: "Print CMM daemonsets template",
RunE: func(cmd *cobra.Command, _ []string) error {
if len(cmmconf.ImagePullSecrets) > 0 && strings.TrimSpace(cmmconf.Image) == "" {
return fmt.Errorf("--image-pull-secret requires --image")
}
ns, _, err := flags.ToRawKubeConfigLoader().Namespace()
if err != nil {
return err
}
cmmconf.Namespace = ns
out, err := render.RenderCMMDaemonSets(cmmconf)
if err != nil {
return err
}
_, err = fmt.Fprint(cmd.OutOrStdout(), out)
return err
},
}
cmmcmd.Flags().StringVar(
&cmmconf.Image,
"image",
"",
"CMM image, including optional registry and tag",
)
cmmcmd.Flags().StringSliceVar(
&cmmconf.ImagePullSecrets,
"image-pull-secret",
nil,
"Image pull secret name for the CMM image; may be specified multiple times or as a comma-separated list",
)
cmd.AddCommand(&cmmcmd)
return cmd
}

219
clitools/pkg/render/cmm.go Normal file
View File

@@ -0,0 +1,219 @@
package render
import (
"fmt"
"strings"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
buildinfo "example.com/monok8s/pkg/buildinfo"
)
const cmmName = "cmm"
// CMMConf carries the settings used to render the CMM ServiceAccount and
// DaemonSet.
type CMMConf struct {
// Namespace is the namespace of the rendered objects; it must not be blank.
Namespace string
// Image is the CMM container image. When empty, a localhost/monok8s/cmm
// image tagged with the build version is used and never pulled.
Image string
// ImagePullSecrets lists the pull secret names attached to the pod spec.
ImagePullSecrets []string
// Labels are copied onto the ServiceAccount and DaemonSet metadata.
// buildCMMDaemonSetObjects sets the standard app.kubernetes.io/* keys.
Labels map[string]string
}
// RenderCMMDaemonSets builds the CMM objects described by conf and returns
// them serialized by renderObjects. Any build error is returned unchanged
// together with an empty string.
func RenderCMMDaemonSets(conf CMMConf) (string, error) {
	objects, buildErr := buildCMMDaemonSetObjects(conf)
	if buildErr == nil {
		return renderObjects(objects)
	}
	return "", buildErr
}
// buildCMMDaemonSetObjects returns the ServiceAccount and DaemonSet for CMM,
// in that order.
//
// conf.Namespace must be non-blank, otherwise an error is returned. Labels
// supplied in conf.Labels are kept, but the managed app.kubernetes.io/* keys
// always take precedence so the objects stay identifiable. The caller's map
// is never modified.
func buildCMMDaemonSetObjects(conf CMMConf) ([]runtime.Object, error) {
	if strings.TrimSpace(conf.Namespace) == "" {
		return nil, fmt.Errorf("namespace is required")
	}

	managed := map[string]string{
		"app.kubernetes.io/name":       cmmName,
		"app.kubernetes.io/component":  "hardware-offload",
		"app.kubernetes.io/part-of":    "monok8s",
		"app.kubernetes.io/managed-by": "monok8s",
	}

	// Merge into a fresh map: caller labels first, managed keys last so they win.
	merged := make(map[string]string, len(conf.Labels)+len(managed))
	for k, v := range conf.Labels {
		merged[k] = v
	}
	for k, v := range managed {
		merged[k] = v
	}
	conf.Labels = merged

	return []runtime.Object{
		buildCMMServiceAccount(conf),
		buildCMMDaemonSet(conf),
	}, nil
}
// buildCMMServiceAccount returns the v1 ServiceAccount named after cmmName in
// conf.Namespace, labelled with a copy of conf.Labels.
func buildCMMServiceAccount(conf CMMConf) *corev1.ServiceAccount {
	typeMeta := metav1.TypeMeta{
		APIVersion: "v1",
		Kind:       "ServiceAccount",
	}
	objectMeta := metav1.ObjectMeta{
		Name:      cmmName,
		Namespace: conf.Namespace,
		Labels:    copyStringMap(conf.Labels),
	}
	return &corev1.ServiceAccount{TypeMeta: typeMeta, ObjectMeta: objectMeta}
}
// buildCMMDaemonSet assembles the apps/v1 DaemonSet that runs CMM.
//
// The pod uses host networking, tolerates every taint, and is restricted to
// nodes whose node.kubernetes.io/instance-type label is "mono-gateway". It
// runs a privileged "dpa-app" init container (/init_dpa.sh) followed by the
// privileged cmm container; both use the image chosen by cmmImage.
func buildCMMDaemonSet(conf CMMConf) *appsv1.DaemonSet {
	runPrivileged := true
	podLabels := map[string]string{
		"app.kubernetes.io/name":       cmmName,
		"app.kubernetes.io/component":  "hardware-offload",
		"app.kubernetes.io/part-of":    "monok8s",
		"app.kubernetes.io/managed-by": "monok8s",
	}
	image, pullPolicy := cmmImage(conf)

	// Init container: gets the shared state directory plus the CDX device.
	dpaInit := corev1.Container{
		Name:            "dpa-app",
		Image:           image,
		ImagePullPolicy: pullPolicy,
		Command:         []string{"/init_dpa.sh"},
		Env:             cdxEnv(),
		SecurityContext: &corev1.SecurityContext{
			Privileged: &runPrivileged,
		},
		VolumeMounts: append(
			[]corev1.VolumeMount{
				{
					Name:      "host-run-cmm",
					MountPath: "/host/run/monok8s/cmm",
				},
			},
			cdxVolumeMounts()...,
		),
	}

	// Main container: relies on the image's default entrypoint.
	cmmMain := corev1.Container{
		Name:            cmmName,
		Image:           image,
		ImagePullPolicy: pullPolicy,
		Env:             cmmEnv(),
		SecurityContext: &corev1.SecurityContext{
			Privileged: &runPrivileged,
		},
		VolumeMounts: cdxVolumeMounts(),
	}

	podSpec := corev1.PodSpec{
		ServiceAccountName: cmmName,
		HostNetwork:        true,
		DNSPolicy:          corev1.DNSClusterFirstWithHostNet,
		ImagePullSecrets:   imagePullSecrets(conf.ImagePullSecrets),
		Tolerations: []corev1.Toleration{
			{Operator: corev1.TolerationOpExists},
		},
		InitContainers: []corev1.Container{dpaInit},
		Containers:     []corev1.Container{cmmMain},
		Volumes:        cmmVolumes(),
		NodeSelector: map[string]string{
			"node.kubernetes.io/instance-type": "mono-gateway",
		},
	}

	ds := &appsv1.DaemonSet{}
	ds.TypeMeta = metav1.TypeMeta{
		APIVersion: "apps/v1",
		Kind:       "DaemonSet",
	}
	ds.ObjectMeta = metav1.ObjectMeta{
		Name:      cmmName,
		Namespace: conf.Namespace,
		Labels:    copyStringMap(conf.Labels),
	}
	// The selector matches on the name label only; the template carries the full set.
	ds.Spec.Selector = &metav1.LabelSelector{
		MatchLabels: map[string]string{
			"app.kubernetes.io/name": cmmName,
		},
	}
	ds.Spec.Template.ObjectMeta.Labels = podLabels
	ds.Spec.Template.Spec = podSpec
	return ds
}
// cdxEnv returns the CDX_* environment variables given to the dpa-app init
// container; each one holds the path of an XML file inside the image.
func cdxEnv() []corev1.EnvVar {
	files := [][2]string{
		{"CDX_CFG_FILE", "/etc/dpa/cdx_cfg.xml"},
		{"CDX_PCD_FILE", "/etc/dpa/cdx_pcd.xml"},
		{"CDX_PDL_FILE", "/etc/fmc/config/hxs_pdl_v3.xml"},
		{"CDX_SP_FILE", "/etc/dpa/cdx_sp.xml"},
	}

	env := make([]corev1.EnvVar, 0, len(files))
	for _, f := range files {
		env = append(env, corev1.EnvVar{Name: f[0], Value: f[1]})
	}
	return env
}
// cmmEnv returns the environment of the cmm container: the config file path
// and the connection limit.
func cmmEnv() []corev1.EnvVar {
	configPath := corev1.EnvVar{Name: "CMM_CONFIG", Value: "/etc/cmm/cmm.conf"}
	maxConnections := corev1.EnvVar{Name: "CMM_MAX_CONNECTIONS", Value: "131072"}
	return []corev1.EnvVar{configPath, maxConnections}
}
// cdxVolumeMounts returns the mount that exposes the "cdx-ctrl" volume at
// /dev/cdx_ctrl inside a container.
func cdxVolumeMounts() []corev1.VolumeMount {
	ctrl := corev1.VolumeMount{
		Name:      "cdx-ctrl",
		MountPath: "/dev/cdx_ctrl",
	}
	return []corev1.VolumeMount{ctrl}
}
// cmmVolumes returns the hostPath volumes of the CMM pod: the /dev/cdx_ctrl
// character device and the /run/monok8s/cmm directory (created if missing).
func cmmVolumes() []corev1.Volume {
	hostVolume := func(name, path string, kind corev1.HostPathType) corev1.Volume {
		return corev1.Volume{
			Name: name,
			VolumeSource: corev1.VolumeSource{
				HostPath: &corev1.HostPathVolumeSource{
					Path: path,
					Type: hostPathType(kind),
				},
			},
		}
	}

	return []corev1.Volume{
		hostVolume("cdx-ctrl", "/dev/cdx_ctrl", corev1.HostPathCharDev),
		hostVolume("host-run-cmm", "/run/monok8s/cmm", corev1.HostPathDirectoryOrCreate),
	}
}
// cmmImage picks the container image and pull policy for the CMM pod.
//
// A non-blank conf.Image is returned with surrounding whitespace removed and
// PullIfNotPresent. A blank (empty or whitespace-only) image falls back to
// the locally loaded localhost/monok8s/cmm image tagged with the build
// version and PullNever. Blank is judged with strings.TrimSpace, matching
// the validation done by the CLI command.
func cmmImage(conf CMMConf) (string, corev1.PullPolicy) {
	if image := strings.TrimSpace(conf.Image); image != "" {
		return image, corev1.PullIfNotPresent
	}
	return fmt.Sprintf("localhost/monok8s/cmm:%s", buildinfo.Version), corev1.PullNever
}

View File

@@ -188,7 +188,6 @@ make cluster-config \\
MKS_CNI_PLUGIN=none
EOF
if [ "$signed" != "true" ]; then
echo >&2
echo "warning: cluster-info was not signed within ${WAIT_SECONDS}s." >&2

View File

@@ -0,0 +1,101 @@
# Hosts a mirror for dep pkg
# kubectl apply -f dep-pkg-mirror.yaml -n [namespace]
# kubectl -n [namespace] cp ./packages <dep-pkg-mirror-pod-name>:/usr/share/nginx/html/monok8s/ -c nginx
# Fetch helper contract:
# DEP_PKG_MIRROR=https://mirror.example.com/monok8s
# mirror URL = ${DEP_PKG_MIRROR}/packages/${mirror_path}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: dep-pkg-mirror-data
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 20Gi
---
apiVersion: v1
kind: ConfigMap
metadata:
name: dep-pkg-mirror-nginx
data:
default.conf: |
server {
listen 8080;
server_name _;
root /usr/share/nginx/html;
autoindex on;
autoindex_exact_size off;
autoindex_localtime on;
location /monok8s/packages/ {
try_files $uri =404;
}
location = /healthz {
access_log off;
return 200 "ok\n";
}
}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dep-pkg-mirror
spec:
replicas: 1
selector:
matchLabels:
app: dep-pkg-mirror
template:
metadata:
labels:
app: dep-pkg-mirror
spec:
containers:
- name: nginx
image: nginx:1.27-alpine
imagePullPolicy: IfNotPresent
ports:
- name: http
containerPort: 8080
readinessProbe:
httpGet:
path: /healthz
port: http
livenessProbe:
httpGet:
path: /healthz
port: http
volumeMounts:
- name: data
mountPath: /usr/share/nginx/html/monok8s/packages
subPath: packages
readOnly: false
- name: nginx-conf
mountPath: /etc/nginx/conf.d/default.conf
subPath: default.conf
readOnly: true
volumes:
- name: data
persistentVolumeClaim:
claimName: dep-pkg-mirror-data
- name: nginx-conf
configMap:
name: dep-pkg-mirror-nginx
---
apiVersion: v1
kind: Service
metadata:
name: dep-pkg-mirror
spec:
type: ClusterIP
selector:
app: dep-pkg-mirror
ports:
- name: http
port: 80
targetPort: http

80
devtools/push-dep-pkg-mirror.sh Executable file
View File

@@ -0,0 +1,80 @@
#!/bin/sh
# Copy the local packages/ directory into the dep-pkg-mirror pod.
#
# Environment overrides:
#   NAMESPACE   namespace of the mirror pod          (default: default)
#   APP_LABEL   label selector used to find the pod  (default: app=dep-pkg-mirror)
#   CONTAINER   container to copy into               (default: nginx)
#   REMOTE_DIR  destination directory inside the pod
set -eu

# Resolve the script directory from $0: BASH_SOURCE is a bash-only array and
# is a syntax error in POSIX shells such as dash, which /bin/sh often is.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)"
NAMESPACE="${NAMESPACE:-default}"
PACKAGES_DIR="$(realpath "$SCRIPT_DIR/../packages/")"
APP_LABEL="${APP_LABEL:-app=dep-pkg-mirror}"
CONTAINER="${CONTAINER:-nginx}"
REMOTE_DIR="${REMOTE_DIR:-/usr/share/nginx/html/monok8s/packages}"

if [ ! -d "$PACKAGES_DIR" ]; then
    echo "error: package dir not found: $PACKAGES_DIR" >&2
    exit 1
fi
# Abort the script unless the command named by $1 can be found.
need_cmd() {
    if ! command -v "$1" >/dev/null 2>&1; then
        echo "error: missing command: $1" >&2
        exit 1
    fi
}
need_cmd kubectl
need_cmd mktemp
need_cmd tar
need_cmd cp
need_cmd find
# List every running pod matching the label. Using items[*] rather than
# items[0] yields an empty string when nothing matches; items[0] makes kubectl
# fail with an index error, and set -e would abort before the message below.
pods="$(
    kubectl -n "$NAMESPACE" get pod \
        -l "$APP_LABEL" \
        --field-selector=status.phase=Running \
        -o jsonpath='{.items[*].metadata.name}'
)"
# Names are space-separated; keep the first one.
pod="${pods%% *}"

if [ -z "$pod" ]; then
    echo "error: no running pod found with label: $APP_LABEL in namespace: $NAMESPACE" >&2
    exit 1
fi
echo "using pod: $pod"
echo "remote dir: $REMOTE_DIR"
echo "checking remote dir is writable"
kubectl -n "$NAMESPACE" exec "$pod" -c "$CONTAINER" -- sh -c "
mkdir -p '$REMOTE_DIR' &&
touch '$REMOTE_DIR/.write-test' &&
rm -f '$REMOTE_DIR/.write-test'
"
stage="$(mktemp -d)"
# Remove the temporary staging directory and everything in it.
cleanup() { rm -rf "$stage"; }
trap cleanup EXIT INT TERM
mkdir -p "$stage/packages"
echo "staging $PACKAGES_DIR/ as packages/"
tar \
--exclude='.DS_Store' \
--exclude='.stamp-*' \
-C "$PACKAGES_DIR" \
-cf - . | tar -C "$stage/packages" -xf -
echo "copying staged packages into mirror pod"
kubectl -n "$NAMESPACE" cp \
"$stage/packages/." \
"$pod:$REMOTE_DIR" \
-c "$CONTAINER"
echo "done"
echo
echo "Mirror base URL:"
echo " DEP_PKG_MIRROR=http://dep-pkg-mirror.${NAMESPACE}.svc.cluster.local/monok8s"
echo
echo "Example:"
echo " packages/kubernetes/kubelet-v1.35.3"
echo " -> http://dep-pkg-mirror.${NAMESPACE}.svc.cluster.local/monok8s/packages/kubernetes/kubelet-v1.35.3"

310
docker/ask.Dockerfile Normal file
View File

@@ -0,0 +1,310 @@
ARG BUILD_BASE_TAG=dev
ARG DOCKER_IMAGE_ROOT=monok8s
FROM --platform=$BUILDPLATFORM ${DOCKER_IMAGE_ROOT}/build-base:${BUILD_BASE_TAG} AS build
# Install pkg-config, used later to locate the libraries built into the musl sysroot
RUN apt-get update && apt-get install -y pkg-config
RUN git config --global user.email "monok8s@localhost" && \
git config --global user.name "monok8s authors" && \
git config --global --add safe.directory '*'
WORKDIR /src
ARG AARCH64_MUSL_CC_TAR
ARG NXP_TAR
ARG MONO_ASK_TAR
ARG LIBNFNETLINK_TAR
ARG LIBMNL_TAR
ARG LIBNFCT_TAR
ARG FMLIB_TAR
ARG FMC_TAR
ARG LIBXML2_TAR
ARG LIBPCAP_TAR
ARG LIBCLI_TAR
ARG TCLAP_TAR
# ASK's version pins (hardcoded wget)
ARG LIBNFNETLINK_VERSION
ARG LIBNFCT_VERSION
# MUSL Cross Compiler
COPY "${AARCH64_MUSL_CC_TAR}" ./aarch64_musl_cc.tar.gz
# Linux kernel
COPY "${NXP_TAR}" ./kernel.tar.gz
# Copy the ASK deps
COPY "${MONO_ASK_TAR}" ./mono-ask.tar.gz
COPY "${FMC_TAR}" ./fmc.tar.gz
COPY "${FMLIB_TAR}" ./fmlib.tar.gz
COPY "${LIBXML2_TAR}" ./libxml2.tar.xz
COPY "${LIBPCAP_TAR}" ./libpcap.tar.xz
COPY "${TCLAP_TAR}" ./tclap.tar.gz
COPY "${LIBMNL_TAR}" ./libmnl.tar.bz2
COPY "${LIBCLI_TAR}" ./libcli.tar.gz
# Pinned version should keep version names
COPY "${LIBNFNETLINK_TAR}" ./libnfnetlink-${LIBNFNETLINK_VERSION}.tar.bz2
COPY "${LIBNFCT_TAR}" ./libnetfilter_conntrack-${LIBNFCT_VERSION}.tar.xz
# Provision the musl cross-compiler from musl.cc
RUN tar zxf "aarch64_musl_cc.tar.gz" -C /opt
# Common paths / flags for the remaining ASK build.
# Keep userspace fully static against the musl sysroot.
ENV PATH="/opt/aarch64-linux-musl-cross/bin:${PATH}" \
ASK_DIR=/src/ASK \
KERNEL_DIR=/src/linux \
SYSROOT=/opt/aarch64-linux-musl-cross/aarch64-linux-musl \
HOST=aarch64-linux-musl \
CROSS_COMPILE=aarch64-linux-musl- \
ARCH=arm64 \
PLATFORM=LS1043A
# Extract and build the dependency libraries
RUN mkdir -p ASK/sources/tarballs && \
tar zxf "mono-ask.tar.gz" -C "ASK" --strip-components=1 && \
mv libnfnetlink-${LIBNFNETLINK_VERSION}.tar.bz2 ASK/sources/tarballs/ && \
mv libnetfilter_conntrack-${LIBNFCT_VERSION}.tar.xz ASK/sources/tarballs/
RUN mkdir linux && tar zxf "kernel.tar.gz" -C "linux" --strip-components=1 && \
mkdir -p /src/ASK/patches/kernel/updated-patch
# ASK's Kernel patches
COPY patches/ask/split-kernel-patch.sh /src/split-kernel-patch.sh
COPY patches/ask/upstream/kernel/*.patch /src/ASK/patches/kernel/updated-patch/
RUN chmod +x /src/split-kernel-patch.sh && \
PATCH_FILE="/src/ASK/patches/kernel/002-mono-gateway-ask-kernel_linux_6_12.patch" \
LINUX_DIR="/src/linux" \
OUT_DIR="/src/ASK/patches/kernel/split-002" \
/src/split-kernel-patch.sh split && \
/src/split-kernel-patch.sh apply
# Build the kernel first, without the custom DTS
COPY kernel-extra.config /src/kernel-extra.config
COPY kernel-build/ensure-kconfig.sh /src/
RUN cd /src/linux \
&& make ARCH="${ARCH}" CROSS_COMPILE="${CROSS_COMPILE}" defconfig lsdk.config \
&& ./scripts/kconfig/merge_config.sh -m .config "${ASK_DIR}/config/kernel/defconfig" /src/kernel-extra.config \
&& make ARCH="${ARCH}" CROSS_COMPILE="${CROSS_COMPILE}" olddefconfig \
&& /src/ensure-kconfig.sh .config /src/kernel-extra.config \
&& make ARCH="${ARCH}" CROSS_COMPILE="${CROSS_COMPILE}" -j"$(nproc)"
# tclap
RUN mkdir -p tclap && tar zxf "tclap.tar.gz" -C "tclap" --strip-components=1 && \
cp -r tclap/include/tclap /opt/aarch64-linux-musl-cross/aarch64-linux-musl/include/ && \
rm -rf tclap
# libxml2
RUN mkdir -p libxml2 && tar xf "libxml2.tar.xz" -C "libxml2" --strip-components=1 && \
cd libxml2 && \
CC=aarch64-linux-musl-gcc ./configure --host=aarch64-linux-musl \
--prefix=/opt/aarch64-linux-musl-cross/aarch64-linux-musl \
--enable-static --disable-shared --without-python --without-zlib --without-lzma && \
make -j$(nproc) && make install && \
cd .. && rm -rf libxml2
# libmnl
RUN mkdir -p libmnl && tar xjf "libmnl.tar.bz2" -C "libmnl" --strip-components=1 && \
cd libmnl && \
CC=aarch64-linux-musl-gcc ./configure --host=aarch64-linux-musl \
--prefix=/opt/aarch64-linux-musl-cross/aarch64-linux-musl \
--enable-static --disable-shared && \
make -j$(nproc) && make install && \
cd .. && rm -rf libmnl
# libcli
RUN mkdir -p libcli && tar zxf "libcli.tar.gz" -C "libcli" --strip-components=1 && \
cd libcli && \
make CC=aarch64-linux-musl-gcc AR=aarch64-linux-musl-ar libcli.a && \
cp libcli.h /opt/aarch64-linux-musl-cross/aarch64-linux-musl/include/ && \
cp libcli.a /opt/aarch64-linux-musl-cross/aarch64-linux-musl/lib/ && \
cd .. && rm -rf libcli
# libpcap
RUN mkdir -p libpcap && tar xf "libpcap.tar.xz" -C "libpcap" --strip-components=1 && \
cd libpcap && \
CC=aarch64-linux-musl-gcc ./configure --host=aarch64-linux-musl \
--prefix=/opt/aarch64-linux-musl-cross/aarch64-linux-musl \
--with-pcap=linux --enable-static --disable-shared \
--disable-usb --disable-netmap --disable-bluetooth --disable-dbus && \
make -j$(nproc) && make install && \
cd .. && rm -rf libpcap
# fmlib, fmc
RUN mkdir fmlib && \
tar zxf "fmlib.tar.gz" -C "fmlib" --strip-components=1 && \
cd fmlib && git init -q && git add -A && git commit -q -m "base" && \
git apply /src/ASK/patches/fmlib/01-mono-ask-extensions.patch && \
make CROSS_COMPILE=aarch64-linux-musl- KERNEL_SRC=../linux libfm-arm.a && \
ln -sf libfm-arm.a libfm.a && \
cd ../ && \
mkdir -p fmc && \
tar zxf "fmc.tar.gz" -C "fmc" --strip-components=1 && \
# Handle fmc: Initialize dummy repo, patch, and build \
cd fmc && git init -q && git add -A && git commit -q -m "base" && \
git apply /src/ASK/patches/fmc/01-mono-ask-extensions.patch && \
make -C "source" CC="aarch64-linux-musl-gcc -static" CXX="aarch64-linux-musl-g++ -static" AR=aarch64-linux-musl-ar \
MACHINE=ls1046 \
FMD_USPACE_HEADER_PATH=../../fmlib/include/fmd \
FMD_USPACE_LIB_PATH=../../fmlib \
LIBXML2_HEADER_PATH=/opt/aarch64-linux-musl-cross/aarch64-linux-musl/include/libxml2 \
TCLAP_HEADER_PATH=/opt/aarch64-linux-musl-cross/aarch64-linux-musl/include
# Patch vendor's modules
RUN mkdir -p /src/patches/ask
COPY patches/ask/ /src/patches/ask/
RUN cd /src/ASK && \
git init -q && git add -A && git commit -q -m "base" && \
find /src/patches/ask \
-path /src/patches/ask/upstream -prune -o \
-name '*.patch' -print \
| sort > /tmp/ask-module-patches.list && \
test ! -s /tmp/ask-module-patches.list || xargs -a /tmp/ask-module-patches.list git apply --check && \
test ! -s /tmp/ask-module-patches.list || xargs -a /tmp/ask-module-patches.list git apply
# Build patched libnfnetlink + libnetfilter_conntrack into the musl sysroot.
# These are needed by cmm through pkg-config.
RUN mkdir -p "${ASK_DIR}/sources" && \
mkdir -p libnfnetlink && \
tar xjf "${ASK_DIR}/sources/tarballs/libnfnetlink-${LIBNFNETLINK_VERSION}.tar.bz2" \
-C libnfnetlink --strip-components=1 && \
cd libnfnetlink && \
git init -q && git add -A && git commit -q -m "upstream" && \
git apply "${ASK_DIR}/patches/libnfnetlink/01-nxp-ask-nonblocking-heap-buffer.patch" && \
CC=aarch64-linux-musl-gcc AR=aarch64-linux-musl-ar RANLIB=aarch64-linux-musl-ranlib \
./configure --host="${HOST}" --prefix="${SYSROOT}" \
--enable-static --disable-shared && \
make -j$(nproc) && make install && \
cd /src && rm -rf libnfnetlink && \
mkdir -p libnetfilter_conntrack && \
tar xf "${ASK_DIR}/sources/tarballs/libnetfilter_conntrack-${LIBNFCT_VERSION}.tar.xz" \
-C libnetfilter_conntrack --strip-components=1 && \
cd libnetfilter_conntrack && \
cp /src/patches/ask/upstream/libnetfilter-conntrack/* "${ASK_DIR}/patches/libnetfilter-conntrack/" && \
git init -q && git add -A && git commit -q -m "upstream" && \
find "${ASK_DIR}/patches/libnetfilter-conntrack/" \
-name '*.patch' -exec sha256sum {} \; && \
find "${ASK_DIR}/patches/libnetfilter-conntrack/" \
-name '*.patch' -print \
| sort > /tmp/libnfct-patches.list && \
test ! -s /tmp/libnfct-patches.list || xargs -a /tmp/libnfct-patches.list git apply --check && \
test ! -s /tmp/libnfct-patches.list || xargs -a /tmp/libnfct-patches.list git apply && \
PKG_CONFIG_PATH="${SYSROOT}/lib/pkgconfig" \
CC=aarch64-linux-musl-gcc AR=aarch64-linux-musl-ar RANLIB=aarch64-linux-musl-ranlib \
./configure --host="${HOST}" --prefix="${SYSROOT}" \
--enable-static --disable-shared \
CFLAGS="-I${SYSROOT}/include" \
LDFLAGS="-L${SYSROOT}/lib" && \
make -j$(nproc) && make install && \
cd /src && rm -rf libnetfilter_conntrack
# Build libfci, used by cmm.
RUN make -C "${ASK_DIR}/fci/lib" \
CC=aarch64-linux-musl-gcc \
AR=aarch64-linux-musl-ar
# DTS changes happen after the expensive kernel build layer
COPY kernel-build/dts/*.dts /src/linux/arch/arm64/boot/dts/freescale/
ARG DEVICE_TREE_TARGET
RUN cd /src/linux \
&& grep -q "^dtb-\\\$(CONFIG_ARCH_LAYERSCAPE) += ${DEVICE_TREE_TARGET}.dtb$" \
arch/arm64/boot/dts/freescale/Makefile \
|| echo "dtb-\$(CONFIG_ARCH_LAYERSCAPE) += ${DEVICE_TREE_TARGET}.dtb" \
>> arch/arm64/boot/dts/freescale/Makefile \
&& make ARCH="${ARCH}" CROSS_COMPILE="${CROSS_COMPILE}" \
"freescale/${DEVICE_TREE_TARGET}.dtb"
# Build out-of-tree ASK kernel modules: cdx -> fci, plus auto_bridge.
RUN make -C "${ASK_DIR}/cdx" \
CROSS_COMPILE="${CROSS_COMPILE}" \
ARCH="${ARCH}" \
KERNELDIR="${KERNEL_DIR}" \
PLATFORM="${PLATFORM}" \
modules && \
make -C "${ASK_DIR}/fci" \
CROSS_COMPILE="${CROSS_COMPILE}" \
ARCH="${ARCH}" \
KERNEL_SOURCE="${KERNEL_DIR}" \
BOARD_ARCH="${ARCH}" \
KBUILD_EXTRA_SYMBOLS="${ASK_DIR}/cdx/Module.symvers" \
modules && \
make -C "${ASK_DIR}/auto_bridge" \
CROSS_COMPILE="${CROSS_COMPILE}" \
ARCH="${ARCH}" \
KERNEL_SOURCE="${KERNEL_DIR}" \
PLATFORM="${PLATFORM}"
# Patch vendor's binaries
RUN sed -i '/^[[:space:]]*CFLAGS[[:space:]]*+=.*-Wall.*-Werror/a CFLAGS += -Wno-address-of-packed-member' "${ASK_DIR}/cmm/Makefile"
# Build remaining userspace binaries: cmm and dpa_app.
RUN make -C "${ASK_DIR}/cmm" \
CC="aarch64-linux-musl-gcc -static" \
LIBFCI_DIR="${ASK_DIR}/fci/lib" \
ABM_DIR="${ASK_DIR}/auto_bridge" \
SYSROOT="${SYSROOT}" \
PKG_CONFIG_PATH="${SYSROOT}/lib/pkgconfig" && \
make -C "${ASK_DIR}/dpa_app" \
CC="aarch64-linux-musl-gcc -static" \
CFLAGS="-DDPAA_DEBUG_ENABLE -DLS1043 -DNCSW_LINUX -D__STDC_LIMIT_MACROS \
-I/src/fmc/source \
-I${ASK_DIR}/cdx \
-I/src/fmlib/include/fmd \
-I/src/fmlib/include/fmd/Peripherals \
-I/src/fmlib/include/fmd/integrations" \
LDFLAGS="-static -L/src/fmc/source -L/src/fmlib -L${SYSROOT}/lib \
-lfmc -lfm -lcli -lxml2 -lstdc++ -lpthread -lm"
# Stage outputs
RUN mkdir -p /out/ASK/dist && \
cp "${ASK_DIR}/cdx/cdx.ko" /out/ASK/dist && \
cp "${ASK_DIR}/fci/fci.ko" /out/ASK/dist && \
cp "${ASK_DIR}/auto_bridge/auto_bridge.ko" /out/ASK/dist && \
mkdir -p /out/ASK/bin && \
cp /src/fmc/source/fmc /out/ASK/bin/ && \
cp "${ASK_DIR}/cmm/src/cmm" /out/ASK/bin/ && \
cp "${ASK_DIR}/dpa_app/dpa_app" /out/ASK/bin/
# aarch64-linux-musl-strip /out/ASK/bin/fmc /out/ASK/bin/cmm /out/ASK/bin/dpa_app
# in-tree Linux kernel modules
# Install with the same ARCH/CROSS_COMPILE the kernel was built with (the musl
# toolchain from ENV) rather than a hard-coded aarch64-linux-gnu- prefix.
RUN mkdir -p /out/rootfs && \
    make -C /src/linux ARCH="${ARCH}" CROSS_COMPILE="${CROSS_COMPILE}" \
    modules_install INSTALL_MOD_PATH=/out/rootfs
RUN KERNEL_VER=$(ls /out/rootfs/lib/modules/) && \
mkdir -p /out/rootfs/lib/modules/$KERNEL_VER/extra && \
mv /out/ASK/dist/*.ko /out/rootfs/lib/modules/$KERNEL_VER/extra/ && \
depmod -b /out/rootfs $KERNEL_VER && \
cd /out && tar zcf rootfs.tar.gz rootfs
RUN mkdir -p /out/rootfs-cfg/etc/dpa && \
cp -r "${ASK_DIR}/dpa_app/files/etc/"* /out/rootfs-cfg/etc/dpa/ && \
cp "${ASK_DIR}/config/gateway-dk/cdx_cfg.xml" /out/rootfs-cfg/etc/dpa/ && \
cp -r /src/fmc/etc/* /out/rootfs-cfg/etc && \
mkdir -p /out/rootfs-cfg/etc/cmm && \
cp "${ASK_DIR}/config/fastforward" /out/rootfs-cfg/etc/cmm/fastforward.vendor.orig
FROM scratch AS export
ARG DEVICE_TREE_TARGET
# Export the newly staged in-tree modules
COPY --from=build /out/rootfs.tar.gz /
COPY --from=build \
/src/linux/System.map \
/src/linux/.config \
/src/linux/arch/arm64/boot/Image.gz \
/src/linux/arch/arm64/boot/dts/freescale/${DEVICE_TREE_TARGET}.dtb \
/kernel/
COPY --from=build /out/ASK/bin/ /bin/
# Export the configs for Gateway Development Kit
COPY --from=build /out/rootfs-cfg/ /rootfs-cfg/

13
docker/cmm.Dockerfile Normal file
View File

@@ -0,0 +1,13 @@
ARG ALPINE_SERIES=3.23
FROM alpine:${ALPINE_SERIES}
COPY ./out/ASK/rootfs-cfg/etc /etc
COPY ./out/ASK/bin/cmm /bin/cmm
COPY ./out/ASK/bin/dpa_app /bin/dpa_app
COPY ./ask/cmm/cmm.conf /etc/cmm/cmm.conf
COPY ./ask/cmm/entrypoint.sh /entrypoint.sh
COPY ./ask/cmm/init_dpa.sh /init_dpa.sh
RUN chmod +x /bin/cmm /bin/dpa_app /init_dpa.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -1,12 +1,23 @@
ARG DEP_PKG_MIRROR=
ARG DEP_PKG_OFFLINE=0
FROM alpine:3.23.0 AS base
ARG DEP_PKG_MIRROR
ARG DEP_PKG_OFFLINE
ENV DEP_PKG_MIRROR="${DEP_PKG_MIRROR}"
ENV DEP_PKG_OFFLINE="${DEP_PKG_OFFLINE}"
RUN apk add --no-cache curl ca-certificates
COPY scripts/fetch-artifact /usr/local/bin/fetch-artifact
# ---- kubelet ----
FROM base AS kubelet
ARG KUBE_VERSION
ARG ARCH
WORKDIR /out/kubernetes
RUN curl -fL --retry 3 -o "kubelet-${KUBE_VERSION}" \
RUN fetch-artifact \
"packages/kubernetes/kubelet-${KUBE_VERSION}" \
"kubelet-${KUBE_VERSION}" \
"https://dl.k8s.io/${KUBE_VERSION}/bin/linux/${ARCH}/kubelet" && \
chmod +x "kubelet-${KUBE_VERSION}"
@@ -15,7 +26,9 @@ FROM base AS kubeadm
ARG KUBE_VERSION
ARG ARCH
WORKDIR /out/kubernetes
RUN curl -fL --retry 3 -o "kubeadm-${KUBE_VERSION}" \
RUN fetch-artifact \
"packages/kubernetes/kubeadm-${KUBE_VERSION}" \
"kubeadm-${KUBE_VERSION}" \
"https://dl.k8s.io/${KUBE_VERSION}/bin/linux/${ARCH}/kubeadm" && \
chmod +x "kubeadm-${KUBE_VERSION}"
@@ -24,73 +37,189 @@ FROM base AS kubectl
ARG KUBE_VERSION
ARG ARCH
WORKDIR /out/kubernetes
RUN curl -fL --retry 3 -o "kubectl-${KUBE_VERSION}" \
RUN fetch-artifact \
"packages/kubernetes/kubectl-${KUBE_VERSION}" \
"kubectl-${KUBE_VERSION}" \
"https://dl.k8s.io/${KUBE_VERSION}/bin/linux/${ARCH}/kubectl" && \
chmod +x "kubectl-${KUBE_VERSION}"
# ---- busybox ----
FROM base AS busybox
ARG BUSYBOX_VERSION
ARG BUSYBOX_TAR
WORKDIR /out
RUN curl -fL --retry 3 -o "busybox-${BUSYBOX_VERSION}.tar.gz" \
RUN fetch-artifact \
"${BUSYBOX_TAR}" \
"busybox-${BUSYBOX_VERSION}.tar.gz" \
"https://github.com/mirror/busybox/archive/refs/tags/${BUSYBOX_VERSION}.tar.gz"
# ---- e2fsprogs ----
FROM base AS e2fsprogs
ARG E2FSPROGS_VERSION
ARG E2FSPROGS_TAR
WORKDIR /out
RUN curl -fL --retry 3 -o "e2fsprogs-v${E2FSPROGS_VERSION}.tar.gz" \
RUN fetch-artifact \
"${E2FSPROGS_TAR}" \
"e2fsprogs-v${E2FSPROGS_VERSION}.tar.gz" \
"https://github.com/tytso/e2fsprogs/archive/refs/tags/v${E2FSPROGS_VERSION}.tar.gz"
# ---- dpdk ----
FROM base AS dpdk
ARG DPDK_VERSION
ARG DPDK_TAR
WORKDIR /out/nxp/dpdk
RUN curl -fL --retry 3 -o "${DPDK_VERSION}.tar.gz" \
RUN fetch-artifact \
"${DPDK_TAR}" \
"${DPDK_VERSION}.tar.gz" \
"https://github.com/nxp-qoriq/dpdk/archive/refs/tags/${DPDK_VERSION}.tar.gz"
# ---- fmlib ----
FROM base AS fmlib
ARG FMLIB_VERSION
ARG FMLIB_TAR
WORKDIR /out/nxp/fmlib
RUN curl -fL --retry 3 -o "${FMLIB_VERSION}.tar.gz" \
RUN fetch-artifact \
"${FMLIB_TAR}" \
"${FMLIB_VERSION}.tar.gz" \
"https://github.com/nxp-qoriq/fmlib/archive/refs/tags/${FMLIB_VERSION}.tar.gz"
# ---- fmc ----
FROM base AS fmc
ARG FMC_VERSION
ARG FMC_TAR
WORKDIR /out/nxp/fmc
RUN curl -fL --retry 3 -o "${FMC_VERSION}.tar.gz" \
RUN fetch-artifact \
"${FMC_TAR}" \
"${FMC_VERSION}.tar.gz" \
"https://github.com/nxp-qoriq/fmc/archive/refs/tags/${FMC_VERSION}.tar.gz"
# ---- vpp ----
FROM base AS vpp
ARG VPP_VERSION
ARG VPP_TAR
WORKDIR /out/nxp/vpp
RUN curl -fL --retry 3 -o "${VPP_VERSION}.tar.gz" \
RUN fetch-artifact \
"${VPP_TAR}" \
"${VPP_VERSION}.tar.gz" \
"https://github.com/nxp-qoriq/vpp/archive/refs/tags/${VPP_VERSION}.tar.gz"
# ---- MUSL CC ----
FROM base AS aarch64_musl_cc
WORKDIR /out
RUN fetch-artifact \
"packages/aarch64-linux-musl-cross.tgz" \
"aarch64-linux-musl-cross.tgz" \
"https://musl.cc/aarch64-linux-musl-cross.tgz"
# ---- ASK ----
FROM base AS mono_ask
ARG MONO_ASK_VERSION
ARG MONO_ASK_TAR
WORKDIR /out/ask
RUN fetch-artifact \
"${MONO_ASK_TAR}" \
"${MONO_ASK_VERSION}.tar.gz" \
"https://github.com/we-are-mono/ASK/archive/refs/tags/${MONO_ASK_VERSION}.tar.gz"
# ---- libnfnetlink ----
FROM base AS libnfnetlink
ARG LIBNFNETLINK_VERSION
ARG LIBNFNETLINK_TAR
WORKDIR /out/ask/libnfnetlink
RUN fetch-artifact \
"${LIBNFNETLINK_TAR}" \
"${LIBNFNETLINK_VERSION}.tar.bz2" \
"https://www.netfilter.org/projects/libnfnetlink/files/libnfnetlink-${LIBNFNETLINK_VERSION}.tar.bz2"
# ---- libnfct ----
FROM base AS libnfct
ARG LIBNFCT_VERSION
ARG LIBNFCT_TAR
WORKDIR /out/ask/libnfct
RUN fetch-artifact \
"${LIBNFCT_TAR}" \
"${LIBNFCT_VERSION}.tar.xz" \
"https://www.netfilter.org/projects/libnetfilter_conntrack/files/libnetfilter_conntrack-${LIBNFCT_VERSION}.tar.xz"
# ---- libmnl ----
FROM base AS libmnl
ARG LIBMNL_VERSION
ARG LIBMNL_TAR
WORKDIR /out/ask/libmnl
RUN fetch-artifact \
"${LIBMNL_TAR}" \
"${LIBMNL_VERSION}.tar.bz2" \
"https://www.netfilter.org/projects/libmnl/files/libmnl-${LIBMNL_VERSION}.tar.bz2"
# ---- tclap ----
FROM base AS tclap
ARG TCLAP_VERSION
ARG TCLAP_TAR
WORKDIR /out/ask/tclap
RUN fetch-artifact \
"${TCLAP_TAR}" \
"${TCLAP_VERSION}.tar.gz" \
"https://sourceforge.net/projects/tclap/files/tclap-${TCLAP_VERSION}.tar.gz"
# ---- libxml2 ----
FROM base AS libxml2
ARG LIBXML2_VERSION
ARG LIBXML2_TAR
WORKDIR /out/ask/libxml2
RUN fetch-artifact \
"${LIBXML2_TAR}" \
"${LIBXML2_VERSION}.tar.xz" \
"https://download.gnome.org/sources/libxml2/2.11/libxml2-${LIBXML2_VERSION}.tar.xz"
# ---- libcli ----
FROM base AS libcli
ARG LIBCLI_VERSION
ARG LIBCLI_TAR
WORKDIR /out/ask/libcli
RUN fetch-artifact \
"${LIBCLI_TAR}" \
"${LIBCLI_VERSION}.tar.gz" \
"https://github.com/dparrish/libcli/archive/refs/tags/V${LIBCLI_VERSION}.tar.gz"
# ---- libpcap ----
FROM base AS libpcap
ARG LIBPCAP_VERSION
ARG LIBPCAP_TAR
WORKDIR /out/ask/libpcap
RUN fetch-artifact \
"${LIBPCAP_TAR}" \
"${LIBPCAP_VERSION}.tar.xz" \
"https://www.tcpdump.org/release/libpcap-${LIBPCAP_VERSION}.tar.xz"
# ---- alpine rootfs ----
FROM base AS alpine_rootfs
ARG ALPINE_SERIES
ARG ALPINE_ARCH
ARG ALPINE_VER
ARG ALPINE_TAR
WORKDIR /out
RUN curl -fL --retry 3 -o "alpine-minirootfs-${ALPINE_VER}-${ALPINE_ARCH}.tar.gz" \
RUN fetch-artifact \
"${ALPINE_TAR}" \
"alpine-minirootfs-${ALPINE_VER}-${ALPINE_ARCH}.tar.gz" \
"https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_SERIES}/releases/${ALPINE_ARCH}/alpine-minirootfs-${ALPINE_VER}-${ALPINE_ARCH}.tar.gz"
# ---- nxp linux ----
FROM base AS nxp_linux
ARG NXP_VERSION
ARG NXP_TAR
WORKDIR /out/nxp/kernel
RUN curl -fL --retry 3 -o "${NXP_VERSION}.tar.gz" \
RUN fetch-artifact \
"${NXP_TAR}" \
"${NXP_VERSION}.tar.gz" \
"https://github.com/nxp-qoriq/linux/archive/refs/tags/${NXP_VERSION}.tar.gz"
# ---- crio ----
FROM base AS crio
ARG CRIO_VERSION
ARG CRIO_TAR
WORKDIR /out
RUN curl -fL --retry 3 -o "${CRIO_VERSION}.tar.gz" \
RUN fetch-artifact \
"${CRIO_TAR}" \
"${CRIO_VERSION}.tar.gz" \
"https://storage.googleapis.com/cri-o/artifacts/${CRIO_VERSION}.tar.gz"
# ---- final exported artifact set ----
@@ -101,9 +230,18 @@ COPY --from=kubectl /out/ /
COPY --from=busybox /out/ /
COPY --from=e2fsprogs /out/ /
COPY --from=dpdk /out/ /
COPY --from=aarch64_musl_cc /out/ /
COPY --from=mono_ask /out/ /
COPY --from=vpp /out/ /
COPY --from=fmlib /out/ /
COPY --from=fmc /out/ /
COPY --from=vpp /out/ /
COPY --from=libnfnetlink /out/ /
COPY --from=libnfct /out/ /
COPY --from=libmnl /out/ /
COPY --from=libcli /out/ /
COPY --from=libpcap /out/ /
COPY --from=libxml2 /out/ /
COPY --from=tclap /out/ /
COPY --from=alpine_rootfs /out/ /
COPY --from=nxp_linux /out/ /
COPY --from=crio /out/ /

320
docs/cmm.md Normal file
View File

@@ -0,0 +1,320 @@
# CMM integration for monok8s
This document describes how monok8s runs the vendor Connection Manager daemon (`cmm`) from [ASK](https://github.com/we-are-mono/ASK/) on Kubernetes nodes.
`cmm` is part of the NXP/ASK hardware-offload stack. In the vendor layout it is normally started as a boot-time service, together with the `cdx` kernel module and `dpa_app`. monok8s intentionally does **not** follow that model. Kubernetes has priority: the node should boot, kubelet should come up, CNI should configure networking, and only then should the CMM stack start from a DaemonSet.
## Startup model
The intended startup order is:
1. The node boots.
2. `kubelet` starts.
3. CNI is configured.
4. The `cmm` DaemonSet starts on the node.
5. The DaemonSet prepares the DPA/CDX runtime and starts `cmm` in the foreground.
This is different from the vendor flow, where CMM-related components are treated as host services started early during boot. That flow is a poor fit for monok8s because CNI and Kubernetes-owned networking must win any ordering conflict.
## Local changes from vendor ASK
monok8s carries a small set of patches so the ASK CMM stack behaves correctly inside a Kubernetes pod.
### `cmm`
The `cmm` daemon is patched to:
- run in the foreground, so it can be supervised directly by Kubernetes;
- log to stdout/stderr, so logs are visible through `kubectl logs`;
- avoid exiting when it sees CNI-managed conntrack entries it does not understand.
The last item is important. On a Kubernetes node, conntrack is not exclusively owned by CMM. CNI, kubelet, host networking, and ordinary pods can all create conntrack entries. CMM must tolerate that environment.
### `cdx` kernel module
The `cdx` module is patched so loading the module does **not** automatically start `dpa_app`.
In monok8s, module loading and DPA configuration are separate steps. This avoids doing device configuration too early, before Kubernetes networking is ready.
### `dpa_app`
`dpa_app` is patched so the XML config paths can be supplied through environment variables:
- `CDX_CFG_FILE`
- `CDX_PCD_FILE`
- `CDX_PDL_FILE`
- `CDX_SP_FILE`
This lets the DaemonSet select different XML files per node, board, or port layout without rebuilding the image.
## Patch locations
The relevant patch sets are under:
```text
patches/ask/upstream/libnetfilter-conntrack
patches/ask/cmm
patches/ask/cdx
patches/ask/dpa
```
Other ASK patches in the tree are mostly kernel-porting work for the target NXP LSDK kernel, including the 6.18-based kernel used by monok8s.
## Installation
CMM is **not installed by default**. Install it explicitly after the node-control components are available.
With `MKS_ENABLE_NODE_CONTROL` enabled, generate and apply the CMM manifests with:
```sh
kubectl -n mono-system exec -it ds/node-agent -- ctl create cmm | kubectl apply -f -
```
This creates the CMM DaemonSet and the supporting objects required to run it on each matching node.
Check that the pod is running:
```sh
kubectl -n mono-system get pods -l app.kubernetes.io/name=cmm -owide
```
View logs with:
```sh
kubectl -n mono-system logs ds/cmm -f
```
If the DaemonSet name or labels change, inspect the generated YAML from `ctl create cmm` and use the actual object names.
## Accessing the CMM CLI
The CMM CLI is exposed from the pod for debugging and manual inspection. The DaemonSet uses `hostNetwork: true`, but the safest access method is still `kubectl port-forward`; it avoids exposing the CLI beyond your local machine.
First find a CMM pod:
```sh
kubectl -n mono-system get pods -l app.kubernetes.io/name=cmm
```
Then forward a local port to the CMM CLI port inside the pod. Kubernetes port-forward syntax is `LOCAL_PORT:REMOTE_PORT`.
For example, if the CMM CLI listens on port `2103` in the pod and you want to reach it on local port `12345`:
```sh
kubectl -n mono-system port-forward pod/cmm-xxxxx 12345:2103
```
In another terminal, connect to the local forwarded port:
```sh
telnet 127.0.0.1 12345
```
Use `telnet` for this CLI. Plain `ncat` can show leading garbage characters or behave badly with the login prompt because the CMM CLI behaves like a telnet-style interactive console rather than a clean raw TCP protocol.
Default login, if unchanged by the generated config, is usually:
```text
Username: admin
Password: admin
```
Do not expose this port through a Service or LoadBalancer unless you have added proper access control. Treat the CMM CLI as an operator/debug interface.
## Configuration
`ctl create cmm` emits a default configuration suitable for the expected monok8s hardware layout. You can override the generated YAML before applying it.
The vendor's original `fastforward` config is preserved in the image as a reference file, but monok8s uses its own runtime config. Keep those roles separate:
- vendor reference config: useful for comparison and debugging;
- monok8s runtime config: the config actually consumed by the DaemonSet.
The preserved vendor file uses a filename that makes this role clear:
```text
fastforward.vendor.orig
```
That name is less project-specific than `fastforward.ask.orig` and makes the intent obvious: it is the original vendor-provided config, not the active config.
## Multi-node configuration
If all nodes have the same board and port layout, one shared CMM/DPA config is enough.
If nodes have different port layouts, use node-specific XML config. The recommended pattern is:
1. Mount all supported configs into the CMM pod.
2. Pass the Kubernetes node name into the pod.
3. Run a small wrapper script before `dpa_app`.
4. The wrapper selects the XML files for the current node and exports the corresponding `CDX_*` environment variables.
5. The wrapper then execs the normal DPA initialization script.
Example DaemonSet fragment:
```yaml
spec:
template:
spec:
initContainers:
- name: dpa-app
image: localhost/monok8s/cmm:dev
imagePullPolicy: Never
command:
- /node-config/select-dpa-config.sh
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: node-config
mountPath: /node-config
readOnly: true
- name: dpa-configs
mountPath: /etc/monok8s/dpa-configs
readOnly: true
volumes:
- name: node-config
configMap:
name: cmm-node-config-wrapper
defaultMode: 0755
- name: dpa-configs
configMap:
name: cmm-dpa-configs
```
Example wrapper:
```sh
#!/bin/sh
set -eu
CONFIG_DIR="/etc/monok8s/dpa-configs/${NODE_NAME}"
if [ ! -d "${CONFIG_DIR}" ]; then
echo "missing DPA config directory for node ${NODE_NAME}: ${CONFIG_DIR}" >&2
exit 1
fi
export CDX_CFG_FILE="${CONFIG_DIR}/cdx_cfg.xml"
export CDX_PCD_FILE="${CONFIG_DIR}/cdx_pcd.xml"
export CDX_PDL_FILE="${CONFIG_DIR}/hxs_pdl_v3.xml"
export CDX_SP_FILE="${CONFIG_DIR}/cdx_sp.xml"
exec /bin/init_dpa.sh
```
The ConfigMap layout should then look like this conceptually:
```text
/etc/monok8s/dpa-configs/
node-a/
cdx_cfg.xml
cdx_pcd.xml
hxs_pdl_v3.xml
cdx_sp.xml
node-b/
cdx_cfg.xml
cdx_pcd.xml
hxs_pdl_v3.xml
cdx_sp.xml
```
For production, prefer a naming scheme based on stable node labels or hardware profiles rather than raw node names if multiple nodes share the same layout. Raw node names are fine for early bring-up, but they do not scale well.
## Operational notes
### CMM and Kubernetes conntrack
Do not assume CMM owns the full conntrack table. Kubernetes nodes contain conntrack entries from:
- CNI traffic;
- kubelet;
- host-network pods;
- service routing;
- node-local traffic;
- ordinary workloads.
CMM must tolerate unknown entries. If it exits because it encountered a CNI or Kubernetes conntrack entry, that is a bug in the integration layer, not an operator error.
### `hostNetwork: true`
The CMM pod uses `hostNetwork: true` because it needs to interact with host networking and hardware-offload state. This also means any port bound by the pod may be bound in the host network namespace.
For the CLI, prefer `kubectl port-forward` anyway. It gives you a controlled local tunnel and avoids accidentally publishing the CLI on the node network.
### `CMM_MAX_CONNECTIONS`
The default value:
```sh
CMM_MAX_CONNECTIONS="${CMM_MAX_CONNECTIONS:-131072}"
```
uses `131072`, which is `128 * 1024`. It is a power-of-two-sized default commonly used for connection-table limits. Treat it as a capacity default, not a magic correctness value.
Lower it if memory pressure is a concern. Raise it only if the hardware, memory budget, and expected traffic justify it.
## Troubleshooting
### The CMM pod is not running
Check the DaemonSet and pod events:
```sh
kubectl -n mono-system get ds cmm -oyaml
kubectl -n mono-system describe pod -l app.kubernetes.io/name=cmm
```
Then check logs:
```sh
kubectl -n mono-system logs ds/cmm --all-containers=true --tail=200
```
### The CLI shows strange characters with `ncat`
Use `telnet` instead:
```sh
telnet 127.0.0.1 12345
```
The CMM CLI behaves like a telnet-style console. `ncat --telnet` may still not behave exactly like traditional telnet for this CLI.
### Port-forward connects to the wrong port
Remember the syntax:
```text
LOCAL_PORT:REMOTE_PORT
```
So this command:
```sh
kubectl -n mono-system port-forward pod/cmm-xxxxx 12345:2103
```
means:
```text
127.0.0.1:12345 on your workstation -> port 2103 inside the pod
```
Connect to `127.0.0.1:12345`, not `127.0.0.1:2103`.
### `dpa_app` uses the wrong XML files
Confirm the environment seen by the init container or wrapper:
```sh
kubectl -n mono-system logs pod/cmm-xxxxx -c dpa-app
```
The wrapper should print enough information to identify the selected config directory and XML paths. If it does not, add explicit logging before `exec /bin/init_dpa.sh`.
## Recommended policy
Keep CMM optional. It is hardware-specific, operationally sharp, and not required for a generic Kubernetes node. The base monok8s node should boot and join the cluster without CMM. Enable CMM only on hardware profiles where the ASK offload stack is expected and tested.

View File

@@ -1,3 +1,6 @@
## ASK
The most important one is the vendor's ASK, which provides all the resources required to build the device-specific kernel.
## Updating build.env
You can find the latest package versions here:
* [kernel](https://github.com/nxp-qoriq/linux/archive/refs/tags/)
@@ -10,3 +13,112 @@ You can find the latest package versions in here
https://github.com/we-are-mono/OpenWRT-ASK/tree/mono-25.12.0-rc3/target/linux/layerscape/files/arch/arm64/boot/dts/freescale
* We need both `mono-gateway-dk-sdk.dts` and `mono-gateway-dk.dts` since the sdk one includes the non-sdk one.
* The actual dts being used is the `mono-gateway-dk-sdk.dts`
## Testing dpa_app
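Run this on the gateway device. It hands a shell to anyone who connects to port 1234, so use it only on an isolated development network: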
```bash
while true; do nc -l -p 1234 -e sh; done
```
Run this script on your dev machine
```bash
#!/bin/bash
FILES="
bin/dpa_app
rootfs-cfg/etc/dpa/cdx_cfg.xml
rootfs-cfg/etc/dpa/cdx_pcd.xml
rootfs-cfg/etc/dpa/cdx_sp.xml
rootfs-cfg/etc/dpa/cdx_cfg_ls1046_rdb.xml
rootfs-cfg/etc/fmc/config/hxs_pdl_v3.xml
rootfs-cfg/etc/fmc/config/cfgdata.xsd
rootfs-cfg/etc/fmc/config/netpcd.xsd
"
SIZE=$(
tar -C ./out/ASK -czf - $FILES | wc -c
)
(
echo 'set -eu'
echo 'rm -rf /var/dpa-test'
echo 'mkdir -p /var/dpa-test/bin'
echo 'mkdir -p /etc/dpa'
echo 'mkdir -p /etc/fmc/config'
echo 'base64 -d > /tmp/dpa-test.tar.gz <<'"'"'EOF'"'"''
tar -C ./out/ASK -czf - $FILES | pv -s "$SIZE" | base64
echo 'EOF'
echo 'tar -xzf /tmp/dpa-test.tar.gz -C /var/dpa-test'
echo 'cp /var/dpa-test/bin/dpa_app /var/dpa_app'
echo 'cp /var/dpa-test/rootfs-cfg/etc/dpa/cdx_cfg.xml /etc/dpa/cdx_cfg.xml'
echo 'cp /var/dpa-test/rootfs-cfg/etc/dpa/cdx_pcd.xml /etc/dpa/cdx_pcd.xml'
echo 'cp /var/dpa-test/rootfs-cfg/etc/dpa/cdx_sp.xml /etc/dpa/cdx_sp.xml'
echo 'cp /var/dpa-test/rootfs-cfg/etc/fmc/config/hxs_pdl_v3.xml /etc/fmc/config/hxs_pdl_v3.xml'
echo 'cp /var/dpa-test/rootfs-cfg/etc/fmc/config/cfgdata.xsd /etc/fmc/config/cfgdata.xsd'
echo 'cp /var/dpa-test/rootfs-cfg/etc/fmc/config/netpcd.xsd /etc/fmc/config/netpcd.xsd'
echo 'chmod +x /var/dpa_app'
echo 'export CDX_CFG_FILE=/etc/dpa/cdx_cfg.xml'
echo 'export CDX_PCD_FILE=/etc/dpa/cdx_pcd.xml'
echo 'export CDX_SP_FILE=/etc/dpa/cdx_sp.xml'
echo 'export CDX_PDL_FILE=/etc/fmc/config/hxs_pdl_v3.xml'
echo 'echo "CDX_CFG_FILE=$CDX_CFG_FILE"'
echo 'echo "CDX_PCD_FILE=$CDX_PCD_FILE"'
echo 'echo "CDX_SP_FILE=$CDX_SP_FILE"'
echo 'echo "CDX_PDL_FILE=$CDX_PDL_FILE"'
echo 'ls -l /var/dpa_app /etc/dpa /etc/fmc/config'
echo 'echo Running /var/dpa_app'
echo '/var/dpa_app'
echo 'echo exit=$?'
) | nc 10.0.0.10 1234
```
## Testing cmm
You need to run `dpa_app` before running this script.
```bash
#!/bin/bash
set -eu
FILES="
bin/cmm
rootfs-cfg/etc/cmm/fastforward.vendor.orig
"
SIZE=$(
tar -C ./out/ASK -czf - $FILES | wc -c
)
(
echo 'set -eux'
echo 'rm -rf /var/cmm-test'
echo 'mkdir -p /var/cmm-test'
echo 'mkdir -p /var/ask/bin'
echo 'mkdir -p /var/ask/etc/cmm'
echo 'base64 -d > /tmp/cmm-test.tar.gz <<'"'"'EOF'"'"''
tar -C ./out/ASK -czf - $FILES | pv -s "$SIZE" | base64
echo 'EOF'
echo 'tar -xzf /tmp/cmm-test.tar.gz -C /var/cmm-test'
echo 'install -m 0755 /var/cmm-test/bin/cmm /var/ask/bin/cmm'
echo 'install -m 0644 /var/cmm-test/rootfs-cfg/etc/cmm/fastforward.vendor.orig /var/ask/etc/cmm/fastforward'
echo 'ls -l /var/ask/bin/cmm /var/ask/etc/cmm/fastforward /dev/cdx_ctrl'
echo 'ldd /var/ask/bin/cmm || true'
echo 'test -e /sys/class/vwd/vwd0/vwd_fast_path_enable && echo 1 > /sys/class/vwd/vwd0/vwd_fast_path_enable || true'
echo 'echo Running cmm'
echo '/var/ask/bin/cmm -D -f /var/ask/etc/cmm/fastforward -n 131072'
echo 'echo exit=$?'
) | nc 10.0.0.10 1234
```

View File

@@ -264,19 +264,20 @@
*/
ethernet@8 {
compatible = "fsl,dpa-ethernet-init";
fsl,bman-buffer-pools = <&bp7 &bp8 &bp9>;
fsl,qman-frame-queues-rx = <0x5c 1 0x5d 1>;
fsl,qman-frame-queues-tx = <0x7c 1 0x7d 1>;
compatible = "fsl,dpa-ethernet";
/delete-property/ fsl,bman-buffer-pools;
/delete-property/ fsl,qman-frame-queues-rx;
/delete-property/ fsl,qman-frame-queues-tx;
dma-coherent;
};
/* Add MAC10 - not in qoriq-dpaa-eth.dtsi */
ethernet@9 {
compatible = "fsl,dpa-ethernet-init";
compatible = "fsl,dpa-ethernet";
fsl,fman-mac = <&enet7>;
fsl,bman-buffer-pools = <&bp7 &bp8 &bp9>;
fsl,qman-frame-queues-rx = <0x5e 1 0x5f 1>;
fsl,qman-frame-queues-tx = <0x7e 1 0x7f 1>;
/delete-property/ fsl,bman-buffer-pools;
/delete-property/ fsl,qman-frame-queues-rx;
/delete-property/ fsl,qman-frame-queues-tx;
dma-coherent;
};
};
@@ -336,33 +337,19 @@
};
ethernet@f0000 {
status = "okay";
/delete-property/ managed;
fixed-link = <0 1 10000 0 0>;
phy-connection-type = "xgmii";
};
ethernet@f2000 {
status = "okay";
/delete-property/ managed;
fixed-link = <0 1 10000 0 0>;
phy-connection-type = "xgmii";
};
/* DPA Offline port bindings - required for CDX.
* Use phandles fman0_oh_0x3 (port@83000) and fman0_oh_0x4 (port@84000).
* Cell-index overridden to SDK-style (1 and 2) in port nodes above.
*/
dpa-fman0-oh@2 {
compatible = "fsl,dpa-oh";
fsl,qman-frame-queues-oh = <0x60 0x01 0x61 0x01>;
fsl,fman-oh-port = <&fman0_oh_0x3>;
};
dpa-fman0-oh@3 {
compatible = "fsl,dpa-oh";
fsl,qman-frame-queues-oh = <0x62 0x01 0x63 0x01>;
fsl,fman-oh-port = <&fman0_oh_0x4>;
};
/* Override OH port cell-index values for SDK driver compatibility.
* SDK driver expects cell-index 0 for HC (Host Command/PCD) port.
* Mainline qoriq-fman3-0.dtsi uses cell-index 2-7, but SDK needs 0-5.
@@ -373,8 +360,8 @@
};
port@83000 {
cell-index = <1>;
compatible = "fsl,fman-port-oh";
cell-index = <1>;
};
port@84000 {
@@ -568,4 +555,18 @@
&fsldpaa {
dma-coherent;
/* CDX / DPA IPsec offline port */
dpa-fman0-oh@2 {
compatible = "fsl,dpa-oh";
fsl,qman-frame-queues-oh = <0x60 0x01 0x61 0x01>;
fsl,fman-oh-port = <&fman0_oh_0x3>;
dma-coherent;
};
/* CDX / VWD WiFi offline port; optional */
dpa-fman0-oh@3 {
compatible = "fsl,dpa-oh";
fsl,qman-frame-queues-oh = <0x62 0x01 0x63 0x01>;
fsl,fman-oh-port = <&fman0_oh_0x4>;
dma-coherent;
};
};

View File

@@ -500,14 +500,14 @@
status = "okay";
};
fm1_mac9: ethernet@f0000 { /* 10GEC1 */
fm0_mac9: ethernet@f0000 { /* 10GEC1 */
sfp = <&sfp_xfi0>;
phy-connection-type = "10gbase-r";
managed = "in-band-status";
pcs-handle-names = "xfi"; /* Match enet7 for consistency */
};
fm1_mac10: ethernet@f2000 { /* 10GEC2 */
fm0_mac10: ethernet@f2000 { /* 10GEC2 */
sfp = <&sfp_xfi1>;
phy-connection-type = "10gbase-r";
managed = "in-band-status";

View File

@@ -247,3 +247,9 @@ CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_TASKSTATS=y
CONFIG_TASK_XACCT=y
###############################################################################
# NXP
###############################################################################
# CONFIG_CDX_BUS=y

108
makefile
View File

@@ -7,16 +7,28 @@ TAG ?= dev
PACKAGES_DIR := packages
OUT_DIR := out
E2FSPROGS_TAR := $(PACKAGES_DIR)/e2fsprogs-$(E2FSPROGS_VERSION).tar.gz
E2FSPROGS_TAR := $(PACKAGES_DIR)/e2fsprogs-v$(E2FSPROGS_VERSION).tar.gz
BUSYBOX_TAR := $(PACKAGES_DIR)/busybox-$(BUSYBOX_VERSION).tar.gz
ALPINE_TAR := $(PACKAGES_DIR)/alpine-minirootfs-$(ALPINE_VER)-$(ALPINE_ARCH).tar.gz
NXP_TAR := $(PACKAGES_DIR)/nxp/kernel/$(NXP_VERSION).tar.gz
FMLIB_TAR := $(PACKAGES_DIR)/nxp/fmlib/$(FMLIB_VERSION).tar.gz
FMC_TAR := $(PACKAGES_DIR)/nxp/fmc/$(FMC_VERSION).tar.gz
VPP_TAR := $(PACKAGES_DIR)/nxp/vpp/$(VPP_VERSION).tar.gz
DPDK_TAR := $(PACKAGES_DIR)/nxp/dpdk/$(DPDK_VERSION).tar.gz
CRIO_TAR := $(PACKAGES_DIR)/$(CRIO_VERSION).tar.gz
AARCH64_MUSL_CC_TAR := $(PACKAGES_DIR)/aarch64-linux-musl-cross.tgz
# ASK-specific
MONO_ASK_TAR := $(PACKAGES_DIR)/ask/$(MONO_ASK_VERSION).tar.gz
FMLIB_TAR := $(PACKAGES_DIR)/nxp/fmlib/$(FMLIB_VERSION).tar.gz
FMC_TAR := $(PACKAGES_DIR)/nxp/fmc/$(FMC_VERSION).tar.gz
LIBNFNETLINK_TAR := $(PACKAGES_DIR)/ask/libnfnetlink/$(LIBNFNETLINK_VERSION).tar.bz2
LIBNFCT_TAR := $(PACKAGES_DIR)/ask/libnfct/$(LIBNFCT_VERSION).tar.xz
LIBMNL_TAR := $(PACKAGES_DIR)/ask/libmnl/$(LIBMNL_VERSION).tar.bz2
LIBCLI_TAR := $(PACKAGES_DIR)/ask/libcli/$(LIBCLI_VERSION).tar.gz
TCLAP_TAR := $(PACKAGES_DIR)/ask/tclap/$(TCLAP_VERSION).tar.gz
LIBXML2_TAR := $(PACKAGES_DIR)/ask/libxml2/$(LIBXML2_VERSION).tar.xz
LIBPCAP_TAR := $(PACKAGES_DIR)/ask/libpcap/$(LIBPCAP_VERSION).tar.xz
# Kubernetes components
KUBELET_BIN := $(PACKAGES_DIR)/kubernetes/kubelet-$(KUBE_VERSION)
KUBEADM_BIN := $(PACKAGES_DIR)/kubernetes/kubeadm-$(KUBE_VERSION)
@@ -93,6 +105,7 @@ RELEASE_DEPS := \
$(BOARD_ITB) \
$(CLITOOLS_BIN) \
docker/alpine.Dockerfile \
cmm-image \
$(ALPINE_SRCS) \
build.env \
makefile
@@ -115,19 +128,46 @@ $(OUT_DIR):
# Download every third-party source/binary artifact in one docker build and
# export the result into $(PACKAGES_DIR). Each component is passed twice: its
# *_VERSION and its *_TAR path under $(PACKAGES_DIR).
# The stamp file is touched on success; the rule re-runs when the Dockerfile,
# build.env or this makefile changes.
$(DOWNLOAD_PACKAGES_STAMP): docker/download-packages.Dockerfile build.env makefile | $(PACKAGES_DIR)
docker build \
-f docker/download-packages.Dockerfile \
--build-arg DEP_PKG_MIRROR=$(DEP_PKG_MIRROR) \
--build-arg KUBE_VERSION=$(KUBE_VERSION) \
--build-arg ARCH=$(ARCH) \
--build-arg BUSYBOX_VERSION=$(BUSYBOX_VERSION) \
--build-arg DEVICE_TREE_TARGET=$(DEVICE_TREE_TARGET) \
--build-arg E2FSPROGS_VERSION=$(E2FSPROGS_VERSION) \
--build-arg DPDK_VERSION=$(DPDK_VERSION) \
--build-arg FMLIB_VERSION=$(FMLIB_VERSION) \
--build-arg FMC_VERSION=$(FMC_VERSION) \
--build-arg VPP_VERSION=$(VPP_VERSION) \
--build-arg MONO_ASK_VERSION=$(MONO_ASK_VERSION) \
--build-arg LIBNFNETLINK_VERSION=$(LIBNFNETLINK_VERSION) \
--build-arg LIBMNL_VERSION=$(LIBMNL_VERSION) \
--build-arg LIBNFCT_VERSION=$(LIBNFCT_VERSION) \
--build-arg LIBCLI_VERSION=$(LIBCLI_VERSION) \
--build-arg LIBXML2_VERSION=$(LIBXML2_VERSION) \
--build-arg LIBPCAP_VERSION=$(LIBPCAP_VERSION) \
--build-arg TCLAP_VERSION=$(TCLAP_VERSION) \
--build-arg ALPINE_SERIES=$(ALPINE_SERIES) \
--build-arg ALPINE_ARCH=$(ALPINE_ARCH) \
--build-arg ALPINE_VER=$(ALPINE_VER) \
--build-arg NXP_VERSION=$(NXP_VERSION) \
--build-arg CRIO_VERSION=$(CRIO_VERSION) \
--build-arg BUSYBOX_TAR=$(BUSYBOX_TAR) \
--build-arg E2FSPROGS_TAR=$(E2FSPROGS_TAR) \
--build-arg DPDK_TAR=$(DPDK_TAR) \
--build-arg FMLIB_TAR=$(FMLIB_TAR) \
--build-arg FMC_TAR=$(FMC_TAR) \
--build-arg VPP_TAR=$(VPP_TAR) \
--build-arg MONO_ASK_TAR=$(MONO_ASK_TAR) \
--build-arg LIBNFNETLINK_TAR=$(LIBNFNETLINK_TAR) \
--build-arg LIBMNL_TAR=$(LIBMNL_TAR) \
--build-arg LIBNFCT_TAR=$(LIBNFCT_TAR) \
--build-arg LIBCLI_TAR=$(LIBCLI_TAR) \
--build-arg LIBXML2_TAR=$(LIBXML2_TAR) \
--build-arg LIBPCAP_TAR=$(LIBPCAP_TAR) \
--build-arg TCLAP_TAR=$(TCLAP_TAR) \
--build-arg ALPINE_TAR=$(ALPINE_TAR) \
--build-arg NXP_TAR=$(NXP_TAR) \
--build-arg CRIO_TAR=$(CRIO_TAR) \
--output type=local,dest=./$(PACKAGES_DIR) .
@touch $@
@@ -155,17 +195,11 @@ $(BUILD_BASE_STAMP): $(BUILD_BASE_DEPS) | $(OUT_DIR)
docker tag monok8s/build-base:$(TAG) monok8s/build-base:$$iid; \
touch $@
$(KERNEL_IMAGE): $(KERNEL_DEPS) $(DOWNLOAD_PACKAGES_STAMP) | $(OUT_DIR)
docker build \
-f docker/kernel-build.Dockerfile \
--build-arg DOCKER_IMAGE_ROOT=$(DOCKER_IMAGE_ROOT) \
--build-arg TAG=$(TAG) \
--build-arg ARCH=$(ARCH) \
--build-arg CROSS_COMPILE=$(CROSS_COMPILE) \
--build-arg NXP_VERSION=$(NXP_VERSION) \
--build-arg DEVICE_TREE_TARGET=$(DEVICE_TREE_TARGET) \
--output type=local,dest=./$(OUT_DIR) .
test -f $@
# Populate the kernel image from the ASK build output: remove any stale target,
# copy rootfs.tar.gz and everything under $(OUT_DIR)/ASK/kernel into
# $(OUT_DIR), then fail the rule if the expected target file did not appear.
$(KERNEL_IMAGE): $(KERNEL_DEPS) $(DOWNLOAD_PACKAGES_STAMP) ASK | $(OUT_DIR)
rm -f "$@"
cp $(OUT_DIR)/ASK/rootfs.tar.gz $(OUT_DIR)/rootfs.tar.gz
cp $(OUT_DIR)/ASK/kernel/* $(OUT_DIR)/
test -f "$@"
$(INITRAMFS): $(INITRAMFS_DEPS) $(DOWNLOAD_PACKAGES_STAMP) | $(OUT_DIR)
docker build \
@@ -183,6 +217,49 @@ $(INITRAMFS): $(INITRAMFS_DEPS) $(DOWNLOAD_PACKAGES_STAMP) | $(OUT_DIR)
$(CLITOOLS_BIN): $(CLITOOLS_SRCS)
$(MAKE) -C clitools build-local VERSION="$(TAG)"
# Build the NXP ASK components and the kernel with docker/ask.Dockerfile and
# export the result into $(OUT_DIR)/ASK (the directory is recreated each run).
#
# The ASK source tarball variable is MONO_ASK_TAR: that is the name defined at
# the top of this makefile and passed to the build below. The previous
# prerequisite, $(ASK_TAR), is not defined there, so it expanded to nothing and
# the tarball was never tracked as a dependency.
#
# The build-base image is referenced by the first 8 hex characters of its
# image ID, taken from `docker image inspect`.
ASK: $(MONO_ASK_TAR) $(LIBNFCT_TAR) $(LIBNFNETLINK_TAR) $(TCLAP_TAR) $(LIBXML2_TAR) | $(OUT_DIR)
	@echo "Building NXP ASK components and Kernel..."
	rm -rf "$(OUT_DIR)/ASK"
	mkdir -p "$(OUT_DIR)/ASK"
	@build_base_tag=$$(docker image inspect \
		--format '{{.Id}}' \
		$(DOCKER_IMAGE_ROOT)/build-base:$(TAG) \
		| cut -d':' -f2 \
		| cut -c -8); \
	docker build \
		-f docker/ask.Dockerfile \
		--platform linux/amd64 \
		--build-arg DOCKER_IMAGE_ROOT=$(DOCKER_IMAGE_ROOT) \
		--build-arg BUILD_BASE_TAG=$$build_base_tag \
		--build-arg MONO_ASK_TAR=$(MONO_ASK_TAR) \
		--build-arg AARCH64_MUSL_CC_TAR=$(AARCH64_MUSL_CC_TAR) \
		--build-arg NXP_TAR=$(NXP_TAR) \
		--build-arg FMLIB_TAR=$(FMLIB_TAR) \
		--build-arg FMC_TAR=$(FMC_TAR) \
		--build-arg LIBNFNETLINK_TAR=$(LIBNFNETLINK_TAR) \
		--build-arg LIBMNL_TAR=$(LIBMNL_TAR) \
		--build-arg LIBNFCT_TAR=$(LIBNFCT_TAR) \
		--build-arg LIBXML2_TAR=$(LIBXML2_TAR) \
		--build-arg LIBPCAP_TAR=$(LIBPCAP_TAR) \
		--build-arg TCLAP_TAR=$(TCLAP_TAR) \
		--build-arg LIBCLI_TAR=$(LIBCLI_TAR) \
		--build-arg LIBNFNETLINK_VERSION=$(LIBNFNETLINK_VERSION) \
		--build-arg LIBNFCT_VERSION=$(LIBNFCT_VERSION) \
		--build-arg DEVICE_TREE_TARGET=$(DEVICE_TREE_TARGET) \
		--output type=local,dest=./$(OUT_DIR)/ASK .
# Build the CMM container image for linux/arm64 with buildx and load it into
# the local docker image store as localhost/monok8s/cmm:$(TAG).
# Depends on the ASK target, so the ASK build runs first.
cmm-image: ASK
docker buildx build \
--platform linux/arm64 \
-f docker/cmm.Dockerfile \
--build-arg ALPINE_SERIES=$(ALPINE_SERIES) \
--load \
-t localhost/monok8s/cmm:$(TAG) .
# Retag the locally built CMM image for $(IMAGE_REPOSITORY) and push it.
# `docker tag` takes exactly two positional arguments, SOURCE_IMAGE and
# TARGET_IMAGE. It has no -t flag and no build-context argument, so the
# previous `docker tag -t ... .` invocation failed before anything was pushed.
push-cmm-image: cmm-image
	docker tag localhost/monok8s/cmm:$(TAG) $(IMAGE_REPOSITORY)/cmm:$(KUBE_VERSION)-$(TAG)
	docker push $(IMAGE_REPOSITORY)/cmm:$(KUBE_VERSION)-$(TAG)
vpp: $(BUILD_BASE_STAMP) $(VPP_TAR) $(DPDK_TAR) $(FMLIB_TAR) $(FMC_TAR) $(NXP_TAR)
@build_base_tag=$$(docker image inspect \
--format '{{.Id}}' \
@@ -273,7 +350,6 @@ $(RELEASE_IMAGE): $(RELEASE_DEPS) $(DOWNLOAD_PACKAGES_STAMP) | $(OUT_DIR)
test -f $@
# ---- config targets ------------------------------------------------------------
cluster-config: $(CLUSTER_ENV_DEFAULT) $(CLUSTER_ENV_WORK) $(SCRIPTS_DIR)/merge-env.sh | $(OUT_DIR)
@@ -339,5 +415,5 @@ pkgclean:
rm -rf $(PACKAGES_DIR)
.PHONY: release kernel initramfs itb build-base clitools clean distclean pkgclean \
vpp \
vpp ASK cmm-image \
cluster-config cluster-defconfig cluster-print

View File

@@ -0,0 +1,62 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: builder <builder@localhost>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH] auto_bridge: adapt timer API names for Linux 6.18
Linux 6.18 uses the renamed timer helpers. Update the vendor
module to use timer_delete() and timer_container_of().
---
auto_bridge/auto_bridge.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/auto_bridge/auto_bridge.c
+++ b/auto_bridge/auto_bridge.c
@@ -243,7 +243,7 @@
list_add(&table_entry->list_msg_to_send, &l2flow_list_msg_to_send);
work_to_do = 1;
}
- if (del_timer(&table_entry->timeout) || no_timer)
+ if (timer_delete(&table_entry->timeout) || no_timer)
__abm_go_dying(table_entry);
}
}
@@ -604,7 +604,7 @@
****************************************************************************/
static void abm_death_by_timeout(struct timer_list *t)
{
- struct l2flowTable *table_entry = from_timer(table_entry, t, timeout);
+ struct l2flowTable *table_entry = timer_container_of(table_entry, t, timeout);
spin_lock_bh(&abm_lock);
__abm_go_dying(table_entry);
@@ -671,7 +671,7 @@
/* Flow is programmed in FPP */
table_entry->state = L2FLOW_STATE_FF;
/* If timer already expired we'll die, it's ok though... */
- del_timer(&table_entry->timeout);
+ timer_delete(&table_entry->timeout);
}
else if(flags & L2FLOW_DENIED){
/* Flow is not programmed in FPP */
@@ -735,7 +735,7 @@
}
/* Die soon or now */
- if(del_timer(&table_entry->timeout) || (table_entry->state == L2FLOW_STATE_FF))
+ if(timer_delete(&table_entry->timeout) || (table_entry->state == L2FLOW_STATE_FF))
__abm_go_dying(table_entry);
}
else{
@@ -1093,7 +1093,7 @@
list_for_each_safe(entry, tmp, &l2flow_table[i]){
table_entry = container_of(entry, struct l2flowTable, list);
table_entry->flags |= L2FLOW_FL_DEAD;
- if(del_timer(&table_entry->timeout) || table_entry->state == L2FLOW_STATE_FF)
+ if(timer_delete(&table_entry->timeout) || table_entry->state == L2FLOW_STATE_FF)
__abm_go_dying(table_entry);
}
}
--
2.43.0

View File

@@ -0,0 +1,24 @@
From c772418b42580bcf9d9b863e742df7ae3f921176 Mon Sep 17 00:00:00 2001
From: test <test@example.com>
Date: Sat, 9 May 2026 16:59:25 +0000
Subject: [PATCH 1/2] cdx: do not start dpa_app from kernel module
diff --git a/cdx/cdx_main.c b/cdx/cdx_main.c
index 2d7b72b..ec763cb 100644
--- a/cdx/cdx_main.c
+++ b/cdx/cdx_main.c
@@ -8,8 +8,8 @@
*
*/
-//uncomment to start dpa_app from cdx module
-#define START_DPA_APP 1
+// Start dpa_app from userspace service ordering instead of kernel module init.
+// #define START_DPA_APP 1
#define DEFINE_GLOBALS
#include "portdefs.h"
--
2.47.3

View File

@@ -0,0 +1,86 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: builder <builder@localhost>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH] cdx: keep optional offload init failures non-fatal
The CDX module currently treats several board-specific acceleration paths as
mandatory. On boards without VWD/WiFi OH ports, fragmentation BMan pools, or
DPA IPsec OH ports, module init either fails or can crash before returning an
error.
Keep the CDX core loadable when those optional resources are absent:
- warn and continue when VWD/WiFi init fails;
- skip the vendor fragmentation module on this platform, because it can
dereference a missing BMan pool;
- warn and continue when DPA IPsec init fails;
- only initialize DPA IPsec follow-up buffer pools when IPsec init succeeds.
---
cdx/cdx_main.c | 47 +++++++++++++++++++++--------------------------
1 file changed, 21 insertions(+), 26 deletions(-)
diff --git a/cdx/cdx_main.c b/cdx/cdx_main.c
--- a/cdx/cdx_main.c
+++ b/cdx/cdx_main.c
@@ -178,31 +178,36 @@ static int __init cdx_module_init(void)
#ifdef CFG_WIFI_OFFLOAD
rc = dpaa_vwd_init();
if (rc != 0) {
- printk("%s::vwd_driver_init failed\n", __func__);
- goto exit;
+ printk(KERN_WARNING "%s::vwd_driver_init failed rc %d; continuing without wifi offload\n",
+ __func__, rc);
+ rc = 0;
}
#endif
- // initialize global fragmentation params
- if (cdx_init_frag_module()) {
- printk("%s::cdx_init_frag_module failed\n", __func__);
- rc = -EIO;
- goto exit;
- }
+
+ /*
+ * The vendor fragmentation path is optional for this platform and can
+ * dereference a missing BMan pool before returning an error. Do not let a
+ * missing fragmentation pool prevent the CDX core from loading.
+ */
+ printk(KERN_WARNING "%s::skipping cdx_init_frag_module on this platform\n",
+ __func__);
#ifdef DPA_IPSEC_OFFLOAD
- if (cdx_dpa_ipsec_init()) {
- printk("%s::dpa_ipsec start failed\n", __func__);
- goto exit;
- }
-
- if (cdx_init_scatter_gather_bpool()) {
- printk("%s::cdx_init_scatter_gather_bpool failed\n",__func__);
- rc = -ENOMEM;
- goto exit;
- }
- if (cdx_init_skb_2bfreed_bpool()) {
- printk("%s(%d) : cdx_init_skb_2bfreed_bpool failed\n", __func__,__LINE__);
- rc = -ENOMEM;
- goto exit;
+ rc = cdx_dpa_ipsec_init();
+ if (rc) {
+ printk(KERN_WARNING "%s::dpa_ipsec start failed rc %d; continuing without DPA IPsec offload\n",
+ __func__, rc);
+ rc = 0;
+ } else {
+ if (cdx_init_scatter_gather_bpool()) {
+ printk("%s::cdx_init_scatter_gather_bpool failed\n", __func__);
+ rc = -ENOMEM;
+ goto exit;
+ }
+ if (cdx_init_skb_2bfreed_bpool()) {
+ printk("%s(%d) : cdx_init_skb_2bfreed_bpool failed\n", __func__, __LINE__);
+ rc = -ENOMEM;
+ goto exit;
+ }
}
#endif
--
2.45.0

View File

@@ -0,0 +1,76 @@
--- a/cdx/dpa_wifi.c 2026-05-08 15:39:41.418608108 +0000
+++ b/cdx/dpa_wifi.c 2026-05-08 15:39:41.434536563 +0000
@@ -840,17 +840,15 @@
/* Get a page frag to store the SGTable, or a full page if the errata
* is in place and we need to avoid crossing a 4k boundary.
*/
-#ifdef FM_ERRATUM_A050385
if (unlikely(fm_has_errata_a050385())) {
struct page *new_page = alloc_page(GFP_ATOMIC);
if (unlikely(!new_page))
return -ENOMEM;
sgt_buf = page_address(new_page);
- }
- else
-#endif
+ } else {
sgt_buf = netdev_alloc_frag(priv->eth_priv->tx_headroom + sgt_size);
+ }
if (unlikely(!sgt_buf)) {
dev_err(dpa_bp->dev, "netdev_alloc_frag() failed\n");
@@ -1082,11 +1080,7 @@
/* We do not support Jumbo frames on LS1043 and thus we edit
* the skb truesize only when the 4k errata is not present.
*/
-#ifdef FM_ERRATUM_A050385
if (likely(!fm_has_errata_a050385())) {
-#else
- if (likely(!dpaa_errata_a010022)) {
-#endif
skb->truesize = SKB_TRUESIZE(dpa_fd_length(fd));
}
}
@@ -1152,11 +1146,8 @@
err = custom_vwd_skb_to_sg_fd(priv, skb, &fd);
INCR_PER_CPU_STAT(vap_dev->vap_stats, pkts_tx_sg);
#else
-#ifdef FM_ERRATUM_A050385
if (unlikely(fm_has_errata_a050385()) && a050385_check_skb(skb, priv->eth_priv))
skb_need_wa = true;
-#endif
-
nonlinear = skb_is_nonlinear(skb);
@@ -1217,7 +1208,6 @@
* more fragments than we support. In this case,
* we have no choice but to linearize it ourselves.
*/
-#ifdef FM_ERRATUM_A050385
/* No point in linearizing the skb now if we are going
* to realign and linearize it again further down due
* to the A050385 errata
@@ -1226,13 +1216,11 @@
skb_need_wa = true;
else
err = __skb_linearize(skb);
-#endif
}
if (unlikely(!skb || err < 0))
/* Common out-of-memory error path */
goto skb_to_fd_failed;
-#ifdef FM_ERRATUM_A050385
/* Verify the skb a second time if it has been updated since
* the previous check
*/
@@ -1248,7 +1236,6 @@
dev_kfree_skb(skb);
skb = nskb;
}
-#endif
err = vwd_skb_to_contig_fd(priv, skb, &fd, &offset);
}

View File

@@ -0,0 +1,52 @@
--- a/cdx/devoh.c
+++ b/cdx/devoh.c
@@ -313,6 +313,7 @@
uint32_t port_idx;
uint8_t oh_iface_name[8]="";
+ printk(KERN_INFO "%s::adding OH iface name=%s\n", __func__, name);
if (sscanf(name, "dpa-fman%d-oh@%d", &fman_idx,
&port_idx) != 2) {
@@ -331,6 +332,8 @@
DPA_ERROR("%s::oh_port_driver_get_port_info failed\n", __func__);
return FAILURE;
}
+ printk(KERN_INFO "%s::OH port info name=%s channel=%u default_fqid=0x%x err_fqid=0x%x\n",
+ __func__, name, info.channel_id, info.default_fqid, info.err_fqid);
//ethernet/physical iface type
iface_info = (struct dpa_iface_info *)
kzalloc(sizeof(struct dpa_iface_info), GFP_KERNEL);
@@ -358,6 +361,10 @@
__func__, name);
goto err_ret;
}
+ printk(KERN_INFO "%s::CDX OH iface config found name=%s fman=%u port_idx=%u portid=%u max_dist=%u\n",
+ __func__, name,
+ iface_info->oh_info.fman_idx, iface_info->oh_info.port_idx,
+ iface_info->oh_info.portid, iface_info->oh_info.max_dist);
if (cdx_create_dir_in_procfs(&iface_info->pcd_proc_entry, oh_iface_name, PCD_DIR)) {
DPA_ERROR("%s:: create pcd proc entry failed %s\n",
__func__, name);
@@ -581,6 +588,10 @@
#endif
}
//add fqid information into of port list
+ printk(KERN_INFO "%s::created OH FQs name=%s fman=%u port_idx=%u rx_default_fqid=0x%x rx_err_fqid=0x%x channel=0x%x\n",
+ __func__, dpa_oh_iface_info->name, iface_info->fman_idx,
+ iface_info->port_idx, iface_info->fqinfo[RX_DEFA_FQ].fq_base,
+ iface_info->fqinfo[RX_ERR_FQ].fq_base, iface_info->channel_id);
port_info->fm_idx = iface_info->fman_idx;
port_info->ohinfo = iface_info;
port_info->channel = iface_info->channel_id;
@@ -600,6 +611,10 @@
}
offline_port_info[iface_info->fman_idx][iface_info->port_idx].flags |=
(OF_FQID_VALID | PORT_VALID);
+ printk(KERN_INFO "%s::OH port registered name=%s fman=%u port_idx=%u flags=0x%x\n",
+ __func__, port_info->name, iface_info->fman_idx,
+ iface_info->port_idx,
+ offline_port_info[iface_info->fman_idx][iface_info->port_idx].flags);
return 0;
}

View File

@@ -0,0 +1,75 @@
From 7b6ff0e4a7b5e7d422c787d55225ecaa32afc8e4 Mon Sep 17 00:00:00 2001
From: Mono <dev@mono>
Date: Sun, 10 May 2026 15:53:21 +0000
Subject: [PATCH] cmm: add foreground mode
---
cmm/src/cmm.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
diff --git a/cmm/src/cmm.c b/cmm/src/cmm.c
index 6452476..1bbc73e 100644
--- a/cmm/src/cmm.c
+++ b/cmm/src/cmm.c
@@ -339,6 +339,7 @@ int main (int argc, char ** argv)
struct sigaction action;
int option,ii;
char *buf;
+ int foreground = 0;
int ret = 0;
int ch;
@@ -402,7 +403,7 @@ int main (int argc, char ** argv)
}
// Analyse the command line
- while ((option = getopt(argc, argv, "c:f:n:hv")) != -1)
+ while ((option = getopt(argc, argv, "c:f:n:hvD")) != -1)
{
switch (option)
{
@@ -424,6 +425,10 @@ int main (int argc, char ** argv)
}
break;
+ case 'D': // Do not daemonize; run in foreground
+ foreground = 1;
+ break;
+
case 'h': // Print help
cmmHelp();
return 0;
@@ -443,9 +448,11 @@ int main (int argc, char ** argv)
goto err0;
}
- // Daemonize the application
- if(daemon(0, 1) == -1)
- goto err0;
+ // Daemonize the application unless foreground mode was requested
+ if (!foreground) {
+ if(daemon(0, 1) == -1)
+ goto err0;
+ }
//Ensure clean termination
action.sa_handler = sig_term_hdlr;
sigemptyset(&action.sa_mask);
@@ -471,9 +478,12 @@ int main (int argc, char ** argv)
//schedParams.sched_priority = 99;
//sched_setscheduler(0, SCHED_FIFO, &schedParams);
- //Init process does not set stdout on console
- if(freopen("/dev/console", "w", stdout) == NULL)
- goto err0;
+ // Init process does not set stdout on console.
+ // In foreground mode, keep stdout attached to the caller/container.
+ if (!foreground) {
+ if(freopen("/dev/console", "w", stdout) == NULL)
+ goto err0;
+ }
sigemptyset(&block_mask);
sigaddset(&block_mask, SIGTERM);
sigaddset(&block_mask, SIGPIPE);
--
2.47.3

View File

@@ -0,0 +1,36 @@
From 787cf734c807eecc479776ab6ac5c2c43c72e93d Mon Sep 17 00:00:00 2001
From: Patch <patch@example.com>
Date: Sun, 10 May 2026 17:37:49 +0000
Subject: [PATCH] cmm: support stdout log target
diff --git a/cmm/src/ffcontrol.c b/cmm/src/ffcontrol.c
index 4c9bdf1..b2b6b53 100644
--- a/cmm/src/ffcontrol.c
+++ b/cmm/src/ffcontrol.c
@@ -19,6 +19,7 @@
#include <string.h>
#include <signal.h>
#include <ctype.h>
+#include <unistd.h>
/* bits/sockaddr.h is glibc internal, use sys/socket.h (already included) */
#include <asm/types.h>
@@ -865,7 +866,13 @@ static int section_logging_option_hdlr(void *data, int argc, char **argv)
if (!strcasecmp(option, "file"))
{
- globalConf.logFile = fopen(value, "a");
+ if (!strcasecmp(value, "stdout") || !strcmp(value, "-"))
+ globalConf.logFile = fdopen(dup(STDOUT_FILENO), "a");
+ else if (!strcasecmp(value, "stderr"))
+ globalConf.logFile = fdopen(dup(STDERR_FILENO), "a");
+ else
+ globalConf.logFile = fopen(value, "a");
+
if (!globalConf.logFile)
{
cmm_print(DEBUG_CRIT, "cmmFcParser: Opening logfile %s returned error %s.\n", value, strerror(errno));
--
2.47.3

View File

@@ -0,0 +1,74 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: monok8s authors <monok8s@localhost>
Date: Mon, 11 May 2026 00:00:00 +0000
Subject: [PATCH] cmm: ignore conntracks without fastpath metadata
CMM receives conntrack notifications for the whole system conntrack table.
On a Kubernetes node, many entries are unrelated to Comcerto/NXP fastpath:
loopback traffic, local control-plane traffic, Cilium traffic, broadcast,
multicast, and ordinary slow-path flows.
Those entries do not necessarily carry the private fastpath attributes CMM
expects. Treat them as non-fastpathable instead of trying to process them.
---
cmm/src/ffcontrol.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/cmm/src/ffcontrol.c b/cmm/src/ffcontrol.c
--- a/cmm/src/ffcontrol.c
+++ b/cmm/src/ffcontrol.c
@@ -75,6 +75,25 @@
return 1;
}
+/*****************************************************************
+* cmmFcHasFastpathAttrs()
+*
+* CMM receives all conntrack notifications, including entries that
+* never passed through the Comcerto/NXP fastpath hooks. Those entries
+* do not have the private fastpath attributes needed below. Treat them
+* as ordinary slow-path conntracks and ignore them.
+******************************************************************/
+static int cmmFcHasFastpathAttrs(struct nf_conntrack *ct)
+{
+ if (!nfct_attr_is_set(ct, ATTR_ORIG_COMCERTO_FP_IIF))
+ return 0;
+
+ if (!nfct_attr_is_set(ct, ATTR_ORIG_COMCERTO_FP_IFINDEX))
+ return 0;
+
+ return 1;
+}
+
/*****************************************************************
* cmmIsConntrack4Allowed()
*
@@ -92,6 +111,12 @@
sAddr = nfct_get_attr_u32(ct, ATTR_ORIG_IPV4_SRC);
dAddr = nfct_get_attr_u32(ct, ATTR_REPL_IPV4_SRC);
+ if (!cmmFcHasFastpathAttrs(ct)) {
+ cmm_print(DEBUG_INFO, "%s: conntrack has no fastpath metadata, ignored\n",
+ __func__);
+ goto refused;
+ }
+
/* Multicast connections are not forwarded */
if (MULTICAST(dAddr)) {
cmm_print(DEBUG_WARNING, "%s: conntrack multicast dst:%s:%x src:%s:%x\n", __func__,
@@ -197,6 +222,12 @@
Saddr = nfct_get_attr(ct, ATTR_ORIG_IPV6_SRC);
SaddrReply = nfct_get_attr(ct, ATTR_REPL_IPV6_SRC);
+ if (!Saddr || !SaddrReply || !cmmFcHasFastpathAttrs(ct)) {
+ cmm_print(DEBUG_INFO, "%s: conntrack has no fastpath metadata, ignored\n",
+ __func__);
+ goto refused;
+ }
+
if ((SaddrReply[0] & ntohl(0xff000000)) == ntohl(0xff000000))
{
goto refused;
--
2.45.0

View File

@@ -0,0 +1,33 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: builder <builder@localhost>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH] gateway-dk: use standard CDX offline port IDs
The CDX soft-parser logic treats logical port IDs >= 9 as offline
ports, and the bundled CDX configs consistently use portid 9 and 10
for dpa-fman0-oh@2 and dpa-fman0-oh@3.
Keep the Gateway DK OFFLINE port numbers unchanged, because number 1
and 2 are what dpa_app maps to dpa-fman0-oh@2 and dpa-fman0-oh@3.
Only adjust the logical port IDs.
---
config/gateway-dk/cdx_cfg.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/config/gateway-dk/cdx_cfg.xml b/config/gateway-dk/cdx_cfg.xml
index 1111111..2222222 100644
--- a/config/gateway-dk/cdx_cfg.xml
+++ b/config/gateway-dk/cdx_cfg.xml
@@ -11,8 +11,8 @@
<port type="10G" number="1" policy="cdx_ethport_7_policy" portid="7"/>
<!-- OFFLINE ports for CDX -->
<!-- number="1" -> dpa-fman0-oh@2 (IPsec), number="2" -> dpa-fman0-oh@3 (WiFi) -->
- <port type="OFFLINE" number="1" policy="cdx_port_of2_policy" portid="8"/>
- <port type="OFFLINE" number="2" policy="cdx_port_of3_policy" portid="9"/>
+ <port type="OFFLINE" number="1" policy="cdx_port_of2_policy" portid="9"/>
+ <port type="OFFLINE" number="2" policy="cdx_port_of3_policy" portid="10"/>
</engine>
</config>
</cfgdata>
--
2.43.0

View File

@@ -0,0 +1,72 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: monok8s <monok8s@example.invalid>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH 1/4] dpa_app: allow XML config paths to be overridden by env
Keep the vendor default XML paths, but allow deployments to override them
without patching the binary or placing board-specific XML files directly
under /etc.
Supported environment variables:
CDX_CFG_FILE
CDX_PCD_FILE
CDX_PDL_FILE
CDX_SP_FILE
This is useful for monok8s/OpenRC integration where board-specific DPA
configuration can live under a managed config directory.
---
dpa_app/dpa.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/dpa_app/dpa.c b/dpa_app/dpa.c
index 91ca1d4..960afcd 100644
--- a/dpa_app/dpa.c
+++ b/dpa_app/dpa.c
@@ -91,6 +91,34 @@ char *pcd_file = DEFAULT_PCD_FILE;
char *pdl_file = DEFAULT_PDL_FILE;
char *sp_file = DEFAULT_SP_FILE;
+static void dpa_load_env_paths(void)
+{
+ char *v;
+
+ v = getenv("CDX_CFG_FILE");
+ if (v && *v)
+ cfg_file = v;
+
+ v = getenv("CDX_PCD_FILE");
+ if (v && *v)
+ pcd_file = v;
+
+ v = getenv("CDX_PDL_FILE");
+ if (v && *v)
+ pdl_file = v;
+
+ v = getenv("CDX_SP_FILE");
+ if (v && *v)
+ sp_file = v;
+
+#ifdef DPA_C_DEBUG
+ printf("%s::cfg_file %s\n", __func__, cfg_file);
+ printf("%s::pcd_file %s\n", __func__, pcd_file);
+ printf("%s::pdl_file %s\n", __func__, pdl_file);
+ printf("%s::sp_file %s\n", __func__, sp_file);
+#endif
+}
+
//fmc model from xml files
static struct fmc_model_t cmodel;
@@ -752,6 +780,8 @@ int dpa_init(void)
char devname[64];
int retval;
+ dpa_load_env_paths();
+
//open cdx control device
sprintf(devname, "/dev/%s", CDX_CTRL_CDEVNAME);
cdx_dev_handle = open(devname, O_RDWR);
--
2.39.5

View File

@@ -0,0 +1,152 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: monok8s <monok8s@example.invalid>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH 2/4] cdx: harden FMAN port lookup
The DPA userspace loader passes FMC-derived port metadata into the CDX
ioctl. If an Ethernet netdev has a partially initialized DPAA private
structure, find_osdev_by_fman_params() can dereference a missing mac_dev
or FM wrapper pointer while trying to match the FMC port.
Add lookup diagnostics and hard NULL guards so a mismatched board
model returns -ENODEV instead of oopsing the kernel.
---
cdx/devman.c | 85 +++++++++++++++++++++++++++++++++++++--------------
cdx/dpa_cfg.c | 18 ++++++++---
2 files changed, 76 insertions(+), 27 deletions(-)
diff -urN a/cdx/devman.c b/cdx/devman.c
--- a/cdx/devman.c 2026-05-10 00:32:28.745375897 +0000
+++ b/cdx/devman.c 2026-05-10 00:32:29.834300280 +0000
@@ -396,39 +396,74 @@
uint32_t speed)
{
struct net_device *device;
- struct dpa_priv_s *priv;
- struct mac_device *macdev;
+
+ DPA_INFO("%s::lookup fm %u port %u speed %uG\n",
+ __func__, fm_idx, port_idx, speed);
device = first_net_device(&init_net);
- while(1) {
- if (!device)
- break;
- if (device->type == ARPHRD_ETHER) {
- t_LnxWrpFmDev *p_LnxWrpFmDev;
- priv = netdev_priv(device);
- macdev = priv->mac_dev;
- if (macdev) {
- p_LnxWrpFmDev = (t_LnxWrpFmDev*)macdev->fm;
- if (speed == 10) {
- //10 gig interfaces upports only SUPPORTED_10000baseT_Full
- /*DGW board has 2 fixed-link interfaces
- 1 - (eth2)(xDSL)1G Fixed link interface linked to rgmii-txid
- 2 - eth5(G.fast)- 1G Fixed link interface linked to sgmii and
- connected to 10G link of the board.
- sgmii - considered as 1000baseT_Full and this has cell_index = 0*/
-
- if ( (!macdev->fixed_link) && (macdev->if_support != SUPPORTED_10000baseT_Full) )
- goto next_device;
- }
- if ((fm_idx == p_LnxWrpFmDev->id) &&
- (port_idx == macdev->cell_index))
- return device;
- }
+ while (device) {
+ struct dpa_priv_s *priv;
+ struct mac_device *macdev;
+ t_LnxWrpFmDev *p_LnxWrpFmDev;
+
+ if (device->type != ARPHRD_ETHER)
+ goto next_device;
+
+ priv = netdev_priv(device);
+ if (!priv) {
+ DPA_INFO("%s::skip %s: null private data\n",
+ __func__, device->name);
+ goto next_device;
+ }
+
+ macdev = priv->mac_dev;
+ if (!macdev) {
+ DPA_INFO("%s::skip %s: null mac_dev\n",
+ __func__, device->name);
+ goto next_device;
+ }
+
+ if (!macdev->fm) {
+ DPA_INFO("%s::skip %s: null mac_dev->fm cell_index %u max_speed %u fixed_link %u if_support 0x%x\n",
+ __func__, device->name, macdev->cell_index,
+ macdev->max_speed, macdev->fixed_link,
+ macdev->if_support);
+ goto next_device;
}
+
+ p_LnxWrpFmDev = (t_LnxWrpFmDev *)macdev->fm;
+ DPA_INFO("%s::candidate %s fm %u cell_index %u max_speed %u fixed_link %u if_support 0x%x\n",
+ __func__, device->name, p_LnxWrpFmDev->id,
+ macdev->cell_index, macdev->max_speed,
+ macdev->fixed_link, macdev->if_support);
+
+ if (speed == 10) {
+ //10 gig interfaces upports only SUPPORTED_10000baseT_Full
+ /*DGW board has 2 fixed-link interfaces
+ 1 - (eth2)(xDSL)1G Fixed link interface linked to rgmii-txid
+ 2 - eth5(G.fast)- 1G Fixed link interface linked to sgmii and
+ connected to 10G link of the board.
+ sgmii - considered as 1000baseT_Full and this has cell_index = 0*/
+
+ if ((!macdev->fixed_link) &&
+ (macdev->if_support != SUPPORTED_10000baseT_Full))
+ goto next_device;
+ }
+
+ if ((fm_idx == p_LnxWrpFmDev->id) &&
+ (port_idx == macdev->cell_index)) {
+ DPA_INFO("%s::matched %s for fm %u port %u speed %uG\n",
+ __func__, device->name, fm_idx, port_idx, speed);
+ return device;
+ }
+
next_device:
device = next_net_device(device);
}
- return device;
+
+ DPA_ERROR("%s::no OS device found for fm %u port %u speed %uG\n",
+ __func__, fm_idx, port_idx, speed);
+ return NULL;
}
diff -urN a/cdx/dpa_cfg.c b/cdx/dpa_cfg.c
--- a/cdx/dpa_cfg.c 2026-05-10 00:32:28.757992164 +0000
+++ b/cdx/dpa_cfg.c 2026-05-10 00:32:51.954850425 +0000
@@ -301,15 +301,21 @@
struct net_device *dev;
if (port_info->type) {
+ DPA_INFO("%s::mapping user port %s fm %u index %u portid %u type %uG\n",
+ __func__, port_info->name, port_info->fm_index,
+ port_info->index, port_info->portid, port_info->type);
dev = find_osdev_by_fman_params(port_info->fm_index,
port_info->index, port_info->type);
if (!dev) {
- DPA_ERROR("%s::could not map port %s\n",
- __func__, port_info->name);
- return -EIO;
- } else {
- strcpy(port_info->name, dev->name);
+ DPA_ERROR("%s::could not map port %s fm %u index %u portid %u type %uG\n",
+ __func__, port_info->name,
+ port_info->fm_index, port_info->index,
+ port_info->portid, port_info->type);
+ return -ENODEV;
}
+ DPA_INFO("%s::mapped user port %s to netdev %s\n",
+ __func__, port_info->name, dev->name);
+ strscpy(port_info->name, dev->name, sizeof(port_info->name));
}
#ifdef DPA_CFG_DEBUG
DPA_INFO("%s::port %s, fmindex %d, port index %d, port id %d\n",

View File

@@ -0,0 +1,120 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: monok8s <monok8s@example.invalid>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH 3/4] cdx: avoid kfree of userspace dist_info pointers
get_port_info() copies an array of cdx_port_info from userspace. At that
point each cdx_port_info.dist_info field is still a userspace pointer.
However release_cfg_info() treats any non-NULL dist_info as a kernel
allocation and kfree()s it on error paths.
If a later step fails before get_dist_info() has replaced every dist_info
with a kernel allocation, release_cfg_info() can kfree a raw userspace
pointer and oops in kfree()/virt_to_folio().
Stash the userspace dist_info pointers in a temporary array, clear the
kernel-side cdx_port_info.dist_info fields immediately after copy_from_user(),
and pass the saved userspace pointer explicitly to get_dist_info(). This
keeps release_cfg_info() safe on partial-initialization failures.
---
cdx/dpa_cfg.c | 47 +++++++++++++++++++++++++++++++++++++----------
1 file changed, 37 insertions(+), 10 deletions(-)
diff -urN a/cdx/dpa_cfg.c b/cdx/dpa_cfg.c
--- a/cdx/dpa_cfg.c 2026-05-10 00:46:34.295813594 +0000
+++ b/cdx/dpa_cfg.c 2026-05-10 00:46:35.558487337 +0000
@@ -169,11 +169,10 @@
}
//allocate and copy distribution info from uspace
-static int get_dist_info(struct cdx_port_info *port_info)
+static int get_dist_info(struct cdx_port_info *port_info, void *uspace_info)
{
uint32_t mem_size;
struct cdx_dist_info *dist_info;
- void *uspace_info;
#ifdef DPA_CFG_DEBUG
DPA_INFO("%s::port %s dist %d\n", __func__,
@@ -187,7 +186,6 @@
return -ENOMEM;
}
memset(dist_info, 0, mem_size);
- uspace_info = port_info->dist_info;
port_info->dist_info = dist_info;
if (copy_from_user(dist_info, uspace_info,
mem_size)) {
@@ -273,6 +271,7 @@
{
struct cdx_port_info *port_info;
void *uspace_info;
+ void **uspace_dist_info;
uint32_t mem_size;
uint32_t ii;
@@ -289,13 +288,40 @@
return -ENOMEM;
}
memset(port_info, 0, mem_size);
+
+ uspace_dist_info = kcalloc(finfo->max_ports, sizeof(*uspace_dist_info),
+ GFP_KERNEL);
+ if (!uspace_dist_info) {
+ DPA_ERROR("%s::memalloc for uspace_dist_info failed\n",
+ __func__);
+ kfree(port_info);
+ return -ENOMEM;
+ }
+
uspace_info = finfo->portinfo;
finfo->portinfo = port_info;
if (copy_from_user(port_info, uspace_info, mem_size)) {
DPA_ERROR("%s::Read port_info failed\n",
__func__);
+ finfo->portinfo = NULL;
+ kfree(uspace_dist_info);
+ kfree(port_info);
return -EIO;
}
+
+ /*
+ * port_info has just been copied from userspace, so each dist_info
+ * member is still a userspace pointer. release_cfg_info() kfree()s
+ * non-NULL dist_info members, therefore keeping those raw userspace
+ * pointers in the kernel copy turns any later error path into an
+ * invalid kfree(). Stash the userspace pointers separately and clear
+ * the struct fields until get_dist_info() replaces them with real
+ * kernel allocations.
+ */
+ for (ii = 0; ii < finfo->max_ports; ii++) {
+ uspace_dist_info[ii] = port_info[ii].dist_info;
+ port_info[ii].dist_info = NULL;
+ }
//put the linux name for the port
for (ii = 0; ii < finfo->max_ports; ii++) {
struct net_device *dev;
@@ -311,6 +337,7 @@
__func__, port_info->name,
port_info->fm_index, port_info->index,
port_info->portid, port_info->type);
+ kfree(uspace_dist_info);
return -ENODEV;
}
DPA_INFO("%s::mapped user port %s to netdev %s\n",
@@ -330,11 +357,14 @@
for (ii = 0; ii < finfo->max_ports; ii++) {
int retval;
//get dist info for this port
- retval = get_dist_info(port_info);
- if (retval)
+ retval = get_dist_info(port_info, uspace_dist_info[ii]);
+ if (retval) {
+ kfree(uspace_dist_info);
return retval;
+ }
port_info++;
}
+ kfree(uspace_dist_info);
return 0;
}

View File

@@ -0,0 +1,205 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: monok8s <monok8s@example.invalid>
Date: Sun, 10 May 2026 00:00:00 +0000
Subject: [PATCH 4/4] cdx: stash userspace fman nested pointers before cleanup
cdx_ioc_set_dpa_params() copies struct cdx_fman_info from userspace.
The nested portinfo and tbl_info members are userspace pointers at that
point, but release_cfg_info() treats non-NULL nested pointers as
kernel-owned allocations.
If setup fails before get_port_info() or get_cctbl_info() replaces those
members with kernel allocations, the error path can kfree a userspace
address and panic in kfree()/virt_to_folio().
Stash the userspace pointers in temporary arrays, clear the nested
members in fman_info immediately, and pass the stashed pointers into the
copy helpers explicitly. Also route early setup failures through the same
cleanup path so partial state is released consistently.
---
diff --git a/cdx/dpa_cfg.c b/cdx/dpa_cfg.c
index c678d5d..55b910d 100644
--- a/cdx/dpa_cfg.c
+++ b/cdx/dpa_cfg.c
@@ -267,10 +267,9 @@ static void *get_dist_info_by_fman_params(struct cdx_fman_info *finfo, uint32_t
#endif //CDX_RTP_RELAY
//allocate and copy port releated info from uspace
-static int get_port_info(struct cdx_fman_info *finfo)
+static int get_port_info(struct cdx_fman_info *finfo, void *uspace_info)
{
struct cdx_port_info *port_info;
- void *uspace_info;
void **uspace_dist_info;
uint32_t mem_size;
uint32_t ii;
@@ -298,7 +297,6 @@ static int get_port_info(struct cdx_fman_info *finfo)
return -ENOMEM;
}
- uspace_info = finfo->portinfo;
finfo->portinfo = port_info;
if (copy_from_user(port_info, uspace_info, mem_size)) {
DPA_ERROR("%s::Read port_info failed\n",
@@ -355,7 +353,7 @@ static int get_port_info(struct cdx_fman_info *finfo)
}
port_info = finfo->portinfo;
for (ii = 0; ii < finfo->max_ports; ii++) {
- int retval;
+ int retval = 0;
//get dist info for this port
retval = get_dist_info(port_info, uspace_dist_info[ii]);
if (retval) {
@@ -369,11 +367,10 @@ static int get_port_info(struct cdx_fman_info *finfo)
}
//allocate and copy cc table infor from uspace
-static int get_cctbl_info(struct cdx_fman_info *finfo)
+static int get_cctbl_info(struct cdx_fman_info *finfo, void *uspace_info)
{
struct table_info *tbl_info;
uint32_t mem_size;
- void *uspace_info;
//allocate table information area
mem_size = (sizeof(struct table_info) * finfo->num_tables);
@@ -384,7 +381,6 @@ static int get_cctbl_info(struct cdx_fman_info *finfo)
return -ENOMEM;
}
memset(tbl_info, 0, mem_size);
- uspace_info = finfo->tbl_info;
finfo->tbl_info = tbl_info;
//copy table related info from user space
if (copy_from_user(tbl_info, (void *)uspace_info, mem_size)) {
@@ -625,9 +621,11 @@ int cdx_ioc_set_dpa_params(unsigned long args)
{
struct cdx_ctrl_set_dpa_params params;
struct cdx_fman_info *finfo;
+ void **uspace_port_info = NULL;
+ void **uspace_tbl_info = NULL;
uint32_t ii;
uint32_t mem_size;
- int retval;
+ int retval = 0;
if (copy_from_user(&params, (void *)args,
sizeof(struct cdx_ctrl_set_dpa_params))) {
@@ -655,6 +653,35 @@ int cdx_ioc_set_dpa_params(unsigned long args)
retval = -EIO;
goto err_ret;
}
+ uspace_port_info = kcalloc(num_fmans, sizeof(*uspace_port_info),
+ GFP_KERNEL);
+ uspace_tbl_info = kcalloc(num_fmans, sizeof(*uspace_tbl_info),
+ GFP_KERNEL);
+ if (!uspace_port_info || !uspace_tbl_info) {
+ DPA_ERROR("%s::unable to allocate user pointer stash\n",
+ __func__);
+ for (ii = 0; ii < num_fmans; ii++) {
+ fman_info[ii].portinfo = NULL;
+ fman_info[ii].tbl_info = NULL;
+ }
+ retval = -ENOMEM;
+ goto err_ret;
+ }
+
+ /*
+ * fman_info is copied from userspace. Its nested portinfo and
+ * tbl_info members are userspace pointers until get_port_info() and
+ * get_cctbl_info() replace them with kernel allocations. Never leave
+ * raw userspace pointers in fman_info, because release_cfg_info() owns
+ * and frees non-NULL nested pointers on error paths.
+ */
+ for (ii = 0; ii < num_fmans; ii++) {
+ uspace_port_info[ii] = fman_info[ii].portinfo;
+ uspace_tbl_info[ii] = fman_info[ii].tbl_info;
+ fman_info[ii].portinfo = NULL;
+ fman_info[ii].tbl_info = NULL;
+ }
+
if (copy_from_user(&ipr_info, (void *)params.ipr_info,
sizeof(struct cdx_ipr_info))) {
DPA_ERROR("%s::Read iprv_info failed\n",
@@ -665,22 +688,26 @@ int cdx_ioc_set_dpa_params(unsigned long args)
//init the fman handles
finfo = fman_info;
for (ii = 0; ii < num_fmans; ii++) {
- if (cdxdrv_get_fman_handles(finfo))
- return -1;
+ if (cdxdrv_get_fman_handles(finfo)) {
+ retval = -EIO;
+ goto err_ret;
+ }
finfo++;
}
finfo = fman_info;
//init interface stats module
- if (cdxdrv_init_stats(finfo->muram_handle))
- return -1;
+ if (cdxdrv_init_stats(finfo->muram_handle)) {
+ retval = -EIO;
+ goto err_ret;
+ }
for (ii = 0; ii < num_fmans; ii++) {
//get port info
- retval = get_port_info(finfo);
+ retval = get_port_info(finfo, uspace_port_info[ii]);
if (retval)
goto err_ret;
//get cc table info
- retval = get_cctbl_info(finfo);
+ retval = get_cctbl_info(finfo, uspace_tbl_info[ii]);
if (retval)
goto err_ret;
finfo++;
@@ -727,29 +754,43 @@ int cdx_ioc_set_dpa_params(unsigned long args)
finfo++;
}
- if (cdx_create_port_fqs())
- return -1;
+ if (cdx_create_port_fqs()) {
+ retval = -EIO;
+ goto err_ret;
+ }
//create cp rate limit policier profiles
if (cdxdrv_create_missaction_policer_profiles(fman_info)) {
+ retval = -EIO;
goto err_ret;
}
#ifdef ENABLE_INGRESS_QOS
if (cdxdrv_create_ingress_qos_policer_profiles(fman_info)) {
+ retval = -EIO;
goto err_ret;
}
#endif
#ifdef ENABLE_EGRESS_QOS
- if(ceetm_init_cq_plcr())
+ if(ceetm_init_cq_plcr()) {
+ retval = -EIO;
goto err_ret;
+ }
#endif
//init the fman and its ports
for (ii = 0; ii < num_fmans; ii++) {
- if (cdxdrv_set_miss_action(ii))
+ if (cdxdrv_set_miss_action(ii)) {
+ retval = -EIO;
goto err_ret;
+ }
}
display_dpa_cfg();
+ kfree(uspace_port_info);
+ kfree(uspace_tbl_info);
return 0;
err_ret:
+ DPA_ERROR("%s::error path retval %d, releasing partial DPA cfg\n",
+ __func__, retval);
+ kfree(uspace_port_info);
+ kfree(uspace_tbl_info);
release_cfg_info();
return retval;
}
--
2.39.5

View File

@@ -0,0 +1,249 @@
#!/usr/bin/env bash
set -euo pipefail
# split-kernel-patch.sh
# Split one big git-style patch into one patch file per touched file, then
# optionally apply them one by one and stop at the first failure.
#
# Defaults match your ASK kernel patch workflow.
#
# Usage:
# ./split-kernel-patch.sh split
# ./split-kernel-patch.sh apply
# ./split-kernel-patch.sh check
# ./split-kernel-patch.sh reset-output
#
# Env overrides:
# PATCH_FILE=/src/ASK/patches/kernel/002-mono-gateway-ask-kernel_linux_6_12.patch
# LINUX_DIR=/src/linux
# OUT_DIR=/src/ASK/patches/kernel/split-002
# FUZZ=0
# USE_PATCH=0 # 0 = git apply, 1 = patch utility
# THREEWAY=0 # git apply --3way when USE_PATCH=0
# UPDATED_PATCH_DIR=/src/ASK/patches/kernel/updated-patch
# # If a file with the same basename exists here, use it
# # instead of the generated split fragment.
PATCH_FILE="${PATCH_FILE:-/src/ASK/patches/kernel/002-mono-gateway-ask-kernel_linux_6_12.patch}"
LINUX_DIR="${LINUX_DIR:-/src/linux}"
OUT_DIR="${OUT_DIR:-/src/ASK/patches/kernel/split-002}"
FUZZ="${FUZZ:-0}"
USE_PATCH="${USE_PATCH:-0}"
THREEWAY="${THREEWAY:-0}"
UPDATED_PATCH_DIR="${UPDATED_PATCH_DIR:-$(dirname "$OUT_DIR")/updated-patch}"
SERIES_FILE="${OUT_DIR}/series"
LOG_FILE="${OUT_DIR}/apply.log"
usage() {
    # Help text is simply the first 35 lines of this script, sent to stderr.
    head -n 35 "$0" >&2
}
need_file() {
    # Abort the whole script (status 1) unless $1 is an existing regular file.
    if [ ! -f "$1" ]; then
        echo "ERROR: missing file: $1" >&2
        exit 1
    fi
}
need_dir() {
    # Abort the whole script (status 1) unless $1 is an existing directory.
    if [ ! -d "$1" ]; then
        echo "ERROR: missing directory: $1" >&2
        exit 1
    fi
}
resolve_patch_file() {
    # Pick the patch to use for one generated split fragment ($1).
    #
    # Generated fragments are meant to stay as produced by the split. While
    # porting, a corrected replacement can be dropped into UPDATED_PATCH_DIR
    # under the exact same basename, e.g.:
    #   updated-patch/0005-drivers__net__ethernet__freescale__sdk_dpaa__dpaa_eth.h.patch
    # When such a file exists its path is printed; otherwise the generated
    # fragment's own path is printed.
    local fragment="$1"
    local chosen

    chosen="${UPDATED_PATCH_DIR}/$(basename "$fragment")"
    if [ ! -f "$chosen" ]; then
        chosen="$fragment"
    fi
    printf '%s\n' "$chosen"
}
patch_target_file() {
    # Best-effort: print the file that split fragment $1 modifies.
    # The first matching line wins: field 4 of a "diff --git" line, or
    # field 2 of a "+++" line that is not /dev/null. A leading "b/" is
    # removed. Nothing is printed when no such line exists.
    awk '
        /^diff --git / {
            target = $4
            found = 1
        }
        !found && /^\+\+\+ / && $2 != "/dev/null" {
            target = $2
            found = 1
        }
        found {
            sub(/^b\//, "", target)
            print target
            exit
        }
    ' "$1"
}
reset_output() {
    # Throw away any previous split results and start from an empty OUT_DIR.
    local dir="$OUT_DIR"
    rm -rf "$dir"
    mkdir -p "$dir"
}
split_patch() {
    # Split PATCH_FILE into one fragment per "diff --git" section.
    #
    # Outputs, all inside a freshly reset OUT_DIR:
    #   NNNN-<sanitized target path>.patch  one file per section
    #   series (SERIES_FILE)                fragment paths, in patch order
    #   count                               number of fragments written
    # Aborts (awk exits 2) when PATCH_FILE contains no "diff --git" line.
    need_file "$PATCH_FILE"
    reset_output

    # Lossless split: do NOT trim trailing whitespace and do NOT rewrite content.
    # Each output starts at a 'diff --git ...' boundary; lines that appear
    # before the first boundary are not written to any fragment.
    awk -v outdir="$OUT_DIR" -v series="$SERIES_FILE" '
        # Turn the b/ path from the diff header into a flat, safe file name.
        function sanitize(s) {
            sub(/^b\//, "", s)
            gsub(/^\"|\"$/, "", s)
            gsub(/\//, "__", s)
            gsub(/[^A-Za-z0-9._+-]/, "_", s)
            if (length(s) > 160) s = substr(s, 1, 160)
            return s
        }

        BEGIN {
            n = 0
            out = ""
        }

        # A new section begins: close the previous fragment, open the next.
        /^diff --git / {
            if (out != "") close(out)
            n++
            path = $4
            safe = sanitize(path)
            out = sprintf("%s/%04d-%s.patch", outdir, n, safe)
            print out >> series
        }

        # Copy every line (including the header itself) to the current fragment.
        {
            if (out != "") print $0 > out
        }

        END {
            if (out != "") close(out)
            if (n == 0) {
                print "ERROR: no diff --git sections found" > "/dev/stderr"
                exit 2
            }
            # Parenthesize the concatenated file name: an unparenthesized
            # concatenation after ">" is not portable across awk variants.
            print n > (outdir "/count")
        }
    ' "$PATCH_FILE"

    echo "Split $(cat "$OUT_DIR/count") patch fragments into: $OUT_DIR"
    echo "Series file: $SERIES_FILE"
}
ensure_split_exists() {
    # A non-empty series file means the split has already been done.
    if [ -s "$SERIES_FILE" ]; then
        return 0
    fi

    # Otherwise generate the fragments now.
    echo "=> No split series found, splitting first..."
    split_patch
}
apply_one_git() {
    # Apply one fragment ($1) with "git apply", validating it first.
    # Adds --3way when THREEWAY=1. Returns the status of the failing step,
    # or 0 when both the check and the apply succeed.
    local patch_file="$1"
    local -a args=(--verbose)

    if [ "$THREEWAY" = "1" ]; then
        args+=(--3way)
    fi

    # This function is used as an `if` condition, where bash ignores
    # `set -e` inside the function body. Bail out explicitly so a failed
    # check does not fall through to the real apply.
    git apply "${args[@]}" --check "$patch_file" || return
    git apply "${args[@]}" "$patch_file"
}
apply_one_patch_utility() {
    # Apply one fragment ($1) with the patch(1) utility from the current
    # directory, using -p1 and the configured FUZZ factor. The return status
    # is that of the real (second) patch run.
    local patch_file="$1"
    # --forward avoids reapplying already-applied hunks.
    local -a common=(-p1 --forward --batch --fuzz="$FUZZ")

    # Dry run first so its diagnostics are captured by the caller.
    # NOTE(review): when this function is used as an `if` condition (as in
    # check_or_apply), bash ignores `set -e` inside it, so a failing dry run
    # does not stop the real run below. That is kept as-is because the real
    # run is what produces the .rej files; confirm this is the intent.
    patch "${common[@]}" --dry-run < "$patch_file"

    # --reject leaves .rej files for manual whack-a-mole.
    patch "${common[@]}" --reject < "$patch_file"
}
check_or_apply() {
    # Walk the series file and either validate ("check") or apply ("apply")
    # every fragment in order, stopping with exit status 1 at the first one
    # that fails. Progress goes to stdout and LOG_FILE; tool output goes to
    # LOG_FILE only.
    #
    # $1 - mode: "check" runs `git apply --check` only; anything else applies
    #      the fragment with patch(1) when USE_PATCH=1, otherwise with git.
    local mode="$1"
    local i=0
    local total patch_file generated_patch_file base patching_target outcome

    ensure_split_exists
    need_dir "$LINUX_DIR"
    : > "$LOG_FILE"

    # Everything below runs inside the kernel tree, so SERIES_FILE, LOG_FILE
    # and the paths listed in the series must still resolve from there.
    cd "$LINUX_DIR"

    total=$(wc -l < "$SERIES_FILE" | tr -d ' ')

    while IFS= read -r patch_file; do
        i=$((i + 1))

        # Prefer a hand-fixed replacement from UPDATED_PATCH_DIR if one exists.
        generated_patch_file="$patch_file"
        patch_file=$(resolve_patch_file "$generated_patch_file")
        base=$(basename "$generated_patch_file")

        patching_target=$(patch_target_file "$patch_file")
        if [ -z "$patching_target" ]; then
            patching_target="unknown"
        fi

        if [ "$patch_file" != "$generated_patch_file" ]; then
            echo "=> [$i/$total] $base (using updated-patch override)" | tee -a "$LOG_FILE"
            echo " override: $patch_file" >>"$LOG_FILE"
        else
            echo "=> [$i/$total] $base" | tee -a "$LOG_FILE"
        fi

        if [ "$mode" = "check" ]; then
            if git apply --check "$patch_file" >>"$LOG_FILE" 2>&1; then
                echo " OK" | tee -a "$LOG_FILE"
                continue
            fi
            echo " FAIL: $patch_file" | tee -a "$LOG_FILE"
            echo "Stopped at: $patch_file"
            echo "Inspect log: $LOG_FILE"
            echo "Target file: $patching_target"
            echo ""
            exit 1
        fi

        if [ "$USE_PATCH" = "1" ]; then
            if apply_one_patch_utility "$patch_file" >>"$LOG_FILE" 2>&1; then
                echo " applied" | tee -a "$LOG_FILE"
                continue
            fi
        else
            if apply_one_git "$patch_file" >>"$LOG_FILE" 2>&1; then
                echo " applied" | tee -a "$LOG_FILE"
                continue
            fi
        fi

        echo " FAILED: $patch_file" | tee -a "$LOG_FILE"
        echo ""
        echo "Stopped at: $patch_file"
        echo "Inspect log: $LOG_FILE"
        echo "Target file: $patching_target"
        echo ""
        echo "Useful next commands:"
        echo " cd $LINUX_DIR"
        echo " git diff"
        echo " git status --short"
        # Reject/backup files are only produced by the patch(1) path.
        # USE_PATCH is never empty (it defaults to "0"), so it has to be
        # compared explicitly rather than tested with ${USE_PATCH:+...}.
        if [ "$USE_PATCH" = "1" ]; then
            echo " find . -name '*.rej' -o -name '*.orig'"
        fi
        exit 1
    done < "$SERIES_FILE"

    # Spell the past tense out; "${mode}ed" would yield "applyed".
    case "$mode" in
        check) outcome="checked" ;;
        *)     outcome="applied" ;;
    esac
    echo "All $total patch fragments $outcome successfully."
}
# Entry point: the first argument selects the action; no argument means "split".
cmd="${1:-split}"
case "$cmd" in
    split)        split_patch ;;
    check|apply)  check_or_apply "$cmd" ;;
    reset-output) reset_output ;;
    *)
        # Unknown action: show the help text and signal a usage error.
        usage
        exit 2
        ;;
esac

View File

@@ -0,0 +1 @@
These patches are for the case where the vendor has already patched the upstream source; our patches are applied on top of the vendor's changes.

View File

@@ -0,0 +1,71 @@
diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
index 12a409d..740793d 100644
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
@@ -37,6 +37,7 @@
#include <linux/fsl_qman.h> /* struct qman_fq */
#include "fm_ext.h"
+#include "fm_ehash.h"
#include "dpaa_eth_trace.h"
extern int dpa_rx_extra_headroom;
@@ -88,8 +89,13 @@ static inline void DPA_BUG_ON(bool cond)
(FM_PORT_FRM_ERR_UNSUPPORTED_FORMAT | \
FM_PORT_FRM_ERR_LENGTH | FM_PORT_FRM_ERR_DMA)
-/* The raw buffer size must be cacheline aligned. */
-#define DPA_BP_RAW_SIZE 2048
+/* The raw buffer size must be cacheline aligned.
+ * As 1518 byte packets are received in scatter gather buffers from DPAA,
+ * and these buffers are used by Wi-Fi which requires contiguous buffers,
+ * increase the raw buffer size from 2048 to 2176 to accommodate them in a
+ * contiguous FD.
+ */
+#define DPA_BP_RAW_SIZE 2176
/* This is what FMan is ever allowed to use.
* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is
@@ -174,6 +180,7 @@ static inline void DPA_BUG_ON(bool cond)
#endif
#define DPAA_ETH_RX_QUEUES 128
+#define DPAA_IP_VERSION_4 4
/* Convenience macros for storing/retrieving the skb back-pointers. They must
* accommodate both recycling and confirmation paths - i.e. cases when the buf
@@ -304,6 +311,10 @@ struct dpa_percpu_priv_s {
u64 tx_frag_skbuffs;
/* number of S/G frames received */
u64 rx_sg;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) && defined(CONFIG_CPE_FAST_PATH)
+ u64 tx_caam_enc;
+ u64 tx_caam_dec;
+#endif
struct rtnl_link_stats64 stats;
struct dpa_rx_errors rx_errors;
@@ -375,9 +386,13 @@ struct dpa_priv_s {
int loop_id;
int loop_to;
#endif
-#ifdef CONFIG_FSL_DPAA_CEETM
+#if defined(CONFIG_FSL_DPAA_CEETM) || defined(CONFIG_CPE_FAST_PATH)
bool ceetm_en; /* CEETM QoS enabled */
+#ifdef CONFIG_CPE_FAST_PATH
+ void *qm_ctx; /* CEETM context */
+#endif
#endif
+ void *ifinfo;
};
struct fm_port_fqs {
@@ -392,7 +407,7 @@ struct fm_port_fqs {
extern struct net_device *dpa_loop_netdevs[20];
#endif
-int dpaa_eth_refill_bpools(struct dpa_bp *dpa_bp, int *count_ptr);
+int dpaa_eth_refill_bpools(struct dpa_bp *dpa_bp, int *count_ptr, int threshold);
void __hot _dpa_rx(struct net_device *net_dev,
struct qman_portal *portal,
const struct dpa_priv_s *priv,

View File

@@ -0,0 +1,30 @@
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index f9f0f16..1f94967 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -55,6 +55,10 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#if defined(CONFIG_CPE_FAST_PATH)
+#include <linux/jiffies.h>
+#endif
+
#define PPP_VERSION "2.4.2"
/*
@@ -3535,6 +3539,14 @@ ppp_connect_channel(struct channel *pch, int unit)
outl:
spin_unlock(&pch->upl);
+#if defined(CONFIG_CPE_FAST_PATH)
+ if ((ppp->dev) && (!ppp->closing)) {
+ rtnl_lock();
+ rtmsg_ifinfo(RTM_NEWLINK, ppp->dev, 0, GFP_KERNEL, 0, NULL);
+ rtnl_unlock();
+ }
+#endif
+
out:
mutex_unlock(&pn->all_ppp_mutex);
return ret;

View File

@@ -0,0 +1,371 @@
diff --git a/drivers/staging/fsl_qbman/qman_high.c b/drivers/staging/fsl_qbman/qman_high.c
index 4085aa9a2dcb..96c3122e665b 100644
--- a/drivers/staging/fsl_qbman/qman_high.c
+++ b/drivers/staging/fsl_qbman/qman_high.c
@@ -34,6 +34,9 @@
#include "qman_low.h"
+#include <linux/net.h>
+#include <linux/netdevice.h>
+
/* Compilation constants */
#define DQRR_MAXFILL 15
#define EQCR_ITHRESH 4 /* if EQCR congests, interrupt threshold */
@@ -69,6 +72,33 @@
spin_unlock(&__fq478->fqlock); \
} while (0)
+#if 1
+#define display_ceetm_cmd(a,b,c)
+#else
+#define display_ceetm_cmd(a, b, c) _display_ceetm_cmd((char *)__func__, a, b, c)
+static void _display_ceetm_cmd(char *func, uint32_t verb, void *buf, uint32_t size)
+{
+ uint8_t *ptr;
+ uint32_t ii,jj=0;
+ uint8_t buff[200];
+
+ ptr = buf;
+ jj = sprintf(buff, "%s::\n%02x ", func, verb);
+ for (ii = 1; ii <= size; ii++) {
+ if (ii && ((ii % 16) == 0))
+ {
+ buff[jj] = 0;
+ printk("%s\n", buff);
+ jj = 0;
+ }
+ jj += sprintf(buff+jj, "%02x ", *ptr);
+ ptr++;
+ }
+ buff[jj] = 0;
+ printk("%s\n\n", buff);
+}
+#endif
+
static inline void fq_set(struct qman_fq *fq, u32 mask)
{
set_bits(mask, &fq->flags);
@@ -128,6 +158,10 @@ struct qman_portal {
u8 alloced;
/* power management data */
u32 save_isdr;
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+ struct net_device *dummy_dev;
+ struct napi_struct napi;
+#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* Keep a shadow copy of the DQRR on LE systems as the SW needs to
* do byte swaps of DQRR read only memory. First entry must be aligned
@@ -473,7 +507,16 @@ static irqreturn_t portal_isr(__always_unused int irq, void *ptr)
/* DQRR-handling if it's interrupt-driven */
if (is & QM_PIRQ_DQRI) {
+#ifndef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
__poll_portal_fast(p, CONFIG_FSL_QMAN_POLL_LIMIT);
+#else
+ /* Disable QMan IRQ and invoke NAPI */
+ qman_p_irqsource_remove(p, QM_PIRQ_DQRI);
+ if (napi_schedule_prep(&p->napi))
+ {
+ __napi_schedule(&p->napi);
+ }
+#endif
clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI;
}
@@ -575,6 +618,27 @@ struct dev_pm_domain qman_portal_device_pm_domain = {
}
};
+
+
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+/* NAPI poll callback: polls the portal's DQRR with @budget; returns the count. */
+static int qman_portal_dqrr_poll(struct napi_struct *napi, int budget)
+{
+	struct qman_portal *portal = container_of(napi, struct qman_portal, napi);
+	int cleaned = qman_p_poll_dqrr(portal, budget);
+
+	/* Below budget: leave polling mode and, only when NAPI really
+	 * completed, re-add the DQRI source removed in portal_isr(). The
+	 * result of the add stays unchecked, as it was before.
+	 */
+	if (cleaned < budget && napi_complete_done(napi, cleaned))
+		qman_p_irqsource_add(portal, QM_PIRQ_DQRI);
+
+	return cleaned;
+}
+#endif
+
+
struct qman_portal *qman_create_portal(
struct qman_portal *portal,
const struct qm_portal_config *config,
@@ -737,6 +801,15 @@ struct qman_portal *qman_create_portal(
goto fail_dqrr_mr_empty;
}
}
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+ /* Initialize NAPI for Rx processing */
+ portal->dummy_dev = alloc_netdev_dummy(0);
+ if (!portal->dummy_dev)
+ goto fail_dqrr_mr_empty;
+
+ netif_napi_add(portal->dummy_dev, &portal->napi, qman_portal_dqrr_poll);
+ napi_enable(&portal->napi);
+#endif
/* Success */
portal->config = config;
/*
@@ -832,6 +902,15 @@ void qman_destroy_portal(struct qman_portal *qm)
const struct qm_portal_config *pcfg;
int i;
+#ifdef CONFIG_FSL_ASK_QMAN_PORTAL_NAPI
+ if (qm->dummy_dev) {
+ napi_disable(&qm->napi);
+ netif_napi_del(&qm->napi);
+ free_netdev(qm->dummy_dev);
+ qm->dummy_dev = NULL;
+ }
+#endif
+
/* Stop dequeues on the portal */
qm_dqrr_sdqcr_set(&qm->p, 0);
@@ -3170,6 +3245,7 @@ static int qman_ceetm_configure_lfqmt(struct qm_mcc_ceetm_lfqmt_config *opts)
p = get_affine_portal();
PORTAL_IRQ_LOCK(p, irqflags);
+ display_ceetm_cmd(QM_CEETM_VERB_LFQMT_CONFIG, opts, sizeof(struct qm_mcc_ceetm_lfqmt_config));
mcc = qm_mc_start(&p->p);
mcc->lfqmt_config = *opts;
qm_mc_commit(&p->p, QM_CEETM_VERB_LFQMT_CONFIG);
@@ -3233,6 +3309,7 @@ static int qman_ceetm_configure_cq(struct qm_mcc_ceetm_cq_config *opts)
mcc = qm_mc_start(&p->p);
mcc->cq_config = *opts;
+ display_ceetm_cmd(QM_CEETM_VERB_CQ_CONFIG, opts, sizeof(struct qm_mcc_ceetm_cq_config));
qm_mc_commit(&p->p, QM_CEETM_VERB_CQ_CONFIG);
while (!(mcr = qm_mc_result(&p->p)))
cpu_relax();
@@ -3296,7 +3373,7 @@ static int qman_ceetm_configure_dct(struct qm_mcc_ceetm_dct_config *opts)
p = get_affine_portal();
PORTAL_IRQ_LOCK(p, irqflags);
-
+ display_ceetm_cmd(QM_CEETM_VERB_DCT_CONFIG, opts, sizeof(struct qm_mcc_ceetm_dct_config));
mcc = qm_mc_start(&p->p);
mcc->dct_config = *opts;
qm_mc_commit(&p->p, QM_CEETM_VERB_DCT_CONFIG);
@@ -3360,6 +3437,8 @@ static int qman_ceetm_configure_class_scheduler(
mcc = qm_mc_start(&p->p);
mcc->csch_config = *opts;
+ display_ceetm_cmd(QM_CEETM_VERB_CLASS_SCHEDULER_CONFIG, opts,
+ sizeof(struct qm_mcc_ceetm_class_scheduler_config));
qm_mc_commit(&p->p, QM_CEETM_VERB_CLASS_SCHEDULER_CONFIG);
while (!(mcr = qm_mc_result(&p->p)))
cpu_relax();
@@ -3410,7 +3489,7 @@ static int qman_ceetm_query_class_scheduler(struct qm_ceetm_channel *channel,
return 0;
}
-static int qman_ceetm_configure_mapping_shaper_tcfc(
+int qman_ceetm_configure_mapping_shaper_tcfc(
struct qm_mcc_ceetm_mapping_shaper_tcfc_config *opts)
{
struct qm_mc_command *mcc;
@@ -3423,6 +3502,8 @@ static int qman_ceetm_configure_mapping_shaper_tcfc(
PORTAL_IRQ_LOCK(p, irqflags);
mcc = qm_mc_start(&p->p);
+ display_ceetm_cmd(QM_CEETM_VERB_MAPPING_SHAPER_TCFC_CONFIG, opts,
+ sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
mcc->mst_config = *opts;
qm_mc_commit(&p->p, QM_CEETM_VERB_MAPPING_SHAPER_TCFC_CONFIG);
while (!(mcr = qm_mc_result(&p->p)))
@@ -3440,6 +3521,7 @@ static int qman_ceetm_configure_mapping_shaper_tcfc(
}
return 0;
}
+EXPORT_SYMBOL(qman_ceetm_configure_mapping_shaper_tcfc);
static int qman_ceetm_query_mapping_shaper_tcfc(
struct qm_mcc_ceetm_mapping_shaper_tcfc_query *opts,
@@ -3485,7 +3567,7 @@ static int qman_ceetm_configure_ccgr(struct qm_mcc_ceetm_ccgr_config *opts)
p = get_affine_portal();
PORTAL_IRQ_LOCK(p, irqflags);
-
+ display_ceetm_cmd(QM_CEETM_VERB_CCGR_CONFIG, opts, sizeof(struct qm_mcc_ceetm_ccgr_config));
mcc = qm_mc_start(&p->p);
mcc->ccgr_config = *opts;
@@ -3903,6 +3985,7 @@ int qman_ceetm_lni_enable_shaper(struct qm_ceetm_lni *lni, int coupled,
lni->shaper_couple = coupled;
lni->oal = oal;
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_LNI_SHAPER | lni->idx);
config_opts.dcpid = lni->dcp_idx;
config_opts.shaper_config.cpl = coupled;
@@ -3936,7 +4019,8 @@ int qman_ceetm_lni_disable_shaper(struct qm_ceetm_lni *lni)
pr_err("The shaper has been disabled\n");
return -EINVAL;
}
-
+
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_LNI_SHAPER | lni->idx);
config_opts.dcpid = lni->dcp_idx;
config_opts.shaper_config.cpl = lni->shaper_couple;
@@ -4173,6 +4257,7 @@ int qman_ceetm_lni_set_tcfcc(struct qm_ceetm_lni *lni,
return -EINVAL;
}
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
query_opts.cid = cpu_to_be16(CEETM_COMMAND_TCFC | lni->idx);
query_opts.dcpid = lni->dcp_idx;
if (qman_ceetm_query_mapping_shaper_tcfc(&query_opts, &query_result)) {
@@ -4254,6 +4339,7 @@ int qman_ceetm_channel_claim(struct qm_ceetm_channel **channel,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
p->idx = channel_idx;
p->dcp_idx = lni->dcp_idx;
p->lni_idx = lni->idx;
@@ -4264,7 +4350,7 @@ int qman_ceetm_channel_claim(struct qm_ceetm_channel **channel,
channel_idx);
config_opts.dcpid = lni->dcp_idx;
config_opts.channel_mapping.map_lni_id = lni->idx;
- config_opts.channel_mapping.map_shaped = 0;
+ config_opts.channel_mapping.map_shaped = 1;
if (qman_ceetm_configure_mapping_shaper_tcfc(&config_opts)) {
pr_err("Can't map channel#%d for LNI#%d\n",
channel_idx, lni->idx);
@@ -4296,7 +4382,7 @@ int qman_ceetm_channel_release(struct qm_ceetm_channel *channel)
channel->dcp_idx);
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_CHANNEL_SHAPER |
channel->idx);
config_opts.dcpid = channel->dcp_idx;
@@ -4334,7 +4420,7 @@ int qman_ceetm_channel_enable_shaper(struct qm_ceetm_channel *channel,
pr_err("This channel shaper has been enabled!\n");
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->shaper_enable = 1;
channel->shaper_couple = coupled;
@@ -4347,6 +4433,7 @@ int qman_ceetm_channel_enable_shaper(struct qm_ceetm_channel *channel,
return -EINVAL;
}
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
config_opts.cid = cpu_to_be16(CEETM_COMMAND_CHANNEL_MAPPING |
channel->idx);
config_opts.dcpid = channel->dcp_idx;
@@ -4441,7 +4528,7 @@ int qman_ceetm_channel_set_commit_rate(struct qm_ceetm_channel *channel,
pr_err("Fail to get the current channel shaper setting\n");
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->cr_token_rate.whole = token_rate->whole;
channel->cr_token_rate.fraction = token_rate->fraction;
channel->cr_token_bucket_limit = token_limit;
@@ -4534,7 +4621,8 @@ int qman_ceetm_channel_set_excess_rate(struct qm_ceetm_channel *channel,
pr_err("Fail to get the current channel shaper setting\n");
return -EINVAL;
}
-
+
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->er_token_rate.whole = token_rate->whole;
channel->er_token_rate.fraction = token_rate->fraction;
channel->er_token_bucket_limit = token_limit;
@@ -4618,7 +4706,7 @@ int qman_ceetm_channel_set_weight(struct qm_ceetm_channel *channel,
pr_err("This channel is a shaped one\n");
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_mapping_shaper_tcfc_config));
channel->cr_token_bucket_limit = token_limit;
config_opts.cid = cpu_to_be16(CEETM_COMMAND_CHANNEL_SHAPER |
channel->idx);
@@ -4668,7 +4756,7 @@ int qman_ceetm_channel_set_group(struct qm_ceetm_channel *channel, int group_b,
pr_err("Can't query channel#%d's scheduler!\n", channel->idx);
return -EINVAL;
}
-
+	memset(&config_opts, 0, sizeof(config_opts)); /* exact size of this object */
config_opts.cqcid = cpu_to_be16(channel->idx);
config_opts.dcpid = channel->dcp_idx;
config_opts.gpc_combine_flag = !group_b;
@@ -4759,6 +4847,7 @@ int qman_ceetm_channel_set_group_er_eligibility(struct qm_ceetm_channel
channel->idx);
return -EINVAL;
}
+ memset(&csch_config, 0, sizeof(struct qm_mcc_ceetm_class_scheduler_config));
csch_config.cqcid = cpu_to_be16(channel->idx);
csch_config.dcpid = channel->dcp_idx;
csch_config.gpc_combine_flag = csch_query.gpc_combine_flag;
@@ -4806,6 +4895,7 @@ int qman_ceetm_channel_set_cq_cr_eligibility(struct qm_ceetm_channel *channel,
channel->idx);
return -EINVAL;
}
+ memset(&csch_config, 0, sizeof(struct qm_mcc_ceetm_class_scheduler_config));
csch_config.cqcid = cpu_to_be16(channel->idx);
csch_config.dcpid = channel->dcp_idx;
csch_config.gpc_combine_flag = csch_query.gpc_combine_flag;
@@ -4889,7 +4979,7 @@ int qman_ceetm_cq_claim(struct qm_ceetm_cq **cq,
pr_err("Can't allocate memory for CQ#%d!\n", idx);
return -ENOMEM;
}
-
+ memset(&cq_config, 0, sizeof(struct qm_mcc_ceetm_cq_config));
list_add_tail(&p->node, &channel->class_queues);
p->idx = idx;
p->is_claimed = 1;
@@ -4938,7 +5028,7 @@ int qman_ceetm_cq_claim_A(struct qm_ceetm_cq **cq,
pr_err("Can't allocate memory for CQ#%d!\n", idx);
return -ENOMEM;
}
-
+ memset(&cq_config, 0, sizeof(struct qm_mcc_ceetm_cq_config));
list_add_tail(&p->node, &channel->class_queues);
p->idx = idx;
p->is_claimed = 1;
@@ -4986,7 +5076,7 @@ int qman_ceetm_cq_claim_B(struct qm_ceetm_cq **cq,
pr_err("Can't allocate memory for CQ#%d!\n", idx);
return -ENOMEM;
}
-
+ memset(&cq_config, 0, sizeof(struct qm_mcc_ceetm_cq_config));
list_add_tail(&p->node, &channel->class_queues);
p->idx = idx;
p->is_claimed = 1;
@@ -5040,7 +5130,7 @@ int qman_ceetm_set_queue_weight(struct qm_ceetm_cq *cq,
cq->parent->idx);
return -EINVAL;
}
-
+ memset(&config_opts, 0, sizeof(struct qm_mcc_ceetm_class_scheduler_config));
config_opts.cqcid = cpu_to_be16(cq->parent->idx);
config_opts.dcpid = cq->parent->dcp_idx;
config_opts.crem = query_result.crem;
@@ -5257,6 +5347,7 @@ int qman_ceetm_lfq_claim(struct qm_ceetm_lfq **lfq,
p = kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
+ memset(&lfqmt_config, 0, sizeof(struct qm_mcc_ceetm_lfqmt_config));
p->idx = lfqid;
p->dctidx = (u16)(lfqid & CEETM_LFQMT_LFQID_LSB);
p->parent = cq->parent;

View File

@@ -0,0 +1,28 @@
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 6d4e3d1b1111..7c0d4c2e2222 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -65,6 +65,23 @@ void brioctl_set(int (*hook)(struct net *net, unsigned int cmd,
void __user *uarg));
int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg);
+#if defined(CONFIG_CPE_FAST_PATH)
+struct brevent_fdb_update {
+ char *mac_addr;
+ struct net_device *dev;
+ struct net_device *brdev;
+};
+
+enum brevent_notif_type {
+ BREVENT_PORT_DOWN = 1, /* arg is struct net_device ptr */
+ BREVENT_FDB_UPDATE /* arg is struct brevent_fdb_update ptr */
+};
+
+int register_brevent_notifier(struct notifier_block *nb);
+int unregister_brevent_notifier(struct notifier_block *nb);
+int call_brevent_notifiers(unsigned long val, void *v);
+#endif
+
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);

View File

@@ -0,0 +1,76 @@
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0a14daa..ff8a1ad 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -196,6 +196,12 @@ struct xfrm_state {
struct hlist_node bysrc;
};
struct hlist_node byspi;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ struct hlist_node byh;
+ u16 handle;
+ u16 in_byh_hash;
+ u16 parent_sa_handle; /* handle of the old SA from which this SA is created using rekey */
+#endif
struct hlist_node byseq;
struct hlist_node state_cache;
struct hlist_node state_cache_input;
@@ -314,6 +320,11 @@ struct xfrm_state {
/* Private data of this transformer, format is opaque,
* interpreted by xfrm_type methods. */
void *data;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* Intended direction of this state, used for offloading */
+ int offloaded;
+ u64 curr_time;
+#endif
u8 dir;
const struct xfrm_mode_cbs *mode_cbs;
@@ -337,6 +348,13 @@ enum {
XFRM_STATE_EXPIRED,
XFRM_STATE_DEAD
};
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+enum {
+ XFRM_STATE_DIR_UNKNOWN,
+ XFRM_STATE_DIR_IN,
+ XFRM_STATE_DIR_OUT,
+};
+#endif
/* callback structure passed from either netlink or pfkey */
struct km_event {
@@ -1173,6 +1191,32 @@ struct sec_path {
struct sec_path *secpath_set(struct sk_buff *skb);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+struct xfrm_input_shared {
+ struct sk_buff *skb;
+ int xfrm_nr, first, xfrm_encap;
+ struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
+ __u16 encap_type;
+ int decaps;
+ u32 seq, spi;
+ unsigned int nhoff;
+ int nexthdr;
+ int (*callback)(struct xfrm_input_shared *sh);
+ atomic_t refcnt;
+};
+
+static inline void xfrm_shared_get(struct xfrm_input_shared *sh)
+{
+ atomic_inc(&sh->refcnt);
+}
+
+static inline void xfrm_shared_put(struct xfrm_input_shared *sh)
+{
+ if (atomic_dec_and_test(&sh->refcnt))
+ kfree(sh);
+}
+#endif
+
static inline void
secpath_reset(struct sk_buff *skb)
{

View File

@@ -0,0 +1,36 @@
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 43233af..2e401ae 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -58,6 +58,10 @@ enum ctattr_type {
CTA_FILTER,
CTA_STATUS_MASK,
CTA_TIMESTAMP_EVENT,
+ CTA_LAYERSCAPE_FP_ORIG,
+ CTA_LAYERSCAPE_FP_REPLY,
+ CTA_QOSCONNMARK,
+ CTA_QOSCONNMARK_PAD,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
@@ -243,6 +247,20 @@ enum ctattr_secctx {
};
#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1)
+enum ctattr_comcerto_fp {
+ CTA_COMCERTO_FP_UNSPEC,
+ CTA_COMCERTO_FP_MARK,
+ CTA_COMCERTO_FP_IFINDEX,
+ CTA_COMCERTO_FP_IIF,
+ CTA_COMCERTO_FP_UNDERLYING_IIF,
+ CTA_COMCERTO_FP_UNDERLYING_VID,
+#ifndef IPSEC_FLOW_CACHE
+ CTA_COMCERTO_FP_XFRM_HANDLE,
+#endif
+ __CTA_COMCERTO_FP_MAX
+};
+#define CTA_COMCERTO_FP_MAX (__CTA_COMCERTO_FP_MAX - 1)
+
enum ctattr_stats_cpu {
CTA_STATS_UNSPEC,
CTA_STATS_SEARCHED, /* no longer used */

View File

@@ -0,0 +1,28 @@
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9a3d6f2b8c1e..b4f7b2c1d9aa 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -620,6 +620,11 @@ struct br_input_skb_cb {
#endif
u32 backup_nhid;
+
+#ifdef CONFIG_CPE_FAST_PATH
+ u16 vid;
+ u8 untagged:1;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -859,6 +864,11 @@ int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags);
+#if defined(CONFIG_CPE_FAST_PATH)
+extern void br_fdb_register_can_expire_cb(int(*cb)(unsigned char *mac_addr, struct net_device *dev));
+extern void br_fdb_deregister_can_expire_cb(void);
+#endif
+
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid,
bool *notified, struct netlink_ext_ack *extack);

View File

@@ -0,0 +1,131 @@
diff --git a/net/core/dev.c b/net/core/dev.c
index 2acfa44..02e304b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,6 +176,10 @@ static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
+#if defined(CONFIG_CPE_FAST_PATH)
+static fp_iface_stats_get fast_path_stats_get;
+#endif
+
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
@@ -4002,9 +4006,15 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
skb = segs;
}
} else {
- if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb))
- goto out_kfree_skb;
+ /* Linearize only if IPsec policy is not selected. */
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (!skb->ipsec_offload)
+#endif
+ {
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto out_kfree_skb;
+ }
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
@@ -4792,6 +4802,40 @@ out:
}
EXPORT_SYMBOL(__dev_queue_xmit);
+#if defined(CONFIG_CPE_FAST_PATH)
+/* WiFi IPsec offload hook - allows cdx to intercept packets for IPsec
+ * processing when the packet is transmitted on a wifi interface.
+ */
+dpaa_wifi_xmit_local_hook_t dpaa_wifi_xmit_local_ipsec_handler;
+EXPORT_SYMBOL(dpaa_wifi_xmit_local_ipsec_handler);
+
+/* Register a hook function for IPsec offload on wifi interfaces. */
+int dpa_register_wifi_xmit_local_hook(dpaa_wifi_xmit_local_hook_t hookfn)
+{
+ if (dpaa_wifi_xmit_local_ipsec_handler) {
+ pr_warn("%s: hook already registered\n", __func__);
+ return -1;
+ }
+ dpaa_wifi_xmit_local_ipsec_handler = hookfn;
+ return 0;
+}
+EXPORT_SYMBOL(dpa_register_wifi_xmit_local_hook);
+
+/* Unregister the IPsec offload hook. */
+void dpa_unregister_wifi_xmit_local_hook(void)
+{
+ dpaa_wifi_xmit_local_ipsec_handler = NULL;
+}
+EXPORT_SYMBOL(dpa_unregister_wifi_xmit_local_hook);
+
+/* Original dev_queue_xmit - called when wifi hook is not applicable. */
+int original_dev_queue_xmit(struct sk_buff *skb)
+{
+ return __dev_queue_xmit(skb, NULL);
+}
+EXPORT_SYMBOL(original_dev_queue_xmit);
+#endif
+
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
struct net_device *dev = skb->dev;
@@ -5862,6 +5906,15 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
trace_netif_receive_skb(skb);
+#ifdef CONFIG_CPE_FAST_PATH
+ /* ifindex of device we arrived on, now skb->skb_iif
+ * always tracks skb->dev.
+ */
+ if (!skb->iif_index)
+ skb->iif_index = skb->dev->ifindex;
+ if (!skb->underlying_iif)
+ skb->underlying_iif = skb->dev->ifindex;
+#endif
orig_dev = skb->dev;
skb_reset_network_header(skb);
@@ -7627,9 +7680,9 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
return work;
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush_normal(&n->gro, HZ >= 1000);
+ gro_flush(&n->gro, HZ >= 1000);
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
*/
@@ -11762,10 +11812,28 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
}
}
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (fast_path_stats_get)
+ fast_path_stats_get(dev, storage);
+#endif
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
+#if defined(CONFIG_CPE_FAST_PATH)
+void dev_fp_stats_get_register(fp_iface_stats_get func)
+{
+ fast_path_stats_get = func;
+}
+EXPORT_SYMBOL(dev_fp_stats_get_register);
+
+void dev_fp_stats_get_deregister(void)
+{
+ fast_path_stats_get = NULL;
+}
+EXPORT_SYMBOL(dev_fp_stats_get_deregister);
+#endif
+
/**
* dev_fetch_sw_netstats - get per-cpu network device statistics
* @s: place to store stats

View File

@@ -0,0 +1,79 @@
diff -uNr a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
--- a/net/ipv4/ip_output.c 2026-05-08 18:06:42.017639432 +0000
+++ b/net/ipv4/ip_output.c 2026-05-08 18:06:42.100831810 +0000
@@ -103,6 +103,17 @@
{
struct iphdr *iph = ip_hdr(skb);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+	/*
+	 * The tunnel header is not added in the slow path, so for an IPv6
+	 * over IPv4 IPsec tunnel the skb may still hold an IPv6 packet here.
+	 * Treating it as IPv4 and accessing the IPv4 header in the skb leads
+	 * to invalid accesses and a kernel panic, so skip the IPv4 header
+	 * handling below for such packets.
+	 */
+ if ((skb->ipsec_offload) && (iph->version == 6))
+ goto sendout;
+#endif /* endif for CONFIG_INET_IPSEC_OFFLOAD */
IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
iph_set_totlen(iph, skb->len);
@@ -115,8 +126,17 @@
if (unlikely(!skb))
return 0;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+sendout:
+#endif
skb->protocol = htons(ETH_P_IP);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ if (skb->ipsec_offload) {
+ dst_output(net, sk, skb);
+ return 0;
+ } else
+#endif /* endif for CONFIG_INET_IPSEC_OFFLOAD */
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst_dev(skb),
dst_output);
@@ -309,7 +329,11 @@
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu || IPCB(skb)->frag_max_size)
+ if (
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ (skb->ipsec_offload == 0) &&
+#endif
+ (skb->len > mtu || IPCB(skb)->frag_max_size))
return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(net, sk, skb);
@@ -435,6 +459,16 @@
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)
+ /* Bypass invoking post routing hooks since the tunnel header and ESP
+ * processing is not done in slow path for IPSec offloaded cases
+ */
+ if (skb->ipsec_offload) {
+ ret_val = ip_finish_output(net, sk, skb);
+ rcu_read_unlock();
+ return ret_val;
+ }
+#endif
ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, skb, indev, dev,
ip_finish_output,
@@ -559,6 +593,9 @@
skb_dst_copy(to, from);
to->dev = from->dev;
to->mark = from->mark;
+#if defined(CONFIG_CPE_FAST_PATH)
+ to->qosmark = from->qosmark;
+#endif
skb_copy_hash(to, from);

View File

@@ -0,0 +1,73 @@
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f904739..e26e743 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -80,6 +80,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
hdr = ipv6_hdr(skb);
daddr = &hdr->daddr;
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* For IPv4 over IPv6 IPsec tunnel cases, just send the packet out
+ * since the packet is IPv4
+ */
+ if((skb->ipsec_offload) && (hdr->version == 4))
+ goto sendout;
+#endif /* endif for CONFIG_INET6_IPSEC_OFFLOAD */
if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
@@ -111,6 +118,9 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
}
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+sendout:
+#endif /* endif for CONFIG_INET6_IPSEC_OFFLOAD */
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
@@ -202,8 +212,15 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
if (skb_is_gso(skb))
return ip6_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu ||
- (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
+ if (
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* If ipsec offload is there, do not do fragment. So, when IPSec
+ * offload is enabled it directly calls ip6_finish_output2
+ */
+ (skb->ipsec_offload == 0) &&
+#endif
+ ((skb->len > mtu) ||
+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
return ip6_finish_output2(net, sk, skb);
@@ -244,6 +261,16 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return 0;
}
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /* Bypass invoking post routing hooks since the tunnel header and ESP
+ * processing is not done in slow path for IPSec offloaded cases
+ */
+ if (skb->ipsec_offload) {
+ ret = ip6_finish_output(net, sk, skb);
+ rcu_read_unlock();
+ return ret;
+ }
+#endif
ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
net, sk, skb, indev, dev,
ip6_finish_output,
@@ -697,6 +724,9 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_dst_set(to, dst_clone(skb_dst(from)));
to->dev = from->dev;
to->mark = from->mark;
+#if defined(CONFIG_CPE_FAST_PATH)
+ to->qosmark = from->qosmark;
+#endif
skb_copy_hash(to, from);

View File

@@ -0,0 +1,38 @@
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 1c9b283..5682505 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -130,7 +130,19 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
len = skb->len - sizeof(struct ipv6hdr);
if (len > IPV6_MAXPLEN)
len = 0;
+
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ /*
+ * Since tunnel header is not added in slow path and route cache entry
+ * lookup is changed to perform lookup on tunnel header, dst_output()
+ * may reach IPv6 output for IPv4-over-IPv6 IPsec tunnel packets. Only
+ * update the IPv6 payload length when the skb really carries IPv6 here.
+ */
+ if (ipv6_hdr(skb)->version == 6)
+ ipv6_hdr(skb)->payload_len = htons(len);
+#else
ipv6_hdr(skb)->payload_len = htons(len);
+#endif
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
/* if egress device is enslaved to an L3 master device pass the
@@ -142,6 +154,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
+#if defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (skb->ipsec_offload) {
+ dst_output(net, sk, skb);
+ return 0;
+ }
+#endif
+
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst_dev(skb),
dst_output);

View File

@@ -0,0 +1,949 @@
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c56bb4f451e6..7225be6880e1 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -26,8 +26,184 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/xfrm.h>
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)|| defined(CONFIG_INET6_IPSEC_OFFLOAD)
+#include <net/netlink.h>
+#endif
#include <net/sock.h>
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)|| defined(CONFIG_INET6_IPSEC_OFFLOAD)
+#include <net/ip6_route.h>
+#define NLKEY_SUPPORT 1
+#else
+#undef NLKEY_SUPPORT
+#endif
+
+#ifdef NLKEY_SUPPORT
+#include <net/dsfield.h>
+#include <net/inet_dscp.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+
+
+extern int xfrm_get_tos(struct flowi *fl, int family);
+
+
+#define NLKEY_SA_CREATE 0x0A01
+#define NLKEY_SA_DELETE 0x0A02
+#define NLKEY_SA_FLUSH 0x0A03
+#define NLKEY_SA_SET_KEYS 0x0A04
+#define NLKEY_SA_SET_TUNNEL 0x0A05
+#define NLKEY_SA_SET_NATT 0x0A06
+#define NLKEY_SA_SET_STATE 0x0A07
+#define NLKEY_SA_SET_LIFETIME 0x0A08
+#define NLKEY_SA_NOTIFY 0x0A09
+#define NLKEY_SA_INFO_UPDATE 0x0A0C
+#define NLKEY_SA_SET_OFFLOAD 0x0A0D
+#define NLKEY_FLOW_ADD 0x0A11
+#define NLKEY_FLOW_REMOVE 0x0A12
+#define NLKEY_FLOW_NOTIFY 0x0A13
+#define NLKEY_NULL_MSG 0x0000
+
+#define NLKEY_HDR_LEN 4
+#define NLKEY_MSG_LEN 256
+
+#define NLKEY_MAX_NUM_KEYS 2
+#define NLKEY_MAX_KEY_LEN (512 / 8)
+
+struct nlkey_msg {
+ /* message data */
+ unsigned short fcode;
+ unsigned short length;
+ unsigned short payload[(NLKEY_MSG_LEN /sizeof(unsigned short))];
+};
+/* sizeof(nlkey_msg) = 4 + 256 */
+
+struct nlkey_sa_id {
+ unsigned int spi;
+ unsigned char sa_type;
+ unsigned char proto_family;
+ unsigned char replay_window;
+#define NLKEY_SAFLAGS_ESN 0x1
+#define NLKEY_SAFLAGS_INBOUND 0x2
+ unsigned char flags;
+ unsigned int dst_ip[4];
+ unsigned int src_ip[4];
+ unsigned short mtu;
+ unsigned short dev_mtu;
+
+};
+/* sizeof(nlkey_sa_id) = 44 */
+
+struct nlkey_sa_create {
+ unsigned short sagd;
+ unsigned short parent_sa_sagd; /*sagd value of old SA from which this SA is rekeyed.*/
+ struct nlkey_sa_id said;
+};
+/* sizeof(nlkey_sa_create) = 48 */
+
+struct nlkey_sa_delete {
+ unsigned short sagd;
+ unsigned short rsvd;
+};
+/* sizeof(nlkey_sa_delete) = 4 */
+
+struct nlkey_sa_set_tunnel {
+ unsigned short sagd;
+ unsigned char rsvd;
+ unsigned char proto_family;
+ union {
+ struct iphdr ipv4h;
+ struct ipv6hdr ipv6h;
+ } h;
+};
+/* sizeof(nlkey_sa_set_tunnel) = 4 + sizeof(struct ipv6hdr), i.e. 44 for a 40-byte IPv6 header */
+
+struct nlkey_sa_set_natt {
+ unsigned short sagd;
+ unsigned short sport;
+ unsigned short dport;
+ unsigned short rsvd;
+};
+/* sizeof(nlkey_sa_set_natt) = 8 */
+
+struct nlkey_sa_set_state {
+ unsigned short sagd;
+ unsigned short parent_sa_sagd;
+ unsigned short state;
+ unsigned short rsvd2;
+};
+/* sizeof(nlkey_sa_set_state) = 8 */
+
+struct nlkey_key_desc {
+ unsigned short key_bits;
+ unsigned char key_alg;
+ unsigned char key_type;
+ unsigned char key[NLKEY_MAX_KEY_LEN];
+};
+/* sizeof(nlkey_key_desc) = 68 */
+
+struct nlkey_sa_set_keys {
+ unsigned short sagd;
+ unsigned short rsvd;
+ unsigned short num_keys;
+ unsigned short rsvd2;
+ struct nlkey_key_desc keys[NLKEY_MAX_NUM_KEYS];
+};
+/* sizeof(nlkey_sa_set_keys) = 144 */
+
+struct nlkey_lifetime_desc {
+ unsigned int allocations;
+ unsigned int bytes[2];
+};
+/* sizeof(nlkey_lifetime_desc) = 12 */
+
+struct nlkey_sa_set_lifetime {
+ unsigned short sagd;
+ unsigned short rsvd;
+ struct nlkey_lifetime_desc hard_time;
+ struct nlkey_lifetime_desc soft_time;
+ struct nlkey_lifetime_desc current_time;
+};
+/* sizeof(nlkey_sa_set_lifetime) = 40 */
+
+/* SA notifications */
+#define IPSEC_SOFT_EXPIRE 0
+#define IPSEC_HARD_EXPIRE 1
+
+struct nlkey_sa_notify {
+ unsigned short sagd;
+ unsigned short rsvd;
+ unsigned int action;
+};
+/* sizeof(nlkey_sa_notify) = 8 */
+
+/* SA Info update */
+
+struct nlkey_sa_info {
+ unsigned short sagd;
+ unsigned short rsvd;
+ unsigned long long bytes;
+ unsigned long long packets;
+};
+/* sizeof(nlkey_sa_info) = 24 where unsigned long long is 8-byte aligned (4 bytes of padding after rsvd) */
+
+
+static int ipsec_nlkey_send(struct net *net, struct xfrm_state *x, const struct km_event *c);
+static void ipsec_nlkey_rcv(struct sk_buff *skb);
+static void ipsec_nlkey_init(void);
+static unsigned short ipsec_sacode_to_nlkeycode(unsigned short sa_code);
+static struct sk_buff * ipsec_xfrm2nlkey (struct net *net, struct xfrm_state *x,
+ const struct km_event *c, unsigned short *msg_id);
+static int ipsec_nlkey_set_said(struct net *net, struct xfrm_state *x, const struct km_event *c, struct nlkey_sa_id *said);
+
+void flow_cache_remove(const struct flowi *fl, unsigned short family,
+ unsigned short dir);
+/* netlink NETLINK_KEY socket */
+struct sock *nlkey_socket = NULL;
+
+#endif
+/************************************************************************************/
+
#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
@@ -876,6 +1051,10 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP;
if (x->props.flags & XFRM_STATE_NOPMTUDISC)
sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC;
+#ifdef NLKEY_SUPPORT
+ if (x->props.flags & XFRM_STATE_ESN)
+ sa->sadb_sa_flags |= SADB_SAFLAGS_ESN;
+#endif
/* hard time */
if (hsc & 2) {
@@ -908,6 +1087,11 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
lifetime->sadb_lifetime_bytes = x->curlft.bytes;
lifetime->sadb_lifetime_addtime = x->curlft.add_time;
lifetime->sadb_lifetime_usetime = x->curlft.use_time;
+
+#if defined(CONFIG_INET_IPSEC_OFFLOAD)|| defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ lifetime->sadb_lifetime_usetime = x->curr_time;
+#endif
+
/* src address */
addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
addr->sadb_address_len =
@@ -1133,6 +1317,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
x->props.flags |= XFRM_STATE_DECAP_DSCP;
if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
x->props.flags |= XFRM_STATE_NOPMTUDISC;
+#ifdef NLKEY_SUPPORT
+ if (sa->sadb_sa_flags & SADB_SAFLAGS_ESN)
+ x->props.flags |= XFRM_STATE_ESN;
+#endif
lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1];
if (lifetime != NULL) {
@@ -3076,6 +3264,12 @@ static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c)
struct net *net = x ? xs_net(x) : c->net;
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
+
+#ifdef NLKEY_SUPPORT
+ /* send message to the user space through NETLINK_KEY socket*/
+ ipsec_nlkey_send(net, x, c);
+#endif
+
if (atomic_read(&net_pfkey->socks_nr) == 0)
return 0;
@@ -3863,6 +4057,687 @@ static struct xfrm_mgr pfkeyv2_mgr =
.is_alive = pfkey_is_alive,
};
+
+#ifdef NLKEY_SUPPORT
+extern struct xfrm_state *xfrm_state_lookup_byhandle(struct net *net, u16 handle);
+
+/*
+ * Translate an xfrm km_event code (XFRM_MSG_*) into the first NETLINK_KEY
+ * message code to emit for it. NEWSA and UPDSA both map to NLKEY_SA_CREATE;
+ * any code not listed here yields NLKEY_NULL_MSG.
+ */
+static unsigned short ipsec_sacode_to_nlkeycode(unsigned short sa_code)
+{
+	switch (sa_code) {
+	case XFRM_MSG_DELSA:
+		return NLKEY_SA_DELETE;
+
+	case XFRM_MSG_NEWSA:
+	case XFRM_MSG_UPDSA:
+		return NLKEY_SA_CREATE;
+
+	case XFRM_MSG_FLUSHSA:
+		return NLKEY_SA_FLUSH;
+
+	case XFRM_MSG_EXPIRE:
+		return NLKEY_SA_SET_STATE;
+
+	default:
+		break;
+	}
+	return NLKEY_NULL_MSG;
+}
+
+/*
+ * Input handler of the NETLINK_KEY socket: process one command from user space.
+ * The skb holds a struct nlmsghdr followed by a struct nlkey_msg whose fcode
+ * selects the action. Messages that come from a sender without CAP_NET_ADMIN,
+ * are too short for their fcode, or carry an unknown fcode are silently dropped.
+ */
+static void ipsec_nlkey_rcv(struct sk_buff *skb)
+{
+	const unsigned int hdr_len = NLMSG_HDRLEN + offsetof(struct nlkey_msg, payload);
+	struct nlkey_sa_notify sa_notify_msg;
+	struct nlkey_sa_info sa_info_msg;
+	unsigned short family, dir;
+	struct nlkey_msg *msg;
+	struct xfrm_state *x;
+	unsigned int avail;
+	unsigned short *p;
+	struct flowi flow;
+
+	/* these commands change SA state, so require CAP_NET_ADMIN like xfrm_user does */
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return;
+	if (skb->len < hdr_len)
+		return;
+	avail = skb->len - hdr_len;
+	msg = (struct nlkey_msg *)NLMSG_DATA((struct nlmsghdr *)skb->data);
+	switch (msg->fcode) {
+	case NLKEY_FLOW_REMOVE:
+		if (avail < sizeof(struct flowi) + 2 * sizeof(unsigned short))
+			break;
+		p = msg->payload;
+		memcpy(&flow, p, sizeof(struct flowi));
+		p += sizeof(struct flowi) / 2;
+		family = *p++;
+		dir = *p;
+		flow_cache_remove(&flow, family, dir);
+		break;
+	case NLKEY_SA_NOTIFY:
+		if (avail < sizeof(sa_notify_msg))
+			break;
+		memcpy(&sa_notify_msg, msg->payload, sizeof(struct nlkey_sa_notify));
+		x = xfrm_state_lookup_byhandle(&init_net, sa_notify_msg.sagd);
+		if (!x)
+			break;
+		/* x->lock is also taken from softirq context: keep BHs off while holding it */
+		spin_lock_bh(&x->lock);
+		if (sa_notify_msg.action) {
+			/* hard expire: mark the SA expired and fire its timer right away */
+			x->km.state = XFRM_STATE_EXPIRED;
+			hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL_SOFT);
+		} else if (!x->km.dying) {
+			/* soft expire: notify the key managers once */
+			x->km.dying = 1;
+			km_state_expired(x, 0, 0);
+		}
+		spin_unlock_bh(&x->lock);
+		xfrm_state_put(x);
+		break;
+	case NLKEY_SA_INFO_UPDATE:
+		if (avail < sizeof(sa_info_msg))
+			break;
+		memcpy(&sa_info_msg, msg->payload, sizeof(struct nlkey_sa_info));
+		x = xfrm_state_lookup_byhandle(&init_net, sa_info_msg.sagd);
+		if (!x)
+			break;
+		spin_lock_bh(&x->lock);
+		/* curr_time records when the byte counter last changed */
+		if (x->curlft.bytes != sa_info_msg.bytes)
+			x->curr_time = ktime_get_real_seconds();
+		x->curlft.bytes = sa_info_msg.bytes;
+		x->curlft.packets = sa_info_msg.packets;
+		spin_unlock_bh(&x->lock);
+		xfrm_state_put(x);
+		break;
+	case NLKEY_SA_SET_OFFLOAD:
+		if (avail < sizeof(sa_notify_msg))
+			break;
+		memcpy(&sa_notify_msg, msg->payload, sizeof(struct nlkey_sa_notify));
+		x = xfrm_state_lookup_byhandle(&init_net, sa_notify_msg.sagd);
+		if (!x)
+			break;
+		spin_lock_bh(&x->lock);
+		x->offloaded = sa_notify_msg.action ? 1 : 0;
+		spin_unlock_bh(&x->lock);
+		xfrm_state_put(x);
+		break;
+	default:
+		break;
+	}
+}
+/*
+ * Fill the SA identifier @said from xfrm state @x: SPI, protocol, family, replay
+ * window, addresses, flags and MTUs. A route lookup towards the SA destination
+ * supplies the output interface for the dst lookup; the SA is flagged
+ * NLKEY_SAFLAGS_INBOUND when that dst uses device "lo".
+ * @c is unused. Returns 0 on success, -1 when a lookup fails.
+ */
+static int ipsec_nlkey_set_said(struct net *net, struct xfrm_state *x,
+		const struct km_event *c, struct nlkey_sa_id *said)
+{
+	xfrm_address_t saddr, daddr;
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
+	struct rtable *rt4;
+	struct flowi fl;
+	int oif = 0;
+	int tos;
+
+	memset(&fl, 0, sizeof(struct flowi));
+
+	said->spi = x->id.spi;
+	said->sa_type = x->id.proto;			/* AH or ESP */
+	said->proto_family = x->props.family;		/* IPv4 or IPv6 */
+	said->replay_window = x->props.replay_window;
+	/* source and destination addresses; the destination also seeds the flow */
+	if(x->props.family == AF_INET6) {
+		memcpy(&said->dst_ip, x->id.daddr.a6, sizeof(struct in6_addr));
+		fl.u.ip6.daddr = *(struct in6_addr *)x->id.daddr.a6;
+		memcpy(&said->src_ip, x->props.saddr.a6, sizeof(struct in6_addr));
+	}
+	else {
+		said->dst_ip[0] = x->id.daddr.a4;
+		fl.u.ip4.daddr = x->id.daddr.a4;
+		said->src_ip[0] = x->props.saddr.a4;
+	}
+	said->mtu = 0;
+
+	if(x->props.flags & XFRM_STATE_ESN)
+		said->flags = NLKEY_SAFLAGS_ESN;
+	xfrm_flowi_addr_get(&fl, &saddr, &daddr, x->props.family);
+
+	tos = xfrm_get_tos(&fl, x->props.family);
+	if (tos < 0) {
+		printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+		return -1;
+	}
+
+	switch (x->props.family) {
+	case AF_INET:
+		/* yields ERR_PTR() on failure, never NULL, and a referenced route otherwise */
+		rt4 = __ip_route_output_key(net, &(fl.u.ip4));
+		if (IS_ERR(rt4)) {
+			printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+			return -1;
+		}
+		oif = fl.u.ip4.flowi4_oif;
+		/* only the oif was needed: drop the route reference */
+		dst_release(&rt4->dst);
+		break;
+
+	case AF_INET6:
+		rt6 = rt6_lookup(net, &fl.u.ip6.daddr, NULL, 0, NULL, 0);
+		if (!rt6 || !rt6->dst.dev) {
+			printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+			if (rt6)
+				dst_release(&rt6->dst);
+			return -1;
+		}
+		oif = rt6->dst.dev->ifindex;
+		/* rt6_lookup() returns a referenced route as well */
+		dst_release(&rt6->dst);
+		break;
+	}
+
+	{
+		struct xfrm_dst_lookup_params params = {
+			.net = net,
+			.dscp = inet_dsfield_to_dscp(tos),
+			.oif = oif,
+			.saddr = NULL,
+			.daddr = &daddr,
+			.mark = xfrm_smark_get(0, x),
+		};
+		dst = __xfrm_dst_lookup(x->props.family, &params);
+	}
+	if (IS_ERR(dst)) {
+		printk(KERN_ERR "%s:%d: FIXME\n",__func__,__LINE__);
+		return -1;
+	}
+
+	if (strcmp(dst->dev->name, "lo") == 0)
+		said->flags |= NLKEY_SAFLAGS_INBOUND;
+	said->dev_mtu = dst_mtu(dst);
+	said->mtu = xfrm_state_mtu(x,dst_mtu(dst));
+	dst_release(dst);
+	return 0;
+}
+
+/*
+ * Tell whether the keys of @x fit an NLKEY_SA_SET_KEYS message: each key must fit
+ * nlkey_key_desc.key[] and the number of keys must not exceed NLKEY_MAX_NUM_KEYS.
+ */
+static bool ipsec_nlkey_keys_fit(const struct xfrm_state *x)
+{
+	unsigned int nkeys = 0;
+
+	if (x->aalg && x->aalg->alg_key_len) {
+		if (x->aalg->alg_key_len / 8 > NLKEY_MAX_KEY_LEN)
+			return false;
+		nkeys++;
+	}
+	if (x->ealg && x->ealg->alg_key_len) {
+		if (x->ealg->alg_key_len / 8 > NLKEY_MAX_KEY_LEN)
+			return false;
+		nkeys++;
+	}
+	if (x->aead && x->aead->alg_key_len) {
+		if (x->aead->alg_key_len / 8 > NLKEY_MAX_KEY_LEN)
+			return false;
+		nkeys++;
+	}
+	return nkeys <= NLKEY_MAX_NUM_KEYS;
+}
+
+/*
+ * Build one NETLINK_KEY message about SA @x for user space. @msg_id is in/out:
+ * the message to build on entry, the next message of the sequence on return
+ * (CREATE -> SET_KEYS -> SET_TUNNEL -> SET_NATT -> SET_LIFETIME -> SET_STATE),
+ * or NLKEY_NULL_MSG once the sequence is finished or aborted.
+ * Returns the skb to broadcast, or NULL when this step yields no message.
+ */
+static struct sk_buff * ipsec_xfrm2nlkey (struct net *net, struct xfrm_state *x,
+		const struct km_event *c, unsigned short *msg_id)
+{
+	struct nlkey_sa_create sa_create_msg;
+	struct nlkey_sa_delete sa_delete_msg;
+	struct nlkey_sa_set_keys sa_set_keys_msg;
+	struct nlkey_sa_set_tunnel sa_set_tunnel_msg;
+	struct nlkey_sa_set_natt sa_set_natt_msg;
+	struct nlkey_sa_set_state sa_set_state_msg;
+	struct nlkey_sa_set_lifetime sa_set_lifetime_msg;
+	struct nlkey_key_desc *kd;
+	struct nlkey_msg msg;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh = NULL;
+	gfp_t allocation = GFP_ATOMIC; /* this may be called from atomic context */
+	unsigned char tunnel, keys, natt, state, lifetime;
+
+	/* supported SA information */
+	keys = 1; state = 1; tunnel = 1; lifetime = 1; natt = 1;
+
+	/* next message to build */
+	memset(&msg, 0, sizeof(struct nlkey_msg));
+	msg.fcode = *msg_id;
+
+	switch (msg.fcode)
+	{
+	case NLKEY_SA_CREATE:
+		if(x) {
+			/* some checks before building the message */
+			if(x->id.proto != IPPROTO_ESP) {
+				printk(KERN_ERR "protocol %d not supported in fast path.\n", x->id.proto);
+				*msg_id = NLKEY_NULL_MSG;
+				goto exit;
+			}
+			/* refuse the SA before anything is sent if its keys cannot be conveyed */
+			if (!ipsec_nlkey_keys_fit(x)) {
+				printk(KERN_ERR "%s: key size not supported in fast path.\n", __func__);
+				*msg_id = NLKEY_NULL_MSG;
+				goto exit;
+			}
+			memset(&sa_create_msg, 0, sizeof(struct nlkey_sa_create));
+
+			/* SA global handler */
+			sa_create_msg.sagd = x->handle;
+			sa_create_msg.parent_sa_sagd = x->parent_sa_handle;
+			/* SA identifier */
+			if(ipsec_nlkey_set_said(net, x, c, &sa_create_msg.said) < 0)
+			{
+				printk(KERN_ERR "%s: set sa ID failed\n", __func__);
+				*msg_id = NLKEY_NULL_MSG; /* next message */
+				goto exit;
+			}
+			memcpy(msg.payload, &sa_create_msg, sizeof(struct nlkey_sa_create));
+			msg.length = sizeof(struct nlkey_sa_create);
+			*msg_id = NLKEY_SA_SET_KEYS; /* next message */
+		} else {
+			*msg_id = NLKEY_NULL_MSG; /* next message */
+			goto exit;
+		}
+		break;
+
+	case NLKEY_SA_SET_KEYS:
+		if(keys) {
+			/* defensive: this check is what bounds the key[] copies below */
+			if (!ipsec_nlkey_keys_fit(x)) {
+				printk(KERN_ERR "%s: key size not supported in fast path.\n", __func__);
+				*msg_id = NLKEY_NULL_MSG;
+				goto exit;
+			}
+			memset(&sa_set_keys_msg, 0, sizeof(struct nlkey_sa_set_keys));
+			/* SA global handler */
+			sa_set_keys_msg.sagd = x->handle;
+			/* auth key */
+			if (x->aalg && x->aalg->alg_key_len) {
+				kd = &sa_set_keys_msg.keys[sa_set_keys_msg.num_keys++];
+				kd->key_bits = x->aalg->alg_key_len;
+				kd->key_alg = x->props.aalgo;
+				kd->key_type = 0;
+				memcpy(kd->key, x->aalg->alg_key, kd->key_bits / 8);
+			}
+			/* encrypt key */
+			if (x->ealg && x->ealg->alg_key_len) {
+				kd = &sa_set_keys_msg.keys[sa_set_keys_msg.num_keys++];
+				kd->key_bits = x->ealg->alg_key_len;
+				kd->key_alg = x->props.ealgo;
+				kd->key_type = 1;
+				memcpy(kd->key, x->ealg->alg_key, kd->key_bits / 8);
+			}
+			/* combined (AEAD) key; key_alg stays 0 for an unmatched name or ICV length */
+			if (x->aead && x->aead->alg_key_len) {
+				kd = &sa_set_keys_msg.keys[sa_set_keys_msg.num_keys++];
+				kd->key_bits = x->aead->alg_key_len;
+				if (strstr(x->aead->alg_name, "rfc4106(gcm")) /* AES GCM support */
+				{
+					if (x->aead->alg_icv_len == 64)
+						kd->key_alg = SADB_X_EALG_AES_GCM_ICV8;
+					else if (x->aead->alg_icv_len == 96)
+						kd->key_alg = SADB_X_EALG_AES_GCM_ICV12;
+					else if (x->aead->alg_icv_len == 128)
+						kd->key_alg = SADB_X_EALG_AES_GCM_ICV16;
+				}
+				else if (strstr(x->aead->alg_name, "ccm")) /* AES CCM */
+				{
+					if (x->aead->alg_icv_len == 64)
+						kd->key_alg = SADB_X_EALG_AES_CCM_ICV8;
+					else if (x->aead->alg_icv_len == 96)
+						kd->key_alg = SADB_X_EALG_AES_CCM_ICV12;
+					else if (x->aead->alg_icv_len == 128)
+						kd->key_alg = SADB_X_EALG_AES_CCM_ICV16;
+				}
+				else if (strstr(x->aead->alg_name, "rfc4543(gcm")) /* AES GMAC defined in RFC 4543 derived from AES GCM */
+				{
+					kd->key_alg = SADB_X_EALG_NULL_AES_GMAC;
+				}
+				kd->key_type = 1;
+				memcpy(kd->key, x->aead->alg_key, kd->key_bits / 8);
+			}
+			memcpy(msg.payload, &sa_set_keys_msg, sizeof(struct nlkey_sa_set_keys));
+			msg.length = sizeof(struct nlkey_sa_set_keys);
+			*msg_id = NLKEY_SA_SET_TUNNEL; /* next message */
+		} else {
+			*msg_id = NLKEY_SA_SET_TUNNEL; /* next message */
+			goto exit;
+		}
+		break;
+
+	case NLKEY_SA_SET_TUNNEL:
+		if(tunnel && (x->props.mode == XFRM_MODE_TUNNEL)) {
+			memset(&sa_set_tunnel_msg, 0, sizeof(struct nlkey_sa_set_tunnel));
+			/* SA global handler */
+			sa_set_tunnel_msg.sagd = x->handle;
+
+			/* Tunnel: the outer IP header is passed as a pre-filled template */
+			sa_set_tunnel_msg.proto_family = x->props.family;
+			if(x->props.family == AF_INET6) {
+				struct ipv6hdr *top_iph = &sa_set_tunnel_msg.h.ipv6h;
+				int dsfield;
+				top_iph->version = 6;
+				top_iph->priority = 0;
+				top_iph->flow_lbl[0] = 0;
+				top_iph->flow_lbl[1] = 0;
+				top_iph->flow_lbl[2] = 0;
+				top_iph->nexthdr = IPPROTO_IPIP;
+				dsfield = ipv6_get_dsfield(top_iph);
+				dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+				if (x->props.flags & XFRM_STATE_NOECN)
+					dsfield &= ~INET_ECN_MASK;
+				ipv6_change_dsfield(top_iph, 0, dsfield);
+				top_iph->hop_limit = 64;
+				memcpy(&top_iph->daddr, x->id.daddr.a6, sizeof(struct in6_addr));
+				memcpy(&top_iph->saddr, x->props.saddr.a6, sizeof(struct in6_addr));
+			}
+			else {
+				struct iphdr *top_iph = &sa_set_tunnel_msg.h.ipv4h;
+				top_iph->ihl = 5;
+				top_iph->version = 4;
+				top_iph->tos = 0;
+				top_iph->frag_off = 0;
+				top_iph->ttl = 64;
+				top_iph->saddr = x->props.saddr.a4;
+				top_iph->daddr = x->id.daddr.a4;
+			}
+			memcpy(msg.payload, &sa_set_tunnel_msg, sizeof(struct nlkey_sa_set_tunnel));
+			msg.length = sizeof(struct nlkey_sa_set_tunnel);
+			*msg_id = NLKEY_SA_SET_NATT; /* next message */
+		} else {
+			*msg_id = NLKEY_SA_SET_NATT; /* next message */
+			goto exit;
+		}
+		break;
+
+	case NLKEY_SA_SET_NATT:
+		if((natt) && (x->encap)){
+			memset(&sa_set_natt_msg, 0, sizeof(struct nlkey_sa_set_natt));
+			/* SA global handler */
+			sa_set_natt_msg.sagd = x->handle;
+			sa_set_natt_msg.sport = x->encap->encap_sport;
+			sa_set_natt_msg.dport = x->encap->encap_dport;
+			memcpy(msg.payload, &sa_set_natt_msg, sizeof(struct nlkey_sa_set_natt));
+			msg.length = sizeof(struct nlkey_sa_set_natt);
+			*msg_id = NLKEY_SA_SET_LIFETIME; /* next message */
+		} else {
+			*msg_id = NLKEY_SA_SET_LIFETIME; /* next message */
+			goto exit;
+		}
+		break;
+
+	case NLKEY_SA_SET_LIFETIME:
+		if(lifetime) {
+			memset(&sa_set_lifetime_msg, 0, sizeof(struct nlkey_sa_set_lifetime));
+			/* SA global handler */
+			sa_set_lifetime_msg.sagd = x->handle;
+			/* hard limits (XFRM_INF is reported as 0) */
+			sa_set_lifetime_msg.hard_time.allocations = _X2KEY(x->lft.hard_packet_limit);
+			if(_X2KEY(x->lft.hard_byte_limit))
+				memcpy(sa_set_lifetime_msg.hard_time.bytes, &x->lft.hard_byte_limit, sizeof(uint64_t));
+
+			/* soft limits */
+			sa_set_lifetime_msg.soft_time.allocations = _X2KEY(x->lft.soft_packet_limit);
+			if(_X2KEY(x->lft.soft_byte_limit))
+				memcpy(sa_set_lifetime_msg.soft_time.bytes, &x->lft.soft_byte_limit, sizeof(uint64_t));
+
+			/* current counters */
+			sa_set_lifetime_msg.current_time.allocations = x->curlft.packets;
+			memcpy(sa_set_lifetime_msg.current_time.bytes, &x->curlft.bytes, sizeof(uint64_t));
+
+			memcpy(msg.payload, &sa_set_lifetime_msg, sizeof(struct nlkey_sa_set_lifetime));
+			msg.length = sizeof(struct nlkey_sa_set_lifetime);
+			*msg_id = NLKEY_SA_SET_STATE; /* next message */
+		} else {
+			*msg_id = NLKEY_SA_SET_STATE; /* next message */
+			goto exit;
+		}
+		break;
+
+	case NLKEY_SA_SET_STATE:
+		if(state) {
+			memset(&sa_set_state_msg, 0, sizeof(struct nlkey_sa_set_state));
+			/* SA global handler */
+			sa_set_state_msg.sagd = x->handle;
+			sa_set_state_msg.parent_sa_sagd = x->parent_sa_handle;
+			/* State */
+			sa_set_state_msg.state = x->km.state;
+			// TODO: set the offloaded state once ack received !
+			memcpy(msg.payload, &sa_set_state_msg, sizeof(struct nlkey_sa_set_state));
+			msg.length = sizeof(struct nlkey_sa_set_state);
+			*msg_id = NLKEY_NULL_MSG; /* next message */
+		} else {
+			*msg_id = NLKEY_NULL_MSG; /* next message */
+			goto exit;
+		}
+		break;
+
+	case NLKEY_SA_DELETE:
+		memset(&sa_delete_msg, 0, sizeof(struct nlkey_sa_delete));
+		/* SA global handler */
+		sa_delete_msg.sagd = x->handle;
+		memcpy(msg.payload, &sa_delete_msg, sizeof(struct nlkey_sa_delete));
+		msg.length = sizeof(struct nlkey_sa_delete);
+		*msg_id = NLKEY_NULL_MSG; /* next message */
+		break;
+
+	case NLKEY_SA_FLUSH:
+		/* No data required for flush SA command */
+		*msg_id = NLKEY_NULL_MSG; /* next message */
+		break;
+
+	default:
+		printk(KERN_ERR "ipsec_xfrm2nlkey: event 0x%x not supported\n", c->event);
+		*msg_id = NLKEY_NULL_MSG; /* next message */
+		break;
+	}
+
+	/* prepare netlink message for kernel to user space direction */
+	if(msg.length > NLKEY_MSG_LEN)
+	{
+		printk(KERN_ERR "ipsec_xfrm2nlkey: maximum message size reached (%d bytes)\n", msg.length);
+		goto exit;
+	}
+
+	skb = alloc_skb(NLMSG_SPACE(NLKEY_MSG_LEN + NLKEY_HDR_LEN), allocation);
+	if (skb == NULL)
+		goto exit;
+
+	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+	/* zero header and padding so that no stale heap bytes (e.g. nlmsg_seq) reach user space */
+	memset(nlh, 0, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+	memcpy(NLMSG_DATA(nlh), (unsigned char *)&msg, (NLKEY_HDR_LEN + msg.length));
+
+	/* whole length of the message i.e. header + payload */
+	nlh->nlmsg_len = NLMSG_SPACE(NLKEY_HDR_LEN + msg.length);
+
+	/* from kernel */
+	nlh->nlmsg_pid = 0;
+	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_type = 0;
+	NETLINK_CB(skb).portid = 0;
+	NETLINK_CB(skb).dst_group = 1;
+exit:
+	return skb;
+}
+
+/* Broadcast the NETLINK_KEY message sequence for km event @c; returns 0 or the first send error. */
+static int ipsec_nlkey_send(struct net *net, struct xfrm_state *x, const struct km_event *c)
+{
+	unsigned short msg_type = ipsec_sacode_to_nlkeycode((unsigned short)c->event);
+	struct sk_buff *skb;
+	int rc = 0;
+
+	/* km events can fire before ipsec_nlkey_init() has run, or after it failed */
+	if (!nlkey_socket)
+		return 0;
+	while (msg_type != NLKEY_NULL_MSG) {
+		/* builds one message and advances msg_type to the next step */
+		skb = ipsec_xfrm2nlkey(net, x, c, &msg_type);
+		if (!skb)
+			continue;
+		rc = netlink_broadcast(nlkey_socket, skb, 0, 1, GFP_ATOMIC);
+		if (rc < 0)
+			return rc;
+	}
+	return rc;
+}
+
+
+/*
+ * Announce an IPsec flow to user space with an NLKEY_FLOW_ADD broadcast.
+ * The payload is a sequence of 16-bit words - xfrm_nr, the xfrm_nr SA handles,
+ * family, dir, ignore_neigh - followed by a raw copy of *fl. Returns the
+ * netlink_broadcast() result, or -ESRCH (no socket), -EMSGSIZE, -ENOMEM.
+ */
+int ipsec_nlkey_flow(u16 xfrm_nr, u16 *xfrm_handle, const struct flowi *fl, u16 family, u16 dir, u16 ignore_neigh)
+{
+	const size_t len = (4 + (size_t)xfrm_nr) * sizeof(unsigned short) + sizeof(struct flowi);
+	gfp_t allocation = GFP_ATOMIC; /* this may be called from atomic context */
+	struct nlmsghdr *nlh = NULL;
+	struct nlkey_msg msg;
+	struct sk_buff *skb;
+	unsigned short *p;
+
+	if (!nlkey_socket)
+		return -ESRCH;
+	/* same limit as ipsec_xfrm2nlkey(): the skb below has room for NLKEY_MSG_LEN payload bytes */
+	if (len > NLKEY_MSG_LEN)
+		return -EMSGSIZE;
+
+	memset(&msg, 0, sizeof(struct nlkey_msg));
+	msg.fcode = NLKEY_FLOW_ADD;
+	msg.length = len;
+
+	p = msg.payload;
+	*p++ = xfrm_nr;						/* number of SAs for this flow */
+	memcpy(p, xfrm_handle, xfrm_nr*sizeof(unsigned short));	/* SA handles list */
+	p+=xfrm_nr;
+	*p++ = family;						/* flow family */
+	*p++ = dir;						/* flow direction */
+	*p++ = ignore_neigh;					/* flow mode */
+	memcpy(p, fl, sizeof(struct flowi));			/* flow descriptor */
+
+	skb = alloc_skb(NLMSG_SPACE(NLKEY_MSG_LEN + NLKEY_HDR_LEN), allocation);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	/* prepare netlink message for kernel to user space direction */
+	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+	/* zero header and padding so that no stale heap bytes (e.g. nlmsg_seq) reach user space */
+	memset(nlh, 0, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+	memcpy(NLMSG_DATA(nlh), (unsigned char *)&msg, (NLKEY_HDR_LEN + msg.length));
+
+	/* whole length of the message i.e. header + payload */
+	nlh->nlmsg_len = NLMSG_SPACE(NLKEY_HDR_LEN + msg.length);
+
+	/* from kernel */
+	nlh->nlmsg_pid = 0;
+	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_type = 0;
+	NETLINK_CB(skb).portid = 0;
+	NETLINK_CB(skb).dst_group = 1;
+
+	return(netlink_broadcast(nlkey_socket, skb, 0, 1, allocation));
+}
+EXPORT_SYMBOL(ipsec_nlkey_flow);
+
+
+/*
+ * Tell user space to drop an IPsec flow with an NLKEY_FLOW_REMOVE broadcast.
+ * The payload is family and dir as 16-bit words followed by a raw copy of *fl.
+ * Returns the netlink_broadcast() result, or -ESRCH (no socket), -ENOMEM.
+ */
+int ipsec_nlkey_flow_remove(struct flowi *fl, u16 family, u16 dir)
+{
+	gfp_t allocation = GFP_ATOMIC; /* this may be called from atomic context */
+	struct nlmsghdr *nlh = NULL;
+	struct nlkey_msg msg;
+	struct sk_buff *skb;
+	unsigned short *p;
+
+	if (!nlkey_socket)
+		return -ESRCH;
+
+	memset(&msg, 0, sizeof(struct nlkey_msg));
+	msg.fcode = NLKEY_FLOW_REMOVE;
+
+	p = msg.payload;
+	*p++ = family;		/* flow family */
+	msg.length += sizeof(unsigned short);
+	*p++ = dir;		/* flow direction */
+	msg.length += sizeof(unsigned short);
+	/* flow descriptor */
+	memcpy(p, fl, sizeof(struct flowi));
+	msg.length +=sizeof(struct flowi);
+
+	skb = alloc_skb(NLMSG_SPACE(NLKEY_MSG_LEN + NLKEY_HDR_LEN), allocation);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	/* prepare netlink message for kernel to user space direction */
+	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+	/* zero header and padding so that no stale heap bytes (e.g. nlmsg_seq) reach user space */
+	memset(nlh, 0, NLMSG_SPACE(NLKEY_HDR_LEN + msg.length));
+	memcpy(NLMSG_DATA(nlh), (unsigned char *)&msg, (NLKEY_HDR_LEN + msg.length));
+
+	/* whole length of the message i.e. header + payload */
+	nlh->nlmsg_len = NLMSG_SPACE(NLKEY_HDR_LEN + msg.length);
+
+	/* from kernel */
+	nlh->nlmsg_pid = 0;
+	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_type = 0;
+	NETLINK_CB(skb).portid = 0;
+	NETLINK_CB(skb).dst_group = 1;
+
+	return(netlink_broadcast(nlkey_socket, skb, 0, 1, allocation));
+}
+EXPORT_SYMBOL(ipsec_nlkey_flow_remove);
+
+
+
+/* Create the kernel-side NETLINK_KEY socket in init_net, with one multicast group. */
+static void ipsec_nlkey_init(void)
+{
+	struct netlink_kernel_cfg cfg = { .groups = 1, .input = ipsec_nlkey_rcv };
+	printk(KERN_INFO "Initializing NETLINK_KEY socket\n");
+	nlkey_socket = netlink_kernel_create(&init_net, NETLINK_KEY, &cfg);
+	if (!nlkey_socket)	/* netlink_kernel_create() returns NULL on failure */
+		printk(KERN_ERR "NETLINK_KEY socket creation failed\n");
+}
+#endif
+
+
static int __net_init pfkey_net_init(struct net *net)
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -3897,6 +4772,11 @@ static void __exit ipsec_pfkey_exit(void)
sock_unregister(PF_KEY);
unregister_pernet_subsys(&pfkey_net_ops);
proto_unregister(&key_proto);
+
+#ifdef NLKEY_SUPPORT
+	/* release NETLINK_KEY socket; netlink_kernel_release() tolerates a NULL socket */
+	netlink_kernel_release(nlkey_socket);
+#endif
}
static int __init ipsec_pfkey_init(void)
@@ -3913,6 +4793,12 @@ static int __init ipsec_pfkey_init(void)
if (err != 0)
goto out_unregister_pernet;
xfrm_register_km(&pfkeyv2_mgr);
+
+#ifdef NLKEY_SUPPORT
+ /* create NETLINK_KEY socket for IPSec offload on Comcerto */
+ ipsec_nlkey_init();
+#endif
+
out:
return err;

View File

@@ -0,0 +1,288 @@
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3a04665..7e7d13d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -28,6 +28,11 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
+#if defined(CONFIG_CPE_FAST_PATH)
+#ifndef IPSEC_FLOW_CACHE
+#include <net/xfrm.h>
+#endif
+#endif
#include <linux/slab.h>
#include <linux/siphash.h>
@@ -202,9 +207,16 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct,
struct nlattr *nest_proto;
int ret;
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_lock();
+#endif
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (!l4proto->to_nlattr)
+ if (!l4proto->to_nlattr) {
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_unlock();
+#endif
return 0;
+ }
nest_proto = nla_nest_start(skb, CTA_PROTOINFO);
if (!nest_proto)
@@ -214,9 +226,15 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct,
nla_nest_end(skb, nest_proto);
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_unlock();
+#endif
return ret;
nla_put_failure:
+#ifdef CONFIG_CPE_FAST_PATH
+ rcu_read_unlock();
+#endif
return -1;
}
@@ -353,6 +371,18 @@ nla_put_failure:
#define ctnetlink_dump_mark(a, b, c) (0)
#endif
+#if defined(CONFIG_CPE_FAST_PATH)
+/* Add CTA_QOSCONNMARK to @skb; returns -1 (instead of silently omitting it) when it does not fit. */
+static inline int
+ctnetlink_dump_qosconnmark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	return nla_put_be64(skb, CTA_QOSCONNMARK, cpu_to_be64(ct->qosconnmark),
+			    CTA_QOSCONNMARK_PAD) ? -1 : 0;
+}
+#else
+#define ctnetlink_dump_qosconnmark(a, b) (0)
+#endif
+
#ifdef CONFIG_NF_CONNTRACK_SECMARK
static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
{
@@ -430,6 +460,59 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
return 0;
}
+#if defined(CONFIG_CPE_FAST_PATH)
+/*
+ * Emit the fast-path info of direction @dir of @ct inside a nested attribute
+ * of type @nest_type. Returns 0, or -1 when the nest cannot be started.
+ * NOTE(review): the nla_put*() results are ignored, so attributes that do not
+ * fit in @skb are left out without an error - confirm this is intended.
+ */
+static int
+ctnetlink_dump_comcerto_fp_dir(struct sk_buff *skb, const struct nf_conn *ct,
+			       int dir, int nest_type)
+{
+	const typeof(ct->fp_info[0]) *fp = &ct->fp_info[dir];
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, nest_type | NLA_F_NESTED);
+	if (!nest)
+		return -1;
+
+	nla_put_u32(skb, CTA_COMCERTO_FP_MARK, fp->mark);
+	nla_put_u32(skb, CTA_COMCERTO_FP_IFINDEX, fp->ifindex);
+	nla_put_u32(skb, CTA_COMCERTO_FP_IIF, fp->iif);
+	nla_put_u32(skb, CTA_COMCERTO_FP_UNDERLYING_IIF, fp->underlying_iif);
+	nla_put_u16(skb, CTA_COMCERTO_FP_UNDERLYING_VID, fp->underlying_vlan_id);
+#ifndef IPSEC_FLOW_CACHE
+	/* the handle array is only sent when entry 0 or entry MAX_SUPPORTED_XFRMS_PER_DIR is set */
+	if (fp->xfrm_handle[0] || fp->xfrm_handle[MAX_SUPPORTED_XFRMS_PER_DIR])
+		nla_put(skb, CTA_COMCERTO_FP_XFRM_HANDLE, sizeof(fp->xfrm_handle),
+			fp->xfrm_handle);
+#endif /* IPSEC_FLOW_CACHE */
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
+/*
+ * Dump the fast-path info of both directions of @ct: the original direction
+ * under CTA_LAYERSCAPE_FP_ORIG, then the reply direction under
+ * CTA_LAYERSCAPE_FP_REPLY. Returns 0 on success, -1 otherwise.
+ */
+static int
+ctnetlink_dump_comcerto_fp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	if (ctnetlink_dump_comcerto_fp_dir(skb, ct, IP_CT_DIR_ORIGINAL,
+					   CTA_LAYERSCAPE_FP_ORIG) < 0)
+		return -1;
+
+	return ctnetlink_dump_comcerto_fp_dir(skb, ct, IP_CT_DIR_REPLY,
+					      CTA_LAYERSCAPE_FP_REPLY);
+}
+#else
+#define ctnetlink_dump_comcerto_fp(a, b) (0)
+#endif
+
#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
@@ -570,7 +653,11 @@ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
{
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_mark(skb, ct, true) < 0 ||
+ ctnetlink_dump_qosconnmark(skb, ct) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
+#ifdef CONFIG_CPE_FAST_PATH
+ ctnetlink_dump_comcerto_fp(skb, ct) < 0 ||
+#endif
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0)
@@ -722,6 +809,13 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
+#ifdef CONFIG_CPE_FAST_PATH
+ + 2 * nla_total_size(0) /* CTA_LAYERSCAPE_FP_ORIG|REPL */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_MARK */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_IFINDEX */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_IIF */
+ + 2 * nla_total_size(sizeof(uint32_t)) /* CTA_COMCERTO_FP_UNDERLYING_IIF */
+#endif
#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@@ -729,6 +823,9 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#if defined(CONFIG_CPE_FAST_PATH)
+ + nla_total_size(sizeof(u_int64_t)) /* CTA_QOSCONNMARK */
+#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
+ nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
@@ -806,6 +903,11 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (ctnetlink_dump_comcerto_fp(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+
if (ctnetlink_dump_id(skb, ct) < 0)
goto nla_put_failure;
@@ -858,6 +960,11 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
goto nla_put_failure;
#endif
+#if defined(CONFIG_CPE_FAST_PATH)
+ if ((events & (1 << IPCT_QOSCONNMARK) || ct->qosconnmark) &&
+ ctnetlink_dump_qosconnmark(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
if (ctnetlink_dump_event_timestamp(skb, ct))
goto nla_put_failure;
@@ -1570,6 +1677,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_NAT_SRC] = { .type = NLA_NESTED },
[CTA_TIMEOUT] = { .type = NLA_U32 },
[CTA_MARK] = { .type = NLA_U32 },
+#if defined(CONFIG_CPE_FAST_PATH)
+ [CTA_QOSCONNMARK] = { .type = NLA_U64 },
+#endif
[CTA_ID] = { .type = NLA_U32 },
[CTA_NAT_DST] = { .type = NLA_NESTED },
[CTA_TUPLE_MASTER] = { .type = NLA_NESTED },
@@ -1906,6 +2016,48 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS])));
}
+#if defined(CONFIG_CPE_FAST_PATH)
+/*
+ * Handle ctnetlink messages whose only purpose is to set or clear the
+ * IPS_PERMANENT status bit of a conntrack; nothing else in @ct is modified.
+ * A message qualifies when it carries both CTA_STATUS and CTA_ID and either
+ * requests IPS_PERMANENT or targets a conntrack that is currently permanent
+ * (in which case the bit is cleared).
+ *
+ * Returns 0 if the bit was updated successfully, -ENOENT if CTA_ID does not
+ * match @ct, and -1 if the message is not such a request.
+ */
+static int
+ctnetlink_change_permanent(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+	u32 conntrack_id = ntohl((__force __be32)nf_ct_get_id(ct));
+	unsigned int status;
+	u32 id;
+
+	if (!cda[CTA_STATUS] || !cda[CTA_ID])
+		return -1;
+
+	status = ntohl(nla_get_be32(cda[CTA_STATUS]));
+	id = ntohl(nla_get_be32(cda[CTA_ID]));
+
+	/* neither a set nor a clear request */
+	if (!(status & IPS_PERMANENT) && !nf_ct_is_permanent(ct))
+		return -1;
+
+	if (conntrack_id != id)
+		return -ENOENT;
+
+	/* ct->status is also changed with atomic bitops, so update it atomically */
+	if (status & IPS_PERMANENT)
+		set_bit(IPS_PERMANENT_BIT, &ct->status);
+	else
+		clear_bit(IPS_PERMANENT_BIT, &ct->status);
+
+	return 0;
+}
+
+#endif
+
static int
ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -2209,6 +2361,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
return err;
}
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (cda[CTA_QOSCONNMARK])
+ ct->qosconnmark = be64_to_cpu(nla_get_be64(cda[CTA_QOSCONNMARK]));
+#endif
+
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ctnetlink_change_mark(ct, cda);
@@ -2347,6 +2504,11 @@ ctnetlink_create_conntrack(struct net *net,
goto err2;
}
+#if defined(CONFIG_CPE_FAST_PATH)
+ if (cda[CTA_QOSCONNMARK])
+ ct->qosconnmark = be64_to_cpu(nla_get_be64(cda[CTA_QOSCONNMARK]));
+#endif
+
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ctnetlink_change_mark(ct, cda);
@@ -2473,6 +2635,15 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
+#if defined(CONFIG_CPE_FAST_PATH)
+ /* If the permanent status has been set, this is a specific
+ * message. Don't broadcast the event and don't update the ct */
+ err = ctnetlink_change_permanent(ct, cda);
+ if ((err == 0) || (err == -ENOENT)) {
+ nf_ct_put(ct);
+ return err;
+ }
+#endif
err = ctnetlink_change_conntrack(ct, cda);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |

View File

@@ -0,0 +1,21 @@
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 5d6a0b2b4f3a..7c3d10e2e8b1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config WIRELESS_EXT
- bool
+ def_bool y
config WEXT_CORE
def_bool y
@@ -11,7 +11,7 @@ config WEXT_PROC
depends on WEXT_CORE
config WEXT_PRIV
- bool
+ def_bool y
config CFG80211
tristate "cfg80211 - wireless configuration API"

View File

@@ -0,0 +1,9 @@
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 0a0a0a0..0b0b0b0 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
+obj-$(CONFIG_INET_IPSEC_OFFLOAD) += ipsec_flow.o

View File

@@ -0,0 +1,163 @@
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 62486f8..3cbe4f8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -48,6 +48,11 @@
#include <net/inet_dscp.h>
#include "xfrm_hash.h"
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+#include "ipsec_flow.h"
+#endif
+#endif
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
@@ -179,6 +184,15 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+extern int ipsec_nlkey_flow(u16 xfrm_nr, u16 *xfrm_handle,
+ const struct flowi *fl, u16 family, u16 dir, u16 ignore_neigh);
+int ipsec_flow_init(struct net *net);
+void ipsec_flow_fini(struct net *net);
+#endif
+#endif
+
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -2599,6 +2613,17 @@ static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
return 0;
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+int xfrm_get_tos(const struct flowi *fl, int family)
+{
+ if (family == AF_INET)
+ return inet_dscp_to_dsfield(fl->u.ip4.flowi4_dscp) & INET_DSCP_MASK;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_get_tos);
+#endif
+
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -3295,6 +3320,37 @@ no_transform:
dst = dst_orig;
}
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ {
+ struct dst_entry *dst1 = dst;
+ struct xfrm_state *x;
+ u16 xfrm_handle[XFRM_POLICY_TYPE_MAX];
+ u16 ignore_neigh = 0;
+
+ num_xfrms = 0;
+ memset(xfrm_handle, 0, XFRM_POLICY_TYPE_MAX * sizeof(u16));
+ while (((x = dst1->xfrm) != NULL) &&
+ (num_xfrms < XFRM_POLICY_TYPE_MAX)) {
+ xfrm_handle[num_xfrms++] = x->handle;
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ ignore_neigh = 1;
+ dst1 = xfrm_dst_child(dst1);
+
+ if (dst1 == NULL) {
+ err = -EHOSTUNREACH;
+ goto error;
+ }
+ }
+ if (ipsec_flow_add(net, fl, family, dir, xfrm_handle)) {
+ /* send flow notification to cmm with sa_handle */
+ ipsec_nlkey_flow(num_xfrms, xfrm_handle, fl, family,
+ (unsigned short)dir, ignore_neigh);
+ }
+ }
+#endif
+#endif
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst->xfrm &&
@@ -3853,6 +3909,34 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ {
+ struct xfrm_state *x;
+ u16 xfrm_handle[XFRM_POLICY_TYPE_MAX];
+
+ xfrm_nr = 0;
+ memset(xfrm_handle, 0, XFRM_POLICY_TYPE_MAX * sizeof(u16));
+ for (i = sp->len - 1;
+ (i >= 0) && (xfrm_nr < XFRM_POLICY_TYPE_MAX); i--) {
+ x = sp->xvec[i];
+ xfrm_handle[xfrm_nr++] = x->handle;
+ }
+ if (ipsec_flow_add(net, (const struct flowi *)&fl, family, dir,
+ xfrm_handle)) {
+ /* send flow notification to cmm with sa_handle */
+ ipsec_nlkey_flow(xfrm_nr, xfrm_handle,
+ (const struct flowi *)&fl, family, dir, 0);
+ }
+ }
+
+ /* Hub and spoke changes: Setting the POLICY_IN direction in the packet */
+ skb->ipsec_xfrm_dir |= (1 << XFRM_POLICY_IN);
+
+std_path:
+#endif
+#endif
+
xfrm_pols_put(pols, npols);
sp->verified_cnt = k;
@@ -4328,6 +4412,14 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out_sysctl;
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ rv = ipsec_flow_init(net);
+ if (rv < 0)
+ goto out_ipsec_flow;
+#endif
+#endif
+
rv = xfrm_nat_keepalive_net_init(net);
if (rv < 0)
goto out_nat_keepalive;
@@ -4335,6 +4427,12 @@ static int __net_init xfrm_net_init(struct net *net)
return 0;
out_nat_keepalive:
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ ipsec_flow_fini(net);
+out_ipsec_flow:
+#endif
+#endif
xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
@@ -4349,6 +4447,11 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
xfrm_nat_keepalive_net_fini(net);
+#ifdef IPSEC_FLOW_CACHE
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ ipsec_flow_fini(net);
+#endif
+#endif
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);

View File

@@ -0,0 +1,291 @@
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9e14e45..d685ed7 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -58,6 +58,10 @@ static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
return refcount_inc_not_zero(&x->refcnt);
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+static unsigned short xfrm_state_handle;
+#endif
+
static inline unsigned int xfrm_dst_hash(struct net *net,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -119,6 +123,9 @@ static void xfrm_hash_transfer(struct hlist_head *list,
struct hlist_head *nsrctable,
struct hlist_head *nspitable,
struct hlist_head *nseqtable,
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ struct hlist_head *nhtable,
+#endif
unsigned int nhashmask)
{
struct hlist_node *tmp;
@@ -150,6 +157,13 @@ static void xfrm_hash_transfer(struct hlist_head *list,
XFRM_STATE_INSERT(byseq, &x->byseq, nseqtable + h,
x->xso.type);
}
+
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && x->in_byh_hash) {
+ h = x->handle & nhashmask;
+ hlist_add_head_rcu(&x->byh, nhtable + h);
+ }
+#endif
}
}
@@ -162,6 +176,9 @@ static void xfrm_hash_resize(struct work_struct *work)
{
struct net *net = container_of(work, struct net, xfrm.state_hash_work);
struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ struct hlist_head *nh, *oh;
+#endif
unsigned long nsize, osize;
unsigned int nhashmask, ohashmask;
int i;
@@ -188,6 +205,16 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(nspi, nsize);
return;
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ nh = xfrm_hash_alloc(nsize);
+ if (!nh) {
+ xfrm_hash_free(ndst, nsize);
+ xfrm_hash_free(nsrc, nsize);
+ xfrm_hash_free(nspi, nsize);
+ xfrm_hash_free(nseq, nsize);
+ return;
+ }
+#endif
spin_lock_bh(&net->xfrm.xfrm_state_lock);
write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
@@ -195,17 +222,27 @@ static void xfrm_hash_resize(struct work_struct *work)
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--)
- xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
+ xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq,
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ nh,
+#endif
+ nhashmask);
osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ oh = xfrm_state_deref_prot(net->xfrm.state_byh, net);
+#endif
ohashmask = net->xfrm.state_hmask;
rcu_assign_pointer(net->xfrm.state_bydst, ndst);
rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
rcu_assign_pointer(net->xfrm.state_byseq, nseq);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ rcu_assign_pointer(net->xfrm.state_byh, nh);
+#endif
net->xfrm.state_hmask = nhashmask;
write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
@@ -219,6 +256,9 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
xfrm_hash_free(oseq, osize);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xfrm_hash_free(oh, osize);
+#endif
}
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
@@ -744,6 +784,9 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
INIT_HLIST_NODE(&x->byseq);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ INIT_HLIST_NODE(&x->byh);
+#endif
hrtimer_setup(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME,
HRTIMER_MODE_ABS_SOFT);
timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
@@ -754,6 +797,12 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ do {
+ x->handle = xfrm_state_handle++;
+ } while (x->handle == 0);
+ x->in_byh_hash = 0;
+#endif
x->pcpu_num = UINT_MAX;
spin_lock_init(&x->lock);
x->mode_data = NULL;
@@ -829,6 +878,12 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->id.spi)
hlist_del_rcu(&x->byspi);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && x->in_byh_hash) {
+ hlist_del_rcu(&x->byh);
+ x->in_byh_hash = 0;
+ }
+#endif
net->xfrm.state_num--;
xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
@@ -1582,6 +1637,13 @@ found:
net->xfrm.state_byseq + h,
x->xso.type);
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && !x->in_byh_hash) {
+ h = x->handle & net->xfrm.state_hmask;
+ hlist_add_head_rcu(&x->byh, net->xfrm.state_byh + h);
+ x->in_byh_hash = 1;
+ }
+#endif
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
hrtimer_start(&x->mtimer,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
@@ -1752,6 +1814,14 @@ static void __xfrm_state_insert(struct xfrm_state *x)
x->xso.type);
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && !x->in_byh_hash) {
+ h = x->handle & net->xfrm.state_hmask;
+ hlist_add_head_rcu(&x->byh, net->xfrm.state_byh + h);
+ x->in_byh_hash = 1;
+ }
+#endif
+
hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1773,6 +1843,9 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
u32 mark = xnew->mark.v & xnew->mark.m;
u32 if_id = xnew->if_id;
u32 cpu_id = xnew->pcpu_num;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ u16 parent_sa_handle = 0;
+#endif
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
@@ -1782,9 +1855,17 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
x->pcpu_num == cpu_id &&
(mark & x->mark.m) == x->mark.v &&
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
- xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
+ xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) {
x->genid++;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (!parent_sa_handle)
+ parent_sa_handle = x->handle;
+#endif
+ }
}
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xnew->parent_sa_handle = parent_sa_handle;
+#endif
}
void xfrm_state_insert(struct xfrm_state *x)
@@ -2352,6 +2433,37 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+struct xfrm_state *__xfrm_state_lookup_byhandle(struct net *net, u16 handle)
+{
+ unsigned int h = handle & net->xfrm.state_hmask;
+ struct xfrm_state *x;
+
+ hlist_for_each_entry(x, net->xfrm.state_byh + h, byh) {
+ if (x->handle != handle)
+ continue;
+
+ xfrm_state_hold(x);
+ return x;
+ }
+
+ return NULL;
+}
+
+struct xfrm_state *
+xfrm_state_lookup_byhandle(struct net *net, u16 handle)
+{
+ struct xfrm_state *x;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x = __xfrm_state_lookup_byhandle(net, handle);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup_byhandle);
+#endif
+
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
@@ -2603,6 +2715,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family);
XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ if (x->handle && !x->in_byh_hash) {
+ h = x->handle & net->xfrm.state_hmask;
+ hlist_add_head_rcu(&x->byh, net->xfrm.state_byh + h);
+ x->in_byh_hash = 1;
+ }
+#endif
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
goto unlock;
@@ -3279,6 +3398,12 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ net->xfrm.state_byh = xfrm_hash_alloc(sz);
+ if (!net->xfrm.state_byh)
+ goto out_byh;
+ get_random_bytes(&xfrm_state_handle, sizeof(xfrm_state_handle));
+#endif
net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
if (!net->xfrm.state_cache_input)
@@ -3294,6 +3419,10 @@ int __net_init xfrm_state_init(struct net *net)
return 0;
out_state_cache_input:
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xfrm_hash_free(net->xfrm.state_byh, sz);
+out_byh:
+#endif
xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
@@ -3321,9 +3450,15 @@ void xfrm_state_fini(struct net *net)
WARN_ON(!hlist_empty(net->xfrm.state_byspi + i));
WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i));
WARN_ON(!hlist_empty(net->xfrm.state_bydst + i));
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ WARN_ON(!hlist_empty(net->xfrm.state_byh + i));
+#endif
}
sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
+#if defined(CONFIG_INET_IPSEC_OFFLOAD) || defined(CONFIG_INET6_IPSEC_OFFLOAD)
+ xfrm_hash_free(net->xfrm.state_byh, sz);
+#endif
xfrm_hash_free(net->xfrm.state_byseq, sz);
xfrm_hash_free(net->xfrm.state_byspi, sz);
xfrm_hash_free(net->xfrm.state_bysrc, sz);

View File

@@ -0,0 +1,93 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Mono <dev@mono>
Date: Mon, 11 May 2026 00:00:00 +0900
Subject: [PATCH] libnetfilter_conntrack: do not abort on unusable NXP attrs
The NXP ASK extension patch teaches libnetfilter_conntrack about
Comcerto/Layerscape fast-path and QoS conntrack attributes, but it also
uses abi_breakage() when those attributes are present with a shape this
userspace does not expect.
That is too fragile for CMM. CMM dumps the global conntrack table, which
can contain ordinary Kubernetes/Cilium conntrack entries alongside entries
that are relevant to the NXP fast path. A single unexpected or
unrepresentable vendor attribute must not abort the entire dump before CMM
has a chance to ignore the entry.
Keep unsupported attribute IDs ignored as before. For NXP fast-path/QoS
attributes that fail validation or nested parsing, skip only that attribute
or fast-path block and continue parsing the rest of the conntrack object.
Signed-off-by: Mono <dev@mono>
---
src/conntrack/parse_mnl.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/src/conntrack/parse_mnl.c b/src/conntrack/parse_mnl.c
index 33f7824..0000000 100644
--- a/src/conntrack/parse_mnl.c
+++ b/src/conntrack/parse_mnl.c
@@ -873,16 +873,16 @@ nfct_parse_comcerto_fp_attr_cb(const struct nlattr *attr, void *data)
case CTA_COMCERTO_FP_IIF:
case CTA_COMCERTO_FP_UNDERLYING_IIF:
if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
case CTA_COMCERTO_FP_UNDERLYING_VID:
if (mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
case CTA_COMCERTO_FP_XFRM_HANDLE:
/* 4 x u32 = 16 bytes */
if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, 16) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
}
tb[type] = attr;
@@ -1024,11 +1024,11 @@ nfct_parse_conntrack_attr_cb(const struct nlattr *attr, void *data)
case CTA_LAYERSCAPE_FP_ORIG:
case CTA_LAYERSCAPE_FP_REPLY:
if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
case CTA_QOSCONNMARK:
if (mnl_attr_validate(attr, MNL_TYPE_U64) < 0)
- abi_breakage();
+ return MNL_CB_OK;
break;
}
tb[type] = attr;
@@ -1164,18 +1164,21 @@ nfct_payload_parse(const void *payload, size_t payload_len,
/* NXP ASK: Comcerto fast path and QoS */
if (tb[CTA_LAYERSCAPE_FP_ORIG]) {
- if (nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_ORIG], ct,
- __DIR_ORIG) < 0)
- return -1;
+ /*
+ * Do not abort the entire conntrack dump if one fast-path
+ * extension block cannot be represented by this userspace.
+ */
+ nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_ORIG], ct,
+ __DIR_ORIG);
}
if (tb[CTA_LAYERSCAPE_FP_REPLY]) {
- if (nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_REPLY], ct,
- __DIR_REPL) < 0)
- return -1;
+ /* See CTA_LAYERSCAPE_FP_ORIG handling above. */
+ nfct_parse_comcerto_fp(tb[CTA_LAYERSCAPE_FP_REPLY], ct,
+ __DIR_REPL);
}
if (tb[CTA_QOSCONNMARK]) {
ct->qosconnmark = be64toh(mnl_attr_get_u64(tb[CTA_QOSCONNMARK]));
set_bit(ATTR_QOSCONNMARK, ct->head.set);
}
--
2.47.3

40
scripts/fetch-artifact Executable file
View File

@@ -0,0 +1,40 @@
#!/bin/sh
set -eu

# fetch-artifact: download one artifact, preferring an optional mirror.
#
# Usage:
#   fetch-artifact <mirror-path> <output-file> <upstream-url>
#
# Fetch helper contract:
#   DEP_PKG_MIRROR=https://mirror.example.com/monok8s
#   mirror URL = ${DEP_PKG_MIRROR}/${mirror_path}
#   DEP_PKG_OFFLINE=1 forbids falling back to <upstream-url>.
#
# Exit status:
#   0  artifact written to <output-file>
#   1  DEP_PKG_OFFLINE=1 and the mirror is unset or did not have the file
#   2  wrong number of arguments
#   otherwise the exit status of the failed upstream curl (via set -e)

if [ "$#" -ne 3 ]; then
    echo "usage: fetch-artifact <mirror-path> <output-file> <upstream-url>" >&2
    exit 2
fi

mirror_path="$1"
out="$2"
upstream_url="$3"

# Download into a sibling temporary file and rename it only after curl
# succeeds, so a failed or interrupted transfer never leaves a truncated
# artifact at $out.
tmp="${out}.part.$$"

cleanup() {
    rm -f "$tmp"
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap also runs for them.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

mkdir -p "$(dirname "$out")"
# Drop any stale artifact first so a failed fetch cannot look like success.
rm -f "$out"

if [ -n "${DEP_PKG_MIRROR:-}" ]; then
    mirror_url="${DEP_PKG_MIRROR%/}/${mirror_path}"
    echo "fetch-artifact: trying mirror: ${mirror_url}" >&2

    if curl -fL --retry 3 -o "$tmp" "$mirror_url"; then
        mv -f "$tmp" "$out"
        exit 0
    fi
    rm -f "$tmp"

    if [ "${DEP_PKG_OFFLINE:-0}" = "1" ]; then
        echo "fetch-artifact: mirror miss and DEP_PKG_OFFLINE=1: ${mirror_url}" >&2
        exit 1
    fi
fi

if [ "${DEP_PKG_OFFLINE:-0}" = "1" ]; then
    echo "fetch-artifact: DEP_PKG_OFFLINE=1 and no usable mirror for ${mirror_path}" >&2
    exit 1
fi

echo "fetch-artifact: fetching upstream: ${upstream_url}" >&2
curl -fL --retry 3 -o "$tmp" "$upstream_url"
mv -f "$tmp" "$out"