diff --git a/.github/workflows/build-cn10k.yml b/.github/workflows/build-cn10k.yml new file mode 100644 index 0000000000..70576be7c0 --- /dev/null +++ b/.github/workflows/build-cn10k.yml @@ -0,0 +1,189 @@ +name: build-cn10k + +on: + push: + schedule: + - cron: "0 0 * * *" + pull_request: + +permissions: + contents: write + pages: write + id-token: write + packages: write + +jobs: + ubuntu-cn10k-build: + name: ubuntu-cn10k-arm64 + runs-on: ubuntu-24.04-arm + + steps: + - name: Checkout sources + uses: actions/checkout@v4.2.2 + with: + fetch-depth: 0 + fetch-tags: true + - name: Generate cache keys + id: get_ref_keys + run: | + echo 'ccache=ccache-'$(date -u +%Y-m%m) >> $GITHUB_OUTPUT # %m = month, so the key rotates monthly (%M is minutes) + - name: Retrieve ccache cache + uses: actions/cache@v4 + with: + path: ~/.ccache + key: ${{ steps.get_ref_keys.outputs.ccache }}-${{ github.ref }} + restore-keys: | + ${{ steps.get_ref_keys.outputs.ccache }}-refs/heads/main + - name: Extract version details + id: version + run: | + mkdir -p "${PWD}/artifacts" + git tag --points-at HEAD > /tmp/tags + # Treat HEAD as "release" only if a release tag (YY.MM.0) points at it. + if git tag --points-at HEAD --list '[0-9][0-9].[0-9][0-9].0' | grep -q .; then + PKG_POSTFIX= + NIGHTLY=false + else + PKG_POSTFIX=-devel + NIGHTLY=true + fi + echo "PKG_VERSION_NAME=`./src/scripts/version | awk -F '-' '{print $1}'`" >> "${PWD}/artifacts/env" + echo "MRVL_PKG_VERSION=`cat MRVL_VERSION`" >> "${PWD}/artifacts/env" + echo "CPT_PKG_VERSION=`cat DEP_PKG_VERSION | grep CPT_PKG_VERSION | awk -F'=' '{print $2}'`" >> "${PWD}/artifacts/env" + echo "DPDK_PKG_VERSION=`cat DPDK_VERSION | grep RELEASE_VERSION | awk -F'=' '{print $2}'`" >> "${PWD}/artifacts/env" + echo "DPDK_BASE_PKG_VERSION=`cat DPDK_VERSION | grep BASE_VERSION | awk -F'=' '{print $2}' | awk -F'.' 
'{print $1"."$2}'`" >> "${PWD}/artifacts/env" + echo "PKG_POSTFIX=${PKG_POSTFIX}" >> "${PWD}/artifacts/env" + source "${PWD}/artifacts/env" + echo "NIGHTLY=${NIGHTLY}" >> $GITHUB_OUTPUT + echo "DPDK_PKG_VERSION=${DPDK_PKG_VERSION}" >> $GITHUB_OUTPUT + echo "DPDK_BASE_PKG_VERSION=${DPDK_BASE_PKG_VERSION}" >> $GITHUB_OUTPUT + - name: Build VPP and generate package + id: build + run: | + set -x + mkdir -p ~/.ccache + BASE_DIR=${PWD} + sudo apt-get update -q -y + sudo apt-get install -y apt-utils gcc meson sudo make dialog ccache git build-essential software-properties-common + sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + sudo apt-get update -q -y + sudo apt-get install -y curl autoconf automake debhelper dkms + sudo apt-get install -y libtool libapr1-dev dh-python libconfuse-dev git-review exuberant-ctags + sudo apt-get install -y cscope pkg-config gcovr lcov chrpath libnuma-dev python3-all + sudo apt-get install -y python3-setuptools check libffi-dev python3-ply libunwind-dev cmake + sudo apt-get install -y ninja-build python3-jsonschema python3-yaml python3-venv python3-dev + sudo apt-get install -y python3-pip libnl-3-dev libnl-route-3-dev libmnl-dev python3-virtualenv + sudo apt-get install -y libssl-dev libelf-dev libpcap-dev iperf3 nasm iperf ethtool tshark jq + sudo apt-get install -y llvm clang clang-format-15 enchant-2 libffi8 + sudo apt-get install -y aspell aspell-en autopoint autotools-dev binfmt-support binutils binutils-aarch64-linux-gnu + sudo apt-get install -y binutils-common bsdextrautils bzip2 ca-certificates clang-14 cmake-data cpp cpp-11 cpp-12 dbus + sudo apt-get install -y dctrl-tools debugedit dh-autoreconf dh-elpa-helper dh-strip-nondeterminism dictionaries-common dirmngr + sudo apt-get install -y distro-info-data dpkg-dev dwz emacsen-common fakeroot file fontconfig-config fonts-dejavu-core g++ g++-11 + sudo apt-get install -y gcc gcc-11 gcc-11-base gcc-12 gcc-13 gettext gettext-base git-man gnupg gnupg-l10n gnupg-utils gpg 
gpg-agent + sudo apt-get install -y gpg-wks-client gpg-wks-server gpgconf gpgsm groff-base hunspell-en-us icu-devtools intltool-debian + sudo apt-get install -y javascript-common kmod less libalgorithm-diff-perl libalgorithm-diff-xs-perl libalgorithm-merge-perl + sudo apt-get install -y libapparmor1 libapr1 libarchive-cpio-perl libarchive-zip-perl libarchive13 libasan6 libasan8 libaspell15 + sudo apt-get install -y libassuan0 libatomic1 libbcg729-0 libbinutils libbrotli1 libbsd0 libc-ares2 libc-dev-bin libc-devtools + sudo apt-get install -y libc6-dev libcap2-bin libcbor0.10 libcc1-0 libclang-common-14-dev libclang-cpp14 + sudo apt-get install -y libclang1-14 libcommon-sense-perl libconfuse-common libconfuse-doc libconfuse2 libcrypt-dev libctf-nobfd0 + sudo apt-get install -y libctf0 libcurl3-gnutls libcurl4 libdbus-1-3 libdbus-1-dev libdebhelper-perl libdeflate0 libdpkg-perl + sudo apt-get install -y libdw1 libedit2 libelf1 libenchant-2-2 liberror-perl libexpat1 libexpat1-dev libfakeroot libfido2-1 + sudo apt-get install -y libfile-fcntllock-perl libfile-stripnondeterminism-perl libfontconfig1 libfreetype6 libgc1 libgcc-11-dev + sudo apt-get install -y libgcc-12-dev libgd-perl libgd3 libgdbm-compat4 libgdbm6 libglib2.0-0 libglib2.0-data libgomp1 + sudo apt-get install -y libhiredis1.1.0 libhunspell-1.7-0 libhwasan0 libicu-dev libicu74 libiperf0 libisl23 libitm1 libjbig0 + sudo apt-get install -y libjpeg-turbo8 libjpeg8 libjs-jquery libjs-sphinxdoc libjs-underscore libjson-perl libjson-xs-perl + sudo apt-get install -y libjsoncpp25 libkmod2 libksba8 libldap2 libldap2-dev libldap-common libllvm14t64 liblocale-gettext-perl + sudo apt-get install -y liblsan0 libltdl-dev libltdl7 liblua5.2-0 libmagic-mgc libmagic1 libmail-sendmail-perl libmaxminddb0 + sudo apt-get install -y libmd0 libmnl0 libmpc3 libmpfr6 libncurses-dev libnghttp2-14 libnl-3-200 libnl-genl-3-200 + sudo apt-get install -y libnl-route-3-200 libnpth0 libnsl-dev libnuma1 libobjc-11-dev libobjc4 libpam-cap 
libpcap0.8 + sudo apt-get install -y libpcap0.8-dev libperl5.38t64 libperlio-gzip-perl libpfm4 libpipeline1 libpng16-16 libpsl5 libpython3-dev + sudo apt-get install -y libpython3-stdlib libpython3.12t64 libpython3.12-dev libpython3.12-minimal libreadline8 + sudo apt-get install -y librhash0 librtmp1 libsasl2-2 libsasl2-modules libsasl2-modules-db libsbc1 libsctp-dev libsctp1 + sudo apt-get install -y libsigsegv2 libsmi2ldbl libsnappy1v5 libspandsp2 libspeexdsp1 libsqlite3-0 libssh-4 libssh-gcrypt-4 + sudo apt-get install -y libstdc++-11-dev libsub-override-perl libsubunit-dev libsubunit0 libsys-hostname-long-perl + sudo apt-get install -y libtext-iconv-perl libtiff6 libtinfo-dev libtirpc-dev libtsan0 libtsan2 libtypes-serialiser-perl + sudo apt-get install -y libubsan1 libuchardet0 libuv1 libwebp7 libwireshark-data libwireshark17t64 libwiretap14t64 libwsutil15t64 libx11-6 + sudo apt-get install -y libx11-data libxau6 libxcb1 libxdmcp6 libxext6 libxml2 libxml2-dev libxmuu1 libxpm4 libxslt1.1 + sudo apt-get install -y libyaml-0-2 libz3-4 libz3-dev linux-headers-6.8.0-51 linux-headers-6.8.0-51-generic + sudo apt-get install -y linux-headers-generic linux-libc-dev llvm-14 llvm-14-dev llvm-14-linker-tools llvm-14-runtime + sudo apt-get install -y llvm-14-tools lsb-release lto-disabled-list m4 man-db manpages manpages-dev media-types netbase + sudo apt-get install -y openssh-client openssl patch perl perl-modules-5.38 pinentry-curses po-debconf publicsuffix + sudo apt-get install -y python-babel-localedata python3 python3-attr python3-babel python3-bs4 python3-certifi python3-chardet + sudo apt-get install -y python3-distlib python3-filelock python3-html5lib python3-idna + sudo apt-get install -y python3-importlib-metadata python3-jinja2 python3-lib2to3 python3-lxml python3-markupsafe python3-minimal + sudo apt-get install -y python3-more-itertools python3-pip-whl python3-pkg-resources python3-platformdirs python3-pygments + sudo apt-get install -y python3-pyrsistent 
python3-requests python3-setuptools-whl python3-six python3-soupsieve python3-tz + sudo apt-get install -y python3-urllib3 python3-webencodings python3-wheel python3-wheel-whl python3-zipp python3.10 + sudo apt-get install -y python3-minimal readline-common rpcsvc-proto shared-mime-info tzdata + sudo apt-get install -y ucf uuid-dev wireshark-common xauth xdg-user-dirs xz-utils zlib1g-dev libgmpxx4ldbl + sudo apt-get install -y liblz4-dev liblzma-dev wget libzstd-dev nettle-dev lsb-release doxygen libarchive-dev + sudo apt-get install -y libnl-xfrm-3-dev sphinx-common python3-sphinx-rtd-theme libfdt-dev libjansson-dev libbsd-dev + sudo apt-get install -y python3-pyelftools gcc-14 bzip2-doc libacl1-dev libattr1-dev libbz2-dev libgmp-dev libbpf-dev libconfig-dev + source ${BASE_DIR}/artifacts/env + DISTRO=ubuntu-`lsb_release -rs` + echo "DISTRO=${DISTRO}" >> ${BASE_DIR}/artifacts/env + echo "cache_dir = ~/.ccache" > ~/.ccache/ccache.conf + ccache -p + git config --global --add safe.directory "${PWD}" + sudo APT_ARGS='-y -q' make install-deps + [[ "$PKG_POSTFIX" == "-devel" ]] && TAG=devel || TAG=${MRVL_PKG_VERSION} + wget "https://github.com/MarvellEmbeddedProcessors/dao/releases/download/oct-ep-target-cn10k-${MRVL_PKG_VERSION}-${DISTRO}-${TAG}/oct-ep-target-cn10k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64.deb" + sudo apt-get install -y ./"oct-ep-target-cn10k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64.deb" + make build-release VPP_PLATFORM=octeon10 + mkdir -p "${PWD}/install/DEBIAN" + mkdir -p "${PWD}/install/usr/share/vpp/api" + cp -r build-root/install-vpp-native/vpp/share/vpp/api/* "${PWD}/install/usr/share/vpp/api" + mv build-root/install-vpp-native/vpp/* install/. 
+ cd "${PWD}/install" + echo 'Package: vpp-'$PKG_VERSION_NAME'-cn10k'$PKG_POSTFIX >> DEBIAN/control + echo 'Version: '$MRVL_PKG_VERSION >> DEBIAN/control + echo "Maintainer: Jerin Jacob (jerinj@marvell.com)" >> DEBIAN/control + echo 'Depends: python3, python3-ply, dpdk-'$DPDK_BASE_PKG_VERSION'-cn10k (= '$DPDK_PKG_VERSION'), cpt-firmware-cn10k (= '$CPT_PKG_VERSION'), oct-ep-target-cn10k'$PKG_POSTFIX' (>= '$MRVL_PKG_VERSION')' >> DEBIAN/control + echo "Architecture: arm64" >> DEBIAN/control + echo "Homepage: https://wiki.fd.io/view/VPP" >> DEBIAN/control + echo "Description: Vector Packet Processing (VPP) for Octeon10" >> DEBIAN/control + cd - + mv "${PWD}/install" "${PWD}/vpp-${PKG_VERSION_NAME}-cn10k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64" + dpkg --build "vpp-${PKG_VERSION_NAME}-cn10k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64" + cp "vpp-${PKG_VERSION_NAME}-cn10k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64.deb" ${BASE_DIR}/artifacts/. + - name: Export version name + id: artifacts + run: | + source "${PWD}/artifacts/env" + echo $PKG_VERSION_NAME + echo "PKG_VERSION_NAME=${PKG_VERSION_NAME}" >> "$GITHUB_OUTPUT" + echo $MRVL_PKG_VERSION + echo "MRVL_PKG_VERSION=${MRVL_PKG_VERSION}" >> "$GITHUB_OUTPUT" + echo $DISTRO + echo "DISTRO=${DISTRO}" >> "$GITHUB_OUTPUT" + echo $PKG_POSTFIX + echo "PKG_POSTFIX=${PKG_POSTFIX}" >> "$GITHUB_OUTPUT" + [[ "$PKG_POSTFIX" == "-devel" ]] && TAG=devel || TAG=${MRVL_PKG_VERSION} + [[ "$PKG_POSTFIX" == "-devel" ]] && IS_DEVEL="true" || IS_DEVEL="false" + echo "TAG=${TAG}" >> "$GITHUB_OUTPUT" + echo "IS_DEVEL=${IS_DEVEL}" >> "$GITHUB_OUTPUT" + - name: Upload debian package as artifact + uses: actions/upload-artifact@v4.3.1 + if: ${{ github.event_name == 'push' }} + with: + name: vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn10k${{ steps.artifacts.outputs.PKG_POSTFIX }}_${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}_arm64.deb + path: ${{ github.workspace }}/artifacts/vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn10k${{ 
steps.artifacts.outputs.PKG_POSTFIX }}_${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}_arm64.deb + - name: Delete existing release + if: ${{ github.event_name == 'push' }} + env: + GH_TOKEN: ${{ github.token }} + run: | + if gh release view vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn10k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }}; then + gh release delete vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn10k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }} --cleanup-tag -y + else + echo "Release not found" + fi + continue-on-error: true + - name: Release VPP cn10k package + uses: softprops/action-gh-release@v2.2.2 + if: ${{ github.event_name == 'push' }} + with: + draft: false + tag_name: vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn10k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }} + files: | + ${{ github.workspace }}/artifacts/vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn10k${{ steps.artifacts.outputs.PKG_POSTFIX }}_${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}_arm64.deb \ No newline at end of file diff --git a/.github/workflows/build-cn9k.yml b/.github/workflows/build-cn9k.yml new file mode 100644 index 0000000000..265d624c5a --- /dev/null +++ b/.github/workflows/build-cn9k.yml @@ -0,0 +1,202 @@ +name: build-cn9k + +on: + push: + schedule: + - cron: "0 0 * * *" + pull_request: + +permissions: + contents: write + pages: write + id-token: write + packages: write + +jobs: + ubuntu-cn9k-build: + name: ubuntu-cn9k-arm64 + runs-on: ubuntu-24.04-arm + steps: + - name: Checkout sources + uses: actions/checkout@v4.2.2 + with: + fetch-depth: 0 + fetch-tags: true + - name: Generate cache keys + id: get_ref_keys + run: | + echo 'ccache=ccache-'$(date -u +%Y-m%m) >> $GITHUB_OUTPUT # %m = month, so the key rotates monthly (%M is minutes) + - name: Retrieve ccache cache + uses: 
actions/cache@v4 + with: + path: ~/.ccache + key: ${{ steps.get_ref_keys.outputs.ccache }}-${{ github.ref }} + restore-keys: | + ${{ steps.get_ref_keys.outputs.ccache }}-refs/heads/main + - name: Extract version details + id: version + run: | + mkdir -p "${PWD}/artifacts" + git tag --points-at HEAD > /tmp/tags + [ -s /tmp/tags ] && PKG_POSTFIX= || PKG_POSTFIX=-devel + FW_PKG_POSTFIX="" + if [ "$PKG_POSTFIX" = "-devel" ]; then # quoted: PKG_POSTFIX is empty on tagged builds + FW_PKG_POSTFIX="" + else + FW_PKG_POSTFIX=$PKG_POSTFIX + fi + echo "FW_PKG_POSTFIX=${FW_PKG_POSTFIX}" >> "${PWD}/artifacts/env" + [ -s /tmp/tags ] && NIGHTLY=false || NIGHTLY=true + echo "PKG_VERSION_NAME=`./src/scripts/version | awk -F '-' '{print $1}'`" >> "${PWD}/artifacts/env" + echo "MRVL_PKG_VERSION=`cat MRVL_VERSION`" >> "${PWD}/artifacts/env" + echo "CPT_PKG_VERSION=`cat DEP_PKG_VERSION | grep CPT_PKG_VERSION | awk -F'=' '{print $2}'`" >> "${PWD}/artifacts/env" + echo "DPDK_PKG_VERSION=`cat DPDK_VERSION | grep RELEASE_VERSION | awk -F'=' '{print $2}'`" >> "${PWD}/artifacts/env" + echo "DPDK_BASE_PKG_VERSION=`cat DPDK_VERSION | grep BASE_VERSION | awk -F'=' '{print $2}' | awk -F'.' 
'{print $1"."$2}'`" >> "${PWD}/artifacts/env" + echo "PKG_POSTFIX=${PKG_POSTFIX}" >> "${PWD}/artifacts/env" + source "${PWD}/artifacts/env" + echo "NIGHTLY=${NIGHTLY}" >> $GITHUB_OUTPUT + echo "DPDK_PKG_VERSION=${DPDK_PKG_VERSION}" >> $GITHUB_OUTPUT + echo "DPDK_BASE_PKG_VERSION=${DPDK_BASE_PKG_VERSION}" >> $GITHUB_OUTPUT + - name: Build VPP and generate package + id: build + run: | + set -x + mkdir -p ~/.ccache + BASE_DIR=${PWD} + sudo apt-get update -q -y + sudo apt-get install -y apt-utils gcc meson sudo make dialog ccache git build-essential software-properties-common + sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + sudo apt-get update -q -y + sudo apt-get install -y curl autoconf automake debhelper dkms + sudo apt-get install -y libtool libapr1-dev dh-python libconfuse-dev git-review exuberant-ctags + sudo apt-get install -y cscope pkg-config gcovr lcov chrpath libnuma-dev python3-all + sudo apt-get install -y python3-setuptools check libffi-dev python3-ply libunwind-dev cmake + sudo apt-get install -y ninja-build python3-jsonschema python3-yaml python3-venv python3-dev + sudo apt-get install -y python3-pip libnl-3-dev libnl-route-3-dev libmnl-dev python3-virtualenv + sudo apt-get install -y libssl-dev libelf-dev libpcap-dev iperf3 nasm iperf ethtool tshark jq + sudo apt-get install -y llvm clang clang-format-15 enchant-2 libffi8 + sudo apt-get install -y aspell aspell-en autopoint autotools-dev binfmt-support binutils binutils-aarch64-linux-gnu + sudo apt-get install -y binutils-common bsdextrautils bzip2 ca-certificates clang-14 cmake-data cpp cpp-11 cpp-12 dbus + sudo apt-get install -y dctrl-tools debugedit dh-autoreconf dh-elpa-helper dh-strip-nondeterminism dictionaries-common dirmngr + sudo apt-get install -y distro-info-data dpkg-dev dwz emacsen-common fakeroot file fontconfig-config fonts-dejavu-core g++ g++-11 + sudo apt-get install -y gcc gcc-11 gcc-11-base gcc-12 gcc-13 gettext gettext-base git-man gnupg gnupg-l10n gnupg-utils gpg 
gpg-agent + sudo apt-get install -y gpg-wks-client gpg-wks-server gpgconf gpgsm groff-base hunspell-en-us icu-devtools intltool-debian + sudo apt-get install -y javascript-common kmod less libalgorithm-diff-perl libalgorithm-diff-xs-perl libalgorithm-merge-perl + sudo apt-get install -y libapparmor1 libapr1 libarchive-cpio-perl libarchive-zip-perl libarchive13 libasan6 libasan8 libaspell15 + sudo apt-get install -y libassuan0 libatomic1 libbcg729-0 libbinutils libbrotli1 libbsd0 libc-ares2 libc-dev-bin libc-devtools + sudo apt-get install -y libc6-dev libcap2-bin libcbor0.10 libcc1-0 libclang-common-14-dev libclang-cpp14 + sudo apt-get install -y libclang1-14 libcommon-sense-perl libconfuse-common libconfuse-doc libconfuse2 libcrypt-dev libctf-nobfd0 + sudo apt-get install -y libctf0 libcurl3-gnutls libcurl4 libdbus-1-3 libdbus-1-dev libdebhelper-perl libdeflate0 libdpkg-perl + sudo apt-get install -y libdw1 libedit2 libelf1 libenchant-2-2 liberror-perl libexpat1 libexpat1-dev libfakeroot libfido2-1 + sudo apt-get install -y libfile-fcntllock-perl libfile-stripnondeterminism-perl libfontconfig1 libfreetype6 libgc1 libgcc-11-dev + sudo apt-get install -y libgcc-12-dev libgd-perl libgd3 libgdbm-compat4 libgdbm6 libglib2.0-0 libglib2.0-data libgomp1 + sudo apt-get install -y libhiredis1.1.0 libhunspell-1.7-0 libhwasan0 libicu-dev libicu74 libiperf0 libisl23 libitm1 libjbig0 + sudo apt-get install -y libjpeg-turbo8 libjpeg8 libjs-jquery libjs-sphinxdoc libjs-underscore libjson-perl libjson-xs-perl + sudo apt-get install -y libjsoncpp25 libkmod2 libksba8 libldap2 libldap2-dev libldap-common libllvm14t64 liblocale-gettext-perl + sudo apt-get install -y liblsan0 libltdl-dev libltdl7 liblua5.2-0 libmagic-mgc libmagic1 libmail-sendmail-perl libmaxminddb0 + sudo apt-get install -y libmd0 libmnl0 libmpc3 libmpfr6 libncurses-dev libnghttp2-14 libnl-3-200 libnl-genl-3-200 + sudo apt-get install -y libnl-route-3-200 libnpth0 libnsl-dev libnuma1 libobjc-11-dev libobjc4 libpam-cap 
libpcap0.8 + sudo apt-get install -y libpcap0.8-dev libperl5.38t64 libperlio-gzip-perl libpfm4 libpipeline1 libpng16-16 libpsl5 libpython3-dev + sudo apt-get install -y libpython3-stdlib libpython3.12t64 libpython3.12-dev libpython3.12-minimal libreadline8 + sudo apt-get install -y librhash0 librtmp1 libsasl2-2 libsasl2-modules libsasl2-modules-db libsbc1 libsctp-dev libsctp1 + sudo apt-get install -y libsigsegv2 libsmi2ldbl libsnappy1v5 libspandsp2 libspeexdsp1 libsqlite3-0 libssh-4 libssh-gcrypt-4 + sudo apt-get install -y libstdc++-11-dev libsub-override-perl libsubunit-dev libsubunit0 libsys-hostname-long-perl + sudo apt-get install -y libtext-iconv-perl libtiff6 libtinfo-dev libtirpc-dev libtsan0 libtsan2 libtypes-serialiser-perl + sudo apt-get install -y libubsan1 libuchardet0 libuv1 libwebp7 libwireshark-data libwireshark17t64 libwiretap14t64 libwsutil15t64 libx11-6 + sudo apt-get install -y libx11-data libxau6 libxcb1 libxdmcp6 libxext6 libxml2 libxml2-dev libxmuu1 libxpm4 libxslt1.1 + sudo apt-get install -y libyaml-0-2 libz3-4 libz3-dev linux-headers-6.8.0-51 linux-headers-6.8.0-51-generic + sudo apt-get install -y linux-headers-generic linux-libc-dev llvm-14 llvm-14-dev llvm-14-linker-tools llvm-14-runtime + sudo apt-get install -y llvm-14-tools lsb-release lto-disabled-list m4 man-db manpages manpages-dev media-types netbase + sudo apt-get install -y openssh-client openssl patch perl perl-modules-5.38 pinentry-curses po-debconf publicsuffix + sudo apt-get install -y python-babel-localedata python3 python3-attr python3-babel python3-bs4 python3-certifi python3-chardet + sudo apt-get install -y python3-distlib python3-filelock python3-html5lib python3-idna + sudo apt-get install -y python3-importlib-metadata python3-jinja2 python3-lib2to3 python3-lxml python3-markupsafe python3-minimal + sudo apt-get install -y python3-more-itertools python3-pip-whl python3-pkg-resources python3-platformdirs python3-pygments + sudo apt-get install -y python3-pyrsistent 
python3-requests python3-setuptools-whl python3-six python3-soupsieve python3-tz + sudo apt-get install -y python3-urllib3 python3-webencodings python3-wheel python3-wheel-whl python3-zipp python3.10 + sudo apt-get install -y python3-minimal readline-common rpcsvc-proto shared-mime-info tzdata + sudo apt-get install -y ucf uuid-dev wireshark-common xauth xdg-user-dirs xz-utils zlib1g-dev libgmpxx4ldbl + sudo apt-get install -y liblz4-dev liblzma-dev wget libzstd-dev nettle-dev lsb-release doxygen libarchive-dev + sudo apt-get install -y libnl-xfrm-3-dev sphinx-common python3-sphinx-rtd-theme libfdt-dev libjansson-dev libbsd-dev + sudo apt-get install -y python3-pyelftools gcc-14 bzip2-doc libacl1-dev libattr1-dev libbz2-dev libgmp-dev libbpf-dev + BASE_DIR=${PWD} + source ${BASE_DIR}/artifacts/env + DISTRO=ubuntu-`lsb_release -rs` + echo "DISTRO=${DISTRO}" >> ${BASE_DIR}/artifacts/env + echo "cache_dir = ~/.ccache" > ~/.ccache/ccache.conf + ccache -p + git config --global --add safe.directory "${PWD}" + sudo APT_ARGS='-y -q' make install-deps + make build-release VPP_PLATFORM=octeon9 + mkdir -p "${PWD}/install/DEBIAN" + mkdir -p "${PWD}/install/usr/share/vpp/api" + cp -r build-root/install-vpp-native/vpp/share/vpp/api/* "${PWD}/install/usr/share/vpp/api" + mv build-root/install-vpp-native/vpp/* install/. 
+ cd "${PWD}/install" + echo 'Package: vpp-'$PKG_VERSION_NAME'-cn9k'$PKG_POSTFIX >> DEBIAN/control + echo 'Version: '$MRVL_PKG_VERSION >> DEBIAN/control + echo "Maintainer: Jerin Jacob (jerinj@marvell.com)" >> DEBIAN/control + echo 'Depends: python3, python3-ply, dpdk-'$DPDK_BASE_PKG_VERSION'-cn9k (= '$DPDK_PKG_VERSION'), cpt-firmware-cn9k'${FW_PKG_POSTFIX}' (= '$CPT_PKG_VERSION')' >> DEBIAN/control + echo "Architecture: arm64" >> DEBIAN/control + echo "Homepage: https://wiki.fd.io/view/VPP" >> DEBIAN/control + echo "Description: Vector Packet Processing (VPP) for Octeon9" >> DEBIAN/control + cd - + mv "${PWD}/install" "${PWD}/vpp-${PKG_VERSION_NAME}-cn9k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64" + dpkg --build "vpp-${PKG_VERSION_NAME}-cn9k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64" + cp "vpp-${PKG_VERSION_NAME}-cn9k${PKG_POSTFIX}_${MRVL_PKG_VERSION}_arm64.deb" ${BASE_DIR}/artifacts/. + - name: Export version name + id: artifacts + run: | + source "${PWD}/artifacts/env" + echo $PKG_VERSION_NAME + echo "PKG_VERSION_NAME=${PKG_VERSION_NAME}" >> "$GITHUB_OUTPUT" + echo $MRVL_PKG_VERSION + echo "MRVL_PKG_VERSION=${MRVL_PKG_VERSION}" >> "$GITHUB_OUTPUT" + echo $DISTRO + echo "DISTRO=${DISTRO}" >> "$GITHUB_OUTPUT" + echo $PKG_POSTFIX + echo "PKG_POSTFIX=${PKG_POSTFIX}" >> "$GITHUB_OUTPUT" + [[ "$PKG_POSTFIX" == "-devel" ]] && TAG=devel || TAG=${MRVL_PKG_VERSION} + [[ "$PKG_POSTFIX" == "-devel" ]] && IS_DEVEL="true" || IS_DEVEL="false" + echo "TAG=${TAG}" >> "$GITHUB_OUTPUT" + echo "IS_DEVEL=${IS_DEVEL}" >> "$GITHUB_OUTPUT" + - name: Upload debian package as artifact + uses: actions/upload-artifact@v4.3.1 + if: ${{ github.event_name == 'push' }} + with: + name: vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k${{ steps.artifacts.outputs.PKG_POSTFIX }}_${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}_arm64.deb + path: ${{ github.workspace }}/artifacts/vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k${{ steps.artifacts.outputs.PKG_POSTFIX }}_${{ 
steps.artifacts.outputs.MRVL_PKG_VERSION }}_arm64.deb + - name: Delete existing release + if: ${{ github.event_name == 'push' }} + env: + GH_TOKEN: ${{ github.token }} + run: | + if gh release view vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }}; then + gh release delete vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }} --cleanup-tag -y + else + echo "Release not found" + fi + continue-on-error: true + - name: Release VPP cn9k package + uses: softprops/action-gh-release@v2.0.4 + if: ${{ github.event_name == 'push' }} + with: + draft: false + tag_name: vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }} + files: | + ${{ github.workspace }}/artifacts/vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k${{ steps.artifacts.outputs.PKG_POSTFIX }}_${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}_arm64.deb + - name: Dispatch package update event + if: ${{ github.event_name == 'push' }} + run: | + curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.PPA_REPO_SECRET }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/marvellembeddedprocessors/packages/dispatches \ + -d '{"event_type":"dispatch-event", "client_payload": {"package" : "vpp", + "tag": "vpp-${{ steps.artifacts.outputs.PKG_VERSION_NAME }}-cn9k-${{ steps.artifacts.outputs.MRVL_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ steps.artifacts.outputs.TAG }}", + "dpdk_tag" : "dpdk-cn9k-${{ steps.version.outputs.DPDK_BASE_PKG_VERSION }}_${{ steps.version.outputs.DPDK_PKG_VERSION }}-${{ steps.artifacts.outputs.DISTRO }}-${{ 
steps.version.outputs.DPDK_PKG_VERSION }}", + "has_dpdk" : "true", "distro" : "${{ steps.artifacts.outputs.DISTRO }}", + "platform" : "cn9k", + "devel": "${{ steps.artifacts.outputs.IS_DEVEL }}"}}' \ No newline at end of file diff --git a/.github/workflows/close_prs.yml b/.github/workflows/close_prs.yml deleted file mode 100644 index f8abc37f02..0000000000 --- a/.github/workflows/close_prs.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: Close Pull Request - -on: - pull_request_target: - types: [opened] - -jobs: - run: - runs-on: ubuntu-latest - steps: - - uses: superbrothers/close-pull-request@v3 - with: - # Optional. Post a issue comment just before closing a pull request. - comment: "Thank you so much for your interest! VPP takes patches at https://gerrit.fd.io/ - -``` -git clone https://gerrit.fd.io/r/vpp -``` - -Using [git review](https://www.mediawiki.org/wiki/Gerrit/git-review) to contribute patches is recommended" diff --git a/.gitreview b/.gitreview index 1db08df202..e69de29bb2 100644 --- a/.gitreview +++ b/.gitreview @@ -1,4 +0,0 @@ -[gerrit] -host=gerrit.fd.io -port=29418 -project=vpp diff --git a/DEP_PKG_VERSION b/DEP_PKG_VERSION new file mode 100644 index 0000000000..46e4a66aed --- /dev/null +++ b/DEP_PKG_VERSION @@ -0,0 +1 @@ +CPT_PKG_VERSION=24.09.0 diff --git a/DPDK_VERSION b/DPDK_VERSION new file mode 100644 index 0000000000..6ed68f9774 --- /dev/null +++ b/DPDK_VERSION @@ -0,0 +1,2 @@ +BASE_VERSION=24.11.0 +RELEASE_VERSION=25.07.0 \ No newline at end of file diff --git a/MAINTAINERS b/MAINTAINERS index c310e72822..be3a5b28f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -98,6 +98,14 @@ I: policer M: Neale Ranns F: src/vnet/policer/ +VNET Traffic Management +I: tm +F: src/vnet/tm/ + +VNET Priority Flow Control +I: pfc +F: src/vnet/pfc/ + VNET New Device Drivers Infra I: dev M: Damjan Marion @@ -431,6 +439,11 @@ I: dispatch-trace M: Dave Barach F: src/plugins/dispatch-trace +Plugin - Marvell OCTEON End Point Control Agent +I: octep-cp +M: Bheemappa 
Agasimundin +F: src/plugins/octep_cp/ + Plugin - DPDK I: dpdk M: Damjan Marion @@ -460,6 +473,12 @@ M: Dave Barach M: Florin Coras F: src/plugins/http_static/ +Plugin - http_static l4 +I: http_static_l4 +M: Dave Barach +M: Florin Coras +F: src/plugins/http_static_l4/ + Plugin - builtinurl I: builtinurl M: Dave Barach @@ -886,6 +905,11 @@ M: Filip Tehlar M: Maros Ondrejicka F: extras/hs-test +CI - Enable VPP dev-ci +I: ci +M: Nawal Kishor +F: ci/ + THE REST I: misc M: vpp-dev Mailing List diff --git a/MRVL_VERSION b/MRVL_VERSION new file mode 100644 index 0000000000..cf7b6a32c3 --- /dev/null +++ b/MRVL_VERSION @@ -0,0 +1 @@ +25.09.0 \ No newline at end of file diff --git a/Makefile b/Makefile index 17d633562a..c4253da768 100644 --- a/Makefile +++ b/Makefile @@ -84,8 +84,12 @@ DEB_DEPENDS += libpcap-dev DEB_DEPENDS += tshark LIBFFI=libffi6 # works on all but 20.04 and debian-testing - -ifeq ($(OS_VERSION_ID),22.04) +ifeq ($(OS_VERSION_ID),24.04) + DEB_DEPENDS += libssl-dev + DEB_DEPENDS += llvm clang clang-format-14 + LIBFFI=libffi8 + DEB_DEPENDS += enchant-2 # for docs +else ifeq ($(OS_VERSION_ID),22.04) DEB_DEPENDS += python3-virtualenv DEB_DEPENDS += libssl-dev DEB_DEPENDS += clang clang-format-11 diff --git a/build/external/Makefile b/build/external/Makefile index af2512c461..e521803628 100644 --- a/build/external/Makefile +++ b/build/external/Makefile @@ -43,13 +43,14 @@ include packages/rdma-core.mk include packages/dpdk.mk include packages/xdp-tools.mk include packages/octeon-roc.mk +include packages/octeon-dao.mk .PHONY: clean clean: @rm -rf $(B) $(I) .PHONY: install -install: $(if $(ARCH_X86_64), ipsec-mb-install) dpdk-install rdma-core-install quicly-install xdp-tools-install $(if $(AARCH64), octeon-roc-install) +install: $(if $(ARCH_X86_64), ipsec-mb-install) dpdk-install rdma-core-install quicly-install xdp-tools-install $(if $(AARCH64), octeon-roc-install octeon-dao-install) .PHONY: config config: $(if $(ARCH_X86_64), ipsec-mb-config) dpdk-config 
rdma-core-config quicly-build diff --git a/build/external/packages/dpdk.mk b/build/external/packages/dpdk.mk index 2c5a903027..5179b53bc9 100644 --- a/build/external/packages/dpdk.mk +++ b/build/external/packages/dpdk.mk @@ -48,12 +48,9 @@ endif DPDK_DRIVERS_DISABLED := baseband/\*, \ bus/dpaa, \ bus/ifpga, \ - common/cnxk, \ compress/isal, \ - compress/octeontx, \ compress/zlib, \ crypto/ccp, \ - crypto/cnxk, \ crypto/dpaa_sec, \ crypto/openssl, \ crypto/aesni_mb, \ @@ -61,16 +58,11 @@ DPDK_DRIVERS_DISABLED := baseband/\*, \ crypto/kasumi, \ crypto/snow3g, \ crypto/zuc, \ - event/\*, \ mempool/dpaa, \ - mempool/cnxk, \ - net/af_packet, \ net/bnx2x, \ - net/bonding, \ - net/cnxk, \ net/ipn3ke, \ net/liquidio, \ - net/pcap, \ + net/pcap, \ net/pfe, \ net/sfc, \ net/softnic, \ @@ -81,28 +73,11 @@ DPDK_DRIVERS_DISABLED := baseband/\*, \ DPDK_LIBS_DISABLED := acl, \ bbdev, \ bitratestats, \ - bpf, \ - cfgfile, \ - cnxk, \ - distributor, \ - efd, \ - fib, \ - flow_classify, \ - graph, \ - gro, \ - gso, \ jobstats, \ kni, \ - latencystats, \ - lpm, \ + table, \ member, \ - node, \ - pipeline, \ - port, \ - power, \ - rawdev, \ - rib, \ - table + pipeline DPDK_MLX_CONFIG_FLAG := diff --git a/build/external/packages/octeon-dao.mk b/build/external/packages/octeon-dao.mk new file mode 100644 index 0000000000..eb56659fc4 --- /dev/null +++ b/build/external/packages/octeon-dao.mk @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2024 Marvell. 
+ +DAO_DEBUG ?= n +octeon-dao_version := 24.05.2 +octeon-dao_tarball := $(octeon-dao_version).tar.gz +octeon-dao_tarball_md5sum := a06dda68ab8cb91734c305fc17e2a5ae + +octeon-dao_tarball_strip_dirs := 1 +octeon-dao_url := https://github.com/MarvellEmbeddedProcessors/dao/archive/refs/tags/$(octeon-dao_tarball) + +octeon_dao_cmake_args ?= + +DAO_BUILD_TYPE:=release +ifeq ($(DAO_DEBUG), y) +DAO_BUILD_TYPE:=debug +endif + +DAO_MESON_ARGS = \ + --default-library static \ + -Dprefer_static=True \ + --buildtype=$(DAO_BUILD_TYPE)\ + -Denable_kmods=false + +PREFIX = $(CNXK_SDK_SYSROOT) +ifeq (,$(findstring $(OCTEON_VERSION),cn10k cn9k)) + DAO_MESON_ARGS += -Dplatform=native + DAO_MESON_ARGS += --prefix $(octeon-dao_install_dir) + PREFIX = $(octeon-dao_install_dir) +else ifeq ($(OCTEON_VERSION), cn10k) + DAO_MESON_ARGS += --cross-file=$(octeon-dao_src_dir)/config/arm64_cn10k_linux_gcc + DAO_MESON_ARGS += --prefix $(CNXK_SDK_SYSROOT) +else ifeq ($(OCTEON_VERSION), cn9k) + DAO_MESON_ARGS += --cross-file=$(octeon-dao_src_dir)/config/arm64_cn9k_linux_gcc + DAO_MESON_ARGS += --prefix $(CNXK_SDK_SYSROOT) +endif + +PIP_DOWNLOAD_DIR = $(CURDIR)/downloads/ +define octeon-dao_config_cmds + PKG_CONFIG_PATH=${PREFIX}/lib/pkgconfig meson setup $(octeon-dao_src_dir) \ + $(octeon-dao_build_dir) \ + $(DAO_MESON_ARGS)\ + | tee $(octeon-dao_config_log) && \ + echo "DAO post meson configuration" +endef + +define octeon-dao_build_cmds + cd ${octeon-dao_build_dir} && rm -f $(octeon-dao_build_log) && \ + meson compile -C ${octeon-dao_build_dir} | tee $(octeon-dao_build_log) +endef + +define octeon-dao_install_cmds + cd ${octeon-dao_build_dir} && \ + meson install &&\ + echo "meson installed directory ${octeon-dao_install_dir}" +endef + +$(eval $(call package,octeon-dao)) diff --git a/build/external/packages/octeon-roc.mk b/build/external/packages/octeon-roc.mk index 4bdeabeac2..c5b8752388 100644 --- a/build/external/packages/octeon-roc.mk +++ b/build/external/packages/octeon-roc.mk @@ -2,12 +2,13 @@ # 
SPDX-License-Identifier: Apache-2.0 # https://spdx.org/licenses/Apache-2.0.html -octeon-roc_version := 0.2 -octeon-roc_tarball := octeon-roc-v$(octeon-roc_version).tar.gz -octeon-roc_tarball_md5sum := a72bb3b27fd3bbaf58aadd99514620e1 +octeon-roc_version := 25.09 +octeon-roc_tarball := v$(octeon-roc_version).tar.gz +octeon-roc_tarball_md5sum := 593be96859fe9d8660f87d3bcdb1b008 +octeon-roc_github := https://github.com/MarvellEmbeddedProcessors/marvell-octeon-roc octeon-roc_tarball_strip_dirs := 1 -octeon-roc_url := https://github.com/MarvellEmbeddedProcessors/marvell-vpp/archive/refs/tags/$(octeon-roc_tarball) +octeon-roc_url := $(octeon-roc_github)/archive/refs/tags/$(octeon-roc_tarball) define octeon-roc_config_cmds @true diff --git a/build/external/patches/octeon-dao_24.05.2/001-disable-dao-apps.patch b/build/external/patches/octeon-dao_24.05.2/001-disable-dao-apps.patch new file mode 100644 index 0000000000..f72da2f9f2 --- /dev/null +++ b/build/external/patches/octeon-dao_24.05.2/001-disable-dao-apps.patch @@ -0,0 +1,12 @@ +diff --git a/meson.build b/meson.build +index 1dcad58..0564087 100644 +--- a/meson.build ++++ b/meson.build +@@ -36,8 +36,6 @@ subdir('config') + if host_machine.cpu_family() == 'aarch64' + if DAO_BUILD_CONF.has('DAO_LIBDPDK_DEP') + subdir('lib') +- subdir('app') +- subdir('tests') + endif + endif diff --git a/ci/build/build-deps.sh b/ci/build/build-deps.sh new file mode 100755 index 0000000000..797e8ca70e --- /dev/null +++ b/ci/build/build-deps.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Marvell CONFIDENTIAL AND PROPRIETARY NOTE +# +# This software contains information confidential and proprietary to +# Marvell. It shall not be reproduced in whole or in +# part, or transferred to other documents, or disclosed to third +# parties, or used for any purpose other than that for which it was +# obtained, without the prior written consent of Marvell. +# +# Copyright (c) 2025 Marvell. 
If you received this file from Marvell +# and you have entered into a commercial license agreement (a "Commercial License") +# with Marvell, the file is licensed to you under the terms of the applicable Commercial +# License. In the absence of such license agreement the following file is subject to +# Marvell’s standard Limited Use License Agreement. + +# Script syntax: +# build-deps.sh +# +# Script will: +# 1. Create +# 2. Fetch and build dependencies. + +set -euo pipefail +shopt -s extglob + +CROSS_COMPILE=${CROSS_COMPILE:-aarch64-marvell-linux-gnu} +BUILD_ROOT=$(realpath $1) +LIBUUID_DIR=${BUILD_ROOT}/libuuid +DEPS_DIR=${BUILD_ROOT}/deps-prefix + +function build_libuuid { + rm -rf ${LIBUUID_DIR} + mkdir -p ${LIBUUID_DIR} + cd ${LIBUUID_DIR} + wget https://github.com/util-linux/util-linux/archive/refs/tags/v2.38.tar.gz + tar -xvf v2.38.tar.gz + cd util-linux-2.38 + ./autogen.sh + ./configure --target=${CROSS_COMPILE} --host=${CROSS_COMPILE} \ + --build=x86_64-pc-linux-gnu --disable-all-programs --enable-libuuid \ + --prefix ${DEPS_DIR} + make install +} + +build_libuuid diff --git a/ci/build/build.sh b/ci/build/build.sh new file mode 100755 index 0000000000..cfad2bca6d --- /dev/null +++ b/ci/build/build.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# Marvell CONFIDENTIAL AND PROPRIETARY NOTE +# +# This software contains information confidential and proprietary to +# Marvell. It shall not be reproduced in whole or in +# part, or transferred to other documents, or disclosed to third +# parties, or used for any purpose other than that for which it was +# obtained, without the prior written consent of Marvell. +# +# Copyright (c) 2025 Marvell. If you received this file from Marvell +# and you have entered into a commercial license agreement (a "Commercial License") +# with Marvell, the file is licensed to you under the terms of the applicable Commercial +# License. 
In the absence of such license agreement the following file is subject to
+# Marvell’s standard Limited Use License Agreement.
+
+set -euo pipefail
+
+function help() {
+    echo "Builds VPP libraries and applications."
+    echo ""
+    echo "Usage:"
+    echo "$SCRIPT_NAME [ARGUMENTS]..."
+    echo ""
+    echo "Mandatory Arguments"
+    echo "==================="
+    echo "--build-type | -b : Build type; release/debug"
+    echo "--deps_dir | -d : Deps dir"
+    echo ""
+    echo "Optional Arguments"
+    echo "==================="
+    echo "--octeon_version | -o : Version(cn10k, cn9k)"
+    echo "--help | -h : Print this help and exit"
+}
+
+SCRIPT_NAME="$(basename "$0")"
+if ! OPTS=$(getopt \
+    -o "b:d:o:h" \
+    -l "build-type:,deps-dir:,deps_dir:,octeon-version:,octeon_version:,help" \
+    -n "$SCRIPT_NAME" \
+    -- "$@"); then
+    help
+    exit 1
+fi
+
+DEPS_DIR=
+BUILD=
+export CROSS="aarch64-marvell-linux-gnu-"
+export OCTEON_VERSION=${OCTEON_VERSION:-"cn10k"}
+export PLATFORM="cnxk"
+
+eval set -- "$OPTS"
+unset OPTS
+while [[ $# -gt 1 ]]; do
+    case $1 in
+        -b|--build-type) shift; BUILD=$1;;
+        -d|--deps-dir|--deps_dir) shift; DEPS_DIR=$(realpath "$1");;  # both spellings: help text documents the underscore form
+        -o|--octeon-version|--octeon_version) shift; OCTEON_VERSION=$1;;  # -o takes a value, so getopt must declare it with ':'
+        -h|--help) help; exit 0;;
+        *) help; exit 1;;
+    esac
+    shift
+done
+
+if [[ -z $BUILD || -z $DEPS_DIR ]]; then
+    echo "Build_type and Deps directory should be passed as argument !!"
+    help
+    exit 1
+fi
+
+if [[ $BUILD == "debug" ]]; then
+    BUILD_TYPE=build
+elif [[ $BUILD == "release" ]]; then
+    BUILD_TYPE=build-release
+else
+    echo "Pass build-type (release/debug)"
+    help
+    exit 1
+fi
+
+DEPS_PREFIX=${DEPS_DIR}/deps-prefix
+export cnxk_c_flags="-I/${DEPS_PREFIX}/include/ -L/${DEPS_PREFIX}/lib"
+export UNATTENDED=y
+export DEBIAN_FRONTEND=noninteractive
+# FIXME: Remove install-dep command when these deps are installed in docker container.
+make install-dep +make $BUILD_TYPE diff --git a/ci/checkpatch/run_sanity_check.sh b/ci/checkpatch/run_sanity_check.sh new file mode 100755 index 0000000000..4185366577 --- /dev/null +++ b/ci/checkpatch/run_sanity_check.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: Marvell-MIT +# Copyright (c) 2025 Marvell. + +# Add when required. diff --git a/ci/klocwork/klocwork.sh b/ci/klocwork/klocwork.sh new file mode 100755 index 0000000000..f485580d13 --- /dev/null +++ b/ci/klocwork/klocwork.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# Marvell CONFIDENTIAL AND PROPRIETARY NOTE +# +# This software contains information confidential and proprietary to +# Marvell. It shall not be reproduced in whole or in +# part, or transferred to other documents, or disclosed to third +# parties, or used for any purpose other than that for which it was +# obtained, without the prior written consent of Marvell. +# +# Copyright (c) 2025 Marvell. If you received this file from Marvell +# and you have entered into a commercial license agreement (a "Commercial License") +# with Marvell, the file is licensed to you under the terms of the applicable Commercial +# License. In the absence of such license agreement the following file is subject to +# Marvell’s standard Limited Use License Agreement. + +set -euo pipefail + +function help() { + echo "Builds VPP libraries and applications with klocwork" + echo "" + echo "Usage:" + echo "$SCRIPT_NAME [ARGUMENTS]..." + echo "" + echo "Mandatory Arguments" + echo "===================" + echo "--build-root | -r : Build root directory" + echo "--deps_dir | -d : Deps dir" + echo "--cnxk_sdk_sysroot | -s : CNXK_SDK_SYSROOT path" + echo "" + echo "Optional Arguments" + echo "===================" + echo "--octeon_version | -o : Version(cn10k, cn9k)" + echo "--help | -h : Print this help and exit" +} + +SCRIPT_NAME="$(basename "$0")" +if ! 
OPTS=$(getopt \
+    -o "r:d:s:o:h" \
+    -l "build-root:,deps-dir:,deps_dir:,cnxk_sdk_sysroot:,octeon-version:,octeon_version:,help" \
+    -n "$SCRIPT_NAME" \
+    -- "$@"); then
+    help
+    exit 1
+fi
+
+BUILD_ROOT=
+DEPS_DIR=
+CNXK_SYSROOT=
+export CROSS="aarch64-marvell-linux-gnu-"
+export OCTEON_VERSION="cn10k"
+export PLATFORM="cnxk"
+
+eval set -- "$OPTS"
+unset OPTS
+while [[ $# -gt 1 ]]; do
+    case $1 in
+        -r|--build-root) shift; BUILD_ROOT=$(realpath "$1");;  # must match the name registered with getopt
+        -d|--deps-dir|--deps_dir) shift; DEPS_DIR=$(realpath "$1");;  # both spellings: help text documents the underscore form
+        -s|--cnxk_sdk_sysroot) shift; CNXK_SYSROOT=$1;;
+        -o|--octeon-version|--octeon_version) shift; OCTEON_VERSION=$1;;  # -o takes a value, so getopt must declare it with ':'
+        -h|--help) help; exit 0;;
+        *) help; exit 1;;
+    esac
+    shift
+done
+
+if [[ -z $DEPS_DIR || -z $BUILD_ROOT || -z $CNXK_SYSROOT ]]; then
+    echo "Deps directory, build root and cnxk_sdk_sysroot should be passed as argument !!"
+    help
+    exit 1
+fi
+
+DEPS_PREFIX=${DEPS_DIR}/deps-prefix
+export CNXK_SDK_SYSROOT=$CNXK_SYSROOT
+export cnxk_c_flags="-I/${DEPS_PREFIX}/include/ -L/${DEPS_PREFIX}/lib"
+export UNATTENDED=y
+export DEBIAN_FRONTEND=noninteractive
+# FIXME: Remove install-dep command when these deps are installed in docker container.
+make install-dep +rm -rf .kwlp .kwps +kwcheck create +kwcheck set license.host=llic5-01.marvell.com license.port=33138 + +# List of directories to ignore in klocwork checks +IGNORE_FILES="" + +kwinject --ignore-files $IGNORE_FILES -w make build +kwcheck run -r -b kwinject.out -F detailed --report kwreport-detailed.txt +kwcheck list -F scriptable --report kwreport-scritpable.txt +CNXK_ISSUES=$(wc -l kwreport-scritpable.txt | awk '{print $1}') + +echo "#########################################################################" +echo "Klocwork CNXK Issues: $CNXK_ISSUES" +echo "Klocwork Report : $PWD/kwreport-detailed.txt" +echo "#########################################################################" diff --git a/ci/test/board/board_test_run.sh b/ci/test/board/board_test_run.sh new file mode 100755 index 0000000000..283b05ca72 --- /dev/null +++ b/ci/test/board/board_test_run.sh @@ -0,0 +1,254 @@ +#!/bin/bash + +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +set -euox pipefail + +source $TEST_ENV_CONF + +SKIP_SYNC=${SKIP_SYNC:-} +SKIP_TARGET_SETUP=${SKIP_TARGET_SETUP:-} +PROJECT_ROOT=${PROJECT_ROOT:-$PWD} +TARGET_BOARD=${TARGET_BOARD:-root@127.0.0.1} +TARGET_SSH_CMD=${TARGET_SSH_CMD:-"ssh"} +TARGET_SCP_CMD=${TARGET_SCP_CMD:-"scp"} +REMOTE="$TARGET_SSH_CMD $TARGET_BOARD -n" +REMOTE_DIR=${REMOTE_DIR:-/tmp/vpp} +REMOTE_BUILD_DIR=${REMOTE_DIR}/build +BUILD_DIR=${BUILD_DIR:-$PWD/build} +DEPS_DIR=${DEPS_DIR:-${PROJECT_ROOT}/deps-prefix} +REBOOT_ON_FAIL=${REBOOT_ON_FAIL:-} +UTILS_LOCATION=${DPDK_DEVBIND_LOCATION:-ci@10.28.36.188:/home/ci/vpp/perf_stage_bins/$PLAT/} + +function save_log() +{ + local logfile=$1 + local save_name=${2:-} + + if [[ -z $RUN_DIR ]] || [[ ! 
-d $RUN_DIR ]]; then + return + fi + + if [[ -n $save_name ]]; then + cp $logfile $RUN_DIR/$save_name 2>/dev/null || true + else + cp $logfile $RUN_DIR/ 2>/dev/null || true + fi +} + +function test_init() +{ + $REMOTE 'sudo dmesg -c' 2>&1 > /dev/null + $REMOTE 'uname -a' +} + +# Sync the files +function target_sync() +{ + local sync="rsync -azzh --delete" + if [[ -n $SKIP_SYNC ]]; then + return + fi + echo "Syncing files to target board..." + $REMOTE "rm -rf $REMOTE_DIR" + $REMOTE "mkdir -p $REMOTE_DIR" + # Sync build directory + rsync -e "$TARGET_SSH_CMD" -av $BUILD_DIR/* $TARGET_BOARD:$REMOTE_BUILD_DIR/ + rsync -e "$TARGET_SSH_CMD" -av $BUILD_DIR/../* $TARGET_BOARD:$REMOTE_BUILD_DIR/ + # Sync deps build directory if required + #rsync -e "$TARGET_SSH_CMD" -r $DEPS_DIR/* $TARGET_BOARD:$REMOTE_DIR/deps_build + # Sync dpdk-devbind.py + $TARGET_SSH_CMD $TARGET_BOARD "sudo $TARGET_SCP_CMD $UTILS_LOCATION/dpdk-devbind.py ${REMOTE_BUILD_DIR}/ci/test/board/" + # Sync testpmd + $TARGET_SSH_CMD $TARGET_BOARD "sudo $TARGET_SCP_CMD $UTILS_LOCATION/dpdk-testpmd /usr/local/bin" + # Sync pcap utils + $TARGET_SSH_CMD $TARGET_BOARD "sudo $TARGET_SCP_CMD $UTILS_LOCATION/pcap-* ${REMOTE_BUILD_DIR}/ci/test/common/pcap/" +# + if [[ -n $GENERATOR_BOARD ]]; then + $TARGET_SSH_CMD $GENERATOR_BOARD mkdir -p $TARGET_RUN_DIR/deps + rsync -e "$TARGET_SSH_CMD" -av $BUILD_DIR/* $GENERATOR_BOARD:$REMOTE_BUILD_DIR/ + rsync -e "$TARGET_SSH_CMD" -av $BUILD_DIR/../* $GENERATOR_BOARD:$REMOTE_BUILD_DIR/ + # Sync testpmd + $TARGET_SSH_CMD $GENERATOR_BOARD "sudo $TARGET_SCP_CMD $UTILS_LOCATION/dpdk-testpmd /usr/local/bin" + $sync -e "$TARGET_SSH_CMD" \ + $PROJECT_ROOT/ci/test/board/oxk-devbind-basic.sh \ + $GENERATOR_BOARD:$REMOTE_DIR + fi +} + +function target_setup() +{ + echo "Setting up target" + # Setup the board + export TARGET_BOARD + export TARGET_SSH_CMD + export REMOTE_DIR + if [[ -n $SKIP_TARGET_SETUP ]]; then + return + fi + $PROJECT_ROOT/ci/test/board/oct-target-setup.sh + + if [[ -n 
$GENERATOR_BOARD ]]; then + # Setup Generator Board also + TARGET_BOARD=$GENERATOR_BOARD VFIO_DEVBIND=$REMOTE_DIR/oxk-devbind-basic.sh \ + $PROJECT_ROOT/ci/test/board/oct-target-setup.sh + fi + +} + +function run_test() +{ + local name=$1 + local tmo + local cmd + local curtime + local exec_bin + local res + + exec_bin=$(get_test_exec_bin $name) + binary_name=$(basename $exec_bin) + tmo=$(get_test_timeout $name) + + # Update sig handlers to pass in test name also. + trap "sig_handler INT $binary_name" INT + trap "sig_handler TERM $binary_name" TERM + trap "sig_handler QUIT $binary_name" QUIT + + test_info_print $name + cmd=$(get_test_command $name) + + curtime=$SECONDS + timeout --foreground -v -k 30 -s 3 $tmo $REMOTE "$cmd" + res=$? + echo -en "\n$name completed in $((SECONDS - curtime)) seconds ... " + if [[ $res -eq 0 ]]; then + echo "TEST SUCCESS (ret = $res)" + elif [[ $res -eq 77 ]]; then + echo "TEST SKIPPED (ret = $res)" + else + echo "TEST FAILURE (ret = $res)" + fi + + return $res +} + +function run_all_tests() +{ + local tst + local res + local failed_tests="" + local passed_tests="" + local skipped_tests="" + local test_num=0 + + # Errors will be handled inline. No need for sig handler + set +e + trap - ERR + + # Read the tests info one by one from the test list created by meson test + while [[ true ]]; do + test_num=$((test_num + 1)) + test_enabled $test_num + res=$? + tst=$(get_test_name $test_num) + if [[ $res == 77 ]]; then + skipped_tests="${skipped_tests}${tst}#" + continue + fi + if [[ $res -ne 0 ]]; then + break + fi + + # Run the tests + run_test $tst + res=$? 
+        if [[ $res -ne 0 ]] && [[ $res -ne 77 ]] ; then
+            failed_tests="${failed_tests}${tst}#"
+            if [[ -n ${CONTINUE_ON_FAILURE:-} ]]; then  # optional env var; default keeps 'set -u' from aborting
+                echo "FAILURE: Test $tst failed"
+            else
+                test_exit -1 "FAILURE: Test $tst failed"
+            fi
+        else
+            [[ $res -eq 77 ]] && skipped_tests="${skipped_tests}${tst}#" || passed_tests="${passed_tests}${tst}#"  # 77 = skipped, not passed
+        fi
+    done
+    if [[ -n ${STATUS_OUTFILE:-} ]] ; then  # optional env var; default keeps 'set -u' from aborting
+        echo "FAILED: $failed_tests" > "$STATUS_OUTFILE"
+        echo "PASSED: $passed_tests" >> "$STATUS_OUTFILE"
+        echo "SKIPPED: $skipped_tests" >> "$STATUS_OUTFILE"
+    fi
+    if [[ -n $failed_tests ]]; then
+        test_exit -1 "FAILURE: Test(s) [$failed_tests] failed"
+    fi
+}
+
+function test_exit()
+{
+    local result=$1  # exit status for the whole run
+    local msg=$2     # summary line printed in the final banner
+    local waittime
+
+    set +e
+    trap - INT
+    trap - TERM
+    trap - ERR
+    trap - QUIT
+
+    $REMOTE 'dmesg; uptime; cat /proc/uptime' > remote_dmesg.log
+    save_log remote_dmesg.log
+
+    if [[ $result -ne 0 ]]; then
+        if [[ -n $REBOOT_ON_FAIL ]]; then
+            echo "Test case failure, rebooting the board."  # NOTE(review): no reboot command is issued in this function - confirm where it happens
+            waittime=300
+            while [[ $waittime -gt 0 ]]; do
+                $REMOTE true 2> /dev/null && break
+                sleep 10
+                waittime=$((waittime - 10))  # budget is in seconds; each pass sleeps 10
+            done
+
+            if ! ($REMOTE true 2> /dev/null); then  # still unreachable after the wait
+                echo "Rebooting board failed."
+            fi
+        fi
+    fi
+    echo "###########################################################"
+    echo "Run time: $((SECONDS / 60)) mins $((SECONDS % 60)) secs"
+    echo "$msg"
+    echo "###########################################################"
+
+    exit $result
+}
+
+function sig_handler()
+{
+    local signame=$1
+    local binary_name=$2
+
+    # Make sure that sig_handler is fully executed.
+ set +e + trap - INT + trap - TERM + trap - ERR + trap - QUIT + + $REMOTE "sudo killall -SIGINT $binary_name" 2>/dev/null + + test_exit 1 "Error: Caught signal $signame in $0" +} + +trap "sig_handler INT NONE" INT +trap "sig_handler TERM NONE" TERM +trap "sig_handler ERR NONE" ERR +trap "sig_handler QUIT NONE" QUIT + +test_init +target_sync +target_setup + +run_all_tests + +test_exit 0 "SUCCESS: Tests Completed" diff --git a/ci/test/board/ci_runner.py b/ci/test/board/ci_runner.py new file mode 100755 index 0000000000..9987b8494c --- /dev/null +++ b/ci/test/board/ci_runner.py @@ -0,0 +1,530 @@ +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html +# + +import os +import re +import sys +import string +import argparse +import subprocess + +if sys.version_info >= (3, 0): + from configparser import ConfigParser +else: + from ConfigParser import ConfigParser + +MAJOR_NUM = 1 +MINOR_NUM = 0 +REVISION = 0 +devid_lbk_vf = "a0f8" +devid_evt_vf = "a0f9" +devid_inl_pf = "a0f0" + +CPU_PART_CN96xx = "0x0b2" +CPU_PART_CN98xx = "0x0b1" +CPU_PART_CN10xx = "0xd49" + +NUM_EVTDEV = 2 +NUM_CPTDEV = 2 +NUM_LBKDEV = 4 +NUM_INLDEV = 1 + +EVENT_DEV_LIMIT = 50 + +SYS_DRV_PATH = "/sys/bus/pci/drivers" +SYS_DEV_PATH = "/sys/bus/pci/devices" + + +def findSystemArch(): + command = "cat /proc/cpuinfo" + info = subprocess.check_output(command, shell=True).decode().strip() + cpu_part = "" + for line in info.split("\n"): + if "CPU part" in line: + cpu_part = re.sub(".*CPU part.*: ", "", line, 1) + break + if cpu_part == CPU_PART_CN96xx: + return "_96xx" + if cpu_part == CPU_PART_CN98xx: + return "_98xx" + if cpu_part == CPU_PART_CN10xx: + return "_10xx" + + print("cpu part doesnt match 96/98/10xx") + return "_unknown" + + +class CIRunner: + def __init__(self, dir, dpdk_devbind, verb, dry_run): + # Modify the number of VFs and the kernel driver as needed. 
+ # ========================================================= + self.num_evtdev = NUM_EVTDEV + self.num_cptdev = NUM_CPTDEV + self.num_lbkdev = NUM_LBKDEV + self.num_inldev = NUM_INLDEV + self.drv_cptdev = b"rvu_cptpf" + self.drv_lbkdev = b"rvu_nicvf" + self.event_dev_id = devid_evt_vf.encode("UTF-8") + # ========================================================= + self.evtpfbdf = None + self.evtpfdev = None + self.evtpfbdf2 = None + self.evtpfdev2 = None + self.cptpfbdf = None + self.cptpfdev = None + self.test_dir = dir + self.dpdk_devbind = dpdk_devbind + self.drun = dry_run + self.verb = verb + out = subprocess.Popen( + "uname -m".split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = out.communicate() + print("ARCH={}".format(stdout.decode("utf-8"))) + if b"x86" in stdout: + exit("Cannot execute on x86") + else: + print("Starting CI Runner") + + self.arch_model = findSystemArch() + + def parse_through_event_devices(self, line): + device = line.split(b" ")[0].decode("utf-8") + + # if arch is cn10k, then use pf device, as + # kernel support is not available for vf + if self.arch_model == "_10xx": + return device, devid_evt_vf + + totalvfs_filePath = SYS_DEV_PATH + "/" + str(device) + "/sriov_totalvfs" + + if os.path.isfile(totalvfs_filePath): + command = "cat " + totalvfs_filePath + totalvfs = subprocess.check_output(command, shell=True).decode().strip() + if int(totalvfs) > self.num_evtdev: + return device, devid_evt_vf + + return None, None + + def init_pf(self): + print("==== Init PF devices ====") + + cmd = subprocess.Popen( + f"{self.dpdk_devbind} -s", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + for line in cmd.stdout: + if self.drv_lbkdev in line: + lnparse = line.split(b"'") + dvparse = lnparse[1].split(b" ") + self.lbkpfdev = dvparse[1].strip() + self.lbkpfbdf = lnparse[0].strip() + + if self.drv_cptdev in line: + lnparse = line.split(b"'") + dvparse = lnparse[1].split(b" ") + self.cptpfdev = 
dvparse[1].strip()
+                self.cptpfbdf = lnparse[0].strip()
+
+            if self.evtpfdev is None and self.event_dev_id in line:
+                self.evtpfbdf, self.evtpfdev = self.parse_through_event_devices(line)
+
+            elif (
+                self.arch_model == "_10xx"
+                and self.evtpfdev2 is None
+                and self.event_dev_id in line
+            ):
+                self.evtpfbdf2, self.evtpfdev2 = self.parse_through_event_devices(line)
+
+    def unbind_all(self):
+        print("==== Unbind all VF devices bound to vfio-pci ====")
+
+        cmd = subprocess.Popen(
+            f"{self.dpdk_devbind} -s",
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        for line in cmd.stdout:
+            lnparse = line.split(b" ")
+            cmdstr = "echo {} > {}/{}/driver/unbind".format(
+                lnparse[0].decode("utf-8"), SYS_DEV_PATH, lnparse[0].decode("utf-8")
+            )
+            self.run_cmd(cmdstr)
+
+        cmdstr = "echo 0 > {}/{}/sriov_numvfs".format(SYS_DEV_PATH, self.evtpfbdf)
+        self.run_cmd(cmdstr)
+
+        cmdstr = "echo 0 > {}/{}/sriov_numvfs".format(
+            SYS_DEV_PATH, self.cptpfbdf.decode("utf-8")
+        )
+        self.run_cmd(cmdstr)
+
+    def set_limits_over_eventdev(self, limit, bdf):
+        if self.arch_model == "_10xx":
+            return
+        self.set_limits(limit, bdf)
+
+    def update_limits_over_eventdev(self):
+        if self.arch_model == "_10xx":
+            return
+
+        # Clear limits on SSO and SSOW devices
+        cmd = subprocess.Popen(
+            "lspci -d:{}".format(self.evtpfdev),  # evtpfdev is already a str
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        for line in cmd.stdout:
+            lnparse = line.split(b" ")
+            evtbdf = lnparse[0].strip()
+            limit = 0
+            self.set_limits(limit, evtbdf)
+
+        # Set limits on Event devices
+        limit = EVENT_DEV_LIMIT
+        self.set_limits_over_eventdev(limit, self.evtpfbdf.encode("utf-8"))  # set_limits() decodes, so hand it bytes
+
+    def bind_driver(self, driver, dev_bdf):
+        if self.arch_model == "_10xx":  # NOTE(review): the only caller visible here runs on the non-10xx path - confirm the condition
+            cmdstr = f"{self.dpdk_devbind} -b {driver} {dev_bdf}"
+            self.run_cmd(cmdstr)
+
+    def bind_evtdev_pf(self):
+        if self.arch_model == "_10xx":
+            if self.drun is False:
+                os.system(
+                    "echo 'event{}_bdf: {}' >> {}/{}".format(
+                        1, self.evtpfbdf, self.test_dir, "configs/pcie.ini"
+                    )
+                )
+                os.system(
+                    "echo 'event{}_bdf: {}' >> {}/{}".format(
+                        2, self.evtpfbdf2, self.test_dir, "configs/pcie.ini"
+                    )
+                )
+        return
+
+    def bind_evtdev_vf(self):
+        self.bind_driver("vfio-pci", self.evtpfbdf)
+        self.update_limits_over_eventdev()
+
+        fnparse = self.evtpfbdf.split(".")
+        fn = fnparse[1]
+        dbdparse = fnparse[0]
+        cmdstr = "echo {} > {}/{}/sriov_numvfs".format(
+            self.num_evtdev, SYS_DEV_PATH, self.evtpfbdf
+        )
+        self.run_cmd(cmdstr)
+
+        # Resolve Created VFs
+        lspci = subprocess.Popen(
+            "lspci | grep {}".format(dbdparse),
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        evtcount = 1
+        dbdparse = dbdparse.encode("utf-8")
+        for line in lspci.stdout:
+            if dbdparse in line:
+                lnparse = line.split(b" ")
+                if lnparse[0] != self.evtpfbdf.encode("utf-8"):
+                    limit = 10
+                    self.set_limits_over_eventdev(limit, lnparse[0])
+                    cmdstr = "echo {} > {}/{}/driver/unbind".format(
+                        lnparse[0].decode("utf-8"),
+                        SYS_DEV_PATH,
+                        lnparse[0].decode("utf-8"),
+                    )
+                    self.run_cmd(cmdstr)
+
+                    devbdf = lnparse[0].decode("utf-8")
+                    cmdstr = (
+                        "echo vfio-pci > /sys/bus/pci/devices/%s/driver_override"
+                        % devbdf
+                    )
+                    self.run_cmd(cmdstr)
+                    cmdstr = "echo %s > /sys/bus/pci/drivers/vfio-pci/bind" % devbdf
+                    self.run_cmd(cmdstr)
+                    # cmdstr = "echo %s > /sys/bus/pci/driver_probe" % devbdf
+                    # self.run_cmd(cmdstr)
+
+                    if self.drun is False:
+                        os.system(
+                            "echo 'event{}_bdf: {}' >> {}/{}".format(
+                                evtcount,
+                                lnparse[0].decode("utf-8"),
+                                self.test_dir,
+                                "configs/pcie.ini",
+                            )
+                        )
+                    evtcount = evtcount + 1
+
+    def bind_evtdev(self):
+        print("==== Binding Event devices ====")
+        print("Using Eventdev PF device ID: {}".format(self.evtpfdev))
+        print("Using Eventdev PF BDF: {}".format(self.evtpfbdf))
+        self.evtpfbdf2 and print(
+            "Using Eventdev PF device ID: {}".format(self.evtpfdev2)
+        )
+        self.evtpfbdf2 and print("Using Eventdev PF BDF: {}".format(self.evtpfbdf2))
+
+        if self.arch_model == "_10xx":
+            self.bind_evtdev_pf()
+        else:
+
self.bind_evtdev_vf() + + def bind_cptdev(self): + print("==== Binding CPT devices ====") + print("Using CPT PF device ID: {}".format(self.cptpfdev.decode("utf-8"))) + print("Using CPT PF BDF: {}".format(self.cptpfbdf.decode("utf-8"))) + fnparse = self.cptpfbdf.split(b".") + fn = fnparse[1] + dbdparse = fnparse[0] + cmdstr = "echo {} > {}/{}/sriov_numvfs".format( + self.num_cptdev, SYS_DEV_PATH, self.cptpfbdf.decode("utf-8") + ) + self.run_cmd(cmdstr) + + # Resolve Created VFs + lspci = subprocess.Popen( + "lspci | grep {}".format(dbdparse.decode("utf-8")), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + cptcount = 1 + for line in lspci.stdout: + if dbdparse in line: + lnparse = line.split(b" ") + if lnparse[0] != self.cptpfbdf: + cmdstr = "echo {} > {}/{}/driver/unbind".format( + lnparse[0].decode("utf-8"), + SYS_DEV_PATH, + lnparse[0].decode("utf-8"), + ) + self.run_cmd(cmdstr) + + devbdf = lnparse[0].decode("utf-8") + cmdstr = ( + "echo vfio-pci > /sys/bus/pci/devices/%s/driver_override" + % devbdf + ) + self.run_cmd(cmdstr) + cmdstr = "echo %s > /sys/bus/pci/drivers/vfio-pci/bind" % devbdf + self.run_cmd(cmdstr) + # cmdstr = "echo %s > /sys/bus/pci/driver_probe" % devbdf + # self.run_cmd(cmdstr) + + if self.drun is False: + os.system( + "echo 'crypto{}_bdf: {}' >> {}/{}".format( + cptcount, + lnparse[0].decode("utf-8"), + self.test_dir, + "configs/pcie.ini", + ) + ) + cptcount = cptcount + 1 + + def bind_lbkdev(self): + print("==== Binding LBK devices ====") + + # Resolve Pre-created VFs + lspci = subprocess.Popen( + "lspci -d:{}".format(devid_lbk_vf), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + lbkcount = 1 + for line in lspci.stdout: + lnparse = line.split(b" ") + if lbkcount <= self.num_lbkdev: + cmdstr = "echo {} > {}/{}/driver/unbind".format( + lnparse[0].decode("utf-8"), SYS_DEV_PATH, lnparse[0].decode("utf-8") + ) + self.run_cmd(cmdstr) + + devbdf = lnparse[0].decode("utf-8") + cmdstr = ( + "echo 
vfio-pci > /sys/bus/pci/devices/%s/driver_override" % devbdf + ) + self.run_cmd(cmdstr) + cmdstr = "echo %s > /sys/bus/pci/drivers/vfio-pci/bind" % devbdf + self.run_cmd(cmdstr) + # cmdstr = "echo %s > /sys/bus/pci/driver_probe" % devbdf + # self.run_cmd(cmdstr) + + if self.drun is False: + os.system( + "echo 'lbk{}_bdf: {}' >> {}/{}".format( + lbkcount, + lnparse[0].decode("utf-8"), + self.test_dir, + "configs/pcie.ini", + ) + ) + lbkcount = lbkcount + 1 + + def bind_inldev(self): + print("==== Binding Inline devices ====") + + # Resolve Inline device PFs + lspci = subprocess.Popen( + "lspci -d:{}".format(devid_inl_pf), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + inlcount = 1 + for line in lspci.stdout: + lnparse = line.split(b" ") + if inlcount <= self.num_inldev: + cmdstr = "echo {} > {}/{}/driver/unbind".format( + lnparse[0].decode("utf-8"), SYS_DEV_PATH, lnparse[0].decode("utf-8") + ) + self.run_cmd(cmdstr) + + devbdf = lnparse[0].decode("utf-8") + cmdstr = ( + "echo vfio-pci > /sys/bus/pci/devices/%s/driver_override" % devbdf + ) + self.run_cmd(cmdstr) + cmdstr = "echo %s > /sys/bus/pci/drivers/vfio-pci/bind" % devbdf + self.run_cmd(cmdstr) + # cmdstr = "echo %s > /sys/bus/pci/driver_probe" % devbdf + # self.run_cmd(cmdstr) + + if self.drun is False: + os.system( + "echo 'inl{}_bdf: {}' >> {}/{}".format( + inlcount, + lnparse[0].decode("utf-8"), + self.test_dir, + "configs/pcie.ini", + ) + ) + inlcount = inlcount + 1 + + def set_limits(self, limit, evt_bdf): + cmdstr = "echo {} > {}/{}/limits/sso".format( + limit, SYS_DEV_PATH, evt_bdf.decode("utf-8") + ) + self.run_cmd(cmdstr) + cmdstr = "echo {} > {}/{}/limits/ssow".format( + limit, SYS_DEV_PATH, evt_bdf.decode("utf-8") + ) + self.run_cmd(cmdstr) + + def run_cmd(self, cmdstr): + if self.drun: + print(cmdstr) + else: + if self.verb: + print(cmdstr) + os.system(cmdstr) + + def enable_sriov(self): + enable_sriov_file = "/sys/module/vfio_pci/parameters/enable_sriov" + if 
os.path.isfile(enable_sriov_file):
+            cmdstr = "echo 1 > " + enable_sriov_file
+            self.run_cmd(cmdstr)
+        else:
+            print("File: " + enable_sriov_file + " doesnt exist")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-b",
+        "--bind",
+        action="store_true",
+        default=False,
+        help="Bind PCIe addresses to vfio-pci driver",
+    )
+    parser.add_argument(
+        "-u",
+        "--unbind",
+        action="store_true",
+        default=False,
+        help="Unbind PCIe addresses from vfio-pci driver",
+    )
+    parser.add_argument(
+        "-d",
+        "--dryrun_bind",
+        action="store_true",
+        default=False,
+        help="Dryrun bind PCIe addresses to vfio-pci driver",
+    )
+    parser.add_argument(
+        "-g",
+        "--dryrun_unbind",
+        action="store_true",
+        default=False,
+        help="Dryrun unbind PCIe addresses from vfio-pci driver",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        default=False,
+        help="Optionally used with [-b|-u] for verbose output",
+    )
+
+    try:
+        args = parser.parse_args()
+    except SystemExit as err:  # argparse exits by itself; code 0 means -h/--help
+        sys.exit(err.code and "Call 'python ci_runner.py --help' for more info")
+
+    print("Version = {}.{}.{}".format(MAJOR_NUM, MINOR_NUM, REVISION))
+    if args.bind or args.unbind:
+        dryrun = False
+    elif args.dryrun_bind or args.dryrun_unbind:
+        dryrun = True
+    else:
+        exit(parser.print_help())
+
+    if args.verbose:
+        verb = True
+    else:
+        verb = False
+
+    # Read the environment variables
+    test_dir = os.environ.get("TEST_DIR")
+    dpdk_devbind = os.environ.get("DPDK_DEVBIND")
+    if test_dir is None or dpdk_devbind is None:
+        exit("Please set the environment 'TEST_DIR' and 'DPDK_DEVBIND'")
+
+    runner = CIRunner(
+        dir=test_dir, dpdk_devbind=dpdk_devbind, verb=verb, dry_run=dryrun
+    )
+
+    os.system("> {}/configs/pcie.ini".format(test_dir))
+    os.system("echo '[default]' >> {}/configs/pcie.ini".format(test_dir))
+
+    # Init PF devices
+    runner.init_pf()
+
+    if args.unbind or args.dryrun_unbind:
+        # Unbind all devices bound to vfio-pci
+        runner.unbind_all()
+    else:
+        runner.enable_sriov()
+        #
Bind event-dev + runner.bind_evtdev() + # Bind cpt-dev + runner.bind_cptdev() + # Bind lbk-dev + runner.bind_lbkdev() + # Bind inline-dev + runner.bind_inldev() diff --git a/ci/test/board/oct-target-setup.sh b/ci/test/board/oct-target-setup.sh new file mode 100755 index 0000000000..793167f2e1 --- /dev/null +++ b/ci/test/board/oct-target-setup.sh @@ -0,0 +1,232 @@ +#!/bin/bash -x +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +# Script syntax: +# oct-target-setup.sh +# +# Optional environment variables: +# HP How many hugepages of default size to enable. +# NOHP Flag disallowing hugepages allocation +# DEVS Space separated list of PCI devices to bind to VFIO. +# VFIO_DEVBIND Alternative location of oxk-devbind-basic.sh script. +# TARGET_BOARD Optional SSH URL for the target board to setup. If not given, +# all commands are run locally. If it is given the script is +# copied to REMOTE_DIR on the TARGET_BOARD and run from there. +# +# Below options are used only when TARGET_BOARD is set. +# +# TARGET_SSH_CMD ssh cmd used to connect to target. Default is "ssh" +# TARGET_SCP_CMD scp cmd used to connect to target. Default is "scp" +# REMOTE_DIR Directory where build dir is located on the remote target. +# It is used to find oxk-devbind-basic.sh script. +# SUDO This is used only when the command is to run as sudo on the +# remote target. Default set to "sudo" i.e. to run as SUDO. +# +# Script will: +# 1. Mount hugetlbfs and enable HP hugepages of default size. +# 2. Bind each PCI device using VFIO_DEVBIND script +# 3. Sets device configuration. 
+
+set -euo pipefail
+shopt -s extglob
+
+function get_cpu_string() {
+    local cpu_impl
+    local cpu_str
+    local cpu_pn
+
+    cpu_pn=$(grep -m 1 'CPU part' /proc/cpuinfo | awk -F': ' '{print $2}')
+    cpu_impl=$(grep -m 1 'CPU implementer' /proc/cpuinfo | awk -F': ' '{print $2}')
+
+    if [[ $cpu_impl == 0x43 ]] && [[ $cpu_pn == 0x0b1 ]]; then
+        cpu_str="98xx"
+    elif [[ $cpu_impl == 0x43 ]] && [[ $cpu_pn == 0x0b2 ]]; then
+        cpu_str="96xx"
+    elif [[ $cpu_impl == 0x43 ]] && [[ $cpu_pn == 0x0b4 ]]; then
+        cpu_str="95xx"
+    elif [[ $cpu_impl == 0x41 ]] && [[ $cpu_pn == 0xd49 ]]; then
+        cpu_str="cn10ka"
+        compatible=`cat /proc/device-tree/compatible`
+        IFS=',' read -ra list <<< "$compatible"
+        if [[ "${list[0]}" = "marvell" ]]
+        then
+            cpu_str=${list[1]}
+        fi
+    else
+        echo "Invalid CPU (Implementer=$cpu_impl Part Number=$cpu_pn)" >&2  # stdout carries the result string
+        exit 1
+    fi
+    echo $cpu_str
+}
+
+function mount_hugetlbfs() {
+    # Mount hugetlbfs.
+    if ! mount | grep -q hugepages; then
+        mount -t hugetlbfs none /dev/hugepages/
+    fi
+}
+
+function setup_hp() {
+    if [[ -n ${NO_HP:-${NOHP:-}} ]]; then  # header documents NOHP; defaults keep 'set -u' from aborting
+        echo "Skipping huge page setup"
+        return
+    fi
+    # Enable HP hugepages.
+ echo $HP > /proc/sys/vm/nr_hugepages +} + +function setup_devices() { + local npa_pf + local sso_pf + local dma_pf + local dma_vf + local cpt_pf="" + local cpt_vf="" + local inl_pf + local devs + local nix_lbk_vfs + local nix_pfs + local pcid + + nix_lbk_vfs="0002:01:00.1 0002:01:00.2 0002:01:00.3" + devs=${DEVS:-$nix_lbk_vfs} + + if [[ $CPU == "cn10ka" ]] || [[ $CPU == "cn10kb" ]]; then + cpt_pf="0002:20:00.0" + cpt_vf="0002:20:00.1" + elif [[ $IS_CN9K -eq 1 ]]; then + cpt_pf="0002:10:00.0" + cpt_vf="0002:10:00.1" + fi + + # Set KVF Limits + if [[ $CPU == "cn10kb" ]]; then + echo 8 > /sys/bus/pci/devices/$cpt_pf/kvf_limits + else + echo 24 > /sys/bus/pci/devices/$cpt_pf/kvf_limits + fi + + # Disable existing VFs and enable CPT VFs + if [[ -e /sys/bus/pci/devices/$cpt_pf/sriov_numvfs ]]; then + echo 0 > /sys/bus/pci/devices/$cpt_pf/sriov_numvfs + echo 2 > /sys/bus/pci/devices/$cpt_pf/sriov_numvfs + devlink dev info pci/$cpt_pf + devs+=" $cpt_vf" + fi + + # SSO and NPA devices + sso_pf=${SSO_DEV:-$(lspci -d :a0f9 | tail -1 | awk '{ print $1 }')} + npa_pf=${NPA_DEV:-$(lspci -d :a0fb | tail -1 | awk '{ print $1 }')} + devs+=" $sso_pf" + devs+=" $npa_pf" + + # DMA device + dma_pf=$(lspci -d :a080 | tail -1 | awk '{ print $1 }') + if [[ -e /sys/bus/pci/devices/$dma_pf/sriov_numvfs ]]; then + echo 0 > /sys/bus/pci/devices/$dma_pf/sriov_numvfs + echo 1 > /sys/bus/pci/devices/$dma_pf/sriov_numvfs + dma_vf=$(lspci -d :a081 | tail -1 | awk '{ print $1 }') + devs+=" $dma_vf" + fi + + if [[ $CPU == "cn10ka" ]]; then + inl_pf=${INL_DEV:-$(lspci -d :a0f0 | tail -1 | awk '{ print $1 }')} + devs+=" $inl_pf" + fi + + if [[ $CPU == "cn10kb" ]]; then + nix_pfs=${ETH_DEV:-$(lspci -d :a063 | tail -1 | awk '{ print $1 }')} + devs+=" $nix_pfs" + inl_pf=${INL_DEV:-$(lspci -d :a0f0 | tail -1 | awk '{ print $1 }')} + devs+=" $inl_pf" + fi + + # Unbind all SSO devices first + for d in $(lspci -d :a0f9 | awk '{ print $1 }'); do + $VFIO_DEVBIND -u $d || exit 1 + done + + # Bind devices + for 
d in $devs; do + $VFIO_DEVBIND -b vfio-pci $d || exit 1 + done + + if [[ $IS_CN9K -eq 0 ]]; then + echo "Skipping limits configuration on 106xx" + return + fi + + # Configure limits + pcid="02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10" + if [[ $CPU == "98xx" ]]; then + pcid="$pcid 11 12 13 14 15 16 17 18" + fi + set +euo pipefail + for d in $pcid; do + path=/sys/bus/pci/devices/0002\:$d\:00.0/limits/ + if [[ -d $path ]]; then + echo 0 > ${path}ssow + echo 0 > ${path}sso + fi + done + + path=/sys/bus/pci/devices/$sso_pf/limits/ + if [[ ! -d $path ]]; then + set -euo pipefail + return + fi + echo 256 > ${path}sso + # Max number of available work slots are (2 x num_core) + 4. + # Max limit needs to be set for tests to run in dual workslot mode. + if [[ $CPU == "96xx" ]] || [[ $CPU == "95xx" ]]; then + echo 46 > ${path}ssow + elif [[ $CPU == "98xx" ]]; then + echo 76 > ${path}ssow + fi + echo 8 > ${path}tim + set -euo pipefail +} + +# Environment variables +NO_HP=${NO_HP:-} +HP=${HP:-8} + +if [[ -n ${TARGET_BOARD:-} ]]; then + # Run on remote by copying this script to the remote board + SCRIPTPATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + SCRIPTNAME="$(basename $0)" + SUDO=${SUDO:-"sudo"} + TARGET_SSH_CMD=${TARGET_SSH_CMD:-"ssh"} + TARGET_SCP_CMD=${TARGET_SCP_CMD:-"scp"} + REMOTE_DIR=${REMOTE_DIR:-/tmp/vpp} + REMOTE_BUILD_DIR=${REMOTE_DIR}/build + $TARGET_SSH_CMD $TARGET_BOARD mkdir -p $REMOTE_DIR + $TARGET_SCP_CMD $SCRIPTPATH/$SCRIPTNAME $TARGET_BOARD:$REMOTE_DIR/oct-target-setup.sh + VFIO_DEVBIND=${VFIO_DEVBIND:-$REMOTE_BUILD_DIR/ci/test/board/oxk-devbind-basic.sh} + TARGET_EXPORTS="VFIO_DEVBIND=$VFIO_DEVBIND HP=$HP" + $TARGET_SSH_CMD $TARGET_BOARD \ + "$SUDO $TARGET_EXPORTS $REMOTE_DIR/oct-target-setup.sh" + exit 0 +fi + +VFIO_DEVBIND=${VFIO_DEVBIND:-$(command -v oxk-devbind-basic.sh)} +if [[ ! -x $VFIO_DEVBIND ]]; then + echo "VFIO_DEVBIND Invalid. Set VFIO_DEVBIND to a valid oxk-devbind-basic.sh script." 
+ exit 1 +fi + +# Get CPU +CPU=$(get_cpu_string) +IS_CN9K=0 +if [[ "$CPU" == "98xx" ]] || [[ "$CPU" == "96xx" ]] || [[ "$CPU" == "95xx" ]] || [[ "$CPU" == "cnf10ka" ]] # 95xx included: setup_devices has a 95xx limits branch that is only reachable when IS_CN9K=1 +then + IS_CN9K=1 +fi + +mount_hugetlbfs +setup_hp +setup_devices + +set -euo pipefail diff --git a/ci/test/board/oxk-devbind-basic.sh b/ci/test/board/oxk-devbind-basic.sh new file mode 100755 index 0000000000..d2fc61aad9 --- /dev/null +++ b/ci/test/board/oxk-devbind-basic.sh @@ -0,0 +1,63 @@ +#! /bin/sh + +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +HELP="usage: oxk-devbind-basic.sh [OPTIONS] devices.. +Script for binding/unbinding devices from Linux kernel drivers. +NOTE: Options -b and -u are exclusive. + OPTIONS: + -b driver - Bind given devices to a given driver + -u - Unbind devices from their driver + devices: Space separated List of DBDF addresses (i.e. 0001:02:00.1)" + +OPTS=$(getopt -u -n $0 -o "b:uh" -- $@) + +driver="" +unbind=0 + +eval set -- "$OPTS" + +while true; do + case "$1" in + -h) echo "$HELP"; exit 0 ;; + -b) driver=$2; shift 2 ;; + -u) unbind=1; shift ;; + --) shift; break ;; + *) echo "Unknown arguments"; echo "$HELP"; exit 1 ;; + esac +done + +if [ "x$driver" != "x" -a $unbind -eq 1 ]; then + echo "Cannot have -b and -u" + echo "$HELP" + exit 1 +fi + +if [ "x$driver" = "x" -a $unbind -eq 0 ]; then + echo "Please specify either -b or -u" + echo "$HELP" + exit 1 +fi + +for dbdf in $@; do + ddir="/sys/bus/pci/devices/$dbdf" + cur_drv="$(readlink -n $ddir/driver)" + if [ !
-z "$cur_drv" ]; then + cur_drv="$(basename $cur_drv)" + fi + # If user wants to bind and same driver is bound, skip the device + if [ $unbind -eq 0 -a "x$driver" = "x$cur_drv" ]; then + continue + fi + # Either user wanted to unbind or we have to unbind for re-binding + if [ -e $ddir/driver/unbind ]; then + echo $dbdf > "$ddir/driver/unbind" + fi + # If user specified -b then do try to bind + if [ "x$driver" != "x" ]; then + echo $driver > "$ddir/driver_override" + echo $dbdf > /sys/bus/pci/drivers_probe + fi +done diff --git a/ci/test/common/pcap/pcap.env b/ci/test/common/pcap/pcap.env new file mode 100644 index 0000000000..9ddd3da480 --- /dev/null +++ b/ci/test/common/pcap/pcap.env @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Copyright(C) 2025 Marvell. + +PCAP_UTILS_PATH=$(dirname $(readlink -f "${BASH_SOURCE[0]}")) + +if ! [ -e $PCAP_UTILS_PATH/pcap-pkt-cnt ]; then + gcc $PCAP_UTILS_PATH/pcap_len.c -lpcap -o $PCAP_UTILS_PATH/pcap-len + gcc $PCAP_UTILS_PATH/pcap_pkt_cnt.c -lpcap -o $PCAP_UTILS_PATH/pcap-pkt-cnt + gcc $PCAP_UTILS_PATH/pcap_mac.c -lpcap -o $PCAP_UTILS_PATH/pcap-mac +fi + +function pcap_packet_count() +{ + $PCAP_UTILS_PATH/pcap-pkt-cnt $1 +} + +function pcap_length() +{ + $PCAP_UTILS_PATH/pcap-len $1 +} + +function pcap_packet_dmac() +{ + $PCAP_UTILS_PATH/pcap-mac $1 "dst" +} + +function pcap_packet_smac() +{ + $PCAP_UTILS_PATH/pcap-mac $1 "src" +} diff --git a/ci/test/common/pcap/pcap_len.c b/ci/test/common/pcap/pcap_len.c new file mode 100644 index 0000000000..f500c7fc1d --- /dev/null +++ b/ci/test/common/pcap/pcap_len.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + unsigned int total_len = 0; + char errbuf[PCAP_ERRBUF_SIZE]; + struct pcap_pkthdr header; + const u_char *packet; + pcap_t *handle; + + if (argc < 2) + return -1; + + handle = pcap_open_offline (argv[1], errbuf); + + if (handle == NULL) + return -2; + + while ((packet = pcap_next (handle, &header))) + total_len += header.len; + + pcap_close (handle); + + printf ("%u\n", total_len); + return 0; +} diff --git a/ci/test/common/pcap/pcap_mac.c b/ci/test/common/pcap/pcap_mac.c new file mode 100644 index 0000000000..e1cdc57a86 --- /dev/null +++ b/ci/test/common/pcap/pcap_mac.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2025 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const struct ether_header *eth_hdr; + char errbuf[PCAP_ERRBUF_SIZE]; + struct pcap_pkthdr header; + const u_char *packet; + const u_char *ptr; + int i, smac = 0; + pcap_t *handle; + + if (argc < 2) + return -1; + + if (argc == 3) + { + if (strncmp (argv[2], "src", strlen ("src")) == 0) + smac = 1; + } + + handle = pcap_open_offline (argv[1], errbuf); + + if (handle == NULL) + return -2; + + while ((packet = pcap_next (handle, &header))) + { + eth_hdr = (const struct ether_header *) packet; + if (smac) + ptr = eth_hdr->ether_shost; + else + ptr = eth_hdr->ether_dhost; + + i = ETHER_ADDR_LEN; + do + { + printf ("%s%02x", (i == ETHER_ADDR_LEN) ? " " : ":", *ptr++); + } + while (--i > 0); + printf ("\n"); + } + + pcap_close (handle); + + return 0; +} diff --git a/ci/test/common/pcap/pcap_pkt_cnt.c b/ci/test/common/pcap/pcap_pkt_cnt.c new file mode 100644 index 0000000000..3ed60fae5b --- /dev/null +++ b/ci/test/common/pcap/pcap_pkt_cnt.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + unsigned int packet_count = 0; + char errbuf[PCAP_ERRBUF_SIZE]; + struct pcap_pkthdr header; + const u_char *packet; + pcap_t *handle; + + if (argc < 2) + return -1; + + handle = pcap_open_offline (argv[1], errbuf); + + if (handle == NULL) + return -2; + + while ((packet = pcap_next (handle, &header))) + packet_count++; + + pcap_close (handle); + + printf ("%u\n", packet_count); + return 0; +} diff --git a/ci/test/common/test_list_helper_funcs.sh b/ci/test/common/test_list_helper_funcs.sh new file mode 100644 index 0000000000..3b0a587946 --- /dev/null +++ b/ci/test/common/test_list_helper_funcs.sh @@ -0,0 +1,175 @@ +#!/bin/bash + +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +# Functions required to manipulate the test.list file. + +TEST_LIST=$BUILD_ROOT/ci/test/test.list + +function get_test_name() +{ + local test_num=$1 + local num=1 + local info="LIST_END" + while read -r testinfo; do + if [[ $num == $test_num ]]; then + info=$testinfo + break + fi + num=$((num + 1)) + done <$TEST_LIST + echo $info | awk -F'#' '{print $1}' +} + +function get_test_info() +{ + local test_name=$1 + local name + local info="LIST_END" + while read -r testinfo; do + name=$(echo $testinfo | awk -F'#' '{print $1}') + if [[ $name == $test_name ]]; then + info=$testinfo + break + fi + done <$TEST_LIST + echo $info +} + +function get_test_exec_bin() +{ + echo "vpp" +} + +function get_test_dir() +{ + echo "ci/test/$1" +} + +function get_test_args() +{ + get_test_info $1 | awk -F'#' '{print $4}' +} + +function get_test_extra_args() +{ + local tst=$1 + local args= + + tst="${tst%% }" + IFS=$'\n' + for t in ${CMD_EXTRA_ARGS:-}; do + if [ "${t%,*}" == "$tst" ]; then + args=${t#*,} + break + fi + done + echo $args + IFS=' ' +} + +function get_test_env() +{ + 
get_test_info $1 | awk -F'#' '{print $5}' +} + +function get_test_timeout() +{ + local tmo=${DEFAULT_CMD_TIMEOUT:-5m} + local tst=$1 + + for t in ${CMD_TIMEOUTS:-}; do + if [ "${t%=*}" == "$tst" ]; then + tmo=${t#*=} + break + fi + done + echo $tmo +} + +function test_enabled() +{ + local test_num=$1 + local tst=$(get_test_name $test_num) + + if [[ $tst == LIST_END ]]; then + return 1 + fi + + echo -e "\n\n#################### Test $test_num: $tst ########################" + + # Check the SKIP_TESTS and RUN_TESTS and make sure that test need indeed be run + if [[ -n $RUN_TESTS ]]; then + if ! (echo "$RUN_TESTS" | grep -qw "$tst"); then # -w: whole-name match, so "foo" is not selected just because "foo_perf" is listed + echo "Skipping $tst as not on RUN_TESTS list !!" + echo "$test_num: $tst [RUN_TESTS]" >> $RUN_DIR/skip.list + return 77 + fi + elif $(echo "$SKIP_TESTS" | grep -qw "$tst"); then + echo "Skipping $tst on SKIP_TESTS list !!" + echo "$test_num: $tst [SKIP_TESTS]" >> $RUN_DIR/skip.list + return 77 + fi + + if [[ $test_num -lt ${START_TEST_NUM} ]] || [[ $test_num -gt ${END_TEST_NUM} ]]; then + echo "Skipping $tst as test num not within given test num range ($START_TEST_NUM-$END_TEST_NUM) !!"
+ echo "$test_num: $tst [TEST_NUM_OUT_OF_RANGE $START_TEST_NUM-$END_TEST_NUM]" >> $RUN_DIR/skip.list + return 77 + fi + + echo "$test_num: $tst" >> $RUN_DIR/run.list + return 0 +} + +function test_info_print() +{ + local name=$1 + local exec_bin + local args= + local defargs + local envs + local tmo + local cmd + local test_dir + local extra_args= + + exec_bin=$(get_test_exec_bin $name) + test_dir=$(get_test_dir $name) + defargs=$(get_test_args $name) + envs=$(get_test_env $name) + tmo=$(get_test_timeout $name) + cmd=$(get_test_command $name) + extra_args=$(get_test_extra_args $name) + echo "Test Binary/script -> $exec_bin" + echo "Test Timeout -> $tmo" + echo "Test Environment -> $envs" + echo "Test Directory -> $test_dir" + + # Remove unnecessary arguments from command line + echo "Default arguments -> '$defargs'" + eval set -- "$defargs" + while [[ $# -gt 0 ]]; do + case $1 in + -l) shift; shift;; + --no-huge) shift;; + -m) shift; shift;; + *) args="$args $1"; shift;; + esac + done + echo "Modified arguments -> '$args $extra_args'" + echo "Test Command -> $cmd" +} + +function get_test_command() +{ + local name=$1 + local cmd + local test_dir + + test_dir=$(get_test_dir $name) + + cmd="cd $REMOTE_BUILD_DIR/ci/test/$name && $TARGET_SUDO TARGET_BOARD=$TARGET_BOARD GENERATOR_BOARD=$GENERATOR_BOARD bash ${REMOTE_BUILD_DIR}/ci/test/$name/$name.sh" + echo "$cmd" +} diff --git a/ci/test/common/testpmd/common.env b/ci/test/common/testpmd/common.env new file mode 100644 index 0000000000..e2b5dec99e --- /dev/null +++ b/ci/test/common/testpmd/common.env @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Copyright(C) 2025 Marvell. + +TESTPMD_SCRIPT_PATH=$(dirname $(readlink -f "${BASH_SOURCE[0]}")) + +TESTPMD=$(which dpdk-testpmd) + +if [[ -z $TESTPMD ]]; then + echo "dpdk-testpmd not found !!" 
+ exit 1 +fi + +function testpmd_cleanup() +{ + local prefix=$1 + + # Issue kill + ps -eo "pid,args" | grep testpmd | grep $prefix | \ + awk '{print $1}' | xargs -I[] -n1 kill -9 [] 2>/dev/null || true + + # Wait until the process is killed + while (ps -ef | grep testpmd | grep -q $prefix); do + continue + done +} + +function testpmd_prompt() +{ + local prefix=$1 + local refresh=${2:-} + local skip_bytes=${3:-} + local in=testpmd.in.$prefix + local out=testpmd.out.$prefix + + local cmd="tail -n1 $out" + + if [[ "$skip_bytes" != "" ]] + then + cmd="tail -c +$skip_bytes $out" + fi + + start_ts=`date +%s` + start_ts=$((start_ts + 60)) + while ! ($cmd | grep -q "^testpmd> $"); do + if [ "$refresh" == "yes" ] + then + sleep 0.01 + echo "" >>$in + fi + #Link change may break this logic, timeout loop to continue again + ts=`date +%s` + if (( $ts > $start_ts )) + then + break; + fi + continue; + done +} + +function testpmd_launch() +{ + local prefix=$1 + local eal_args=$2 + local testpmd_args=$3 + local out=testpmd.out.$prefix + local in=testpmd.in.$prefix + local unbuffer="stdbuf -o0" + + testpmd_cleanup $prefix + rm -f $out + rm -f $in + touch $in + tail -f $in | \ + ($unbuffer $TESTPMD $eal_args --file-prefix $prefix -- \ + $testpmd_args -i &>$out) & + # Wait till out file is created + while [[ ! 
-f $out ]]; do + continue + done + # Wait till testpmd prompt comes up + testpmd_prompt $prefix +} + +function testpmd_cmd() +{ + local prefix=$1 + local cmd=$2 + local in=testpmd.in.$prefix + local skip_bytes=$(stat -c %s testpmd.out.$prefix) + + echo "$cmd" >> $in + testpmd_prompt $prefix "no" $skip_bytes +} + +function testpmd_cmd_refresh() +{ + local prefix=$1 + local cmd=$2 + local in=testpmd.in.$prefix + + echo "$cmd" >> $in + testpmd_prompt $prefix "yes" +} + +function testpmd_quit() +{ + local prefix=$1 + local in=testpmd.in.$prefix + + echo "quit" >> $in + while (ps -ef | grep dpdk-testpmd | grep -q $prefix); do + continue + done +} + +function testpmd_port_stats() +{ + local prefix=$1 + local port=$2 + local in=testpmd.in.$prefix + local out=testpmd.out.$prefix + + echo "show port stats $port" >> $in + sleep 0.5 + testpmd_prompt $prefix + cat $out | tail -n10 | head -n4 +} + +function testpmd_port_rx_count() +{ + local stats=$(testpmd_port_stats $1 $2) + + echo $stats | awk '{print $2}' +} + +function testpmd_port_rx_bytes() +{ + local stats=$(testpmd_port_stats $1 $2) + + echo $stats | awk '{print $6}' +} + +function testpmd_port_tx_count() +{ + local stats=$(testpmd_port_stats $1 $2) + + echo $stats | awk '{print $12}' +} + +function testpmd_port_tx_bytes() +{ + local stats=$(testpmd_port_stats $1 $2) + + echo $stats | awk '{print $16}' +} + +function testpmd_log() +{ + local prefix=$1 + local out=testpmd.out.$prefix + cat $out +} + +function testpmd_log_off() +{ + local prefix=$1 + local offset=$2 + local out=testpmd.out.$prefix + + dd if=$out skip=$offset bs=1 status=none +} + +function testpmd_log_sz() +{ + local prefix=$1 + local out=testpmd.out.$prefix + + stat -c %s $out +} diff --git a/ci/test/common/testpmd/pktgen.env b/ci/test/common/testpmd/pktgen.env new file mode 100644 index 0000000000..2fac32a63b --- /dev/null +++ b/ci/test/common/testpmd/pktgen.env @@ -0,0 +1,90 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Copyright(C) 2025 
Marvell. + +PKTGEN_SCRIPT_PATH=$(dirname $(readlink -f "${BASH_SOURCE[0]}")) + +source $PKTGEN_SCRIPT_PATH/common.env + +function pktgen_cleanup() +{ + testpmd_cleanup "pktgen" +} + +function pktgen_prompt() +{ + testpmd_prompt "pktgen" +} + +function pktgen_launch() +{ + local opts + local coremask="0x3" + local port="0002:01:00.1" + local pcapin=$PKTGEN_SCRIPT_PATH/../pcap/sample.pcap + local pcapout="out.pcap" + + if ! opts=$(getopt \ + -o "i:o:p:c:" \ + -l "in-pcap:,out-pcap:,port:,coremask:" \ + -- "$@"); then + echo "Failed to parse pktgen arguments" + exit 1 + fi + + eval set -- "$opts" + while [[ $# -gt 1 ]]; do + case $1 in + -i|--in-pcap) shift; pcapin=$1;; + -o|--out-pcap) shift; pcapout=$1;; + -p|--port) shift; port=$1;; + -c|--coremask) shift; coremask=$1;; + *) echo "Unknown pktgen argument"; exit 1;; + esac + shift + done + + testpmd_launch "pktgen" \ + "-c $coremask -a $port --vdev eth_pcap0,rx_pcap=$pcapin,tx_pcap=$pcapout" \ + "--port-topology=paired --portlist=0,1 --no-flush-rx" +} + +function pktgen_start() +{ + testpmd_cmd "pktgen" "start" +} + +function pktgen_quit() +{ + testpmd_quit "pktgen" +} + +function pktgen_stats() +{ + testpmd_port_stats "pktgen" "0" +} + +function pktgen_rx_count() +{ + testpmd_port_rx_count "pktgen" "0" +} + +function pktgen_rx_bytes() +{ + testpmd_port_rx_bytes "pktgen" "0" +} + +function pktgen_tx_count() +{ + testpmd_port_tx_count "pktgen" "0" +} + +function pktgen_tx_bytes() +{ + testpmd_port_tx_bytes "pktgen" "0" +} + +function pktgen_log() +{ + testpmd_log "pktgen" | awk '{print "PKTGEN# "$0}' +} diff --git a/ci/test/common/vpp/vpp.env b/ci/test/common/vpp/vpp.env new file mode 100644 index 0000000000..66cbd324bb --- /dev/null +++ b/ci/test/common/vpp/vpp.env @@ -0,0 +1,192 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Copyright(C) 2025 Marvell. 
+ +VPP_SCRIPT_PATH=$(dirname $(readlink -f "${BASH_SOURCE[0]}")) +source $VPP_SCRIPT_PATH/../testpmd/common.env + +if [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-cnxk-aarch64/vpp/bin/vpp ]]; then + # This is running from build directory + VPP=$VPP_SCRIPT_PATH/../../../../build-root/install-cnxk-aarch64/vpp/bin/vpp + export LD_LIBRARY_PATH=$VPP_SCRIPT_PATH/../../../../build-root/install-cnxk-aarch64/vpp/lib +elif [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-cnxk_debug-aarch64/vpp/bin/vpp ]]; then + VPP=$VPP_SCRIPT_PATH/../../../../build-root/install-cnxk_debug-aarch64/vpp/bin/vpp + export LD_LIBRARY_PATH=$VPP_SCRIPT_PATH/../../../../build-root/install-cnxk_debug-aarch64/vpp/lib +elif [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-vpp-native/vpp/bin/vpp ]]; then + VPP=$VPP_SCRIPT_PATH/../../../../build-root/install-vpp-native/vpp/bin/vpp + export LD_LIBRARY_PATH=$VPP_SCRIPT_PATH/../../../../build-root/install-vpp-native/vpp/lib +elif [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-vpp_debug-native/vpp/bin/vpp ]]; then + VPP=$VPP_SCRIPT_PATH/../../../../build-root/install-vpp_debug-native/vpp/bin/vpp + export LD_LIBRARY_PATH=$VPP_SCRIPT_PATH/../../../../build-root/install-vpp_debug-native/vpp/lib +else + VPP=$(which vpp) +fi + +if [[ -z $VPP ]]; then + echo "vpp not found !!" 
+ exit 1 +fi + +if [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-cnxk-aarch64/vpp/bin/vppctl ]]; then + # This is running from build directory + VPPCTL=$VPP_SCRIPT_PATH/../../../../build-root/install-cnxk-aarch64/vpp/bin/vppctl +elif [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-cnxk_debug-aarch64/vpp/bin/vppctl ]]; then + VPPCTL=$VPP_SCRIPT_PATH/../../../../build-root/install-cnxk_debug-aarch64/vpp/bin/vppctl +elif [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-vpp-native/vpp/bin/vppctl ]]; then + VPPCTL=$VPP_SCRIPT_PATH/../../../../build-root/install-vpp-native/vpp/bin/vppctl +elif [[ -f $VPP_SCRIPT_PATH/../../../../build-root/install-vpp_debug-native/vpp/bin/vppctl ]]; then + VPPCTL=$VPP_SCRIPT_PATH/../../../../build-root/install-vpp_debug-native/vpp/bin/vppctl +else + VPPCTL=$(which vppctl) +fi + +if [[ -z $VPPCTL ]]; then + echo "vppctl not found !!" + exit 1 +fi + +function vpp_cleanup() +{ + local startup_conf=$1 + + # Issue kill + ps -eo "pid,args" | grep "vpp \-c" | grep $startup_conf | \ + awk '{print $1}' | xargs -I[] -n1 kill -9 [] 2>/dev/null || true + + # Wait until the process is killed + while (ps -ef | grep "vpp \-c" | grep -q $startup_conf); do + continue + done +} + +function vpp_launch() +{ + local startup_conf=$1 + + vpp_cleanup $1 + $VPP -c $startup_conf.conf & >1.log + sleep 2 +} + +function vpp_exec_cmd() +{ + local startup_conf=$1 + local cmd=$2 + set +e + $VPPCTL -s /tmp/$startup_conf/cli.sock $cmd + set -e +} + +function vpp_exec_file() +{ + local startup_conf=$1 + local file=$2 + set +e + $VPPCTL -s /tmp/$startup_conf/cli.sock exec $file + set -e +} + +function vpp_start() +{ + local startup_conf=$1 + set +e + $VPPCTL -s /tmp/$startup_conf/cli.sock exec /tmp/$startup_conf/$startup_conf.exec + sleep 2 + $VPPCTL -s /tmp/$startup_conf/cli.sock clear interfaces + sleep 2 + set -e +} + +function vpp_add_trace() +{ + set +e + local startup_conf=$1 + local port=$2 + + $VPPCTL -s /tmp/$startup_conf/cli.sock trace add $2-rx 
100 + set -e +} + +function vpp_show_trace() +{ + set +e + local startup_conf=$1 + + $VPPCTL -s /tmp/$startup_conf/cli.sock show trace > /tmp/$startup_conf/trace.log + cat /tmp/$startup_conf/trace.log + set -e +} + +function vpp_port_down() +{ + local startup_conf=$1 + local port=$2 + + set +e + $VPPCTL -s /tmp/$startup_conf/cli.sock set int state $2 down + set -e +} + +function vpp_stats_all() +{ + set +e + local test=$1 + echo $VPPCTL -s /tmp/$test/cli.sock show interface > /tmp/$test/stats_all.log + $VPPCTL -s /tmp/$test/cli.sock show interface > /tmp/$test/stats_all.log + cat /tmp/$test/stats_all.log + set -e +} + +function vpp_stats() +{ + set +e + local test=$1 + local port=$2 + echo $VPPCTL -s /tmp/$test/cli.sock show interface $port> /tmp/$test/stats.log + $VPPCTL -s /tmp/$test/cli.sock show interface $port> /tmp/$test/stats.log + cat /tmp/$test/stats.log + set -e +} + +function vpp_rx_count() +{ + set +e + $VPPCTL -s /tmp/$1/cli.sock show interface | grep "rx packets" | awk '{print $7}' | tr -d '\r' + set -e +} + +function vpp_rx_bytes() +{ + set +e + $VPPCTL -s /tmp/$1/cli.sock show interface | grep "rx bytes" | awk '{print $3}' | tr -d '\r' + set -e +} +function vpp_tx_count() +{ + set +e + $VPPCTL -s /tmp/$1/cli.sock show interface | grep "tx packets" | awk '{print $3}' | tr -d '\r' + set -e +} + +function vpp_tx_bytes() +{ + set +e + $VPPCTL -s /tmp/$1/cli.sock show interface | grep "tx bytes" | awk '{print $3}' | tr -d '\r' + set -e +} + +function vpp_log() +{ + set +e + local test=$1 + $VPPCTL -s /tmp/$test/cli.sock show log > /tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show hardware-interfaces >> /tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show device counters >> /tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show run >> /tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show error >> /tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show int >> /tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show ipsec sa detail >> 
/tmp/$test/vpp.log + $VPPCTL -s /tmp/$test/cli.sock show ip fib >> /tmp/$test/vpp.log + awk ' { print FILENAME": " $0 } ' /tmp/$test/vpp.log + set -e +} diff --git a/ci/test/env/cn10k-perf.env b/ci/test/env/cn10k-perf.env new file mode 100644 index 0000000000..9a76eb2a0d --- /dev/null +++ b/ci/test/env/cn10k-perf.env @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2024 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +source $PROJECT_ROOT/ci/test/env/cn10k.env + +# List of perf tests to be run. +RUN_TESTS=" + l3fwd_perf + inl_ipsec_perf +" + +# Update command timeout +CMD_TIMEOUTS="l3fwd_perf=10m inl_ipsec_perf=60m $CMD_TIMEOUTS" + +# Perf stage flag +PERF_STAGE=1 + +# Continue testing regardless of failure +CONTINUE_ON_FAILURE=1 + +# Export the path to this conf so that other scripts can source this conf. +export TEST_ENV_CONF=$PROJECT_ROOT/ci/test/env/cn10k-perf.env diff --git a/ci/test/env/cn10k.env b/ci/test/env/cn10k.env new file mode 100644 index 0000000000..9411149217 --- /dev/null +++ b/ci/test/env/cn10k.env @@ -0,0 +1,94 @@ +#!/bin/bash +# Copyright (c) 2024 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +# Directory to keep run specific data on build machine +RUN_DIR=${RUN_DIR:-$BUILD_DIR} + +# Test run command +TEST_RUN_CMD=$PROJECT_ROOT/ci/test/board/board_test_run.sh + +# Skip syncing build directory to target +SKIP_SYNC=${SKIP_SYNC:-} + +# Skip setting up target. Useful when repeatedly running tests. +SKIP_TARGET_SETUP=${SKIP_TARGET_SETUP:-} + +# Reboot the target if tests fail +REBOOT_ON_FAIL=${REBOOT_ON_FAIL:-} + +# Platform +PLAT=${PLAT:-"cn10k"} + +# Target board user@IP. The user is expected to have passwordless ssh. +TARGET_BOARD=${TARGET_BOARD:-root@127.0.0.1} + +# Generator board user@IP. The user is expected to have passwordless ssh. 
+GENERATOR_BOARD=${GENERATOR_BOARD:-} + +# Target directory where the VPP build is to be synced +REMOTE_DIR=${REMOTE_DIR:-/tmp/vpp} + +# Directory from where the tests will eventually run. +TARGET_RUN_DIR=${TARGET_RUN_DIR:-$REMOTE_DIR} + +# Dependency install directory on build machine +DEPS_INSTALL_DIR=${DEPS_INSTALL_DIR:-$BUILD_ROOT/build/deps} + +# Sudo command used when running the tests +TARGET_SUDO=sudo + +# SSH command used to access board +TARGET_SSH_CMD=${TARGET_SSH_CMD:-"ssh -o LogLevel=ERROR -o ServerAliveInterval=30 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"} + +# SCP command used to copy files to the board +TARGET_SCP_CMD=${TARGET_SCP_CMD:-"scp -o LogLevel=ERROR -o ServerAliveInterval=30 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"} + +# Extra env to be set when running the tests on the target +EXTRA_TARGET_ENV=${EXTRA_TARGET_ENV:-} + +# Default timeout to be applied to the test commands +DEFAULT_CMD_TIMEOUT=${DEFAULT_CMD_TIMEOUT:-30m} + +# Test specific command timeouts. To be given as a string of test=timeout entries. +# Eg: CMD_TIMEOUTS="dump_memzone=20m dump_physmem=30m" +CMD_TIMEOUTS=${CMD_TIMEOUTS:-} + +# Additional arguments to be passed for specific tests, one "test,args" entry per line. +CMD_EXTRA_ARGS=" +" + +# List of tests to be run. If list is empty all tests are run except those in SKIP_TESTS. +RUN_TESTS=${RUN_TESTS:-} + +# Flag to enable target setup needed for perf stage. +PERF_STAGE=${PERF_STAGE:-} + +# Continue testing regardless of failure +CONTINUE_ON_FAILURE=${CONTINUE_ON_FAILURE:-} + +# File to save status into +STATUS_OUTFILE=${STATUS_OUTFILE:-} + +FIXME_SKIP_TESTS=" +" + +DEFAULT_SKIP_TESTS=" + l3fwd_perf + inl_ipsec_perf + ${FIXME_SKIP_TESTS} +" + +# Tests to be skipped. +SKIP_TESTS=${SKIP_TESTS:-$DEFAULT_SKIP_TESTS} + +# Run tests within this test num range +START_TEST_NUM=${START_TEST_NUM:-1} +END_TEST_NUM=${END_TEST_NUM:-999} + +# Functions required to work with test list file.
+source $PROJECT_ROOT/ci/test/common/test_list_helper_funcs.sh + +# Export the path to this conf so that other scripts can source this conf. +export TEST_ENV_CONF=$PROJECT_ROOT/ci/test/env/cn10k.env diff --git a/ci/test/inl_ipsec/inl_ipsec.conf b/ci/test/inl_ipsec/inl_ipsec.conf new file mode 100644 index 0000000000..5a71174c31 --- /dev/null +++ b/ci/test/inl_ipsec/inl_ipsec.conf @@ -0,0 +1,66 @@ +unix { + log /tmp/inl_ipsec/vpp.log + cli-listen /tmp/inl_ipsec/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 2 + corelist-workers 3 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. + ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +devices { + dev pci/0002:20:00.1 + { + driver octeon + } + dev pci/0002:1d:00.0 + { + driver octeon + } + dev pci/0002:01:00.6 { + driver octeon + port 0 { + name eth0 + } + } + dev pci/0002:01:01.0 { + driver octeon + port 0 { + name eth1 + } + } +} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/inl_ipsec/inl_ipsec.exec b/ci/test/inl_ipsec/inl_ipsec.exec new file mode 100644 index 0000000000..0a279ffad4 --- /dev/null +++ b/ci/test/inl_ipsec/inl_ipsec.exec @@ -0,0 +1,21 @@ +set int ip address eth0 12.168.101.1/24 +set int state eth0 up + +set int ip address eth1 192.168.1.1/24 +set int state eth1 up + +set ipsec async mode on + +ipsec sa add 1 spi 1 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-cbc-128 integ-alg sha1-96 integ-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 tunnel src 1.1.1.1 dst 1.1.1.2 inbound +ipsec sa add 11 spi 101 esp 
crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-cbc-128 integ-alg sha1-96 integ-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 tunnel src 1.1.1.2 dst 1.1.1.1
+
+ipsec itf create
+ipsec tunnel protect sa-out 11 sa-in 1 ipsec0
+set int state ipsec0 up
+
+ip route add 192.168.101.0/24 via ipsec0
+set ip neighbor eth0 1.1.1.1 00:16:3e:7e:94:9a
+
+ip route add 1.1.1.0/24 via eth0
+ip route add 192.168.1.0/24 via eth1
+
diff --git a/ci/test/inl_ipsec/inl_ipsec.sh b/ci/test/inl_ipsec/inl_ipsec.sh
new file mode 100755
index 0000000000..16984d524f
--- /dev/null
+++ b/ci/test/inl_ipsec/inl_ipsec.sh
@@ -0,0 +1,350 @@
+#!/bin/bash
+# Copyright (c) 2025 Marvell.
+# SPDX-License-Identifier: Apache-2.0
+# https://spdx.org/licenses/Apache-2.0.html
+
+#set -e
+set -euox pipefail
+
+OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.."
+
+source $OCTEONTESTPATH/common/vpp/vpp.env
+
+PKTLOSS_ALLOWED_P=0
+
+if [[ -d /sys/bus/pci/drivers/octeontx2-nicvf ]]; then
+    NICVF="octeontx2-nicvf"
+else
+    NICVF="rvu_nicvf"
+fi
+
+# Trap handler for ERR/INT/QUIT/EXIT ($1 = trap name, used in the banner).
+# On a non-zero status the log files are dumped; VPP, xfrm state, namespaces
+# and leftover tcpdump processes are always cleaned up before exiting.
+function sig_handler()
+{
+    local status=$?
+    set +e
+    trap - ERR
+    trap - INT
+    trap - QUIT
+    trap - EXIT
+    if [[ $status -ne 0 ]]; then
+        echo "$1 Handler"
+        awk ' { print FILENAME": " $0 } ' $APP_FULL_LOG
+        awk ' { print FILENAME": " $0 } ' $APP_LOG
+        awk ' { print FILENAME": " $0 } ' $APP_RESULT
+        awk ' { print FILENAME": " $0 } ' $PING_LOG
+    fi
+    vpp_log inl_ipsec
+    vpp_cleanup inl_ipsec
+    ip netns exec vm0 ip xfrm state
+    ip netns exec vm0 ip xfrm state deleteall
+    ip netns exec vm0 ip xfrm policy deleteall
+    cleanup_interfaces
+    # "2>/dev/null" has to be a single word: written as "2 >/dev/null" the 2
+    # becomes an extra PID argument for kill and only stdout is redirected.
+    ps -eo "pid,args" | grep tcpdump | awk ' { print $1 }' | xargs -I[] -n1 kill -9 [] 2>/dev/null || true
+    exit $status
+}
+
+# Display results for ping test
+# $1 - loss string reported by ping (e.g. "0%"), $2 - case number,
+# $3 - payload size. A summary line is appended to $APP_RESULT.
+function print_result()
+{
+    local na
+    local passed
+    local failed
+    local partial
+
+    if [[ "$1" == "0%" ]]; then
+        echo -e "\tPASS: no packet loss"
+        echo -e "\t case "$2"\tpacket-size "$3"Bytes\tNo packet loss ----- PASSED" >> $APP_RESULT
+        #passed=$((passed + 1))
+        # NOTE(review): errexit is switched off here and never re-enabled - confirm intent.
+        set +e
+    elif [[ "$1" == "100%" ]]; then
+        echo -e "\t$1"" ERROR: packets loss"
+        echo -e "\t case "$2"\tpacket-size "$3"Bytes\t""$1"" packets loss ----- FAILED" >> $APP_RESULT
+        #failed=$((failed + 1))
+    elif [[ -z "$1" ]]; then
+        echo -e "\tERROR: Unable to capture Results"
+        echo -e "\tcase "$2"\tunable to capture Results ----- N/A" >> $APP_RESULT
+        #na=$((na + 1))
+    else
+        echo -e "\t$1"" packets loss"
+        echo -e "\t case "$2"\tpacket-size "$3"Bytes\t""$1"" packets loss ----- PARTIAL PASSED" >> $APP_RESULT
+# partial=$((partial + 1))
+    fi
+}
+
+# Start ping packets from interfaces for all test cases.
+# This would need reconfiguration of interfaces.
+# $1 - test mode: "1" skips cases 5-7, "2" and "3" skip cases 2-7.
+# Every remaining case Y (up to MAX_Y) is pinged from vm0 once per size in
+# PKT_LIST; the script exits 1 when the loss exceeds PKTLOSS_ALLOWED_P.
+function start_ping_test()
+{
+    local errp
+
+    echo -e "ping_pkts :-------updated ip: 192.168.$Y.2"
+    while [ $Y -le $MAX_Y ]; do
+        if [[ $1 == "1" ]]; then
+            if [[ $Y -gt 4 && $Y -lt 8 ]]; then
+                ((++Y))
+                ((++X))
+                continue
+            fi
+        fi
+        if [[ $1 == "2" ]] || [[ $1 == "3" ]]; then
+            if [[ $Y -gt 1 && $Y -lt 8 ]]; then
+                ((++Y))
+                ((++X))
+                continue
+            fi
+        fi
+        reconfigure_interfaces
+        ip netns exec vm0 ip xfrm state list
+        ip netns exec vm0 ip xfrm policy list
+        ip netns exec vm0 tcpdump -nexi enP2p1s0v4 >/tmp/inl_ipsec/4 &
+        ip netns exec vm0 tcpdump -nexi enP2p1s0v4:1 >/tmp/inl_ipsec/4.1 &
+        ip netns exec vm2 tcpdump -nexi enP2p1s0v6 >/tmp/inl_ipsec/6 &
+        for pkt_size in $PKT_LIST
+        do
+            local itr=0
+            echo -e "ping_pkts :-------" "$pkt_size" "$Y"
+
+            while [ $itr -le $PING_RETRY ]; do
+                ip netns exec vm0 ping 192.168.$Y.2 \
+                    -i $PKT_GAP -c $PING_PKTS -s \
+                    $pkt_size $PING_ARGS | tee -a $PING_LOG
+
+                RESULT=`tail -n 3 $PING_LOG | \
+                    grep -o "\w*\.\w*%\|\w*%"`
+
+                print_result "$RESULT" "$Y" "$pkt_size"
+                # Numeric loss is RESULT without the trailing '%'. Taking only
+                # the first character read "100%" as 1 and "0.3125%" as 0; an
+                # empty RESULT is counted as total loss instead of slipping by.
+                errp=${RESULT%\%}
+                errp=${errp:-100}
+
+                # Break if success
+                check=$(echo "$errp <= $PKTLOSS_ALLOWED_P" | bc)
+                if [ $check -eq 1 ]; then break; fi
+                ((++itr))
+            done
+            # Wait until the process is killed
+# while (ps -ef | grep ping); do
+# continue
+# done
+
+            if (( $(echo "$errp > $PKTLOSS_ALLOWED_P" | bc) )); then
+                echo -e "Test Failed as packets loss $RESULT > $PKTLOSS_ALLOWED_P%"
+                vpp_cleanup inl_ipsec
+                interfaces_cleanup
+                exit 1
+            fi
+        done
+        interfaces_cleanup
+        ((++Y))
+        ((++X))
+    done
+}
+
+# Start $PING_LOG afresh with a banner, then run the ping cases for mode $1.
+function run_test()
+{
+    echo "Starting ping test" | tee $PING_LOG
+    start_ping_test $1
+}
+
+# Reset the case counters (X=101, Y=1), stage inl_ipsec.exec under
+# /tmp/inl_ipsec, launch and start VPP, then run the ping test in mode 2.
+function run_inline_ipsec()
+{
+    X=101
+    Y=1
+    echo -e ""
+    echo -e "Inline protocol IPsec"
+    echo -e "---------------------"
+    rm -rf /tmp/inl_ipsec
+    mkdir -p /tmp/inl_ipsec
+    cp inl_ipsec.exec /tmp/inl_ipsec
+    vpp_launch inl_ipsec
+    vpp_start inl_ipsec
+
+    sleep 2
+    run_test 2
+}
+
+# Configure vm0: addresses 192.168.$X.2 and 1.1.$Y.1 on $LBK1, static ARP and
+# route entries, and the ESP tunnel xfrm states/policies for case $Y.
+function configure_vm0()
+{
+    ip netns exec vm0 ip addr add 192.168.$X.2/24 dev $LBK1
+    ip netns exec vm0 ip addr add 1.1.$Y.1/24 dev $LBK1:1
+    ip netns exec vm0 ip link set $LBK1 mtu 9000
+    ip netns exec vm0 ip link set $LBK1 up
+    ip netns exec vm0 ip link set $LBK1 address $VM0_MAC
+    ip netns exec vm0 ip route add 192.168.$Y.0/24 via 192.168.$X.1
+    ip netns exec vm0 arp -s 192.168.$X.1 $VM0_MAC
+    ip netns exec vm0 arp -s 1.1.$Y.2 $VM0_MAC
+    ip netns exec vm0 ip xfrm state add src 1.1.$Y.1 dst 1.1.$Y.2 proto esp spi $Y reqid 0 mode tunnel ${CASE[$Y]}
+    ip netns exec vm0 ip xfrm state add src 1.1.$Y.2 dst 1.1.$Y.1 proto esp spi $X reqid 0 mode tunnel ${CASE[$Y]}
+    ip netns exec vm0 ip xfrm policy add src 192.168.$X.2 dst 192.168.$Y.2 dir out tmpl src 1.1.$Y.1 dst 1.1.$Y.2 proto esp spi $Y reqid 0 mode tunnel
+    ip netns exec vm0 ip xfrm policy add src 192.168.$Y.2 dst 192.168.$X.2 dir in tmpl src 1.1.$Y.2 dst 1.1.$Y.1 proto esp spi $X reqid 0 mode tunnel
+}
+
+# Configure vm2: address 192.168.$Y.2 on $LBK3 plus the return route and ARP entry.
+function configure_vm2()
+{
+    ip netns exec vm2 ip addr add 192.168.$Y.2/24 dev $LBK3
+    ip netns exec vm2 ip link set $LBK3 mtu 9000
+    ip netns exec vm2 ip link set $LBK3 up
+    ip netns exec vm2 ip link set lo up
+    ip netns exec vm2 ip link set $LBK3 address $VM2_MAC
+    ip netns exec vm2 ip route add 192.168.$X.0/24 via 192.168.$Y.1
+    ip netns exec vm2 arp -s 192.168.$Y.1 $VM2_MAC
+}
+
+# Configure interfaces: create namespaces vm0/vm2, bind LIF1/LIF3 to the
+# kernel NIC VF driver and LIF2/LIF4/INLINE_DEV to vfio-pci, then move the
+# LIF1/LIF3 netdevs into vm0/vm2.
+function setup_interfaces()
+{
+    echo -e "Create namespaces"
+    ip netns add vm0
+    ip netns add vm2
+
+    echo -e "dev bind $LIF1 $LIF2 $LIF3 $LIF4"
+    $VFIO_DEVBIND -b $NICVF $LIF1
+    #$VFIO_DEVBIND -b $NICVF $LIF2
+    $VFIO_DEVBIND -b $NICVF $LIF3
+    #$VFIO_DEVBIND -b $NICVF $LIF4
+
+    echo -e "Bind LBK devices required to act as LBK pairs b/w VPP and Linux"
+    $VFIO_DEVBIND -b vfio-pci $LIF2
+    $VFIO_DEVBIND -b vfio-pci $LIF4
+
+    $VFIO_DEVBIND -b vfio-pci $INLINE_DEV
+
+    LBK1=`ls /sys/bus/pci/devices/$LIF1/net/`
+    LBK3=`ls /sys/bus/pci/devices/$LIF3/net/`
+
+    echo -e "Add devices in namespaces $LBK1 $LBK3"
+    ip link set dev $LBK1 netns vm0
+    ip link set dev $LBK3 netns vm2
+}
+
+# Undo configure_vm0/configure_vm2 for the current X/Y values.
+function interfaces_cleanup()
+{
+    echo -e "\ninterfaces_cleanup"
+    ip netns exec vm0 ip xfrm state
+    ip netns exec vm0 ip xfrm state deleteall
+    ip netns exec vm0 ip xfrm policy deleteall
+    ip netns exec vm0 arp -d 192.168.$X.1
+    ip netns exec vm0 arp -d 1.1.$Y.2
+    ip netns exec vm0 ip route del 192.168.$Y.0/24
+    ip netns exec vm0 ip addr del 192.168.$X.2/24 dev $LBK1
+    ip netns exec vm0 ip addr del 1.1.$Y.1/24 dev $LBK1:1
+    ip netns exec vm0 ip link set $LBK1 down
+
+    ip netns exec vm2 ip route del 192.168.$X.0/24
+    ip netns exec vm2 arp -d 192.168.$Y.1
+    ip netns exec vm2 ip addr del 192.168.$Y.2/24 dev $LBK3
+    ip netns exec vm2 ip link set $LBK3 down
+    ip netns exec vm2 ip link set lo down
+}
+
+function reconfigure_interfaces()
+{
+    echo -e "\nreconfigure_interfaces"
+
+    # Configure vm0
+    configure_vm0
+    # Configure vm2
+    configure_vm2
+}
+
+# Delete both namespaces and rebind LIF2/LIF4 to the kernel NIC VF driver.
+function cleanup_interfaces()
+{
+    ip netns del vm0
+    ip netns del vm2
+
+    # Bind the LIF2 device back to nicvf
+    $VFIO_DEVBIND -b $NICVF $LIF2
+    $VFIO_DEVBIND -b $NICVF $LIF4
+}
+
+function main()
+{
+    setup_interfaces
+    run_inline_ipsec
+}
+
+trap "sig_handler ERR" ERR
+trap "sig_handler INT" INT
+trap "sig_handler QUIT" QUIT
+trap "sig_handler EXIT" EXIT
+
+# script's starting point
+CASE=(
+    ""
+    "enc cbc(aes) 0xa0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 auth sha1 0xa0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0"
+    "enc cbc(aes) 0xa0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 auth-trunc sha256 0xa0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 128"
+)
+
+PING_RETRY=1
+PING_PKTS=320
+PKT_LIST="64 380 1410 4000 8000"
+PKT_GAP="0.001"
+PING_ARGS=""
+
+#VM0_MAC and VM2_MAC are taken from hard coded destination MAC addresses of ipsec-secgw app
+VM0_MAC=00:16:3e:7e:94:9a
+VM2_MAC=00:16:3e:22:a1:d9
+
+LIF1=0002:01:00.5
+LIF2=0002:01:00.6
+LIF3=0002:01:00.7
+LIF4=0002:01:01.0
+
+CDEV_PF=$(lspci -d :a0fd | head -1 | awk '{ print $1 }')
+CDEV_VF=$(lspci -d :a0fe | head -1 | awk '{ print $1 }')
+if [ -z "$CDEV_PF" ]
+then
+    CDEV_PF=$(lspci -d :a0f2 | head -1 | awk '{ print $1 }')
+    CDEV_VF=$(lspci -d :a0f3 | head -1 | awk '{ print $1 }')
+    if [ -z "$CDEV_PF" ]
+    then
+        echo "Error: CPTPF not found"
+        exit 1;
+    fi
+fi
+
+INLINE_DEV=0002:1d:00.0
+
+LBK1=""
+LBK3=""
+
+APP_LOG=app.log
+APP_FULL_LOG=app_full.log
+APP_RESULT=app_result.log
+PING_LOG=ping.log
+VFIO_DEVBIND="$OCTEONTESTPATH/board/oxk-devbind-basic.sh"
+if ! [[ -f $VFIO_DEVBIND ]]
+then
+VFIO_DEVBIND=$(which oxk-devbind-basic.sh)
+fi
+
+rm -f $APP_LOG $APP_FULL_LOG $APP_RESULT $PING_LOG
+
+MAX_X=110
+MAX_Y=2
+
+main
+exit 0
diff --git a/ci/test/inl_ipsec_perf/aes_cbc_sha1_hmac.cfg b/ci/test/inl_ipsec_perf/aes_cbc_sha1_hmac.cfg
new file mode 100644
index 0000000000..5f92996661
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/aes_cbc_sha1_hmac.cfg
@@ -0,0 +1,21 @@
+set int mac address eth0 00:01:02:03:04:01
+set int ip address eth0 12.168.101.1/24
+set int state eth0 up
+
+set int ip address eth1 13.168.1.1/24
+set int state eth1 up
+
+set ipsec async mode on
+
+#ipsec sa add 1 spi 1 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-cbc-128 integ-alg sha1-96 integ-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 tunnel src 1.1.1.1 dst 1.1.1.2 inbound
+#ipsec sa add 11 spi 101 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-cbc-128 integ-alg sha1-96 integ-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 tunnel src 1.1.1.2 dst 1.1.1.1
+#
+#ipsec itf create
+#ipsec tunnel protect sa-out 11 sa-in 1 ipsec0
+#set int state ipsec0 up
+#
+#ip route add 192.168.1.2/24 via ipsec0
+#set ip neighbor eth0 1.1.1.1 00:16:3e:7e:94:9a
+#
+#ip route add 1.1.1.0/24 via eth0
+#
diff --git a/ci/test/inl_ipsec_perf/aes_cbc_sha1_hmac_ib.cfg b/ci/test/inl_ipsec_perf/aes_cbc_sha1_hmac_ib.cfg
new file mode 100644
index 0000000000..7ad9d59606
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/aes_cbc_sha1_hmac_ib.cfg
@@ -0,0 +1,19 @@
+set int ip address eth0 12.168.101.1/24
+set int mac address eth0 00:16:3e:22:a1:d9
+set int state eth0 up
+ +set int ip address eth1 13.168.1.1/24 +set int state eth1 up + +set ipsec async mode on + +#ipsec sa add 1 spi 1 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-cbc-128 integ-alg sha1-96 integ-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 tunnel src 1.1.1.1 dst 1.1.1.2 inbound +#ipsec sa add 11 spi 101 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-cbc-128 integ-alg sha1-96 integ-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 tunnel src 1.1.1.2 dst 1.1.1.1 +# +#ipsec itf create +#ipsec tunnel protect sa-out 11 sa-in 1 ipsec0 +#set int state ipsec0 up +# +#ip route add 192.168.1.2/24 via eth0 +#set ip neighbor eth0 192.168.1.2 00:16:3e:7e:94:9a +# diff --git a/ci/test/inl_ipsec_perf/aes_gcm.cfg b/ci/test/inl_ipsec_perf/aes_gcm.cfg new file mode 100644 index 0000000000..d089248e3b --- /dev/null +++ b/ci/test/inl_ipsec_perf/aes_gcm.cfg @@ -0,0 +1,21 @@ +set int mac address eth0 00:01:02:03:04:01 +set int ip address eth0 12.168.101.1/24 +set int state eth0 up + +set int ip address eth1 13.168.1.1/24 +set int state eth1 up + +set ipsec async mode on + +#ipsec sa add 1 spi 1 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-gcm-128 tunnel src 1.1.1.1 dst 1.1.1.2 inbound +#ipsec sa add 11 spi 101 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-gcm-128 tunnel src 1.1.1.2 dst 1.1.1.1 +# +#ipsec itf create +#ipsec tunnel protect sa-out 11 sa-in 1 ipsec0 +#set int state ipsec0 up +# +#ip route add 192.168.2.2/24 via ipsec0 +#set ip neighbor eth0 1.1.1.1 00:16:3e:7e:94:9a +# +#ip route add 1.1.1.0/24 via eth0 +# diff --git a/ci/test/inl_ipsec_perf/aes_gcm_ib.cfg b/ci/test/inl_ipsec_perf/aes_gcm_ib.cfg new file mode 100644 index 0000000000..8582cd473d --- /dev/null +++ b/ci/test/inl_ipsec_perf/aes_gcm_ib.cfg @@ -0,0 +1,18 @@ +set int ip address eth0 12.168.101.1/24 +set int mac address eth0 00:16:3e:22:a1:d9 +set int state eth0 up + +set int ip address eth1 13.168.1.1/24 +set int state eth1 up + +set ipsec async mode on 
+
+#ipsec sa add 2 spi 2 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-gcm-128 tunnel src 1.1.2.1 dst 1.1.2.2 inbound
+#ipsec sa add 11 spi 101 esp crypto-key a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0 crypto-alg aes-gcm-128 tunnel src 1.1.2.2 dst 1.1.2.1
+#
+#ipsec itf create
+#ipsec tunnel protect sa-out 11 sa-in 2 ipsec0
+#set int state ipsec0 up
+#
+#ip route add 192.168.2.2/24 via eth0
+#set ip neighbor eth0 192.168.2.2 00:16:3e:7e:94:9a
diff --git a/ci/test/inl_ipsec_perf/inl_ipsec_gen.sh b/ci/test/inl_ipsec_perf/inl_ipsec_gen.sh
new file mode 100755
index 0000000000..7992f770d3
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/inl_ipsec_gen.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (C) 2022 Marvell.
+
+set -eou pipefail
+OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.."
+source $OCTEONTESTPATH/common/testpmd/common.env
+
+TX_PRFX="tpmd_tx"
+RX_PRFX="tpmd_rx"
+PORT0="${PORT0:-0002:02:00.0}"
+PORT1="${PORT1:-0002:03:00.0}"
+
+# Trap handler for ERR and INT ($1 names the trap for the banner). A non-zero
+# status prints both testpmd logs; both instances are always cleaned up.
+function sig_handler()
+{
+    local rc=$?
+
+    set +e
+    trap - ERR INT
+    if [[ $rc -ne 0 ]]; then
+        echo "$1 Handler"
+        # Dump error logs
+        testpmd_log $TX_PRFX
+        testpmd_log $RX_PRFX
+    fi
+
+    testpmd_cleanup $TX_PRFX
+    testpmd_cleanup $RX_PRFX
+    exit $rc
+}
+
+trap "sig_handler ERR" ERR
+trap "sig_handler INT" INT
+
+# Stop port 0 of the testpmd instance with prefix $1, switch Rx and Tx flow
+# control off, then start the port again.
+_port0_flow_ctrl_off()
+{
+    local cmd
+
+    for cmd in "port stop 0" "set flow_ctrl rx off 0" \
+        "set flow_ctrl tx off 0" "port start 0"; do
+        testpmd_cmd $1 "$cmd"
+    done
+}
+
+# Start the rxonly testpmd ($1 = prefix) on PORT1 in the background.
+# NOTE(review): the bare "/dev/null" argument passed by the launchers looks
+# like the remnant of a redirection - confirm against the original script.
+launch_testpmd_rx()
+{
+    local eal_args="-c 0x700 -a $PORT1,disable_xqe_drop=1"
+    local app_args="--nb-cores=1 --forward-mode=rxonly"
+
+    testpmd_launch $1 "$eal_args" "$app_args" /dev/null &
+    sleep 1
+    _port0_flow_ctrl_off $1
+}
+
+# Start the flowgen Tx testpmd ($1 = prefix, $2 = number of flows) on PORT0.
+launch_testpmd_tx_outb()
+{
+    local eal_args="-c 0xF800 -a $PORT0,disable_xqe_drop=1"
+    local app_args="--nb-cores=3 --forward-mode=flowgen --flowgen-flows=$2 --txq=3 --rxq=3 --eth-peer=0,00:01:02:03:04:01"
+
+    testpmd_launch $1 "$eal_args" "$app_args" /dev/null &
+    sleep 1
+    _port0_flow_ctrl_off $1
+}
+
+# Start the Tx testpmd ($1 = prefix) on PORT0 with a net_pcap vdev fed from
+# pcap file $2 (five rx_pcap entries, infinite_rx=1).
+launch_testpmd_tx_inb()
+{
+    local eal_args="-c 0x3F800 --vdev net_pcap0,rx_pcap=$2,rx_pcap=$2,rx_pcap=$2,rx_pcap=$2,rx_pcap=$2,infinite_rx=1 -a $PORT0,disable_xqe_drop=1"
+    local app_args="--nb-cores=5 --txq=5 --rxq=5 --no-flush-rx --eth-peer=0,00:01:02:03:04:01"
+
+    testpmd_launch $1 "$eal_args" "$app_args" /dev/null &
+    sleep 1
+    _port0_flow_ctrl_off $1
+}
+
+# Dispatch on TESTPMD_OP; $1 is the testpmd prefix, $2 an op-specific value.
+case $TESTPMD_OP in
+    launch_tx_outb)
+        launch_testpmd_tx_outb $1 $2
+        ;;
+    launch_tx_inb)
+        launch_testpmd_tx_inb $1 $2
+        ;;
+    launch_rx)
+        launch_testpmd_rx $1
+        ;;
+    start)
+        testpmd_cmd $1 "start tx_first 64"
+        testpmd_cmd $1 "show port stats all"
+        ;;
+    stop)
+        testpmd_cmd $1 "stop"
+        ;;
+    rx_pps)
+        prev=$(testpmd_log_sz $1)
+        curr=$prev
+        testpmd_cmd $1 "show port stats $2"
+
+        while [ $prev -eq $curr ]; do
+            sleep 0.1
+            curr=$(testpmd_log_sz $1)
+        done
+        testpmd_prompt $1
+        val=$(testpmd_log $1 | tail -4 | grep -ao 'Rx-pps: .*' | awk '{print $2}')
+        echo $val
+        ;;
+    pktsize)
+        testpmd_cmd $1 "set txpkts $2"
+        ;;
+    quit)
+        testpmd_quit $1
+        ;;
+    log)
+        testpmd_log $1
+        ;;
+esac
+exit 0
diff --git a/ci/test/inl_ipsec_perf/inl_ipsec_perf.conf b/ci/test/inl_ipsec_perf/inl_ipsec_perf.conf
new file mode 100644
index 0000000000..99b4521193
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/inl_ipsec_perf.conf
@@ -0,0 +1,55 @@
+unix {
+  log /tmp/inl_ipsec_perf/vpp.log
+  cli-listen /tmp/inl_ipsec_perf/cli.sock
+  #full-coredump
+  #interactive
+  nodaemon
+}
+
+api-trace {
+  on
+}
+
+logging {
+  default-syslog-log-level info
+}
+
+cpu {
+  main-core 2
+  corelist-workers 3
+}
+
+session
+{
+  event-queue-length 10
+}
+#tcp { no-csum-offload }
+
+buffers {
+  ## Increase number of buffers allocated, needed only in scenarios with
+  ## large number of interfaces and worker threads. Value is per numa node.
+  ## Default is 16384 (8192 if running unprivileged)
+# buffers-per-numa 5000
+
+  ## Size of buffer data area
+  ## Default is 2048
+  default data-size 2048
+# naturally-aligned
+# enable-nat-alignment
+}
+
+devices {
+  dev pci/0002:20:00.1
+  {
+    driver octeon
+  }
+  dev pci/0002:1d:00.0
+  {
+    driver octeon
+  }
+}
+
+plugins {
+  plugin dpdk_plugin.so { disable }
+  plugin onp_plugin.so { disable }
+}
diff --git a/ci/test/inl_ipsec_perf/inl_ipsec_perf.sh b/ci/test/inl_ipsec_perf/inl_ipsec_perf.sh
new file mode 100755
index 0000000000..510f748091
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/inl_ipsec_perf.sh
@@ -0,0 +1,867 @@
+#!/bin/bash
+# Copyright (c) 2025 Marvell.
+# SPDX-License-Identifier: Apache-2.0
+# https://spdx.org/licenses/Apache-2.0.html
+
+#set -e
+set -euox pipefail
+
+GENERATOR_BOARD=${GENERATOR_BOARD:-}
+REMOTE_DIR=${REMOTE_DIR:-$(pwd | cut -d/ -f 1-3)}
+OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.."
+PKT_LIST="64 380 1410"
+NUM_CAPTURE=3
+MAX_TRY_CNT=5
+CORES=(1 2 4)
+COREMASK="0x10000"
+TXWAIT=15
+RXWAIT=5
+WS=2
+IS_RXPPS_TXTPMD=0
+TARGET_SSH_CMD=${TARGET_SSH_CMD:-"ssh -o LogLevel=ERROR -o ServerAliveInterval=30 \
+    -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"}
+TARGET_SSH_CMD="$TARGET_SSH_CMD -n"
+GENERATOR_SCRIPT=${GENERATOR_SCRIPT:-inl_ipsec_gen.sh}
+WITH_GEN_BOARD=0
+
+source $OCTEONTESTPATH/common/testpmd/pktgen.env
+source $OCTEONTESTPATH/common/testpmd/common.env
+source $OCTEONTESTPATH/common/vpp/vpp.env
+
+TPMD_RX_PREFIX="tpmd_rx"
+TPMD_TX_PREFIX="tpmd_tx"
+
+declare -i SCLK
+declare -i RCLK
+declare -i CPTCLK
+declare -A PASS_PPS_TABLE
+SUDO="sudo"
+
+# The leading "!" inverts grep's status, so IS_CN10K is 1 when the device-tree
+# compatible string contains "cn10k" and 0 otherwise.
+! $(cat /proc/device-tree/compatible | grep -q "cn10k")
+IS_CN10K=$?
+# NOTE(review): the text between "tr -d '\0'" and "> $in" below appears to be
+# missing from this copy; what follows is the tail of a function whose start
+# is not visible here - confirm against the original file.
+DTC=$(tr -d '\0' > $in
+
+        while [ $prev -eq $curr ]; do sleep 0.1; curr=$(testpmd_log_sz $prefix); done
+        testpmd_prompt $prefix
+    done
+    fi
+}
+
+# Print Rx statistics for testpmd instance $1, port $2.
+# With WITH_GEN_BOARD=1 the value comes from exec_testpmd_cmd_gen "rx_pps".
+# Otherwise "show port stats" is appended to the local testpmd input file, the
+# function waits until the log size changes, and the fourth line from the end
+# of the testpmd output file is printed.
+function rx_stats()
+{
+    local prefix=$1
+    local port=$2
+    local in=testpmd.in.$prefix
+    local out=testpmd.out.$prefix
+
+    if [[ $WITH_GEN_BOARD -eq 1 ]]; then
+        rxpps=$(exec_testpmd_cmd_gen "rx_pps" $prefix $port)
+        echo $rxpps
+    else
+        prev=$(testpmd_log_sz $prefix)
+        curr=$prev
+
+        echo "show port stats $port" >> $in
+        while [ $prev -eq $curr ]; do sleep 0.1; curr=$(testpmd_log_sz $prefix); done
+        testpmd_prompt $prefix
+        cat $out | tail -n4 | head -n1
+    fi
+}
+
+# Print one Rx reading for port 0: taken from the Tx testpmd when
+# IS_RXPPS_TXTPMD is non-zero, otherwise from the Rx testpmd. Without a
+# generator board only the second field of the stats line is printed.
+function capture_rx_pps()
+{
+    local stats
+    if [[ $IS_RXPPS_TXTPMD -ne 0 ]]; then
+        # Specific case of Inline Protocol Single-SA configuration.
+        # Packets are routed back to originating port.
+        stats=$(rx_stats $TPMD_TX_PREFIX "0")
+    else
+        stats=$(rx_stats $TPMD_RX_PREFIX "0")
+    fi
+
+    if [[ $WITH_GEN_BOARD -eq 1 ]]; then
+        echo $stats
+    else
+        echo $stats | awk '{print $2}'
+    fi
+}
+
+# Configure interfaces
+# Binds LIF1..LIF4 to vfio-pci.
+function setup_interfaces()
+{
+    echo -e "dev bind $LIF1 $LIF2 $LIF3 $LIF4"
+
+    $VFIO_DEVBIND -b vfio-pci $LIF1
+    $VFIO_DEVBIND -b vfio-pci $LIF2
+    $VFIO_DEVBIND -b vfio-pci $LIF3
+    $VFIO_DEVBIND -b vfio-pci $LIF4
+}
+
+# Reverse of setup_interfaces: rebind LIF1..LIF4 to the $NICVF driver.
+function cleanup_interfaces()
+{
+    # Bind the vfio-pci binded devices back to nicvf
+    $VFIO_DEVBIND -b $NICVF $LIF1
+    $VFIO_DEVBIND -b $NICVF $LIF2
+    $VFIO_DEVBIND -b $NICVF $LIF3
+    $VFIO_DEVBIND -b $NICVF $LIF4
+}
+
+# Start traffic: through the generator-board helper when WITH_GEN_BOARD=1,
+# otherwise on the local Rx testpmd first and then the Tx testpmd.
+function start_testpmd()
+{
+    if [[ $WITH_GEN_BOARD -eq 1 ]]; then
+        exec_testpmd_cmd_gen "start" $TPMD_TX_PREFIX "NOP"
+    else
+        testpmd_cmd "$TPMD_RX_PREFIX" "start"
+        testpmd_cmd "$TPMD_TX_PREFIX" "start"
+    fi
+}
+
+# Stop traffic: through the generator-board helper when WITH_GEN_BOARD=1,
+# otherwise on the local Tx testpmd first and then the Rx testpmd.
+function stop_testpmd()
+{
+    if [[ $WITH_GEN_BOARD -eq 1 ]]; then
+        exec_testpmd_cmd_gen "stop" $TPMD_TX_PREFIX "NOP"
+    else
+        testpmd_cmd "$TPMD_TX_PREFIX" "stop"
+        testpmd_cmd "$TPMD_RX_PREFIX" "stop"
+    fi
+}
+
+# Set the Tx packet size to $1 on the Tx testpmd.
+function set_pktsize_testpmd()
+{
+    if [[ $WITH_GEN_BOARD -eq 1 ]]; then
+        exec_testpmd_cmd_gen "pktsize" "$TPMD_TX_PREFIX" $1
+    else
+        testpmd_cmd "$TPMD_TX_PREFIX" "set txpkts $1"
+    fi
+}
+
+# Quit the testpmd instance with prefix $1. With a generator board only the
+# Tx prefix is acted on: its log is saved to testpmd.out.$1 before quitting.
+function quit_testpmd()
+{
+    if [[ $WITH_GEN_BOARD -eq 1 ]]; then
+        if [[ $1 == $TPMD_TX_PREFIX ]]; then
+            exec_testpmd_cmd_gen "log" $1 "NOP" >testpmd.out.$1
+            exec_testpmd_cmd_gen "quit" $1 "NOP"
+        fi
+    else
+        testpmd_quit $1
+    fi
+}
+
+# Outbound throughput check.
+# $1 - algorithm label used in messages, $2 - column index into
+# PASS_PPS_TABLE, $3 - number of cores (also passed to run_vpp_ipsec on retry).
+# For every size in PKT_LIST up to MAX_TRY_CNT attempts are made; an attempt
+# averages NUM_CAPTURE readings and passes unless the average is below the
+# table entry. VPP is restarted before each retry. A size that never passes is
+# appended to Failed_tests.
+function outb_perf()
+{
+    local rx_pps
+    local avg_pps
+    local pktsz
+    local tcnt
+    local algo
+    local rn
+    local i
+
+    [[ $X = 1 ]] && algo="aes-cbc_sha1-hmac" || algo="aes-gcm"
+
+    rn=0
+    for pktsz in ${PKT_LIST[@]}
+    do
+        set_pktsize_testpmd $pktsz
+
+        tcnt=1
+        while [ $tcnt -le $MAX_TRY_CNT ]; do
+            echo "Try $tcnt"
+            i=1
+            rx_pps=0
+            if [[ $tcnt -gt 1 ]]; then
+                # Restart vpp
+                vpp_log inl_ipsec_perf
+                vpp_stats_all inl_ipsec_perf
+                vpp_cleanup inl_ipsec_perf
+                echo "Restart vpp"
+                run_vpp_ipsec $3
+            fi
+            start_testpmd
+            pmd_rx_dry_run
+            # Wait for few seconds for traffic to stabilize
+            sleep $TXWAIT
+            while [ $i -le $NUM_CAPTURE ]; do
+                # Builds a "0+a+b+c" expression string that bc sums below.
+                rx_pps=$rx_pps+$(capture_rx_pps)
+                ((++i))
+                sleep $RXWAIT
+            done
+            stop_testpmd
+            avg_pps=$(echo "(($rx_pps) / $NUM_CAPTURE)" | bc)
+            p=${PASS_PPS_TABLE[$rn,$2]}
+            echo "pktsize: $pktsz avg_pps: $avg_pps"
+            echo "pass_pps $p"
+            if (( $(echo "$avg_pps < $p" | bc) )); then
+                echo "$1:Low numbers for packet size $pktsz " \
+                    "($avg_pps < $p) for $3 cores">&2
+            else
+                echo "Test Passed"
+                break
+            fi
+            ((++tcnt))
+            sleep $WS
+        done
+        if [[ $tcnt -gt $MAX_TRY_CNT ]]; then
+            echo "Test Failed"
+            Failed_tests="$Failed_tests \"${TN[$Y]} outbound $algo pktsize:$pktsz num_cores:$3\""
+        fi
+        ((++rn))
+    done
+}
+
+# Inbound throughput check; same arguments and pass/retry rules as outb_perf.
+# The Tx testpmd is launched per packet size with pmd_tx_launch_for_inb and is
+# quit again once that size has passed or exhausted its attempts; retries
+# restart VPP with run_vpp_ipsec_inb.
+function inb_perf()
+{
+    local rx_pps
+    local avg_pps
+    local pktsz
+    local tcnt
+    local algo
+    local rn
+    local i
+
+    [[ $X = 1 ]] && algo="aes-cbc_sha1-hmac" || algo="aes-gcm"
+
+    rn=0
+    for pktsz in ${PKT_LIST[@]}
+    do
+        sleep $WS
+        pmd_tx_launch_for_inb $1 $pktsz $3
+
+        tcnt=1
+        while [ $tcnt -le $MAX_TRY_CNT ]; do
+            echo "Try $tcnt"
+            i=1
+            rx_pps=0
+            if [[ $tcnt -gt 1 ]]; then
+                # Restart vpp
+                vpp_log inl_ipsec_perf
+                vpp_stats_all inl_ipsec_perf
+                vpp_cleanup inl_ipsec_perf
+                echo "Restart vpp"
+                run_vpp_ipsec_inb $3
+            fi
+            start_testpmd
+            pmd_rx_dry_run
+            # Wait for few seconds for traffic to stabilize
+            sleep $TXWAIT
+            while [ $i -le $NUM_CAPTURE ]; do
+                rx_pps=$rx_pps+$(capture_rx_pps)
+                ((++i))
+                sleep $RXWAIT
+            done
+            stop_testpmd
+            avg_pps=$(echo "(($rx_pps) / $NUM_CAPTURE)" | bc)
+            p=${PASS_PPS_TABLE[$rn,$2]}
+            echo "pktsize: $pktsz avg_pps: $avg_pps"
+            echo "pass_pps $p"
+            if (( $(echo "$avg_pps < $p" | bc) )); then
+                echo "$1:Low numbers for packet size $pktsz " \
+                    "($avg_pps < $p) for $3 cores">&2
+            else
+                echo "Test Passed"
+                quit_testpmd "$TPMD_TX_PREFIX"
+                break
+            fi
+            ((++tcnt))
+            sleep $WS
+        done
+        if [[ $tcnt -gt $MAX_TRY_CNT ]]; then
+            echo "Test Failed"
+            quit_testpmd "$TPMD_TX_PREFIX"
+            Failed_tests="$Failed_tests \"${TN[$Y]} inbound $algo pktsize:$pktsz num_cores:$3\""
+        fi
+        ((++rn))
+    done
+}
+
+# Print the reference row for packet size $2 from file $FPATH.$3, searching
+# from the first line matching $1 up to a line matching "end"; runs of spaces
+# are squeezed to one.
+function get_ref_mops()
+{
+    local ref_mops
+    ref_mops=$(awk -v pat=$1 '$0~pat','/end/' \
+        $FPATH.$3 | grep $2: | tr -s ' ')
+    echo $ref_mops
+}
+
+# Fill PASS_PPS_TABLE[row,col] with 97% of the reference value for each packet
+# size in PKT_LIST (row) and each entry of CORES (col).
+# $1 - algorithm section name, $2 - reference-file suffix.
+function populate_pass_mops()
+{
+    local rn=0
+    local cn
+
+    for i in ${PKT_LIST[@]}
+    do
+        cn=0
+        ref_mops=$(get_ref_mops $1 $i $2)
+        for j in ${CORES[@]}
+        do
+            # Field 1 is the "<size>:" label, so column cn lives in field cn+2.
+            tmp=$(( $cn + 2 ))
+            ref_n=$(echo "$ref_mops" | cut -d " " -f $tmp)
+            PASS_PPS_TABLE[$rn,$cn]=$(echo "($ref_n * .97)" | bc)
+            ((++cn))
+        done
+        ((++rn))
+    done
+}
+
+# The four wrappers below load the pass thresholds for one algorithm and
+# direction, then run outb_perf/inb_perf. $1 - column index into
+# PASS_PPS_TABLE, $2 - number of cores.
+function aes_cbc_sha1_hmac_outb()
+{
+    local cipher="aes-cbc"
+    local auth="sha1-hmac"
+    local algo_str="${cipher}_${auth}"
+
+    echo "Outbound Perf Test: $algo_str"
+    populate_pass_mops $algo_str "${TYPE[$Y]}.outb"
+
+    outb_perf $algo_str $1 $2
+}
+
+function aes_cbc_sha1_hmac_inb()
+{
+    local cipher="aes-cbc"
+    local auth="sha1-hmac"
+    local algo_str="${cipher}_${auth}"
+    local cn
+
+    echo "Inbound Perf Test: $algo_str"
+    populate_pass_mops $algo_str "${TYPE[$Y]}.inb"
+
+    inb_perf $algo_str $1 $2
+}
+
+function aes_gcm_outb()
+{
+    local cipher="aes-gcm"
+    local algo_str="${cipher}"
+    local cn
+
+    echo "Outbound Perf Test: $algo_str"
+    populate_pass_mops $algo_str "${TYPE[$Y]}.outb"
+
+    outb_perf $algo_str $1 $2
+}
+
+function aes_gcm_inb()
+{
+    local cipher="aes-gcm"
+    local algo_str="${cipher}"
+    local cn
+
+    echo "Inbound Perf Test: $algo_str"
+    populate_pass_mops $algo_str "${TYPE[$Y]}.inb"
+
+    inb_perf $algo_str $1 $2
+}
+
+get_system_info
+
+# Reference files are keyed by clock values and $HW; the cn9k name also
+# carries the CPT clock.
+if [[ $IS_CN10K -ne 0 ]]; then
+    FNAME="rclk"${RCLK}"_sclk"${SCLK}"."${HW}
+    FPATH="$OCTEONTESTPATH/inl_ipsec_perf/ref_numbers/cn10k/$FNAME"
+else
+    FNAME="rclk"${RCLK}"_sclk"${SCLK}"_cptclk"${CPTCLK}"."${HW}
+    FPATH="$OCTEONTESTPATH/inl_ipsec_perf/ref_numbers/cn9k/$FNAME"
+fi
+
+# Exit 1 if a reference file is missing for any entry of TYPE. Types rejected
+# by supported_by_9k are skipped when IS_CN10K is 0, and no outbound file is
+# required for "ip_p_msns".
+function check_ref_files()
+{
+    local outb
+    local inb
+
+    for type in "${TYPE[@]}"; do
+        if [[ $IS_CN10K -eq 0 ]] && ! supported_by_9k $type; then
+            continue
+        fi
+        inb="$FPATH.$type.inb"
+        if [[ ! -f $inb ]]; then
+            echo "File $inb not present"
+            exit 1
+        fi
+
+        if [[ $type = "ip_p_msns" ]]; then
+            continue
+        fi
+
+        outb="$FPATH.$type.outb"
+        if [[ ! -f $outb ]]; then
+            echo "File $outb not present"
+            exit 1
+        fi
+    done
+}
+
+check_ref_files
+
+trap "sig_handler ERR" ERR
+trap "sig_handler INT" INT
+trap "sig_handler QUIT" QUIT
+trap "sig_handler EXIT" EXIT
+
+SSO_DEV=${SSO_DEV:-$(lspci -d :a0f9 | tail -1 | awk '{ print $1 }')}
+EVENT_VF=$SSO_DEV
+
+setup_interfaces
+exec_genboard_cleanup
+
+# Main sequence: for every core count run outbound aes-cbc/sha1 (X=1,Y=0),
+# outbound aes-gcm (X=2,Y=1), then the same two inbound. "count" is the
+# PASS_PPS_TABLE column matching the current core count.
+count=0
+for c in ${CORES[@]}
+do
+    Y=0
+    echo ""
+    echo "Test: ${TN[$Y]} Num_cores: $c"
+    echo "----------------------"
+    sleep $WS
+
+    # Outbound
+    # aes-cbc sha1-hmac
+
+    X=1
+    Y=0
+    run_vpp_ipsec "$c"
+
+    pmd_rx_launch
+    pmd_tx_launch $c
+    aes_cbc_sha1_hmac_outb $count $c
+    quit_testpmd "$TPMD_TX_PREFIX"
+    quit_testpmd "$TPMD_RX_PREFIX"
+    awk ' { print FILENAME": " $0 } ' testpmd.out.$TPMD_TX_PREFIX
+    vpp_log inl_ipsec_perf
+    vpp_stats_all inl_ipsec_perf
+    vpp_cleanup inl_ipsec_perf
+    sleep $WS
+
+    echo ""
+    # aes-gcm
+
+    X=2
+    Y=1
+    run_vpp_ipsec "$c"
+
+    pmd_rx_launch
+    pmd_tx_launch $c
+    aes_gcm_outb $count $c
+    quit_testpmd "$TPMD_TX_PREFIX"
+    quit_testpmd "$TPMD_RX_PREFIX"
+    awk ' { print FILENAME": " $0 } ' testpmd.out.$TPMD_TX_PREFIX
+    vpp_log inl_ipsec_perf
+    vpp_stats_all inl_ipsec_perf
+    vpp_cleanup inl_ipsec_perf
+
+    #
+    echo ""
+    # Inbound
+    X=1
+    Y=0
+    run_vpp_ipsec_inb $c
+    pmd_rx_launch
+    aes_cbc_sha1_hmac_inb $count $c
+    quit_testpmd "$TPMD_RX_PREFIX"
+    awk ' { print FILENAME": " $0 } ' testpmd.out.$TPMD_TX_PREFIX
+    vpp_log inl_ipsec_perf
+    vpp_stats_all inl_ipsec_perf
+    vpp_cleanup inl_ipsec_perf
+
+    sleep $WS
+
+    echo ""
+    X=2
+    Y=1
+    run_vpp_ipsec_inb $c
+    pmd_rx_launch
+    aes_gcm_inb $count $c
+    quit_testpmd "$TPMD_RX_PREFIX"
+    awk ' { print FILENAME": " $0 } ' testpmd.out.$TPMD_TX_PREFIX
+    vpp_log inl_ipsec_perf
+    vpp_stats_all inl_ipsec_perf
+    vpp_cleanup inl_ipsec_perf
+    ((++count))
+done
+
+echo ""
+if [[ -n $Failed_tests ]]; then
+    echo "FAILURE: Test(s) [$Failed_tests] failed"
+    exit 1
+fi
+
+exit 0
diff --git a/ci/test/inl_ipsec_perf/inl_ipsec_perf_1.conf b/ci/test/inl_ipsec_perf/inl_ipsec_perf_1.conf
new file mode 100644
index 0000000000..99b4521193
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/inl_ipsec_perf_1.conf
@@ -0,0 +1,55 @@
+unix {
+  log /tmp/inl_ipsec_perf/vpp.log
+  cli-listen /tmp/inl_ipsec_perf/cli.sock
+  #full-coredump
+  #interactive
+  nodaemon
+}
+
+api-trace {
+  on
+}
+
+logging {
+  default-syslog-log-level info
+}
+
+cpu {
+  main-core 2
+  corelist-workers 3
+}
+
+session
+{
+  event-queue-length 10
+}
+#tcp { no-csum-offload }
+
+buffers {
+  ## Increase number of buffers allocated, needed only in scenarios with
+  ## large number of interfaces and worker threads. Value is per numa node.
+  ## Default is 16384 (8192 if running unprivileged)
+# buffers-per-numa 5000
+
+  ## Size of buffer data area
+  ## Default is 2048
+  default data-size 2048
+# naturally-aligned
+# enable-nat-alignment
+}
+
+devices {
+  dev pci/0002:20:00.1
+  {
+    driver octeon
+  }
+  dev pci/0002:1d:00.0
+  {
+    driver octeon
+  }
+}
+
+plugins {
+  plugin dpdk_plugin.so { disable }
+  plugin onp_plugin.so { disable }
+}
diff --git a/ci/test/inl_ipsec_perf/inl_ipsec_perf_2.conf b/ci/test/inl_ipsec_perf/inl_ipsec_perf_2.conf
new file mode 100644
index 0000000000..965e287b3a
--- /dev/null
+++ b/ci/test/inl_ipsec_perf/inl_ipsec_perf_2.conf
@@ -0,0 +1,55 @@
+unix {
+  log /tmp/inl_ipsec_perf/vpp.log
+  cli-listen /tmp/inl_ipsec_perf/cli.sock
+  #full-coredump
+  #interactive
+  nodaemon
+}
+
+api-trace {
+  on
+}
+
+logging {
+  default-syslog-log-level info
+}
+
+cpu {
+  main-core 2
+  corelist-workers 3-4
+}
+
+session
+{
+  event-queue-length 10
+}
+#tcp { no-csum-offload }
+
+buffers {
+  ## Increase number of buffers allocated, needed only in scenarios with
+  ## large number of interfaces and worker threads. Value is per numa node.
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +devices { + dev pci/0002:20:00.1 + { + driver octeon + } + dev pci/0002:1d:00.0 + { + driver octeon + } +} + +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/inl_ipsec_perf/inl_ipsec_perf_4.conf b/ci/test/inl_ipsec_perf/inl_ipsec_perf_4.conf new file mode 100644 index 0000000000..eaadfa3512 --- /dev/null +++ b/ci/test/inl_ipsec_perf/inl_ipsec_perf_4.conf @@ -0,0 +1,55 @@ +unix { + log /tmp/inl_ipsec_perf/vpp.log + cli-listen /tmp/inl_ipsec_perf/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 2 + corelist-workers 3-6 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +devices { + dev pci/0002:20:00.1 + { + driver octeon + } + dev pci/0002:1d:00.0 + { + driver octeon + } +} + +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_1.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_1.pcap new file mode 100644 index 0000000000..6b83eeb285 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_1.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_2.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_2.pcap new file mode 100644 index 0000000000..f5349a687f Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_2.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_4.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_4.pcap new file mode 100644 index 0000000000..8021865092 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_1410_4.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_1.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_1.pcap new file mode 100644 index 0000000000..c3358018cb Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_1.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_2.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_2.pcap new file mode 100644 index 0000000000..b8f8ffb5e1 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_2.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_4.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_4.pcap new file mode 100644 index 
0000000000..438b1a3403 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_380_4.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_1.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_1.pcap new file mode 100644 index 0000000000..056b5cfa25 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_1.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_2.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_2.pcap new file mode 100644 index 0000000000..44166cbf2c Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_2.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_4.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_4.pcap new file mode 100644 index 0000000000..2b45271d33 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-cbc_sha1-hmac_64_4.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_1.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_1.pcap new file mode 100644 index 0000000000..90fde9c52b Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_1.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_2.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_2.pcap new file mode 100644 index 0000000000..d42647be94 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_2.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_4.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_4.pcap new file mode 100644 index 0000000000..24d2412ca1 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_1410_4.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_1.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_1.pcap new file mode 100644 index 0000000000..a40ab80264 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_1.pcap differ diff --git 
a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_2.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_2.pcap new file mode 100644 index 0000000000..3e1ed56b28 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_2.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_4.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_4.pcap new file mode 100644 index 0000000000..48968b1ff1 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_380_4.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_1.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_1.pcap new file mode 100644 index 0000000000..558b3ba0df Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_1.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_2.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_2.pcap new file mode 100644 index 0000000000..fb292e4c47 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_2.pcap differ diff --git a/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_4.pcap b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_4.pcap new file mode 100644 index 0000000000..d2bc682877 Binary files /dev/null and b/ci/test/inl_ipsec_perf/pcap/enc_aes-gcm_64_4.pcap differ diff --git a/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1000.106xx.ip.inb b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1000.106xx.ip.inb new file mode 100644 index 0000000000..b90c42ec7e --- /dev/null +++ b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1000.106xx.ip.inb @@ -0,0 +1,10 @@ +aes-cbc_sha1-hmac +64: 8157035 17676380 34471600 +380: 8155538 17645993 21956102 +1410: 8148850 8322770 8322761 + +aes-gcm +64: 8198632 17730833 36100815 +380: 8200873 17572093 21955849 +1410: 8172678 8389795 8389801 + diff --git a/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1000.106xx.ip.outb b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1000.106xx.ip.outb new file mode 100644 index 0000000000..6df2ae9ada 
--- /dev/null +++ b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1000.106xx.ip.outb @@ -0,0 +1,10 @@ +aes-cbc_sha1-hmac +64: 7221905 15248204 31554795 +380: 7220807 15195168 27058018 +1410: 7237801 8322772 8322771 + +aes-gcm +64: 7203667 15349844 31428190 +380: 7200141 15354581 27534812 +1410: 7228652 8412384 8412385 + diff --git a/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1100.106xx.ip.inb b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1100.106xx.ip.inb new file mode 100644 index 0000000000..b90c42ec7e --- /dev/null +++ b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1100.106xx.ip.inb @@ -0,0 +1,10 @@ +aes-cbc_sha1-hmac +64: 8157035 17676380 34471600 +380: 8155538 17645993 21956102 +1410: 8148850 8322770 8322761 + +aes-gcm +64: 8198632 17730833 36100815 +380: 8200873 17572093 21955849 +1410: 8172678 8389795 8389801 + diff --git a/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1100.106xx.ip.outb b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1100.106xx.ip.outb new file mode 100644 index 0000000000..6df2ae9ada --- /dev/null +++ b/ci/test/inl_ipsec_perf/ref_numbers/cn10k/rclk2500_sclk1100.106xx.ip.outb @@ -0,0 +1,10 @@ +aes-cbc_sha1-hmac +64: 7221905 15248204 31554795 +380: 7220807 15195168 27058018 +1410: 7237801 8322772 8322771 + +aes-gcm +64: 7203667 15349844 31428190 +380: 7200141 15354581 27534812 +1410: 7228652 8412384 8412385 + diff --git a/ci/test/l3fwd/l3fwd.conf b/ci/test/l3fwd/l3fwd.conf new file mode 100644 index 0000000000..704eb936eb --- /dev/null +++ b/ci/test/l3fwd/l3fwd.conf @@ -0,0 +1,52 @@ +unix { + log /tmp/l3fwd/vpp.log + cli-listen /tmp/l3fwd/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 2 + corelist-workers 3 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of 
interfaces and worker threads. Value is per numa node. + ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +devices { + dev pci/0002:01:00.2 { + driver octeon + port 0 { + name eth0 + } + } +} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/l3fwd/l3fwd.exec b/ci/test/l3fwd/l3fwd.exec new file mode 100644 index 0000000000..233446b3df --- /dev/null +++ b/ci/test/l3fwd/l3fwd.exec @@ -0,0 +1,10 @@ +set int mac address eth0 d6:bd:c6:81:0f:b1 + +set int ip address eth0 2.0.0.1/24 + +set ip neighbor eth0 3.0.0.2 0a:5f:b6:10:a4:17 + +ip route add 3.0.0.2/24 via eth0 + +set int state eth0 up + diff --git a/ci/test/l3fwd/l3fwd.pcap b/ci/test/l3fwd/l3fwd.pcap new file mode 100644 index 0000000000..98405e6523 Binary files /dev/null and b/ci/test/l3fwd/l3fwd.pcap differ diff --git a/ci/test/l3fwd/l3fwd.sh b/ci/test/l3fwd/l3fwd.sh new file mode 100644 index 0000000000..c011526f46 --- /dev/null +++ b/ci/test/l3fwd/l3fwd.sh @@ -0,0 +1,109 @@ +#!/bin/bash + +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +set -e + +OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." + +source $OCTEONTESTPATH/common/vpp/vpp.env +source $OCTEONTESTPATH/common/testpmd/pktgen.env +source $OCTEONTESTPATH/common/pcap/pcap.env + +NO_HP=${NO_HP:-} +HP=${HP:-8} + +function mount_hugetlbfs() { + # Mount hugetlbfs. + if ! mount | grep -q hugepages; then + mount -t hugetlbfs none /dev/hugepages/ + fi +} + +function setup_hp() { + if [[ -n $NO_HP ]]; then + echo "Skipping huge page setup" + return + fi + # Enable HP hugepages. + echo $HP > /proc/sys/vm/nr_hugepages +} + +function sig_handler() +{ + local status=$? 
+ set +e + trap - ERR + trap - INT + trap - QUIT + trap - EXIT + if [[ $status -ne 0 ]]; then + vpp_stats_all l3fwd + echo $status + echo "$1 Handler" + fi + pktgen_quit + pktgen_cleanup + vpp_cleanup l3fwd + exit $status +} + +PKTGEN_PCAP="l3fwd.pcap" +PKTGEN_PORT="0002:01:00.1" +PKTGEN_COREMASK="0xf0" +L3FWD_PORT="0002:01:00.2" +L3FWD_MAINCORE="0x2" +L3FWD_WORKER_COREMASK="0x4" + + +trap "sig_handler ERR" ERR +trap "sig_handler INT" INT +trap "sig_handler QUIT" QUIT +trap "sig_handler EXIT" EXIT + +mount_hugetlbfs +setup_hp + +PCAP_CNT=$(pcap_packet_count $PKTGEN_PCAP) +PCAP_LEN=$(pcap_length $PKTGEN_PCAP) + +echo "Starting l3fwd with Port=$L3FWD_PORT, Worker_Coremask=$L3FWD_WORKER_COREMASK" +rm -rf /tmp/l3fwd +mkdir -p /tmp/l3fwd +cp l3fwd.exec /tmp/l3fwd/ +vpp_launch l3fwd +vpp_start l3fwd +echo "Starting pktgen with Port=$PKTGEN_PORT, Coremask=$PKTGEN_COREMASK, Pcap=$PKTGEN_PCAP" +pktgen_launch -c $PKTGEN_COREMASK -p $PKTGEN_PORT -i $PKTGEN_PCAP +echo "pktgen start" +pktgen_start +sleep 5 +vpp_port_down l3fwd eth0 + +vpp_stats_all l3fwd > /dev/null +pktgen_stats > /dev/null + +echo "-------------------- L3FWD LOGS ---------------------" +vpp_log l3fwd +echo "-------------------- PKTGEN LOGS --------------------" +pktgen_log + +VPP_RX_COUNT=$(vpp_rx_count l3fwd eth0) +VPP_TX_COUNT=$(vpp_tx_count l3fwd eth0) +VPP_RX_BYTES=$(vpp_rx_bytes l3fwd eth0) +VPP_TX_BYTES=$(vpp_tx_bytes l3fwd eth0) + +if [[ $VPP_RX_COUNT -ne $PCAP_CNT ]] || + [[ $VPP_TX_COUNT -ne $PCAP_CNT ]] || + [[ $VPP_RX_BYTES -ne $PCAP_LEN ]] || + [[ $VPP_TX_BYTES -ne $PCAP_LEN ]]; then + echo "FAILURE: Error in l3fwd" + exit 1 +fi + +echo "SUCCESS: l3fwd completed" + +pktgen_quit +pktgen_cleanup diff --git a/ci/test/l3fwd_perf/l3fwd_gen.sh b/ci/test/l3fwd_perf/l3fwd_gen.sh new file mode 100755 index 0000000000..ea64ab2c4f --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_gen.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# Copyright (c) 2025 Marvell. 
+# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +TEST_OP=${TEST_OP:-} +set -e + +OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." + +source $OCTEONTESTPATH/common/testpmd/common.env + +PRFX="fwd-gen" +PORT0="${PORT0:-0002:02:00.0}" + +VFIO_DEVBIND="$OCTEONTESTPATH/board/oxk-devbind-basic.sh" + +function sig_handler() +{ + local status=$? + set +e + trap - ERR + trap - INT + if [[ $status -ne 0 ]]; then + echo "$1 Handler" + # Dump error logs + testpmd_log $PRFX + fi + + testpmd_cleanup $PRFX + exit $status +} + +trap "sig_handler ERR" ERR +trap "sig_handler INT" INT + +case $TEST_OP in + launch) + $VFIO_DEVBIND -b vfio-pci $PORT0 + num_cores=$(grep -c ^processor /proc/cpuinfo) + ((num_cores-=1)) + num_cores=${GEN_CORES:-$num_cores} + ((fwd_cores=num_cores-1)) + num_flows=${GEN_FLOWS} + + # Limit the number forwarding cores on cn10k. + # Tx rate peaks (99 MPPS) after 10 cores and drop after 18. + fwd_cores=$(( fwd_cores < 12 ? 
fwd_cores : 12 )) + + testpmd_launch $PRFX \ + "-l 1-$num_cores -a $PORT0" \ + "--no-flush-rx --nb-cores=$fwd_cores --forward-mode=flowgen \ + -i --txq=$fwd_cores --rxq=$fwd_cores \ + --flowgen-flows=$num_flows --eth-peer=0,00:01:02:03:04:01" /dev/null + testpmd_cmd $PRFX "port stop 0" + testpmd_cmd $PRFX "set flow_ctrl rx off 0" + testpmd_cmd $PRFX "set flow_ctrl tx off 0" + testpmd_cmd $PRFX "port start 0" + ;; + start) + testpmd_cmd $PRFX "start tx_first 256" + testpmd_cmd $PRFX "show port stats all" + ;; + stop) + testpmd_cmd $PRFX "show port stats all" + testpmd_cmd $PRFX "stop" + ;; + rx_pps) + testpmd_cmd $PRFX "show port stats all" + val=`testpmd_log $PRFX | tail -4 | grep -ao 'Rx-pps: .*' | \ + awk '{print $2}'` + echo $val + ;; + tx_pps) + testpmd_cmd $PRFX "show port stats all" + cut -f 2 -d ":" + val=`testpmd_log $PRFX | tail -4 | grep -ao 'Tx-pps: .*' | \ + awk '{print $2}'` + echo $val + ;; + cleanup) + testpmd_cleanup $PRFX + ;; + log) + testpmd_log $PRFX + ;; +esac + +exit 0 diff --git a/ci/test/l3fwd_perf/l3fwd_perf.exec b/ci/test/l3fwd_perf/l3fwd_perf.exec new file mode 100644 index 0000000000..4c8bc50160 --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_perf.exec @@ -0,0 +1,8 @@ +set int mac address eth0 00:01:02:03:04:01 + +set int ip address eth0 2.0.0.1/24 + +ip route add 0.0.0.0/0 via eth0 + +set int state eth0 up + diff --git a/ci/test/l3fwd_perf/l3fwd_perf.sh b/ci/test/l3fwd_perf/l3fwd_perf.sh new file mode 100755 index 0000000000..b0995af5c2 --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_perf.sh @@ -0,0 +1,382 @@ +#!/bin/bash + +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + + +set -e +#set -euox pipefail + +GENERATOR_BOARD=${GENERATOR_BOARD:-} +PLAT=${PLAT:-} +OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." 
+VFIO_DEVBIND="$OCTEONTESTPATH/board/oxk-devbind-basic.sh" + +source $OCTEONTESTPATH/common/testpmd/common.env +source $OCTEONTESTPATH/common/vpp/vpp.env + +# Find the dpdk-testpmd application + +TESTPMD_BIN=$(which dpdk-testpmd) +if [[ -z $TESTPMD_BIN ]]; then + echo "dpdk-testpmd not found !!" + exit 1 +fi +echo $TESTPMD_BIN +declare -i num_tests +declare -a test_name +declare -a test_lbk +SUDO="sudo" +remote_ssh="${TARGET_SSH_CMD:-"ssh -o LogLevel=ERROR -o ServerAliveInterval=30 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"} $GENERATOR_BOARD" +gen=$(realpath ./l3fwd_gen.sh) +MAX_RETRY=${MAX_RETRY:-5} +WITH_GEN_BOARD=0 +GEN_ARG= +G_ENV= +TOLERANCE=${TOLERANCE:-6} + +FWD_PERF_IN=fwd_perf.in +FWD_PERF_OUT=fwd_perf.out +FWD_PERF_OUT_FULL=fwd_perf.out.full +GEN_LOG_FULL=gen.out.full + +START_STR=">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" +END_STR="<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" + +LIF0=0002:01:00.1 +LIF1=0002:01:00.2 + +! $(cat /proc/device-tree/compatible | grep -q "cn10k") +IS_CN10K=$? + +if [[ -z "$GENERATOR_BOARD" ]]; then + echo "Generator board details missing!!" + WITH_GEN_BOARD=0 +else + echo "Found Generator board details $GENERATOR_BOARD" + if [[ $IS_CN10K -ne 0 ]]; then + WITH_GEN_BOARD=1 + fi +fi + +if [[ $WITH_GEN_BOARD -eq 0 ]] +then + IF0=$LIF0 + IF1=$LIF1 + remote_ssh="sh -c " + GEN_PORT=$IF1 + G_ENV="GEN_CORES=6" + SUDO="" + echo "Running locally without generator board" +else + IF0=0002:02:00.0 + GEN_PORT=$IF0 + $VFIO_DEVBIND -b vfio-pci $IF0 + # Dummy whitelist device + IF1=0008:08:08.0 + echo "Running with generator board" +fi + +rm -rf $FWD_PERF_IN $FWD_PERF_OUT $FWD_PERF_OUT_FULL $GEN_LOG_FULL + +function sig_handler() +{ + local status=$? 
+ set +e + trap - ERR + trap - INT + trap - QUIT + trap - EXIT + if [[ $status -ne 0 ]]; then + echo "$1 Handler" + fi + + awk ' { print FILENAME": " $0 } ' $FWD_PERF_OUT_FULL + awk ' { print FILENAME": " $0 } ' $FWD_PERF_OUT + awk ' { print FILENAME": " $0 } ' $GEN_LOG_FULL + + killall -9 dpdk-testpmd + $remote_ssh "sudo killall -9 dpdk-testpmd" + vpp_log l3fwd_perf + vpp_stats_all l3fwd_perf + vpp_cleanup l3fwd_perf + exit $status +} + +trap "sig_handler ERR" ERR +trap "sig_handler INT" INT +trap "sig_handler QUIT" QUIT +trap "sig_handler EXIT" EXIT + +# Get CPU PART NUMBER +PARTNUM_106XX=0xd49 +PARTNUM=$(grep -m 1 'CPU part' /proc/cpuinfo | awk -F': ' '{print $2}') +DTC=$(tr -d '\0' >$GEN_LOG_FULL + $remote_ssh "$SUDO PORT0=$GEN_PORT TEST_OP=launch $G_ENV GEN_FLOWS=$CORES $gen $GEN_ARG" +} + +start_gen() { + $remote_ssh "$SUDO PLAT=$PLAT PORT0=$GEN_PORT TEST_OP=start $gen" +} + +stop_gen() { + $remote_ssh "$SUDO PLAT=$PLAT PORT0=$GEN_PORT TEST_OP=stop $gen" +} + +cleanup_gen() { + $remote_ssh "$SUDO PLAT=$PLAT PORT0=$GEN_PORT TEST_OP=log $gen" >>$GEN_LOG_FULL + echo $END_STR ${test_name[$idx]} >>$GEN_LOG_FULL + + $remote_ssh "$SUDO PLAT=$PLAT PORT0=$GEN_PORT TEST_OP=cleanup $gen" +} + +testpmd_pps_local() { + local rx_pps=0 + + echo "show port stats all" >>$FWD_PERF_IN + sleep 1 + echo "show port stats all" >>$FWD_PERF_IN + sleep 1 + echo "show port stats all" >>$FWD_PERF_IN + while ! 
(tail -n1 $FWD_PERF_OUT | grep -q "testpmd> $") + do + sleep 0.1 + continue; + done + + pps=`cat $FWD_PERF_OUT | \ + grep "Rx-pps:" | awk '{print $2}' | tail -2` + for i in $pps + do + rx_pps=$((rx_pps + i)) + done + echo $rx_pps +} + +check_pps() { + idx=$1 + pass_pps=$(expected_pps $idx) + ref_pps=$(ref_pps $idx) + local retry=3 + + while [[ retry -ne 0 ]] + do + + rx_pps=$($remote_ssh "$SUDO TEST_OP=rx_pps $gen") + + if [[ rx_pps -lt pass_pps ]]; then + echo -n "Low PPS for ${test_name[$idx]} ($rx_pps < $pass_pps)" + echo " (Ref $ref_pps, tolerance $TOLERANCE%)" + else + echo -n "Rx PPS $rx_pps as expected $pass_pps" + echo " (Ref $ref_pps, tolerance $TOLERANCE%)" + return 0 + fi + + sleep 1 + ((retry-=1)) + done + + return 1 +} + +cleanup_one() { + local idx=$1 + + vpp_log l3fwd_perf + vpp_stats_all l3fwd_perf + vpp_cleanup l3fwd_perf + + stop_gen + cleanup_gen $idx + + cat $FWD_PERF_OUT >> $FWD_PERF_OUT_FULL + echo $END_STR ${test_name[$idx]} >>$FWD_PERF_OUT_FULL +} + +run_one() { + unbuffer="$(command -v stdbuf) -o 0" || unbuffer= + local in=$FWD_PERF_IN + local out=$FWD_PERF_OUT + idx=$1 + + echo $START_STR ${test_name[$idx]} >>$FWD_PERF_OUT_FULL + + rm -rf $in $out + touch $in $out + + CORES=${test_ncores[$idx]} + + echo -n "Starting l3fwd with 'n_cores=$CORES port=$IF0 " + rm -rf /tmp/l3fwd_perf + mkdir -p /tmp/l3fwd_perf + cp l3fwd_perf_$CORES.conf /tmp/l3fwd_perf/ + cp l3fwd_perf.exec /tmp/l3fwd_perf + vpp_launch l3fwd_perf_$CORES + vpp_exec_cmd l3fwd_perf "device attach pci/$IF0 driver octeon" + vpp_exec_cmd l3fwd_perf "device create-interface pci/$IF0 port 0 name eth0 num-rx-queues $CORES tx-queues-size 16384" + vpp_start l3fwd_perf + for (( i=0; i<$CORES; i++ )) + do + vpp_exec_cmd l3fwd_perf "set ip neighbor eth0 10.253.0.$i 00:01:02:03:04:00" + vpp_exec_cmd l3fwd_perf "test flow add dst-ip 10.253.0.$i/255.255.255.255 proto 17 redirect-to-queue $i" + vpp_exec_cmd l3fwd_perf "test flow enable index $i eth0" + done + + launch_gen $idx + start_gen +} + 
+run_fwd_tests() { + + get_system_info + + idx=0 + ret=0 + REF_WITH_GEN_BOARD=$WITH_GEN_BOARD + REF_IF0=$IF0 + REF_IF1=$IF1 + local retry_count=$MAX_RETRY + while [[ idx -lt num_tests ]]; do + + if [[ ${test_lbk[$idx]} -eq 1 ]]; then + # Forcing change to run on LBK interface only + WITH_GEN_BOARD=0 + IF0=$LIF0 + IF1=$LIF1 + else + # Restore for other cases + WITH_GEN_BOARD=$REF_WITH_GEN_BOARD + IF0=$REF_IF0 + IF1=$REF_IF1 + fi + + run_one $idx + + sleep 3 + + set +e + check_pps $idx + local k=$? + set -e + + if [[ k -eq 0 ]]; then + cleanup_one $idx + + ((idx+=1)) + retry_count=$MAX_RETRY + continue + fi + ((retry_count-=1)) || true + + if [[ retry_count -eq 0 ]]; then + echo "FAIL: ${test_name[$idx]}" + cleanup_one $idx + + ((ret+=1)) + ((idx+=1)) + retry_count=$MAX_RETRY + else + echo "Re-run ${test_name[$idx]} $retry_count" + cleanup_one $idx + fi + done + + exit $ret +} + +num_tests=0 + +# Register fwd performance tests. +# Format: + +register_fwd_test "L3FWD_1C" "1" "0" +register_fwd_test "L3FWD_2C" "2" "0" +register_fwd_test "L3FWD_4C" "4" "0" +register_fwd_test "L3FWD_8C" "8" "0" + +run_fwd_tests + +cleanup_gen diff --git a/ci/test/l3fwd_perf/l3fwd_perf_1.conf b/ci/test/l3fwd_perf/l3fwd_perf_1.conf new file mode 100644 index 0000000000..ea0bc279ec --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_perf_1.conf @@ -0,0 +1,52 @@ +unix { + log /tmp/l3fwd_perf/vpp.log + cli-listen /tmp/l3fwd_perf/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 23 + corelist-workers 22 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +#devices { +# dev pci/0002:01:00.1 { +# driver octeon +# port 0 { +# name eth0 +# } +# } +#} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/l3fwd_perf/l3fwd_perf_2.conf b/ci/test/l3fwd_perf/l3fwd_perf_2.conf new file mode 100644 index 0000000000..b06c084291 --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_perf_2.conf @@ -0,0 +1,52 @@ +unix { + log /tmp/l3fwd_perf/vpp.log + cli-listen /tmp/l3fwd_perf/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 23 + corelist-workers 21-22 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +#devices { +# dev pci/0002:01:00.1 { +# driver octeon +# port 0 { +# name eth0 +# } +# } +#} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/l3fwd_perf/l3fwd_perf_4.conf b/ci/test/l3fwd_perf/l3fwd_perf_4.conf new file mode 100644 index 0000000000..63e6f13c14 --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_perf_4.conf @@ -0,0 +1,52 @@ +unix { + log /tmp/l3fwd_perf/vpp.log + cli-listen /tmp/l3fwd_perf/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 23 + corelist-workers 19-22 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +#devices { +# dev pci/0002:01:00.1 { +# driver octeon +# port 0 { +# name eth0 +# } +# } +#} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/l3fwd_perf/l3fwd_perf_8.conf b/ci/test/l3fwd_perf/l3fwd_perf_8.conf new file mode 100644 index 0000000000..cc3aa45d16 --- /dev/null +++ b/ci/test/l3fwd_perf/l3fwd_perf_8.conf @@ -0,0 +1,52 @@ +unix { + log /tmp/l3fwd_perf/vpp.log + cli-listen /tmp/l3fwd_perf/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 23 + corelist-workers 15-22 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +# naturally-aligned +# enable-nat-alignment +} + +#devices { +# dev pci/0002:01:00.1 { +# driver octeon +# port 0 { +# name eth0 +# } +# } +#} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/l3fwd_perf/ref_numbers/rclk2000_sclk1000.cn106.l3fwd b/ci/test/l3fwd_perf/ref_numbers/rclk2000_sclk1000.cn106.l3fwd new file mode 100644 index 0000000000..f0ca89d57f --- /dev/null +++ b/ci/test/l3fwd_perf/ref_numbers/rclk2000_sclk1000.cn106.l3fwd @@ -0,0 +1,7 @@ +PPS of tests on 106xx and multiple RCLK +--------------------------------------- + +L3FWD_1C 12550000 +L3FWD_2C 24550000 +L3FWD_4C 44000000 +L3FWD_8C 74000000 diff --git a/ci/test/l3fwd_perf/ref_numbers/rclk2200_sclk1000.cn106.l3fwd b/ci/test/l3fwd_perf/ref_numbers/rclk2200_sclk1000.cn106.l3fwd new file mode 100644 index 0000000000..6274989d16 --- /dev/null +++ b/ci/test/l3fwd_perf/ref_numbers/rclk2200_sclk1000.cn106.l3fwd @@ -0,0 +1,4 @@ +PPS of tests on 106xx and multiple RCLK +--------------------------------------- + +L3FWD_1C 12550000 diff --git a/ci/test/l3fwd_perf/ref_numbers/rclk2500_sclk1000.cn106.l3fwd b/ci/test/l3fwd_perf/ref_numbers/rclk2500_sclk1000.cn106.l3fwd new file mode 100644 index 0000000000..d3d887b22b --- /dev/null +++ b/ci/test/l3fwd_perf/ref_numbers/rclk2500_sclk1000.cn106.l3fwd @@ -0,0 +1,7 @@ +PPS of tests on 106xx and multiple RCLK +--------------------------------------- + +L3FWD_1C 14494473 +L3FWD_2C 27358193 +L3FWD_4C 50342188 +L3FWD_8C 83399916 diff --git a/ci/test/l3fwd_perf/ref_numbers/rclk2500_sclk1100.cn106.l3fwd b/ci/test/l3fwd_perf/ref_numbers/rclk2500_sclk1100.cn106.l3fwd new file mode 100644 index 0000000000..d3d887b22b --- /dev/null +++ b/ci/test/l3fwd_perf/ref_numbers/rclk2500_sclk1100.cn106.l3fwd @@ -0,0 +1,7 @@ +PPS of tests on 106xx and multiple RCLK 
+--------------------------------------- + +L3FWD_1C 14494473 +L3FWD_2C 27358193 +L3FWD_4C 50342188 +L3FWD_8C 83399916 diff --git a/ci/test/test.list b/ci/test/test.list new file mode 100644 index 0000000000..491c1e612d --- /dev/null +++ b/ci/test/test.list @@ -0,0 +1,5 @@ +l3fwd# +inl_ipsec# +l3fwd_perf# +inl_ipsec_perf# +tx_cksum# diff --git a/ci/test/test.sh b/ci/test/test.sh new file mode 100755 index 0000000000..4de38cc46f --- /dev/null +++ b/ci/test/test.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# Marvell CONFIDENTIAL AND PROPRIETARY NOTE +# +# This software contains information confidential and proprietary to +# Marvell. It shall not be reproduced in whole or in +# part, or transferred to other documents, or disclosed to third +# parties, or used for any purpose other than that for which it was +# obtained, without the prior written consent of Marvell. +# +# Copyright (c) 2025 Marvell. If you received this file from Marvell +# and you have entered into a commercial license agreement (a "Commercial License") +# with Marvell, the file is licensed to you under the terms of the applicable Commercial +# License. In the absence of such license agreement the following file is subject to +# Marvell’s standard Limited Use License Agreement. + +set -euox pipefail + +function install_packages() { + echo "Enabling internet on target board...." + $REMOTE 'echo "DNS=10.28.116.24 10.31.116.251 10.68.76.63" | sudo tee -a /etc/systemd/resolved.conf' + $REMOTE "sudo systemctl restart systemd-resolved" + sleep 30 + echo "Installing essential packages..." + $REMOTE "sudo apt-get update" + $REMOTE "sudo pip3 install --break-system-packages --no-input syslog_rfc5424_parser noise" +} + +function help() { + set +x + echo "" + echo "Usage:" + echo "$SCRIPT_NAME [ARGUMENTS]..." 
+ echo "" + echo "Mandatory Arguments" + echo "===================" + echo "--build-root | -r : Build root directory" + echo "--test-env | -t : Test Environment" + echo "" + echo "Optional Arguments" + echo "===================" + echo "--run-dir | -d : Run directory [Default=Build Root]" + echo "--project-root | -p : VPP Project root [Default: PWD]" + echo "--help | -h : Print this help and exit" + set -x +} + +SCRIPT_NAME="$(basename "$0")" +if ! OPTS=$(getopt \ + -o "r:d:t:p:h" \ + -l "build-root:,run-dir:,test-env:,project-root:,help" \ + -n "$SCRIPT_NAME" \ + -- "$@"); then + help + exit 1 +fi + +BUILD_ROOT= +TEST_ENV_CONF= +PROJECT_ROOT="$PWD" +TARGET_BOARD=${TARGET_BOARD:-root@127.0.0.1} +TARGET_SSH_CMD=${TARGET_SSH_CMD:-"ssh"} +REMOTE="$TARGET_SSH_CMD $TARGET_BOARD -n" + +eval set -- "$OPTS" +unset OPTS +while [[ $# -gt 1 ]]; do + case $1 in + -r|--build-root) shift; BUILD_ROOT=$1;; + -d|--run-dir) shift; RUN_DIR=$1;; + -t|--test-env) shift; TEST_ENV_CONF=$(realpath $1);; + -p|--project-root) shift; PROJECT_ROOT=$1;; + -h|--help) help; exit 0;; + *) help; exit 1;; + esac + shift +done + +if [[ -z $BUILD_ROOT || -z $TEST_ENV_CONF ]]; then + echo "Build root directory and test env should be given !!" 
+ help + exit 1 +fi + +export PROJECT_ROOT=$(realpath $PROJECT_ROOT) +mkdir -p $BUILD_ROOT +export BUILD_ROOT=$(realpath $BUILD_ROOT) +export BUILD_DIR=$BUILD_ROOT/build +export RUN_DIR=${RUN_DIR:-$BUILD_DIR} +mkdir -p $RUN_DIR + +source $TEST_ENV_CONF + +install_packages +# Run the tests +$TEST_RUN_CMD diff --git a/ci/test/tx_cksum/in.pcap b/ci/test/tx_cksum/in.pcap new file mode 100644 index 0000000000..98405e6523 Binary files /dev/null and b/ci/test/tx_cksum/in.pcap differ diff --git a/ci/test/tx_cksum/in_mseg.pcap b/ci/test/tx_cksum/in_mseg.pcap new file mode 100644 index 0000000000..d552d6cc0a Binary files /dev/null and b/ci/test/tx_cksum/in_mseg.pcap differ diff --git a/ci/test/tx_cksum/out.pcap b/ci/test/tx_cksum/out.pcap new file mode 100644 index 0000000000..a031cfc0d6 Binary files /dev/null and b/ci/test/tx_cksum/out.pcap differ diff --git a/ci/test/tx_cksum/out_mseg.pcap b/ci/test/tx_cksum/out_mseg.pcap new file mode 100644 index 0000000000..322279fb15 Binary files /dev/null and b/ci/test/tx_cksum/out_mseg.pcap differ diff --git a/ci/test/tx_cksum/tx_cksum.conf b/ci/test/tx_cksum/tx_cksum.conf new file mode 100644 index 0000000000..111ddb8f60 --- /dev/null +++ b/ci/test/tx_cksum/tx_cksum.conf @@ -0,0 +1,51 @@ +unix { + log /tmp/tx_cksum/vpp.log + cli-listen /tmp/tx_cksum/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 2 + corelist-workers 3 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +} + +devices { + dev pci/0002:01:00.2 { + driver octeon + port 0 { + name eth0 + num-rx-queues 4 + } + } +} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/ci/test/tx_cksum/tx_cksum.exec b/ci/test/tx_cksum/tx_cksum.exec new file mode 100644 index 0000000000..233446b3df --- /dev/null +++ b/ci/test/tx_cksum/tx_cksum.exec @@ -0,0 +1,10 @@ +set int mac address eth0 d6:bd:c6:81:0f:b1 + +set int ip address eth0 2.0.0.1/24 + +set ip neighbor eth0 3.0.0.2 0a:5f:b6:10:a4:17 + +ip route add 3.0.0.2/24 via eth0 + +set int state eth0 up + diff --git a/ci/test/tx_cksum/tx_cksum.sh b/ci/test/tx_cksum/tx_cksum.sh new file mode 100644 index 0000000000..836f729988 --- /dev/null +++ b/ci/test/tx_cksum/tx_cksum.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +# Copyright (c) 2025 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html + +#set -e +set -euox pipefail + +PRFX="tx_cksum" +OCTEONTESTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." + +source $OCTEONTESTPATH/common/vpp/vpp.env +source $OCTEONTESTPATH/common/testpmd/pktgen.env +source $OCTEONTESTPATH/common/pcap/pcap.env + +TX_PCAP="in.pcap" +EXPECTED_PCAP="out.pcap" +TX_MSEG_PCAP="in_mseg.pcap" +EXPECTED_MSEG_PCAP="out_mseg.pcap" +RECV_PCAP="recv.pcap" +PKTGEN_PORT="0002:01:00.1" +PKTGEN_COREMASK="0xf0" +PORT0="0002:01:00.2" +PORT1="0002:01:00.3" +MAINCORE="0x2" +WORKER_COREMASK="0x4" +CONF_FILE="tx_cksum.conf" +INLINE_CONF_FILE="tx_cksum_inline.conf" + +TMP_DIR=/tmp/$PRFX +rm -rf $TMP_DIR +mkdir -p $TMP_DIR + +NO_HP=${NO_HP:-} +HP=${HP:-8} + +function mount_hugetlbfs() { + # Mount hugetlbfs. + if ! 
mount | grep -q hugepages; then + mount -t hugetlbfs none /dev/hugepages/ + fi +} + +function setup_hp() { + if [[ -n $NO_HP ]]; then + echo "Skipping huge page setup" + return + fi + # Enable HP hugepages. + echo $HP > /proc/sys/vm/nr_hugepages +} + +function sig_handler() +{ + local status=$? + set +e + trap - ERR + trap - INT + trap - QUIT + trap - EXIT + if [[ $status -ne 0 ]]; then + vpp_stats_all tx_cksum + echo $status + echo "$1 Handler" + fi + pktgen_quit + pktgen_cleanup + vpp_cleanup tx_cksum + exit $status +} + + +trap "sig_handler ERR" ERR +trap "sig_handler INT" INT +trap "sig_handler QUIT" QUIT +trap "sig_handler EXIT" EXIT + +mount_hugetlbfs +setup_hp + +run_tx_cksum_test() { + local CONF_PRFX=$1 + CONF_FILE="${CONF_PRFX}.conf" + echo "Starting VPP with Port0=$PORT0, Conf: $CONF_FILE" + cp tx_cksum.exec /tmp/tx_cksum/ + vpp_launch $CONF_PRFX + vpp_start $PRFX + sleep 2 + + echo "Starting pktgen with Port=$PKTGEN_PORT, Coremask=$PKTGEN_COREMASK, In-pcap=$TX_PCAP, received-pcap=$RECV_PCAP" + pktgen_launch -c $PKTGEN_COREMASK -p $PKTGEN_PORT -i $TX_PCAP -o $RECV_PCAP + sleep 2 + testpmd_cmd "pktgen" "port stop all" + testpmd_cmd "pktgen" "port config mtu 0 9000" + testpmd_cmd "pktgen" "port start all" + sleep 2 + echo "pktgen start" + pktgen_start + sleep 5 + + vpp_port_down $PRFX eth0 + + vpp_stats_all $PRFX > /dev/null + pktgen_stats > /dev/null + + echo "-------------------- TX_CKSUM VPP LOGS ---------------------" + vpp_log $PRFX + echo "-------------------- TX_CKSUM PKTGEN LOGS --------------------" + pktgen_log + + echo "Verifying tx_cksum test" + + VPP_RX_COUNT=$(vpp_rx_count $PRFX eth0) + VPP_TX_COUNT=$(vpp_tx_count $PRFX eth0) + VPP_RX_BYTES=$(vpp_rx_bytes $PRFX eth0) + VPP_TX_BYTES=$(vpp_tx_bytes $PRFX eth0) + + if [[ $VPP_RX_COUNT -ne $PCAP_CNT ]] || + [[ $VPP_TX_COUNT -ne $PCAP_CNT ]] || + [[ $VPP_RX_BYTES -ne $PCAP_LEN ]] || + [[ $VPP_TX_BYTES -ne $PCAP_LEN ]]; then + echo "FAILURE: Error in tx_cksum" + exit 1 + fi + + tcpdump -nr 
$EXPECTED_PCAP -xvve -t >$TMP_DIR/expect.txt + tcpdump -nr $RECV_PCAP -xvve -t >$TMP_DIR/recv.txt + + # Compare received and expected + diff -sqad $TMP_DIR/recv.txt $TMP_DIR/expect.txt + + pktgen_quit + echo "########## SUCCESS: tx_cksum test completed ##########" + echo " Used TX_PCAP: $TX_PCAP" + echo " Used EXPECTED_PCAP: $EXPECTED_PCAP" + echo " Used CONF_FILE: $CONF_FILE" + echo "######################################################" +} + +run_all_tests() { + local TX_PCAP=$1 + local EXPECTED_PCAP=$2 + + PCAP_CNT=$(pcap_packet_count $TX_PCAP) + PCAP_LEN=$(pcap_length $TX_PCAP) + + export TX_PCAP + export EXPECTED_PCAP + export PCAP_CNT + export PCAP_LEN + + # TEST-1: Run without inline device in startup.conf + if [[ -f "$CONF_FILE" ]]; then + run_tx_cksum_test "tx_cksum" + sleep 1 + vpp_cleanup tx_cksum + sleep 1 + pktgen_cleanup + else + echo "Startup config file $CONF_FILE not found!" + fi + + sleep 10 + + # TEST-2: Run with inline device in startup.conf + if [[ -f "$INLINE_CONF_FILE" ]]; then + run_tx_cksum_test "tx_cksum_inline" + sleep 1 + vpp_cleanup tx_cksum_inline + sleep 1 + pktgen_cleanup + else + echo "Inline config file $INLINE_CONF_FILE not found!" 
+ fi +} + +#Run with single-seg pcap +run_all_tests "$TX_PCAP" "$EXPECTED_PCAP" + +sleep 10 + +#Run with multi-seg pcap +run_all_tests "$TX_MSEG_PCAP" "$EXPECTED_MSEG_PCAP" diff --git a/ci/test/tx_cksum/tx_cksum_inline.conf b/ci/test/tx_cksum/tx_cksum_inline.conf new file mode 100644 index 0000000000..0ce6c21af7 --- /dev/null +++ b/ci/test/tx_cksum/tx_cksum_inline.conf @@ -0,0 +1,57 @@ +unix { + log /tmp/tx_cksum/vpp.log + cli-listen /tmp/tx_cksum/cli.sock + #full-coredump + #interactive + nodaemon +} + +api-trace { + on +} + +logging { + default-syslog-log-level info +} + +cpu { + main-core 2 + corelist-workers 3 +} + +session +{ + event-queue-length 10 +} +#tcp { no-csum-offload } + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. + ## Default is 16384 (8192 if running unprivileged) +# buffers-per-numa 5000 + + ## Size of buffer data area + ## Default is 2048 + default data-size 2048 +} + +devices { + dev pci/0002:20:00.1 { + driver octeon + } + dev pci/0002:1d:00.0 { + driver octeon + } + dev pci/0002:01:00.2 { + driver octeon + port 0 { + name eth0 + num-rx-queues 4 + } + } +} +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } +} diff --git a/docs/aboutvpp/releasenotes/index.rst b/docs/aboutvpp/releasenotes/index.rst index b22febfa92..6c3a830cbd 100644 --- a/docs/aboutvpp/releasenotes/index.rst +++ b/docs/aboutvpp/releasenotes/index.rst @@ -6,6 +6,7 @@ Release notes .. toctree:: :maxdepth: 2 + v24.02 v23.10 v23.06 v23.02 diff --git a/docs/aboutvpp/releasenotes/v24.02.rst b/docs/aboutvpp/releasenotes/v24.02.rst new file mode 100644 index 0000000000..61051ce759 --- /dev/null +++ b/docs/aboutvpp/releasenotes/v24.02.rst @@ -0,0 +1,567 @@ +Release notes for VPP 24.02 +=========================== + +More than 262 commits since the previous release, including 123 fixes. 
+ +Features +-------- + +- Build System + + - Modify N\_PREFETCH on Arm N2 to achieve best perf (`bef2d6da4 `_) + - Add ability to disable some plugins from packaging and tests (`bc37878ec `_) + +- Infrastructure Library + + - Native AES-CTR implementation (`9caef2a35 `_) + +- Plugins + + - Amazon Elastic Network Adapter (ENA) device driver + + - Amazon Elastic Network Adapter (ENA) native driver (`2d725c612 `_) + + - CNat + + - Add flow hash config to cnat translation (`589fe7ca6 `_) + + - Crypto - ipsecmb + + - Bump intel-ipsec-mb version to 1.4 (`40242b88e `_) + - Bump intel-ipsec-mb version to 1.5 (`adb2c6799 `_) + + - Crypto - native + + - Add AES-CTR (`da3771c25 `_) + + - DPDK + + - Add ConnectX-6LX and ConnectX-7 support (`029f039d5 `_) + - Add Mellanox BlueField NICs (`006c071b0 `_) + - Bump to DPDK 23.11 (`327c32306 `_) + - Bump rdma-core to 49.0 (`b1a1209ce `_) + - Add ID for QAT 4xxx series VF support (`ebe2371e6 `_) + + - GTPU + + - Support non-G-PDU packets and PDU Session (`f9ab6985d `_) + + - IAVF Device driver + + - New driver using new dev infra (`47447f1f5 `_) + + - IPv6 Segment Routing Mobile + + - Implement SRv6 mobile API funcs (`68ac24428 `_) + + - Marvell Octeon device driver + + - Native driver for Marvell Octeon SoC (`01fe7ab88 `_) + + - NPTv6 + + - Icmp6 alg to handle icmp6 error messages (`ff344a98a `_) + +- VNET + + - FLOW + + - Add support for using l2tpv3 as RSS type (`6cb727394 `_) + + - IPSec + + - Allow receiving encrypted IP packets with TFC padding (`8fce54637 `_) + + - New Device Drivers Infra + + - New device driver infra (`38c619115 `_) + + - Session Layer + + - Make port range configurable (`e111bbd12 `_) + + +Known issues +------------ + +For the full list of issues please refer to fd.io `JIRA `_. 
+ +Fixed issues +------------ + +For the full list of fixed issues please refer to: +- fd.io `JIRA `_ +- git `commit log `_ + + +API changes +----------- + +Description of results: + +- *Definition changed*: indicates that the API file was modified between releases. +- *Only in image*: indicates the API is new for this release. +- *Only in file*: indicates the API has been removed in this release. + +============================================================= ================== +Message Name Result +============================================================= ================== +cnat_translation_details definition changed +cnat_translation_update definition changed +dev_attach only in image +dev_attach_reply only in image +dev_create_port_if only in image +dev_create_port_if_reply only in image +dev_detach only in image +dev_detach_reply only in image +dev_remove_port_if only in image +dev_remove_port_if_reply only in image +dhcp_client_detect_enable_disable only in image +dhcp_client_detect_enable_disable_reply only in image +gtpu_add_del_forward only in image +gtpu_add_del_forward_reply only in image +gtpu_add_del_tunnel_v2 only in image +gtpu_add_del_tunnel_v2_reply only in image +gtpu_get_transfer_counts only in image +gtpu_get_transfer_counts_reply only in image +gtpu_tunnel_v2_details only in image +gtpu_tunnel_v2_dump only in image +ipsec_sa_v5_details only in image +ipsec_sa_v5_dump only in image +ipsec_sad_entry_add_v2 only in image +ipsec_sad_entry_add_v2_reply only in image +lldp_details only in image +lldp_dump only in image +lldp_dump_reply only in image +ping_finished_event only in image +rdma_create_v4 only in image +rdma_create_v4_reply only in image +sr_mobile_localsid_add_del only in image +sr_mobile_localsid_add_del_reply only in image +sr_mobile_policy_add only in image +sr_mobile_policy_add_reply only in image +urpf_interface_details only in image +urpf_interface_dump only in image +want_ping_finished_events only in image 
+want_ping_finished_events_reply only in image +============================================================= ================== + +Found 38 api message signature differences + + +Newly deprecated API messages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These messages are still there in the API, but can and probably +will disappear in the next release. + +- rdma_create_v3 + +In-progress API messages +~~~~~~~~~~~~~~~~~~~~~~~~ + +These messages are provided for testing and experimentation only. +They are *not* subject to any compatibility process, +and therefore can arbitrarily change or disappear at *any* moment. +Also they may have less than satisfactory testing, making +them unsuitable for other use than the technology preview. +If you are intending to use these messages in production projects, +please collaborate with the feature maintainer on their productization. + +- abf_itf_attach_add_del +- abf_itf_attach_add_del_reply +- abf_itf_attach_details +- abf_itf_attach_dump +- abf_plugin_get_version +- abf_plugin_get_version_reply +- abf_policy_add_del +- abf_policy_add_del_reply +- abf_policy_details +- abf_policy_dump +- acl_plugin_use_hash_lookup_get +- acl_plugin_use_hash_lookup_get_reply +- acl_plugin_use_hash_lookup_set +- acl_plugin_use_hash_lookup_set_reply +- bpf_trace_filter_set +- bpf_trace_filter_set_reply +- cnat_get_snat_addresses +- cnat_get_snat_addresses_reply +- cnat_session_details +- cnat_session_dump +- cnat_session_purge +- cnat_session_purge_reply +- cnat_set_snat_addresses +- cnat_set_snat_addresses_reply +- cnat_set_snat_policy +- cnat_set_snat_policy_reply +- cnat_snat_policy_add_del_exclude_pfx +- cnat_snat_policy_add_del_exclude_pfx_reply +- cnat_snat_policy_add_del_if +- cnat_snat_policy_add_del_if_reply +- cnat_translation_del +- cnat_translation_del_reply +- cnat_translation_details +- cnat_translation_dump +- cnat_translation_update +- cnat_translation_update_reply +- det44_get_timeouts_reply +- det44_set_timeouts +- det44_set_timeouts_reply 
+- dev_attach +- dev_attach_reply +- dev_create_port_if +- dev_create_port_if_reply +- dev_detach +- dev_detach_reply +- dev_remove_port_if +- dev_remove_port_if_reply +- flowprobe_get_params +- flowprobe_get_params_reply +- flowprobe_interface_add_del +- flowprobe_interface_add_del_reply +- flowprobe_interface_details +- flowprobe_interface_dump +- flowprobe_set_params +- flowprobe_set_params_reply +- gbp_bridge_domain_add +- gbp_bridge_domain_add_reply +- gbp_bridge_domain_del +- gbp_bridge_domain_del_reply +- gbp_bridge_domain_details +- gbp_bridge_domain_dump +- gbp_bridge_domain_dump_reply +- gbp_contract_add_del +- gbp_contract_add_del_reply +- gbp_contract_details +- gbp_contract_dump +- gbp_endpoint_add +- gbp_endpoint_add_reply +- gbp_endpoint_del +- gbp_endpoint_del_reply +- gbp_endpoint_details +- gbp_endpoint_dump +- gbp_endpoint_group_add +- gbp_endpoint_group_add_reply +- gbp_endpoint_group_del +- gbp_endpoint_group_del_reply +- gbp_endpoint_group_details +- gbp_endpoint_group_dump +- gbp_ext_itf_add_del +- gbp_ext_itf_add_del_reply +- gbp_ext_itf_details +- gbp_ext_itf_dump +- gbp_recirc_add_del +- gbp_recirc_add_del_reply +- gbp_recirc_details +- gbp_recirc_dump +- gbp_route_domain_add +- gbp_route_domain_add_reply +- gbp_route_domain_del +- gbp_route_domain_del_reply +- gbp_route_domain_details +- gbp_route_domain_dump +- gbp_route_domain_dump_reply +- gbp_subnet_add_del +- gbp_subnet_add_del_reply +- gbp_subnet_details +- gbp_subnet_dump +- gbp_vxlan_tunnel_add +- gbp_vxlan_tunnel_add_reply +- gbp_vxlan_tunnel_del +- gbp_vxlan_tunnel_del_reply +- gbp_vxlan_tunnel_details +- gbp_vxlan_tunnel_dump +- gtpu_add_del_forward +- gtpu_add_del_forward_reply +- gtpu_add_del_tunnel_v2 +- gtpu_add_del_tunnel_v2_reply +- gtpu_get_transfer_counts +- gtpu_get_transfer_counts_reply +- gtpu_tunnel_v2_details +- gtpu_tunnel_v2_dump +- ikev2_child_sa_details +- ikev2_child_sa_dump +- ikev2_initiate_del_child_sa +- ikev2_initiate_del_child_sa_reply +- 
ikev2_initiate_del_ike_sa +- ikev2_initiate_del_ike_sa_reply +- ikev2_initiate_rekey_child_sa +- ikev2_initiate_rekey_child_sa_reply +- ikev2_initiate_sa_init +- ikev2_initiate_sa_init_reply +- ikev2_nonce_get +- ikev2_nonce_get_reply +- ikev2_profile_add_del +- ikev2_profile_add_del_reply +- ikev2_profile_details +- ikev2_profile_disable_natt +- ikev2_profile_disable_natt_reply +- ikev2_profile_dump +- ikev2_profile_set_auth +- ikev2_profile_set_auth_reply +- ikev2_profile_set_id +- ikev2_profile_set_id_reply +- ikev2_profile_set_ipsec_udp_port +- ikev2_profile_set_ipsec_udp_port_reply +- ikev2_profile_set_liveness +- ikev2_profile_set_liveness_reply +- ikev2_profile_set_ts +- ikev2_profile_set_ts_reply +- ikev2_profile_set_udp_encap +- ikev2_profile_set_udp_encap_reply +- ikev2_sa_details +- ikev2_sa_dump +- ikev2_set_esp_transforms +- ikev2_set_esp_transforms_reply +- ikev2_set_ike_transforms +- ikev2_set_ike_transforms_reply +- ikev2_set_local_key +- ikev2_set_local_key_reply +- ikev2_set_responder +- ikev2_set_responder_hostname +- ikev2_set_responder_hostname_reply +- ikev2_set_responder_reply +- ikev2_set_sa_lifetime +- ikev2_set_sa_lifetime_reply +- ikev2_set_tunnel_interface +- ikev2_set_tunnel_interface_reply +- ikev2_traffic_selector_details +- ikev2_traffic_selector_dump +- ip_neighbor_config_get +- ip_neighbor_config_get_reply +- ip_route_add_del_v2 +- ip_route_add_del_v2_reply +- ip_route_lookup_v2 +- ip_route_lookup_v2_reply +- ip_route_v2_details +- ip_route_v2_dump +- ip_session_redirect_add +- ip_session_redirect_add_reply +- ip_session_redirect_add_v2 +- ip_session_redirect_add_v2_reply +- ip_session_redirect_del +- ip_session_redirect_del_reply +- l2_emulation +- l2_emulation_reply +- lcp_default_ns_get_reply +- lcp_default_ns_set +- lcp_default_ns_set_reply +- lcp_itf_pair_add_del_v2 +- lcp_itf_pair_add_del_v2_reply +- lcp_itf_pair_details +- lldp_details +- mdata_enable_disable +- mdata_enable_disable_reply +- nat44_ed_vrf_tables_v2_details +- 
nat44_ed_vrf_tables_v2_dump +- nat44_ei_add_del_address_range +- nat44_ei_add_del_address_range_reply +- nat44_ei_add_del_static_mapping +- nat44_ei_add_del_static_mapping_reply +- nat44_ei_address_details +- nat44_ei_address_dump +- nat44_ei_del_session +- nat44_ei_del_session_reply +- nat44_ei_del_user +- nat44_ei_del_user_reply +- nat44_ei_forwarding_enable_disable +- nat44_ei_forwarding_enable_disable_reply +- nat44_ei_ha_flush +- nat44_ei_ha_flush_reply +- nat44_ei_ha_resync +- nat44_ei_ha_resync_completed_event +- nat44_ei_ha_resync_reply +- nat44_ei_ha_set_failover +- nat44_ei_ha_set_failover_reply +- nat44_ei_ha_set_listener +- nat44_ei_ha_set_listener_reply +- nat44_ei_interface_add_del_feature +- nat44_ei_interface_add_del_feature_reply +- nat44_ei_interface_details +- nat44_ei_interface_dump +- nat44_ei_ipfix_enable_disable +- nat44_ei_ipfix_enable_disable_reply +- nat44_ei_plugin_enable_disable +- nat44_ei_plugin_enable_disable_reply +- nat44_ei_set_addr_and_port_alloc_alg +- nat44_ei_set_addr_and_port_alloc_alg_reply +- nat44_ei_set_fq_options +- nat44_ei_set_fq_options_reply +- nat44_ei_set_mss_clamping +- nat44_ei_set_mss_clamping_reply +- nat44_ei_set_timeouts +- nat44_ei_set_timeouts_reply +- nat44_ei_set_workers +- nat44_ei_set_workers_reply +- nat44_ei_show_fq_options +- nat44_ei_show_fq_options_reply +- nat44_ei_show_running_config +- nat44_ei_show_running_config_reply +- nat44_ei_static_mapping_details +- nat44_ei_static_mapping_dump +- nat44_ei_user_details +- nat44_ei_user_dump +- nat44_ei_user_session_details +- nat44_ei_user_session_dump +- nat44_ei_user_session_v2_details +- nat44_ei_user_session_v2_dump +- nat44_ei_worker_details +- nat44_ei_worker_dump +- nat64_plugin_enable_disable +- nat64_plugin_enable_disable_reply +- npt66_binding_add_del +- npt66_binding_add_del_reply +- oddbuf_enable_disable +- oddbuf_enable_disable_reply +- pg_interface_enable_disable_coalesce +- pg_interface_enable_disable_coalesce_reply +- ping_finished_event 
+- pnat_binding_add +- pnat_binding_add_reply +- pnat_binding_add_v2 +- pnat_binding_add_v2_reply +- pnat_binding_attach +- pnat_binding_attach_reply +- pnat_binding_del +- pnat_binding_del_reply +- pnat_binding_detach +- pnat_binding_detach_reply +- pnat_bindings_details +- pnat_bindings_get +- pnat_bindings_get_reply +- pnat_interfaces_details +- pnat_interfaces_get +- pnat_interfaces_get_reply +- sample_macswap_enable_disable +- sample_macswap_enable_disable_reply +- set_ip_flow_hash_v3 +- set_ip_flow_hash_v3_reply +- sr_localsids_with_packet_stats_details +- sr_localsids_with_packet_stats_dump +- sr_mobile_localsid_add_del +- sr_mobile_localsid_add_del_reply +- sr_mobile_policy_add +- sr_mobile_policy_add_reply +- sr_policies_with_sl_index_details +- sr_policies_with_sl_index_dump +- sr_policy_add_v2 +- sr_policy_add_v2_reply +- sr_policy_mod_v2 +- sr_policy_mod_v2_reply +- sw_interface_ip6nd_ra_details +- sw_interface_ip6nd_ra_dump +- sw_interface_set_vxlan_gbp_bypass +- sw_interface_set_vxlan_gbp_bypass_reply +- test_addresses +- test_addresses2 +- test_addresses2_reply +- test_addresses3 +- test_addresses3_reply +- test_addresses_reply +- test_empty +- test_empty_reply +- test_enum +- test_enum_reply +- test_interface +- test_interface_reply +- test_prefix +- test_prefix_reply +- test_string +- test_string2 +- test_string2_reply +- test_string_reply +- test_vla +- test_vla2 +- test_vla2_reply +- test_vla3 +- test_vla3_reply +- test_vla4 +- test_vla4_reply +- test_vla5 +- test_vla5_reply +- test_vla_reply +- trace_capture_packets +- trace_capture_packets_reply +- trace_clear_cache +- trace_clear_cache_reply +- trace_clear_capture +- trace_clear_capture_reply +- trace_details +- trace_dump +- trace_dump_reply +- trace_filter_function_details +- trace_filter_function_dump +- trace_set_filter_function +- trace_set_filter_function_reply +- trace_set_filters +- trace_set_filters_reply +- trace_v2_details +- trace_v2_dump +- tracenode_enable_disable +- 
tracenode_enable_disable_reply +- vxlan_gbp_tunnel_add_del +- vxlan_gbp_tunnel_add_del_reply +- vxlan_gbp_tunnel_details +- vxlan_gbp_tunnel_dump +- want_ping_finished_events +- want_ping_finished_events_reply + +Patches that changed API definitions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +``src/plugins/urpf/urpf.api`` + +* `2fa69effc `_ urpf: add interface dump to API + +``src/plugins/srv6-mobile/sr_mobile_types.api`` + +* `68ac24428 `_ srv6-mobile: Implement SRv6 mobile API funcs + +``src/plugins/srv6-mobile/sr_mobile.api`` + +* `68ac24428 `_ srv6-mobile: Implement SRv6 mobile API funcs + +``src/plugins/npt66/npt66.api`` + +* `bdeee2194 `_ npt66: add show command and rx/tx counters + +``src/plugins/gtpu/gtpu.api`` + +* `f9ab6985d `_ gtpu: support non-G-PDU packets and PDU Session + +``src/plugins/dhcp/dhcp.api`` + +* `f3be34e44 `_ dhcp: api to enable client detect on interface + +``src/plugins/ping/ping.api`` + +* `bb1cde678 `_ ping: Simple binary API for running ping based on events + +``src/plugins/lldp/lldp.api`` + +* `9f8d3b9b2 `_ lldp: dump api + +``src/plugins/rdma/rdma.api`` + +* `04d262d1e `_ rdma: add rdma_create_v4 that handles flags properly + +``src/plugins/cnat/cnat.api`` + +* `589fe7ca6 `_ cnat: add flow hash config to cnat translation + +``src/vnet/dev/dev.api`` + +* `ddf6cec37 `_ dev: initial set of APIs + +``src/vnet/ipsec/ipsec.api`` + +* `0e2f188f7 `_ ipsec: huge anti-replay window support + +``src/vnet/ipsec/ipsec_types.api`` + +* `0e2f188f7 `_ ipsec: huge anti-replay window support + +``src/vnet/devices/virtio/virtio.api`` + +* `00c59e496 `_ virtio: virtio_flags api use enumflag instead of enum diff --git a/extras/scripts/crcchecker.py b/extras/scripts/crcchecker.py index 01cb02523d..7dcdb681e1 100755 --- a/extras/scripts/crcchecker.py +++ b/extras/scripts/crcchecker.py @@ -82,13 +82,15 @@ def filelist_from_git_ls(): def is_uncommitted_changes(): """Returns true if there are uncommitted changes in the repo""" - git_status = "git status 
--porcelain -uno" - returncode = run(git_status.split(), stdout=PIPE, stderr=PIPE) - if returncode.returncode != 0: - sys.exit(returncode.returncode) - - if returncode.stdout: - return True + # Don't run this check in the Jenkins CI + if os.getenv("FDIOTOOLS_IMAGE") is None: + git_status = "git status --porcelain -uno" + returncode = run(git_status.split(), stdout=PIPE, stderr=PIPE) + if returncode.returncode != 0: + sys.exit(returncode.returncode) + + if returncode.stdout: + return True return False diff --git a/scripts/ci/baseline.txt b/scripts/ci/baseline.txt new file mode 100644 index 0000000000..3a849b80c4 --- /dev/null +++ b/scripts/ci/baseline.txt @@ -0,0 +1,9 @@ +# +# SPDX-License-Identifier: BSD-3-Clause +# https://spdx.org/licenses +# Copyright (c) 2018 Marvell. +# +# commit id of base tag for this branch +# Upstream version v24.02 release +500ac0596126576e278e65a64597e8b87fdc55f8 +flowprobe: fix flush callbacks when multiple workers diff --git a/src/cmake/platform/octeon20.cmake b/src/cmake/platform/octeon20.cmake new file mode 100644 index 0000000000..e71b1148e4 --- /dev/null +++ b/src/cmake/platform/octeon20.cmake @@ -0,0 +1,5 @@ + +set(VPP_PLATFORM_CACHE_LINE_SIZE 64) +set(VPP_PLATFORM_MARCH_FLAGS -march=armv8.7-a+sve2) +set(VPP_PLATFORM_BUFFER_ALIGN 128) +set(VPP_PLATFORM_N_PREFETCHES 6) diff --git a/src/cmake/platform/octeon9.cmake b/src/cmake/platform/octeon9.cmake new file mode 100644 index 0000000000..b81d1705d8 --- /dev/null +++ b/src/cmake/platform/octeon9.cmake @@ -0,0 +1,4 @@ +set(VPP_PLATFORM_CACHE_LINE_SIZE 128) +set(VPP_PLATFORM_MARCH_FLAGS -march=armv8.2-a+crc+crypto+lse) +set(VPP_PLATFORM_BUFFER_ALIGN 128) +set(VPP_PLATFORM_N_PREFETCHES 6) diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c index f2d700a079..c84390c310 100644 --- a/src/plugins/crypto_native/aes_cbc.c +++ b/src/plugins/crypto_native/aes_cbc.c @@ -55,17 +55,17 @@ aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 i, j, 
count, n_left = n_ops; u32xN placeholder_mask = { }; u32xN len = { }; - vnet_crypto_key_index_t key_index[N_AES_BYTES]; - u8 *src[N_AES_BYTES] = {}; - u8 *dst[N_AES_BYTES] = {}; + vnet_crypto_key_index_t key_index[4 * N_AES_LANES]; + u8 *src[4 * N_AES_LANES] = {}; + u8 *dst[4 * N_AES_LANES] = {}; u8xN r[4] = {}; u8xN k[15][4] = {}; - for (i = 0; i < N_AES_BYTES; i++) + for (i = 0; i < 4 * N_AES_LANES; i++) key_index[i] = ~0; more: - for (i = 0; i < N_AES_BYTES; i++) + for (i = 0; i < 4 * N_AES_LANES; i++) if (len[i] == 0) { if (n_left == 0) @@ -198,7 +198,7 @@ aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[], len -= u32xN_splat (count); - for (i = 0; i < N_AES_BYTES; i++) + for (i = 0; i < 4 * N_AES_LANES; i++) { src[i] += count; dst[i] += count; diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c index 41fc5b8c94..51c6dbce84 100644 --- a/src/plugins/dev_ena/rx_node.c +++ b/src/plugins/dev_ena/rx_node.c @@ -251,7 +251,6 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_dev_rx_queue_t *rxq) { ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq); - vnet_dev_port_t *port = rxq->port; vnet_main_t *vnm = vnet_get_main (); vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b; ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8]; @@ -260,13 +259,13 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u16 *csi; uword n_rx_packets = 0, n_rx_bytes = 0; vlib_frame_bitmap_t head_bmp = {}; - u32 sw_if_index = port->intf.sw_if_index; - u32 hw_if_index = port->intf.hw_if_index; + u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); u32 n_trace, n_deq, n_left; u32 cq_next = q->cq_next; - u32 next_index = rxq->next_index; + u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); vlib_frame_t *next_frame; - vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 
*bi; int maybe_chained; diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c index 982436d9b4..033685722b 100644 --- a/src/plugins/dev_iavf/port.c +++ b/src/plugins/dev_iavf/port.c @@ -257,7 +257,7 @@ avf_msix_n_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line) iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (line), dyn_ctln_enabled.as_u32); vlib_node_set_interrupt_pending (vlib_get_main_by_index (line), - port->intf.rx_node_index); + vnet_dev_get_port_rx_node_idex (port)); } vnet_dev_rv_t diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c index ee6d7e8def..bf650f9bfb 100644 --- a/src/plugins/dev_iavf/rx_node.c +++ b/src/plugins/dev_iavf/rx_node.c @@ -249,14 +249,14 @@ iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 thr_idx = vlib_get_thread_index (); iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm); iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq); - vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0; u16 n_tail_desc = 0; u64 or_qw1 = 0; u32 *bi, *to_next, n_left_to_next; - u32 next_index = rxq->next_index; - u32 sw_if_index = port->intf.sw_if_index; - u32 hw_if_index = port->intf.hw_if_index; + u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); + u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); u16 next = arq->next; u16 size = rxq->size; u16 mask = size - 1; diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt index b7c25fe040..f6490883e2 100644 --- a/src/plugins/dev_octeon/CMakeLists.txt +++ b/src/plugins/dev_octeon/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright(c) 2022 Cisco Systems, Inc. 
-if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10") +if (NOT VPP_PLATFORM_NAME STREQUAL "octeon20" AND NOT VPP_PLATFORM_NAME STREQUAL "octeon10" AND NOT VPP_PLATFORM_NAME STREQUAL "octeon9") return() endif() @@ -11,25 +11,42 @@ vpp_plugin_find_library(dev-octeon OCTEON_ROC_LIB "libocteon-roc.a") if (NOT OCTEON_ROC_DIR) message("OCTEON ROC files not found - Marvell OCTEON device plugin disabled") - return() endif() if (NOT OCTEON_ROC_LIB) message("OCTEON ROC library (libocteon-roc.a) not found - Marvell OCTEON device plugin disabled") +endif() + +if (NOT OCTEON_ROC_DIR OR NOT OCTEON_ROC_LIB) + include (dev_octeon_virtio.mk) return () endif() include_directories (${OCTEON_ROC_DIR}/) +include_directories (${CMAKE_CURRENT_SOURCE_DIR}/dpu/) + +if (VPP_PLATFORM_NAME STREQUAL "octeon9") + add_compile_definitions(PLATFORM_OCTEON9) +endif() add_vpp_plugin(dev_octeon SOURCES init.c + cli.c format.c port.c queue.c roc_helper.c rx_node.c tx_node.c + flow.c + counter.c + crypto.c + ipsec.c + esp_encrypt.c + dpu/dpu.c + tm.c + pfc.c MULTIARCH_SOURCES rx_node.c @@ -39,3 +56,4 @@ add_vpp_plugin(dev_octeon ${OCTEON_ROC_LIB} ) +include (dev_octeon_virtio.mk) diff --git a/src/plugins/dev_octeon/README.md b/src/plugins/dev_octeon/README.md new file mode 100644 index 0000000000..eb07076b6d --- /dev/null +++ b/src/plugins/dev_octeon/README.md @@ -0,0 +1,87 @@ +# Octeon device plugin for VPP {#dev_octeon_doc} + +## Overview +This plugin provides native device support for Marvell OCTEON-9 and OCTEON-10 SoCs. +This OCTEON native implementation optimizes the interface between hardware +and VPP fast-path data structures. It integrates the following hardware +accelerators into VPP: +- Network interface controller (aka NIX) for packet ingress and egress + +## Supported SoC +- OCTEON-10 +- OCTEON-9 + + +## Usage +The following steps demonstrate how you may bring up VPP with dev_octeon, on the +OCTEON platform. 
+ +### Setup + +#### Configure NIX on OCTEON +-# Determine NIX PF on OCTEON +``` +# lspci -d 177d::0200 | grep 'a063' + 0002:02:00.0 Ethernet controller: Cavium, Inc. Device a063 (rev 08) + 0002:03:00.0 Ethernet controller: Cavium, Inc. Device a063 (rev 08) +``` + +-# Bind NIX PF to vfio-pci driver +``` +echo 0002:03:00.0 > /sys/bus/pci/devices/0002:03:00.0/driver/unbind +echo 0002:02:00.0 > /sys/bus/pci/devices/0002:02:00.0/driver/unbind + +echo 177d a063 > /sys/bus/pci/drivers/vfio-pci/new_id + +echo 0002:02:00.0 > /sys/bus/pci/drivers/vfio-pci/bind +echo 0002:03:00.0 > /sys/bus/pci/drivers/vfio-pci/bind + +``` + +### Launch VPP +VPP device bringup with dev_octeon is possible either through vppctl commands or +startup conf. + +#### Device bringup using vppctl +Launch VPP with startup conf. + +``` +# vpp -c /etc/vpp/startup.conf +# vppctl -s /run/vpp/cli.sock + _______ _ _ _____ ___ + __/ __/ _ \ (_)__ | | / / _ \/ _ \ + _/ _// // / / / _ \ | |/ / ___/ ___/ + /_/ /____(_)_/\___/ |___/_/ /_/ + + vpp# device attach pci/0002:02:00.0 driver octeon + vpp# device create-interface pci/0002:02:00.0 port 0 num-rx-queues 4 + vpp# device attach pci/0002:03:00.0 driver octeon + vpp# device create-interface pci/0002:03:00.0 port 0 num-rx-queues 4 +``` + +#### Device bringup using startup.conf device section +``` +devices { + dev pci/0002:02:00.0 + { + driver octeon + port 0 + { + name eth0 + num-rx-queues 4 + num-tx-queues 4 + } + } + + dev pci/0002:03:00.0 + { + driver octeon + port 0 + { + name eth1 + num-rx-queues 5 + num-tx-queues 5 + } + } +} +``` diff --git a/src/plugins/dev_octeon/README_VIRTIO.md b/src/plugins/dev_octeon/README_VIRTIO.md new file mode 100644 index 0000000000..dbf03dfd3c --- /dev/null +++ b/src/plugins/dev_octeon/README_VIRTIO.md @@ -0,0 +1,123 @@ +# Octeon virtio device plugin for VPP {#dev_octeon_virtio_doc} + +## Overview + +This plugin is a virtio device plugin for VPP, supporting packet input and output to +and from the HOST virtio interface over 
PCIe, with the Marvell OCTEON SoC operating +in endpoint mode. + +This plugin uses the DAO library to communicate with the HOST device. The DAO library +employs a platform device to transmit and receive data to and from the HOST device. +Because platform devices aren't situated on standard buses such as PCI or USB, this plugin +enlists a virtual bus with VPP, identifying it as virtio. To enhance performance, +this plugin utilizes a dedicated core to transfer host descriptors to Octeon. + +An alternative way for Host-OCTEON communication is to use the SDP interface. The +primary difference is that SDP interfaces utilize NIX device bandwidth, thereby +limiting the device to 50Gbps when used in endpoint NIC mode. In contrast, by +using the Virtio plugin, OCTEON can function as a 100Gbps NIC. + +## Supported SoC +- OCTEON-10 + +## Usage +The following steps demonstrate how you may bring up VPP with dev_octeon_virtio, on the +OCTEON platform. + +### Setup + +#### Configure DMA and NPA devices on OCTEON +-# Determine DMA/DPI device on OCTEON. +``` +# lspci -d 177d:a080:0880 + 0000:06:00.0 System peripheral: Cavium, Inc. Device a080 +``` +-# Bind and Create (2 + 2 * number of workers) DMA devices. +``` +echo 0000:06:00.0 > /sys/bus/pci/devices/0000:06:00.0/driver/unbind +echo octeontx2-dpi > /sys/bus/pci/devices/0000:06:00.0/driver_override +echo 0000:06:00.0 > /sys/bus/pci/drivers_probe +echo 32 >/sys/bus/pci/devices/0000:06:00.0/sriov_numvfs + +``` +-# Determine NPA PCI on OCTEON and bind to vfio-pci. +``` +#lspci -d 177d:a0fb:0880 +0002:17:00.0 System peripheral: Cavium, Inc. 
Device a0fb (rev 54) + +echo 0002:17:00.0 > /sys/bus/pci/devices/0002:17:00.0/driver/unbind +echo 177d a0fb > /sys/bus/pci/drivers/vfio-pci/new_id +echo 0002:17:00.0 > /sys/bus/pci/drivers/vfio-pci/bind +``` +-# Bind platform devices pem0-bar4-mem and dpi_sdp_regs to vfio-platform +``` +echo "vfio-platform" | sudo tee "/sys/bus/platform/devices/*pem0-bar4-mem/driver_override" > /dev/null +echo "*pem0-bar4-mem" | sudo tee "/sys/bus/platform/drivers/vfio-platform/bind" > /dev/null +echo "vfio-platform" | sudo tee "/sys/bus/platform/devices/*dpi_sdp_regs/driver_override" > /dev/null +echo "*dpi_sdp_regs" | sudo tee "/sys/bus/platform/drivers/vfio-platform/bind" > /dev/null +Note: Replace * with the actual runtime address attached to the platform device. + +``` +### Launch VPP +VPP device bringup with dev_octeon_virtio is possible either through vppctl commands or +startup conf. This plugin takes the following device arguments on the first device attach; +arguments passed on subsequent device attaches are ignored. + +nb_virtio - Maximum number of virtio devices that will be configured. +dma - List of all DMA devices. +misc - List of all miscellaneous devices (for example, the NPA device). + +The number of DMA devices needed is calculated as: + +2 (for control) + 2 (for virtio service thread) + 2 x (number of workers) + +#### Device bringup using startup.conf device section +``` +cpu { + main-core 1 + corelist-workers 8-9 + corelist-virtio-ctrl 7 +} + +devices { + dev virtio/0 + { + driver octeon_virtio + port 0 + { + name oct_virtio/0 + num-rx-queues 4 + num-tx-queues 4 + } + args 'nb_virtio=2,dma=\"0000:06:00.1,0000:06:00.2,0000:06:00.3,0000:06:00.4,0000:06:00.5,0000:06:00.6,0000:06:00.7,0000:06:01.1,0000:06:01.2,0000:06:01.3\",misc=\"0002:17:00.0\"' + } + + dev virtio/1 + { + driver octeon_virtio + port 1 + { + name oct_virtio/1 + num-rx-queues 2 + num-tx-queues 3 + } + } +} +``` + +#### Device bringup using vppctl +Launch VPP with startup conf. 
+ +``` +# vpp -c /etc/vpp/startup.conf +# vppctl -s /run/vpp/cli.sock + _______ _ _ _____ ___ + __/ __/ _ \ (_)__ | | / / _ \/ _ \ + _/ _// // / / / _ \ | |/ / ___/ ___/ + /_/ /____(_)_/\___/ |___/_/ /_/ + + vpp# vppctl device attach virtio/0 driver octeon_virtio args nb_virtio=2,dma=\"0000:06:00.1,0000:06:00.2,0000:06:00.3,0000:06:00.4,0000:06:00.5,0000:06:00.6,0000:06:00.7,0000:06:01.1,0000:06:01.2,0000:06:01.3\",misc=\"0002:17:00.0\" + vpp# vppctl device create-interface virtio/0 port 0 num-rx-queues 2 num-tx-queues 3 + vpp# vppctl device attach virtio/1 driver octeon_virtio + vpp# vppctl device create-interface virtio/1 port 1 num-rx-queues 2 num-tx-queues 3 +``` diff --git a/src/plugins/dev_octeon/cli.c b/src/plugins/dev_octeon/cli.c new file mode 100644 index 0000000000..e1bb648d22 --- /dev/null +++ b/src/plugins/dev_octeon/cli.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +/** + * @file + * @brief OCTEON CLI implementation. 
+ */ + +#include +#include +#include +#include + +static const char *ul = "=====================================================" + "========================="; + +static void +oct_print_global_counters (vlib_main_t *vm, u64 **stat, u32 n_threads) +{ + u64 global_stat[OCT_MAX_CRYPTO_COUNTERS] = { 0 }; + oct_crypto_main_t *ocm = &oct_crypto_main; + unsigned int n_global_stats = 0; + vlib_simple_counter_main_t *cm; + u32 cnt_idx, thread_idx = 0; + + for (thread_idx = 0; thread_idx < n_threads; thread_idx++) + { + for (cnt_idx = 0; cnt_idx < OCT_MAX_CRYPTO_COUNTERS; cnt_idx++) + { + if (stat[cnt_idx][thread_idx]) + { + global_stat[cnt_idx] += stat[cnt_idx][thread_idx]; + n_global_stats++; + } + } + } + + if (!n_global_stats) + return; + + /* Display cumulative counters */ + vlib_cli_output (vm, "%-16s %-40s %-20s", "", "Global counter", "Value"); + vlib_cli_output (vm, "%-16s %-.40s %-.20s", "", ul, ul); + +#define _(i, s, d) \ + cm = &ocm->s##_counter; \ + if (global_stat[i]) \ + vlib_cli_output (vm, "%-16s %-40s %20Ld", "", cm->name, global_stat[i]); + foreach_crypto_counter; +#undef _ +} + +unsigned int +oct_get_per_thread_stats (u64 **stat, u32 n_threads, u64 *threads_with_stats) +{ + unsigned int cnt_idx, thread_idx = 0, n_threads_with_stats = 0; + + /* Identify threads that have non-zero Octeon crypto counters */ + for (thread_idx = 0; thread_idx < n_threads; thread_idx++) + { + for (cnt_idx = 0; cnt_idx < OCT_MAX_CRYPTO_COUNTERS; cnt_idx++) + { + if (stat[cnt_idx][thread_idx]) + { + threads_with_stats[n_threads_with_stats++] = thread_idx; + break; + } + } + } + + return n_threads_with_stats; +} + +static void +oct_print_per_thread_counters (vlib_main_t *vm, u64 **stat, u32 n_threads) +{ + unsigned int idx, thread_idx = 0, n_threads_with_stats = 0; + oct_crypto_main_t *ocm = &oct_crypto_main; + u64 threads_with_stats[n_threads]; + vlib_simple_counter_main_t *cm; + + n_threads_with_stats = + oct_get_per_thread_stats (stat, n_threads, threads_with_stats); + + if 
(!n_threads_with_stats) + return; + + vlib_cli_output (vm, "%-16s %-40s %-20s", "Thread", "Per-thread counter", + "Value"); + vlib_cli_output (vm, "%-.16s %-.40s %-.20s", ul, ul, ul); + + for (idx = 0; idx < n_threads_with_stats; idx++) + { + thread_idx = threads_with_stats[idx]; + + vlib_cli_output (vm, "%-16s", vlib_worker_threads[thread_idx].name); + + /* clang-format off */ +#define _(i, s, d) \ + cm = &ocm->s##_counter; \ + if (stat[i][thread_idx]) \ + vlib_cli_output (vm, "%-16s %-40s %20Ld", "", cm->name, \ + stat[i][thread_idx]); + foreach_crypto_counter; +#undef _ + /* clang-format on */ + } + + vlib_cli_output (vm, "\n"); + + return; +} + +static clib_error_t * +oct_crypto_counters_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unsigned int cnt_idx = 0, thread_idx = 0; + oct_crypto_main_t *ocm = &oct_crypto_main; + vlib_simple_counter_main_t *cm; + u64 *stat[OCT_MAX_CRYPTO_COUNTERS] = { 0 }; + counter_t *counters = NULL; + u32 n_threads = vlib_get_n_threads (); + + if (!ocm->n_cpt) + return clib_error_create ( + "No Crypto device attached to dev-octeon plugin"); + +#define _(i, s, d) \ + cm = &ocm->s##_counter; \ + vec_validate_init_empty (stat[i], n_threads, 0); \ + for (thread_idx = 0; thread_idx < n_threads; thread_idx++) \ + { \ + counters = cm->counters[thread_idx]; \ + stat[i][thread_idx] = counters[0]; \ + } + foreach_crypto_counter; +#undef _ + + oct_print_per_thread_counters (vm, stat, n_threads); + + oct_print_global_counters (vm, stat, n_threads); + + for (cnt_idx = 0; cnt_idx < OCT_MAX_CRYPTO_COUNTERS; cnt_idx++) + vec_free (stat[cnt_idx]); + + return 0; +} + +/*? 
+ * This command displays Octeon crypto counters + * + * @cliexpar + * Example of how to display Octeon crypto counters: + * @cliexstart{show octeon crypto counters} + * Per-thread counter Value + * ======================================== ==================== + * + * crypto-inflight-operations 8 + * crypto-success-packets 8 + * + * Global counter Value + * ======================================== ==================== + * crypto-inflight-operations 8 + * crypto-success-packets 8 + * @cliexend +?*/ +VLIB_CLI_COMMAND (oct_crypto_counters_command, static) = { + .path = "show octeon crypto counters", + .short_help = "show octeon crypto counters", + .function = oct_crypto_counters_command_fn, +}; + +static clib_error_t * +oct_crypto_counters_clear_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_simple_counter_main_t *cm; + oct_crypto_main_t *ocm = &oct_crypto_main; + + if (!ocm->n_cpt) + return clib_error_create ( + "No Crypto device attached to dev-octeon plugin"); + +#define _(i, s, d) \ + cm = &ocm->s##_counter; \ + vlib_clear_simple_counters (cm); + foreach_crypto_counter; +#undef _ + + return 0; +} + +/*? 
+ * This command clears Octeon crypto counters
+ *
+ * @cliexpar
+ * @cliexstart{clear octeon crypto counters}
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (oct_crypto_counters_clear_command, static) = {
+  .path = "clear octeon crypto counters",
+  .short_help = "clear octeon crypto counters",
+  .function = oct_crypto_counters_clear_command_fn,
+};
+
+static clib_error_t *
+oct_ipsec_inline_counters_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                      vlib_cli_command_t *cmd)
+{
+  struct roc_nix_stats stats;
+  oct_inl_dev_main_t *oim = &oct_inl_dev_main;
+
+  if (!oim->inl_dev)
+    {
+      return clib_error_create (
+        "No Inline device attached to dev-octeon plugin");
+    }
+
+  roc_nix_inl_dev_stats_get (&stats);
+
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_ucast", stats.rx_ucast);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_bcast", stats.rx_bcast);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_mcast", stats.rx_mcast);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_drop", stats.rx_drop);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_fcs", stats.rx_fcs);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_err", stats.rx_err);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_drop_bcast", stats.rx_drop_bcast);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_drop_mcast", stats.rx_drop_mcast);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_drop_l3_bcast",
+                   stats.rx_drop_l3_bcast);
+  vlib_cli_output (vm, "%-40s %20Ld", "rx_drop_l3_mcast",
+                   stats.rx_drop_l3_mcast);
+
+  return 0;
+}
+
+/*?
+ * This command displays OCTEON IPsec inline device counters
+ *
+ * @cliexpar
+ * Example of how to display OCTEON IPsec inline device counters:
+ * @cliexstart{show octeon ipsec inline counters}
+ * rx_ucast                                                   10
+ * rx_bcast                                                    0
+ * rx_mcast                                                    0
+ * rx_drop                                                     0
+ * rx_fcs                                                      0
+ * rx_err                                                      0
+ * rx_drop_bcast                                               0
+ * rx_drop_mcast                                               0
+ * rx_drop_l3_bcast                                            0
+ * rx_drop_l3_mcast                                            0
+ * @cliexend
+?*/
+
+VLIB_CLI_COMMAND (oct_ipsec_inline_counters_command, static) = {
+  .path = "show octeon ipsec inline counters",
+  .short_help = "show octeon ipsec inline counters",
+  .function = oct_ipsec_inline_counters_command_fn,
+};
+
+static clib_error_t *
+oct_ipsec_inline_counters_clear_command_fn (vlib_main_t *vm,
+                                            unformat_input_t *input,
+                                            vlib_cli_command_t *cmd)
+{
+  roc_nix_inl_dev_stats_reset ();
+
+  return 0;
+}
+
+/*?
+ * This command clears OCTEON IPsec inline device counters
+ *
+ * @cliexpar
+ * @cliexstart{clear octeon ipsec inline counters}
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (oct_ipsec_inline_counters_clear_command, static) = {
+  .path = "clear octeon ipsec inline counters",
+  .short_help = "clear octeon ipsec inline counters",
+  .function = oct_ipsec_inline_counters_clear_command_fn,
+};
+
+static clib_error_t *
+oct_aura_available_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  oct_rxq_t *crq;
+  int i;
+
+  if (oct_main.use_single_rx_aura && oct_main.rx_aura_handle)
+    vlib_cli_output (vm, "rx queue aura 0x%llx avl_count %llu\n\n",
+                     oct_main.rx_aura_handle,
+                     roc_npa_aura_op_available (oct_main.rx_aura_handle));
+
+  pool_foreach_pointer (dev, dm->devices)
+    {
+      oct_device_t *od = vnet_dev_get_data (dev);
+
+      if (od->type == OCT_DEVICE_TYPE_RVU_PF ||
+          od->type == OCT_DEVICE_TYPE_RVU_VF ||
+          od->type == OCT_DEVICE_TYPE_SDP_VF ||
+          od->type == OCT_DEVICE_TYPE_LBK_VF)
+        {
+          vlib_cli_output (vm, "Interface: %U", format_vnet_dev_log, dev, 0);
+          vlib_cli_output (vm, "%-.25s", ul);
+          if
(!oct_main.use_single_rx_aura)
+            {
+              for (i = 0; i < dev->ports[0]->intf.num_rx_queues; i++)
+                {
+                  crq =
+                    vnet_dev_get_rx_queue_data (dev->ports[0]->rx_queues[i]);
+                  vlib_cli_output (
+                    vm, "rx queue %d aura 0x%llx avl_count %llu\n", i,
+                    crq->aura_handle,
+                    roc_npa_aura_op_available (crq->aura_handle));
+                }
+            }
+          for (i = 0; i < dev->ports[0]->intf.num_tx_queues; i++)
+            {
+              vlib_cli_output (
+                vm, "tx queue %d aura 0x%llx avl_count %llu\n", i,
+                od->ctqs[i]->aura_handle,
+                roc_npa_aura_op_available (od->ctqs[i]->aura_handle));
+            }
+          if (oct_main.inl_dev_initialized && roc_model_is_cn10k ())
+            {
+              crq = vnet_dev_get_rx_queue_data (dev->ports[0]->rx_queues[0]);
+              vlib_cli_output (
+                vm, "meta_aura_handle 0x%llx avl_count %llu\n",
+                crq->rq.meta_aura_handle,
+                roc_npa_aura_op_available (crq->rq.meta_aura_handle));
+            }
+          vlib_cli_output (vm, "\n");
+        }
+    }
+  return 0;
+}
+
+/*?
+ * This command displays OCTEON aura available counts
+ *
+ * @cliexpar
+ * @cliexstart{show octeon aura available}
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (oct_aura_available_command, static) = {
+  .path = "show octeon aura available",
+  .short_help = "show octeon aura available",
+  .function = oct_aura_available_command_fn,
+};
diff --git a/src/plugins/dev_octeon/common.h b/src/plugins/dev_octeon/common.h
index a7a051526d..a2cda69dc9 100644
--- a/src/plugins/dev_octeon/common.h
+++ b/src/plugins/dev_octeon/common.h
@@ -12,7 +12,8 @@
 #include
 
 static_always_inline u32
-oct_aura_free_all_buffers (vlib_main_t *vm, u64 aura_handle, u16 hdr_off)
+oct_aura_free_all_buffers (vlib_main_t *vm, u64 aura_handle, u16 hdr_off,
+                           u32 num_buffers)
 {
   u32 n = 0;
   u64 iova;
@@ -20,8 +21,11 @@ oct_aura_free_all_buffers (vlib_main_t *vm, u64 aura_handle, u16 hdr_off)
   while ((iova = roc_npa_aura_op_alloc (aura_handle, 0)))
     {
       vlib_buffer_t *b = (void *) iova + hdr_off;
-      vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b));
+      u32 bi = vlib_get_buffer_index (vm, b);
+      vlib_buffer_free_no_next (vm, &bi, 1);
       n++;
+      if
(num_buffers && n == num_buffers) + break; } return n; } diff --git a/src/plugins/dev_octeon/counter.c b/src/plugins/dev_octeon/counter.c new file mode 100644 index 0000000000..b8b283f2b2 --- /dev/null +++ b/src/plugins/dev_octeon/counter.c @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "oct", + .subclass_name = "counters", +}; + +typedef enum +{ + OCT_PORT_CTR_RX_BYTES, + OCT_PORT_CTR_TX_BYTES, + OCT_PORT_CTR_RX_PACKETS, + OCT_PORT_CTR_TX_PACKETS, + OCT_PORT_CTR_RX_DROPS, + OCT_PORT_CTR_TX_DROPS, + OCT_PORT_CTR_RX_DROP_BYTES, + OCT_PORT_CTR_RX_UCAST, + OCT_PORT_CTR_TX_UCAST, + OCT_PORT_CTR_RX_MCAST, + OCT_PORT_CTR_TX_MCAST, + OCT_PORT_CTR_RX_BCAST, + OCT_PORT_CTR_TX_BCAST, + OCT_PORT_CTR_RX_FCS, + OCT_PORT_CTR_RX_ERR, + OCT_PORT_CTR_RX_DROP_MCAST, + OCT_PORT_CTR_RX_DROP_BCAST, + OCT_PORT_CTR_RX_DROP_L3_MCAST, + OCT_PORT_CTR_RX_DROP_L3_BCAST, +} oct_port_counter_id_t; + +vnet_dev_counter_t oct_port_counters[] = { + VNET_DEV_CTR_RX_BYTES (OCT_PORT_CTR_RX_BYTES), + VNET_DEV_CTR_RX_PACKETS (OCT_PORT_CTR_RX_PACKETS), + VNET_DEV_CTR_RX_DROPS (OCT_PORT_CTR_RX_DROPS), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BYTES, RX, BYTES, "drop"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_FCS, RX, PACKETS, "fcs"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_ERR, RX, PACKETS, "error"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_MCAST, RX, PACKETS, + "drop multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BCAST, RX, PACKETS, + "drop broadcast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_MCAST, RX, PACKETS, + "drop L3 multicast"), + 
VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_BCAST, RX, PACKETS, + "drop L3 broadcast"), + + VNET_DEV_CTR_TX_BYTES (OCT_PORT_CTR_TX_BYTES), + VNET_DEV_CTR_TX_PACKETS (OCT_PORT_CTR_TX_PACKETS), + VNET_DEV_CTR_TX_DROPS (OCT_PORT_CTR_TX_DROPS), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"), +}; + +typedef enum +{ + OCT_RXQ_CTR_BYTES, + OCT_RXQ_CTR_PKTS, + OCT_RXQ_CTR_DROPS, + OCT_RXQ_CTR_DROP_BYTES, + OCT_RXQ_CTR_ERR, +} oct_rxq_counter_id_t; + +vnet_dev_counter_t oct_rxq_counters[] = { + VNET_DEV_CTR_RX_BYTES (OCT_RXQ_CTR_BYTES), + VNET_DEV_CTR_RX_PACKETS (OCT_RXQ_CTR_PKTS), + VNET_DEV_CTR_RX_DROPS (OCT_RXQ_CTR_DROPS), + VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_DROP_BYTES, RX, BYTES, "drop"), + VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_ERR, RX, PACKETS, "error"), +}; + +typedef enum +{ + OCT_TXQ_CTR_BYTES, + OCT_TXQ_CTR_PKTS, + OCT_TXQ_CTR_DROPS, + OCT_TXQ_CTR_DROP_BYTES, +} oct_txq_counter_id_t; + +vnet_dev_counter_t oct_txq_counters[] = { + VNET_DEV_CTR_TX_BYTES (OCT_TXQ_CTR_BYTES), + VNET_DEV_CTR_TX_PACKETS (OCT_TXQ_CTR_PKTS), + VNET_DEV_CTR_TX_DROPS (OCT_TXQ_CTR_DROPS), + VNET_DEV_CTR_VENDOR (OCT_TXQ_CTR_DROP_BYTES, TX, BYTES, "drop"), +}; + +static vnet_dev_rv_t +oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) 
+{ + u8 *s = 0; + va_list va; + + va_start (va, fmt); + s = va_format (s, fmt, &va); + va_end (va); + + log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv); + + vec_free (s); + return VNET_DEV_ERR_INTERNAL; +} + +void +oct_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_add_counters (vm, port, oct_port_counters, + ARRAY_LEN (oct_port_counters)); + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + vnet_dev_rx_queue_add_counters (vm, rxq, oct_rxq_counters, + ARRAY_LEN (oct_rxq_counters)); + } + + foreach_vnet_dev_port_tx_queue (txq, port) + { + vnet_dev_tx_queue_add_counters (vm, txq, oct_txq_counters, + ARRAY_LEN (oct_txq_counters)); + } +} + +vnet_dev_rv_t +oct_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + struct roc_nix_stats stats; + + if ((rrv = roc_nix_stats_get (nix, &stats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_get() failed"); + + foreach_vnet_dev_counter (c, port->counter_main) + { + switch (c->user_data) + { + case OCT_PORT_CTR_RX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_octs); + break; + case OCT_PORT_CTR_TX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.tx_octs); + break; + case OCT_PORT_CTR_RX_PACKETS: + vnet_dev_counter_value_update ( + vm, c, stats.rx_ucast + stats.rx_bcast + stats.rx_mcast); + break; + case OCT_PORT_CTR_TX_PACKETS: + vnet_dev_counter_value_update ( + vm, c, stats.tx_ucast + stats.tx_bcast + stats.tx_mcast); + break; + case OCT_PORT_CTR_RX_DROPS: + vnet_dev_counter_value_update (vm, c, stats.rx_drop); + break; + case OCT_PORT_CTR_TX_DROPS: + vnet_dev_counter_value_update (vm, c, stats.tx_drop); + break; + case OCT_PORT_CTR_RX_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_octs); + break; + case OCT_PORT_CTR_RX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_ucast); + break; + case 
OCT_PORT_CTR_TX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_ucast); + break; + case OCT_PORT_CTR_RX_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_mcast); + break; + case OCT_PORT_CTR_TX_MCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_mcast); + break; + case OCT_PORT_CTR_RX_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_bcast); + break; + case OCT_PORT_CTR_TX_BCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_bcast); + break; + case OCT_PORT_CTR_RX_FCS: + vnet_dev_counter_value_update (vm, c, stats.rx_fcs); + break; + case OCT_PORT_CTR_RX_ERR: + vnet_dev_counter_value_update (vm, c, stats.rx_err); + break; + case OCT_PORT_CTR_RX_DROP_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_mcast); + break; + case OCT_PORT_CTR_RX_DROP_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_bcast); + break; + case OCT_PORT_CTR_RX_DROP_L3_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_mcast); + break; + case OCT_PORT_CTR_RX_DROP_L3_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_bcast); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_rxq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + struct roc_nix_stats_queue qstats; + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_get (nix, crq->rq.qid, 1, &qstats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed"); + + foreach_vnet_dev_counter (c, rxq->counter_main) + { + switch (c->user_data) + { + case OCT_RXQ_CTR_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.rx_octs); + break; + case OCT_RXQ_CTR_PKTS: + vnet_dev_counter_value_update (vm, c, qstats.rx_pkts); + break; + case OCT_RXQ_CTR_DROPS: + vnet_dev_counter_value_update (vm, c, qstats.rx_drop_pkts); + break; + case 
OCT_RXQ_CTR_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.rx_drop_octs); + break; + case OCT_RXQ_CTR_ERR: + vnet_dev_counter_value_update (vm, c, qstats.rx_error_pkts); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_txq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + struct roc_nix_stats_queue qstats; + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_get (nix, ctq->sq.qid, 0, &qstats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed"); + + foreach_vnet_dev_counter (c, txq->counter_main) + { + switch (c->user_data) + { + case OCT_TXQ_CTR_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.tx_octs); + break; + case OCT_TXQ_CTR_PKTS: + vnet_dev_counter_value_update (vm, c, qstats.tx_pkts); + break; + case OCT_TXQ_CTR_DROPS: + vnet_dev_counter_value_update (vm, c, qstats.tx_drop_pkts); + break; + case OCT_TXQ_CTR_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.tx_drop_octs); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +void +oct_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_reset (nix))) + oct_roc_err (dev, rrv, "roc_nix_stats_reset() failed"); +} + +void +oct_rxq_clear_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_t *dev = rxq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_reset (nix, crq->rq.qid, 1))) + oct_roc_err (dev, rrv, + "roc_nix_stats_queue_reset() failed for rx queue %u", + rxq->queue_id); +} + +void +oct_txq_clear_counters 
(vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_reset (nix, ctq->sq.qid, 0))) + oct_roc_err (dev, rrv, + "roc_nix_stats_queue_reset() failed for tx queue %u", + txq->queue_id); +} diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c new file mode 100644 index 0000000000..6bc36bea0b --- /dev/null +++ b/src/plugins/dev_octeon/crypto.c @@ -0,0 +1,2007 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include +#include + +oct_crypto_main_t oct_crypto_main; + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "octeon", + .subclass_name = "init", +}; + +static_always_inline void +oct_map_keyindex_to_session (oct_crypto_sess_t *sess, u32 key_index, u8 type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey; + + ckey = vec_elt_at_index (ocm->keys[type], key_index); + + ckey->sess = sess; + sess->key_index = key_index; +} + +static_always_inline oct_crypto_sess_t * +oct_crypto_session_alloc (vlib_main_t *vm, u8 type) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_sess_t *addr = NULL; + oct_crypto_main_t *ocm; + oct_crypto_dev_t *ocd; + u32 size; + + ocm = &oct_crypto_main; + ocd = ocm->crypto_dev[type]; + + size = sizeof (oct_crypto_sess_t); + + addr = oct_plt_init_param.oct_plt_zmalloc (size, ROC_CPTR_CACHE_LINE_SZ); + if (addr == NULL) + { + log_err (ocd->dev, "Failed to allocate crypto session memory"); + return NULL; + } + + return addr; +} + +static_always_inline i32 +oct_crypto_session_create (vlib_main_t *vm, vnet_crypto_key_index_t key_index, + int op_type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_sess_t *session; + vnet_crypto_key_t 
*key; + oct_crypto_key_t *ckey; + oct_crypto_dev_t *ocd; + + ocd = ocm->crypto_dev[op_type]; + + key = vnet_crypto_get_key (key_index); + + if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + { + /* + * Read crypto or integ key session. And map link key index to same. + */ + if (key->index_crypto != UINT32_MAX) + { + ckey = vec_elt_at_index (ocm->keys[op_type], key->index_crypto); + session = ckey->sess; + } + else if (key->index_integ != UINT32_MAX) + { + ckey = vec_elt_at_index (ocm->keys[op_type], key->index_integ); + session = ckey->sess; + } + else + return -1; + } + else + { + session = oct_crypto_session_alloc (vm, op_type); + if (session == NULL) + return -1; + session->crypto_dev = ocd; + } + + oct_map_keyindex_to_session (session, key_index, op_type); + return 0; +} + +void +oct_crypto_key_del_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey_linked; + oct_crypto_key_t *ckey; + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + if (ckey->sess) + { + /* + * If in case link algo is pointing to same sesison, reset the pointer. + */ + if (ckey->sess->key_index != key_index) + { + ckey_linked = vec_elt_at_index ( + ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], ckey->sess->key_index); + ckey_linked->sess = NULL; + } + + /* Trigger CTX flush + invalidate to remove from CTX_CACHE */ + if (oct_hw_ctx_cache_enable ()) + roc_cpt_lf_ctx_flush (&ckey->sess->crypto_dev->lf, + &ckey->sess->cpt_ctx.se_ctx, true); + + oct_plt_init_param.oct_plt_free (ckey->sess); + ckey->sess = NULL; + } + + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + if (ckey->sess) + { + /* + * If in case link algo is pointing to same sesison, reset the pointer. 
+ */ + if (ckey->sess->key_index != key_index) + { + ckey_linked = vec_elt_at_index ( + ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], ckey->sess->key_index); + ckey_linked->sess = NULL; + } + + /* Trigger CTX flush + invalidate to remove from CTX_CACHE */ + if (oct_hw_ctx_cache_enable ()) + roc_cpt_lf_ctx_flush (&ckey->sess->crypto_dev->lf, + &ckey->sess->cpt_ctx.se_ctx, true); + + oct_plt_init_param.oct_plt_free (ckey->sess); + ckey->sess = NULL; + } +} + +void +oct_crypto_key_add_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey; + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + if (ckey->sess == NULL) + { + if (oct_crypto_session_create (vm, key_index, + VNET_CRYPTO_OP_TYPE_ENCRYPT)) + { + clib_warning ("Unable to create crypto session"); + return; + } + } + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + if (ckey->sess == NULL) + { + if (oct_crypto_session_create (vm, key_index, + VNET_CRYPTO_OP_TYPE_DECRYPT)) + { + clib_warning ("Unable to create crypto session"); + return; + } + } +} + +void +oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + + if (kop == VNET_CRYPTO_KEY_OP_DEL) + { + oct_crypto_key_del_handler (vm, idx); + return; + } + oct_crypto_key_add_handler (vm, idx); + + ocm->started = 1; +} + +static_always_inline void +oct_crypto_session_free (vlib_main_t *vm, oct_crypto_sess_t *sess) +{ + extern oct_plt_init_param_t oct_plt_init_param; + + oct_plt_init_param.oct_plt_free (sess); + return; +} + +#ifdef PLATFORM_OCTEON9 +static inline void +oct_cpt_inst_submit (struct cpt_inst_s *inst, uint64_t lmtline, + uint64_t io_addr) +{ + uint64_t lmt_status; + + do + { + /* Copy CPT command 
to LMTLINE */
+      roc_lmt_mov64 ((void *) lmtline, inst);
+
+      /*
+       * Make sure compiler does not reorder memcpy and ldeor.
+       * LMTST transactions are always flushed from the write
+       * buffer immediately, a DMB is not required to push out
+       * LMTSTs.
+       */
+      asm volatile("dmb oshst" : : : "memory");
+      lmt_status = roc_lmt_submit_ldeor (io_addr);
+    }
+  while (lmt_status == 0);
+}
+#endif
+
+static_always_inline void
+oct_crypto_burst_submit (oct_crypto_dev_t *crypto_dev, struct cpt_inst_s *inst,
+                         u32 n_left)
+{
+  u64 lmt_base;
+  u64 io_addr;
+  u32 count;
+
+#ifdef PLATFORM_OCTEON9
+  lmt_base = crypto_dev->lf.lmt_base;
+  io_addr = crypto_dev->lf.io_addr;
+
+  for (count = 0; count < n_left; count++)
+    oct_cpt_inst_submit (inst + count, lmt_base, io_addr);
+#else
+  u64 *lmt_line[OCT_MAX_LMT_SZ];
+  u64 lmt_arg, core_lmt_id;
+
+  lmt_base = crypto_dev->lmtline.lmt_base;
+  io_addr = crypto_dev->lmtline.io_addr;
+
+  ROC_LMT_CPT_BASE_ID_GET (lmt_base, core_lmt_id);
+
+  for (count = 0; count < OCT_MAX_LMT_SZ; count++)
+    {
+      lmt_line[count] = OCT_CPT_LMT_GET_LINE_ADDR (lmt_base, count);
+    }
+
+  while (n_left > OCT_MAX_LMT_SZ)
+    {
+
+      /*
+       * Add a memory barrier so that LMTLINEs from the previous iteration
+       * can be reused for a subsequent transfer.
+       */
+      asm volatile("dmb oshst" ::: "memory");
+
+      lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id;
+
+      for (count = 0; count < OCT_MAX_LMT_SZ; count++)
+        {
+          roc_lmt_mov_seg ((void *) lmt_line[count], inst + count,
+                           CPT_LMT_SIZE_COPY);
+        }
+
+      /* Set number of LMTSTs, excluding the first */
+      lmt_arg |= (OCT_MAX_LMT_SZ - 1) << 12;
+
+      roc_lmt_submit_steorl (lmt_arg, io_addr);
+
+      inst += OCT_MAX_LMT_SZ;
+      n_left -= OCT_MAX_LMT_SZ;
+    }
+
+  if (n_left > 0)
+    {
+      /*
+       * Add a memory barrier so that LMTLINEs from the previous iteration
+       * can be reused for a subsequent transfer.
+ */ + asm volatile("dmb oshst" ::: "memory"); + + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id; + + for (count = 0; count < n_left; count++) + { + roc_lmt_mov_seg ((void *) lmt_line[count], inst + count, + CPT_LMT_SIZE_COPY); + } + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (n_left - 1) << 12; + + roc_lmt_submit_steorl (lmt_arg, io_addr); + } +#endif +} + +static_always_inline uint32_t +oct_crypto_fill_sg_comp_from_iov (struct roc_sglist_comp *list, uint32_t i, + struct roc_se_iov_ptr *from, + uint32_t from_offset, uint32_t *psize, + struct roc_se_buf_ptr *extra_buf, + uint32_t extra_offset) +{ + uint32_t extra_len = extra_buf ? extra_buf->size : 0; + uint32_t size = *psize; + int32_t j; + + for (j = 0; j < from->buf_cnt; j++) + { + struct roc_sglist_comp *to = &list[i >> 2]; + uint32_t buf_sz = from->bufs[j].size; + void *vaddr = from->bufs[j].vaddr; + uint64_t e_vaddr; + uint32_t e_len; + + if (PREDICT_FALSE (from_offset)) + { + if (from_offset >= buf_sz) + { + from_offset -= buf_sz; + continue; + } + e_vaddr = (uint64_t) vaddr + from_offset; + e_len = clib_min ((buf_sz - from_offset), size); + from_offset = 0; + } + else + { + e_vaddr = (uint64_t) vaddr; + e_len = clib_min (buf_sz, size); + } + + to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len); + to->ptr[i % 4] = clib_host_to_net_u64 (e_vaddr); + + if (extra_len && (e_len >= extra_offset)) + { + /* Break the data at given offset */ + uint32_t next_len = e_len - extra_offset; + uint64_t next_vaddr = e_vaddr + extra_offset; + + if (!extra_offset) + { + i--; + } + else + { + e_len = extra_offset; + size -= e_len; + to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len); + } + + extra_len = clib_min (extra_len, size); + /* Insert extra data ptr */ + if (extra_len) + { + i++; + to = &list[i >> 2]; + to->u.s.len[i % 4] = clib_host_to_net_u16 (extra_len); + to->ptr[i % 4] = + clib_host_to_net_u64 ((uint64_t) extra_buf->vaddr); + size -= extra_len; + } + + next_len = clib_min (next_len, size); + 
/* insert the rest of the data */ + if (next_len) + { + i++; + to = &list[i >> 2]; + to->u.s.len[i % 4] = clib_host_to_net_u16 (next_len); + to->ptr[i % 4] = clib_host_to_net_u64 (next_vaddr); + size -= next_len; + } + extra_len = 0; + } + else + { + size -= e_len; + } + if (extra_offset) + extra_offset -= size; + i++; + + if (PREDICT_FALSE (!size)) + break; + } + + *psize = size; + return (uint32_t) i; +} + +static_always_inline u32 +oct_crypto_fill_sg2_comp_from_iov (struct roc_sg2list_comp *list, u32 i, + struct roc_se_iov_ptr *from, + u32 from_offset, u32 *psize, + struct roc_se_buf_ptr *extra_buf, + u32 extra_offset) +{ + u32 extra_len = extra_buf ? extra_buf->size : 0; + u32 size = *psize, buf_sz, e_len, next_len; + struct roc_sg2list_comp *to; + u64 e_vaddr, next_vaddr; + void *vaddr; + i32 j; + + for (j = 0; j < from->buf_cnt; j++) + { + to = &list[i / 3]; + buf_sz = from->bufs[j].size; + vaddr = from->bufs[j].vaddr; + + if (PREDICT_FALSE (from_offset)) + { + if (from_offset >= buf_sz) + { + from_offset -= buf_sz; + continue; + } + e_vaddr = (u64) vaddr + from_offset; + e_len = clib_min ((buf_sz - from_offset), size); + from_offset = 0; + } + else + { + e_vaddr = (u64) vaddr; + e_len = clib_min (buf_sz, size); + } + + to->u.s.len[i % 3] = (e_len); + to->ptr[i % 3] = (e_vaddr); + to->u.s.valid_segs = (i % 3) + 1; + + if (extra_len && (e_len >= extra_offset)) + { + /* Break the data at given offset */ + next_len = e_len - extra_offset; + next_vaddr = e_vaddr + extra_offset; + + if (!extra_offset) + i--; + else + { + e_len = extra_offset; + size -= e_len; + to->u.s.len[i % 3] = (e_len); + } + + extra_len = clib_min (extra_len, size); + /* Insert extra data ptr */ + if (extra_len) + { + i++; + to = &list[i / 3]; + to->u.s.len[i % 3] = (extra_len); + to->ptr[i % 3] = ((u64) extra_buf->vaddr); + to->u.s.valid_segs = (i % 3) + 1; + size -= extra_len; + } + + next_len = clib_min (next_len, size); + /* insert the rest of the data */ + if (next_len) + { + i++; + to = 
&list[i / 3]; + to->u.s.len[i % 3] = (next_len); + to->ptr[i % 3] = (next_vaddr); + to->u.s.valid_segs = (i % 3) + 1; + size -= next_len; + } + extra_len = 0; + } + else + size -= e_len; + + if (extra_offset) + extra_offset -= size; + + i++; + + if (PREDICT_FALSE (!size)) + break; + } + + *psize = size; + return (u32) i; +} + +static_always_inline uint32_t +oct_crypto_fill_sg_comp_from_buf (struct roc_sglist_comp *list, uint32_t i, + struct roc_se_buf_ptr *from) +{ + struct roc_sglist_comp *to = &list[i >> 2]; + + to->u.s.len[i % 4] = clib_host_to_net_u16 (from->size); + to->ptr[i % 4] = clib_host_to_net_u64 ((uint64_t) from->vaddr); + return ++i; +} + +static_always_inline uint32_t +oct_crypto_fill_sg_comp (struct roc_sglist_comp *list, uint32_t i, + uint64_t dma_addr, uint32_t size) +{ + struct roc_sglist_comp *to = &list[i >> 2]; + + to->u.s.len[i % 4] = clib_host_to_net_u16 (size); + to->ptr[i % 4] = clib_host_to_net_u64 (dma_addr); + return ++i; +} + +static_always_inline u32 +oct_crypto_fill_sg2_comp (struct roc_sg2list_comp *list, u32 index, + u64 dma_addr, u32 size) +{ + struct roc_sg2list_comp *to = &list[index / 3]; + + to->u.s.len[index % 3] = (size); + to->ptr[index % 3] = (dma_addr); + to->u.s.valid_segs = (index % 3) + 1; + return ++index; +} + +static_always_inline u32 +oct_crypto_fill_sg2_comp_from_buf (struct roc_sg2list_comp *list, u32 index, + struct roc_se_buf_ptr *from) +{ + struct roc_sg2list_comp *to = &list[index / 3]; + + to->u.s.len[index % 3] = (from->size); + to->ptr[index % 3] = ((u64) from->vaddr); + to->u.s.valid_segs = (index % 3) + 1; + return ++index; +} + +static_always_inline int __attribute__ ((unused)) +oct_crypto_sg_inst_prep (struct roc_se_fc_params *params, + struct cpt_inst_s *inst, uint64_t offset_ctrl, + const uint8_t *iv_s, int iv_len, uint8_t pack_iv, + uint8_t pdcp_alg_type, int32_t inputlen, + int32_t outputlen, uint32_t passthrough_len, + uint32_t req_flags, int pdcp_flag, int decrypt) +{ + oct_crypto_main_t *ocm = 
&oct_crypto_main;
+  oct_crypto_dev_t *ocd = ocm->crypto_dev[decrypt];
+  struct roc_sglist_comp *gather_comp, *scatter_comp;
+  void *m_vaddr = params->meta_buf.vaddr;
+  struct roc_se_buf_ptr *aad_buf = NULL;
+  uint32_t mac_len = 0, aad_len = 0;
+  struct roc_se_ctx *se_ctx;
+  uint32_t i, g_size_bytes;
+  uint64_t *offset_vaddr;
+  uint32_t s_size_bytes;
+  uint8_t *in_buffer;
+  uint32_t size;
+  uint8_t *iv_d;
+  int ret = 0;
+
+  se_ctx = params->ctx;
+  mac_len = se_ctx->mac_len;
+
+  if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF))
+    {
+      /* We don't support both AAD and auth data separately */
+      aad_len = params->aad_buf.size;
+      aad_buf = &params->aad_buf;
+    }
+
+  /* save space for iv */
+  offset_vaddr = m_vaddr;
+
+  m_vaddr =
+    (uint8_t *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8);
+
+  inst->w4.s.opcode_major |= (uint64_t) ROC_DMA_MODE_SG;
+
+  /* iv offset is 0 */
+  *offset_vaddr = offset_ctrl;
+
+  iv_d = ((uint8_t *) offset_vaddr + ROC_SE_OFF_CTRL_LEN);
+
+  if (PREDICT_TRUE (iv_len))
+    memcpy (iv_d, iv_s, iv_len);
+
+  /* DPTR has SG list */
+
+  /* TODO Add error check if space will be sufficient */
+  gather_comp = (struct roc_sglist_comp *) ((uint8_t *) m_vaddr + 8);
+
+  /*
+   * Input Gather List
+   */
+  i = 0;
+
+  /* Offset control word followed by iv */
+
+  i = oct_crypto_fill_sg_comp (gather_comp, i, (uint64_t) offset_vaddr,
+                               ROC_SE_OFF_CTRL_LEN + iv_len);
+
+  /* Add input data */
+  if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF))
+    {
+      size = inputlen - iv_len - mac_len;
+
+      if (PREDICT_TRUE (size))
+        {
+          uint32_t aad_offset = aad_len ? passthrough_len : 0;
+          i = oct_crypto_fill_sg_comp_from_iov (
+            gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset);
+          if (PREDICT_FALSE (size))
+            {
+              log_err (ocd->dev, "Insufficient buffer space, size %d needed",
+                       size);
+              return -1;
+            }
+        }
+
+      if (mac_len)
+        i =
+          oct_crypto_fill_sg_comp_from_buf (gather_comp, i, &params->mac_buf);
+    }
+  else
+    {
+      /* input data */
+      size = inputlen - iv_len;
+      if (size)
+        {
+          uint32_t aad_offset = aad_len ? passthrough_len : 0;
+          i = oct_crypto_fill_sg_comp_from_iov (
+            gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset);
+          if (PREDICT_FALSE (size))
+            {
+              log_err (ocd->dev, "Insufficient buffer space, size %d needed",
+                       size);
+              return -1;
+            }
+        }
+    }
+
+  in_buffer = m_vaddr;
+  ((uint16_t *) in_buffer)[0] = 0;
+  ((uint16_t *) in_buffer)[1] = 0;
+  ((uint16_t *) in_buffer)[2] = clib_host_to_net_u16 (i);
+
+  g_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp);
+  /*
+   * Output Scatter List
+   */
+
+  i = 0;
+  scatter_comp =
+    (struct roc_sglist_comp *) ((uint8_t *) gather_comp + g_size_bytes);
+
+  i = oct_crypto_fill_sg_comp (
+    scatter_comp, i, (uint64_t) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len);
+
+  /* Add output data */
+  if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF))
+    {
+      size = outputlen - iv_len - mac_len;
+      if (size)
+        {
+
+          uint32_t aad_offset = aad_len ? passthrough_len : 0;
+
+          i = oct_crypto_fill_sg_comp_from_iov (
+            scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset);
+          if (PREDICT_FALSE (size))
+            {
+              log_err (ocd->dev, "Insufficient buffer space, size %d needed",
+                       size);
+              return -1;
+            }
+        }
+
+      /* mac data */
+      if (mac_len)
+        i =
+          oct_crypto_fill_sg_comp_from_buf (scatter_comp, i, &params->mac_buf);
+    }
+  else
+    {
+      /* Output including mac */
+      size = outputlen - iv_len;
+
+      if (size)
+        {
+          uint32_t aad_offset = aad_len ?
passthrough_len : 0; + + i = oct_crypto_fill_sg_comp_from_iov ( + scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset); + + if (PREDICT_FALSE (size)) + { + log_err (ocd->dev, "Insufficient buffer space, size %d needed", + size); + return -1; + } + } + } + ((uint16_t *) in_buffer)[3] = clib_host_to_net_u16 (i); + s_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp); + + size = g_size_bytes + s_size_bytes + ROC_SG_LIST_HDR_SIZE; + + /* This is DPTR len in case of SG mode */ + inst->w4.s.dlen = size; + + if (PREDICT_FALSE (size > ROC_SG_MAX_DLEN_SIZE)) + { + log_err (ocd->dev, "Exceeds max supported components. Reduce segments"); + ret = -1; + } + + inst->dptr = (uint64_t) in_buffer; + return ret; +} + +static_always_inline int __attribute__ ((unused)) +oct_crypto_sg2_inst_prep (struct roc_se_fc_params *params, + struct cpt_inst_s *inst, u64 offset_ctrl, + const u8 *iv_s, int iv_len, u8 pack_iv, + u8 pdcp_alg_type, i32 inputlen, i32 outputlen, + u32 passthrough_len, u32 req_flags, int pdcp_flag, + int decrypt) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_dev_t *ocd = ocm->crypto_dev[decrypt]; + u32 mac_len = 0, aad_len = 0, size, index, g_size_bytes; + struct roc_sg2list_comp *gather_comp, *scatter_comp; + void *m_vaddr = params->meta_buf.vaddr; + struct roc_se_buf_ptr *aad_buf = NULL; + union cpt_inst_w5 cpt_inst_w5; + union cpt_inst_w6 cpt_inst_w6; + u16 scatter_sz, gather_sz; + struct roc_se_ctx *se_ctx; + u64 *offset_vaddr; + int ret = 0; + u8 *iv_d; + + se_ctx = params->ctx; + mac_len = se_ctx->mac_len; + + if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF)) + { + /* We don't support both AAD and auth data separately */ + aad_len = params->aad_buf.size; + aad_buf = ¶ms->aad_buf; + } + + /* save space for iv */ + offset_vaddr = m_vaddr; + + m_vaddr = (u8 *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8); + + inst->w4.s.opcode_major |= (u64) ROC_DMA_MODE_SG; + + /* This is DPTR len in case of SG mode */ + 
inst->w4.s.dlen = inputlen + ROC_SE_OFF_CTRL_LEN; + + /* iv offset is 0 */ + *offset_vaddr = offset_ctrl; + iv_d = ((u8 *) offset_vaddr + ROC_SE_OFF_CTRL_LEN); + + if (PREDICT_TRUE (iv_len)) + clib_memcpy (iv_d, iv_s, iv_len); + + /* DPTR has SG list */ + + gather_comp = (struct roc_sg2list_comp *) ((u8 *) m_vaddr); + + /* + * Input Gather List + */ + index = 0; + + /* Offset control word followed by iv */ + + index = oct_crypto_fill_sg2_comp (gather_comp, index, (u64) offset_vaddr, + ROC_SE_OFF_CTRL_LEN + iv_len); + + /* Add input data */ + if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF)) + { + size = inputlen - iv_len - mac_len; + if (size) + { + /* input data only */ + u32 aad_offset = aad_len ? passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index, + params->src_iov, 0, &size, + aad_buf, aad_offset); + + if (PREDICT_FALSE (size)) + { + log_err (ocd->dev, + "Insufficient buffer" + " space, size %d needed", + size); + return -1; + } + } + + /* mac data */ + if (mac_len) + index = oct_crypto_fill_sg2_comp_from_buf (gather_comp, index, + ¶ms->mac_buf); + } + else + { + /* input data */ + size = inputlen - iv_len; + if (size) + { + u32 aad_offset = aad_len ? 
passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index, + params->src_iov, 0, &size, + aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + log_err (ocd->dev, + "Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + } + + gather_sz = (index + 2) / 3; + g_size_bytes = gather_sz * sizeof (struct roc_sg2list_comp); + + /* + * Output Scatter List + */ + + index = 0; + scatter_comp = + (struct roc_sg2list_comp *) ((u8 *) gather_comp + g_size_bytes); + + index = oct_crypto_fill_sg2_comp ( + scatter_comp, index, (u64) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len); + + /* Add output data */ + if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF)) + { + size = outputlen - iv_len - mac_len; + if (size) + { + + u32 aad_offset = aad_len ? passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index, + params->dst_iov, 0, &size, + aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + log_err (ocd->dev, + "Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + + /* mac data */ + if (mac_len) + index = oct_crypto_fill_sg2_comp_from_buf (scatter_comp, index, + ¶ms->mac_buf); + } + else + { + /* Output including mac */ + size = outputlen - iv_len; + if (size) + { + u32 aad_offset = aad_len ? passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index, + params->dst_iov, 0, &size, + aad_buf, aad_offset); + + if (PREDICT_FALSE (size)) + { + log_err (ocd->dev, + "Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + } + + scatter_sz = (index + 2) / 3; + + cpt_inst_w5.s.gather_sz = gather_sz; + cpt_inst_w6.s.scatter_sz = scatter_sz; + + cpt_inst_w5.s.dptr = (u64) gather_comp; + cpt_inst_w6.s.rptr = (u64) scatter_comp; + + inst->w5.u64 = cpt_inst_w5.u64; + inst->w6.u64 = cpt_inst_w6.u64; + + if (PREDICT_FALSE ((scatter_sz >> 4) || (gather_sz >> 4))) + { + log_err (ocd->dev, "Exceeds max supported components. 
Reduce segments"); + ret = -1; + } + + return ret; +} + +static_always_inline int +oct_crypto_cpt_hmac_prep (u32 flags, u64 d_offs, u64 d_lens, + struct roc_se_fc_params *fc_params, + struct cpt_inst_s *inst, u8 is_decrypt) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_dev_t *ocd = ocm->crypto_dev[is_decrypt]; + u32 encr_data_len, auth_data_len, aad_len = 0; + i32 inputlen, outputlen, enc_dlen, auth_dlen; + u32 encr_offset, auth_offset, iv_offset = 0; + union cpt_inst_w4 cpt_inst_w4; + struct roc_se_ctx *se_ctx; + u32 passthrough_len = 0; + const u8 *src = NULL; + u32 cipher_type; + u64 offset_ctrl; + u8 iv_len = 16; + u8 op_minor; + u32 mac_len; + int ret; + + encr_offset = ROC_SE_ENCR_OFFSET (d_offs); + auth_offset = ROC_SE_AUTH_OFFSET (d_offs); + encr_data_len = ROC_SE_ENCR_DLEN (d_lens); + auth_data_len = ROC_SE_AUTH_DLEN (d_lens); + + if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF)) + { + /* We don't support both AAD and auth data separately */ + auth_data_len = 0; + auth_offset = 0; + aad_len = fc_params->aad_buf.size; + } + + se_ctx = fc_params->ctx; + cipher_type = se_ctx->enc_cipher; + mac_len = se_ctx->mac_len; + cpt_inst_w4.u64 = se_ctx->template_w4.u64; + op_minor = cpt_inst_w4.s.opcode_minor; + + if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF)) + { + /* + * When AAD is given, data above encr_offset is pass through + * Since AAD is given as separate pointer and not as offset, + * this is a special case as we need to fragment input data + * into passthrough + encr_data and then insert AAD in between. 
+ */ + passthrough_len = encr_offset; + auth_offset = passthrough_len + iv_len; + encr_offset = passthrough_len + aad_len + iv_len; + auth_data_len = aad_len + encr_data_len; + } + else + { + encr_offset += iv_len; + auth_offset += iv_len; + } + + auth_dlen = auth_offset + auth_data_len; + enc_dlen = encr_data_len + encr_offset; + + cpt_inst_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC; + + if (is_decrypt) + { + cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT; + + if (auth_dlen > enc_dlen) + { + inputlen = auth_dlen + mac_len; + outputlen = auth_dlen; + } + else + { + inputlen = enc_dlen + mac_len; + outputlen = enc_dlen; + } + } + else + { + cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT; + + /* Round up to 16 bytes alignment */ + if (PREDICT_FALSE (encr_data_len & 0xf)) + { + if (PREDICT_TRUE (cipher_type == ROC_SE_AES_CBC) || + (cipher_type == ROC_SE_DES3_CBC)) + enc_dlen = PLT_ALIGN_CEIL (encr_data_len, 8) + encr_offset; + } + + /* + * auth_dlen is larger than enc_dlen in Authentication cases + * like AES GMAC Authentication + */ + if (PREDICT_FALSE (auth_dlen > enc_dlen)) + { + inputlen = auth_dlen; + outputlen = auth_dlen + mac_len; + } + else + { + inputlen = enc_dlen; + outputlen = enc_dlen + mac_len; + } + } + + if (op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST) + outputlen = enc_dlen; + + cpt_inst_w4.s.param1 = encr_data_len; + cpt_inst_w4.s.param2 = auth_data_len; + + if (PREDICT_FALSE ((encr_offset >> 16) || (iv_offset >> 8) || + (auth_offset >> 8))) + { + log_err (ocd->dev, "Offset not supported"); + log_err (ocd->dev, "enc_offset: %d, iv_offset : %d, auth_offset: %d", + encr_offset, iv_offset, auth_offset); + return -1; + } + + offset_ctrl = clib_host_to_net_u64 ( + ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset)); + + src = fc_params->iv_buf; + + inst->w4.u64 = cpt_inst_w4.u64; + +#ifdef PLATFORM_OCTEON9 + ret = oct_crypto_sg_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0, + 0, inputlen, outputlen, 
passthrough_len, + flags, 0, is_decrypt); +#else + ret = oct_crypto_sg2_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0, + 0, inputlen, outputlen, passthrough_len, + flags, 0, is_decrypt); +#endif + + if (PREDICT_FALSE (ret)) + { + log_err (ocd->dev, "sg prep failed"); + return -1; + } + + return 0; +} + +static_always_inline void +oct_crypto_scatter_gather_mode ( + oct_crypto_sess_t *sess, struct cpt_inst_s *inst, const bool is_aead, + u8 aad_length, u8 *payload, vnet_crypto_async_frame_elt_t *elts, void *mdata, + u32 cipher_data_length, u32 cipher_data_offset, u32 auth_data_length, + u32 auth_data_offset, vlib_buffer_t *b, u16 adj_len) +{ + struct roc_se_fc_params fc_params = { 0 }; + struct roc_se_ctx *ctx = &sess->cpt_ctx; + u64 d_offs = 0, d_lens = 0; + vlib_buffer_t *buffer = b; + u32 flags = 0, index = 0; + u8 op_minor = 0, cpt_op; + char src[SRC_IOV_SIZE]; + u32 *iv_buf; + + cpt_op = sess->cpt_op; + + if (is_aead) + { + flags |= ROC_SE_VALID_IV_BUF; + iv_buf = (u32 *) elts->iv; + iv_buf[3] = clib_host_to_net_u32 (0x1); + fc_params.iv_buf = elts->iv; + + d_offs = cipher_data_offset; + d_offs = d_offs << 16; + + d_lens = cipher_data_length; + d_lens = d_lens << 32; + + fc_params.aad_buf.vaddr = elts->aad; + fc_params.aad_buf.size = aad_length; + flags |= ROC_SE_VALID_AAD_BUF; + + if (sess->cpt_ctx.mac_len) + { + flags |= ROC_SE_VALID_MAC_BUF; + fc_params.mac_buf.size = sess->cpt_ctx.mac_len; + fc_params.mac_buf.vaddr = elts->tag; + } + } + else + { + op_minor = ctx->template_w4.s.opcode_minor; + + flags |= ROC_SE_VALID_IV_BUF; + + fc_params.iv_buf = elts->iv; + + d_offs = cipher_data_offset; + d_offs = (d_offs << 16) | auth_data_offset; + + d_lens = cipher_data_length; + d_lens = (d_lens << 32) | auth_data_length; + + if (PREDICT_TRUE (sess->cpt_ctx.mac_len)) + { + if (!(op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST)) + { + flags |= ROC_SE_VALID_MAC_BUF; + fc_params.mac_buf.size = sess->cpt_ctx.mac_len; + fc_params.mac_buf.vaddr = elts->digest; + } + } + } 
+ + fc_params.ctx = &sess->cpt_ctx; + + fc_params.src_iov = (void *) src; + + fc_params.src_iov->bufs[index].vaddr = payload; + fc_params.src_iov->bufs[index].size = b->current_length - adj_len; + index++; + + while (buffer->flags & VLIB_BUFFER_NEXT_PRESENT) + { + buffer = vlib_get_buffer (vlib_get_main (), buffer->next_buffer); + fc_params.src_iov->bufs[index].vaddr = + buffer->data + buffer->current_data; + fc_params.src_iov->bufs[index].size = buffer->current_length; + index++; + } + + fc_params.src_iov->buf_cnt = index; + + fc_params.dst_iov = (void *) src; + + fc_params.meta_buf.vaddr = mdata; + fc_params.meta_buf.size = OCT_SCATTER_GATHER_BUFFER_SIZE; + + oct_crypto_cpt_hmac_prep (flags, d_offs, d_lens, &fc_params, inst, cpt_op); +} + +static_always_inline u64 +oct_cpt_inst_w7_get (oct_crypto_sess_t *sess, struct roc_cpt *roc_cpt) +{ + union cpt_inst_w7 inst_w7; + + inst_w7.u64 = 0; + inst_w7.s.cptr = (u64) &sess->cpt_ctx.se_ctx.fctx; + + if (oct_hw_ctx_cache_enable ()) + inst_w7.s.ctx_val = 1; + + /* Set the engine group */ + if (roc_model_is_cn20k ()) + inst_w7.s.egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_SE]; + else + inst_w7.s.egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE]; + + return inst_w7.u64; +} + +static_always_inline i32 +oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, + u32 key_index, u8 type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_dev_t *ocd = ocm->crypto_dev[type]; + vnet_crypto_key_t *crypto_key, *auth_key; + roc_se_cipher_type enc_type = 0; + roc_se_auth_type auth_type = 0; + vnet_crypto_key_t *key; + u32 digest_len = ~0; + i32 rv = 0; + + key = vnet_crypto_get_key (key_index); + + switch (key->async_alg) + { + case VNET_CRYPTO_ALG_AES_128_CBC_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA1_TAG12: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA1_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA224_TAG14: + case 
VNET_CRYPTO_ALG_AES_192_CBC_SHA224_TAG14: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA224_TAG14: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA224; + digest_len = 14; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA256_TAG16: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA256; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA384_TAG24: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA384; + digest_len = 24; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA512_TAG32: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA512; + digest_len = 32; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_MD5_TAG12: + case VNET_CRYPTO_ALG_AES_192_CBC_MD5_TAG12: + case VNET_CRYPTO_ALG_AES_256_CBC_MD5_TAG12: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_MD5_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA1_TAG12: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA1_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA256_TAG16: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA2_SHA256; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA384_TAG24: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA2_SHA384; + digest_len = 24; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA512_TAG32: + enc_type = 
ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA2_SHA512; + digest_len = 32; + break; + case VNET_CRYPTO_ALG_3DES_CBC_MD5_TAG12: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_MD5_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA1_TAG12: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA1_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA224_TAG14: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA224; + digest_len = 14; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA256_TAG16: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA256; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA384_TAG24: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA384; + digest_len = 24; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA512_TAG32: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA512; + digest_len = 32; + break; + default: + log_err (ocd->dev, + "Crypto: Undefined link algo %u specified. Key index %u", + key->async_alg, key_index); + return -1; + } + + if (type == VNET_CRYPTO_OP_TYPE_ENCRYPT) + sess->cpt_ctx.ciph_then_auth = true; + else + sess->cpt_ctx.auth_then_ciph = true; + + sess->iv_length = 16; + sess->cpt_op = type; + + crypto_key = vnet_crypto_get_key (key->index_crypto); + rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, crypto_key->data, + vec_len (crypto_key->data)); + if (rv) + { + log_err (ocd->dev, "Error in setting cipher key for enc type %u", + enc_type); + return -1; + } + + auth_key = vnet_crypto_get_key (key->index_integ); + + rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, auth_key->data, + vec_len (auth_key->data), digest_len); + if (rv) + { + log_err (ocd->dev, "Error in setting auth key for auth type %u", + auth_type); + return -1; + } + + sess->cpt_ctx.template_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + sess->cpt_ctx.template_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT; + else + 
sess->cpt_ctx.template_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT;
+
+  return 0;
+}
+
+/*
+ * Program an AEAD session (AES-GCM or ChaCha20-Poly1305) from the key at
+ * 'key_index'.  'type' is stored as the session's cpt_op and selects the
+ * encrypt or decrypt minor opcode.  Returns 0 on success and -1 when the
+ * algorithm is not handled or a ROC key-set call fails.
+ */
+static_always_inline i32
+oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
+                                u32 key_index, u8 type)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  oct_crypto_dev_t *ocd = ocm->crypto_dev[type];
+  vnet_crypto_key_t *key = vnet_crypto_get_key (key_index);
+  roc_se_cipher_type cipher = 0;
+  roc_se_auth_type auth = 0;
+  /* Both supported AEAD algorithms use a 16 byte tag here. */
+  const u32 tag_len = 16;
+
+  switch (key->async_alg)
+    {
+    case VNET_CRYPTO_ALG_AES_128_GCM:
+    case VNET_CRYPTO_ALG_AES_192_GCM:
+    case VNET_CRYPTO_ALG_AES_256_GCM:
+      cipher = ROC_SE_AES_GCM;
+      sess->aes_gcm = 1;
+      sess->iv_offset = 0;
+      sess->iv_length = 16;
+      break;
+
+    case VNET_CRYPTO_ALG_CHACHA20_POLY1305:
+      cipher = ROC_SE_CHACHA20;
+      auth = ROC_SE_POLY1305;
+      break;
+
+    default:
+      log_err (ocd->dev,
+               "Crypto: Undefined cipher algo %u specified. Key index %u",
+               key->async_alg, key_index);
+      return -1;
+    }
+
+  sess->cpt_ctx.mac_len = tag_len;
+  sess->cpt_op = type;
+
+  if (roc_se_ciph_key_set (&sess->cpt_ctx, cipher, key->data,
+                           vec_len (key->data)))
+    {
+      log_err (ocd->dev, "Error in setting cipher key for enc type %u",
+               cipher);
+      return -1;
+    }
+
+  /* No separate auth key for AEAD: only the auth type and tag length. */
+  if (roc_se_auth_key_set (&sess->cpt_ctx, auth, NULL, 0, tag_len))
+    {
+      log_err (ocd->dev, "Error in setting auth key for auth type %u", auth);
+      return -1;
+    }
+
+  sess->cpt_ctx.template_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC;
+  sess->cpt_ctx.template_w4.s.opcode_minor |=
+    (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) ?
+      ROC_SE_FC_MINOR_OP_DECRYPT :
+      ROC_SE_FC_MINOR_OP_ENCRYPT;
+
+  /* ChaCha20 additionally sets bit 5 of the minor opcode. */
+  if (cipher == ROC_SE_CHACHA20)
+    sess->cpt_ctx.template_w4.s.opcode_minor |= BIT (5);
+
+  return 0;
+}
+
+/*
+ * Initialise 'session' for the key at 'key_index' and operation 'op_type'.
+ * Linked (cipher + integrity) keys and AEAD keys go through their own
+ * update routine.  On failure the session is released with
+ * oct_crypto_session_free () and -1 is returned; on success the session is
+ * bound to its crypto device, instruction word 7 is cached, and the
+ * session is marked initialised.
+ */
+static_always_inline i32
+oct_crypto_session_init (vlib_main_t *vm, oct_crypto_sess_t *session,
+                         vnet_crypto_key_index_t key_index, int op_type)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  oct_crypto_dev_t *dev = ocm->crypto_dev[op_type];
+  vnet_crypto_key_t *key = vnet_crypto_get_key (key_index);
+  i32 err;
+
+  err = (key->type == VNET_CRYPTO_KEY_TYPE_LINK) ?
+          oct_crypto_link_session_update (vm, session, key_index, op_type) :
+          oct_crypto_aead_session_update (vm, session, key_index, op_type);
+  if (err)
+    {
+      oct_crypto_session_free (vm, session);
+      return -1;
+    }
+
+  session->crypto_dev = dev;
+  session->cpt_inst_w7 = oct_cpt_inst_w7_get (session, dev->roc_cpt);
+
+  if (oct_hw_ctx_cache_enable ())
+    roc_se_ctx_init (&session->cpt_ctx);
+
+  session->initialised = 1;
+
+  return 0;
+}
+
+/*
+ * Stamp status 's' on every element of frame 'f' and put the frame back
+ * into the NOT_PROCESSED state.
+ */
+static_always_inline void
+oct_crypto_update_frame_error_status (vnet_crypto_async_frame_t *f,
+                                      vnet_crypto_op_status_t s)
+{
+  vnet_crypto_async_frame_elt_t *elt = f->elts;
+  vnet_crypto_async_frame_elt_t *const end = f->elts + f->n_elts;
+
+  while (elt < end)
+    {
+      elt->status = s;
+      elt++;
+    }
+
+  f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+}
+
+static_always_inline void
+oct_crypto_direct_mode_linked (vlib_buffer_t *buffer, struct cpt_inst_s *inst,
+                               oct_crypto_sess_t *sess,
+                               oct_crypto_inflight_req_t *infl_req, u8 aad_len)
+{
+  u32 encr_offset, auth_offset, iv_offset;
+  vnet_crypto_async_frame_elt_t *elts;
+  union cpt_inst_w4 cpt_inst_w4;
+  u64 *offset_control_word;
+  u32 crypto_total_length;
+  u32 auth_dlen, enc_dlen;
+  u32 enc_auth_len;
+
+  elts = infl_req->fe;
+  enc_auth_len = elts->crypto_total_length + elts->integ_length_adj;
+  crypto_total_length = elts->crypto_total_length;
+
+  if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT)
+    {
+      /*
+       * Position the offset control word so that it does not
+       * overlap with the IV.
+ */ + offset_control_word = (void *) (buffer->data) - ROC_SE_OFF_CTRL_LEN - 4; + + iv_offset = + (void *) elts->iv - (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + } + else + { + offset_control_word = (void *) (elts->iv) - ROC_SE_OFF_CTRL_LEN; + iv_offset = 0; + } + + encr_offset = (void *) (buffer->data + elts->crypto_start_offset) - + (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + auth_offset = (void *) (buffer->data + elts->integ_start_offset) - + (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + *offset_control_word = clib_host_to_net_u64 ( + ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset)); + + cpt_inst_w4.u64 = sess->cpt_ctx.template_w4.u64; + + cpt_inst_w4.s.param1 = crypto_total_length; + cpt_inst_w4.s.param2 = enc_auth_len; + + auth_dlen = auth_offset + enc_auth_len + ROC_SE_OFF_CTRL_LEN; + enc_dlen = encr_offset + crypto_total_length + ROC_SE_OFF_CTRL_LEN; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + cpt_inst_w4.s.dlen = auth_dlen + sess->cpt_ctx.mac_len; + else + { + /* + * In the case of ESN, 4 bytes of the seqhi will be stored at the end of + * the cipher. This data must be overwritten by the digest data during + * the dequeue process. 
+ */ + if (auth_dlen > enc_dlen) + infl_req->esn_enabled = true; + + cpt_inst_w4.s.dlen = auth_dlen; + } + + infl_req->mac_len = sess->cpt_ctx.mac_len; + + inst->dptr = (uint64_t) offset_control_word; + inst->rptr = (uint64_t) ((void *) offset_control_word + ROC_SE_OFF_CTRL_LEN); + inst->w4.u64 = cpt_inst_w4.u64; +} + +static_always_inline void +oct_crypto_direct_mode_aead (vlib_buffer_t *buffer, struct cpt_inst_s *inst, + oct_crypto_sess_t *sess, + oct_crypto_inflight_req_t *infl_req, u8 aad_len) +{ + u32 encr_offset, auth_offset, iv_offset; + u32 auth_copy_offset, iv_copy_offset; + vnet_crypto_async_frame_elt_t *elts; + union cpt_inst_w4 cpt_inst_w4; + u64 *offset_control_word; + u32 crypto_total_length; + + elts = infl_req->fe; + crypto_total_length = elts->crypto_total_length; + + ((u32 *) elts->iv)[3] = clib_host_to_net_u32 (0x1); + + offset_control_word = (void *) (elts->aad) - ROC_SE_OFF_CTRL_LEN; + encr_offset = (void *) (buffer->data + elts->crypto_start_offset) - + (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + iv_offset = elts->iv - elts->aad; + auth_offset = encr_offset - aad_len; + + *offset_control_word = clib_host_to_net_u64 ( + ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset)); + + cpt_inst_w4.u64 = sess->cpt_ctx.template_w4.u64; + + cpt_inst_w4.s.param1 = crypto_total_length; + cpt_inst_w4.s.param2 = crypto_total_length + aad_len; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + cpt_inst_w4.s.dlen = encr_offset + elts->crypto_total_length + + ROC_SE_OFF_CTRL_LEN + sess->cpt_ctx.mac_len; + else + cpt_inst_w4.s.dlen = + encr_offset + elts->crypto_total_length + ROC_SE_OFF_CTRL_LEN; + + inst->dptr = (uint64_t) offset_control_word; + inst->rptr = (uint64_t) ((void *) offset_control_word + ROC_SE_OFF_CTRL_LEN); + inst->w4.u64 = cpt_inst_w4.u64; + + /* + * CPT hardware requires the AAD to be followed by the cipher packet. 
+   * Therefore, maintain a copy of the AAD and IV in the inflight request,
+   * and write the AAD in front of the cipher data before submission.
+   */
+  auth_copy_offset = encr_offset - sess->cpt_ctx.mac_len;
+  iv_copy_offset = encr_offset - 8;
+
+  clib_memcpy_fast (infl_req->aad,
+                    ((void *) inst->dptr) + auth_copy_offset + 8, 8);
+  clib_memcpy_fast (infl_req->iv, ((void *) inst->dptr) + iv_copy_offset + 8,
+                    8);
+  clib_memcpy_fast (((void *) inst->dptr) + encr_offset + ROC_SE_OFF_CTRL_LEN -
+                      aad_len,
+                    elts->aad, aad_len);
+
+  infl_req->aead_algo = true;
+}
+
+/*
+ * Build one CPT instruction per element of 'frame' and submit them as a
+ * single burst on the calling thread's pending queue.
+ *
+ * is_aead  non-zero for AEAD algorithms, zero for linked cipher+integrity
+ * aad_len  AAD length in bytes (only meaningful for AEAD)
+ * type     VNET_CRYPTO_OP_TYPE_ENCRYPT or VNET_CRYPTO_OP_TYPE_DECRYPT
+ *
+ * Chained buffers (VLIB_BUFFER_NEXT_PRESENT) use scatter-gather mode, all
+ * others use direct mode.
+ *
+ * Returns 0 when the whole frame was submitted.  Returns -1, with every
+ * element marked VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR, when the queue has
+ * no room for the frame, an element's key has no session, or a session
+ * cannot be initialised.  In that case nothing is submitted and the queue
+ * tail is restored to where it was on entry.
+ */
+static_always_inline int
+oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
+                            const u8 is_aead, u8 aad_len, const u8 type)
+{
+  u32 i, enq_tail, enq_tail_start, enc_auth_len, buffer_index;
+  u32 nb_infl_allowed;
+  struct cpt_inst_s inst[VNET_CRYPTO_FRAME_SIZE];
+  u32 crypto_start_offset, integ_start_offset;
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  vnet_crypto_async_frame_elt_t *elts;
+  oct_crypto_dev_t *crypto_dev = NULL;
+  oct_crypto_inflight_req_t *infl_req;
+  oct_crypto_pending_queue_t *pend_q;
+  u64 dptr_start_ptr, curr_ptr;
+  oct_crypto_sess_t *sess;
+  u32 crypto_total_length;
+  oct_crypto_key_t *key;
+  vlib_buffer_t *buffer;
+  void *sg_data;
+  u16 adj_len;
+
+  /* GCM packets having 8 bytes of aad and 8 bytes of iv */
+  u8 aad_iv = 8 + 8;
+
+  pend_q = &ocm->pend_q[vlib_get_thread_index ()];
+
+  nb_infl_allowed = pend_q->n_desc - pend_q->n_crypto_inflight;
+  if (PREDICT_FALSE (nb_infl_allowed < frame->n_elts))
+    {
+      oct_crypto_update_frame_error_status (
+        frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+      return -1;
+    }
+
+  sg_data = pend_q->sg_data;
+
+  /*
+   * The tail is advanced once per element inside the loop, but nothing is
+   * handed to hardware until the burst submit below.  Remember the
+   * starting position so a mid-frame failure can hand the slots back;
+   * otherwise the dequeue side would wait forever on slots that were
+   * never submitted.
+   */
+  enq_tail_start = pend_q->enq_tail;
+
+  for (i = 0; i < frame->n_elts; i++)
+    {
+      enq_tail = pend_q->enq_tail;
+      infl_req = &pend_q->req_queue[enq_tail];
+      infl_req->frame = frame;
+      infl_req->last_elts = false;
+      infl_req->index = i;
+      /*
+       * Slots are reused.  The direct-mode helpers only ever set these
+       * flags, so clear whatever the previous user of the slot left
+       * behind; a stale flag would make the dequeue path rewrite packet
+       * bytes for a request that does not need it.
+       */
+      infl_req->aead_algo = false;
+      infl_req->esn_enabled = false;
+
+      elts = &frame->elts[i];
+      infl_req->fe = elts;
+      buffer_index = frame->buffer_indices[i];
+      key = vec_elt_at_index (ocm->keys[type], elts->key_index);
+
+      if (PREDICT_FALSE (!key->sess))
+        goto enqueue_fail;
+
+      sess = key->sess;
+
+      /*
+       * Lazy session setup.  On failure the session has already been
+       * released by oct_crypto_session_init (), so it must not be used.
+       */
+      if (PREDICT_FALSE (!sess->initialised))
+        {
+          if (PREDICT_FALSE (oct_crypto_session_init (
+                vm, sess, elts->key_index, type)))
+            goto enqueue_fail;
+        }
+
+      crypto_dev = sess->crypto_dev;
+
+      clib_memset (inst + i, 0, sizeof (struct cpt_inst_s));
+
+      buffer = vlib_get_buffer (vm, buffer_index);
+
+      if (is_aead)
+        {
+          if (buffer->flags & VLIB_BUFFER_NEXT_PRESENT)
+            {
+              dptr_start_ptr =
+                (u64) (buffer->data + (elts->crypto_start_offset - aad_iv));
+              curr_ptr = (u64) (buffer->data + buffer->current_data);
+              adj_len = (u16) (dptr_start_ptr - curr_ptr);
+
+              crypto_total_length = elts->crypto_total_length;
+              crypto_start_offset = aad_iv;
+              integ_start_offset = 0;
+              oct_crypto_scatter_gather_mode (
+                sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts,
+                ((oct_crypto_scatter_gather_t *) (sg_data)) + enq_tail,
+                crypto_total_length /* cipher_len */,
+                crypto_start_offset /* cipher_offset */, 0 /* auth_len */,
+                integ_start_offset /* auth_off */, buffer, adj_len);
+            }
+          else
+            {
+              oct_crypto_direct_mode_aead (buffer, inst + i, sess, infl_req,
+                                           aad_len);
+            }
+        }
+      else
+        {
+          if (buffer->flags & VLIB_BUFFER_NEXT_PRESENT)
+            {
+              dptr_start_ptr = (u64) (buffer->data + elts->integ_start_offset);
+
+              curr_ptr = (u64) (buffer->data + buffer->current_data);
+              adj_len = (u16) (dptr_start_ptr - curr_ptr);
+
+              crypto_start_offset =
+                elts->crypto_start_offset - elts->integ_start_offset;
+              integ_start_offset = 0;
+              enc_auth_len =
+                elts->crypto_total_length + elts->integ_length_adj;
+              crypto_total_length = elts->crypto_total_length;
+
+              oct_crypto_scatter_gather_mode (
+                sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts,
+                ((oct_crypto_scatter_gather_t *) (sg_data)) + enq_tail,
+                crypto_total_length /* cipher_len */,
+                crypto_start_offset /* cipher_offset */,
+                enc_auth_len /* auth_len */, integ_start_offset /* auth_off */,
+                buffer, adj_len);
+            }
+          else
+            {
+              oct_crypto_direct_mode_linked (buffer, inst + i, sess, infl_req,
+                                             aad_len);
+            }
+        }
+
+      inst[i].w7.u64 = sess->cpt_inst_w7;
+      inst[i].res_addr = (u64) &infl_req->res;
+      OCT_MOD_INC (pend_q->enq_tail, pend_q->n_desc);
+    }
+
+  oct_crypto_burst_submit (crypto_dev, inst, frame->n_elts);
+
+  /* The dequeue side uses this marker to find the end of the frame. */
+  infl_req->last_elts = true;
+
+  pend_q->n_crypto_inflight += frame->n_elts;
+  pend_q->n_crypto_frame++;
+
+  vlib_increment_simple_counter (pend_q->pending_packets, vm->thread_index, 0,
+                                 frame->n_elts);
+  vlib_increment_simple_counter (pend_q->crypto_frame, vm->thread_index, 0, 1);
+
+  return 0;
+
+enqueue_fail:
+  /*
+   * NOTE(review): elements already prepared in direct mode have had their
+   * packet headroom rewritten; that is not undone here - confirm callers
+   * drop the packets of a failed frame.
+   */
+  pend_q->enq_tail = enq_tail_start;
+  oct_crypto_update_frame_error_status (
+    frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+  return -1;
+}
+
+int
+oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm,
+                                   vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_enc_dec (
+    vm, frame, 0 /* is_aead */, 0 /* aad_len */, VNET_CRYPTO_OP_TYPE_ENCRYPT);
+}
+
+int
+oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm,
+                                   vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_enc_dec (
+    vm, frame, 0 /* is_aead */, 0 /* aad_len */, VNET_CRYPTO_OP_TYPE_DECRYPT);
+}
+
+int
+oct_crypto_enqueue_aead_aad_enc (vlib_main_t *vm,
+                                 vnet_crypto_async_frame_t *frame, u8 aad_len)
+{
+  return oct_crypto_enqueue_enc_dec (vm, frame, 1 /* is_aead */, aad_len,
+                                     VNET_CRYPTO_OP_TYPE_ENCRYPT);
+}
+
+static_always_inline int
+oct_crypto_enqueue_aead_aad_dec (vlib_main_t *vm,
+                                 vnet_crypto_async_frame_t *frame, u8 aad_len)
+{
+  return oct_crypto_enqueue_enc_dec (vm, frame, 1 /* is_aead */, aad_len,
+                                     VNET_CRYPTO_OP_TYPE_DECRYPT);
+}
+
+int
+oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm,
+                                   vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_enc (vm, frame, 8);
+}
+
+int
+oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm,
+                                    vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_enc (vm, frame, 12);
+}
+
+int
+oct_crypto_enqueue_aead_aad_0_enc (vlib_main_t *vm,
+                                   vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_enc (vm, frame, 0);
+}
+
+int +oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_dec (vm, frame, 8); +} + +int +oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_dec (vm, frame, 12); +} + +int +oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_dec (vm, frame, 0); +} + +vnet_crypto_async_frame_t * +oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, + u32 *enqueue_thread_idx) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + u32 deq_head, status = VNET_CRYPTO_OP_STATUS_COMPLETED; + vnet_crypto_async_frame_elt_t *fe = NULL; + oct_crypto_inflight_req_t *infl_req; + oct_crypto_pending_queue_t *pend_q; + vnet_crypto_async_frame_t *frame; + volatile union cpt_res_s *res; + bool last_elts_processed; + vlib_buffer_t *buffer; + + pend_q = &ocm->pend_q[vlib_get_thread_index ()]; + + if (!pend_q->n_crypto_frame) + return NULL; + + last_elts_processed = false; + + for (; last_elts_processed == false;) + { + deq_head = pend_q->deq_head; + infl_req = &pend_q->req_queue[deq_head]; + fe = infl_req->fe; + + res = &infl_req->res; + + if (PREDICT_FALSE (res->cn10k.compcode == CPT_COMP_NOT_DONE)) + return NULL; + + if (PREDICT_FALSE (res->cn10k.uc_compcode)) + { + if (res->cn10k.uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE) + status = fe->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; + else + status = fe->status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR; + } + + buffer = + vlib_get_buffer (vm, infl_req->frame->buffer_indices[infl_req->index]); + + /* + * For AEAD, copy the AAD and IV back to their original positions. + * If ESN is enabled (in case of linked algo), overwrite the ESN + * seqhi at the end of the cipher with the digest data. 
+ */ + if (infl_req->aead_algo) + { + clib_memcpy_fast (buffer->data + fe->crypto_start_offset - 8, + infl_req->iv, 8); + clib_memcpy_fast (buffer->data + fe->crypto_start_offset - 16, + infl_req->aad, 8); + } + else if (infl_req->esn_enabled) + /* fe->digest + 4 and fe->digest overlap whenever mac_len > 4, so memmove semantics are required. */ + clib_memmove (fe->digest, fe->digest + 4, infl_req->mac_len); + + clib_memset ((void *) &infl_req->res, 0, sizeof (union cpt_res_s)); + last_elts_processed = infl_req->last_elts; + OCT_MOD_INC (pend_q->deq_head, pend_q->n_desc); + } + + frame = infl_req->frame; + + vlib_decrement_simple_counter (pend_q->pending_packets, vm->thread_index, 0, + frame->n_elts); + vlib_increment_simple_counter (pend_q->success_packets, vm->thread_index, 0, + frame->n_elts); + + pend_q->n_crypto_frame--; + pend_q->n_crypto_inflight -= frame->n_elts; + vlib_decrement_simple_counter (pend_q->crypto_frame, vm->thread_index, 0, 1); + + frame->state = status == VNET_CRYPTO_OP_STATUS_COMPLETED ? + VNET_CRYPTO_FRAME_STATE_SUCCESS : + VNET_CRYPTO_FRAME_STATE_ELT_ERROR; + + *nb_elts_processed = frame->n_elts; + *enqueue_thread_idx = frame->enqueue_thread_index; + + return frame; +} + +/* Register the "oct_cryptodev" engine and its async enqueue, dequeue and key handlers. Always returns 0. */ +int +oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev) +{ + u32 engine_index; + + engine_index = vnet_crypto_register_engine (vm, "oct_cryptodev", 100, + "OCT Cryptodev Engine"); + +#define _(n, k, t, a) \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \ + oct_crypto_enqueue_aead_aad_##a##_enc); \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \ + oct_crypto_enqueue_aead_aad_##a##_dec); + foreach_oct_crypto_aead_async_alg +#undef _ + +#define _(c, h, k, d) \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \ + oct_crypto_enqueue_linked_alg_enc); \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \ +
oct_crypto_enqueue_linked_alg_dec); + foreach_oct_crypto_link_async_alg; +#undef _ + + vnet_crypto_register_dequeue_handler (vm, engine_index, + oct_crypto_frame_dequeue); + + vnet_crypto_register_key_handler (vm, engine_index, oct_crypto_key_handler); + + return 0; +} + +/* + * Allocate one pending queue per vlib main, each with its inflight request + * ring and scatter-gather scratch memory, and point the per-queue counter + * pointers at the counters held in oct_crypto_main. + * Returns 0 on success, -1 on allocation failure. + */ +int +oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_main_t *ocm = &oct_crypto_main; + u32 n_inflight_req; + int i; + + ocm->pend_q = oct_plt_init_param.oct_plt_zmalloc ( + tm->n_vlib_mains * sizeof (oct_crypto_pending_queue_t), + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q == NULL) + { + log_err (dev, "Failed to allocate memory for crypto pending queue"); + return -1; + } + + /* + * Each pending queue will get number of cpt desc / number of cores. + * And that desc count is shared across inflight entries. + */ + n_inflight_req = (ocd->n_desc / tm->n_vlib_mains); + + for (i = 0; i < tm->n_vlib_mains; ++i) + { + ocm->pend_q[i].n_desc = n_inflight_req; + + ocm->pend_q[i].req_queue = oct_plt_init_param.oct_plt_zmalloc ( + ocm->pend_q[i].n_desc * sizeof (oct_crypto_inflight_req_t), + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q[i].req_queue == NULL) + { + log_err (dev, + "Failed to allocate memory for crypto inflight request"); + goto free; + } + + ocm->pend_q[i].sg_data = oct_plt_init_param.oct_plt_zmalloc ( + OCT_SCATTER_GATHER_BUFFER_SIZE * ocm->pend_q[i].n_desc, + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q[i].sg_data == NULL) + { + log_err (dev, "Failed to allocate crypto scatter gather memory"); + goto free; + } + +#define _(n, s, d) ocm->pend_q[i].s = &ocm->s##_counter; + foreach_crypto_counter; +#undef _ + } + + return 0; + +free: + /* Unwind from the failing index downwards; entries without a request ring are skipped. */ + for (; i >= 0; i--) + { + if (ocm->pend_q[i].req_queue == NULL) + continue; + + oct_plt_init_param.oct_plt_free (ocm->pend_q[i].sg_data); + + oct_plt_init_param.oct_plt_free (ocm->pend_q[i].req_queue); + } +
oct_plt_init_param.oct_plt_free (ocm->pend_q); + + return -1; +} diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h new file mode 100644 index 0000000000..b529142ad6 --- /dev/null +++ b/src/plugins/dev_octeon/crypto.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _CRYPTO_H_ +#define _CRYPTO_H_ +#include +#include + +#define OCT_MAX_N_CPT_DEV 2 + +#define OCT_CPT_LF_DEF_NB_DESC 16384 + +#define OCT_CPT_LF_MIN_NB_DESC 1024 +#define OCT_CPT_LF_MAX_NB_DESC 128000 + +#define OCT_MAX_CRYPTO_COUNTERS 3 + +/* counter, name, verbose */ +#define foreach_crypto_counter \ + _ (0, pending_packets, "crypto-pending-packets") \ + _ (1, crypto_frame, "crypto-pending-frames") \ + _ (2, success_packets, "crypto-success-packets") + +/* CRYPTO_ID, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */ +#define foreach_oct_crypto_aead_async_alg \ + _ (AES_128_GCM, 16, 16, 8) \ + _ (AES_128_GCM, 16, 16, 12) \ + _ (AES_192_GCM, 24, 16, 8) \ + _ (AES_192_GCM, 24, 16, 12) \ + _ (AES_256_GCM, 32, 16, 8) \ + _ (AES_256_GCM, 32, 16, 12) \ + _ (CHACHA20_POLY1305, 32, 16, 8) \ + _ (CHACHA20_POLY1305, 32, 16, 12) \ + _ (CHACHA20_POLY1305, 32, 16, 0) + +/* CRYPTO_ID, INTEG_ID, KEY_LENGTH_IN_BYTES, DIGEST_LEN */ +#define foreach_oct_crypto_link_async_alg \ + _ (AES_128_CBC, SHA1, 16, 12) \ + _ (AES_192_CBC, SHA1, 24, 12) \ + _ (AES_256_CBC, SHA1, 32, 12) \ + _ (AES_128_CBC, SHA256, 16, 16) \ + _ (AES_192_CBC, SHA256, 24, 16) \ + _ (AES_256_CBC, SHA256, 32, 16) \ + _ (AES_128_CBC, SHA384, 16, 24) \ + _ (AES_192_CBC, SHA384, 24, 24) \ + _ (AES_256_CBC, SHA384, 32, 24) \ + _ (AES_128_CBC, SHA512, 16, 32) \ + _ (AES_192_CBC, SHA512, 24, 32) \ + _ (AES_256_CBC, SHA512, 32, 32) \ + _ (AES_128_CBC, MD5, 16, 12) \ + _ (AES_192_CBC, MD5, 24, 12) \ + _ (AES_256_CBC, MD5, 32, 12) \ + _ (3DES_CBC, MD5, 24, 12) \ + _ (3DES_CBC, SHA1, 24, 12) \ + _ (3DES_CBC, SHA256, 24, 16) \ + _ 
(3DES_CBC, SHA384, 24, 24) \ + _ (3DES_CBC, SHA512, 24, 32) \ + _ (AES_128_CTR, SHA1, 16, 12) \ + _ (AES_192_CTR, SHA1, 24, 12) \ + _ (AES_256_CTR, SHA1, 32, 12) \ + _ (AES_128_CTR, SHA256, 16, 16) \ + _ (AES_192_CTR, SHA256, 24, 16) \ + _ (AES_256_CTR, SHA256, 32, 16) \ + _ (AES_128_CTR, SHA384, 16, 24) \ + _ (AES_192_CTR, SHA384, 24, 24) \ + _ (AES_256_CTR, SHA384, 32, 24) \ + _ (AES_128_CTR, SHA512, 16, 32) \ + _ (AES_192_CTR, SHA512, 24, 32) \ + _ (AES_256_CTR, SHA512, 32, 32) + +/* Advance ring index i by one, wrapping to 0 once it reaches l - 1. */ +#define OCT_MOD_INC(i, l) ((i) == ((l) - 1) ? (i) = 0 : (i)++) + +#define OCT_SCATTER_GATHER_BUFFER_SIZE 1024 + +#define CPT_LMT_SIZE_COPY (sizeof (struct cpt_inst_s) / 16) +#define OCT_MAX_LMT_SZ 16 + +#define SRC_IOV_SIZE \ + (sizeof (struct roc_se_iov_ptr) + \ + (sizeof (struct roc_se_buf_ptr) * ROC_MAX_SG_CNT)) + +#define OCT_CPT_LMT_GET_LINE_ADDR(lmt_addr, lmt_num) \ + (void *) ((u64) (lmt_addr) + ((u64) (lmt_num) << ROC_LMT_LINE_SIZE_LOG2)) + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + struct roc_cpt *roc_cpt; + struct roc_cpt_lmtline lmtline; + struct roc_cpt_lf lf; + vnet_dev_t *dev; + u32 n_desc; +} oct_crypto_dev_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** ROC CPT context */ + struct roc_se_ctx cpt_ctx; + /** CPT opcode */ + u16 cpt_op : 4; + /** Flag for AES GCM */ + u16 aes_gcm : 1; + /** IV length in bytes */ + u8 iv_length; + /** Auth IV length in bytes */ + u8 auth_iv_length; + /** IV offset in bytes */ + u16 iv_offset; + /** Auth IV offset in bytes */ + u16 auth_iv_offset; + /** CPT inst word 7 */ + u64 cpt_inst_w7; + /* initialise as part of first packet */ + u8 initialised; + /* store link key index in case of linked algo */ + vnet_crypto_key_index_t key_index; + oct_crypto_dev_t *crypto_dev; +} oct_crypto_sess_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + oct_crypto_sess_t *sess; + oct_crypto_dev_t *crypto_dev; +} oct_crypto_key_t; + +typedef struct oct_crypto_scatter_gather +{ + u8
buf[OCT_SCATTER_GATHER_BUFFER_SIZE]; +} oct_crypto_scatter_gather_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** Result data */ + volatile union cpt_res_s res; + /** Frame pointer */ + vnet_crypto_async_frame_t *frame; + /** Async frame element */ + vnet_crypto_async_frame_elt_t *fe; + /** AAD meta data */ + u8 aad[8]; + /** IV meta data */ + u8 iv[16]; + /** Digest len */ + u8 mac_len; + /** aead */ + bool aead_algo; + /** Set when encrypting linked algo with esn. + * To move digest data */ + bool esn_enabled; + /** Set if this is last element in frame */ + bool last_elts; + /** Index of element in frame */ + int index; +} __plt_cache_aligned oct_crypto_inflight_req_t; + +typedef struct +{ + /** Array of pending request */ + oct_crypto_inflight_req_t *req_queue; + /** Number of inflight operations in queue */ + u32 n_crypto_inflight; + /** Number of frames in queue */ + u32 n_crypto_frame; + /** Tail of queue to be used for enqueue */ + u16 enq_tail; + /** Head of queue to be used for dequeue */ + u16 deq_head; + /** Number of descriptors */ + u16 n_desc; + /** Scatter gather data */ + void **sg_data; + /** Crypto counters for pending pkts, inflight operations + * and successfully dequeued pkts in queue */ +#define _(i, s, d) vlib_simple_counter_main_t *s; + foreach_crypto_counter; +#undef _ +} oct_crypto_pending_queue_t; + +typedef struct +{ + oct_crypto_dev_t *crypto_dev[OCT_MAX_N_CPT_DEV]; + oct_crypto_key_t *keys[VNET_CRYPTO_ASYNC_OP_N_TYPES]; + oct_crypto_pending_queue_t *pend_q; +#define _(i, s, d) vlib_simple_counter_main_t s##_counter; + foreach_crypto_counter; +#undef _ + int n_cpt; + u8 started; +} oct_crypto_main_t; + +static_always_inline bool +oct_hw_ctx_cache_enable (void) +{ + return roc_errata_cpt_hang_on_mixed_ctx_val () || + roc_model_is_cn10ka_b0 () || roc_model_is_cn10kb_a0 (); +} + +extern oct_crypto_main_t oct_crypto_main; + +void oct_crypto_key_del_handler (vlib_main_t *vm, + vnet_crypto_key_index_t key_index); + 
+void oct_crypto_key_add_handler (vlib_main_t *vm, + vnet_crypto_key_index_t key_index); + +void oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx); + +int oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_0_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +vnet_crypto_async_frame_t *oct_crypto_frame_dequeue (vlib_main_t *vm, + u32 *nb_elts_processed, + u32 *enqueue_thread_idx); +int oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev); +int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev, + oct_crypto_dev_t *ocd); +#endif /* _CRYPTO_H_ */ diff --git a/src/plugins/dev_octeon/dev_octeon_virtio.mk b/src/plugins/dev_octeon/dev_octeon_virtio.mk new file mode 100644 index 0000000000..fff3918d53 --- /dev/null +++ b/src/plugins/dev_octeon/dev_octeon_virtio.mk @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2024 Marvell. 
+ +# Find the OCTEON DAO static libraries (and the DPDK archive they are linked with) +vpp_plugin_find_library(dev-octeon-virtio DAO_PAL_LIB "libdao_pal.a") +vpp_plugin_find_library(dev-octeon-virtio DAO_VIRT_LIB "libdao_virtio.a") +vpp_plugin_find_library(dev-octeon-virtio DAO_VIRT_NET_LIB "libdao_virtio_net.a") +vpp_plugin_find_library(dev-octeon-virtio DAO_VFIO_LIB "libdao_vfio.a") +vpp_plugin_find_library(dev-octeon-virtio DAO_PEM_LIB "libdao_pem.a") +vpp_plugin_find_library(dev-octeon-virtio DAO_COMM_LIB "libdao_common.a") +vpp_plugin_find_library(dev-octeon-virtio DAO_DPDK_LIB "libdpdk.a") + +vpp_find_path(DAO_NETDEV_INCLUDE_DIR NAMES dao_virtio_netdev.h) + +if (NOT DAO_NETDEV_INCLUDE_DIR) + message("OCTEON VIRTIO DAO files not found - Marvell OCTEON virtio device plugin disabled") + return() +endif() + +set(DAO_CONFG_INCLUDE_DIR "${DAO_NETDEV_INCLUDE_DIR}/..") + +# DAO_DPDK_LIB is checked too: it supplies the link directory and is part of the whole-archive list below. +if (NOT DAO_PAL_LIB OR NOT DAO_VIRT_LIB OR NOT DAO_VIRT_NET_LIB OR NOT DAO_VFIO_LIB OR NOT DAO_PEM_LIB OR NOT DAO_COMM_LIB OR NOT DAO_DPDK_LIB) + message("OCTEON VIRTIO DAO LIBS are not found - Marvell OCTEON virtio device plugin disabled") + return() +endif() + +unset(DAO_LINK_FLAGS) + +get_filename_component(DAO_DPDK_LIB_DIR ${DAO_DPDK_LIB} DIRECTORY) + +link_directories(${DAO_DPDK_LIB_DIR}) +string_append(DAO_LINK_FLAGS "-L${DAO_DPDK_LIB_DIR}") +string_append(DAO_LINK_FLAGS "-lnuma -lz -lelf -lpcap -ljansson -lfdt") +if(OPENSSL_FOUND) + string_append(DAO_LINK_FLAGS "-lssl") + string_append(DAO_LINK_FLAGS "-lcrypto") +endif() + +string_append(DAO_LINK_FLAGS "-Wl,--whole-archive,${DAO_PAL_LIB},${DAO_VIRT_LIB},${DAO_VIRT_NET_LIB},${DAO_VFIO_LIB},${DAO_PEM_LIB},${DAO_COMM_LIB},${DAO_DPDK_LIB},--no-whole-archive") + +include_directories (${DAO_NETDEV_INCLUDE_DIR}/) +include_directories (${DAO_CONFG_INCLUDE_DIR}/) + +add_vpp_plugin(dev_octeon_virtio + SOURCES + virtio.c + virtio_bus.c + virtio_port.c + virtio_ctrl.c + virtio_tx_node.c + virtio_rx_node.c + virtio_format.c + + LINK_FLAGS + "${DAO_LINK_FLAGS}" +) diff --git a/src/plugins/dev_octeon/dpu/dpu.c
b/src/plugins/dev_octeon/dpu/dpu.c new file mode 100644 index 0000000000..b04ede4037 --- /dev/null +++ b/src/plugins/dev_octeon/dpu/dpu.c @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +/** + * @file + * @brief Host DPU interface. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +always_inline void +h2d_compute_checksum (vlib_main_t *vm, vlib_buffer_t *b) +{ + ethernet_header_t *e; + ip4_header_t *ip; + tcp_header_t *th; + udp_header_t *uh; + + e = vlib_buffer_get_current (b); + if (PREDICT_TRUE (clib_net_to_host_u16 (e->type) == ETHERNET_TYPE_IP4)) + { + ip = (ip4_header_t *) (((u8 *) e) + sizeof (ethernet_header_t)); + if (ip->protocol == IP_PROTOCOL_TCP) + { + th = (tcp_header_t *) (b->data + b->current_data + + sizeof (ethernet_header_t) + + ip4_header_bytes (ip)); + th->checksum = 0; + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip); + } + else if (ip->protocol == IP_PROTOCOL_UDP) + { + uh = (udp_header_t *) (b->data + b->current_data + + sizeof (ethernet_header_t) + + ip4_header_bytes (ip)); + uh->checksum = 0; + uh->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip); + } + } +} + +static u8 * +format_h2d_input_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + s = format (s, "h2d-input:\n"); + return s; +} + +VLIB_NODE_FN (h2d_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_left, next0, next1, next2, next3; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; + u16 nexts[VLIB_FRAME_SIZE], *next; + vlib_buffer_t **b = bufs; + u32 *from; + + from = vlib_frame_vector_args (frame); + n_left = frame->n_vectors; + next = nexts; + vlib_get_buffers (vm, from, bufs, n_left); + + while (n_left >= 8) + { + vlib_buffer_advance 
(b[0], OCT_H2D_META_SIZE); + vlib_buffer_advance (b[1], OCT_H2D_META_SIZE); + vlib_buffer_advance (b[2], OCT_H2D_META_SIZE); + vlib_buffer_advance (b[3], OCT_H2D_META_SIZE); + + h2d_compute_checksum (vm, b[0]); + h2d_compute_checksum (vm, b[1]); + h2d_compute_checksum (vm, b[2]); + h2d_compute_checksum (vm, b[3]); + + vnet_feature_next (&next0, b[0]); + vnet_feature_next (&next1, b[1]); + vnet_feature_next (&next2, b[2]); + vnet_feature_next (&next3, b[3]); + + next[0] = (u16) next0; + next[1] = (u16) next1; + next[2] = (u16) next2; + next[3] = (u16) next3; + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[0], sizeof (u32)); + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[1], sizeof (u32)); + if (b[2]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[2], sizeof (u32)); + if (b[3]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[3], sizeof (u32)); + + b += 4; + next += 4; + n_left -= 4; + } + + while (n_left) + { + vlib_buffer_advance (b[0], OCT_H2D_META_SIZE); + h2d_compute_checksum (vm, b[0]); + vnet_feature_next (&next0, b[0]); + next[0] = (u16) next0; + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[0], sizeof (u32)); + b += 1; + next += 1; + n_left -= 1; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + return frame->n_vectors; +} + +VNET_FEATURE_INIT (h2d_input_node, static) = { + .arc_name = "port-rx-eth", + .node_name = "h2d-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VLIB_REGISTER_NODE (h2d_input_node) = { + .vector_size = sizeof (u32), + .format_trace = format_h2d_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 0, + .name = "h2d-input", +}; + +always_inline u8 +d2h_validate_checksum (vlib_main_t *vm, vlib_buffer_t *b) +{ + u8 csum = OCT_D2H_CSUM_VERIFIED; + ethernet_header_t *e; + ip4_header_t *ip; + + e = vlib_buffer_get_current (b); + if (PREDICT_TRUE 
(clib_net_to_host_u16 (e->type) == ETHERNET_TYPE_IP4)) + { + vlib_buffer_advance (b, sizeof (ethernet_header_t)); + ip = vlib_buffer_get_current (b); + + if (ip->protocol == IP_PROTOCOL_TCP || ip->protocol == IP_PROTOCOL_UDP) + { + ip4_tcp_udp_validate_checksum (vm, b); + if (!(b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)) + csum = OCT_D2H_CSUM_FAILED; + } + vlib_buffer_advance (b, -sizeof (ethernet_header_t)); + } + return csum; +} + +static u8 * +format_d2h_output_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + s = format (s, "d2h-output\n"); + return s; +} + +VLIB_NODE_FN (d2h_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + oct_d2h_meta_t *hdr0, *hdr1, *hdr2, *hdr3; + u32 n_left, next0, next1, next2, next3; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; + u16 nexts[VLIB_FRAME_SIZE], *next; + u8 csum0, csum1, csum2, csum3; + vlib_buffer_t **b = bufs; + u32 *from; + + from = vlib_frame_vector_args (frame); + n_left = frame->n_vectors; + next = nexts; + vlib_get_buffers (vm, from, bufs, n_left); + + while (n_left >= 8) + { + csum0 = d2h_validate_checksum (vm, b[0]); + csum1 = d2h_validate_checksum (vm, b[1]); + csum2 = d2h_validate_checksum (vm, b[2]); + csum3 = d2h_validate_checksum (vm, b[3]); + + vlib_buffer_advance (b[0], -OCT_D2H_META_SIZE); + vlib_buffer_advance (b[1], -OCT_D2H_META_SIZE); + vlib_buffer_advance (b[2], -OCT_D2H_META_SIZE); + vlib_buffer_advance (b[3], -OCT_D2H_META_SIZE); + + clib_prefetch_load ((u8 *) vlib_buffer_get_current (b[4]) - + OCT_D2H_META_SIZE); + clib_prefetch_load ((u8 *) vlib_buffer_get_current (b[5]) - + OCT_D2H_META_SIZE); + clib_prefetch_load ((u8 *) vlib_buffer_get_current (b[6]) - + OCT_D2H_META_SIZE); + clib_prefetch_load ((u8 *) vlib_buffer_get_current (b[7]) - + OCT_D2H_META_SIZE); + + hdr0 = vlib_buffer_get_current (b[0]); + hdr1 = vlib_buffer_get_current (b[1]); + hdr2 
= vlib_buffer_get_current (b[2]); + hdr3 = vlib_buffer_get_current (b[3]); + + hdr0->as_u64 = 0; + hdr1->as_u64 = 0; + hdr2->as_u64 = 0; + hdr3->as_u64 = 0; + + hdr0->csum_verified = csum0; + hdr1->csum_verified = csum1; + hdr2->csum_verified = csum2; + hdr3->csum_verified = csum3; + + vnet_feature_next (&next0, b[0]); + vnet_feature_next (&next1, b[1]); + vnet_feature_next (&next2, b[2]); + vnet_feature_next (&next3, b[3]); + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[0], sizeof (u32)); + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[1], sizeof (u32)); + if (b[2]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[2], sizeof (u32)); + if (b[3]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[3], sizeof (u32)); + + next[0] = (u16) next0; + next[1] = (u16) next1; + next[2] = (u16) next2; + next[3] = (u16) next3; + + b += 4; + next += 4; + n_left -= 4; + } + while (n_left) + { + csum0 = d2h_validate_checksum (vm, b[0]); + vlib_buffer_advance (b[0], -OCT_D2H_META_SIZE); + hdr0 = vlib_buffer_get_current (b[0]); + hdr0->as_u64 = 0; + hdr0->csum_verified = csum0; + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + vlib_add_trace (vm, node, b[0], sizeof (u32)); + vnet_feature_next (&next0, b[0]); + next[0] = (u16) next0; + + b += 1; + next += 1; + n_left -= 1; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + return frame->n_vectors; +} + +VNET_FEATURE_INIT (d2h_output_node, static) = { + .arc_name = "interface-output", + .node_name = "d2h-output", + .runs_before = VNET_FEATURES ("interface-output-arc-end"), +}; + +VLIB_REGISTER_NODE (d2h_output_node) = { + .vector_size = sizeof (u32), + .format_trace = format_d2h_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 0, + .name = "d2h-output", +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/plugins/dev_octeon/dpu/dpu.h b/src/plugins/dev_octeon/dpu/dpu.h new file mode 100644 index 0000000000..15586615bc --- /dev/null +++ b/src/plugins/dev_octeon/dpu/dpu.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _DPU_H_ +#define _DPU_H_ + +/** + * HOST to DPU meta data + */ +typedef struct oct_h2d_meta +{ + u64 as_u64[3]; +} oct_h2d_meta_t; + +#define OCT_H2D_META_SIZE (sizeof (oct_h2d_meta_t)) + +/** + * DPU to HOST meta data + */ +typedef union oct_d2h_meta +{ + u64 as_u64; + struct + { + u64 request_id : 16; + u64 reserved : 2; + u64 csum_verified : 2; + u64 destqport : 22; + u64 sport : 6; + u64 opcode : 16; + }; +} oct_d2h_meta_t; + +#define OCT_D2H_META_SIZE (sizeof (oct_d2h_meta_t)) + +#define OCT_D2H_CSUM_FAILED 0x0 +#define OCT_D2H_L4SUM_VERIFIED 0x1 +#define OCT_D2H_IPSUM_VERIFIED 0x2 +#define OCT_D2H_CSUM_VERIFIED (OCT_D2H_L4SUM_VERIFIED | OCT_D2H_IPSUM_VERIFIED) + +#endif /* _DPU_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dev_octeon/esp_encrypt.c b/src/plugins/dev_octeon/esp_encrypt.c new file mode 100644 index 0000000000..78a75988c7 --- /dev/null +++ b/src/plugins/dev_octeon/esp_encrypt.c @@ -0,0 +1,346 @@ +#include +#include +#include +#include + +extern oct_ipsec_main_t oct_ipsec_main; + +#define foreach_oct_esp_encrypt_error \ + _ (RX_PKTS, "ESP pkts received") \ + _ (RX_POST_PKTS, "ESP-POST pkts received") \ + _ (NOT_L3PKT, "L3 header offset not valid") \ + _ (CHAINING_NOSUPP, "Packet chainining not supported in IPsec") \ + _ (SEQ_CYCLED, "sequence number cycled (packet dropped)") \ + _ (HANDOFF, "handoff") \ + _ (INVALID_SA, "invalid SA") \ + _ (FRAME_ALLOC, "encrypt ipsec frame alloc failed") \ + _ (UNDEFINED, "undefined encrypt error") + +typedef struct +{ + u32 sa_index; + u32 spi; + u32 seq; + u32 sa_seq_hi; + u32 
next_index; + u32 owner_thread; + u32 handoff_thread; + u8 udp_encap; + vlib_error_t error; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; + u8 data[256]; + vlib_buffer_t buf; +} oct_esp_encrypt_trace_t; + +#define foreach_oct_esp_encrypt_tun_next \ + _ (DROP4, "ip4-drop") \ + _ (DROP6, "ip6-drop") \ + _ (ADJ_MIDCHAIN_TX, "adj-midchain-tx") + +/* clang-format off */ +typedef enum +{ +#define _(v, s) OCT_ESP_ENCRYPT_TUN_NEXT_##v, + foreach_oct_esp_encrypt_tun_next +#undef _ + OCT_ESP_ENCRYPT_TUN_N_NEXT +} oct_esp_encrypt_tun_next_t; +/* clang-format on */ + +/* clang-format off */ +typedef enum +{ +#define _(sym, str) OCT_ESP_ENCRYPT_ERROR_##sym, + foreach_oct_esp_encrypt_error +#undef _ +#define _(sym, str) OCT_ESP_ENCRYPT_CN10K_ERROR_##sym, + foreach_octeon_cn10k_ipsec_ucc +#undef _ +} oct_esp_encrypt_error_t; +/* clang-format on */ + +/* Packet trace format function */ +static u8 * +format_oct_esp_encrypt_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + oct_esp_encrypt_trace_t *t = va_arg (*args, oct_esp_encrypt_trace_t *); + vlib_error_main_t *em = &vm->error_main; + u32 indent = format_get_indent (s); + vlib_error_t e = t->error; + u32 ci; + + s = format (s, "%U %U\n", format_white_space, indent, format_vnet_buffer, + &t->buf); + + if (e) + { + ci = vlib_error_get_code (&vm->node_main, e); + + ci += node->error_heap_index; + + s = format (s, "%UStatus: %s", format_white_space, indent, + em->counters_heap[ci].name); + + if (t->handoff_thread == t->owner_thread) + s = format (s, ", Handoff thread: %u", t->handoff_thread); + + s = format (s, "\n"); + } + + s = format (s, "%USA owner thread: %u\n", format_white_space, indent, + t->owner_thread); + + if (t->next_index != ~0) + s = format (s, "%Unext node: %U\n", format_white_space, indent, + format_vlib_next_node_name, vm, node->index, t->next_index); + + s = format (s, + "%Uesp: 
sa-index %d spi %u (0x%08x) seq %u sa-seq-hi %u " + "crypto %U integrity %U%s", + format_white_space, indent, t->sa_index, t->spi, t->spi, t->seq, + t->sa_seq_hi, format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg, + t->udp_encap ? " udp-encap-enabled" : ""); + + if (vm->trace_main.verbose) + { + s = format (s, "\n%U%U", format_white_space, indent + 4, format_hexdump, + &t->data, 128); + } + return s; +} + +static_always_inline void +oct_esp_encrypt_tun_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, vlib_buffer_t *b, + u32 next_index) +{ + oct_esp_encrypt_trace_t *tr; + ipsec_sa_t *sa; + u32 sa_index; + + tr = vlib_add_trace (vm, node, b, sizeof (*tr)); + sa_index = vnet_buffer (b)->ipsec.sad_index; + sa = ipsec_sa_get (sa_index); + tr->next_index = next_index; + tr->sa_index = sa_index; + tr->spi = sa->spi; + tr->seq = sa->seq; + tr->sa_seq_hi = sa->seq_hi; + tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa); + tr->crypto_alg = sa->crypto_alg; + tr->integ_alg = sa->integ_alg; + tr->owner_thread = sa->thread_index; + + clib_memcpy_fast (&tr->buf, b, sizeof b[0] - sizeof b->pre_data); + clib_memcpy_fast (tr->buf.pre_data, b->data, sizeof tr->buf.pre_data); + clib_memcpy_fast (tr->data, vlib_buffer_get_current (b), 256); +} + +static_always_inline u32 +oct_ipsec_sa_index_get (vlib_buffer_t *b, const int is_tun) +{ + u32 sa_index, adj_index; + + if (is_tun) + { + adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX]; + sa_index = ipsec_tun_protect_get_sa_out (adj_index); + vnet_buffer (b)->ipsec.sad_index = sa_index; + } + else + sa_index = vnet_buffer (b)->ipsec.sad_index; + + return sa_index; +} + +static_always_inline uword +oct_esp_encrypt_tun (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, const int is_ip6) +{ + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u32 sa0_index, sa1_index, sa2_index, sa3_index; + 
u32 current_sa0_index = ~0, current_sa1_index = ~0; + u32 current_sa2_index = ~0, current_sa3_index = ~0; + ipsec_sa_t *sa0 = NULL, *sa1 = NULL, *sa2 = NULL, *sa3 = NULL; + + vlib_get_buffers (vm, from, b, frame->n_vectors); + + while (n_left > 11) + { + vlib_prefetch_buffer_header (b[8], LOAD); + vlib_prefetch_buffer_header (b[9], LOAD); + vlib_prefetch_buffer_header (b[10], LOAD); + vlib_prefetch_buffer_header (b[11], LOAD); + + sa0_index = oct_ipsec_sa_index_get (b[0], 1); + sa1_index = oct_ipsec_sa_index_get (b[1], 1); + sa2_index = oct_ipsec_sa_index_get (b[2], 1); + sa3_index = oct_ipsec_sa_index_get (b[3], 1); + + if (sa0_index != current_sa0_index) + { + sa0 = ipsec_sa_get (sa0_index); + current_sa0_index = sa0_index; + } + if (sa1_index != current_sa1_index) + { + sa1 = ipsec_sa_get (sa1_index); + current_sa1_index = sa1_index; + } + if (sa2_index != current_sa2_index) + { + sa2 = ipsec_sa_get (sa2_index); + current_sa2_index = sa2_index; + } + if (sa3_index != current_sa3_index) + { + sa3 = ipsec_sa_get (sa3_index); + current_sa3_index = sa3_index; + } + + /* + * If this is the first packet to use this SA, assign thread based + * on SA index. Don't need to do core-handoff on OCTEON as send queue + * is used based on thread index. 
+ */ + if (PREDICT_FALSE (sa0->thread_index == 0xFFFF)) + sa0->thread_index = (sa0_index % vlib_num_workers ()) + 1; + if (PREDICT_FALSE (sa1->thread_index == 0xFFFF)) + sa1->thread_index = (sa1_index % vlib_num_workers ()) + 1; + if (PREDICT_FALSE (sa2->thread_index == 0xFFFF)) + sa2->thread_index = (sa2_index % vlib_num_workers ()) + 1; + if (PREDICT_FALSE (sa3->thread_index == 0xFFFF)) + sa3->thread_index = (sa3_index % vlib_num_workers ()) + 1; + + vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; + vnet_buffer (b[1])->ipsec.thread_index = sa1->thread_index; + vnet_buffer (b[2])->ipsec.thread_index = sa2->thread_index; + vnet_buffer (b[3])->ipsec.thread_index = sa3->thread_index; + + vnet_buffer (b[0])->oflags |= VNET_BUFFER_OFFLOAD_F_IPSEC_OFFLOAD; + vnet_buffer (b[1])->oflags |= VNET_BUFFER_OFFLOAD_F_IPSEC_OFFLOAD; + vnet_buffer (b[2])->oflags |= VNET_BUFFER_OFFLOAD_F_IPSEC_OFFLOAD; + vnet_buffer (b[3])->oflags |= VNET_BUFFER_OFFLOAD_F_IPSEC_OFFLOAD; + + b += 4; + n_left -= 4; + } + + current_sa0_index = ~0; + while (n_left > 0) + { + sa0_index = oct_ipsec_sa_index_get (b[0], 1); + + if (sa0_index != current_sa0_index) + { + sa0 = ipsec_sa_get (sa0_index); + current_sa0_index = sa0_index; + } + + /* + * If this is the first packet to use this SA, assign thread based + * on SA index. Don't need to do core-handoff on OCTEON as send queue + * is used based on thread index. 
+ */ + + if (PREDICT_FALSE (0XFFFF == sa0->thread_index)) + sa0->thread_index = (sa0_index % vlib_num_workers ()) + 1; + + vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index; + + vnet_buffer (b[0])->oflags |= VNET_BUFFER_OFFLOAD_F_IPSEC_OFFLOAD; + + b++; + n_left--; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + n_left = frame->n_vectors; + b = bufs; + while (n_left > 0) + { + if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + oct_esp_encrypt_tun_add_trace ( + vm, node, frame, b[0], + OCT_ESP_ENCRYPT_TUN_NEXT_ADJ_MIDCHAIN_TX); + } + + b += 1; + n_left--; + } + } + + vlib_buffer_enqueue_to_single_next (vm, node, from, + OCT_ESP_ENCRYPT_TUN_NEXT_ADJ_MIDCHAIN_TX, + frame->n_vectors); + + return frame->n_vectors; +} + +/** + * @brief OCTEON ESP4 encryption tunnel node. + * @node oct-esp4-encrypt-tun + * + * This is the OCTEON ESP4 encryption tunnel node. + * + * @param vm vlib_main_t corresponding to the current thread + * @param node vlib_node_runtime_t + * @param frame vlib_frame_t + */ +/* clang-format off */ +VLIB_NODE_FN (oct_esp4_encrypt_tun_node) (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return oct_esp_encrypt_tun ( + vm, node, frame, 0); +} + +VLIB_REGISTER_NODE (oct_esp4_encrypt_tun_node) = { + .name = "oct-esp4-encrypt-tun", + .vector_size = sizeof (u32), + .format_trace = format_oct_esp_encrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_next_nodes = OCT_ESP_ENCRYPT_TUN_N_NEXT, + .next_nodes = { +#define _(next, node) [OCT_ESP_ENCRYPT_TUN_NEXT_##next] = node, + foreach_oct_esp_encrypt_tun_next +#undef _ + }, + +}; +/* clang-format on */ + +/** + * @brief OCT ESP6 encryption tunnel node. + * @node oct-esp6-encrypt-tun + * + * This is the ONP ESP6 encryption tunnel node. 
+ * + * @param vm vlib_main_t corresponding to the current thread + * @param node vlib_node_runtime_t + * @param frame vlib_frame_t + */ +/* clang-format off */ +VLIB_NODE_FN (oct_esp6_encrypt_tun_node) (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return oct_esp_encrypt_tun (vm, node, frame, 1); +} + +VLIB_REGISTER_NODE (oct_esp6_encrypt_tun_node) = { + .name = "oct-esp6-encrypt-tun", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + .sibling_of = "oct-esp4-encrypt-tun", +}; +/* clang-format on */ diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c new file mode 100644 index 0000000000..3dde2a4c87 --- /dev/null +++ b/src/plugins/dev_octeon/flow.c @@ -0,0 +1,940 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "octeon", + .subclass_name = "flow", +}; + +#define FLOW_IS_ETHERNET_CLASS(f) (f->type == VNET_FLOW_TYPE_ETHERNET) + +#define FLOW_IS_IPV4_CLASS(f) \ + ((f->type == VNET_FLOW_TYPE_IP4) || \ + (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \ + (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \ + (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \ + (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \ + (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \ + (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \ + (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \ + (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH)) + +#define FLOW_IS_IPV6_CLASS(f) \ + ((f->type == VNET_FLOW_TYPE_IP6) || \ + (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \ + (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \ + (f->type == VNET_FLOW_TYPE_IP6_VXLAN)) + +#define FLOW_IS_L3_TYPE(f) \ + ((f->type == VNET_FLOW_TYPE_IP4) || (f->type == VNET_FLOW_TYPE_IP6)) + +#define FLOW_IS_L4_TYPE(f) \ + ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \ + (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \ + (f->type == 
VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \ + (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED)) + +#define FLOW_IS_L4_TUNNEL_TYPE(f) \ + ((f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \ + (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \ + (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \ + (f->type == VNET_FLOW_TYPE_IP4_GTPU)) + +#define FLOW_IS_GENERIC_TYPE(f) (f->type == VNET_FLOW_TYPE_GENERIC) + +#define OCT_FLOW_UNSUPPORTED_ACTIONS(f) \ + ((f->actions == VNET_FLOW_ACTION_BUFFER_ADVANCE) || \ + (f->actions == VNET_FLOW_ACTION_REDIRECT_TO_NODE)) + +/* Keep values in sync with vnet/flow.h */ +#define foreach_oct_flow_rss_types \ + _ (1, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_TCP, "ipv4-tcp") \ + _ (2, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_UDP, "ipv4-udp") \ + _ (3, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_SCTP, "ipv4-sctp") \ + _ (5, FLOW_KEY_TYPE_IPV4, "ipv4") \ + _ (9, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_TCP, "ipv6-tcp") \ + _ (10, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_UDP, "ipv6-udp") \ + _ (11, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_SCTP, "ipv6-sctp") \ + _ (13, FLOW_KEY_TYPE_IPV6_EXT, "ipv6-ex") \ + _ (14, FLOW_KEY_TYPE_IPV6, "ipv6") \ + _ (16, FLOW_KEY_TYPE_PORT, "port") \ + _ (17, FLOW_KEY_TYPE_VXLAN, "vxlan") \ + _ (18, FLOW_KEY_TYPE_GENEVE, "geneve") \ + _ (19, FLOW_KEY_TYPE_NVGRE, "nvgre") \ + _ (20, FLOW_KEY_TYPE_GTPU, "gtpu") \ + _ (60, FLOW_KEY_TYPE_L4_DST, "l4-dst-only") \ + _ (61, FLOW_KEY_TYPE_L4_SRC, "l4-src-only") \ + _ (62, FLOW_KEY_TYPE_L3_DST, "l3-dst-only") \ + _ (63, FLOW_KEY_TYPE_L3_SRC, "l3-src-only") + +#define GTPU_PORT 2152 +#define VXLAN_PORT 4789 + +typedef struct +{ + u16 src_port; + u16 dst_port; + u32 verification_tag; + u32 cksum; +} sctp_header_t; + +typedef struct +{ + u8 ver_flags; + u8 type; + u16 length; + u32 teid; +} gtpu_header_t; + +typedef struct +{ + u8 layer; + u16 nxt_proto; + vnet_dev_port_t *port; + struct roc_npc_item_info *items; + struct + { + u8 *spec; + u8 *mask; + u16 off; + } oct_drv; + struct + { + u8 *spec; + u8 *mask; + u16 off; + u16 len; + } 
generic;
+} oct_flow_parse_state;
+
+/**
+ * @brief Translate a vnet RSS type bitmap into an OCTEON flow key bitmap.
+ *
+ * For every entry of foreach_oct_flow_rss_types whose bit position is set
+ * in @a rss_types, the matching FLOW_KEY_TYPE_* flags are OR-ed into @a key.
+ *
+ * @param key        flow key accumulator, updated in place (never cleared)
+ * @param rss_types  RSS type bitmap taken from the flow
+ */
+static void
+oct_flow_convert_rss_types (u64 *key, u64 rss_types)
+{
+  /* 1ULL keeps the shift 64 bits wide; table bit positions go up to 63 */
+#define _(a, b, c)                                                            \
+  if (rss_types & (1ULL << a))                                                \
+    *key |= b;
+
+  foreach_oct_flow_rss_types
+#undef _
+
+  return;
+}
+
+/**
+ * @brief Validate a flow request before it is programmed on a port.
+ *
+ * @param vm          vlib main (not used by this function)
+ * @param port        port the flow is to be applied to
+ * @param type        requested port configuration operation
+ * @param flow_index  index of the vnet flow to validate
+ * @param priv_data   driver private data slot (not used by this function)
+ *
+ * @return VNET_DEV_OK when the request can be handled,
+ *         VNET_DEV_ERR_NOT_SUPPORTED otherwise.
+ */
+vnet_dev_rv_t
+oct_flow_validate_params (vlib_main_t *vm, vnet_dev_port_t *port,
+                          vnet_dev_port_cfg_type_t type, u32 flow_index,
+                          uword *priv_data)
+{
+  vnet_flow_t *flow = vnet_get_flow (flow_index);
+  u32 last_queue;
+  u32 qid;
+
+  /* ESP flow redirected to queue ~0 is accepted without further checks;
+   * oct_flow_add programs this combination as an inline IPsec rule. */
+  if ((flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE) &&
+      flow->redirect_queue == ~0 && flow->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP)
+    return VNET_DEV_OK;
+
+  if (type == VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER ||
+      type == VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER)
+    {
+      log_err (port->dev, "Unsupported request type");
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  if (OCT_FLOW_UNSUPPORTED_ACTIONS (flow))
+    {
+      log_err (port->dev, "Unsupported flow action");
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
+    {
+      qid = flow->redirect_queue;
+      /* qid is unsigned, so one upper-bound test is enough; comparing
+       * against the queue count (not count - 1) also rejects every queue
+       * when the port has no RX queues. */
+      if (qid >= port->intf.num_rx_queues)
+        {
+          log_err (port->dev,
+                   "Given Q(%d) is invalid, supported range is %d-%d", qid, 0,
+                   port->intf.num_rx_queues - 1);
+          return VNET_DEV_ERR_NOT_SUPPORTED;
+        }
+    }
+
+  if (flow->actions & VNET_FLOW_ACTION_RSS)
+    {
+      /* The RSS group covers queue_index .. queue_index + queue_num - 1,
+       * so the last queue used is one below the sum. An empty group is
+       * left for oct_flow_add to reject. */
+      last_queue = flow->queue_index + flow->queue_num;
+      if (flow->queue_num)
+        last_queue--;
+
+      if (last_queue >= port->intf.num_rx_queues)
+        {
+          log_err (port->dev,
+                   "Given Q range(%d-%d) is invalid, supported range is %d-%d",
+                   flow->queue_index, last_queue, 0,
+                   port->intf.num_rx_queues - 1);
+          return VNET_DEV_ERR_NOT_SUPPORTED;
+        }
+    }
+  return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions,
+                      struct roc_npc_item_info *item_info, vnet_flow_t *flow,
+                      uword *private_data)
+{
+  oct_port_t *oct_port = vnet_dev_get_port_data (port);
+  struct roc_npc_attr attr = { .priority = 1, .ingress = 1 };
+  struct
roc_npc_flow *npc_flow; + oct_flow_entry_t *flow_entry; + struct roc_npc *npc; + int rv = 0; + + npc = &oct_port->npc; + + for (int i = 0; item_info[i].type != ROC_NPC_ITEM_TYPE_END; i++) + { + log_debug (port->dev, "Flow[%d] Item[%d] type %d spec 0x%U mask 0x%U", + flow->index, i, item_info[i].type, format_hex_bytes, + item_info[i].spec, item_info[i].size, format_hex_bytes, + item_info[i].mask, item_info[i].size); + } + + npc_flow = + roc_npc_flow_create (npc, &attr, item_info, actions, npc->pf_func, &rv); + if (rv) + { + log_err (port->dev, "roc_npc_flow_create failed with '%s' error", + roc_error_msg_get (rv)); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + roc_npc_mcam_clear_counter (npc, npc_flow->ctr_id); + + pool_get_zero (oct_port->flow_entries, flow_entry); + flow_entry->index = flow_entry - oct_port->flow_entries; + flow_entry->vnet_flow_index = flow->index; + flow_entry->npc_flow = npc_flow; + + *private_data = flow_entry->index; + + return VNET_DEV_OK; +} + +static int +oct_parse_l2 (oct_flow_parse_state *pst) +{ + struct roc_npc_flow_item_eth *eth_spec = + (struct roc_npc_flow_item_eth *) &pst->oct_drv.spec[pst->oct_drv.off]; + struct roc_npc_flow_item_eth *eth_mask = + (struct roc_npc_flow_item_eth *) &pst->oct_drv.mask[pst->oct_drv.off]; + ethernet_header_t *eth_hdr_mask = + (ethernet_header_t *) &pst->generic.mask[pst->generic.off]; + ethernet_header_t *eth_hdr = + (ethernet_header_t *) &pst->generic.spec[pst->generic.off]; + u16 tpid, etype; + + tpid = etype = clib_net_to_host_u16 (eth_hdr->type); + clib_memcpy_fast (eth_spec, eth_hdr, sizeof (ethernet_header_t)); + clib_memcpy_fast (eth_mask, eth_hdr_mask, sizeof (ethernet_header_t)); + eth_spec->has_vlan = 0; + + pst->items[pst->layer].spec = (void *) eth_spec; + pst->items[pst->layer].mask = (void *) eth_mask; + pst->items[pst->layer].size = sizeof (ethernet_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_ETH; + pst->generic.off += sizeof (ethernet_header_t); + pst->oct_drv.off += 
sizeof (struct roc_npc_flow_item_eth);
+  pst->layer++;
+
+  /* Parse VLAN Tags if any */
+  struct roc_npc_flow_item_vlan *vlan_spec =
+    (struct roc_npc_flow_item_vlan *) &pst->oct_drv.spec[pst->oct_drv.off];
+  struct roc_npc_flow_item_vlan *vlan_mask =
+    (struct roc_npc_flow_item_vlan *) &pst->oct_drv.mask[pst->oct_drv.off];
+  ethernet_vlan_header_t *vlan_hdr, *vlan_hdr_mask;
+  u8 vlan_cnt = 0;
+
+  while (tpid == ETHERNET_TYPE_DOT1AD || tpid == ETHERNET_TYPE_VLAN)
+    {
+      if (pst->generic.off >= pst->generic.len)
+        break;
+
+      vlan_hdr =
+        (ethernet_vlan_header_t *) &pst->generic.spec[pst->generic.off];
+      vlan_hdr_mask =
+        (ethernet_vlan_header_t *) &pst->generic.mask[pst->generic.off];
+      tpid = etype = clib_net_to_host_u16 (vlan_hdr->type);
+      clib_memcpy (&vlan_spec[vlan_cnt], vlan_hdr,
+                   sizeof (ethernet_vlan_header_t));
+      clib_memcpy (&vlan_mask[vlan_cnt], vlan_hdr_mask,
+                   sizeof (ethernet_vlan_header_t));
+      pst->items[pst->layer].spec = (void *) &vlan_spec[vlan_cnt];
+      pst->items[pst->layer].mask = (void *) &vlan_mask[vlan_cnt];
+      pst->items[pst->layer].size = sizeof (ethernet_vlan_header_t);
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VLAN;
+      pst->generic.off += sizeof (ethernet_vlan_header_t);
+      pst->oct_drv.off += sizeof (struct roc_npc_flow_item_vlan);
+      pst->layer++;
+      vlan_cnt++;
+    }
+
+  /* Inner most vlan tag */
+  if (vlan_cnt)
+    vlan_spec[vlan_cnt - 1].has_more_vlan = 0;
+
+  pst->nxt_proto = etype;
+  return 0;
+}
+
+/**
+ * @brief Parse the L3 part of a generic flow pattern into ROC NPC items.
+ *
+ * Consumes MPLS, IPv4 or IPv6 (with hop-by-hop, destination-options,
+ * routing and fragment extension headers) starting at pst->generic.off,
+ * appends one item per header to pst->items and records the next protocol
+ * in pst->nxt_proto.
+ *
+ * @param pst parser state, updated in place
+ * @return 0 on success or when there is nothing left to parse,
+ *         -1 when pst->nxt_proto is not a supported L3 type.
+ */
+static int
+oct_parse_l3 (oct_flow_parse_state *pst)
+{
+
+  if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0)
+    return 0;
+
+  if (pst->nxt_proto == ETHERNET_TYPE_MPLS)
+    {
+      int label_stack_bottom = 0;
+      do
+        {
+
+          u8 *mpls_spec = &pst->generic.spec[pst->generic.off];
+          u8 *mpls_mask = &pst->generic.mask[pst->generic.off];
+
+          /* S (bottom-of-stack) flag: least significant bit of byte 2 of
+           * the 4-byte label stack entry */
+          label_stack_bottom = mpls_spec[2] & 1;
+          pst->items[pst->layer].spec = (void *) mpls_spec;
+          pst->items[pst->layer].mask = (void *) mpls_mask;
+          pst->items[pst->layer].size = sizeof (u32);
+          pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_MPLS;
+          pst->generic.off += sizeof (u32);
+          pst->layer++;
+        }
+      /* Keep consuming entries until the bottom-of-stack entry has been
+       * taken, and never read past the end of the supplied pattern. */
+      while (!label_stack_bottom &&
+             pst->generic.off + sizeof (u32) <= pst->generic.len);
+
+      pst->nxt_proto = 0;
+      return 0;
+    }
+  else if (pst->nxt_proto == ETHERNET_TYPE_IP4)
+    {
+      ip4_header_t *ip4_spec =
+        (ip4_header_t *) &pst->generic.spec[pst->generic.off];
+      ip4_header_t *ip4_mask =
+        (ip4_header_t *) &pst->generic.mask[pst->generic.off];
+      pst->items[pst->layer].spec = (void *) ip4_spec;
+      pst->items[pst->layer].mask = (void *) ip4_mask;
+      pst->items[pst->layer].size = sizeof (ip4_header_t);
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV4;
+      pst->generic.off += sizeof (ip4_header_t);
+      pst->layer++;
+      pst->nxt_proto = ip4_spec->protocol;
+    }
+  else if (pst->nxt_proto == ETHERNET_TYPE_IP6)
+    {
+      struct roc_npc_flow_item_ipv6 *ip6_spec =
+        (struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.spec[pst->oct_drv.off];
+      struct roc_npc_flow_item_ipv6 *ip6_mask =
+        (struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.mask[pst->oct_drv.off];
+      ip6_header_t *ip6_hdr_mask =
+        (ip6_header_t *) &pst->generic.mask[pst->generic.off];
+      ip6_header_t *ip6_hdr =
+        (ip6_header_t *) &pst->generic.spec[pst->generic.off];
+      u8 nxt_hdr = ip6_hdr->protocol;
+
+      /* The IPv6 item is copied into the driver scratch buffers rather
+       * than referenced in place in the user pattern. */
+      clib_memcpy (ip6_spec, ip6_hdr, sizeof (ip6_header_t));
+      clib_memcpy (ip6_mask, ip6_hdr_mask, sizeof (ip6_header_t));
+      pst->items[pst->layer].spec = (void *) ip6_spec;
+      pst->items[pst->layer].mask = (void *) ip6_mask;
+      pst->items[pst->layer].size = sizeof (ip6_header_t);
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6;
+      pst->generic.off += sizeof (ip6_header_t);
+      pst->oct_drv.off += sizeof (struct roc_npc_flow_item_ipv6);
+      pst->layer++;
+
+      while (nxt_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS ||
+             nxt_hdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS ||
+             nxt_hdr == IP_PROTOCOL_IPV6_ROUTE)
+        {
+          if (pst->generic.off >= pst->generic.len)
+            return 0;
+
+          ip6_ext_header_t *ip6_ext_spec =
+            (ip6_ext_header_t *) &pst->generic.spec[pst->generic.off];
+          ip6_ext_header_t *ip6_ext_mask =
+            (ip6_ext_header_t *) &pst->generic.mask[pst->generic.off];
+          nxt_hdr = ip6_ext_spec->next_hdr;
+
+          /* NOTE(review): no .size is assigned for IPV6_EXT items, so it
+           * keeps whatever the caller initialised - confirm this is what
+           * the ROC layer expects. */
+          pst->items[pst->layer].spec = (void *) ip6_ext_spec;
+          pst->items[pst->layer].mask = (void *) ip6_ext_mask;
+          pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_EXT;
+          pst->generic.off += ip6_ext_header_len (ip6_ext_spec);
+          pst->layer++;
+        }
+
+      if (pst->generic.off >= pst->generic.len)
+        return 0;
+
+      if (nxt_hdr == IP_PROTOCOL_IPV6_FRAGMENTATION)
+        {
+          ip6_frag_hdr_t *ip6_ext_frag_spec =
+            (ip6_frag_hdr_t *) &pst->generic.spec[pst->generic.off];
+          ip6_frag_hdr_t *ip6_ext_frag_mask =
+            (ip6_frag_hdr_t *) &pst->generic.mask[pst->generic.off];
+
+          pst->items[pst->layer].spec = (void *) ip6_ext_frag_spec;
+          pst->items[pst->layer].mask = (void *) ip6_ext_frag_mask;
+          pst->items[pst->layer].size = sizeof (ip6_frag_hdr_t);
+          pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_FRAG_EXT;
+          pst->generic.off += sizeof (ip6_frag_hdr_t);
+          pst->layer++;
+        }
+
+      pst->nxt_proto = nxt_hdr;
+    }
+  /* Unsupported L3. */
+  else
+    return -1;
+
+  return 0;
+}
+
+/**
+ * @brief Parse the L4 header of a generic flow pattern into a ROC NPC item.
+ *
+ * Matches pst->nxt_proto against the supported IP protocols; on a match one
+ * item referencing the header in place is appended and the offset advanced.
+ *
+ * @param pst parser state, updated in place
+ * @return 0 on success or when there is nothing left to parse,
+ *         -1 when the protocol is not supported.
+ */
+static int
+oct_parse_l4 (oct_flow_parse_state *pst)
+{
+
+  if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0)
+    return 0;
+
+#define _(protocol_t, protocol_value, ltype)                                  \
+  if (pst->nxt_proto == protocol_value)                                       \
+    {                                                                         \
+      protocol_t *spec = (protocol_t *) &pst->generic.spec[pst->generic.off]; \
+      protocol_t *mask = (protocol_t *) &pst->generic.mask[pst->generic.off]; \
+      pst->items[pst->layer].spec = spec;                                     \
+      pst->items[pst->layer].mask = mask;                                     \
+      pst->items[pst->layer].size = sizeof (protocol_t);                      \
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_##ltype;                \
+      pst->generic.off += sizeof (protocol_t);                                \
+      pst->layer++;                                                           \
+      return 0;                                                               \
+    }
+
+  _ (esp_header_t, IP_PROTOCOL_IPSEC_ESP, ESP)
+  _ (udp_header_t, IP_PROTOCOL_UDP, UDP)
+  _ (tcp_header_t, IP_PROTOCOL_TCP, TCP)
+  _ (sctp_header_t, IP_PROTOCOL_SCTP, SCTP)
+  _ (icmp46_header_t, IP_PROTOCOL_ICMP, ICMP)
+  _ (icmp46_header_t, IP_PROTOCOL_ICMP6, ICMP)
+  _ (igmp_header_t, IP_PROTOCOL_IGMP, IGMP)
+  _ (gre_header_t, IP_PROTOCOL_GRE, GRE)
+#undef _
+
+  /* Unsupported L4.
*/
+  return -1;
+}
+
+/**
+ * @brief Parse a tunnel header and the encapsulated headers that follow it.
+ *
+ * Looks at the last item added to pst->items: for GRE the inner packet is
+ * parsed from L3 onwards; for UDP the destination port selects GTPU
+ * (parsing stops after the GTPU header) or VXLAN (inner packet parsed from
+ * L2 onwards).
+ *
+ * @param pst parser state, updated in place
+ * @return 0 on success or when nothing is left to parse, -1 on a parse
+ *         error in the inner headers.
+ */
+static int
+oct_parse_tunnel (oct_flow_parse_state *pst)
+{
+  if (pst->generic.off >= pst->generic.len)
+    return 0;
+
+  if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_GRE)
+    {
+      gre_header_t *gre_hdr = (gre_header_t *) pst->items[pst->layer - 1].spec;
+      pst->nxt_proto = clib_net_to_host_u16 (gre_hdr->protocol);
+      goto parse_l3;
+    }
+
+  else if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_UDP)
+    {
+      udp_header_t *udp_h = (udp_header_t *) pst->items[pst->layer - 1].spec;
+      u16 dport = clib_net_to_host_u16 (udp_h->dst_port);
+
+      if (dport == GTPU_PORT)
+        {
+          gtpu_header_t *gtpu_spec =
+            (gtpu_header_t *) &pst->generic.spec[pst->generic.off];
+          gtpu_header_t *gtpu_mask =
+            (gtpu_header_t *) &pst->generic.mask[pst->generic.off];
+          pst->items[pst->layer].spec = (void *) gtpu_spec;
+          pst->items[pst->layer].mask = (void *) gtpu_mask;
+          pst->items[pst->layer].size = sizeof (gtpu_header_t);
+          pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_GTPU;
+          pst->generic.off += sizeof (gtpu_header_t);
+          pst->layer++;
+          pst->nxt_proto = 0;
+          return 0;
+        }
+      else if (dport == VXLAN_PORT)
+        {
+          vxlan_header_t *vxlan_spec =
+            (vxlan_header_t *) &pst->generic.spec[pst->generic.off];
+          /* The mask must come from the mask buffer; taking it from the
+           * spec buffer would make the spec bytes double as the mask. */
+          vxlan_header_t *vxlan_mask =
+            (vxlan_header_t *) &pst->generic.mask[pst->generic.off];
+          pst->items[pst->layer].spec = (void *) vxlan_spec;
+          pst->items[pst->layer].mask = (void *) vxlan_mask;
+          pst->items[pst->layer].size = sizeof (vxlan_header_t);
+          pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VXLAN;
+          pst->generic.off += sizeof (vxlan_header_t);
+          pst->layer++;
+          pst->nxt_proto = 0;
+          goto parse_l2;
+        }
+      /* NOTE(review): a UDP item with any other destination port falls
+       * through to parse_l2 below and has its payload parsed as an
+       * Ethernet frame - confirm this is intended. */
+    }
+  /* No supported Tunnel detected. */
+  else
+    {
+      log_err (pst->port->dev,
+               "Partially parsed till offset %u, not able to parse further",
+               pst->generic.off);
+      return 0;
+    }
+parse_l2:
+  if (oct_parse_l2 (pst))
+    return -1;
+parse_l3:
+  if (oct_parse_l3 (pst))
+    return -1;
+
+  return oct_parse_l4 (pst);
+}
+
+/**
+ * @brief Parse a complete generic (raw spec/mask) pattern into ROC items.
+ *
+ * Runs the L2, L3, L4 and tunnel parsers in order and terminates the item
+ * list with ROC_NPC_ITEM_TYPE_END. Any unparsed trailing bytes are treated
+ * as an error.
+ *
+ * @param pst parser state, updated in place
+ * @return VNET_DEV_OK on success, VNET_DEV_ERR_NOT_SUPPORTED otherwise.
+ */
+static vnet_dev_rv_t
+oct_flow_generic_pattern_parse (oct_flow_parse_state *pst)
+{
+
+  if (oct_parse_l2 (pst))
+    goto err;
+
+  if (oct_parse_l3 (pst))
+    goto err;
+
+  if (oct_parse_l4 (pst))
+    goto err;
+
+  if (oct_parse_tunnel (pst))
+    goto err;
+
+  if (pst->generic.off < pst->generic.len)
+    {
+      log_err (pst->port->dev,
+               "Partially parsed till offset %u, not able to parse further",
+               pst->generic.off);
+      goto err;
+    }
+
+  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_END;
+  return VNET_DEV_OK;
+
+err:
+  return VNET_DEV_ERR_NOT_SUPPORTED;
+}
+
+static vnet_dev_rv_t
+oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
+              uword *private_data)
+{
+  oct_main_t *om = &oct_main;
+  struct roc_npc_item_info item_info[ROC_NPC_ITEM_TYPE_END] = {};
+  struct roc_npc_action actions[ROC_NPC_ITEM_TYPE_END] = {};
+  oct_port_t *oct_port = vnet_dev_get_port_data (port);
+  ethernet_header_t eth_spec = {}, eth_mask = {};
+  sctp_header_t sctp_spec = {}, sctp_mask = {};
+  gtpu_header_t gtpu_spec = {}, gtpu_mask = {};
+  ip4_header_t ip4_spec = {}, ip4_mask = {};
+  ip6_header_t ip6_spec = {}, ip6_mask = {};
+  udp_header_t udp_spec = {}, udp_mask = {};
+  tcp_header_t tcp_spec = {}, tcp_mask = {};
+  esp_header_t esp_spec = {}, esp_mask = {};
+  vnet_flow_ip4_ipsec_esp_t *esp_hdr;
+  u16 l4_src_port = 0, l4_dst_port = 0;
+  u16 l4_src_mask = 0, l4_dst_mask = 0;
+  struct roc_npc_action_rss rss_conf = {};
+  struct roc_npc_action_queue conf = {};
+  struct roc_npc_action_mark mark = {};
+  struct roc_npc *npc = &oct_port->npc;
+  u8 *flow_spec = 0, *flow_mask = 0;
+  u8 *drv_spec = 0, *drv_mask = 0;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  int layer = 0, index = 0;
+  u16 *queues = NULL;
+  u64 flow_key = 0;
+
u8 proto = 0; + u16 action = 0; + + if ((flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE) && + flow->redirect_queue == ~0 && flow->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) + { + if (!om->inl_dev_initialized) + { + log_err (port->dev, "Inline device is not initialized"); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + esp_hdr = &flow->ip4_ipsec_esp; + esp_spec.spi = clib_host_to_net_u32 (esp_hdr->spi); + esp_mask.spi = 0; /* Any */ + item_info[0].spec = (void *) &esp_spec; + item_info[0].mask = (void *) &esp_mask; + item_info[0].size = sizeof (u32); + item_info[0].type = ROC_NPC_ITEM_TYPE_ESP; + item_info[1].type = ROC_NPC_ITEM_TYPE_END; + + actions[0].conf = (void *) NULL; + actions[0].type = ROC_NPC_ACTION_TYPE_SEC; + actions[1].type = ROC_NPC_ACTION_TYPE_COUNT; + actions[2].type = ROC_NPC_ACTION_TYPE_END; + + return oct_flow_rule_create (port, actions, item_info, flow, + private_data); + } + + if (FLOW_IS_GENERIC_TYPE (flow)) + { + unformat_input_t input; + int rc; + + unformat_init_string ( + &input, (const char *) flow->generic.pattern.spec, + strlen ((const char *) flow->generic.pattern.spec)); + unformat_user (&input, unformat_hex_string, &flow_spec); + unformat_free (&input); + + unformat_init_string ( + &input, (const char *) flow->generic.pattern.mask, + strlen ((const char *) flow->generic.pattern.mask)); + unformat_user (&input, unformat_hex_string, &flow_mask); + unformat_free (&input); + + vec_validate (drv_spec, 1024); + vec_validate (drv_mask, 1024); + oct_flow_parse_state pst = { + .nxt_proto = 0, + .port = port, + .items = item_info, + .oct_drv = { .spec = drv_spec, .mask = drv_mask }, + .generic = { .spec = flow_spec, + .mask = flow_mask, + .len = vec_len (flow_spec) }, + }; + + rc = oct_flow_generic_pattern_parse (&pst); + if (rc) + { + vec_free (flow_spec); + vec_free (flow_mask); + vec_free (drv_spec); + vec_free (drv_mask); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + goto parse_flow_actions; + } + + if (FLOW_IS_ETHERNET_CLASS (flow)) + { + 
eth_spec.type = clib_host_to_net_u16 (flow->ethernet.eth_hdr.type); + eth_mask.type = 0xFFFF; + + item_info[layer].spec = (void *) ð_spec; + item_info[layer].mask = (void *) ð_mask; + item_info[layer].size = sizeof (ethernet_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_ETH; + layer++; + } + + else if (FLOW_IS_IPV4_CLASS (flow)) + { + vnet_flow_ip4_t *ip4_hdr = &flow->ip4; + proto = ip4_hdr->protocol.prot; + + ip4_spec.src_address = ip4_hdr->src_addr.addr; + ip4_spec.dst_address = ip4_hdr->dst_addr.addr; + ip4_mask.src_address = ip4_hdr->src_addr.mask; + ip4_mask.dst_address = ip4_hdr->dst_addr.mask; + + item_info[layer].spec = (void *) &ip4_spec; + item_info[layer].mask = (void *) &ip4_mask; + item_info[layer].size = sizeof (ip4_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_IPV4; + layer++; + + if (FLOW_IS_L4_TYPE (flow)) + { + vnet_flow_ip4_n_tuple_t *ip4_tuple_hdr = &flow->ip4_n_tuple; + + l4_src_port = clib_host_to_net_u16 (ip4_tuple_hdr->src_port.port); + l4_dst_port = clib_host_to_net_u16 (ip4_tuple_hdr->dst_port.port); + l4_src_mask = clib_host_to_net_u16 (ip4_tuple_hdr->src_port.mask); + l4_dst_mask = clib_host_to_net_u16 (ip4_tuple_hdr->dst_port.mask); + } + } + else if (FLOW_IS_IPV6_CLASS (flow)) + { + vnet_flow_ip6_t *ip6_hdr = &flow->ip6; + proto = ip6_hdr->protocol.prot; + + ip6_spec.src_address = ip6_hdr->src_addr.addr; + ip6_spec.dst_address = ip6_hdr->dst_addr.addr; + ip6_mask.src_address = ip6_hdr->src_addr.mask; + ip6_mask.dst_address = ip6_hdr->dst_addr.mask; + + item_info[layer].spec = (void *) &ip6_spec; + item_info[layer].mask = (void *) &ip6_mask; + item_info[layer].size = sizeof (ip6_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_IPV6; + layer++; + + if (FLOW_IS_L4_TYPE (flow)) + { + vnet_flow_ip6_n_tuple_t *ip6_tuple_hdr = &flow->ip6_n_tuple; + + l4_src_port = clib_host_to_net_u16 (ip6_tuple_hdr->src_port.port); + l4_dst_port = clib_host_to_net_u16 (ip6_tuple_hdr->dst_port.port); + l4_src_mask = 
clib_host_to_net_u16 (ip6_tuple_hdr->src_port.mask); + l4_dst_mask = clib_host_to_net_u16 (ip6_tuple_hdr->dst_port.mask); + } + } + + if (!proto) + goto end_item_info; + + switch (proto) + { + case IP_PROTOCOL_UDP: + udp_spec.src_port = l4_src_port; + udp_spec.dst_port = l4_dst_port; + udp_mask.src_port = l4_src_mask; + udp_mask.dst_port = l4_dst_mask; + + item_info[layer].spec = (void *) &udp_spec; + item_info[layer].mask = (void *) &udp_mask; + item_info[layer].size = sizeof (udp_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP; + layer++; + + if (FLOW_IS_L4_TUNNEL_TYPE (flow)) + { + switch (flow->type) + { + case VNET_FLOW_TYPE_IP4_GTPU: + gtpu_spec.teid = clib_host_to_net_u32 (flow->ip4_gtpu.teid); + gtpu_mask.teid = 0XFFFFFFFF; + + item_info[layer].spec = (void *) >pu_spec; + item_info[layer].mask = (void *) >pu_mask; + item_info[layer].size = sizeof (gtpu_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU; + layer++; + break; + + default: + log_err (port->dev, "Unsupported L4 tunnel type"); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + } /* FLOW_IS_L4_TUNNEL_TYPE */ + break; + + case IP_PROTOCOL_TCP: + tcp_spec.src_port = l4_src_port; + tcp_spec.dst_port = l4_dst_port; + tcp_mask.src_port = l4_src_mask; + tcp_mask.dst_port = l4_dst_mask; + + item_info[layer].spec = (void *) &tcp_spec; + item_info[layer].mask = (void *) &tcp_mask; + item_info[layer].size = sizeof (tcp_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP; + layer++; + break; + + case IP_PROTOCOL_SCTP: + sctp_spec.src_port = l4_src_port; + sctp_spec.dst_port = l4_dst_port; + sctp_mask.src_port = l4_src_mask; + sctp_mask.dst_port = l4_dst_mask; + + item_info[layer].spec = (void *) &sctp_spec; + item_info[layer].mask = (void *) &sctp_mask; + item_info[layer].size = sizeof (sctp_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP; + layer++; + break; + + case IP_PROTOCOL_IPSEC_ESP: + esp_spec.spi = clib_host_to_net_u32 (flow->ip4_ipsec_esp.spi); + esp_mask.spi = 
0xFFFFFFFF;
+
+      item_info[layer].spec = (void *) &esp_spec;
+      item_info[layer].mask = (void *) &esp_mask;
+      item_info[layer].size = sizeof (u32);
+      item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP;
+      layer++;
+      break;
+
+    default:
+      log_err (port->dev, "Unsupported IP protocol '%U'", format_ip_protocol,
+               proto);
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+end_item_info:
+  item_info[layer].type = ROC_NPC_ITEM_TYPE_END;
+
+  /* From here on every exit goes through the cleanup at the end of the
+   * function: the generic-pattern path arrives with spec/mask vectors
+   * allocated and the RSS action allocates the queue list. */
+parse_flow_actions:
+  if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
+    {
+      conf.index = flow->redirect_queue;
+      actions[action].type = ROC_NPC_ACTION_TYPE_QUEUE;
+      actions[action].conf = &conf;
+      action++;
+    }
+
+  else if (flow->actions & VNET_FLOW_ACTION_DROP)
+    {
+      actions[action].type = ROC_NPC_ACTION_TYPE_DROP;
+      action++;
+    }
+
+  else if (flow->actions & VNET_FLOW_ACTION_RSS)
+    {
+      if (!flow->queue_num)
+        {
+          log_err (port->dev, "RSS action has no queues");
+          rv = VNET_DEV_ERR_NOT_SUPPORTED;
+          goto done;
+        }
+      /* One slot per queue actually written below */
+      queues = clib_mem_alloc (sizeof (u16) * flow->queue_num);
+
+      /* Fill queue_index .. queue_index + queue_num - 1 without modifying
+       * the caller's flow */
+      for (index = 0; index < flow->queue_num; index++)
+        queues[index] = flow->queue_index + index;
+
+      oct_flow_convert_rss_types (&flow_key, flow->rss_types);
+      if (!flow_key)
+        {
+          log_err (port->dev, "Invalid RSS hash function");
+          rv = VNET_DEV_ERR_NOT_SUPPORTED;
+          goto done;
+        }
+      npc->flowkey_cfg_state = flow_key;
+      rss_conf.queue_num = flow->queue_num;
+      rss_conf.queue = queues;
+
+      actions[action].type = ROC_NPC_ACTION_TYPE_RSS;
+      actions[action].conf = &rss_conf;
+      action++;
+    }
+
+  if (flow->actions & VNET_FLOW_ACTION_MARK)
+    {
+      if (flow->mark_flow_id == 0 ||
+          flow->mark_flow_id > (NPC_FLOW_FLAG_VAL - 2))
+        {
+          log_err (port->dev, "mark flow id must be > 0 and < 0xfffe");
+          rv = VNET_DEV_ERR_NOT_SUPPORTED;
+          goto done;
+        }
+      /* RoC library adds 1 to id, so subtract 1 */
+      mark.id = flow->mark_flow_id - 1;
+      actions[action].type = ROC_NPC_ACTION_TYPE_MARK;
+      actions[action].conf = &mark;
+      action++;
+    }
+
+  /* make count as default action */
+  actions[action].type = ROC_NPC_ACTION_TYPE_COUNT;
+  actions[action + 1].type = ROC_NPC_ACTION_TYPE_END;
+
+  rv = oct_flow_rule_create (port, actions, item_info, flow, private_data);
+
+done:
+  /* Common cleanup for success and failure; all pointers start out NULL */
+  if (queues)
+    clib_mem_free (queues);
+
+  vec_free (flow_spec);
+  vec_free (flow_mask);
+  vec_free (drv_spec);
+  vec_free (drv_mask);
+
+  return rv;
+}
+
+/**
+ * @brief Remove a previously programmed flow rule.
+ *
+ * Destroys the NPC flow referenced by the flow entry at index *private_data
+ * and returns that entry to the port's flow entry pool.
+ *
+ * @return VNET_DEV_OK on success, VNET_DEV_ERR_NOT_SUPPORTED when
+ *         roc_npc_flow_destroy fails (the entry is then kept).
+ */
+static vnet_dev_rv_t
+oct_flow_del (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
+              uword *private_data)
+{
+  oct_port_t *oct_port = vnet_dev_get_port_data (port);
+  struct roc_npc *npc = &oct_port->npc;
+  struct roc_npc_flow *npc_flow;
+  oct_flow_entry_t *flow_entry;
+  int rv = 0, index;
+
+  index = *private_data;
+  flow_entry = pool_elt_at_index (oct_port->flow_entries, index);
+  npc_flow = flow_entry->npc_flow;
+  rv = roc_npc_flow_destroy (npc, npc_flow);
+  if (rv)
+    {
+      log_err (port->dev, "roc_npc_flow_destroy failed with '%s' error",
+               roc_error_msg_get (rv));
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+  pool_put (oct_port->flow_entries, flow_entry);
+
+  return VNET_DEV_OK;
+}
+
+/**
+ * @brief Read the hit counter of a programmed flow.
+ *
+ * @param flow_index    vnet flow index, used only in log messages
+ * @param private_data  index of the flow entry in the port's pool
+ * @param hits          receives the counter value
+ */
+vnet_dev_rv_t
+oct_flow_query (vlib_main_t *vm, vnet_dev_port_t *port, u32 flow_index,
+                uword private_data, u64 *hits)
+{
+  oct_port_t *oct_port = vnet_dev_get_port_data (port);
+  struct roc_npc *npc = &oct_port->npc;
+  struct roc_npc_flow *npc_flow;
+  oct_flow_entry_t *flow_entry;
+  i32 flow_count;
+  int rv = 0;
+
+  flow_count = pool_elts (oct_port->flow_entries);
+  if (!flow_count)
+    {
+      log_err (port->dev, "Flow entry pool is empty");
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  flow_entry = pool_elt_at_index (oct_port->flow_entries, private_data);
+  npc_flow = flow_entry->npc_flow;
+  if (npc_flow->ctr_id == NPC_COUNTER_NONE)
+    {
+      log_err (port->dev, "Counters are not available for given flow id (%u)",
+               flow_index);
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  rv = roc_npc_mcam_read_counter (npc, npc_flow->ctr_id, hits);
+  if (rv != 0)
+    {
+      log_err (port->dev, "Error reading flow counter for given flow id (%u)",
+               flow_index);
+      return VNET_DEV_ERR_INTERNAL;
+    }
+
+  return
VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_flow_ops_fn (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_type_t type, u32 flow_index, + uword *priv_data) +{ + vnet_flow_t *flow = vnet_get_flow (flow_index); + + if (type == VNET_DEV_PORT_CFG_ADD_RX_FLOW) + return oct_flow_add (vm, port, flow, priv_data); + + if (type == VNET_DEV_PORT_CFG_DEL_RX_FLOW) + return oct_flow_del (vm, port, flow, priv_data); + + return VNET_DEV_ERR_NOT_SUPPORTED; +} diff --git a/src/plugins/dev_octeon/format.c b/src/plugins/dev_octeon/format.c index 5ee956ad4f..7cc43c1eec 100644 --- a/src/plugins/dev_octeon/format.c +++ b/src/plugins/dev_octeon/format.c @@ -25,7 +25,7 @@ format_oct_nix_rx_cqe_desc (u8 *s, va_list *args) typeof (d->sg0) *sg0 = &d->sg0; typeof (d->sg0) *sg1 = &d->sg1; - s = format (s, "hdr: cqe_type %u nude %u q %u tag 0x%x", h->cqe_type, + s = format (s, "hdr: cqe_type %u node %u qid %u tag 0x%x", h->cqe_type, h->node, h->q, h->tag); s = format (s, "\n%Uparse:", format_white_space, indent); #define _(n, f) s = format (s, " " #n " " f, p->n) @@ -41,6 +41,8 @@ format_oct_nix_rx_cqe_desc (u8 *s, va_list *args) _ (flow_key_alg, "%u"); _ (eoh_ptr, "%u"); _ (match_id, "0x%x"); + if (p->match_id) + s = format (s, " PKT_RX_FDIR 0x%x", p->match_id); s = format (s, "\n%U ", format_white_space, indent); _ (wqe_aura, "0x%x"); _ (pb_aura, "0x%x"); @@ -162,3 +164,22 @@ format_oct_tx_trace (u8 *s, va_list *args) return s; } + +u8 * +format_oct_port_flow (u8 *s, va_list *args) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *); + u32 flow_index = va_arg (*args, u32); + uword private_data = va_arg (*args, uword); + u64 hits; + + if (flow_index == ~0) + return s; + + if (oct_flow_query (vm, port, flow_index, private_data, &hits) == + VNET_DEV_OK) + s = format (s, "flow (%u) hit count: %lu", flow_index, hits); + + return s; +} diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index bee449f212..2c71888c0d 100644 
--- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -10,11 +10,16 @@ #include #include #include +#include +#include #include #include struct roc_model oct_model; +oct_main_t oct_main; +extern oct_crypto_main_t oct_crypto_main; +extern oct_inl_dev_main_t oct_inl_dev_main; VLIB_REGISTER_LOG_CLASS (oct_log, static) = { .class_name = "octeon", @@ -24,11 +29,26 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = { #define _(f, n, s, d) \ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s }, +vlib_error_desc_t oct_rx_node_counters[] = { + /* clang-format off */ + foreach_octeon_ipsec_ucc + foreach_oct_rx_node_counter + /* clang-format on */ +}; + vlib_error_desc_t oct_tx_node_counters[] = { foreach_oct_tx_node_counter }; #undef _ vnet_dev_node_t oct_rx_node = { .format_trace = format_oct_rx_trace, + .error_counters = oct_rx_node_counters, + .n_error_counters = ARRAY_LEN (oct_rx_node_counters), +}; + +vnet_dev_node_t oct_o20_rx_node = { + .format_trace = format_oct_rx_trace, + .error_counters = oct_rx_node_counters, + .n_error_counters = ARRAY_LEN (oct_rx_node_counters), }; vnet_dev_node_t oct_tx_node = { @@ -37,6 +57,12 @@ vnet_dev_node_t oct_tx_node = { .n_error_counters = ARRAY_LEN (oct_tx_node_counters), }; +vnet_dev_node_t oct_tx_ipsec_tm_node = { + .format_trace = format_oct_tx_trace, + .error_counters = oct_tx_node_counters, + .n_error_counters = ARRAY_LEN (oct_tx_node_counters), +}; + static struct { u16 device_id; @@ -51,10 +77,149 @@ static struct } _ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"), - _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"), + _ (0xa064, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"), + _ (0xa0f8, LBK_VF, "Marvell Octeon Loopback Unit VF"), + _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"), + _ (0xa0f3, O10K_CPT_VF, + "Marvell Octeon-10 Cryptographic Accelerator Unit VF"), + _ (0xa0fe, O9K_CPT_VF, "Marvell Octeon-9 Cryptographic 
Accelerator Unit VF"), + _ (0xa0f0, RVU_INL_PF, + "Marvell Octeon Resource Virtualization Unit Inline Device PF"), + _ (0xa0f1, RVU_INL_VF, + "Marvell Octeon Resource Virtualization Unit Inline Device VF"), #undef _ }; +static vnet_dev_arg_t oct_drv_args[] = { + { + .id = OCT_DRV_ARG_NPA_MAX_POOLS, + .name = "npa_max_pools", + .desc = "Max NPA pools", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = 128, + }, + { + .id = OCT_DRV_ARG_USE_SINGLE_RX_AURA, + .name = "use_single_rx_aura", + .desc = "Use single rx aura", + .type = VNET_DEV_ARG_TYPE_BOOL, + .default_val.boolean = true, + }, + { + .id = OCT_DRV_ARG_IPSEC_IN_MIN_SPI, + .name = "ipsec_in_min_spi", + .desc = "Inline IPsec inbound minimum spi value", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = 0, + }, + { + .id = OCT_DRV_ARG_IPSEC_IN_MAX_SPI, + .name = "ipsec_in_max_spi", + .desc = "Inline IPsec inbound maximum spi value", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = 8192, + }, + { + .id = OCT_DRV_ARG_IPSEC_OUT_MAX_SA, + .name = "ipsec_out_max_sa", + .desc = "Inline IPsec outbound maximum sa", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = 8192, + }, + { + .id = OCT_DRV_ARG_END, + .name = "end", + .desc = "Argument end", + .type = VNET_DEV_ARG_END, + }, +}; + +static vnet_dev_arg_t oct_port_args[] = { + { + .id = OCT_PORT_ARG_EN_ETH_PAUSE_FRAME, + .name = "eth_pause_frame", + .desc = "Enable ethernet pause frame (flow control) support, " + "applicable to network devices only", + .type = VNET_DEV_ARG_TYPE_BOOL, + .default_val.boolean = false, + }, + { + .id = OCT_PORT_ARG_RSS_FLOW_KEY, + .name = "rss_flow_key", + .desc = "RSS Flow Key Bitmap, applicable to network devices only", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_IPV6 | + FLOW_KEY_TYPE_TCP | FLOW_KEY_TYPE_UDP | + FLOW_KEY_TYPE_SCTP, + }, + { + .id = OCT_PORT_ARG_ALLMULTI_MODE, + .name = "allmulti", + .desc = "Set allmulti mode, applicable to 
network devices only", + .type = VNET_DEV_ARG_TYPE_BOOL, + .default_val.boolean = false, + }, + { + .id = OCT_PORT_ARG_SWITCH_HDR_TYPE, + .name = "switch_header", + .desc = "Enable switch header and set specific switch header type, " + "applicable to network devices only", + .type = VNET_DEV_ARG_TYPE_STRING, + }, + { + .id = OCT_PORT_ARG_END, + .name = "end", + .desc = "Argument end", + .type = VNET_DEV_ARG_END, + }, +}; + +static vnet_dev_arg_t oct_dev_args[] = { + { + .id = OCT_DEV_ARG_CRYPTO_N_DESC, + .name = "n_desc", + .desc = "number of cpt descriptors, applicable to cpt devices only", + .type = VNET_DEV_ARG_TYPE_UINT32, + .default_val.uint32 = OCT_CPT_LF_DEF_NB_DESC, + }, + { + .id = OCT_DEV_ARG_END, + .name = "end", + .desc = "Argument end", + .type = VNET_DEV_ARG_END, + }, +}; + +clib_error_t * +oct_inl_inb_ipsec_flow_enable (void) +{ + oct_inl_dev_main_t *inl_main = &oct_inl_dev_main; + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_flow_t flow = { 0 }; + u32 flow_index = ~0; + + if (inl_main->is_inl_ipsec_flow_enabled) + return NULL; + + pool_foreach_pointer (port, dm->ports_by_dev_instance) + { + clib_memset (&flow, 0, sizeof (vnet_flow_t)); + + flow.index = ~0; + flow.actions = VNET_FLOW_ACTION_REDIRECT_TO_QUEUE; + flow.type = VNET_FLOW_TYPE_IP4_IPSEC_ESP; + flow.ip4_ipsec_esp.spi = 0; + flow.redirect_queue = ~0; + + vnet_flow_add (vnm, &flow, &flow_index); + vnet_flow_enable (vnm, flow_index, port->intf.hw_if_index); + } + + inl_main->is_inl_ipsec_flow_enabled = 1; + return NULL; +} + static u8 * oct_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info) { @@ -86,6 +251,56 @@ cnx_return_roc_err (vnet_dev_t *dev, int rrv, char *fmt, ...) 
return VNET_DEV_ERR_UNSUPPORTED_DEVICE; } +static vnet_dev_rv_t +oct_config_args (vlib_main_t *vm, vnet_dev_driver_t *drv) +{ + if (!oct_main.is_config_done) + { + foreach_vnet_dev_port_args (arg, drv) + { + if (!arg->val_set) + continue; + + if (arg->id == OCT_DRV_ARG_NPA_MAX_POOLS) + { + oct_main.npa_max_pools = vnet_dev_arg_get_uint32 (arg); + + if (oct_main.npa_max_pools < 128 || + (oct_main.npa_max_pools > BIT_ULL (20))) + { + log_err ( + NULL, + "Invalid max-pools value (%u), should be in range of " + "(128 - %u)\n", + oct_main.npa_max_pools, BIT_ULL (20)); + return VNET_DEV_ERR_UNSUPPORTED_CONFIG; + } + } + + if (arg->id == OCT_DRV_ARG_USE_SINGLE_RX_AURA) + oct_main.use_single_rx_aura = vnet_dev_arg_get_bool (arg); + + if (arg->id == OCT_DRV_ARG_IPSEC_IN_MIN_SPI) + oct_inl_dev_main.in_min_spi = vnet_dev_arg_get_uint32 (arg); + + if (arg->id == OCT_DRV_ARG_IPSEC_IN_MAX_SPI) + oct_inl_dev_main.in_max_spi = vnet_dev_arg_get_uint32 (arg); + + if (arg->id == OCT_DRV_ARG_IPSEC_OUT_MAX_SA) + oct_inl_dev_main.out_max_sa = vnet_dev_arg_get_uint32 (arg); + } + oct_main.is_config_done = 1; + } + else + { + log_err (NULL, "Driver config arguments are already initialized or " + "devices are already initialized"); + return VNET_DEV_ERR_UNSUPPORTED_CONFIG; + } + + return 0; +} + static vnet_dev_rv_t oct_alloc (vlib_main_t *vm, vnet_dev_t *dev) { @@ -98,21 +313,35 @@ oct_alloc (vlib_main_t *vm, vnet_dev_t *dev) static vnet_dev_rv_t oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) { - oct_device_t *cd = vnet_dev_get_data (dev); + oct_main_t *om = &oct_main; + oct_ipsec_main_t *oim = &oct_ipsec_main; + oct_inl_dev_main_t *oidm = &oct_inl_dev_main; + u8 bp_index = vlib_buffer_pool_get_default_for_numa (vm, 0); + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bp_index); + struct npa_aura_s aura = {}; + struct npa_pool_s npapool = { .nat_align = 1, + .buf_offset = OCT_EXT_HDR_SIZE / ROC_ALIGN }; + oct_device_t *cd = vnet_dev_get_data (dev), **oct_dev = 0; u8 mac_addr[6]; int 
rrv; oct_port_t oct_port = {}; + vnet_dev_rv_t rv; *cd->nix = (struct roc_nix){ .reta_sz = ROC_NIX_RSS_RETA_SZ_256, .max_sqb_count = 512, .pci_dev = &cd->plt_pci_dev, + .hw_vlan_ins = true, }; + if (roc_feature_nix_has_own_meta_aura () && + !roc_feature_nix_has_second_pass_drop ()) + cd->nix->local_meta_aura_ena = true; + if ((rrv = roc_nix_dev_init (cd->nix))) return cnx_return_roc_err (dev, rrv, "roc_nix_dev_init"); - if (roc_nix_npc_mac_addr_get (cd->nix, mac_addr)) + if ((rrv = roc_nix_npc_mac_addr_get (cd->nix, mac_addr))) return cnx_return_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get"); vnet_dev_port_add_args_t port_add_args = { @@ -128,6 +357,19 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .rx_offloads = { .ip4_cksum = 1, }, + .tx_offloads = { + .ip4_cksum = 1, + }, + + }, + .default_rss_key = { + .key = { + 0xfe, 0xed, 0x0b, 0xad, 0xfe, 0xed, 0x0b, 0xad, 0xad, 0x0b, 0xed, 0xfe, + 0xad, 0x0b, 0xed, 0xfe, 0x13, 0x57, 0x9b, 0xef, 0x24, 0x68, 0xac, 0x0e, + 0x91, 0x72, 0x53, 0x11, 0x82, 0x64, 0x20, 0x44, 0x12, 0xef, 0x34, 0xcd, + 0x56, 0xbc, 0x78, 0x9a, 0x9a, 0x78, 0xbc, 0x56, 0xcd, 0x34, 0xef, 0x12, + }, + .length = 48, }, .ops = { .init = oct_port_init, @@ -135,10 +377,14 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .start = oct_port_start, .stop = oct_port_stop, .config_change = oct_port_cfg_change, + .config_change_validate = oct_port_cfg_change_validate, .format_status = format_oct_port_status, + .format_flow = format_oct_port_flow, + .clear_counters = oct_port_clear_counters, }, .data_size = sizeof (oct_port_t), .initial_data = &oct_port, + .args = oct_port_args, }, .rx_node = &oct_rx_node, .tx_node = &oct_tx_node, @@ -172,27 +418,250 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) }, }; + if (om->use_single_rx_aura && !om->rx_aura_handle) + { + if ((rrv = roc_npa_pool_create (&om->rx_aura_handle, bp->alloc_size, + bp->n_buffers, &aura, &npapool, 0))) + return cnx_return_roc_err (dev, rrv, "roc_npa_pool_create"); + } + + if (oidm->inl_dev) + 
{ + if (oim->inline_ipsec_sessions) + { + log_err (dev, + "device attach not allowed after any IPsec SA addition"); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + if ((rv = oct_init_nix_inline_ipsec (vm, oidm->vdev, dev))) + return rv; + port_add_args.tx_node = &oct_tx_ipsec_tm_node; + } + + if (roc_model_is_cn20k ()) + port_add_args.rx_node = &oct_o20_rx_node; + vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr, mac_addr); log_info (dev, "MAC address is %U", format_ethernet_address, mac_addr); - return vnet_dev_port_add (vm, dev, 0, &port_add_args); + if ((rv = vnet_dev_port_add (vm, dev, 0, &port_add_args))) + return rv; + + pool_get (om->oct_dev, oct_dev); + oct_dev[0] = vnet_dev_get_data (dev); + oct_dev[0]->nix_idx = oct_dev - om->oct_dev; + + return VNET_DEV_OK; +} + +static int +oct_conf_cpt (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd, + int nb_lf) +{ + struct roc_cpt *roc_cpt = ocd->roc_cpt; + int rrv; + + if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_SE)) < 0) + { + log_err (dev, "Could not add CPT SE engines"); + return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); + } + if (!roc_model_is_cn20k ()) + { + if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_IE)) < 0) + { + log_err (dev, "Could not add CPT IE engines"); + return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); + } + if (roc_cpt->eng_grp[CPT_ENG_TYPE_IE] != + ROC_LEGACY_CPT_DFLT_ENG_GRP_SE_IE) + { + log_err (dev, "Invalid CPT IE engine group configuration"); + return -1; + } + } + if (roc_cpt->eng_grp[CPT_ENG_TYPE_SE] != ROC_LEGACY_CPT_DFLT_ENG_GRP_SE) + { + log_err (dev, "Invalid CPT SE engine group configuration"); + return -1; + } + if ((rrv = roc_cpt_dev_configure (roc_cpt, nb_lf, false, 0)) < 0) + { + log_err (dev, "could not configure crypto device %U", + format_vlib_pci_addr, roc_cpt->pci_dev->addr); + return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_configure"); + } + return 0; +} + +static vnet_dev_rv_t +oct_conf_cpt_queue (vlib_main_t 
*vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) +{ + struct roc_cpt *roc_cpt = ocd->roc_cpt; + struct roc_cpt_lmtline *cpt_lmtline; + struct roc_cpt_lf *cpt_lf; + int rrv; + + cpt_lf = &ocd->lf; + cpt_lmtline = &ocd->lmtline; + + cpt_lf->nb_desc = ocd->n_desc; + cpt_lf->lf_id = 0; + if ((rrv = roc_cpt_lf_init (roc_cpt, cpt_lf)) < 0) + return cnx_return_roc_err (dev, rrv, "roc_cpt_lf_init"); + + roc_cpt_iq_enable (cpt_lf); + + if ((rrv = roc_cpt_lmtline_init (roc_cpt, cpt_lmtline, 0, false) < 0)) + return cnx_return_roc_err (dev, rrv, "roc_cpt_lmtline_init"); + + return 0; +} + +static vnet_dev_rv_t +oct_init_inl_dev (vlib_main_t *vm, vnet_dev_t *dev) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_device_t *od = vnet_dev_get_data (dev); + oct_inl_dev_main_t *oidm = &oct_inl_dev_main; + vnet_dev_rv_t rv; + + if ((STRUCT_SIZE_OF (vlib_buffer_t, pre_data) < 128) || + (STRUCT_OFFSET_OF (vlib_buffer_t, pre_data) % ROC_ALIGN)) + { + log_err (dev, "Failed to initalize inline device: pre_data size should " + "be minimum 128 Bytes and offset of pre_data in vlib " + "should be 128 bytes aligned"); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + oidm->inl_dev = oct_plt_init_param.oct_plt_zmalloc ( + sizeof (struct roc_nix_inl_dev), CLIB_CACHE_LINE_BYTES); + oidm->inl_dev->pci_dev = &od->plt_pci_dev; + oidm->vdev = dev; + + if ((rv = oct_early_init_inline_ipsec (vm, dev))) + return rv; + + if ((rv = oct_init_ipsec_backend (vm, dev))) + return rv; + + oct_main.use_single_rx_aura = 1; + oct_main.inl_dev_initialized = 1; + + return VNET_DEV_OK; } static vnet_dev_rv_t oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev) { + oct_crypto_main_t *ocm = &oct_crypto_main; + extern oct_plt_init_param_t oct_plt_init_param; oct_device_t *cd = vnet_dev_get_data (dev); + oct_crypto_dev_t *ocd = NULL; + u32 n_desc; int rrv; - struct roc_cpt cpt = { - .pci_dev = &cd->plt_pci_dev, - }; - if ((rrv = roc_cpt_dev_init (&cpt))) + if (ocm->n_cpt == OCT_MAX_N_CPT_DEV || ocm->started) + return 
VNET_DEV_ERR_NOT_SUPPORTED; + + ocd = oct_plt_init_param.oct_plt_zmalloc (sizeof (oct_crypto_dev_t), + CLIB_CACHE_LINE_BYTES); + + ocd->roc_cpt = oct_plt_init_param.oct_plt_zmalloc (sizeof (struct roc_cpt), + CLIB_CACHE_LINE_BYTES); + ocd->roc_cpt->pci_dev = &cd->plt_pci_dev; + + ocd->dev = dev; + ocd->n_desc = OCT_CPT_LF_DEF_NB_DESC; + + foreach_vnet_dev_args (arg, dev) + { + if (arg->id == OCT_DEV_ARG_CRYPTO_N_DESC && + vnet_dev_arg_get_uint32 (arg)) + { + n_desc = vnet_dev_arg_get_uint32 (arg); + if (n_desc < OCT_CPT_LF_MIN_NB_DESC || + n_desc > OCT_CPT_LF_MAX_NB_DESC) + { + log_err (dev, + "number of cpt descriptors should be within range " + "of %u and %u", + OCT_CPT_LF_MIN_NB_DESC, OCT_CPT_LF_MAX_NB_DESC); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + ocd->n_desc = vnet_dev_arg_get_uint32 (arg); + } + } + + if ((rrv = roc_cpt_dev_init (ocd->roc_cpt))) return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init"); + + if ((rrv = oct_conf_cpt (vm, dev, ocd, 1))) + return rrv; + + if ((rrv = oct_conf_cpt_queue (vm, dev, ocd))) + return rrv; + + if (!ocm->n_cpt) + { + /* + * Initialize s/w queues, which are common across multiple + * crypto devices + */ + oct_conf_sw_queue (vm, dev, ocd); + + ocm->crypto_dev[0] = ocd; + /* Initialize counters */ +#define _(i, s, str) \ + ocm->s##_counter.name = str; \ + ocm->s##_counter.stat_segment_name = "/octeon/" str "_counters"; \ + vlib_validate_simple_counter (&ocm->s##_counter, 0); \ + vlib_zero_simple_counter (&ocm->s##_counter, 0); + foreach_crypto_counter; +#undef _ + } + + ocm->crypto_dev[1] = ocd; + + oct_init_crypto_engine_handlers (vm, dev); + + ocm->n_cpt++; + return VNET_DEV_OK; } +static bool +oct_is_nix_bar_mappable (vnet_dev_t *dev, u32 bar) +{ + oct_device_t *cd = vnet_dev_get_data (dev); + + /* Device-BARs mapping table + * +-----+-------+-------+--------+ + * | | cn9k | cn10k | cn20k | + * +-----+-------+-------+--------+ + * | PF | BAR2 | BAR2 | BAR2 | + * | | BAR4 | BAR4 | | + * +-----+-------+-------+--------+ 
+ * | VF | BAR2 | BAR2 | BAR2 | + * | | BAR4 | | | + * +-----+-------+-------+--------+ + */ + + if (bar == 2) + return true; + + if (roc_model_is_cn20k ()) + return false; + + if (roc_model_is_cn10k () && OCT_DEVTYPE_IS_VF (cd->type)) + return false; + + return true; +} + static vnet_dev_rv_t oct_init (vlib_main_t *vm, vnet_dev_t *dev) { @@ -200,6 +669,13 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev) vlib_pci_config_hdr_t pci_hdr; vnet_dev_rv_t rv; + /* + * Drivers config arguments should be initialized by this time + * otherwise don't allow to set after device init + */ + if (!oct_main.is_config_done) + oct_main.is_config_done = 1; + rv = vnet_dev_pci_read_config_header (vm, dev, &pci_hdr); if (rv != VNET_DEV_OK) return rv; @@ -224,24 +700,51 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev) .id.class_id = pci_hdr.class << 16 | pci_hdr.subclass, .pci_handle = vnet_dev_get_pci_handle (dev), }; + cd->msix_handler = NULL; foreach_int (i, 2, 4) { - rv = vnet_dev_pci_map_region (vm, dev, i, - &cd->plt_pci_dev.mem_resource[i].addr); - if (rv != VNET_DEV_OK) - return rv; + if (oct_is_nix_bar_mappable (dev, i)) + { + rv = vnet_dev_pci_map_region (vm, dev, i, + &cd->plt_pci_dev.mem_resource[i].addr); + if (rv != VNET_DEV_OK) + return rv; + } } + if ((rv = vnet_dev_pci_bus_master_enable (vm, dev))) + return rv; + strncpy ((char *) cd->plt_pci_dev.name, dev->device_id, sizeof (cd->plt_pci_dev.name) - 1); - if (cd->type == OCT_DEVICE_TYPE_RVU_PF) - return oct_init_nix (vm, dev); - else if (cd->type == OCT_DEVICE_TYPE_CPT_VF) - return oct_init_cpt (vm, dev); - else - return VNET_DEV_ERR_UNSUPPORTED_DEVICE; + cd->plt_pci_dev.intr_handle = malloc (sizeof (struct oct_pci_intr_handle)); + if (!cd->plt_pci_dev.intr_handle) + return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL; + memset (cd->plt_pci_dev.intr_handle, 0x0, + sizeof (struct oct_pci_intr_handle)); + cd->plt_pci_dev.intr_handle->pci_handle = cd->plt_pci_dev.pci_handle; + + switch (cd->type) + { + case OCT_DEVICE_TYPE_RVU_PF: + case 
OCT_DEVICE_TYPE_RVU_VF: + case OCT_DEVICE_TYPE_LBK_VF: + case OCT_DEVICE_TYPE_SDP_VF: + return oct_init_nix (vm, dev); + + case OCT_DEVICE_TYPE_O10K_CPT_VF: + case OCT_DEVICE_TYPE_O9K_CPT_VF: + return oct_init_cpt (vm, dev); + + case OCT_DEVICE_TYPE_RVU_INL_PF: + case OCT_DEVICE_TYPE_RVU_INL_VF: + return oct_init_inl_dev (vm, dev); + + default: + return VNET_DEV_ERR_UNSUPPORTED_DEVICE; + } return 0; } @@ -269,14 +772,24 @@ VNET_DEV_REGISTER_DRIVER (octeon) = { .bus = "pci", .device_data_sz = sizeof (oct_device_t), .ops = { + .config_args = oct_config_args, .alloc = oct_alloc, .init = oct_init, .deinit = oct_deinit, .free = oct_free, .probe = oct_probe, }, + .args = oct_dev_args, + .drv_args = oct_drv_args, }; +static int +oct_npa_max_pools_set_cb (struct plt_pci_device *pci_dev) +{ + roc_idev_npa_maxpools_set (oct_main.npa_max_pools); + return 0; +} + static clib_error_t * oct_plugin_init (vlib_main_t *vm) { @@ -290,6 +803,18 @@ oct_plugin_init (vlib_main_t *vm) rv = roc_model_init (&oct_model); if (rv) return clib_error_return (0, "roc_model_init failed"); + + roc_npa_lf_init_cb_register (oct_npa_max_pools_set_cb); + + /* set default values in oct_main */ + oct_main.npa_max_pools = OCT_NPA_MAX_POOLS; + oct_main.use_single_rx_aura = 1; + oct_inl_dev_main.in_min_spi = 0; + oct_inl_dev_main.in_max_spi = 8192; + oct_inl_dev_main.out_max_sa = 8192; + + roc_npa_lf_init_cb_register (oct_npa_max_pools_set_cb); + return 0; } @@ -299,3 +824,5 @@ VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, .description = "dev_octeon", }; + +VLIB_BUFFER_SET_EXT_HDR_SIZE (OCT_EXT_HDR_SIZE); diff --git a/src/plugins/dev_octeon/ipsec.c b/src/plugins/dev_octeon/ipsec.c new file mode 100644 index 0000000000..a8864aa7d8 --- /dev/null +++ b/src/plugins/dev_octeon/ipsec.c @@ -0,0 +1,1545 @@ +/* + * Copyright (c) 2024 Marvell. 
/*
 * Initialise a hardware IPsec SA area to its defaults.
 *
 * sa_dptr - SA memory; handed to the ROC init routine matching the
 *           running model (roc_ow_* on cn20k, roc_ot_* otherwise)
 * is_inb  - true for an inbound SA, false for an outbound SA
 */
static inline void
oct_ipsec_sa_init (void *sa_dptr, bool is_inb)
{
  const bool is_cn20k = roc_model_is_cn20k ();

  if (is_inb)
    {
      if (is_cn20k)
	roc_ow_ipsec_inb_sa_init (sa_dptr);
      else
	roc_ot_ipsec_inb_sa_init (sa_dptr);
    }
  else
    {
      if (is_cn20k)
	roc_ow_ipsec_outb_sa_init (sa_dptr);
      else
	roc_ot_ipsec_outb_sa_init (sa_dptr);
    }
}
127] = 0x36 }; + const u8 *key = sa->integ_key.data; + u32 length = sa->integ_key.len; + u32 i; + + /* HMAC OPAD and IPAD */ + for (i = 0; i < 128 && i < length; i++) + { + opad[i] = opad[i] ^ key[i]; + ipad[i] = ipad[i] ^ key[i]; + } + + /* + * Precompute hash of HMAC OPAD and IPAD to avoid + * per-packet computation + */ + switch (sa->integ_alg) + { + case IPSEC_INTEG_ALG_SHA1_96: + roc_hash_sha1_gen (opad, (u32 *) &hmac_opad_ipad[0]); + roc_hash_sha1_gen (ipad, (u32 *) &hmac_opad_ipad[24]); + break; + case IPSEC_INTEG_ALG_SHA_256_96: + case IPSEC_INTEG_ALG_SHA_256_128: + roc_hash_sha256_gen (opad, (u32 *) &hmac_opad_ipad[0], 256); + roc_hash_sha256_gen (ipad, (u32 *) &hmac_opad_ipad[64], 256); + break; + case IPSEC_INTEG_ALG_SHA_384_192: + roc_hash_sha512_gen (opad, (u64 *) &hmac_opad_ipad[0], 384); + roc_hash_sha512_gen (ipad, (u64 *) &hmac_opad_ipad[64], 384); + break; + case IPSEC_INTEG_ALG_SHA_512_256: + roc_hash_sha512_gen (opad, (u64 *) &hmac_opad_ipad[0], 512); + roc_hash_sha512_gen (ipad, (u64 *) &hmac_opad_ipad[64], 512); + break; + default: + break; + } +} + +static_always_inline u64 +oct_ipsec_crypto_inst_w7_get (void *sa) +{ + union cpt_inst_w7 w7; + + w7.u64 = 0; + w7.s.ctx_val = 1; + + if (roc_model_is_cn20k ()) + w7.s.egrp = ROC_CPT_DFLT_ENG_GRP_SE; + else + w7.s.egrp = ROC_LEGACY_CPT_DFLT_ENG_GRP_SE_IE; + + return w7.u64; +} + +static_always_inline i32 +oct_ipsec_sa_common_param_fill (union roc_ot_ipsec_sa_word2 *w2, + u8 *cipher_key, u8 *salt_key, + u8 *hmac_opad_ipad, ipsec_sa_t *sa) +{ + u32 *tmp_salt; + u64 *tmp_key; + int i; + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + w2->s.encap_type = ROC_IE_OT_SA_ENCAP_UDP; + + /* Set protocol - ESP vs AH */ + if (sa->protocol == IPSEC_PROTOCOL_ESP) + w2->s.protocol = ROC_IE_SA_PROTOCOL_ESP; + else + w2->s.protocol = ROC_IE_SA_PROTOCOL_AH; + + /* Set mode - transport vs tunnel */ + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + w2->s.mode = ROC_IE_SA_MODE_TUNNEL; + else + w2->s.mode = ROC_IE_SA_MODE_TRANSPORT; + + 
if (ipsec_sa_is_set_IS_CTR (sa)) + { + if (ipsec_sa_is_set_IS_AEAD (sa)) + { + /* AEAD is set for AES_GCM */ + if (IPSEC_CRYPTO_ALG_IS_GCM (sa->crypto_alg)) + { + w2->s.enc_type = ROC_IE_SA_ENC_AES_GCM; + w2->s.auth_type = ROC_IE_SA_AUTH_NULL; + } + else + { + clib_warning ("Unsupported AEAD algorithm"); + return -1; + } + } + else + w2->s.enc_type = ROC_IE_SA_ENC_AES_CTR; + } + else + { + switch (sa->crypto_alg) + { + case IPSEC_CRYPTO_ALG_NONE: + w2->s.enc_type = ROC_IE_SA_ENC_NULL; + break; + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + w2->s.enc_type = ROC_IE_SA_ENC_AES_CBC; + break; + case IPSEC_CRYPTO_ALG_3DES_CBC: + w2->s.enc_type = ROC_IE_SA_ENC_3DES_CBC; + break; + default: + clib_warning ("Unsupported encryption algorithm"); + return -1; + } + } + + switch (sa->crypto_alg) + { + case IPSEC_CRYPTO_ALG_AES_GCM_128: + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CTR_128: + w2->s.aes_key_len = ROC_IE_SA_AES_KEY_LEN_128; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_192: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CTR_192: + w2->s.aes_key_len = ROC_IE_SA_AES_KEY_LEN_192; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_256: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + case IPSEC_CRYPTO_ALG_AES_CTR_256: + w2->s.aes_key_len = ROC_IE_SA_AES_KEY_LEN_256; + break; + default: + break; + } + + if (!ipsec_sa_is_set_IS_AEAD (sa)) + { + switch (sa->integ_alg) + { + case IPSEC_INTEG_ALG_NONE: + w2->s.auth_type = ROC_IE_SA_AUTH_NULL; + break; + case IPSEC_INTEG_ALG_SHA1_96: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA1; + break; + case IPSEC_INTEG_ALG_SHA_256_96: + case IPSEC_INTEG_ALG_SHA_256_128: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA2_256; + break; + case IPSEC_INTEG_ALG_SHA_384_192: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA2_384; + break; + case IPSEC_INTEG_ALG_SHA_512_256: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA2_512; + break; + default: + clib_warning ("Unsupported authentication 
algorithm"); + return -1; + } + } + + oct_ipsec_hmac_opad_ipad_gen (sa, hmac_opad_ipad); + + tmp_key = (u64 *) hmac_opad_ipad; + for (i = 0; i < (int) (ROC_CTX_MAX_OPAD_IPAD_LEN / sizeof (u64)); i++) + tmp_key[i] = clib_net_to_host_u64 (tmp_key[i]); + + if (ipsec_sa_is_set_IS_AEAD (sa)) + { + if (IPSEC_CRYPTO_ALG_IS_GCM (sa->crypto_alg)) + clib_memcpy (salt_key, &sa->salt, OCT_ROC_SALT_LEN); + tmp_salt = (u32 *) salt_key; + *tmp_salt = clib_net_to_host_u32 (*tmp_salt); + } + + /* Populate encryption key */ + clib_memcpy (cipher_key, sa->crypto_key.data, sa->crypto_key.len); + tmp_key = (u64 *) cipher_key; + for (i = 0; i < (int) (ROC_CTX_MAX_CKEY_LEN / sizeof (u64)); i++) + tmp_key[i] = clib_net_to_host_u64 (tmp_key[i]); + + w2->s.spi = sa->spi; + + return 0; +} + +static_always_inline i32 +oct_o20_ipsec_sa_common_param_fill (union roc_ow_ipsec_sa_word2 *w2, + u8 *cipher_key, u8 *salt_key, + u8 *hmac_opad_ipad, ipsec_sa_t *sa) +{ + u32 *tmp_salt; + u64 *tmp_key; + int i; + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + w2->s.encap_type = ROC_IE_OW_SA_ENCAP_UDP; + + /* Set protocol - ESP vs AH */ + if (sa->protocol == IPSEC_PROTOCOL_ESP) + w2->s.protocol = ROC_IE_SA_PROTOCOL_ESP; + else + w2->s.protocol = ROC_IE_SA_PROTOCOL_AH; + + /* Set mode - transport vs tunnel */ + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + w2->s.mode = ROC_IE_SA_MODE_TUNNEL; + else + w2->s.mode = ROC_IE_SA_MODE_TRANSPORT; + + if (ipsec_sa_is_set_IS_CTR (sa)) + { + if (ipsec_sa_is_set_IS_AEAD (sa)) + { + /* AEAD is set for AES_GCM */ + if (IPSEC_CRYPTO_ALG_IS_GCM (sa->crypto_alg)) + { + w2->s.enc_type = ROC_IE_SA_ENC_AES_GCM; + w2->s.auth_type = ROC_IE_SA_AUTH_NULL; + } + else + { + clib_warning ("Unsupported AEAD algorithm"); + return -1; + } + } + else + w2->s.enc_type = ROC_IE_SA_ENC_AES_CTR; + } + else + { + switch (sa->crypto_alg) + { + case IPSEC_CRYPTO_ALG_NONE: + w2->s.enc_type = ROC_IE_SA_ENC_NULL; + break; + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case 
IPSEC_CRYPTO_ALG_AES_CBC_256: + w2->s.enc_type = ROC_IE_SA_ENC_AES_CBC; + break; + default: + clib_warning ("Unsupported encryption algorithm"); + return -1; + } + } + + switch (sa->crypto_alg) + { + case IPSEC_CRYPTO_ALG_AES_GCM_128: + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CTR_128: + w2->s.aes_key_len = ROC_IE_SA_AES_KEY_LEN_128; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_192: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CTR_192: + w2->s.aes_key_len = ROC_IE_SA_AES_KEY_LEN_192; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_256: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + case IPSEC_CRYPTO_ALG_AES_CTR_256: + w2->s.aes_key_len = ROC_IE_SA_AES_KEY_LEN_256; + break; + default: + break; + } + + if (!ipsec_sa_is_set_IS_AEAD (sa)) + { + switch (sa->integ_alg) + { + case IPSEC_INTEG_ALG_NONE: + w2->s.auth_type = ROC_IE_SA_AUTH_NULL; + break; + case IPSEC_INTEG_ALG_SHA1_96: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA1; + break; + case IPSEC_INTEG_ALG_SHA_256_96: + case IPSEC_INTEG_ALG_SHA_256_128: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA2_256; + break; + case IPSEC_INTEG_ALG_SHA_384_192: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA2_384; + break; + case IPSEC_INTEG_ALG_SHA_512_256: + w2->s.auth_type = ROC_IE_SA_AUTH_SHA2_512; + break; + default: + clib_warning ("Unsupported authentication algorithm"); + return -1; + } + } + + oct_ipsec_hmac_opad_ipad_gen (sa, hmac_opad_ipad); + + tmp_key = (u64 *) hmac_opad_ipad; + for (i = 0; i < (int) (ROC_CTX_MAX_OPAD_IPAD_LEN / sizeof (u64)); i++) + tmp_key[i] = clib_net_to_host_u64 (tmp_key[i]); + + if (ipsec_sa_is_set_IS_AEAD (sa)) + { + if (IPSEC_CRYPTO_ALG_IS_GCM (sa->crypto_alg)) + clib_memcpy (salt_key, &sa->salt, OCT_ROC_SALT_LEN); + tmp_salt = (u32 *) salt_key; + *tmp_salt = clib_net_to_host_u32 (*tmp_salt); + } + + /* Populate encryption key */ + clib_memcpy (cipher_key, sa->crypto_key.data, sa->crypto_key.len); + tmp_key = (u64 *) cipher_key; + for (i = 0; i < (int) (ROC_CTX_MAX_CKEY_LEN / sizeof (u64)); 
i++) + tmp_key[i] = clib_net_to_host_u64 (tmp_key[i]); + + w2->s.spi = sa->spi; + + return 0; +} + +static_always_inline void +oct_ipsec_sa_len_precalc (ipsec_sa_t *sa, oct_ipsec_encap_len_t *encap) +{ + encap->adj_len = 0; + + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + { + if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa)) + encap->partial_len = ROC_CPT_TUNNEL_IPV6_HDR_LEN; + else + encap->partial_len = ROC_CPT_TUNNEL_IPV4_HDR_LEN; + } + else + { + encap->partial_len = 0; + encap->adj_len = ROC_CPT_TUNNEL_IPV4_HDR_LEN; + } + + if (sa->protocol == IPSEC_PROTOCOL_ESP) + { + encap->partial_len += ROC_CPT_ESP_HDR_LEN; + encap->roundup_len = ROC_CPT_ESP_TRL_LEN; + encap->footer_len = ROC_CPT_ESP_TRL_LEN; + } + else + { + encap->partial_len = ROC_CPT_AH_HDR_LEN; + } + + encap->partial_len += sa->crypto_iv_size; + encap->partial_len += sa->integ_icv_size; + + encap->roundup_byte = sa->esp_block_align; + encap->icv_len = sa->integ_icv_size; + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + encap->partial_len += sizeof (udp_header_t); +} + +static size_t +oct_ipsec_inb_ctx_size (struct roc_ot_ipsec_inb_sa *sa) +{ + size_t size; + + /* Variable based on anti-replay window */ + size = offsetof (struct roc_ot_ipsec_inb_sa, ctx) + + offsetof (struct roc_ot_ipsec_inb_ctx_update_reg, ar_winbits); + + if (sa->w0.s.ar_win) + size += (1 << (sa->w0.s.ar_win - 1)) * sizeof (u64); + + return size; +} + +static size_t +oct_o20_ipsec_inb_ctx_size (struct roc_ow_ipsec_inb_sa *sa) +{ + size_t size; + + /* Variable based on anti-replay window */ + size = offsetof (struct roc_ow_ipsec_inb_sa, ctx) + + offsetof (struct roc_ow_ipsec_inb_ctx_update_reg, ar_winbits); + + if (sa->w0.s.ar_win) + size += (1 << (sa->w0.s.ar_win - 1)) * sizeof (u64); + + return size; +} + +static_always_inline void +oct_ipsec_common_inst_param_fill (void *sa, oct_ipsec_session_t *sess) +{ + union cpt_inst_w2 w2; + union cpt_inst_w3 w3; + + clib_memset (&sess->inst, 0, sizeof (struct cpt_inst_s)); + + sess->inst.w7.u64 = 
oct_ipsec_crypto_inst_w7_get (sa); + + w2.u64 = 0; + w2.u64 = ((u64) OCT_EVENT_TYPE_FRM_CPU << 28); + sess->inst.w2.u64 = w2.u64; + + /* Populate word3 in CPT instruction template */ + w3.u64 = 0; + w3.s.qord = 1; + sess->inst.w3.u64 = w3.u64; +} + +static i32 +oct_o10_ipsec_inb_session_update (oct_ipsec_session_t *sess, ipsec_sa_t *sa) +{ + union roc_ot_ipsec_sa_word2 w2; + u32 min_spi, max_spi, spi_mask; + struct roc_ot_ipsec_inb_sa *roc_sa; + oct_ipsec_inb_sa_priv_data_t *inb_sa_priv; + union cpt_inst_w4 inst_w4; + union roc_ot_ipsec_inb_param1 param1; + size_t offset; + + /* Ensure SPI is within the range supported by inline pktio device */ + spi_mask = roc_nix_inl_inb_spi_range (NULL, true, &min_spi, &max_spi); + if (sa->spi < min_spi || sa->spi > max_spi) + { + clib_warning ("SPI %u is not within supported range %u-%u", sa->spi, + min_spi, max_spi); + return -1; + } + + roc_sa = (struct roc_ot_ipsec_inb_sa *) roc_nix_inl_inb_sa_get (NULL, true, + sa->spi); + if (!roc_sa) + { + clib_warning ("Failed to create inbound sa session"); + return -1; + } + + inb_sa_priv = roc_nix_inl_ot_ipsec_inb_sa_sw_rsvd (roc_sa); + inb_sa_priv->user_data = sa->stat_index; + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + roc_sa->w10.s.udp_dst_port = 4500; + roc_sa->w10.s.udp_src_port = 4500; + } + + w2.u64 = 0; + int rv = oct_ipsec_sa_common_param_fill ( + &w2, roc_sa->cipher_key, roc_sa->w8.s.salt, roc_sa->hmac_opad_ipad, sa); + if (rv) + return rv; + + oct_ipsec_sa_len_precalc (sa, &sess->encap); + + if (sa->flags & IPSEC_SA_FLAG_USE_ANTI_REPLAY) + roc_sa->w0.s.ar_win = max_log2 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa)) - 5; + + /* Set direction and enable ESN (if needed) */ + w2.s.dir = ROC_IE_SA_DIR_INBOUND; + if (ipsec_sa_is_set_USE_ESN (sa)) + w2.s.esn_en = 1; + + /* + * Default options for pkt_out and pkt_fmt are with + * second pass meta and defrag. 
+ */ + roc_sa->w0.s.pkt_format = ROC_IE_OT_SA_PKT_FMT_META; + + if (sa->flags & IPSEC_SA_FLAG_IS_INL_REASSEMBLY) + roc_sa->w0.s.pkt_output = ROC_IE_OT_SA_PKT_OUTPUT_HW_BASED_DEFRAG; + else + roc_sa->w0.s.pkt_output = ROC_IE_OT_SA_PKT_OUTPUT_NO_FRAG; + + roc_sa->w0.s.pkind = ROC_IE_OT_CPT_PKIND; + + offset = offsetof (struct roc_ot_ipsec_inb_sa, ctx); + roc_sa->w0.s.hw_ctx_off = offset / 8; + roc_sa->w0.s.ctx_push_size = roc_sa->w0.s.hw_ctx_off + 1; + + /* Set context size, in number of 128B units following the first 128B */ + roc_sa->w0.s.ctx_size = + (round_pow2 (oct_ipsec_inb_ctx_size (roc_sa), 128) >> 7) - 1; + + /* Save SA index/SPI in cookie for now */ + roc_sa->w1.s.cookie = plt_cpu_to_be_32 (sa->spi & spi_mask); + + /* Enable SA */ + w2.s.valid = 1; + roc_sa->w2.u64 = w2.u64; + + asm volatile("dmb oshst" ::: "memory"); + + oct_ipsec_common_inst_param_fill (roc_sa, sess); + + /* Populate word4 in CPT instruction template */ + inst_w4.u64 = 0; + inst_w4.s.opcode_major = ROC_IE_OT_MAJOR_OP_PROCESS_INBOUND_IPSEC; + param1.u16 = 0; + /* Disable IP checksum verification by default */ + param1.s.ip_csum_disable = ROC_IE_OT_SA_INNER_PKT_IP_CSUM_DISABLE; + /* Disable L4 checksum verification by default */ + param1.s.l4_csum_disable = ROC_IE_OT_SA_INNER_PKT_L4_CSUM_DISABLE; + param1.s.esp_trailer_disable = 0; + inst_w4.s.param1 = param1.u16; + sess->inst.w4.u64 = inst_w4.u64; + + rv = roc_nix_inl_ctx_write (NULL, roc_sa, roc_sa, true, + sizeof (struct roc_ot_ipsec_inb_sa)); + if (rv) + { + clib_warning ("roc_nix_inl_ctx_write failed with '%s' error", + roc_error_msg_get (rv)); + return rv; + } + + rv = roc_nix_inl_sa_sync (NULL, roc_sa, true, ROC_NIX_INL_SA_OP_FLUSH); + if (rv) + { + clib_warning ( + "roc_nix_inl_sa_sync flush operation failed with '%s' error", + roc_error_msg_get (rv)); + return rv; + } + + return 0; +} + +static i32 +oct_o20_ipsec_inb_session_update (oct_ipsec_session_t *sess, ipsec_sa_t *sa) +{ + union roc_ow_ipsec_sa_word2 w2; + u32 min_spi, 
max_spi, spi_mask; + struct roc_ow_ipsec_inb_sa *roc_sa; + oct_ipsec_inb_sa_priv_data_t *inb_sa_priv; + union cpt_inst_w4 inst_w4; + union roc_ow_ipsec_inb_param1 param1; + size_t offset; + + /* Ensure SPI is within the range supported by inline pktio device */ + spi_mask = roc_nix_inl_inb_spi_range (NULL, true, &min_spi, &max_spi); + if (sa->spi < min_spi || sa->spi > max_spi) + { + clib_warning ("SPI %u is not within supported range %u-%u", sa->spi, + min_spi, max_spi); + return -1; + } + + roc_sa = (struct roc_ow_ipsec_inb_sa *) roc_nix_inl_inb_sa_get (NULL, true, + sa->spi); + if (!roc_sa) + { + clib_warning ("Failed to create inbound sa session"); + return -1; + } + + inb_sa_priv = roc_nix_inl_ow_ipsec_inb_sa_sw_rsvd (roc_sa); + inb_sa_priv->user_data = sa->stat_index; + + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + roc_sa->w10.s.udp_dst_port = 4500; + roc_sa->w10.s.udp_src_port = 4500; + } + + w2.u64 = 0; + int rv = oct_o20_ipsec_sa_common_param_fill ( + &w2, roc_sa->cipher_key, roc_sa->w8.s.salt, roc_sa->hmac_opad_ipad, sa); + if (rv) + return rv; + + oct_ipsec_sa_len_precalc (sa, &sess->encap); + + if (sa->flags & IPSEC_SA_FLAG_USE_ANTI_REPLAY) + roc_sa->w0.s.ar_win = max_log2 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa)) - 5; + + /* Set direction and enable ESN (if needed) */ + w2.s.dir = ROC_IE_SA_DIR_INBOUND; + if (ipsec_sa_is_set_USE_ESN (sa)) + w2.s.esn_en = 1; + + /* + * Default options for pkt_out and pkt_fmt are with + * second pass meta and defrag. 
+ */ + roc_sa->w0.s.pkt_format = ROC_IE_OW_SA_PKT_FMT_META; + + if (sa->flags & IPSEC_SA_FLAG_IS_INL_REASSEMBLY) + roc_sa->w0.s.pkt_output = ROC_IE_OW_SA_PKT_OUTPUT_HW_BASED_DEFRAG; + else + roc_sa->w0.s.pkt_output = ROC_IE_OW_SA_PKT_OUTPUT_NO_FRAG; + + roc_sa->w0.s.pkind = ROC_IE_OW_CPT_PKIND; + + offset = offsetof (struct roc_ow_ipsec_inb_sa, ctx); + roc_sa->w0.s.hw_ctx_off = offset / 8; + roc_sa->w0.s.ctx_push_size = roc_sa->w0.s.hw_ctx_off + 1; + + /* Set context size, in number of 128B units following the first 128B */ + roc_sa->w0.s.ctx_size = + (round_pow2 (oct_o20_ipsec_inb_ctx_size (roc_sa), 128) >> 7) - 1; + + /* Save SA index/SPI in cookie for now */ + roc_sa->w1.s.cookie = sa->spi & spi_mask; + + /* Enable SA */ + w2.s.valid = 1; + roc_sa->w2.u64 = w2.u64; + + asm volatile ("dmb oshst" ::: "memory"); + + oct_ipsec_common_inst_param_fill (roc_sa, sess); + + /* Populate word4 in CPT instruction template */ + inst_w4.u64 = 0; + inst_w4.s.opcode_major = ROC_IE_OW_MAJOR_OP_PROCESS_INBOUND_IPSEC; + param1.u16 = 0; + /* Disable IP checksum verification by default */ + param1.s.ip_csum_disable = ROC_IE_OW_SA_INNER_PKT_IP_CSUM_DISABLE; + /* Disable L4 checksum verification by default */ + param1.s.l4_csum_disable = ROC_IE_OW_SA_INNER_PKT_L4_CSUM_DISABLE; + param1.s.esp_trailer_disable = 0; + inst_w4.s.param1 = param1.u16; + sess->inst.w4.u64 = inst_w4.u64; + + rv = roc_nix_inl_ctx_write (NULL, roc_sa, roc_sa, true, + sizeof (struct roc_ow_ipsec_inb_sa)); + if (rv) + { + clib_warning ("roc_nix_inl_ctx_write failed with '%s' error", + roc_error_msg_get (rv)); + return rv; + } + + rv = roc_nix_inl_sa_sync (NULL, roc_sa, true, ROC_NIX_INL_SA_OP_FLUSH); + if (rv) + { + clib_warning ( + "roc_nix_inl_sa_sync flush operation failed with '%s' error", + roc_error_msg_get (rv)); + return rv; + } + + return 0; +} + +static i32 +oct_ipsec_inb_session_update (oct_ipsec_session_t *sess, ipsec_sa_t *sa) +{ + if (roc_model_is_cn20k ()) + return oct_o20_ipsec_inb_session_update 
(sess, sa); + return oct_o10_ipsec_inb_session_update (sess, sa); +} + +int +oct_ipsec_outb_sa_idx_get (oct_device_t *od, u32 *index, u32 spi) +{ + u32 pos, idx; + u64 slab; + int rc; + + if (!od->outb.sa_bmap) + return -ENOTSUP; + + pos = 0; + slab = 0; + /* Scan from the beginning */ + plt_bitmap_scan_init (od->outb.sa_bmap); + + /* Scan bitmap to get the free sa index */ + rc = plt_bitmap_scan (od->outb.sa_bmap, &pos, &slab); + /* Empty bitmap */ + if (rc == 0) + { + plt_err ("Outbound SA' exhausted, use 'ipsec_out_max_sa' " + "devargs to increase"); + return -ERANGE; + } + + /* Get free SA index */ + idx = pos + (slab ? plt_ctz64 (slab) : 0); + + plt_bitmap_clear (od->outb.sa_bmap, idx); + *index = idx; + return 0; +} + +void * +oct_ipsec_get_oct_device_from_outb_sa (u32 sa_index) +{ + ipsec_sa_t *sa = ipsec_sa_get (sa_index); + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *si; + u32 sw_if_index; + vnet_dev_port_t *port; + + sw_if_index = + fib_entry_get_resolving_interface (sa->tunnel.t_fib_entry_index); + si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + port = vnet_dev_get_port_from_hw_if_index (si->hw_if_index); + + return (oct_device_t *) vnet_dev_get_data (port->dev); +} + +static_always_inline i32 +oct_o10_ipsec_outb_session_update (oct_ipsec_session_t *sess, ipsec_sa_t *sa) +{ + oct_main_t *om = &oct_main; + union roc_ot_ipsec_outb_param1 param1; + struct roc_ot_ipsec_outb_sa *out_sa; + oct_ipsec_outb_sa_priv_data_t *outb_priv; + union roc_ot_ipsec_sa_word2 w2; + union cpt_inst_w4 inst_w4; + u32 sa_idx; + u64 *ipv6_addr; + size_t offset; + int rv = 0, i = 0; + + vec_validate_aligned (sess->out_sa, vec_len (om->oct_dev), + CLIB_CACHE_LINE_BYTES); + + pool_foreach_pointer (oct_dev, om->oct_dev) + { + /* Alloc an sa index */ + rv = oct_ipsec_outb_sa_idx_get (oct_dev, &sa_idx, sa->spi); + if (rv) + return rv; + + out_sa = sess->out_sa[i] = + roc_nix_inl_ot_ipsec_outb_sa (oct_dev->outb.sa_base, sa_idx); + + outb_priv = 
roc_nix_inl_ot_ipsec_outb_sa_sw_rsvd (out_sa); + outb_priv->sa_idx = sa_idx; + + roc_ot_ipsec_outb_sa_init (out_sa); + + w2.u64 = 0; + rv = oct_ipsec_sa_common_param_fill (&w2, out_sa->cipher_key, + out_sa->iv.s.salt, + out_sa->hmac_opad_ipad, sa); + if (rv) + return rv; + + /* Set direction and enable ESN (if needed) */ + w2.s.dir = ROC_IE_SA_DIR_OUTBOUND; + if (ipsec_sa_is_set_USE_ESN (sa)) + out_sa->w0.s.esn_en = 1; + + /* Configure tunnel header generation */ + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + { + if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa)) + { + w2.s.outer_ip_ver = ROC_IE_SA_IP_VERSION_6; + + clib_memcpy (&out_sa->outer_hdr.ipv6.src_addr, + &sa->tunnel.t_src.ip.ip6, sizeof (ip6_address_t)); + clib_memcpy (&out_sa->outer_hdr.ipv6.dst_addr, + &sa->tunnel.t_dst.ip.ip6, sizeof (ip6_address_t)); + + /* Convert host to network byte order of ipv6 address */ + ipv6_addr = (u64 *) &out_sa->outer_hdr.ipv6.src_addr; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + ipv6_addr++; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + + ipv6_addr = (u64 *) &out_sa->outer_hdr.ipv6.dst_addr; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + ipv6_addr++; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + } + else + { + w2.s.outer_ip_ver = ROC_IE_SA_IP_VERSION_4; + out_sa->outer_hdr.ipv4.src_addr = + clib_host_to_net_u32 (sa->tunnel.t_src.ip.ip4.as_u32); + out_sa->outer_hdr.ipv4.dst_addr = + clib_host_to_net_u32 (sa->tunnel.t_dst.ip.ip4.as_u32); + } + } + + offset = offsetof (struct roc_ot_ipsec_outb_sa, ctx); + out_sa->w0.s.hw_ctx_off = offset / 8; + out_sa->w0.s.ctx_push_size = out_sa->w0.s.hw_ctx_off + 1; + /* Set context size, in number of 128B units following the first 128B */ + out_sa->w0.s.ctx_size = (round_pow2 (offset, 128) >> 7) - 1; + out_sa->w0.s.ctx_hdr_size = 1; + out_sa->w0.s.aop_valid = 1; + + out_sa->w2.u64 = w2.u64; + + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + { + if (sa->tunnel.t_encap_decap_flags & + TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF) + 
out_sa->w2.s.ipv4_df_src_or_ipv6_flw_lbl_src = + ROC_IE_OT_SA_COPY_FROM_INNER_IP_HDR; + if (!sa->tunnel.t_dscp) + out_sa->w2.s.dscp_src = ROC_IE_OT_SA_COPY_FROM_INNER_IP_HDR; + else + { + out_sa->w2.s.dscp_src = ROC_IE_OT_SA_COPY_FROM_SA; + out_sa->w10.s.dscp = sa->tunnel.t_dscp; + } + } + + out_sa->w2.s.ipid_gen = 1; + out_sa->w2.s.iv_src = ROC_IE_OT_SA_IV_SRC_FROM_SA; + out_sa->w2.s.valid = 1; + + asm volatile("dmb oshst" ::: "memory"); + + oct_ipsec_sa_len_precalc (sa, &sess->encap); + + oct_ipsec_common_inst_param_fill (out_sa, sess); + + /* Populate word4 in CPT instruction template */ + inst_w4.u64 = 0; + inst_w4.s.opcode_major = ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC; + param1.u16 = 0; + if (sa->tunnel.t_hop_limit) + param1.s.ttl_or_hop_limit = 1; + + /* Enable IP checksum computation by default */ + param1.s.ip_csum_disable = ROC_IE_OT_SA_INNER_PKT_IP_CSUM_ENABLE; + /* Enable L4 checksum computation by default */ + param1.s.l4_csum_disable = ROC_IE_OT_SA_INNER_PKT_L4_CSUM_ENABLE; + + inst_w4.s.param1 = param1.u16; + sess->inst.w4.u64 = inst_w4.u64; + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + out_sa->w10.s.udp_dst_port = 4500; + out_sa->w10.s.udp_src_port = 4500; + } + + rv = roc_nix_inl_ctx_write (oct_dev->nix, out_sa, out_sa, false, + sizeof (struct roc_ot_ipsec_outb_sa)); + if (rv) + { + clib_warning ("roc_nix_inl_ctx_write failed with '%s' error", + roc_error_msg_get (rv)); + return -1; + } + i++; + } + return 0; +} + +static_always_inline i32 +oct_o20_ipsec_outb_session_update (oct_ipsec_session_t *sess, ipsec_sa_t *sa) +{ + oct_main_t *om = &oct_main; + union roc_ow_ipsec_outb_param1 param1; + struct roc_ow_ipsec_outb_sa *out_sa; + oct_ipsec_outb_sa_priv_data_t *outb_priv; + union roc_ow_ipsec_sa_word2 w2; + union cpt_inst_w4 inst_w4; + u32 sa_idx; + u64 *ipv6_addr; + size_t offset; + int rv = 0, i = 0; + + vec_validate_aligned (sess->out_sa, vec_len (om->oct_dev), + CLIB_CACHE_LINE_BYTES); + + pool_foreach_pointer (oct_dev, om->oct_dev) + { + /* 
Alloc an sa index */ + rv = oct_ipsec_outb_sa_idx_get (oct_dev, &sa_idx, sa->spi); + if (rv) + return rv; + + out_sa = sess->out_sa[i] = + roc_nix_inl_ow_ipsec_outb_sa (oct_dev->outb.sa_base, sa_idx); + + outb_priv = roc_nix_inl_ow_ipsec_outb_sa_sw_rsvd (out_sa); + outb_priv->sa_idx = sa_idx; + + roc_ow_ipsec_outb_sa_init (out_sa); + + w2.u64 = 0; + rv = oct_o20_ipsec_sa_common_param_fill (&w2, out_sa->cipher_key, + out_sa->iv.s.salt, + out_sa->hmac_opad_ipad, sa); + if (rv) + return rv; + + /* Set direction and enable ESN (if needed) */ + w2.s.dir = ROC_IE_SA_DIR_OUTBOUND; + if (ipsec_sa_is_set_USE_ESN (sa)) + out_sa->w0.s.esn_en = 1; + + /* Configure tunnel header generation */ + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + { + if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa)) + { + w2.s.outer_ip_ver = ROC_IE_SA_IP_VERSION_6; + + clib_memcpy (&out_sa->outer_hdr.ipv6.src_addr, + &sa->tunnel.t_src.ip.ip6, sizeof (ip6_address_t)); + clib_memcpy (&out_sa->outer_hdr.ipv6.dst_addr, + &sa->tunnel.t_dst.ip.ip6, sizeof (ip6_address_t)); + + /* Convert host to network byte order of ipv6 address */ + ipv6_addr = (u64 *) &out_sa->outer_hdr.ipv6.src_addr; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + ipv6_addr++; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + + ipv6_addr = (u64 *) &out_sa->outer_hdr.ipv6.dst_addr; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + ipv6_addr++; + *ipv6_addr = clib_host_to_net_u64 (*ipv6_addr); + } + else + { + w2.s.outer_ip_ver = ROC_IE_SA_IP_VERSION_4; + out_sa->outer_hdr.ipv4.src_addr = + clib_host_to_net_u32 (sa->tunnel.t_src.ip.ip4.as_u32); + out_sa->outer_hdr.ipv4.dst_addr = + clib_host_to_net_u32 (sa->tunnel.t_dst.ip.ip4.as_u32); + } + } + + offset = offsetof (struct roc_ow_ipsec_outb_sa, ctx); + out_sa->w0.s.hw_ctx_off = offset / 8; + out_sa->w0.s.ctx_push_size = out_sa->w0.s.hw_ctx_off + 1; + /* Set context size, in number of 128B units following the first 128B */ + out_sa->w0.s.ctx_size = (round_pow2 (offset, 128) >> 7) - 1; + 
out_sa->w0.s.ctx_hdr_size = 1; + out_sa->w0.s.aop_valid = 1; + + out_sa->w2.u64 = w2.u64; + + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + { + if (sa->tunnel.t_encap_decap_flags & + TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF) + out_sa->w2.s.ipv4_df_src_or_ipv6_flw_lbl_src = + ROC_IE_OW_SA_COPY_FROM_INNER_IP_HDR; + if (!sa->tunnel.t_dscp) + out_sa->w2.s.dscp_src = ROC_IE_OW_SA_COPY_FROM_INNER_IP_HDR; + else + { + out_sa->w2.s.dscp_src = ROC_IE_OW_SA_COPY_FROM_SA; + out_sa->w10.s.dscp = sa->tunnel.t_dscp; + } + } + + out_sa->w2.s.ipid_gen = 1; + out_sa->w2.s.iv_src = ROC_IE_OW_SA_IV_SRC_FROM_SA; + out_sa->w2.s.valid = 1; + + asm volatile ("dmb oshst" ::: "memory"); + + oct_ipsec_sa_len_precalc (sa, &sess->encap); + + oct_ipsec_common_inst_param_fill (out_sa, sess); + + /* Populate word4 in CPT instruction template */ + inst_w4.u64 = 0; + inst_w4.s.opcode_major = ROC_IE_OW_MAJOR_OP_PROCESS_OUTBOUND_IPSEC; + param1.u16 = 0; + if (sa->tunnel.t_hop_limit) + param1.s.ttl_or_hop_limit = 1; + + /* Enable IP checksum computation by default */ + param1.s.ip_csum_disable = ROC_IE_OW_SA_INNER_PKT_IP_CSUM_ENABLE; + /* Enable L4 checksum computation by default */ + param1.s.l4_csum_disable = ROC_IE_OW_SA_INNER_PKT_L4_CSUM_ENABLE; + + inst_w4.s.param1 = param1.u16; + sess->inst.w4.u64 = inst_w4.u64; + if (ipsec_sa_is_set_UDP_ENCAP (sa)) + { + out_sa->w10.s.udp_dst_port = 4500; + out_sa->w10.s.udp_src_port = 4500; + } + + rv = roc_nix_inl_ctx_write (oct_dev->nix, out_sa, out_sa, false, + sizeof (struct roc_ow_ipsec_outb_sa)); + if (rv) + { + clib_warning ("roc_nix_inl_ctx_write failed with '%s' error", + roc_error_msg_get (rv)); + return -1; + } + i++; + } + return 0; +} + +static i32 +oct_ipsec_outb_session_update (oct_ipsec_session_t *sess, ipsec_sa_t *sa) +{ + if (roc_model_is_cn20k ()) + return oct_o20_ipsec_outb_session_update (sess, sa); + return oct_o10_ipsec_outb_session_update (sess, sa); +} + +static i32 +oct_ipsec_session_create (u32 sa_index) +{ + oct_ipsec_main_t *oim = 
&oct_ipsec_main; + ipsec_sa_t *sa = ipsec_sa_get (sa_index); + oct_ipsec_session_t *session = NULL; + u32 sess_index; + int rv; + + pool_get_aligned (oim->inline_ipsec_sessions, session, ROC_ALIGN); + clib_memset (session, 0, sizeof (*session)); + sess_index = session - oim->inline_ipsec_sessions; + + ASSERT (sa_index == sess_index); + + if (sa->flags & IPSEC_SA_FLAG_IS_INBOUND) + rv = oct_ipsec_inb_session_update (session, sa); + else + rv = oct_ipsec_outb_session_update (session, sa); + + if (rv) + return rv; + + /* Initialize the ITF details in ipsec_session for tunnel SAs */ + if (ipsec_sa_is_set_IS_TUNNEL (sa)) + session->itf_sw_idx = ~0; + return 0; +} + +static i32 +oct_ipsec_session_destroy (u32 sa_index) +{ + oct_main_t *om = &oct_main; + oct_ipsec_main_t *oim = &oct_ipsec_main; + ipsec_sa_t *sa = ipsec_sa_get (sa_index); + oct_ipsec_session_t *session = NULL; + void *roc_sa; + void *sa_dptr = NULL; + int rv, i = 0; + + session = pool_elt_at_index (oim->inline_ipsec_sessions, sa_index); + if (pool_is_free (oim->inline_ipsec_sessions, session)) + return -1; + + if (sa->flags & IPSEC_SA_FLAG_IS_INBOUND) + { + roc_sa = (void *) roc_nix_inl_inb_sa_get (NULL, true, sa->spi); + if (!roc_sa) + { + clib_warning ("roc_nix_inl_inb_sa_get failed to get SA for spi %u", + sa->spi); + return -1; + } + + sa_dptr = plt_zmalloc (oct_ipsec_get_inb_sa_sz (), 8); + if (sa_dptr != NULL) + { + oct_ipsec_sa_init (sa_dptr, true); + rv = roc_nix_inl_ctx_write (NULL, sa_dptr, roc_sa, true, + oct_ipsec_get_inb_sa_sz ()); + if (rv) + { + clib_warning ("roc_nix_inl_ctx_write failed - ROC error %s (%d)", + roc_error_msg_get (rv), rv); + return rv; + } + plt_free (sa_dptr); + } + } + else + { + pool_foreach_pointer (oct_dev, om->oct_dev) + { + sa_dptr = plt_zmalloc (oct_ipsec_get_outb_sa_sz (), 8); + if (sa_dptr != NULL) + { + oct_ipsec_sa_init (sa_dptr, false); + rv = roc_nix_inl_ctx_write (oct_dev->nix, sa_dptr, + session->out_sa[i], false, + oct_ipsec_get_outb_sa_sz ()); + if (rv) + 
{ + clib_warning ( + "Could not write inline outbound session to hardware"); + return rv; + } + plt_free (sa_dptr); + } + i++; + } + } + + clib_memset (session, 0, sizeof (oct_ipsec_session_t)); + pool_put (oim->inline_ipsec_sessions, session); + + return 0; +} + +static clib_error_t * +oct_add_del_session (u32 sa_index, u8 is_add) +{ + ipsec_sa_t *sa; + + if (!is_add) + { + if (oct_ipsec_session_destroy (sa_index) < 0) + { + return clib_error_create ( + "IPsec session destroy operation failed for IPsec " + "index %u", + sa_index); + } + return 0; + } + + if (oct_ipsec_session_create (sa_index) < 0) + return clib_error_create ("ipsec session create failed for sa index %u", + sa_index); + + sa = ipsec_sa_get (sa_index); + + if (sa->flags & IPSEC_SA_FLAG_IS_INBOUND) + return oct_inl_inb_ipsec_flow_enable (); + + return 0; +} + +static clib_error_t * +oct_ipsec_check_support (ipsec_sa_t *sa) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + union cpt_eng_caps hw_caps = oct_cpt_get_eng_caps (ocm); + u8 is_cipher_algo_supported; + u8 is_auth_algo_supported; + + switch (sa->crypto_alg) + { + case IPSEC_CRYPTO_ALG_NONE: + is_cipher_algo_supported = 1; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_128: + case IPSEC_CRYPTO_ALG_AES_GCM_192: + case IPSEC_CRYPTO_ALG_AES_GCM_256: + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + case IPSEC_CRYPTO_ALG_AES_CTR_128: + case IPSEC_CRYPTO_ALG_AES_CTR_192: + case IPSEC_CRYPTO_ALG_AES_CTR_256: + is_cipher_algo_supported = hw_caps.aes; + break; + case IPSEC_CRYPTO_ALG_3DES_CBC: + is_cipher_algo_supported = hw_caps.des; + break; + default: + is_cipher_algo_supported = 0; + break; + } + + switch (sa->integ_alg) + { + case IPSEC_INTEG_ALG_NONE: + is_auth_algo_supported = 1; + break; + case IPSEC_INTEG_ALG_SHA1_96: + case IPSEC_INTEG_ALG_SHA_256_128: + case IPSEC_INTEG_ALG_SHA_384_192: + case IPSEC_INTEG_ALG_SHA_512_256: + is_auth_algo_supported = hw_caps.sha1_sha2; + break; + 
default: + is_auth_algo_supported = 0; + break; + } + + if (!is_cipher_algo_supported) + return clib_error_create ("crypto-alg %U not supported", + format_ipsec_crypto_alg, sa->crypto_alg); + + if (!is_auth_algo_supported) + return clib_error_create ("integ-alg %U not supported", + format_ipsec_integ_alg, sa->integ_alg); + + return 0; +} + +vnet_dev_rv_t +oct_init_ipsec_backend (vlib_main_t *vm, vnet_dev_t *dev) +{ + ipsec_main_t *im = &ipsec_main; + int rv; + u32 idx; + + idx = ipsec_register_esp_backend ( + vm, im, "octeon backend", "esp4-encrypt", "oct-esp4-encrypt-tun", + "esp4-decrypt", "esp4-decrypt-tun", "esp6-encrypt", "oct-esp6-encrypt-tun", + "esp6-decrypt", "esp6-decrypt-tun", "esp-mpls-encrypt-tun", + oct_ipsec_check_support, oct_add_del_session); + + rv = ipsec_select_esp_backend (im, idx); + if (rv) + { + log_err (dev, "OCTEON IPsec ESP backend selection failed"); + return VNET_DEV_ERR_INTERNAL; + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_ipsec_inl_dev_inb_cfg (vlib_main_t *vm, vnet_dev_t *dev, + oct_inl_dev_cfg_t *inl_dev_cfg) +{ + oct_inl_dev_main_t *inl_dev_main = &oct_inl_dev_main; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_cpt_rxc_time_cfg rxc_cfg = { 0 }; + int rrv; + + cd->nix->ipsec_in_min_spi = inl_dev_main->in_min_spi; + cd->nix->ipsec_in_max_spi = inl_dev_main->in_max_spi; + + if ((rrv = roc_nix_inl_inb_init (cd->nix))) + { + log_err (dev, "roc_nix_inl_inb_init failed - ROC error %s [%d]", + roc_error_msg_get (rrv), rrv); + return VNET_DEV_ERR_INTERNAL; + } + + roc_nix_inb_mode_set (cd->nix, true); + if (roc_model_is_cn10k ()) + roc_nix_inl_inb_set (cd->nix, true); + + if ((rrv = roc_nix_reassembly_configure (&rxc_cfg, 1000))) + { + log_err (dev, "roc_nix_reassembly_configure failed - ROC error %s [%d]", + roc_error_msg_get (rrv), rrv); + return VNET_DEV_ERR_INTERNAL; + } + + inl_dev_main->inb_sa_base = roc_nix_inl_inb_sa_base_get (NULL, true); + inl_dev_main->inb_sa_sz = roc_nix_inl_inb_sa_sz (NULL, true); + + 
inl_dev_main->inb_spi_mask = + roc_nix_inl_inb_spi_range (NULL, true, NULL, NULL); + + return VNET_DEV_OK; +} + +static int +oct_pool_inl_meta_pool_cb (u64 *aura_handle, uintptr_t *mpool, u32 buf_sz, + u32 nb_bufs, bool destroy, const char *mempool_name) +{ + extern oct_plt_init_param_t oct_plt_init_param; + u64 mem_start, mem_end, elem_addr; + struct npa_pool_s npapool; + struct npa_aura_s aura; + const char *mp_name; + u32 i; + u64 total_sz; + u64 roc_aura_handle; + int rv; + + mp_name = mempool_name ? mempool_name : OCT_NIX_INL_META_POOL_NAME; + + if (destroy) + return 0; + + buf_sz = PLT_ALIGN (buf_sz, ROC_ALIGN); + total_sz = nb_bufs * buf_sz; + + mem_start = (u64) oct_plt_init_param.oct_plt_zmalloc (total_sz, ROC_ALIGN); + if (!mem_start) + { + clib_warning ("Failed to allocate physmem for pool %s", mp_name); + return -1; + } + + clib_memset (&aura, 0, sizeof (struct npa_aura_s)); + clib_memset (&npapool, 0, sizeof (struct npa_pool_s)); + + npapool.nat_align = 1; + + rv = roc_npa_pool_create (&roc_aura_handle, buf_sz, nb_bufs, &aura, &npapool, + mempool_name ? 
0 : ROC_NPA_ZERO_AURA_F); + if (rv) + { + clib_warning ("roc_npa_pool_create failed with '%s' error", + roc_error_msg_get (rv)); + return -1; + } + + mem_end = mem_start + total_sz; + + roc_npa_aura_op_range_set (roc_aura_handle, mem_start, mem_end); + + elem_addr = mem_start; + for (i = 0; i < nb_bufs; i++) + { + roc_npa_aura_op_free (roc_aura_handle, 0, elem_addr); + elem_addr += buf_sz; + } + + /* Read back to confirm pointers are freed */ + roc_npa_aura_op_available (roc_aura_handle); + + *aura_handle = roc_aura_handle; + *mpool = (uintptr_t) mem_start; + + return 0; +} + +vnet_dev_rv_t +oct_ipsec_inl_dev_outb_cfg (vnet_dev_t *dev, oct_inl_dev_cfg_t *inl_dev_cfg) +{ + oct_inl_dev_main_t *inl_dev_main = &oct_inl_dev_main; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_cpt_lf *cpt_lf; + u64 cpt_io_addr; + struct plt_bitmap *bmap; + size_t bmap_sz; + void *mem; + int rrv, i; + + nix->outb_nb_desc = inl_dev_cfg->outb_nb_desc = 8192; + nix->outb_nb_crypto_qs = inl_dev_cfg->outb_nb_crypto_qs = 1; + nix->ipsec_out_max_sa = cd->outb.max_sa = inl_dev_main->out_max_sa; + nix->ipsec_out_sso_pffunc = false; + + rrv = roc_nix_inl_outb_init (nix); + if (rrv) + { + log_err (dev, "roc_nix_inl_outb_init failed - ROC error '%s [%d]", + roc_error_msg_get (rrv), rrv); + return VNET_DEV_ERR_INTERNAL; + } + + cpt_lf = roc_nix_inl_outb_lf_base_get (nix); + + cpt_io_addr = cpt_lf->io_addr; + cpt_io_addr |= (ROC_CN10K_CPT_INST_DW_M1 << 4); + cd->cpt_io_addr = cpt_io_addr; + + bmap_sz = plt_bitmap_get_memory_footprint (cd->outb.max_sa); + mem = plt_zmalloc (bmap_sz, PLT_CACHE_LINE_SIZE); + if (mem == NULL) + { + log_err (dev, "Outbound SA bmap alloc failed"); + roc_nix_inl_outb_fini (nix); + + return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL; + } + + bmap = plt_bitmap_init (cd->outb.max_sa, mem, bmap_sz); + if (!bmap) + { + log_err (dev, "Outbound SA bmap init failed"); + roc_nix_inl_outb_fini (nix); + plt_free (mem); + + return 
VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL; + } + + for (i = 0; i < cd->outb.max_sa; i++) + plt_bitmap_set (bmap, i); + + cd->outb.sa_base = roc_nix_inl_outb_sa_base_get (nix); + cd->outb.sa_bmap_mem = mem; + cd->outb.sa_bmap = bmap; + + return VNET_DEV_OK; +} + +void +oct_ipsec_sso_work_cb (uint64_t *gw, void *args, uint32_t soft_exp_event) +{ + vlib_main_t *vm = vlib_get_main (); + struct roc_ot_ipsec_outb_sa *sa; + oct_ipsec_outb_sa_priv_data_t *outb_priv; + vlib_buffer_t *b; + u32 bi; + + switch ((gw[0] >> 28) & 0xF) + { + case OCT_EVENT_TYPE_FRM_INL_DEV: + /* Event from inbound inline dev due to IPSEC packet bad L4 */ + b = (vlib_buffer_t *) (gw[1] - sizeof (vlib_buffer_t)); + bi = vlib_get_buffer_index (vm, b); + vlib_buffer_free_no_next (vm, &bi, 1); + return; + case OCT_EVENT_TYPE_FRM_CPU: + /* Event from outbound inline error */ + b = (vlib_buffer_t *) gw[1]; + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b)); + break; + /* Fall through */ + default: + if (soft_exp_event & 0x1) + { + sa = (struct roc_ot_ipsec_outb_sa *) args; + outb_priv = roc_nix_inl_ot_ipsec_outb_sa_sw_rsvd (sa); + clib_warning ("Soft expiry event received for sa_index %u", + outb_priv->sa_idx); + } + else + { + clib_warning ("Unknown event gw[0] = 0x%016lx, gw[1] = 0x%016lx", + gw[0], gw[1]); + } + return; + } + + return; +} + +vnet_dev_rv_t +oct_early_init_inline_ipsec (vlib_main_t *vm, vnet_dev_t *dev) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + u8 bp_index = vlib_buffer_pool_get_default_for_numa (vm, 0); + vlib_buffer_pool_t *bp = NULL; + extern oct_plt_init_param_t oct_plt_init_param; + oct_inl_dev_main_t *inl_dev_main = &oct_inl_dev_main; + int rrv; + + bp = vec_elt_at_index (bm->buffer_pools, bp_index); + + inl_dev_main->inl_dev->ipsec_in_min_spi = inl_dev_main->in_min_spi; + inl_dev_main->inl_dev->ipsec_in_max_spi = inl_dev_main->in_max_spi; + inl_dev_main->inl_dev->wqe_skip = + STRUCT_OFFSET_OF (vlib_buffer_t, pre_data) / ROC_ALIGN; + inl_dev_main->inl_dev->nb_meta_bufs = 
bp->n_buffers; + inl_dev_main->inl_dev->res_addr_offset = -1; + if (roc_feature_nix_has_inl_multi_queue ()) + inl_dev_main->inl_dev->nb_inb_cptlfs = 1; + + if ((rrv = roc_nix_inl_dev_init (inl_dev_main->inl_dev)) < 0) + { + log_err (dev, "roc_nix_inl_dev_init: %s [%d]", roc_error_msg_get (rrv), + rrv); + return VNET_DEV_ERR_UNSUPPORTED_DEVICE; + } + + roc_nix_inl_meta_pool_cb_register (oct_pool_inl_meta_pool_cb); + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_init_nix_inline_ipsec (vlib_main_t *vm, vnet_dev_t *inl_dev, + vnet_dev_t *dev) +{ + oct_inl_dev_cfg_t inl_dev_cfg; + vnet_dev_rv_t rv; + + if ((rv = oct_ipsec_inl_dev_inb_cfg (vm, dev, &inl_dev_cfg))) + return rv; + + if ((rv = oct_ipsec_inl_dev_outb_cfg (dev, &inl_dev_cfg))) + return rv; + + /* Register callback to handle security error work */ + roc_nix_inl_cb_register (oct_ipsec_sso_work_cb, NULL); + + return VNET_DEV_OK; +} diff --git a/src/plugins/dev_octeon/ipsec.h b/src/plugins/dev_octeon/ipsec.h new file mode 100644 index 0000000000..18402cf3f3 --- /dev/null +++ b/src/plugins/dev_octeon/ipsec.h @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2024 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _OCTEON_IPSEC_H_ +#define _OCTEON_IPSEC_H_ + +#define OCT_EVENT_TYPE_FRM_INL_DEV 0x0 +#define OCT_EVENT_TYPE_FRM_CPU 0x1 + +#define OCT_ROC_SALT_LEN 4 +#define OCT_EXT_HDR_FROM_VLIB_BUFFER(x) \ + (((oct_ipsec_outbound_pkt_meta_t *) (x)) - 1) + +#define foreach_octeon_ipsec_ucc \ + _ (SUCCESS, success, INFO, "Packet successfully processed") \ + _ (ERR_SA_INVAL, err_sa_inval, ERROR, "SA invalid") \ + _ (ERR_SA_EXPIRED, err_sa_expired, ERROR, "SA hard-expired") \ + _ (ERR_SA_OVERFLOW, err_sa_overflow, ERROR, "SA overflow") \ + _ (ERR_SA_ESP_BAD_ALGO, err_sa_esp_bad_algo, ERROR, "ESP bad algorithm") \ + _ (ERR_SA_AH_BAD_ALGO, err_sa_ah_bad_algo, ERROR, "SA AH bad algorithm") \ + _ (ERR_SA_BAD_CTX, err_sa_bad_ctx, ERROR, "Bad SA context received on CPT") \ + _ (SA_CTX_FLAG_MISMATCH, sa_ctx_flag_mismatch, ERROR, \ + "SA context flags mismatch") \ + _ (ERR_AOP_IPSEC, err_aop_ipsec, ERROR, "AOP logical error") \ + _ (ERR_PKT_IP, err_pkt_ip, ERROR, "Bad IP version or TTL") \ + _ (ERR_PKT_IP6_BAD_EXT, err_pkt_ip6_bad_ext, ERROR, \ + "IPv6 mobility extension not supported") \ + _ (ERR_PKT_IP6_HBH, err_pkt_ip6_hbh, ERROR, \ + "Error with IPv6 hop-by-hop header") \ + _ (ERR_PKT_IP6_BIGEXT, err_pkt_ip6_bigext, ERROR, \ + "IPv6 extension header length exceeded") \ + _ (ERR_PKT_IP_ULP, err_pkt_ip_ulp, ERROR, "Bad protocol in IP header") \ + _ (ERR_PKT_SA_MISMATCH, err_pkt_sa_mismatch, ERROR, \ + "IP address mismatch b/w SA and packet") \ + _ (ERR_PKT_SPI_MISMATCH, err_pkt_spi_mismatch, ERROR, \ + "SPI mismatch b/w SA and packet") \ + _ (ERR_PKT_ESP_BADPAD, err_pkt_esp_badpad, ERROR, \ + "Bad padding in ESP packet") \ + _ (ERR_PKT_BADICV, err_pkt_badicv, ERROR, "ICV verification failed") \ + _ (ERR_PKT_REPLAY_SEQ, err_pkt_replay_seq, ERROR, \ + "Sequence number out of anti-replay window") \ + _ (ERR_PKT_BADNH, err_pkt_badnh, ERROR, "Bad next-hop") \ + _ (ERR_PKT_SA_PORT_MISMATCH, 
err_pkt_sa_port_mismatch, ERROR, \ + "Port mismatch b/w packet and SA") \ + _ (ERR_PKT_BAD_DLEN, err_pkt_bad_dlen, ERROR, "Dlen mismatch") \ + _ (ERR_SA_ESP_BAD_KEYS, err_sa_esp_bad_keys, ERROR, \ + "Bad key-size for selected ESP algorithm") \ + _ (ERR_SA_AH_BAD_KEYS, err_sa_ah_bad_keys, ERROR, \ + "Bad key-size for selected AH algorithm") \ + _ (ERR_SA_BAD_IP, err_sa_bad_ip, ERROR, \ + "IP version mismatch b/w packet and SA") \ + _ (ERR_PKT_IP_FRAG, err_pkt_ip_frag, ERROR, \ + "IPsec packet is an outer-IP fragment") \ + _ (ERR_PKT_REPLAY_WINDOW, err_pkt_replay_window, ERROR, \ + "Sequence number already seen") \ + _ (SUCCESS_PKT_IP_BADCSUM, success_pkt_ip_badcsum, ERROR, \ + "Bad IP checksum ") \ + _ (SUCCESS_PKT_L4_GOODCSUM, success_pkt_l4_goodcsum, INFO, \ + "Good inner L4 checksum") \ + _ (SUCCESS_PKT_L4_BADCSUM, success_pkt_l4_badcsum, ERROR, \ + "Bad inner L4 checksum") \ + _ (SUCCESS_SA_SOFTEXP_FIRST, success_sa_softexp_first, WARN, \ + "SA soft-expired - first encounter") \ + _ (SUCCESS_PKT_UDPESP_NZCSUM, success_pkt_udpesp_nzcsum, ERROR, \ + "Non-zero UDP checksum in UDP-ESP packet") \ + _ (SUCCESS_SA_SOFTEXP_AGAIN, success_sa_softexp_again, WARN, \ + "SA soft-expired - subsequent encounter") \ + _ (SUCCESS_PKT_UDP_ZEROCSUM, success_pkt_udp_zerocsum, INFO, \ + "Zero UDP checksum") + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (c0); + u64 nixtx[2]; + u8 sg_buffer[128]; +} oct_ipsec_outbound_pkt_meta_t; + +typedef struct +{ + union cpt_res_s res; + u16 dlen_adj; + u16 sa_bytes; +} oct_ipsec_outb_data_t; + +STATIC_ASSERT (sizeof (oct_ipsec_outb_data_t) <= + STRUCT_SIZE_OF (vnet_buffer_opaque2_t, unused), + "Outbound meta-data too large for vnet_buffer_opaque2_t"); + +#define oct_ipsec_outb_data(b) \ + ((oct_ipsec_outb_data_t *) ((u8 *) (b)->opaque2 + \ + STRUCT_OFFSET_OF (vnet_buffer_opaque2_t, \ + unused))) + +typedef struct +{ + uint8_t partial_len; + uint8_t roundup_len; + uint8_t footer_len; + uint8_t roundup_byte; + uint8_t icv_len; + uint8_t 
adj_len; +} oct_ipsec_encap_len_t; + +typedef struct +{ + u64 user_data; +} oct_ipsec_inb_sa_priv_data_t; + +typedef struct +{ + /* SA index */ + u32 sa_idx; +} oct_ipsec_outb_sa_priv_data_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /* Outbound SA */ + void **out_sa; + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + struct cpt_inst_s inst; + u16 sq; + u32 itf_sw_idx; + /* Packet length for IPsec encapsulation */ + oct_ipsec_encap_len_t encap; +} oct_ipsec_session_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + oct_ipsec_session_t *inline_ipsec_sessions; + +} oct_ipsec_main_t; + +typedef struct +{ + u32 outb_nb_desc; + u16 outb_nb_crypto_qs; +} oct_inl_dev_cfg_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + struct roc_nix_inl_dev *inl_dev; + vnet_dev_t *vdev; + uintptr_t inb_sa_base; + u32 inb_sa_sz; + u32 inb_spi_mask; + u8 is_inl_ipsec_flow_enabled; + u32 in_min_spi; + u32 in_max_spi; + u32 out_max_sa; +} oct_inl_dev_main_t; + +extern oct_ipsec_main_t oct_ipsec_main; +extern oct_inl_dev_main_t oct_inl_dev_main; + +vnet_dev_rv_t oct_init_ipsec_backend (vlib_main_t *vm, vnet_dev_t *dev); + +vnet_dev_rv_t oct_early_init_inline_ipsec (vlib_main_t *vm, vnet_dev_t *dev); +vnet_dev_rv_t oct_init_nix_inline_ipsec (vlib_main_t *vm, vnet_dev_t *inl_dev, + vnet_dev_t *dev); +void *oct_ipsec_get_oct_device_from_outb_sa (u32 sa_index); + +clib_error_t *oct_inl_inb_ipsec_flow_enable (void); + +#endif /* _OCTEON_IPSEC_H_ */ diff --git a/src/plugins/dev_octeon/oct_virtio.h b/src/plugins/dev_octeon/oct_virtio.h new file mode 100644 index 0000000000..91e03df9ee --- /dev/null +++ b/src/plugins/dev_octeon/oct_virtio.h @@ -0,0 +1,163 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. 
+ */ +#ifndef _OCTEON_VIRTIO_H_ +#define _OCTEON_VIRTIO_H_ + +#undef always_inline + +#include +#include +#include +#include +#include +#include + +#define always_inline static inline __attribute__ ((__always_inline__)) + +#include +#include +#include +#include +#include + +#define VIRTIO_NET_RSS_RETA_SIZE 128 +#define OCT_VIRTIO_DEVICE_ID 0xa70d +#define MAX_JUMBO_PKT_LEN 9600 + +#define OCT_ETH_TX_OFFLOAD_IPV4_CKSUM (1 << 0) +#define OCT_ETH_RX_OFFLOAD_CHECKSUM (1 << 1) +#define OCT_ETH_TX_OFFLOAD_TCP_TSO (1 << 2) + +#define foreach_oct_virt_tx_node_counter \ + _ (ENQUE_FAIL, enque_fail, ERROR, "Virtio enqueue failed") + +typedef enum +{ +#define _(f, n, s, d) OCT_VIRT_TX_NODE_CTR_##f, + foreach_oct_virt_tx_node_counter +#undef _ +} oct_tx_node_counter_t; + +typedef struct +{ + u32 sw_if_index; + u16 virtio_id; + u64 tx_q_map; +} oct_virt_tx_trace_t; + +typedef struct +{ + u32 sw_if_index; + u16 virtio_id; + u16 queue_id; + u64 rx_q_map; +} oct_virt_rx_trace_t; + +always_inline vlib_buffer_t * +oct_virt_to_bp (void *b, u16 hdr_len) +{ + return ( + vlib_buffer_t *) ((u8 *) b + + sizeof (((struct dao_virtio_net_hdr *) 0)->desc_data) + + hdr_len - sizeof (vlib_buffer_t)); +} + +always_inline void * +oct_bp_to_virt (vlib_buffer_t *b, u16 hdr_len) +{ + return (void *) ((u8 *) vlib_buffer_get_current (b) - + sizeof (((struct dao_virtio_net_hdr *) 0)->desc_data) - + hdr_len); +} + +typedef struct +{ + u8 status : 1; + u8 full_duplex : 1; + u16 virtio_id; + u32 pem_devid; + u32 speed; +} oct_virtio_device_t; + +typedef struct +{ + u16 reta_size; + u16 vchan_id; + u16 virtio_id; +} oct_virtio_port_t; + +typedef struct +{ + u64 wrkr_cpu_mask; + u64 netdev_map; + u16 netdev_qp_count[DAO_VIRTIO_DEV_MAX]; + u8 dao_lib_initialized; + u8 ip4_csum_offload_enable; +} oct_virtio_main_t; + +typedef struct +{ + u8 state; +} oct_virtio_port_map_t; + +typedef struct +{ + u64 qmap; + u16 last_rx_q; + u16 last_tx_q; + u16 virtio_hdr_sz; +} oct_virtio_q_info_t; + +typedef struct +{ + u64 
/*
 * Logging helpers for the OCTEON virtio code; every message goes to the
 * oct_virt_log class. Each helper maps to the vlib log macro of the same
 * severity. Previously notice, warn and err all expanded to vlib_log_info,
 * so warnings and errors were recorded at info level.
 */
#define log_debug(fmt, ...)                                                   \
  vlib_log_debug (oct_virt_log.class, fmt, ##__VA_ARGS__)
#define log_info(fmt, ...)                                                    \
  vlib_log_info (oct_virt_log.class, fmt, ##__VA_ARGS__)
#define log_notice(fmt, ...)                                                  \
  vlib_log_notice (oct_virt_log.class, fmt, ##__VA_ARGS__)
#define log_warn(fmt, ...)                                                    \
  vlib_log_warn (oct_virt_log.class, fmt, ##__VA_ARGS__)
#define log_err(fmt, ...)                                                     \
  vlib_log_err (oct_virt_log.class, fmt, ##__VA_ARGS__)
+ # default-hugepage-size 1G +#} + +cpu { + ## In the VPP there is one main thread and optionally the user can create worker(s) + ## The main thread and worker thread(s) can be pinned to CPU core(s) manually or automatically + + ## Manual pinning of thread(s) to CPU core(s) + + ## Set logical CPU core where main thread runs, if main core is not set + ## VPP will use core 1 if available + main-core 1 + + ## Set logical CPU core(s) where worker threads are running + corelist-workers 2-5 + + ## Automatic pinning of thread(s) to CPU core(s) + + ## Sets number of CPU core(s) to be skipped (1 ... N-1) + ## Skipped CPU core(s) are not used for pinning main thread and working thread(s). + ## The main thread is automatically pinned to the first available CPU core and worker(s) + ## are pinned to next free CPU core(s) after core assigned to main thread + # skip-cores 4 + + ## Specify a number of workers to be created + ## Workers are pinned to N consecutive CPU cores while skipping "skip-cores" CPU core(s) + ## and main thread's CPU core + # workers 2 + + ## Set scheduling policy and priority of main and worker threads + + ## Scheduling policy options are: other (SCHED_OTHER), batch (SCHED_BATCH) + ## idle (SCHED_IDLE), fifo (SCHED_FIFO), rr (SCHED_RR) + # scheduler-policy fifo + + ## Scheduling priority is used only for "real-time policies (fifo and rr), + ## and has to be in the range of priorities supported for a particular policy + # scheduler-priority 50 +} + +session +{ + event-queue-length 102400 +} + +buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. 
+ ## Default is 16384 (8192 if running unprivileged) + buffers-per-numa 128000 + + ## Size of buffer data area + ## Default is 2048 + # default data-size 2048 + + ## Size of the memory pages allocated for buffer data + ## Default will try 'default-hugepage' then 'default' + ## you can also pass a size in K/M/G e.g. '8M' + page-size default-hugepage +} + +devices { + ## whitelist interface + dev pci/0002:04:00.0 + { + driver octeon + port 0 + { + name eth0 + num-rx-queues 4 + num-tx-queues 4 + } + } + + dev pci/0002:05:00.0 + { + driver octeon + port 0 + { + name eth1 + num-rx-queues 4 + num-tx-queues 4 + } + } +} + +plugins { + plugin dpdk_plugin.so { disable } + plugin onp_plugin.so { disable } + plugin dev_octeon_plugin.so { enable } +} + + +## Statistics Segment +statseg { + # socket-name , name of the stats segment socket + # defaults to /run/vpp/stats.sock + size 4G + # page-size , page size, i.e. 2m, defaults to 4k + # per-node-counters on | off, defaults to none + # update-interval , sets the segment scrape / update interval +} + +## L2 FIB +# l2fib { + ## l2fib hash table size. + # table-size 512M + + ## l2fib hash table number of buckets. Must be power of 2. + # num-buckets 524288 +# } + +## ipsec +# { + # ip4 { + ## ipsec for ipv4 tunnel lookup hash number of buckets. + # num-buckets 524288 + # } + # ip6 { + ## ipsec for ipv6 tunnel lookup hash number of buckets. 
+ # num-buckets 524288 + # } +# } + +# logging { + ## set default logging level for logging buffer + ## logging levels: emerg, alert,crit, error, warn, notice, info, debug, disabled + # default-log-level debug + ## set default logging level for syslog or stderr output + # default-syslog-log-level info + ## Set per-class configuration + # class dpdk/cryptodev { rate-limit 100 level debug syslog-level error } +# } diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index a87330c6f9..54b075cbfd 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -9,34 +9,95 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include + +#define OCT_FRAME_SIZE (VLIB_FRAME_SIZE * 4) +#define OCT_EXT_HDR_SIZE \ + PLT_ALIGN (sizeof (oct_ipsec_outbound_pkt_meta_t), ROC_ALIGN) +#define OCT_NPA_MAX_POOLS 128 +#define OCT_BATCH_ALLOC_IOVA0_MASK 0xFFFFFFFFFFFFFF80 + +/* + * L2 header size includes + * ETH_HDR_LEN + ETH_CRC_LEN + * 2 VLAN TAGS + */ +#define OCT_PKTIO_MAX_L2_SIZE 26 + +typedef enum +{ + OCT_DRV_ARG_NPA_MAX_POOLS = 1, + OCT_DRV_ARG_USE_SINGLE_RX_AURA, + OCT_DRV_ARG_IPSEC_IN_MIN_SPI, + OCT_DRV_ARG_IPSEC_IN_MAX_SPI, + OCT_DRV_ARG_IPSEC_OUT_MAX_SA, + OCT_DRV_ARG_END, +} oct_drv_args_t; + +typedef enum +{ + OCT_PORT_ARG_EN_ETH_PAUSE_FRAME = 1, + OCT_PORT_ARG_RSS_FLOW_KEY = 2, + OCT_PORT_ARG_ALLMULTI_MODE, + OCT_PORT_ARG_SWITCH_HDR_TYPE, + OCT_PORT_ARG_END +} oct_port_args_t; + +typedef enum +{ + OCT_DEV_ARG_CRYPTO_N_DESC = 1, + OCT_DEV_ARG_END, +} oct_dev_args_t; typedef enum { OCT_DEVICE_TYPE_UNKNOWN = 0, OCT_DEVICE_TYPE_RVU_PF, - OCT_DEVICE_TYPE_CPT_VF, + OCT_DEVICE_TYPE_RVU_VF, + OCT_DEVICE_TYPE_LBK_VF, + OCT_DEVICE_TYPE_SDP_VF, + OCT_DEVICE_TYPE_O10K_CPT_VF, + OCT_DEVICE_TYPE_O9K_CPT_VF, + OCT_DEVICE_TYPE_RVU_INL_PF, + OCT_DEVICE_TYPE_RVU_INL_VF, } __clib_packed oct_device_type_t; +#define OCT_DEVTYPE_IS_VF(type) \ + ((type) == 
OCT_DEVICE_TYPE_RVU_VF || (type) == OCT_DEVICE_TYPE_LBK_VF || \ + (type) == OCT_DEVICE_TYPE_SDP_VF || \ + (type) == OCT_DEVICE_TYPE_O10K_CPT_VF || \ + (type) == OCT_DEVICE_TYPE_RVU_INL_VF) + typedef struct { - oct_device_type_t type; - u8 nix_initialized : 1; - u8 status : 1; - u8 full_duplex : 1; - u32 speed; - struct plt_pci_device plt_pci_dev; - struct roc_cpt cpt; - struct roc_nix *nix; -} oct_device_t; + /* vnet flow index */ + u32 vnet_flow_index; + + u32 index; + /* Internal flow object */ + struct roc_npc_flow *npc_flow; +} oct_flow_entry_t; typedef struct { u8 lf_allocated : 1; u8 tm_initialized : 1; u8 npc_initialized : 1; + u8 q_intr_enabled : 1; struct roc_npc npc; + oct_flow_entry_t *flow_entries; + u32 rss_flowkey; } oct_port_t; typedef struct @@ -57,15 +118,30 @@ typedef struct typedef struct { CLIB_ALIGN_MARK (cl, 128); - union - { - struct npa_batch_alloc_status_s status; - u64 iova[16]; - }; + u64 iova[16]; } oct_npa_batch_alloc_cl128_t; +typedef union +{ + struct npa_batch_alloc_status_s status; + u64 as_u64; +} oct_npa_batch_alloc_status_t; + STATIC_ASSERT_SIZEOF (oct_npa_batch_alloc_cl128_t, 128); +struct oct_outb_sa_data +{ + /* SA Bitmap */ + struct plt_bitmap *sa_bmap; + /* SA bitmap memory */ + void *sa_bmap_mem; + + /* SA base */ + u64 sa_base; + + u16 max_sa; +}; + typedef struct { u8 sq_initialized : 1; @@ -75,18 +151,54 @@ typedef struct u64 aura_handle; u64 io_addr; void *lmt_addr; - oct_npa_batch_alloc_cl128_t *ba_buffer; u8 ba_first_cl; u8 ba_num_cl; CLIB_CACHE_LINE_ALIGN_MARK (data0); struct roc_nix_sq sq; + i32 cached_pkts; } oct_txq_t; +typedef struct +{ + oct_device_type_t type; + u16 nix_idx; + u8 nix_initialized : 1; + u8 status : 1; + u8 full_duplex : 1; + u8 mode : 2; + u8 class_en; + u8 rx_pause_en; + u8 tx_pause_en; + u32 speed; + struct plt_pci_device plt_pci_dev; + struct roc_nix *nix; + oct_msix_handler_info_t *msix_handler; + + u32 cached_cpt_pkts; + u64 cpt_io_addr; + oct_txq_t **ctqs; + + struct oct_outb_sa_data outb; 
+} oct_device_t; + +typedef struct +{ + u8 inl_dev_initialized : 1; + u8 use_single_rx_aura : 1; + u8 is_config_done; + u32 npa_max_pools; + u64 rx_aura_handle; + oct_device_t **oct_dev; +} oct_main_t; + +extern oct_main_t oct_main; + /* format.c */ format_function_t format_oct_port_status; format_function_t format_oct_rx_trace; format_function_t format_oct_tx_trace; +format_function_t format_oct_port_flow; /* port.c */ vnet_dev_rv_t oct_port_init (vlib_main_t *, vnet_dev_port_t *); @@ -95,19 +207,43 @@ void oct_port_stop (vlib_main_t *, vnet_dev_port_t *); void oct_port_deinit (vlib_main_t *, vnet_dev_port_t *); vnet_dev_rv_t oct_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); +vnet_dev_rv_t oct_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); /* queue.c */ vnet_dev_rv_t oct_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *); vnet_dev_rv_t oct_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *); void oct_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *); void oct_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *); -vnet_dev_rv_t oct_rxq_init (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rv_t oct_rxq_init (vlib_main_t *, vnet_dev_rx_queue_t *, u32); vnet_dev_rv_t oct_txq_init (vlib_main_t *, vnet_dev_tx_queue_t *); void oct_rxq_deinit (vlib_main_t *, vnet_dev_rx_queue_t *); +int oct_drain_queue (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq); void oct_txq_deinit (vlib_main_t *, vnet_dev_tx_queue_t *); format_function_t format_oct_rxq_info; format_function_t format_oct_txq_info; +/* flow.c */ +vnet_dev_rv_t oct_flow_ops_fn (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_type_t, u32, uword *); +vnet_dev_rv_t oct_flow_validate_params (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_type_t, u32, + uword *); +vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword, + u64 *); + +/* pfc.c */ +int oct_pfc_sys_init_args (pfc_system_t *pfc); + +/* 
counter.c */ +void oct_port_add_counters (vlib_main_t *, vnet_dev_port_t *); +void oct_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t oct_port_get_stats (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t oct_rxq_get_stats (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_rx_queue_t *); +vnet_dev_rv_t oct_txq_get_stats (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_tx_queue_t *); + #define log_debug(dev, f, ...) \ vlib_log (VLIB_LOG_LEVEL_DEBUG, oct_log.class, "%U: " f, \ format_vnet_dev_addr, (dev), ##__VA_ARGS__) @@ -124,13 +260,28 @@ format_function_t format_oct_txq_info; vlib_log (VLIB_LOG_LEVEL_ERR, oct_log.class, "%U: " f, \ format_vnet_dev_addr, (dev), ##__VA_ARGS__) +#define foreach_oct_rx_node_counter \ + _ (ERR_NO_TUNNEL, err_no_tunnel, ERROR, "no matching IPsec tunnel") \ + _ (ERR_UNDEFINED, err_undefined, ERROR, "undefined decrypt error") + +/* clang-format off */ +typedef enum +{ +#define _(f, n, s, d) OCT_RX_NODE_CTR_##f, + foreach_octeon_ipsec_ucc + foreach_oct_rx_node_counter +#undef _ +} oct_rx_node_counter_t; +/* clang-format on */ + #define foreach_oct_tx_node_counter \ _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "drop due to buffer chain > 6") \ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \ _ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \ "aura batch alloc issue failed") \ _ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \ - "aura batch alloc not ready") + "aura batch alloc not ready") \ + _ (MTU_EXCEEDED, mtu_exceeded, ERROR, "mtu exceeded") typedef enum { @@ -151,4 +302,19 @@ typedef struct u32 sw_if_index; oct_tx_desc_t desc; } oct_tx_trace_t; + +extern tm_system_t dev_oct_tm_ops; + +#define foreach_oct_fp_flag \ + _ (UNUSED, 0) \ + _ (TRACE_EN, 1) \ + _ (O20, 2) + +typedef enum +{ +#define _(name, bit) OCT_FP_FLAG_##name = (1 << bit), + foreach_oct_fp_flag +#undef _ +} oct_fp_flag_t; + #endif /* _OCTEON_H_ */ diff --git a/src/plugins/dev_octeon/pfc.c 
b/src/plugins/dev_octeon/pfc.c new file mode 100644 index 0000000000..cff3a2c7fd --- /dev/null +++ b/src/plugins/dev_octeon/pfc.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2025 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +pfc_system_t dev_oct_pfc_ops; + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "octeon", + .subclass_name = "pfc", +}; + +static vnet_dev_rv_t +oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) +{ + u8 *s = 0; + va_list va; + + va_start (va, fmt); + s = va_format (s, fmt, &va); + va_end (va); + + log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv); + + vec_free (s); + return VNET_DEV_ERR_INTERNAL; +} + +static int +oct_nix_pfc_rq_conf (vnet_dev_port_t *port, uint16_t qid, uint8_t tx_pause, + uint8_t tc) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix_pfc_cfg pfc_cfg; + struct roc_nix *nix = cd->nix; + struct roc_nix_fc_cfg fc_cfg; + enum roc_nix_fc_mode mode; + vnet_dev_rx_queue_t *rxq; + struct roc_nix_rq *rq; + struct roc_nix_cq *cq; + oct_rxq_t *crq; + int rc; + + if (port->rx_queues == NULL) + return -EINVAL; + + if (qid >= port->attr.max_rx_queues) + return -ENOTSUP; + + /* Configure RQ */ + rxq = vnet_dev_get_port_rx_queue_by_id (port, qid); + if (rxq == 0) + return -ENODEV; + + crq = vnet_dev_get_rx_queue_data (rxq); + rq = &crq->rq; + cq = &crq->cq; + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_RQ_CFG; + fc_cfg.rq_cfg.tc = tc; + fc_cfg.rq_cfg.enable = !!tx_pause; + fc_cfg.rq_cfg.rq = rq->qid; + fc_cfg.rq_cfg.pool = rq->aura_handle; + fc_cfg.rq_cfg.spb_pool = rq->spb_aura_handle; + fc_cfg.rq_cfg.cq_drop = cq->drop_thresh; + fc_cfg.rq_cfg.cq_bp = cq->bp_thresh; + fc_cfg.rq_cfg.pool_drop_pct = ROC_NIX_AURA_THRESH; + rc = roc_nix_fc_config_set (nix, 
&fc_cfg); + if (rc) + return rc; + + rxq->tc = tc; + /* Recheck number of RQ's that have PFC enabled */ + cd->tx_pause_en = 0; + foreach_vnet_dev_port_rx_queue (q, port) + { + /* Skip if RQ does not exist */ + if (!q->enabled) + continue; + + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (q); + rq = &crq->rq; + if (rq->tc != ROC_NIX_PFC_CLASS_INVALID) + cd->tx_pause_en++; + } + + /* Skip if PFC already enabled in mac */ + if (cd->tx_pause_en > 1) + return 0; + + /* Configure MAC block */ + cd->class_en = cd->tx_pause_en ? 0xFF : 0x0; + + if (cd->rx_pause_en) + mode = cd->tx_pause_en ? ROC_NIX_FC_FULL : ROC_NIX_FC_RX; + else + mode = cd->tx_pause_en ? ROC_NIX_FC_TX : ROC_NIX_FC_NONE; + + memset (&pfc_cfg, 0, sizeof (struct roc_nix_pfc_cfg)); + pfc_cfg.mode = mode; + pfc_cfg.tc = cd->class_en; + return roc_nix_pfc_mode_set (nix, &pfc_cfg); +} + +static int +oct_nix_pfc_sq_conf (vnet_dev_port_t *port, uint16_t qid, uint8_t rx_pause, + uint8_t tc) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix_pfc_cfg pfc_cfg; + struct roc_nix *nix = cd->nix; + struct roc_nix_fc_cfg fc_cfg; + enum roc_nix_fc_mode mode; + vnet_dev_tx_queue_t *txq; + struct roc_nix_sq *sq; + oct_txq_t *ctq; + int rc; + + if (port->tx_queues == NULL) + return -EINVAL; + + if (qid >= port->attr.max_tx_queues) + return -ENOTSUP; + + /* Check if RX pause frame is enabled or not and + * confirm user requested for PFC. + */ + if (!cd->rx_pause_en && rx_pause) + { + if ((roc_nix_tm_tree_type_get (nix) == ROC_NIX_TM_DEFAULT) && + port->attr.max_tx_queues > 1) + { + /* + * Disabled xmit will be enabled when new topology is available. 
+ */ + rc = roc_nix_tm_hierarchy_disable (nix); + if (rc) + goto exit; + + rc = roc_nix_tm_pfc_prepare_tree (nix); + if (rc) + goto exit; + + rc = roc_nix_tm_hierarchy_enable (nix, ROC_NIX_TM_PFC, true); + if (rc) + goto exit; + } + } + + txq = vnet_dev_get_port_tx_queue_by_id (port, qid); + if (txq == 0) + { + rc = -ENODEV; + goto exit; + } + ctq = vnet_dev_get_tx_queue_data (txq); + sq = &ctq->sq; + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_TM_CFG; + fc_cfg.tm_cfg.sq = sq->qid; + fc_cfg.tm_cfg.tc = tc; + fc_cfg.tm_cfg.enable = !!rx_pause; + rc = roc_nix_fc_config_set (nix, &fc_cfg); + if (rc) + return rc; + + /* Recheck number of SQ's that have PFC enabled */ + cd->rx_pause_en = 0; + foreach_vnet_dev_port_tx_queue (q, port) + { + /* Skip if RQ does not exist */ + if (!q->enabled) + continue; + + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (q); + sq = &ctq->sq; + if (sq->tc != ROC_NIX_PFC_CLASS_INVALID) + cd->rx_pause_en++; + } + + if (cd->rx_pause_en > 1) + goto exit; + + if (cd->tx_pause_en) + mode = cd->rx_pause_en ? ROC_NIX_FC_FULL : ROC_NIX_FC_TX; + else + mode = cd->rx_pause_en ? 
ROC_NIX_FC_RX : ROC_NIX_FC_NONE; + + memset (&pfc_cfg, 0, sizeof (struct roc_nix_pfc_cfg)); + pfc_cfg.mode = mode; + pfc_cfg.tc = cd->class_en; + rc = roc_nix_pfc_mode_set (nix, &pfc_cfg); +exit: + return rc; +} + +int +oct_pfc_sys_init_args (pfc_system_t *pfc) +{ + memset (pfc, 0, sizeof (pfc_system_t)); + memcpy (pfc, &dev_oct_pfc_ops, sizeof (pfc_system_t)); + return 0; +} + +int +oct_pfc_sys_configure (u32 hw_if_idx, pfc_params_t *params) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + uint8_t en, tc, mode; + uint16_t qid; + int rc = 0; + + if (cd->mode != PFC_ETH_FC_NONE) + { + return oct_roc_err ( + dev, -ENOTSUP, + "Disable pause frame flow control before configuring PFC"); + } + + if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix)) + return oct_roc_err (dev, -ENOTSUP, + "Prio flow ctrl config is not allowed on SDP/LBK"); + + /* Disallow flow control changes when device is in started state */ + if (port->started) + return oct_roc_err (dev, -EBUSY, "Stop the port=%d for setting PFC", + port->port_id); + + mode = params->mode; + + /* Perform Tx pause configuration on RQ */ + qid = params->tx_pause.rxq; + if (qid < port->attr.max_rx_queues) + { + en = (mode == PFC_ETH_FC_FULL) || (mode == PFC_ETH_FC_TX_PAUSE); + tc = params->tx_pause.tc; + rc = oct_nix_pfc_rq_conf (port, qid, en, tc); + } + + /* Perform Rx pause configuration on SQ */ + qid = params->rx_pause.txq; + if (qid < port->attr.max_tx_queues) + { + en = (mode == PFC_ETH_FC_FULL) || (mode == PFC_ETH_FC_RX_PAUSE); + tc = params->rx_pause.tc; + rc |= oct_nix_pfc_sq_conf (port, qid, en, tc); + } + + log_debug (dev, "hw_if_idx %d\n", hw_if_idx); + log_debug (dev, "mode %x\n", params->mode); + log_debug (dev, "rx_pause.txq %d\n", params->rx_pause.txq); + 
log_debug (dev, "rx_pause.tc %d\n", params->rx_pause.tc); + log_debug (dev, "tx_pause.pause_time %d\n", params->tx_pause.pause_time); + log_debug (dev, "tx_pause.rxq %d\n", params->tx_pause.rxq); + log_debug (dev, "tx_pause.tc %d\n", params->tx_pause.tc); + return rc; +} + +int +oct_pfc_sys_get_capabilities (u32 hw_if_idx, pfc_capa_params_t *cap) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + + memset (cap, 0, sizeof (*cap)); + cap->tc_max = roc_nix_chan_count_get (nix); + cap->mode = PFC_ETH_FC_FULL; + + log_debug (dev, "Max TC %d Supported mode %d", cap->tc_max, cap->mode); + return 0; +} + +int +oct_pfc_sys_disable_pause_frame_flow_ctrl (u32 hw_if_idx, u32 disable) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_sq *sq; + struct roc_nix_cq *cq; + struct roc_nix_rq *rq; + int rc = 0; + + foreach_vnet_dev_port_rx_queue (q, port) + { + struct roc_nix_fc_cfg fc_cfg; + + /* Skip if RQ does not exist */ + if (!q->enabled) + continue; + + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (q); + rq = &crq->rq; + cq = &crq->cq; + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_RQ_CFG; + fc_cfg.rq_cfg.rq = rq->qid; + fc_cfg.rq_cfg.pool = rq->aura_handle; + fc_cfg.rq_cfg.spb_pool = rq->spb_aura_handle; + fc_cfg.rq_cfg.cq_drop = cq->drop_thresh; + fc_cfg.rq_cfg.cq_bp = cq->bp_thresh; + fc_cfg.rq_cfg.pool_drop_pct = ROC_NIX_AURA_THRESH; + + rc = roc_nix_fc_config_set (nix, &fc_cfg); + if (rc) + return oct_roc_err ( + dev, rc, 
"Failed to disable flow control on Rx queue %u", rq->qid); + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + struct roc_nix_fc_cfg fc_cfg; + + /* Skip if SQ does not exist */ + if (!q->enabled) + continue; + + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (q); + sq = &ctq->sq; + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_TM_CFG; + fc_cfg.tm_cfg.sq = sq->qid; + rc = roc_nix_fc_config_set (nix, &fc_cfg); + if (rc && rc != -EEXIST) + return oct_roc_err ( + dev, rc, "Failed to disable flow control on Tx queue %u", sq->qid); + } + + rc = roc_nix_fc_mode_set (nix, ROC_NIX_FC_NONE); + if (rc) + return oct_roc_err (dev, rc, "Failed to disable flow control on MAC"); + + cd->mode = PFC_ETH_FC_NONE; + return rc; +} + +pfc_system_t dev_oct_pfc_ops = { + .pfc_configure = oct_pfc_sys_configure, + .pfc_get_capabilities = oct_pfc_sys_get_capabilities, + .pfc_disable_pause_frame_flow_ctrl = + oct_pfc_sys_disable_pause_frame_flow_ctrl, +}; diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c index 5857bc15f7..dd976569ce 100644 --- a/src/plugins/dev_octeon/port.c +++ b/src/plugins/dev_octeon/port.c @@ -8,24 +8,22 @@ #include #include #include +#include #include +#define OCT_FLOW_PREALLOC_SIZE 1 +#define OCT_FLOW_MAX_PRIORITY 7 +#define OCT_ETH_LINK_SPEED_100G 100000 /**< 100 Gbps */ + +extern oct_inl_dev_main_t oct_inl_dev_main; +tm_system_t tm_system_ops; +pfc_system_t pfc_system_ops; + VLIB_REGISTER_LOG_CLASS (oct_log, static) = { .class_name = "octeon", .subclass_name = "port", }; -static const u8 default_rss_key[] = { - 0xfe, 0xed, 0x0b, 0xad, 0xfe, 0xed, 0x0b, 0xad, 0xad, 0x0b, 0xed, 0xfe, - 0xad, 0x0b, 0xed, 0xfe, 0x13, 0x57, 0x9b, 0xef, 0x24, 0x68, 0xac, 0x0e, - 0x91, 0x72, 0x53, 0x11, 0x82, 0x64, 0x20, 0x44, 0x12, 0xef, 0x34, 0xcd, - 0x56, 0xbc, 0x78, 0x9a, 0x9a, 0x78, 0xbc, 0x56, 0xcd, 0x34, 0xef, 0x12 -}; - -static const u32 default_rss_flowkey = - (FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_TCP | 
- FLOW_KEY_TYPE_UDP | FLOW_KEY_TYPE_SCTP); - static const u64 rxq_cfg = ROC_NIX_LF_RX_CFG_DIS_APAD | ROC_NIX_LF_RX_CFG_IP6_UDP_OPT | ROC_NIX_LF_RX_CFG_L2_LEN_ERR | ROC_NIX_LF_RX_CFG_DROP_RE | @@ -33,6 +31,14 @@ static const u64 rxq_cfg = ROC_NIX_LF_RX_CFG_LEN_OL3 | ROC_NIX_LF_RX_CFG_LEN_OL4 | ROC_NIX_LF_RX_CFG_LEN_IL3 | ROC_NIX_LF_RX_CFG_LEN_IL4; +static int +oct_init_tm_args (tm_system_t *tm) +{ + memset (tm, 0, sizeof (tm_system_t)); + memcpy (tm, &dev_oct_tm_ops, sizeof (tm_system_t)); + return 0; +} + static vnet_dev_rv_t oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) { @@ -49,17 +55,129 @@ oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) return VNET_DEV_ERR_INTERNAL; } +vnet_dev_rv_t +oct_port_flow_control_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_fc_cfg fc_cfg; + struct roc_nix_sq *sq; + struct roc_nix_cq *cq; + struct roc_nix_rq *rq; + int rrv; + + /* Flow control is not supported on SDP/LBK devices */ + if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix)) + { + log_notice (dev, + "pause flow control is not supported on SDP/LBK devices"); + return VNET_DEV_OK; + } + + fc_cfg.type = ROC_NIX_FC_RXCHAN_CFG; + fc_cfg.rxchan_cfg.enable = true; + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_RQ_CFG; + fc_cfg.rq_cfg.enable = true; + fc_cfg.rq_cfg.tc = 0; + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + + rq = &crq->rq; + cq = &crq->cq; + + fc_cfg.rq_cfg.rq = rq->qid; + fc_cfg.rq_cfg.cq_drop = cq->drop_thresh; + + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + } + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = 
ROC_NIX_FC_TM_CFG; + fc_cfg.tm_cfg.tc = 0; + fc_cfg.tm_cfg.enable = true; + + foreach_vnet_dev_port_tx_queue (txq, port) + { + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + + sq = &ctq->sq; + + fc_cfg.tm_cfg.sq = sq->qid; + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + } + + /* By default, enable pause frame flow control */ + rrv = roc_nix_fc_mode_set (nix, ROC_NIX_FC_FULL); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_mode_set failed"); + + cd->mode = PFC_ETH_FC_FULL; + return VNET_DEV_OK; +} + +static u16 +oct_parse_switch_hdr_type (const char *value) +{ + if (strcmp (value, "higig2") == 0) + return ROC_PRIV_FLAGS_HIGIG; + + if (strcmp (value, "dsa") == 0) + return ROC_PRIV_FLAGS_EDSA; + + if (strcmp (value, "chlen90b") == 0) + return ROC_PRIV_FLAGS_LEN_90B; + + if (strcmp (value, "exdsa") == 0) + return ROC_PRIV_FLAGS_EXDSA; + + if (strcmp (value, "vlan_exdsa") == 0) + return ROC_PRIV_FLAGS_VLAN_EXDSA; + + return 0; +} + vnet_dev_rv_t oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) { + oct_inl_dev_main_t *inl_main = &oct_inl_dev_main; vnet_dev_t *dev = port->dev; oct_device_t *cd = vnet_dev_get_data (dev); oct_port_t *cp = vnet_dev_get_port_data (port); + u8 mac_addr[PLT_ETHER_ADDR_LEN]; struct roc_nix *nix = cd->nix; - vnet_dev_rv_t rv; + vnet_dev_rv_t rv = -1; + bool is_allmulti_enable = false, is_flow_ctrl_enable = false; + u32 total_sz = 0; int rrv; - log_debug (dev, "port init: port %u", port->port_id); + log_notice (dev, "port init: port %u", port->port_id); + + foreach_vnet_dev_port_args (arg, port) + { + if (arg->id == OCT_PORT_ARG_ALLMULTI_MODE && vnet_dev_arg_get_bool (arg)) + is_allmulti_enable = true; + else if (arg->id == OCT_PORT_ARG_EN_ETH_PAUSE_FRAME && + vnet_dev_arg_get_bool (arg)) + is_flow_ctrl_enable = true; + else if (arg->id == OCT_PORT_ARG_SWITCH_HDR_TYPE && + vnet_dev_arg_get_string (arg)) + cp->npc.switch_header_type = + 
oct_parse_switch_hdr_type ((char *) vnet_dev_arg_get_string (arg)); + else if (arg->id == OCT_PORT_ARG_RSS_FLOW_KEY) + cp->rss_flowkey = vnet_dev_arg_get_uint32 (arg); + } if ((rrv = roc_nix_lf_alloc (nix, port->intf.num_rx_queues, port->intf.num_tx_queues, rxq_cfg))) @@ -72,6 +190,32 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) } cp->lf_allocated = 1; + if (!roc_nix_is_vf_or_sdp (nix)) + { + if ((rrv = roc_nix_npc_mac_addr_get (nix, mac_addr))) + { + oct_port_deinit (vm, port); + return oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get failed"); + } + + /* Sync MAC address to CGX/RPM table */ + if ((rrv = roc_nix_mac_addr_set (nix, mac_addr))) + { + oct_port_deinit (vm, port); + return oct_roc_err (dev, rrv, "roc_nix_mac_addr_set failed"); + } + } + + /* Enable allmulti mode, if set by arg; report the call that actually failed */ + if (is_allmulti_enable) + { + if ((rrv = roc_nix_npc_mcast_config (nix, true, false))) + { + oct_port_deinit (vm, port); + return oct_roc_err (dev, rrv, "roc_nix_npc_mcast_config failed"); + } + } + if ((rrv = roc_nix_tm_init (nix))) { oct_port_deinit (vm, port); @@ -86,15 +230,24 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) return oct_roc_err (dev, rrv, "roc_nix_tm_hierarchy_enable() failed"); } - if ((rrv = roc_nix_rss_default_setup (nix, default_rss_flowkey))) + rrv = roc_nix_switch_hdr_set (nix, cp->npc.switch_header_type, 0, 0, 0); + if (rrv) + { + oct_port_deinit (vm, port); + return oct_roc_err (dev, rrv, "roc_nix_switch_hdr_set() failed"); + } + + if ((rrv = roc_nix_rss_default_setup (nix, cp->rss_flowkey))) { oct_port_deinit (vm, port); return oct_roc_err (dev, rrv, "roc_nix_rss_default_setup() failed"); } - roc_nix_rss_key_set (nix, default_rss_key); + roc_nix_rss_key_set (nix, port->rss_key.key); cp->npc.roc_nix = nix; + cp->npc.flow_prealloc_size = OCT_FLOW_PREALLOC_SIZE; + cp->npc.flow_max_priority = OCT_FLOW_MAX_PRIORITY; if ((rrv = roc_npc_init (&cp->npc))) { oct_port_deinit (vm, port); @@ -102,14 
+255,40 @@ oct_port_init (vlib_main_t 
*vm, vnet_dev_port_t *port) } cp->npc_initialized = 1; + if (inl_main->inl_dev) + { + struct roc_nix_fc_cfg fc_cfg = {}; /* zeroed: only type and rxchan_cfg.enable are set below */ + + fc_cfg.type = ROC_NIX_FC_RXCHAN_CFG; + fc_cfg.rxchan_cfg.enable = true; + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + { + rv = oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + oct_port_deinit (vm, port); + return rv; + } + } + + foreach_vnet_dev_port_rx_queue (q, port) + total_sz += q->size; + foreach_vnet_dev_port_rx_queue (q, port) if (q->enabled) - if ((rv = oct_rxq_init (vm, q))) + if ((rv = oct_rxq_init (vm, q, total_sz))) { oct_port_deinit (vm, port); return rv; } + cd->ctqs = clib_mem_alloc_aligned ( + sizeof (oct_txq_t *) * port->intf.num_tx_queues, CLIB_CACHE_LINE_BYTES); + if (!cd->ctqs) + { + oct_port_deinit (vm, port); + return VNET_DEV_ERR_INTERNAL; + } + foreach_vnet_dev_port_tx_queue (q, port) if (q->enabled) if ((rv = oct_txq_init (vm, q))) @@ -118,6 +297,34 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) return rv; } + if ((rrv = roc_nix_mac_mtu_set (nix, 9200))) + { + oct_port_deinit (vm, port); /* undo the partial init, like every other failure path here */ + return oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed"); + } + + /* Configure flow control if requested */ + if (is_flow_ctrl_enable && (rv = oct_port_flow_control_init (vm, port))) + { + oct_port_deinit (vm, port); + return rv; + } + + if ((rrv = roc_nix_register_queue_irqs (nix))) /* keep the status so the log shows the real error */ + { + rv = oct_roc_err (dev, rrv, "roc_nix_register_queue_irqs() failed"); + oct_port_deinit (vm, port); + return rv; + } + cp->q_intr_enabled = 1; + oct_port_add_counters (vm, port); + + oct_init_tm_args (&tm_system_ops); + tm_system_register (&tm_system_ops, port->intf.hw_if_index); + + oct_pfc_sys_init_args (&pfc_system_ops); + pfc_system_register (&pfc_system_ops, port->intf.hw_if_index); + return VNET_DEV_OK; } @@ -135,6 +342,9 @@ 
oct_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port) foreach_vnet_dev_port_tx_queue (q, port) oct_txq_deinit (vm, q); + /* Disable switch hdr pkind */ + roc_nix_switch_hdr_set (nix, 0, 0, 0, 0); + if 
(cp->npc_initialized) { if ((rrv = roc_npc_fini (&cp->npc))) @@ -148,6 +358,13 @@ oct_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port) cp->tm_initialized = 0; } + /* Unregister queue irqs */ + if (cp->q_intr_enabled) + { + roc_nix_unregister_queue_irqs (nix); + cp->q_intr_enabled = 0; + } + if (cp->lf_allocated) { if ((rrv = roc_nix_lf_free (nix))) @@ -166,10 +383,35 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_port_state_changes_t changes = {}; int rrv; - rrv = roc_nix_mac_link_info_get (nix, &link_info); - if (rrv) + if (oct_port_get_stats (vm, port)) return; + foreach_vnet_dev_port_rx_queue (q, port) + { + if (oct_rxq_get_stats (vm, port, q)) + return; + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + if (oct_txq_get_stats (vm, port, q)) + return; + } + + if (roc_nix_is_lbk (nix) || roc_nix_is_sdp (nix)) + { + link_info.status = 1; + link_info.full_duplex = 1; + link_info.autoneg = 0; + link_info.speed = OCT_ETH_LINK_SPEED_100G; + } + else + { + rrv = roc_nix_mac_link_info_get (nix, &link_info); + if (rrv) + return; + } + if (cd->status != link_info.status) { changes.change.link_state = 1; @@ -187,7 +429,8 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) if (cd->speed != link_info.speed) { changes.change.link_speed = 1; - changes.link_speed = link_info.speed; + /* Convert to Kbps */ + changes.link_speed = link_info.speed * 1000; cd->speed = link_info.speed; } @@ -242,7 +485,9 @@ oct_rxq_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) if ((rrv = roc_nix_rq_ena_dis (&crq->rq, 0))) oct_roc_err (dev, rrv, "roc_nix_rq_ena_dis() failed"); - n = oct_aura_free_all_buffers (vm, crq->aura_handle, crq->hdr_off); + n = oct_drain_queue (vm, rxq); + n += oct_aura_free_all_buffers (vm, crq->aura_handle, crq->hdr_off, + crq->n_enq - n); if (crq->n_enq - n > 0) log_err (dev, "%u buffers leaked on rx queue %u stop", crq->n_enq - n, @@ -261,22 +506,30 @@ oct_txq_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) oct_npa_batch_alloc_cl128_t 
*cl; u32 n, off = ctq->hdr_off; - n = oct_aura_free_all_buffers (vm, ctq->aura_handle, off); - ctq->n_enq -= n; - - if (ctq->n_enq > 0 && ctq->ba_num_cl > 0) + if (ctq->ba_num_cl > 0) for (n = ctq->ba_num_cl, cl = ctq->ba_buffer + ctq->ba_first_cl; n; cl++, n--) { - if (cl->status.ccode != 0) - for (u32 i = 0; i < cl->status.count; i++) + oct_npa_batch_alloc_status_t st; + + st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_ACQUIRE); + if (st.status.ccode != ALLOC_CCODE_INVAL) + for (u32 i = 0; i < st.status.count; i++) { +#if (CLIB_DEBUG > 0) + if (!i || (i == 8)) + cl->iova[i] &= OCT_BATCH_ALLOC_IOVA0_MASK; +#endif vlib_buffer_t *b = (vlib_buffer_t *) (cl->iova[i] + off); - vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b)); + u32 bi = vlib_get_buffer_index (vm, b); + vlib_buffer_free_no_next (vm, &bi, 1); ctq->n_enq--; } } + n = oct_aura_free_all_buffers (vm, ctq->aura_handle, off, 0); + ctq->n_enq -= n; + if (ctq->n_enq > 0) log_err (dev, "%u buffers leaked on tx queue %u stop", ctq->n_enq, txq->queue_id); @@ -284,49 +537,77 @@ oct_txq_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) log_debug (dev, "%u buffers freed from tx queue %u", n, txq->queue_id); ctq->n_enq = 0; + ctq->ba_num_cl = ctq->ba_first_cl = 0; } vnet_dev_rv_t oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port) { + oct_inl_dev_main_t *inl_main = &oct_inl_dev_main; vnet_dev_t *dev = port->dev; oct_device_t *cd = vnet_dev_get_data (dev); + oct_port_t *cp = vnet_dev_get_port_data (port); struct roc_nix *nix = cd->nix; - struct roc_nix_eeprom_info eeprom_info = {}; vnet_dev_rv_t rv; int rrv; - log_debug (port->dev, "port start: port %u", port->port_id); + log_info (port->dev, "port start: port %u", port->port_id); foreach_vnet_dev_port_rx_queue (q, port) if ((rv = oct_rxq_start (vm, q)) != VNET_DEV_OK) goto done; + if (inl_main->inl_dev) + { + if ((rrv = roc_nix_inl_rq_ena_dis (nix, true))) + { + rv = oct_roc_err (dev, rrv, "roc_nix_inl_rq_ena_dis failed"); + goto done; + } + } + 
foreach_vnet_dev_port_tx_queue (q, port) { oct_txq_t *ctq = vnet_dev_get_tx_queue_data (q); ctq->n_enq = 0; } - if ((rrv = roc_nix_mac_mtu_set (nix, 9200))) + if ((rrv = roc_nix_npc_rx_ena_dis (nix, true))) { - rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed"); + rv = oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed"); goto done; } - if ((rrv = roc_nix_npc_rx_ena_dis (nix, true))) + if ((rrv = roc_npc_mcam_enable_all_entries (&cp->npc, true))) { - rv = oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed"); + rv = oct_roc_err (dev, rrv, "roc_npc_mcam_enable_all_entries() failed"); goto done; } - vnet_dev_poll_port_add (vm, port, 0.5, oct_port_poll); - - if (roc_nix_eeprom_info_get (nix, &eeprom_info) == 0) + if (!(roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix))) { - log_debug (dev, "sff_id %u data %U", eeprom_info.sff_id, format_hexdump, - eeprom_info.buf, sizeof (eeprom_info.buf)); + + rv = roc_nix_npc_promisc_ena_dis (nix, port->promisc); + if (rv) + { + return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed"); + } + + if (roc_nix_is_pf (nix)) + { + + rv = roc_nix_mac_promisc_mode_enable (nix, port->promisc); + if (rv) + { + return oct_roc_err (dev, rv, + "roc_nix_mac_promisc_mode_enable(%s) failed", + port->promisc ? 
"true" : "false"); + } + } } + + vnet_dev_poll_port_add (vm, port, 0.5, oct_port_poll); + done: if (rv != VNET_DEV_OK) oct_port_stop (vm, port); @@ -336,8 +617,10 @@ oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port) void oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) { + oct_inl_dev_main_t *inl_main = &oct_inl_dev_main; vnet_dev_t *dev = port->dev; oct_device_t *cd = vnet_dev_get_data (dev); + oct_port_t *cp = vnet_dev_get_port_data (port); struct roc_nix *nix = cd->nix; int rrv; @@ -345,6 +628,14 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_poll_port_remove (vm, port, oct_port_poll); + /* Disable all the NPC entries */ + rrv = roc_npc_mcam_enable_all_entries (&cp->npc, false); + if (rrv) + { + oct_roc_err (dev, rrv, "roc_npc_mcam_enable_all_entries() failed"); + return; + } + rrv = roc_nix_npc_rx_ena_dis (nix, false); if (rrv) { @@ -352,15 +643,186 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) return; } + if (inl_main->inl_dev) + { + if ((rrv = roc_nix_inl_rq_ena_dis (nix, false))) + { + oct_roc_err (dev, rrv, "roc_nix_inl_rq_ena_dis failed"); + return; + } + } + foreach_vnet_dev_port_rx_queue (q, port) oct_rxq_stop (vm, q); foreach_vnet_dev_port_tx_queue (q, port) oct_txq_stop (vm, q); + + vnet_dev_port_state_change (vm, port, + (vnet_dev_port_state_changes_t){ + .change.link_state = 1, + .change.link_speed = 1, + .link_speed = 0, + .link_state = 0, + }); + + /* Update the device status */ + cd->status = 0; + cd->speed = 0; } vnet_dev_rv_t -oct_port_cfg_change_precheck (vlib_main_t *vm, vnet_dev_port_t *port, +oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + + if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix)) + return VNET_DEV_ERR_UNSUPPORTED_DEVICE; + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable) +{ + 
vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rv; + + rv = roc_nix_npc_promisc_ena_dis (nix, enable); + if (rv) + { + return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed"); + } + + if (!roc_nix_is_pf (nix)) + return VNET_DEV_OK; + + rv = roc_nix_mac_promisc_mode_enable (nix, enable); + if (rv) + { + return oct_roc_err (dev, rv, + "roc_nix_mac_promisc_mode_enable(%s) failed", + enable ? "true" : "false"); + } + + return VNET_DEV_OK; +} + +static vnet_dev_rv_t +oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_hw_addr_t *addr, int is_add, + int is_primary) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + oct_port_t *cp = vnet_dev_get_port_data (port); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + i32 rrv; + + if (is_primary) + { + if (is_add) + { + /* Update mac address at NPC */ + rrv = roc_nix_npc_mac_addr_set (nix, (u8 *) addr); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_set() failed"); + + /* Update mac address at CGX for PFs only */ + if (!roc_nix_is_vf_or_sdp (nix)) + { + rrv = roc_nix_mac_addr_set (nix, (u8 *) addr); + if (rrv) + { + /* Rollback to previous mac address */ + roc_nix_npc_mac_addr_set (nix, + (u8 *) &port->primary_hw_addr); + rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed"); + } + } + + rrv = roc_nix_rss_default_setup (nix, cp->rss_flowkey); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_rss_default_setup() failed"); + } + } + + return rv; +} + +vnet_dev_rv_t +oct_validate_config_max_rx_len (vlib_main_t *vm, vnet_dev_port_t *port, + u32 rx_frame_size) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + + u32 max_len; + i32 min_len; + + if (port->started) + return VNET_DEV_ERR_PORT_STARTED; + + min_len = (i32) rx_frame_size - OCT_PKTIO_MAX_L2_SIZE; + if 
(min_len < 0 || min_len < NIX_MIN_HW_FRS) + { + log_err ( + dev, + "Requested rx_frame_size is lower than the minimum supported value."); + return VNET_DEV_ERR_INVALID_VALUE; + } + + max_len = roc_nix_max_pkt_len (nix); + if (rx_frame_size > max_len) + { + log_err ( + dev, + "Requested rx_frame_size is higher than the max supported value."); + return VNET_DEV_ERR_INVALID_VALUE; + } + + return rv; +} + +vnet_dev_rv_t +oct_op_config_max_rx_len (vlib_main_t *vm, vnet_dev_port_t *port, + u32 rx_frame_size) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + i32 rrv; + + rrv = roc_nix_mac_max_rx_len_set (nix, rx_frame_size); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_mac_max_rx_len_set() failed"); + + return rv; +} + +vnet_dev_rv_t +oct_op_config_set_rss_key (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_rss_key_t *k) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + vnet_dev_rv_t rv = VNET_DEV_OK; + + roc_nix_rss_key_set (cd->nix, k->key); + + return rv; +} + +vnet_dev_rv_t +oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_cfg_change_req_t *req) { vnet_dev_rv_t rv = VNET_DEV_OK; @@ -368,14 +830,24 @@ oct_port_cfg_change_precheck (vlib_main_t *vm, vnet_dev_port_t *port, switch (req->type) { case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: - if (port->started) - rv = VNET_DEV_ERR_PORT_STARTED; + rv = oct_validate_config_max_rx_len (vm, port, req->max_rx_frame_size); break; case VNET_DEV_PORT_CFG_PROMISC_MODE: + rv = oct_validate_config_promisc_mode (port, req->promisc); + break; case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + case VNET_DEV_PORT_CFG_SET_RSS_KEY: + break; + + case VNET_DEV_PORT_CFG_ADD_RX_FLOW: + case VNET_DEV_PORT_CFG_DEL_RX_FLOW: + case VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER: + case 
VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER: + rv = oct_flow_validate_params (vm, port, req->type, req->flow_index, + req->private_data); break; default: @@ -394,11 +866,13 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, switch (req->type) { case VNET_DEV_PORT_CFG_PROMISC_MODE: - { - } + rv = oct_op_config_promisc_mode (vm, port, req->promisc); break; case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + rv = oct_port_add_del_eth_addr (vm, port, &req->addr, + /* is_add */ 1, + /* is_primary */ 1); break; case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: @@ -408,6 +882,20 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, break; case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + rv = oct_op_config_max_rx_len (vm, port, req->max_rx_frame_size); + break; + + case VNET_DEV_PORT_CFG_SET_RSS_KEY: + rv = oct_op_config_set_rss_key (vm, port, &req->rss_key); + break; + + case VNET_DEV_PORT_CFG_ADD_RX_FLOW: + case VNET_DEV_PORT_CFG_DEL_RX_FLOW: + case VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER: + case VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER: + rv = oct_flow_ops_fn (vm, port, req->type, req->flow_index, + req->private_data); + break; default: diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c index 9378fc3b7c..da069369d4 100644 --- a/src/plugins/dev_octeon/queue.c +++ b/src/plugins/dev_octeon/queue.c @@ -57,12 +57,20 @@ oct_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); vnet_dev_port_t *port = txq->port; vnet_dev_t *dev = port->dev; + u32 sz = sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS; + vnet_dev_rv_t rv; log_debug (dev, "tx_queue_alloc: queue %u alocated", txq->queue_id); - return vnet_dev_dma_mem_alloc ( - vm, dev, sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS, 128, - (void **) &ctq->ba_buffer); + rv = vnet_dev_dma_mem_alloc (vm, dev, sz, 128, (void **) &ctq->ba_buffer); + + if (rv != VNET_DEV_OK) + return rv; + + clib_memset_u64 (ctq->ba_buffer, 
OCT_BATCH_ALLOC_IOVA0_MASK, + ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS); + + return rv; } void @@ -78,8 +86,10 @@ oct_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) } vnet_dev_rv_t -oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u32 total_sz) { + oct_main_t *om = &oct_main; + oct_inl_dev_main_t *inl_main = &oct_inl_dev_main; oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); vnet_dev_t *dev = rxq->port->dev; oct_device_t *cd = vnet_dev_get_data (dev); @@ -87,15 +97,27 @@ oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) vlib_get_buffer_pool (vm, vnet_dev_get_rx_queue_buffer_pool_index (rxq)); struct roc_nix *nix = cd->nix; int rrv; - struct npa_aura_s aura = {}; - struct npa_pool_s npapool = { .nat_align = 1 }; + struct npa_pool_s npapool = { .nat_align = 1, + .buf_offset = OCT_EXT_HDR_SIZE / ROC_ALIGN }; + + ASSERT (!(vm->buffer_main->ext_hdr_size % ROC_ALIGN)); - if ((rrv = roc_npa_pool_create (&crq->aura_handle, bp->alloc_size, rxq->size, - &aura, &npapool, 0))) + if (!om->use_single_rx_aura) + total_sz = rxq->size; + + if (!om->use_single_rx_aura || !om->rx_aura_handle) { - oct_rxq_deinit (vm, rxq); - return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed"); + if ((rrv = roc_npa_pool_create (&crq->aura_handle, bp->alloc_size, + total_sz, &aura, &npapool, 0))) + { + oct_rxq_deinit (vm, rxq); + return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed"); + } + } + else + { + crq->aura_handle = om->rx_aura_handle; } crq->npa_pool_initialized = 1; @@ -106,6 +128,12 @@ oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) .qid = rxq->queue_id, }; + if (inl_main->inl_dev) + { + roc_nix_inl_dev_xaq_realloc (crq->aura_handle); + crq->cq.nb_desc = clib_max (crq->cq.nb_desc, 4096); + } + if ((rrv = roc_nix_cq_init (nix, &crq->cq))) { oct_rxq_deinit (vm, rxq); @@ -118,7 +146,8 @@ oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) log_debug (dev, "CQ %u initialised (qmask 0x%x 
wdata 0x%lx)", crq->cq.qid, crq->cq.qmask, crq->cq.wdata); - crq->hdr_off = vm->buffer_main->ext_hdr_size; + crq->hdr_off = + vm->buffer_main->ext_hdr_size - (npapool.buf_offset * ROC_ALIGN); crq->rq = (struct roc_nix_rq){ .qid = rxq->queue_id, @@ -130,7 +159,7 @@ oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) .flow_tag_width = 32, }; - if ((rrv = roc_nix_rq_init (nix, &crq->rq, 1 /* disable */))) + if ((rrv = roc_nix_rq_init (nix, &crq->rq, false /* disable */))) { oct_rxq_deinit (vm, rxq); return oct_roc_err (dev, rrv, "roc_nix_rq_init(qid = %u) failed", @@ -146,9 +175,109 @@ oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) log_debug (dev, "RQ %u initialised", crq->cq.qid); + if (inl_main->inl_dev) + { + /* Configure inline device rq */ + crq->rq.tag_mask = + 0x0FF00000 | ((uint32_t) OCT_EVENT_TYPE_FRM_INL_DEV << 28); + rrv = roc_nix_inl_dev_rq_get (&crq->rq, 0 /* disable */); + if (rrv) + { + /* Same unwind as the other failure paths in this function */ + oct_rxq_deinit (vm, rxq); + return oct_roc_err (dev, rrv, + "roc_nix_inl_dev_rq_get() failed"); + } + if (!crq->rq.meta_aura_handle && roc_model_is_cn20k ()) + crq->rq.meta_aura_handle = crq->rq.aura_handle; + } + return VNET_DEV_OK; } +static_always_inline vlib_buffer_t * +oct_seg_to_bp (void *p) +{ + return (vlib_buffer_t *) p - 1; +} + +static void +oct_multi_seg_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, + oct_nix_rx_cqe_desc_t *d) +{ + vlib_buffer_t *t; + u8 s0 = d->sg0.segs, s1; + + t = oct_seg_to_bp (d->segs0[1]); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, t)); + + if (s0 == 2) + return; + t = oct_seg_to_bp (d->segs0[2]); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, t)); + + if (d->sg1.subdc != NIX_SUBDC_SG) + return; + + s1 = d->sg1.segs; + if (s1 == 0) + return; + + t = oct_seg_to_bp (d->segs1[0]); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, t)); + + if (s1 == 1) + return; + t = oct_seg_to_bp (d->segs1[1]); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, t)); + + if (s1 == 2) + return; + t = 
oct_seg_to_bp (d->segs1[2]); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, t)); +} + +int +oct_drain_queue (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + oct_nix_rx_cqe_desc_t *descs = crq->cq.desc_base; + oct_nix_lf_cq_op_status_t status; + u32 cq_size = crq->cq.nb_desc; + u32 cq_mask = crq->cq.qmask; + vlib_buffer_t *b; + u32 i, head, n_desc, n, f_cnt = 0; + + /* Free all CQ entries */ + while (1) + { + /* get head and tail from NIX_LF_CQ_OP_STATUS */ + status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status); + if (status.cq_err || status.op_err) + return f_cnt; + + head = status.head; + n_desc = (status.tail - head) & cq_mask; + + if (n_desc == 0) + return f_cnt; + + n = clib_min (cq_size - head, n_desc); /* n is a count of entries starting at head, not an end index */ + for (i = head; i < head + n; i++) + { + b = oct_seg_to_bp (descs[i].segs0[0]); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b)); + if (descs[i].sg0.segs > 1) + oct_multi_seg_free (vm, rxq, &descs[i]); + } + f_cnt += n; + plt_write64 ((crq->cq.wdata | n), crq->cq.door); + plt_wmb (); + } + + return f_cnt; +} + void oct_rxq_deinit (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) { @@ -166,6 +295,7 @@ oct_rxq_deinit (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) if (crq->cq_initialized) { + oct_drain_queue (vm, rxq); rrv = roc_nix_cq_fini (&crq->cq); if (rrv) oct_roc_err (dev, rrv, "roc_nix_cq_fini() failed"); @@ -189,14 +319,13 @@ oct_txq_init (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) oct_device_t *cd = vnet_dev_get_data (dev); struct roc_nix *nix = cd->nix; struct npa_aura_s aura = {}; - struct npa_pool_s npapool = { .nat_align = 1 }; - int rrv; + struct npa_pool_s npapool = { .nat_align = 1, + .buf_offset = OCT_EXT_HDR_SIZE / ROC_ALIGN }; vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, 0); + int rrv; - if ((rrv = roc_npa_pool_create ( - &ctq->aura_handle, bp->alloc_size, - txq->size * 6 /* worst case - two SG with 3 segs each = 6 */, &aura, - &npapool, 0))) + if ((rrv = 
roc_npa_pool_create (&ctq->aura_handle, bp->alloc_size, + bp->n_buffers, &aura, &npapool, 0))) { oct_txq_deinit (vm, txq); return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed"); @@ -224,13 +353,15 @@ oct_txq_init (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) log_debug (dev, "SQ initialised, qid %u, nb_desc %u, max_sqe_sz %u", ctq->sq.qid, ctq->sq.nb_desc, ctq->sq.max_sqe_sz); - ctq->hdr_off = vm->buffer_main->ext_hdr_size; + ctq->hdr_off = + vm->buffer_main->ext_hdr_size - (npapool.buf_offset * ROC_ALIGN); if (ctq->sq.lmt_addr == 0) ctq->sq.lmt_addr = (void *) nix->lmt_base; ctq->io_addr = ctq->sq.io_addr & ~0x7fULL; ctq->lmt_addr = ctq->sq.lmt_addr; + cd->ctqs[ctq->sq.qid] = ctq; return VNET_DEV_OK; } diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c index f10c2cb578..cdd0717037 100644 --- a/src/plugins/dev_octeon/roc_helper.c +++ b/src/plugins/dev_octeon/roc_helper.c @@ -5,10 +5,13 @@ */ #include +#include +#include #include #include #include #include +#include "octeon.h" static oct_plt_memzone_list_t memzone_list; @@ -49,6 +52,12 @@ oct_plt_get_thread_index (void) return __os_thread_index; } +static u64 +oct_plt_get_cache_line_size (void) +{ + return CLIB_CACHE_LINE_BYTES; +} + static void oct_drv_physmem_free (vlib_main_t *vm, void *mem) { @@ -69,13 +78,12 @@ oct_drv_physmem_alloc (vlib_main_t *vm, u32 size, u32 align) if (align) { - /* Force cache line alloc in case alignment is less than cache line */ - align = align < CLIB_CACHE_LINE_BYTES ? 
CLIB_CACHE_LINE_BYTES : align; + /* Force ROC align alloc in case alignment is less than ROC align */ + align = ((align + ROC_ALIGN - 1) & ~(ROC_ALIGN - 1)); mem = vlib_physmem_alloc_aligned_on_numa (vm, size, align, 0); } else - mem = - vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES, 0); + mem = vlib_physmem_alloc_aligned_on_numa (vm, size, ROC_ALIGN, 0); if (!mem) return NULL; @@ -109,38 +117,39 @@ oct_plt_zmalloc (u32 size, u32 align) return oct_drv_physmem_alloc (vm, size, align); } -static oct_plt_memzone_t * -memzone_get (u32 index) +static void * +oct_plt_realloc (void *addr, u32 size, u32 align) { - if (index == ((u32) ~0)) - return 0; + align = CLIB_CACHE_LINE_ROUND (align); + size = CLIB_CACHE_LINE_ROUND (size); - return pool_elt_at_index (memzone_list.mem_pool, index); + if (align) + return clib_mem_realloc_aligned (addr, size, align); + else + return clib_mem_realloc (addr, size); } -static int -oct_plt_memzone_free (const oct_plt_memzone_t *name) +static oct_plt_memzone_t * +oct_plt_memzone_lookup (const char *name) { - uword *p; - p = hash_get_mem (memzone_list.memzone_by_name, name); - - if (p[0] == ((u32) ~0)) - return -EINVAL; - - hash_unset_mem (memzone_list.memzone_by_name, name); + oct_plt_memzone_t *mem_pool; - pool_put_index (memzone_list.mem_pool, p[0]); + pool_foreach (mem_pool, memzone_list.mem_pool) + { + if (!clib_strcmp (mem_pool->name, name)) + return mem_pool; + } return 0; } -static oct_plt_memzone_t * -oct_plt_memzone_lookup (const char *name) +static int +oct_plt_memzone_free (const oct_plt_memzone_t *mz) { - uword *p; - p = hash_get_mem (memzone_list.memzone_by_name, name); - if (p) - return memzone_get (p[0]); + if (!mz || !oct_plt_memzone_lookup (mz->name)) + return -EINVAL; + + pool_put (memzone_list.mem_pool, mz); return 0; } @@ -160,16 +169,258 @@ oct_plt_memzone_reserve_aligned (const char *name, u64 len, u8 socket, mem_pool->addr = p; mem_pool->index = mem_pool - memzone_list.mem_pool; - hash_set_mem 
(memzone_list.memzone_by_name, name, mem_pool->index); + strcpy (mem_pool->name, name); return mem_pool; } +static void +plt_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t handle, uint16_t line) +{ + vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, handle); + oct_device_t *cd = vnet_dev_get_data (dev); + + if (cd->msix_handler && cd->msix_handler[line].fn) + cd->msix_handler[line].fn (cd->msix_handler[line].data); +} + +static int +oct_plt_get_num_vectors (oct_pci_dev_handle_t handle) +{ + vlib_main_t *vm = vlib_get_main (); + + return vlib_pci_get_num_msix_interrupts (vm, handle); +} + +static int +oct_plt_intr_enable (oct_pci_dev_handle_t handle, uint16_t start, + uint16_t count, uint8_t enable, + enum oct_msix_rsrc_op_t op) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, handle); + oct_device_t *cd = vnet_dev_get_data (dev); + clib_error_t *error = NULL; + + if (op == OCT_MSIX_RSRC_ALLOC) + { + if (cd->msix_handler) + { + clib_warning ("MSIX handlers already allocated\n"); + return -EINVAL; + } + cd->msix_handler = calloc (start + count, sizeof (*cd->msix_handler)); /* zeroed: plt_msix_handler tests .fn before any vector is configured */ + if (!cd->msix_handler) + { + clib_warning ("MSIX handlers alilocation failed\n"); + return -ENOMEM; + } + } + if (enable) + error = vlib_pci_enable_msix_irq (vm, handle, start, count); + else + error = vlib_pci_disable_msix_irq (vm, handle, start, count); + if (error) + { + clib_error_report (error); + return -EINVAL; + } + if (op == OCT_MSIX_RSRC_FREE) + { + free (cd->msix_handler); /* free (NULL) is a no-op */ + cd->msix_handler = NULL; /* no dangling table; lets a later OCT_MSIX_RSRC_ALLOC succeed */ + } + + return 0; +} + +static int +oct_plt_intr_config (oct_pci_dev_handle_t handle, uint32_t vec, + plt_msix_handler_function_t handler, void *data, + int enable) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, handle); + oct_device_t *cd = vnet_dev_get_data (dev); + clib_error_t *error = NULL; + + /* Skip AF_PF_MBOX interrupt FIXME */ + if (vec == 
RVU_PF_INT_VEC_AFPF_MBOX) + return 0; + + if (enable) + { + error = + vlib_pci_register_msix_handler (vm, handle, vec, 1, plt_msix_handler); + if (error) + { + clib_error_report (error); + return -EINVAL; + } + if (cd->msix_handler) + { + cd->msix_handler[vec].fn = handler; + cd->msix_handler[vec].vec = vec; + cd->msix_handler[vec].data = data; + } + error = vlib_pci_enable_msix_irq (vm, handle, vec, 1); + if (error) + { + clib_error_report (error); + return -EINVAL; + } + } + else + { + error = vlib_pci_disable_msix_irq (vm, handle, vec, 1); + if (error) + { + clib_error_report (error); + return -EINVAL; + } + error = vlib_pci_unregister_msix_handler (vm, handle, vec, 1); + if (error) + { + clib_error_report (error); + return -EINVAL; + } + if (cd->msix_handler) + { + cd->msix_handler[vec].fn = NULL; + cd->msix_handler[vec].data = NULL; + } + } + + return 0; +} + +static inline __attribute__ ((__always_inline__)) int +plt_intr_max_intr_get (const struct plt_intr_handle *intr_handle) +{ + if (!intr_handle) + return -EINVAL; + + return intr_handle->max_intr; +} + +static inline __attribute__ ((__always_inline__)) int +plt_intr_max_intr_set (struct plt_intr_handle *intr_handle, int max_intr) +{ + if (!intr_handle) + return -EINVAL; + + intr_handle->max_intr = max_intr; + + return 0; +} + +static int +irq_get_info (struct plt_intr_handle *intr_handle) +{ + int num_vec; + + num_vec = oct_plt_get_num_vectors (intr_handle->pci_handle); + if (num_vec == 0) + { + plt_err ("HW max=%d > PLT_MAX_RXTX_INTR_VEC_ID: %d", num_vec, + PLT_MAX_RXTX_INTR_VEC_ID); + plt_intr_max_intr_set (intr_handle, PLT_MAX_RXTX_INTR_VEC_ID); + } + else + { + if (plt_intr_max_intr_set (intr_handle, num_vec)) + return -1; + } + + return 0; +} + +static int +irq_init (struct plt_intr_handle *intr_handle) +{ + int rc = oct_plt_intr_enable (intr_handle->pci_handle, 0, + plt_intr_max_intr_get (intr_handle), 0, + OCT_MSIX_RSRC_ALLOC); + + if (rc) + plt_err ("Failed to set irqs vector rc=%d", rc); + + 
return rc; +} + +static int +oct_plt_irq_register (struct oct_pci_intr_handle *intr_handle, + oct_plt_pci_intr_callback_fn cb, void *data, + unsigned int vec) +{ + /* If no max_intr read from VFIO */ + if (plt_intr_max_intr_get (intr_handle) == 0) + { + irq_get_info (intr_handle); + irq_init (intr_handle); + } + + if (vec >= (uint32_t) plt_intr_max_intr_get (intr_handle)) /* handler table has max_intr entries: valid vec is 0 .. max_intr - 1 */ + { + plt_err ("Error registering MSI-X interrupts vec:%d >= %d", vec, + plt_intr_max_intr_get (intr_handle)); + return -EINVAL; + } + + oct_plt_intr_config (intr_handle->pci_handle, vec, cb, data, 1); + + return 0; +} + +static void +oct_plt_irq_unregister (struct oct_pci_intr_handle *intr_handle, + oct_plt_pci_intr_callback_fn cb, void *data, + unsigned int vec) +{ + if (vec >= (uint32_t) plt_intr_max_intr_get (intr_handle)) /* same bound as registration */ + { + plt_err ("Error unregistering MSI-X interrupts vec:%d >= %d", vec, + plt_intr_max_intr_get (intr_handle)); + return; + } + + oct_plt_intr_config (intr_handle->pci_handle, vec, cb, data, 0); +} + +static int +oct_plt_irq_disable (struct oct_pci_intr_handle *intr_handle) +{ + int rc = -EINVAL; + + if (!intr_handle) + return rc; + + /* Clear max_intr to indicate re-init next time */ + rc = oct_plt_intr_enable (intr_handle->pci_handle, 0, + plt_intr_max_intr_get (intr_handle), 0, + OCT_MSIX_RSRC_FREE); + plt_intr_max_intr_set (intr_handle, 0); + return rc; +} + +static int +oct_plt_irq_reconfigure (struct oct_pci_intr_handle *intr_handle, + uint16_t max_intr) +{ + /* Disable interrupts if enabled. 
*/ + if (plt_intr_max_intr_get (intr_handle)) + oct_plt_irq_disable (intr_handle); + + plt_intr_max_intr_set (intr_handle, max_intr); + return irq_init (intr_handle); +} + oct_plt_init_param_t oct_plt_init_param = { .oct_plt_log_reg_class = vlib_log_register_class, .oct_plt_log = oct_plt_log, .oct_plt_free = oct_plt_free, .oct_plt_zmalloc = oct_plt_zmalloc, + .oct_plt_realloc = oct_plt_realloc, .oct_plt_memzone_free = oct_plt_memzone_free, .oct_plt_memzone_lookup = oct_plt_memzone_lookup, .oct_plt_memzone_reserve_aligned = oct_plt_memzone_reserve_aligned, @@ -178,4 +429,9 @@ oct_plt_init_param_t oct_plt_init_param = { .oct_plt_spinlock_unlock = oct_plt_spinlock_unlock, .oct_plt_spinlock_trylock = oct_plt_spinlock_trylock, .oct_plt_get_thread_index = oct_plt_get_thread_index, + .oct_plt_get_cache_line_size = oct_plt_get_cache_line_size, + .oct_plt_irq_reconfigure = oct_plt_irq_reconfigure, + .oct_plt_irq_register = oct_plt_irq_register, + .oct_plt_irq_unregister = oct_plt_irq_unregister, + .oct_plt_irq_disable = oct_plt_irq_disable }; diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c index c1c4771795..a01ac75d90 100644 --- a/src/plugins/dev_octeon/rx_node.c +++ b/src/plugins/dev_octeon/rx_node.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -19,11 +20,38 @@ typedef struct u64 parse_w0_or; u32 n_left_to_next; u32 *to_next; + u16 *next; u32 n_rx_pkts; u32 n_rx_bytes; u32 n_segs; + u16 buffer_start_index; } oct_rx_node_ctx_t; +static_always_inline u64 +oct_get_wqe_from_cpt_hdr (union cpt_parse_hdr_u *cpt_hdr, const u64 fp_flags) +{ + if (fp_flags & OCT_FP_FLAG_O20) + return cpt_hdr->u64[1]; + return clib_net_to_host_u64 (cpt_hdr->u64[1]); +} + +static_always_inline void * +oct_ipsec_inb_sa_priv (u32 idx, const u64 fp_flags) +{ + oct_inl_dev_main_t *oidm = &oct_inl_dev_main; + struct roc_ot_ipsec_inb_sa *roc_sa; + + if (fp_flags & OCT_FP_FLAG_O20) + { + struct roc_ow_ipsec_inb_sa *roc_sa; + roc_sa = 
roc_nix_inl_ow_ipsec_inb_sa (oidm->inb_sa_base, idx); + return roc_nix_inl_ow_ipsec_inb_sa_sw_rsvd (roc_sa); + } + + roc_sa = roc_nix_inl_ot_ipsec_inb_sa (oidm->inb_sa_base, idx); + return roc_nix_inl_ot_ipsec_inb_sa_sw_rsvd (roc_sa); +} + static_always_inline vlib_buffer_t * oct_seg_to_bp (void *p) { @@ -31,7 +59,29 @@ oct_seg_to_bp (void *p) } static_always_inline void -oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, +oct_rx_verify_vlib (vlib_main_t *vm, vlib_buffer_t *b) +{ + /* + * Warning: Since this assertion is performed in a critical section, + * with increasing number of worker cores, scaling of packet receive-rates + * will be impacted in debug builds + */ + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, vlib_get_buffer_index (vm, b))); +} + +static_always_inline u32 +oct_rx_n_segs (vlib_main_t *vm, const oct_nix_rx_parse_t *rxp) +{ + struct nix_rx_sg_s *sg; + + sg = (struct nix_rx_sg_s *) (((char *) rxp) + sizeof (rxp->f)); + return sg->segs; +} + +static_always_inline void +oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, + vlib_buffer_template_t *bt, vlib_buffer_t *h, oct_nix_rx_cqe_desc_t *d) { u32 tail_sz = 0, n_tail_segs = 0; @@ -42,6 +92,7 @@ oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, return; b = oct_seg_to_bp (d->segs0[1]); + b->template = *bt; h->next_buffer = vlib_get_buffer_index (vm, b); tail_sz += b->current_length = d->sg0.seg2_size; n_tail_segs++; @@ -52,6 +103,7 @@ oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, p = b; p->flags = VLIB_BUFFER_NEXT_PRESENT; b = oct_seg_to_bp (d->segs0[2]); + b->template = *bt; p->next_buffer = vlib_get_buffer_index (vm, b); tail_sz += b->current_length = d->sg0.seg3_size; n_tail_segs++; @@ -66,6 +118,7 @@ oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, p = b; p->flags = VLIB_BUFFER_NEXT_PRESENT; b = oct_seg_to_bp (d->segs1[0]); + b->template = *bt; 
p->next_buffer = vlib_get_buffer_index (vm, b); tail_sz += b->current_length = d->sg1.seg1_size; n_tail_segs++; @@ -76,6 +129,7 @@ oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, p = b; p->flags = VLIB_BUFFER_NEXT_PRESENT; b = oct_seg_to_bp (d->segs1[1]); + b->template = *bt; p->next_buffer = vlib_get_buffer_index (vm, b); tail_sz += b->current_length = d->sg1.seg2_size; n_tail_segs++; @@ -86,6 +140,7 @@ oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, p = b; p->flags = VLIB_BUFFER_NEXT_PRESENT; b = oct_seg_to_bp (d->segs1[2]); + b->template = *bt; p->next_buffer = vlib_get_buffer_index (vm, b); tail_sz += b->current_length = d->sg1.seg3_size; n_tail_segs++; @@ -98,17 +153,910 @@ oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h, ctx->n_segs += n_tail_segs; } +/* + * Read upto 4 fragments in case of successful reassembly. + * Fragments which are further segmented are not + * supported currently. + */ +static_always_inline u8 +oct_rx_ipsec_reassembly_success (vlib_main_t *vm, vlib_buffer_template_t *bt, + struct cpt_cn10k_parse_hdr_s *hdr, + oct_nix_rx_cqe_desc_t *d, vlib_buffer_t *buf, + u32 *olen, u32 *esp_len, u32 l2_ol3_hdr_size) +{ + oct_nix_rx_parse_t *rxp_ptr2, *rxp_ptr3; + oct_nix_rx_parse_t *rxp_ptr, *rxp_ptr1; + u16 frag_size1, frag_size2, frag_size3; + vlib_buffer_t *b0, *b1, *b2, *b3; + struct cpt_frag_info_s *frag_info; + oct_nix_rx_parse_t *rxp_meta = &d->parse; + u32 offset, l2_l3_inner_hdr_size; + u64 *wqe_ptr2, *wqe_ptr3; + u64 *wqe_ptr, *wqe_ptr1; + uint64_t *frag_ptr; + u8 frag_cnt; + + wqe_ptr = (u64 *) clib_net_to_host_u64 (hdr->wqe_ptr); + rxp_ptr = (oct_nix_rx_parse_t *) (wqe_ptr + 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr) == 1); + + l2_l3_inner_hdr_size = rxp_meta->f.ldptr - rxp_meta->f.laptr; + frag_cnt = hdr->w0.num_frags; + + buf->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + buf->total_length_not_including_first_buffer = 0; + b0 = buf; + + /* + * 
fi_offset is 8B offset from cpt_parse_hdr_s + fi_pad to frag_info_s. + * fi_offset 0 indicates 256B. + */ + offset = hdr->w2.fi_offset; + offset = (((offset - 1) & 0x1f) + 1) * 8; + frag_info = PLT_PTR_ADD (hdr, offset); + + if (frag_cnt == 2) + { + frag_size1 = clib_net_to_host_u16 (frag_info->w1.frag_size1); + wqe_ptr1 = (u64 *) clib_net_to_host_u64 (hdr->frag1_wqe_ptr); + b1 = (vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + rxp_ptr1 = (oct_nix_rx_parse_t *) (wqe_ptr1 + 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr1) == 1); + + oct_rx_verify_vlib (vm, b1); + b1->template = *bt; + + *olen += rxp_ptr1->f.pkt_lenm1 + 1; + *esp_len += rxp_ptr1->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + + b1->current_length = frag_size1; + b1->current_data = l2_l3_inner_hdr_size; + + b0->total_length_not_including_first_buffer += b1->current_length; + b0->flags |= VLIB_BUFFER_NEXT_PRESENT; + b0->next_buffer = vlib_get_buffer_index (vm, b1); + + return 2; + } + + if (PREDICT_FALSE (frag_cnt == 3)) + { + frag_ptr = (uint64_t *) (frag_info + 1); + + frag_size1 = clib_net_to_host_u16 (frag_info->w1.frag_size1); + frag_size2 = clib_net_to_host_u16 (frag_info->w1.frag_size2); + + wqe_ptr1 = (u64 *) clib_net_to_host_u64 (hdr->frag1_wqe_ptr); + wqe_ptr2 = (u64 *) clib_net_to_host_u64 (*frag_ptr); + + b1 = (vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + b2 = (vlib_buffer_t *) ((u8 *) wqe_ptr2 - 128); + + rxp_ptr1 = (oct_nix_rx_parse_t *) (wqe_ptr1 + 1); + rxp_ptr2 = (oct_nix_rx_parse_t *) (wqe_ptr2 + 1); + + ASSERT (oct_rx_n_segs (vm, rxp_ptr1) == 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr2) == 1); + + b1->template = *bt; + b2->template = *bt; + + *olen += rxp_ptr1->f.pkt_lenm1 + 1; + *olen += rxp_ptr2->f.pkt_lenm1 + 1; + + *esp_len += rxp_ptr1->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + *esp_len += rxp_ptr2->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + + b1->current_length = frag_size1; + b2->current_length = frag_size2; + b1->current_data = l2_l3_inner_hdr_size; + b2->current_data = l2_l3_inner_hdr_size; + + 
b0->total_length_not_including_first_buffer += b1->current_length; + b0->total_length_not_including_first_buffer += b2->current_length; + + b0->flags |= VLIB_BUFFER_NEXT_PRESENT; + b1->flags |= VLIB_BUFFER_NEXT_PRESENT; + + b0->next_buffer = vlib_get_buffer_index (vm, b1); + b1->next_buffer = vlib_get_buffer_index (vm, b2); + + return 3; + } + + if (PREDICT_FALSE (frag_cnt == 4)) + { + frag_ptr = (uint64_t *) (frag_info + 1); + + frag_size1 = clib_net_to_host_u16 (frag_info->w1.frag_size1); + frag_size2 = clib_net_to_host_u16 (frag_info->w1.frag_size2); + frag_size3 = clib_net_to_host_u16 (frag_info->w1.frag_size3); + + wqe_ptr1 = (u64 *) clib_net_to_host_u64 (hdr->frag1_wqe_ptr); + wqe_ptr2 = (u64 *) clib_net_to_host_u64 (*frag_ptr); + wqe_ptr3 = (u64 *) clib_net_to_host_u64 (*(frag_ptr + 1)); + + b1 = (vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + b2 = (vlib_buffer_t *) ((u8 *) wqe_ptr2 - 128); + b3 = (vlib_buffer_t *) ((u8 *) wqe_ptr3 - 128); + + rxp_ptr1 = (oct_nix_rx_parse_t *) (wqe_ptr1 + 1); + rxp_ptr2 = (oct_nix_rx_parse_t *) (wqe_ptr2 + 1); + rxp_ptr3 = (oct_nix_rx_parse_t *) (wqe_ptr3 + 1); + + ASSERT (oct_rx_n_segs (vm, rxp_ptr1) == 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr2) == 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr3) == 1); + + b1->template = *bt; + b2->template = *bt; + b3->template = *bt; + + *olen += rxp_ptr1->f.pkt_lenm1 + 1; + *olen += rxp_ptr2->f.pkt_lenm1 + 1; + *olen += rxp_ptr3->f.pkt_lenm1 + 1; + + *esp_len += rxp_ptr1->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + *esp_len += rxp_ptr2->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + *esp_len += rxp_ptr3->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + + b1->current_length = frag_size1; + b2->current_length = frag_size2; + b3->current_length = frag_size3; + b1->current_data = l2_l3_inner_hdr_size; + b2->current_data = l2_l3_inner_hdr_size; + b3->current_data = l2_l3_inner_hdr_size; + + b0->total_length_not_including_first_buffer += b1->current_length; + b0->total_length_not_including_first_buffer += b2->current_length; + 
b0->total_length_not_including_first_buffer += b3->current_length; + + b0->flags |= VLIB_BUFFER_NEXT_PRESENT; + b1->flags |= VLIB_BUFFER_NEXT_PRESENT; + b2->flags |= VLIB_BUFFER_NEXT_PRESENT; + + b0->next_buffer = vlib_get_buffer_index (vm, b1); + b1->next_buffer = vlib_get_buffer_index (vm, b2); + b2->next_buffer = vlib_get_buffer_index (vm, b3); + + return 4; + } + + return frag_cnt; +} + +/* + * Reassembly failure case: read up to 4 fragments and append each one + * to the buffer list (buffs/next) as a separate, unchained packet. + * Fragments which are further segmented are not + * supported currently. + */ +static_always_inline u8 +oct_rx_ipsec_reassembly_failure (vlib_main_t *vm, vlib_buffer_template_t *bt, + struct cpt_cn10k_parse_hdr_s *hdr, + oct_nix_rx_cqe_desc_t *d, + vlib_buffer_t **buffs, u16 *next, + u16 *buffer_next_index, u32 *olen, + u32 *esp_len, u32 l2_ol3_hdr_size, + const u64 fp_flags) +{ + oct_nix_rx_parse_t *rxp_ptr2, *rxp_ptr3; + oct_nix_rx_parse_t *rxp_ptr, *rxp_ptr1; + struct cpt_frag_info_s *frag_info; + vlib_buffer_t *b1, *b2, *b3; + u32 l2_sz1, l2_sz2, l2_sz3; + u64 *wqe_ptr2, *wqe_ptr3; + u64 *wqe_ptr, *wqe_ptr1; + u16 rlen1, rlen2, rlen3; + uint64_t *frag_ptr; + uint32_t offset; + /* Next-node index already stored for the preceding buffer; reused for every fragment appended below */ + u16 next_index = next[*buffer_next_index - 1]; + u8 frag_cnt; + + wqe_ptr = (u64 *) clib_net_to_host_u64 (hdr->wqe_ptr); + rxp_ptr = (oct_nix_rx_parse_t *) (wqe_ptr + 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr) == 1); + + frag_cnt = hdr->w0.num_frags; + + /* + * fi_offset is 8B offset from cpt_parse_hdr_s + fi_pad to frag_info_s. + * fi_offset 0 indicates 256B. 
+ */ + offset = hdr->w2.fi_offset; + offset = (((offset - 1) & 0x1f) + 1) * 8; + frag_info = PLT_PTR_ADD (hdr, offset); + + if (frag_cnt == 2) + { + wqe_ptr1 = (u64 *) clib_net_to_host_u64 (hdr->frag1_wqe_ptr); + b1 = (vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + rxp_ptr1 = (oct_nix_rx_parse_t *) (wqe_ptr1 + 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr1) == 1); + rlen1 = ((*(wqe_ptr1 + 10)) >> 16) & 0xFFFF; + + oct_rx_verify_vlib (vm, b1); + b1->template = *bt; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + + l2_sz1 = rxp_ptr1->f.lcptr - rxp_ptr1->f.laptr; + b1->current_length = rlen1 + l2_sz1; + b1->current_data = 0; + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + clib_memcpy_fast (b1->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + + *olen += rxp_ptr1->f.pkt_lenm1 + 1; + *esp_len += rxp_ptr1->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + + buffs[*buffer_next_index] = b1; + next[*buffer_next_index] = next_index; + *buffer_next_index = *buffer_next_index + 1; + + return 2; + } + + if (PREDICT_FALSE (frag_cnt == 3)) + { + frag_ptr = (uint64_t *) (frag_info + 1); + + wqe_ptr1 = (u64 *) clib_net_to_host_u64 (hdr->frag1_wqe_ptr); + wqe_ptr2 = (u64 *) clib_net_to_host_u64 (*frag_ptr); + + b1 = (vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + b2 = (vlib_buffer_t *) ((u8 *) wqe_ptr2 - 128); + + rxp_ptr1 = (oct_nix_rx_parse_t *) (wqe_ptr1 + 1); + rxp_ptr2 = (oct_nix_rx_parse_t *) (wqe_ptr2 + 1); + + ASSERT (oct_rx_n_segs (vm, rxp_ptr1) == 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr2) == 1); + + rlen1 = ((*(wqe_ptr1 + 10)) >> 16) & 0xFFFF; + rlen2 = ((*(wqe_ptr2 + 10)) >> 16) & 0xFFFF; + + oct_rx_verify_vlib (vm, b1); + oct_rx_verify_vlib (vm, b2); + + b1->template = *bt; + b2->template = *bt; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + + l2_sz1 = rxp_ptr1->f.lcptr - rxp_ptr1->f.laptr; + l2_sz2 = rxp_ptr2->f.lcptr - rxp_ptr2->f.laptr; + + b1->current_length = rlen1 + l2_sz1; + b2->current_length = rlen2 + l2_sz2; + b1->current_data = 0; + b2->current_data = 
0; + + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + { + clib_memcpy_fast (b1->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + clib_memcpy_fast (b2->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + } + + *olen += rxp_ptr1->f.pkt_lenm1 + 1; + *esp_len += rxp_ptr1->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + *olen += rxp_ptr2->f.pkt_lenm1 + 1; + *esp_len += rxp_ptr2->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + + buffs[*buffer_next_index] = b1; + buffs[*buffer_next_index + 1] = b2; + next[*buffer_next_index] = next_index; + next[*buffer_next_index + 1] = next_index; + *buffer_next_index = *buffer_next_index + 2; + + return 3; + } + + if (PREDICT_FALSE (frag_cnt == 4)) + { + frag_ptr = (uint64_t *) (frag_info + 1); + + wqe_ptr1 = (u64 *) clib_net_to_host_u64 (hdr->frag1_wqe_ptr); + wqe_ptr2 = (u64 *) clib_net_to_host_u64 (*frag_ptr); + wqe_ptr3 = (u64 *) clib_net_to_host_u64 (*(frag_ptr + 1)); + b1 = (vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + b2 = (vlib_buffer_t *) ((u8 *) wqe_ptr2 - 128); + b3 = (vlib_buffer_t *) ((u8 *) wqe_ptr3 - 128); + rxp_ptr1 = (oct_nix_rx_parse_t *) (wqe_ptr1 + 1); + rxp_ptr2 = (oct_nix_rx_parse_t *) (wqe_ptr2 + 1); + rxp_ptr3 = (oct_nix_rx_parse_t *) (wqe_ptr3 + 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr1) == 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr2) == 1); + ASSERT (oct_rx_n_segs (vm, rxp_ptr3) == 1); + rlen1 = ((*(wqe_ptr1 + 10)) >> 16) & 0xFFFF; + rlen2 = ((*(wqe_ptr2 + 10)) >> 16) & 0xFFFF; + rlen3 = ((*(wqe_ptr3 + 10)) >> 16) & 0xFFFF; + + oct_rx_verify_vlib (vm, b1); + oct_rx_verify_vlib (vm, b2); + oct_rx_verify_vlib (vm, b3); + + b1->template = *bt; + b2->template = *bt; + b3->template = *bt; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + l2_sz1 = rxp_ptr1->f.lcptr - rxp_ptr1->f.laptr; + l2_sz2 = rxp_ptr2->f.lcptr - rxp_ptr2->f.laptr; + l2_sz3 = rxp_ptr3->f.lcptr - rxp_ptr3->f.laptr; + + b1->current_length = rlen1 + l2_sz1; + b2->current_length = rlen2 + l2_sz2; + 
b3->current_length = rlen3 + l2_sz3; + b1->current_data = 0; + b2->current_data = 0; + b3->current_data = 0; + + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + { + clib_memcpy_fast (b1->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + clib_memcpy_fast (b2->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + clib_memcpy_fast (b3->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + } + + *olen += rxp_ptr1->f.pkt_lenm1 + 1; + *olen += rxp_ptr2->f.pkt_lenm1 + 1; + *olen += rxp_ptr3->f.pkt_lenm1 + 1; + *esp_len += rxp_ptr1->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + *esp_len += rxp_ptr2->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + *esp_len += rxp_ptr3->f.pkt_lenm1 + 1 - l2_ol3_hdr_size; + + buffs[*buffer_next_index] = b1; + buffs[*buffer_next_index + 1] = b2; + buffs[*buffer_next_index + 2] = b3; + next[*buffer_next_index] = next_index; + next[*buffer_next_index + 1] = next_index; + next[*buffer_next_index + 2] = next_index; + *buffer_next_index = *buffer_next_index + 3; + + return 4; + } + + return frag_cnt; +} + +static_always_inline u32 +oct_ipsec_update_itf_sw_idx (oct_ipsec_session_t *session, u32 sa_idx) +{ + clib_bihash_kv_24_16_t bkey60 = { 0 }; + clib_bihash_kv_8_16_t bkey40 = { 0 }; + ipsec_tun_lkup_result_t res; + ipsec4_tunnel_kv_t *key40; + ipsec6_tunnel_kv_t *key60; + ip_address_t *ip_addr; + ipsec_main_t *ipm; + ipsec_sa_t *sa; + i32 rv; + + sa = ipsec_sa_get (sa_idx); + ASSERT (sa); + + ipm = &ipsec_main; + ip_addr = &sa->tunnel.t_src; + + if (AF_IP4 == ip_addr->version) + { + key40 = (ipsec4_tunnel_kv_t *) &bkey40; + ipsec4_tunnel_mk_key (key40, &ip_addr->ip.ip4, + clib_host_to_net_u32 (sa->spi)); + rv = clib_bihash_search_inline_8_16 (&ipm->tun4_protect_by_key, &bkey40); + if (PREDICT_FALSE (rv)) + return ~0; + + clib_memcpy_fast (&res, &bkey40.value, sizeof (res)); + } + else + { + + key60 = (ipsec6_tunnel_kv_t *) &bkey60; + key60->key.remote_ip = ip_addr->ip.ip6; + key60->key.spi = clib_host_to_net_u32 (sa->spi); + key60->key.__pad = 0; + + rv = + clib_bihash_search_inline_24_16 
(&ipm->tun6_protect_by_key, &bkey60); + if (PREDICT_FALSE (rv)) + return ~0; + + clib_memcpy_fast (&res, &bkey60.value, sizeof (res)); + } + + /* Store the ITF sw_if_index in the SA session to avoid duplicate + lookups for each packet */ + session->itf_sw_idx = res.sw_if_index; + + return res.sw_if_index; +} + +static_always_inline void +oct_rx_ipsec_update_counters (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t **buffs, u16 *next, + u16 *buffer_next_index, u32 ilen, u8 frag_cnt, + u32 idx, const u8 reass_fail, const u64 fp_flags) +{ + vlib_combined_counter_main_t *rx_counter; + ipsec_main_t *im = &ipsec_main; + oct_ipsec_session_t *session; + oct_ipsec_inb_sa_priv_data_t *inb_sa_priv; + vlib_buffer_t *b = buffs[*buffer_next_index - 1]; + u32 sa_idx, itf_sw_idx; + vnet_interface_main_t *vim; + oct_ipsec_main_t *oim = &oct_ipsec_main; + vnet_main_t *vnm; + int i; + + vnm = im->vnet_main; + vim = &vnm->interface_main; + rx_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX; + + inb_sa_priv = oct_ipsec_inb_sa_priv (idx, fp_flags); + + sa_idx = (u32) inb_sa_priv->user_data; + + vnet_buffer (b)->ipsec.sad_index = sa_idx; + ASSERT (sa_idx < vec_len (oim->inline_ipsec_sessions)); + + session = pool_elt_at_index (oim->inline_ipsec_sessions, sa_idx); + itf_sw_idx = session->itf_sw_idx; + /* + * Check if itf_sw_idx is populated already. First packet on the SA + * populates the itf_sw_idx in the SA session. 
+ */ + if (PREDICT_FALSE (itf_sw_idx == ~0)) + itf_sw_idx = oct_ipsec_update_itf_sw_idx (session, sa_idx); + + /* Update IPsec counters with inner IP length */ + vlib_increment_combined_counter (&ipsec_sa_counters, vm->thread_index, + sa_idx, frag_cnt, ilen); + + if (PREDICT_FALSE (itf_sw_idx == ~0)) + { + b->error = node->errors[OCT_RX_NODE_CTR_ERR_NO_TUNNEL]; + next[*buffer_next_index - 1] = VNET_DEV_ETH_RX_PORT_NEXT_DROP; + + if (reass_fail) + { + for (i = frag_cnt; i > 1; i--) + { + buffs[*buffer_next_index - frag_cnt]->error = + node->errors[OCT_RX_NODE_CTR_ERR_NO_TUNNEL]; + next[*buffer_next_index - frag_cnt] = + VNET_DEV_ETH_RX_PORT_NEXT_DROP; + } + } + } + else + /* Update ITF counters with inner IP length */ + vlib_increment_combined_counter (rx_counter, vm->thread_index, itf_sw_idx, + frag_cnt, ilen); +} + +static_always_inline u8 +oct_rx_ipsec_reassembly (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_template_t *bt, + union cpt_parse_hdr_u *cpt_hdr_u, + oct_nix_rx_cqe_desc_t *d, vlib_buffer_t *b, + vlib_buffer_t **buf, u16 *next, + u16 *buffer_next_index, u32 *olen, u32 *esp_len, + u32 l2_ol3_hdr_size, const u64 fp_flags) +{ + u8 frag_cnt = 1; + u32 idx; + + if (fp_flags & OCT_FP_FLAG_O20) + { + idx = cpt_hdr_u->s.w0.cookie; + } + else + { + struct cpt_cn10k_parse_hdr_s *cpt_hdr = + (struct cpt_cn10k_parse_hdr_s *) cpt_hdr_u; + + idx = cpt_hdr->w0.cookie; + if ((cpt_hdr->w0.num_frags) && !(cpt_hdr->w0.reas_sts)) + frag_cnt = oct_rx_ipsec_reassembly_success ( + vm, bt, cpt_hdr, d, b, olen, esp_len, l2_ol3_hdr_size); + else if (cpt_hdr->w0.reas_sts) + { + frag_cnt = oct_rx_ipsec_reassembly_failure ( + vm, bt, cpt_hdr, d, buf, next, buffer_next_index, olen, esp_len, + l2_ol3_hdr_size, fp_flags); + oct_rx_ipsec_update_counters (vm, node, buf, next, buffer_next_index, + *esp_len, frag_cnt, idx, 1, fp_flags); + return frag_cnt; + } + } + oct_rx_ipsec_update_counters (vm, node, buf, next, buffer_next_index, + *esp_len, frag_cnt, idx, 0, fp_flags); + 
return frag_cnt; +} + +/* + * Return non-zero when the RX parse channel has any bit set above bit 10 + * (rxp->chan >> 11); always 0 when built with PLATFORM_OCTEON9. + * Callers in this file use the result to pick the inline IPsec (CPT) path. + */ +static_always_inline u8 +oct_is_packet_from_cpt (union nix_rx_parse_u *rxp) +{ +#ifdef PLATFORM_OCTEON9 + return 0; +#else + return rxp->chan >> 11; +#endif +} + +/* + * Return non-zero when both completion codes in the CPT parse header report + * success: the hardware code must be a member of CPT_COMP_HWGOOD_MASK and the + * microcode code must pass roc_ie_ow_ucc_is_success() (OCT_FP_FLAG_O20 set, + * s.w3 layout) or roc_ie_ot_ucc_is_success() (otherwise, cn10k.w3 layout). + * NOTE(review): (1U << hw_ccode) assumes hw_ccode < 32 - confirm against the + * field width. + */ +static_always_inline uword +oct_ipsec_is_inl_op_success (union cpt_parse_hdr_u *cpt_hdr, + const u64 fp_flags) +{ + if (fp_flags & OCT_FP_FLAG_O20) + { + u8 hw_ccode = cpt_hdr->s.w3.hw_ccode; + u8 uc_ccode = cpt_hdr->s.w3.uc_ccode; + + return (((1U << hw_ccode) & CPT_COMP_HWGOOD_MASK) && + roc_ie_ow_ucc_is_success (uc_ccode)); + } + else + { + u8 hw_ccode = cpt_hdr->cn10k.w3.hw_ccode; + u8 uc_ccode = cpt_hdr->cn10k.w3.uc_ccode; + + return (((1U << hw_ccode) & CPT_COMP_HWGOOD_MASK) && + roc_ie_ot_ucc_is_success (uc_ccode)); + } +} + +/* + * Compute a packet length from the CPT meta buffer and two parse words: + * read a big-endian u16 at cpt_hdr + byte 2 of w4 (+ 0/2/4/6 taken from + * bits 41-42 of w0), add the difference between byte 2 and byte 0 of w4, + * and add 40 when bit 42 of w0 is set. + * NOTE(review): presumably the u16 is the IP length field and 40 is the + * fixed IPv6 header size - confirm against the NIX parse layout. + */ +static_always_inline u32 +oct_get_len_from_meta (union cpt_parse_hdr_u *cpt_hdr, u64 w0, u64 w4) +{ + u32 len; + uintptr_t ip; + ip = (uintptr_t) cpt_hdr + ((w4 >> 16) & 0xFF); + ip += ((w0 >> 40) & 0x6); + len = plt_be_to_cpu_16 (*(u16 *) ip); + len += ((w4 >> 16) & 0xFF) - (w4 & 0xFF); + len += (w0 & BIT (42)) ? 
40 : 0; + + return len; +} + +/* + * Set b->error from the CPT microcode completion code (uc_ccode) found in + * cpt_hdr: the s.w3 layout with ROC_IE_OW_UCC_* codes when OCT_FP_FLAG_O20 is + * set, the cn10k.w3 layout with ROC_IE_OT_UCC_* codes otherwise. Codes not + * listed by foreach_octeon_ipsec_ucc map to OCT_RX_NODE_CTR_ERR_UNDEFINED. + */ +static_always_inline void +oct_rx_ipsec_set_error (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t *b, union cpt_parse_hdr_u *cpt_hdr, + const u64 fp_flags) +{ + u8 uc_err; + if (fp_flags & OCT_FP_FLAG_O20) + { + uc_err = cpt_hdr->s.w3.uc_ccode; + switch (uc_err) + { + /* clang-format off */ +#define _(f, n, s, d) \ + case ROC_IE_OW_UCC_##f: \ + b->error = node->errors[OCT_RX_NODE_CTR_##f]; \ + break; + foreach_octeon_ipsec_ucc; +#undef _ + /* clang-format on */ + default: + b->error = node->errors[OCT_RX_NODE_CTR_ERR_UNDEFINED]; + } + } + else + { + uc_err = cpt_hdr->cn10k.w3.uc_ccode; + switch (uc_err) + { + /* clang-format off */ +#define _(f, n, s, d) \ + case ROC_IE_OT_UCC_##f: \ + b->error = node->errors[OCT_RX_NODE_CTR_##f]; \ + break; + foreach_octeon_ipsec_ucc; +#undef _ + /* clang-format on */ + default: + b->error = node->errors[OCT_RX_NODE_CTR_ERR_UNDEFINED]; + } + } +} + +/* Segment lengths are peeled off a 64-bit SG word 16 bits at a time */ +#define OCT_SEG_LEN_SHIFT 16 +#define OCT_SEG_LEN_MASK 0xFFFF + +/* + * Chain the extra segments described by the SG entries that follow rxp0 onto + * b. Segment lengths are clamped so that the chain never exceeds the length + * already stored in b->current_length, and ctx->n_segs is advanced by the + * number of tail segments visited. Returns immediately when + * rxp0->desc_sizem1 is 0. + */ +static_always_inline void +oct_rx_ipsec_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, + const union nix_rx_parse_u *rxp0, + vlib_buffer_template_t *bt, vlib_buffer_t *b) +{ + u32 n_words, n_words_processed, desc_sizem1; + vlib_buffer_t *last_buf, *seg_buf; + u32 n_sg_desc, n_segs, next_seg; + u32 current_desc, bi, sg_len; + vlib_buffer_t *buf = b; + struct nix_rx_sg_s *sg; + u32 total_segs = 0; + u64 seg_len; + i64 len; + + desc_sizem1 = rxp0->desc_sizem1; + if (desc_sizem1 == 0) + return; + + n_words = desc_sizem1 << 1; + n_sg_desc = (n_words / 4) + 1; + + sg = (struct nix_rx_sg_s *) (((char *) rxp0) + sizeof (*rxp0)); + /* Typecast to u64 to read each seg length swiftly */ + seg_len = *(u64 *) sg; + n_segs = sg->segs; + + /* Start with first descriptor */ + current_desc = 0; + + len = buf->current_length; + /* + * The length set so far is valid for the single-segment case. + * In case of multi-seg, update seg1 length and advance total words processed. 
+ * also, updates total bytes in buffer. + */ + sg_len = seg_len & OCT_SEG_LEN_MASK; + len -= sg_len; + if (len < 0) + { + sg_len = sg_len + len; + len = 0; + } + buf->current_length = sg_len; + + /* Process from 2nd segment */ + next_seg = 2; + seg_len = seg_len >> OCT_SEG_LEN_SHIFT; + n_words_processed = 2; + + buf->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + buf->total_length_not_including_first_buffer = 0; + last_buf = buf; + + while (current_desc <= n_sg_desc) + { + while (next_seg <= n_segs) + { + seg_buf = (vlib_buffer_t *) ((*(((u64 *) sg) + n_words_processed)) - + sizeof (vlib_buffer_t)); + seg_buf->template = *bt; + sg_len = seg_len & OCT_SEG_LEN_MASK; + /* + * Adjust last buf data length with negative offset for + * ipsec pkts if needed. + */ + len -= sg_len; + if (len < 0) + { + sg_len = sg_len + len; + len = 0; + } + + seg_buf->current_length = sg_len; + bi = vlib_get_buffer_index (vm, seg_buf); + + if (seg_buf->current_length == 0) + { + vlib_buffer_free_no_next (vm, &bi, 1); + total_segs++; + goto done; + } + + last_buf->flags |= VLIB_BUFFER_NEXT_PRESENT; + last_buf->next_buffer = bi; + last_buf = seg_buf; + seg_len = seg_len >> OCT_SEG_LEN_SHIFT; + buf->total_length_not_including_first_buffer += + seg_buf->current_length; + n_words_processed++; + next_seg++; + total_segs++; + } + current_desc++; + n_sg_desc--; + if (n_sg_desc) + { + struct nix_rx_sg_s *tsg; + + tsg = (struct nix_rx_sg_s *) ((u64 *) sg + n_words_processed); + seg_len = *((u64 *) (tsg)); + n_words_processed++; + /* Start over */ + n_segs = tsg->segs; + next_seg = 1; + } + } + +done: + ctx->n_segs += total_segs; +} + +static_always_inline u32 +oct_rx_inl_ipsec_vlib_from_cq ( + vlib_main_t *vm, vlib_node_runtime_t *node, oct_nix_rx_cqe_desc_t *d, + vlib_buffer_t **b, oct_rx_node_ctx_t *ctx, vlib_buffer_template_t *bt, + union cpt_parse_hdr_u *cpt_hdr, vlib_buffer_t **buffs, u32 *err_flags, + u16 *next, u16 *buffer_next_index, const u64 fp_flags) +{ + union nix_rx_parse_u *orig_rxp, *rxp; 
+ u32 is_fail, olen, esp_sz, l2_ol3_sz; + u64 *wqe_ptr; + u32 err_flag; + u8 frag_cnt; + + rxp = &d->parse.f; + cpt_hdr = (union cpt_parse_hdr_u *) *(((u64 *) d) + 9); + wqe_ptr = (u64 *) oct_get_wqe_from_cpt_hdr (cpt_hdr, fp_flags); + + b[0] = (vlib_buffer_t *) ((u8 *) wqe_ptr - 128); + orig_rxp = (union nix_rx_parse_u *) (wqe_ptr + 1); + l2_ol3_sz = orig_rxp->leptr - orig_rxp->laptr; + olen = orig_rxp->pkt_lenm1 + 1; + esp_sz = olen - l2_ol3_sz; + b[0]->template = *bt; + b[0]->flow_id = d[0].parse.w[3] >> 48; + err_flag = ((d[0].parse.w[0] >> 20) & 0xFFF); + if (PREDICT_FALSE (err_flag)) + { + b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + *err_flags |= err_flag; + } + + is_fail = !oct_ipsec_is_inl_op_success (cpt_hdr, fp_flags); + + buffs[*buffer_next_index] = b[0]; + if (PREDICT_FALSE (is_fail)) + { + b[0]->current_length = olen; + clib_memcpy_fast (rxp, orig_rxp, sizeof (oct_nix_rx_parse_t)); + next[*buffer_next_index] = VNET_DEV_ETH_RX_PORT_NEXT_DROP; + *buffer_next_index = *buffer_next_index + 1; + oct_rx_ipsec_set_error (vm, node, b[0], cpt_hdr, fp_flags); + frag_cnt = 1; + } + else + { + next[*buffer_next_index] = ctx->next_index; + *buffer_next_index = *buffer_next_index + 1; + b[0]->current_length = + oct_get_len_from_meta (cpt_hdr, d[0].parse.w[0], d[0].parse.w[4]); + + frag_cnt = oct_rx_ipsec_reassembly (vm, node, bt, cpt_hdr, &d[0], b[0], + buffs, next, buffer_next_index, + &olen, &esp_sz, l2_ol3_sz, fp_flags); + } + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp, bt, b[0]); + ctx->n_rx_bytes += olen; + ctx->n_segs += frag_cnt; + + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + clib_memcpy_fast (b[0]->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + + return 0; +} + static_always_inline u32 -oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, - vnet_dev_rx_queue_t *rxq, u32 n) +oct_rx_vlib_from_cq (vlib_main_t *vm, oct_nix_rx_cqe_desc_t *d, + vlib_buffer_t **b, oct_rx_node_ctx_t *ctx, + vlib_buffer_template_t *bt, 
vlib_buffer_t **buffs, + u32 *err_flags, u16 *next, u16 *buffer_next_index, + const u64 fp_flags) +{ + u32 err_flag; + + b[0] = (vlib_buffer_t *) d->segs0[0] - 1; + b[0]->template = *bt; + ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size; + b[0]->flow_id = d[0].parse.w[3] >> 48; + err_flag = ((d[0].parse.w[0] >> 20) & 0xFFF); + if (PREDICT_FALSE (err_flag)) + { + b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + *err_flags |= err_flag; + } + + ctx->n_segs += 1; + if (d[0].sg0.segs > 1) + oct_rx_attach_tail (vm, ctx, bt, b[0], d + 0); + buffs[*buffer_next_index] = b[0]; + next[*buffer_next_index] = ctx->next_index; + *buffer_next_index = *buffer_next_index + 1; + + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + clib_memcpy_fast (b[0]->pre_data, d, sizeof (oct_nix_rx_cqe_desc_t)); + + return 0; +} + +#define OCT_PUSH_META_TO_FREE(_metabuf, _laddr, _loff_p) \ + do \ + { \ + *(u64 *) ((_laddr) + (*(_loff_p) << 3)) = (u64) _metabuf; \ + *(_loff_p) = *(_loff_p) + 1; \ + } \ + while (0) + +#define LMT_OFF(lmt_addr, lmt_num, offset) \ + (void *) ((uintptr_t) (lmt_addr) + \ + ((u64) (lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset)) + +static_always_inline void +oct_rx_flush_meta_burst (u16 lmt_id, u64 data, u16 lnum, uintptr_t aura_handle) +{ + u64 pa; + + /* Prepare PA and Data */ + pa = roc_npa_aura_handle_to_base (aura_handle) + NPA_LF_AURA_BATCH_FREE0; + pa |= ((data & 0x7) << 4); + + data >>= 3; + data <<= 19; + data |= (u64) lmt_id; + data |= (u64) (lnum - 1) << 12; + + roc_lmt_submit_steorl (data, pa); +} + +static_always_inline u32 +oct_rx_batch (vlib_main_t *vm, vlib_node_runtime_t *node, + oct_rx_node_ctx_t *ctx, vnet_dev_rx_queue_t *rxq, u32 n, + vlib_buffer_t **buffers, const u64 fp_flags) { oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); vlib_buffer_template_t bt = rxq->buffer_template; - u32 n_left; + u32 b0_err_flags = 0, b1_err_flags = 0; + u32 b2_err_flags = 0, b3_err_flags = 0; + u32 n_left, err_flags = 0, 
err_flags_x4 = 0; oct_nix_rx_cqe_desc_t *d = ctx->next_desc; + union cpt_parse_hdr_u *cpt_hdr0, *cpt_hdr1; + union cpt_parse_hdr_u *cpt_hdr2, *cpt_hdr3; + union nix_rx_parse_u *rxp0, *rxp1; + union nix_rx_parse_u *rxp2, *rxp3; + union nix_rx_parse_u *orig_rxp0, *orig_rxp1; + union nix_rx_parse_u *orig_rxp2, *orig_rxp3; + u8 is_b0_from_cpt, is_b1_from_cpt; + u8 is_b2_from_cpt, is_b3_from_cpt; + u64 *wqe_ptr0, *wqe_ptr1; + u64 *wqe_ptr2, *wqe_ptr3; + u32 is_fail0, is_fail1, is_fail2, is_fail3; + u32 olen0, olen1, olen2, olen3; + u32 esp_sz0, esp_sz1, esp_sz2, esp_sz3; + u32 l2_ol3_sz0, l2_ol3_sz1, l2_ol3_sz2, l2_ol3_sz3; vlib_buffer_t *b[4]; + vlib_buffer_t **buffs = buffers + ctx->buffer_start_index; + u16 *next = ctx->next + ctx->buffer_start_index; + u8 frag_cnt0 = 1, frag_cnt1 = 1; + u8 frag_cnt2 = 1, frag_cnt3 = 1; + u8 n_from_cpt, n_cpt_err; + u64 meta_aura_handle; + u64 lbase = crq->lmt_base_addr; + u8 loff = 0, lnum = 0, shft = 0; + u16 lmt_id, buffer_next_index = 0; + u16 cqe_desc_bytes = sizeof (oct_nix_rx_cqe_desc_t); + u64 laddr; + + meta_aura_handle = crq->rq.meta_aura_handle; + ROC_LMT_BASE_ID_GET (lbase, lmt_id); + laddr = lbase; + laddr += 8; - for (n_left = n; n_left >= 8; d += 4, n_left -= 4, ctx->to_next += 4) + bt.flags |= + (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); + + for (n_left = n; n_left >= 8; d += 4, n_left -= 4) { u32 segs = 0; clib_prefetch_store (oct_seg_to_bp (d[4].segs0[0])); @@ -119,47 +1067,526 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, clib_prefetch_store (oct_seg_to_bp (d[7].segs0[0])); b[2] = oct_seg_to_bp (d[2].segs0[0]); b[3] = oct_seg_to_bp (d[3].segs0[0]); - ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]); - ctx->to_next[1] = vlib_get_buffer_index (vm, b[1]); - ctx->to_next[2] = vlib_get_buffer_index (vm, b[2]); - ctx->to_next[3] = vlib_get_buffer_index (vm, b[3]); - b[0]->template = bt; - b[1]->template = bt; - b[2]->template = bt; - b[3]->template = bt; - ctx->n_rx_bytes += 
b[0]->current_length = d[0].sg0.seg1_size; - ctx->n_rx_bytes += b[1]->current_length = d[1].sg0.seg1_size; - ctx->n_rx_bytes += b[2]->current_length = d[2].sg0.seg1_size; - ctx->n_rx_bytes += b[3]->current_length = d[3].sg0.seg1_size; - ctx->n_segs += 4; - segs = d[0].sg0.segs + d[1].sg0.segs + d[2].sg0.segs + d[3].sg0.segs; - - if (PREDICT_FALSE (segs > 4)) + + rxp0 = &d[0].parse.f; + rxp1 = &d[1].parse.f; + rxp2 = &d[2].parse.f; + rxp3 = &d[3].parse.f; + + is_b0_from_cpt = oct_is_packet_from_cpt (&d[0].parse.f); + is_b1_from_cpt = oct_is_packet_from_cpt (&d[1].parse.f); + is_b2_from_cpt = oct_is_packet_from_cpt (&d[2].parse.f); + is_b3_from_cpt = oct_is_packet_from_cpt (&d[3].parse.f); + + n_from_cpt = + is_b0_from_cpt + is_b1_from_cpt + is_b2_from_cpt + is_b3_from_cpt; + if (n_from_cpt == 0) { - oct_rx_attach_tail (vm, ctx, b[0], d + 0); - oct_rx_attach_tail (vm, ctx, b[1], d + 1); - oct_rx_attach_tail (vm, ctx, b[2], d + 2); - oct_rx_attach_tail (vm, ctx, b[3], d + 3); + next[buffer_next_index + 0] = ctx->next_index; + next[buffer_next_index + 1] = ctx->next_index; + next[buffer_next_index + 2] = ctx->next_index; + next[buffer_next_index + 3] = ctx->next_index; + + b[0]->template = bt; + b[1]->template = bt; + b[2]->template = bt; + b[3]->template = bt; + + ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size; + ctx->n_rx_bytes += b[1]->current_length = d[1].sg0.seg1_size; + ctx->n_rx_bytes += b[2]->current_length = d[2].sg0.seg1_size; + ctx->n_rx_bytes += b[3]->current_length = d[3].sg0.seg1_size; + + b[0]->flow_id = d[0].parse.w[3] >> 48; + b[1]->flow_id = d[1].parse.w[3] >> 48; + b[2]->flow_id = d[2].parse.w[3] >> 48; + b[3]->flow_id = d[3].parse.w[3] >> 48; + + b0_err_flags = (d[0].parse.w[0] >> 20) & 0xFFF; + b1_err_flags = (d[1].parse.w[0] >> 20) & 0xFFF; + b2_err_flags = (d[2].parse.w[0] >> 20) & 0xFFF; + b3_err_flags = (d[3].parse.w[0] >> 20) & 0xFFF; + + err_flags_x4 = + b0_err_flags | b1_err_flags | b2_err_flags | b3_err_flags; + + if 
(PREDICT_FALSE (err_flags_x4)) + { + err_flags |= err_flags_x4; + + if (b0_err_flags) + b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (b1_err_flags) + b[1]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (b2_err_flags) + b[2]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (b3_err_flags) + b[3]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + } + + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + { + clib_memcpy_fast (b[0]->pre_data, &d[0], cqe_desc_bytes); + clib_memcpy_fast (b[1]->pre_data, &d[1], cqe_desc_bytes); + clib_memcpy_fast (b[2]->pre_data, &d[2], cqe_desc_bytes); + clib_memcpy_fast (b[3]->pre_data, &d[3], cqe_desc_bytes); + } + + ctx->n_segs += 4; + segs = d[0].sg0.segs + d[1].sg0.segs + d[2].sg0.segs + d[3].sg0.segs; + + if (PREDICT_FALSE (segs > 4)) + { + oct_rx_attach_tail (vm, ctx, &bt, b[0], d + 0); + oct_rx_attach_tail (vm, ctx, &bt, b[1], d + 1); + oct_rx_attach_tail (vm, ctx, &bt, b[2], d + 2); + oct_rx_attach_tail (vm, ctx, &bt, b[3], d + 3); + } + + buffs[buffer_next_index + 0] = b[0]; + buffs[buffer_next_index + 1] = b[1]; + buffs[buffer_next_index + 2] = b[2]; + buffs[buffer_next_index + 3] = b[3]; + + buffer_next_index += 4; } + else if (n_from_cpt == 4) + { + /* All packets are from cpt */ + cpt_hdr0 = (union cpt_parse_hdr_u *) *(((u64 *) &d[0]) + 9); + cpt_hdr1 = (union cpt_parse_hdr_u *) *(((u64 *) &d[1]) + 9); + cpt_hdr2 = (union cpt_parse_hdr_u *) *(((u64 *) &d[2]) + 9); + cpt_hdr3 = (union cpt_parse_hdr_u *) *(((u64 *) &d[3]) + 9); + + wqe_ptr0 = (u64 *) oct_get_wqe_from_cpt_hdr (cpt_hdr0, fp_flags); + wqe_ptr1 = (u64 *) oct_get_wqe_from_cpt_hdr (cpt_hdr1, fp_flags); + wqe_ptr2 = (u64 *) oct_get_wqe_from_cpt_hdr (cpt_hdr2, fp_flags); + wqe_ptr3 = (u64 *) oct_get_wqe_from_cpt_hdr (cpt_hdr3, fp_flags); + + b[0] = (vlib_buffer_t *) ((u8 *) wqe_ptr0 - 128); + b[1] = 
(vlib_buffer_t *) ((u8 *) wqe_ptr1 - 128); + b[2] = (vlib_buffer_t *) ((u8 *) wqe_ptr2 - 128); + b[3] = (vlib_buffer_t *) ((u8 *) wqe_ptr3 - 128); + + orig_rxp0 = (union nix_rx_parse_u *) (wqe_ptr0 + 1); + orig_rxp1 = (union nix_rx_parse_u *) (wqe_ptr1 + 1); + orig_rxp2 = (union nix_rx_parse_u *) (wqe_ptr2 + 1); + orig_rxp3 = (union nix_rx_parse_u *) (wqe_ptr3 + 1); + + l2_ol3_sz0 = orig_rxp0->leptr - orig_rxp0->laptr; + l2_ol3_sz1 = orig_rxp1->leptr - orig_rxp1->laptr; + l2_ol3_sz2 = orig_rxp2->leptr - orig_rxp2->laptr; + l2_ol3_sz3 = orig_rxp3->leptr - orig_rxp3->laptr; + + olen0 = orig_rxp0->pkt_lenm1 + 1; + olen1 = orig_rxp1->pkt_lenm1 + 1; + olen2 = orig_rxp2->pkt_lenm1 + 1; + olen3 = orig_rxp3->pkt_lenm1 + 1; + + esp_sz0 = olen0 - l2_ol3_sz0; + esp_sz1 = olen1 - l2_ol3_sz1; + esp_sz2 = olen2 - l2_ol3_sz2; + esp_sz3 = olen3 - l2_ol3_sz3; + + b[0]->template = bt; + b[1]->template = bt; + b[2]->template = bt; + b[3]->template = bt; + + is_fail0 = !oct_ipsec_is_inl_op_success (cpt_hdr0, fp_flags); + is_fail1 = !oct_ipsec_is_inl_op_success (cpt_hdr1, fp_flags); + is_fail2 = !oct_ipsec_is_inl_op_success (cpt_hdr2, fp_flags); + is_fail3 = !oct_ipsec_is_inl_op_success (cpt_hdr3, fp_flags); + n_cpt_err = is_fail0 + is_fail1 + is_fail2 + is_fail3; + + if (PREDICT_TRUE (!n_cpt_err)) + { + b[0]->current_length = oct_get_len_from_meta ( + cpt_hdr0, d[0].parse.w[0], d[0].parse.w[4]); + b[1]->current_length = oct_get_len_from_meta ( + cpt_hdr1, d[1].parse.w[0], d[1].parse.w[4]); + b[2]->current_length = oct_get_len_from_meta ( + cpt_hdr2, d[2].parse.w[0], d[2].parse.w[4]); + b[3]->current_length = oct_get_len_from_meta ( + cpt_hdr3, d[3].parse.w[0], d[3].parse.w[4]); + + next[buffer_next_index] = ctx->next_index; + buffs[buffer_next_index] = b[0]; + buffer_next_index += 1; + frag_cnt0 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr0, &d[0], b[0], buffs, next, + &buffer_next_index, &olen0, &esp_sz0, l2_ol3_sz0, fp_flags); + + buffs[buffer_next_index] = b[1]; + 
next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + frag_cnt1 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr1, &d[1], b[1], buffs, next, + &buffer_next_index, &olen1, &esp_sz1, l2_ol3_sz1, fp_flags); + + buffs[buffer_next_index] = b[2]; + next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + frag_cnt2 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr2, &d[2], b[2], buffs, next, + &buffer_next_index, &olen2, &esp_sz2, l2_ol3_sz2, fp_flags); + + buffs[buffer_next_index] = b[3]; + next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + frag_cnt3 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr3, &d[3], b[3], buffs, next, + &buffer_next_index, &olen3, &esp_sz3, l2_ol3_sz3, fp_flags); + + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp0, &bt, b[0]); + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp1, &bt, b[1]); + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp2, &bt, b[2]); + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp3, &bt, b[3]); + } + else + { + buffs[buffer_next_index] = b[0]; + + if (is_fail0) + { + b[0]->current_length = olen0; + clib_memcpy_fast (rxp0, orig_rxp0, + sizeof (oct_nix_rx_parse_t)); + next[buffer_next_index] = VNET_DEV_ETH_RX_PORT_NEXT_DROP; + buffer_next_index += 1; + oct_rx_ipsec_set_error (vm, node, b[0], cpt_hdr0, fp_flags); + frag_cnt0 = 1; + } + else + { + next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + b[0]->current_length = oct_get_len_from_meta ( + cpt_hdr0, d[0].parse.w[0], d[0].parse.w[4]); + + frag_cnt0 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr0, &d[0], b[0], buffs, next, + &buffer_next_index, &olen0, &esp_sz0, l2_ol3_sz0, + fp_flags); + } + /* Success and Failure both cases can be multi seg */ + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp0, &bt, b[0]); + + buffs[buffer_next_index] = b[1]; + + if (is_fail1) + { + b[1]->current_length = olen1; + clib_memcpy_fast (rxp1, orig_rxp1, + sizeof (oct_nix_rx_parse_t)); + next[buffer_next_index] = 
VNET_DEV_ETH_RX_PORT_NEXT_DROP; + buffer_next_index += 1; + oct_rx_ipsec_set_error (vm, node, b[1], cpt_hdr1, fp_flags); + frag_cnt1 = 1; + } + else + { + next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + b[1]->current_length = oct_get_len_from_meta ( + cpt_hdr1, d[1].parse.w[0], d[1].parse.w[4]); + frag_cnt1 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr1, &d[1], b[1], buffs, next, + &buffer_next_index, &olen1, &esp_sz1, l2_ol3_sz1, + fp_flags); + } + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp1, &bt, b[1]); + + buffs[buffer_next_index] = b[2]; + + if (is_fail2) + { + b[2]->current_length = olen2; + clib_memcpy_fast (rxp2, orig_rxp2, + sizeof (oct_nix_rx_parse_t)); + next[buffer_next_index] = VNET_DEV_ETH_RX_PORT_NEXT_DROP; + buffer_next_index += 1; + oct_rx_ipsec_set_error (vm, node, b[2], cpt_hdr2, fp_flags); + frag_cnt2 = 1; + } + else + { + next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + b[2]->current_length = oct_get_len_from_meta ( + cpt_hdr2, d[2].parse.w[0], d[2].parse.w[4]); + frag_cnt2 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr2, &d[2], b[2], buffs, next, + &buffer_next_index, &olen2, &esp_sz2, l2_ol3_sz2, + fp_flags); + } + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp2, &bt, b[2]); + + buffs[buffer_next_index] = b[3]; + + if (is_fail3) + { + b[3]->current_length = olen3; + clib_memcpy_fast (rxp3, orig_rxp3, + sizeof (oct_nix_rx_parse_t)); + next[buffer_next_index] = VNET_DEV_ETH_RX_PORT_NEXT_DROP; + buffer_next_index += 1; + oct_rx_ipsec_set_error (vm, node, b[3], cpt_hdr3, fp_flags); + frag_cnt3 = 1; + } + else + { + next[buffer_next_index] = ctx->next_index; + buffer_next_index += 1; + b[3]->current_length = oct_get_len_from_meta ( + cpt_hdr3, d[3].parse.w[0], d[3].parse.w[4]); + + frag_cnt3 = oct_rx_ipsec_reassembly ( + vm, node, &bt, cpt_hdr3, &d[3], b[3], buffs, next, + &buffer_next_index, &olen3, &esp_sz3, l2_ol3_sz3, + fp_flags); + } + oct_rx_ipsec_attach_tail (vm, ctx, orig_rxp3, &bt, 
b[3]); + } + ctx->n_rx_bytes += olen0 + olen1 + olen2 + olen3; + ctx->n_segs += frag_cnt0 + frag_cnt1 + frag_cnt2 + frag_cnt3; + + b[0]->flow_id = d[0].parse.w[3] >> 48; + b[1]->flow_id = d[1].parse.w[3] >> 48; + b[2]->flow_id = d[2].parse.w[3] >> 48; + b[3]->flow_id = d[3].parse.w[3] >> 48; + + b0_err_flags = (d[0].parse.w[0] >> 20) & 0xFFF; + b1_err_flags = (d[1].parse.w[0] >> 20) & 0xFFF; + b2_err_flags = (d[2].parse.w[0] >> 20) & 0xFFF; + b3_err_flags = (d[3].parse.w[0] >> 20) & 0xFFF; + + err_flags_x4 = + b0_err_flags | b1_err_flags | b2_err_flags | b3_err_flags; + + if (PREDICT_FALSE (err_flags_x4)) + { + err_flags |= err_flags_x4; + + if (b0_err_flags) + b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (b1_err_flags) + b[1]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (b2_err_flags) + b[2]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (b3_err_flags) + b[3]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + } + + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr0, laddr, &loff); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr1, laddr, &loff); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr2, laddr, &loff); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr3, laddr, &loff); + + if (fp_flags & OCT_FP_FLAG_TRACE_EN) + { + clib_memcpy_fast (b[0]->pre_data, &d[0], cqe_desc_bytes); + clib_memcpy_fast (b[1]->pre_data, &d[1], cqe_desc_bytes); + clib_memcpy_fast (b[2]->pre_data, &d[2], cqe_desc_bytes); + clib_memcpy_fast (b[3]->pre_data, &d[3], cqe_desc_bytes); + } + } + else + { + /* CQ ring contains mix of packets from wire and CPT */ + if (is_b0_from_cpt) + { + cpt_hdr0 = (union cpt_parse_hdr_u *) *(((u64 *) &d[0]) + 9); + oct_rx_inl_ipsec_vlib_from_cq (vm, node, &d[0], &b[0], ctx, &bt, + cpt_hdr0, buffs, &err_flags, next, + &buffer_next_index, fp_flags); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr0, laddr, &loff); + } + else + 
oct_rx_vlib_from_cq (vm, &d[0], &b[0], ctx, &bt, buffs, &err_flags, + next, &buffer_next_index, fp_flags); + + if (is_b1_from_cpt) + { + cpt_hdr1 = (union cpt_parse_hdr_u *) *(((u64 *) &d[1]) + 9); + oct_rx_inl_ipsec_vlib_from_cq (vm, node, &d[1], &b[1], ctx, &bt, + cpt_hdr1, buffs, &err_flags, next, + &buffer_next_index, fp_flags); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr1, laddr, &loff); + } + else + oct_rx_vlib_from_cq (vm, &d[1], &b[1], ctx, &bt, buffs, &err_flags, + next, &buffer_next_index, fp_flags); + if (is_b2_from_cpt) + { + cpt_hdr2 = (union cpt_parse_hdr_u *) *(((u64 *) &d[2]) + 9); + oct_rx_inl_ipsec_vlib_from_cq (vm, node, &d[2], &b[2], ctx, &bt, + cpt_hdr2, buffs, &err_flags, next, + &buffer_next_index, fp_flags); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr2, laddr, &loff); + } + else + oct_rx_vlib_from_cq (vm, &d[2], &b[2], ctx, &bt, buffs, &err_flags, + next, &buffer_next_index, fp_flags); + if (is_b3_from_cpt) + { + cpt_hdr3 = (union cpt_parse_hdr_u *) *(((u64 *) &d[3]) + 9); + oct_rx_inl_ipsec_vlib_from_cq (vm, node, &d[3], &b[3], ctx, &bt, + cpt_hdr3, buffs, &err_flags, next, + &buffer_next_index, fp_flags); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr3, laddr, &loff); + } + else + oct_rx_vlib_from_cq (vm, &d[3], &b[3], ctx, &bt, buffs, &err_flags, + next, &buffer_next_index, fp_flags); + } + /* Check if lmtline border is crossed and adjust lnum */ + if (loff > 15) + { + /* Update aura handle */ + *(u64 *) (laddr - 8) = + (((u64) (15 & 0x1) << 32) | + roc_npa_aura_handle_to_aura (meta_aura_handle)); + loff = loff - 15; + shft += 3; + + lnum++; + laddr = (uintptr_t) LMT_OFF (lbase, lnum, 8); + /* Pick the pointer from 16th index and put it + * at end of this new line. 
+ */ + *(u64 *) (laddr + (loff << 3) - 8) = *(u64 *) (laddr - 8); + } + + /* Flush it when we are in 16th line and might + * overflow it + */ + if (lnum >= 15 && loff >= 12) + { + /* 16 LMT Line size m1 */ + u64 data = BIT_ULL (48) - 1; + + /* Update aura handle */ + *(u64 *) (laddr - 8) = + (((u64) (loff & 0x1) << 32) | + roc_npa_aura_handle_to_aura (meta_aura_handle)); + + data = (data & ~(0x7UL << shft)) | (((u64) loff >> 1) << shft); + + /* Send up to 16 lmt lines of pointers */ + oct_rx_flush_meta_burst (lmt_id, data, lnum + 1, meta_aura_handle); + plt_wmb (); + lnum = 0; + loff = 0; + shft = 0; + /* First pointer starts at 8B offset */ + laddr = (uintptr_t) LMT_OFF (lbase, lnum, 8); + } + } + + if (loff) + { + /* 16 LMT Line size m1 */ + u64 data = BIT_ULL (48) - 1; + + /* Update aura handle */ + *(u64 *) (laddr - 8) = (((u64) (loff & 0x1) << 32) | + roc_npa_aura_handle_to_aura (meta_aura_handle)); + + data = (data & ~(0x7UL << shft)) | (((u64) loff >> 1) << shft); + + /* Send up to 16 lmt lines of pointers */ + oct_rx_flush_meta_burst (lmt_id, data, lnum + 1, meta_aura_handle); + plt_wmb (); + lnum = 0; + loff = 0; + shft = 0; + /* First pointer starts at 8B offset */ + laddr = (uintptr_t) LMT_OFF (lbase, lnum, 8); } - for (; n_left; d += 1, n_left -= 1, ctx->to_next += 1) + for (; n_left; d += 1, n_left -= 1) + { + is_b0_from_cpt = oct_is_packet_from_cpt (&d[0].parse.f); + if (is_b0_from_cpt) + { + cpt_hdr0 = (union cpt_parse_hdr_u *) *(((u64 *) &d[0]) + 9); + oct_rx_inl_ipsec_vlib_from_cq (vm, node, &d[0], &b[0], ctx, &bt, + cpt_hdr0, buffs, &err_flags, next, + &buffer_next_index, fp_flags); + OCT_PUSH_META_TO_FREE ((u64) cpt_hdr0, laddr, &loff); + } + else + + oct_rx_vlib_from_cq (vm, &d[0], &b[0], ctx, &bt, buffs, &err_flags, + next, &buffer_next_index, fp_flags); + } + if (loff) { - b[0] = (vlib_buffer_t *) d->segs0[0] - 1; - ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]); - b[0]->template = bt; - ctx->n_rx_bytes += b[0]->current_length = 
d[0].sg0.seg1_size; - ctx->n_segs += 1; - if (d[0].sg0.segs > 1) - oct_rx_attach_tail (vm, ctx, b[0], d + 0); + /* 16 LMT Line size m1 */ + u64 data = BIT_ULL (48) - 1; + + /* Update aura handle */ + *(u64 *) (laddr - 8) = (((u64) (loff & 0x1) << 32) | + roc_npa_aura_handle_to_aura (meta_aura_handle)); + + data = (data & ~(0x7UL << shft)) | (((u64) loff >> 1) << shft); + + /* Send up to 16 lmt lines of pointers */ + oct_rx_flush_meta_burst (lmt_id, data, lnum + 1, meta_aura_handle); + plt_wmb (); } plt_write64 ((crq->cq.wdata | n), crq->cq.door); - ctx->n_rx_pkts += n; - ctx->n_left_to_next -= n; - return n; + ctx->n_rx_pkts += buffer_next_index; + ctx->n_left_to_next -= buffer_next_index; + ctx->buffer_start_index += buffer_next_index; + if (err_flags) + ctx->parse_w0_or = (err_flags << 20); + + return buffer_next_index; } +#ifdef PLATFORM_OCTEON9 +static_always_inline u32 +oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill) +{ + u32 n_alloc, n_free; + u32 buffer_indices[n_refill]; + vlib_buffer_t *buffers[n_refill]; + u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq); + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + u64 aura = roc_npa_aura_handle_to_aura (crq->aura_handle); + const u64 addr = + roc_npa_aura_handle_to_base (crq->aura_handle) + NPA_LF_AURA_OP_FREE0; + /* Skip the refill until at least 256 buffers are requested */ + if (n_refill < 256) + return 0; + /* Allocate from the queue's pool (bpi), the pool alloc_fail returns to */ + n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, n_refill, bpi); + if (PREDICT_FALSE (n_alloc < n_refill)) + goto alloc_fail; + + vlib_get_buffers (vm, buffer_indices, (vlib_buffer_t **) buffers, n_alloc); + /* Store each buffer pointer with the aura id at the FREE0 address */ + for (n_free = 0; n_free < n_alloc; n_free++) + roc_store_pair ((u64) buffers[n_free], aura, addr); + + return n_alloc; + +alloc_fail: + vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi); + return 0; +} +#else static_always_inline void oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr, oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi, @@ -212,7 +1639,8 @@ oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t 
*rxq, u16 n_refill) while (n_lines >= batch_max_lines) { - n_alloc = vlib_buffer_alloc (vm, buffer_indices, batch_max_bufs); + n_alloc = + vlib_buffer_alloc_from_pool (vm, buffer_indices, batch_max_bufs, bpi); if (PREDICT_FALSE (n_alloc < batch_max_bufs)) goto alloc_fail; oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0, @@ -254,10 +1682,12 @@ oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill) return n_enq; } +#endif static_always_inline void oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, - oct_rx_node_ctx_t *ctx, oct_nix_rx_cqe_desc_t *d, u32 n_desc) + oct_rx_node_ctx_t *ctx, oct_nix_rx_cqe_desc_t *d, u32 n_desc, + vlib_buffer_t **buffs) { u32 i = 0; if (PREDICT_TRUE (ctx->trace_count == 0)) @@ -265,7 +1695,7 @@ oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, while (ctx->n_traced < ctx->trace_count && i < n_desc) { - vlib_buffer_t *b = (vlib_buffer_t *) d[i].segs0[0] - 1; + vlib_buffer_t *b = *buffs; if (PREDICT_TRUE (vlib_trace_buffer (vm, node, ctx->next_index, b, /* follow_chain */ 0))) @@ -273,30 +1703,62 @@ oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, oct_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr)); tr->next_index = ctx->next_index; tr->sw_if_index = ctx->sw_if_index; - tr->desc = d[i]; + tr->desc = *(oct_nix_rx_cqe_desc_t *) b->pre_data; ctx->n_traced++; } i++; + buffs++; + } +} + +static_always_inline void +oct_rx_enq_to_next (vlib_main_t *vm, vlib_node_runtime_t *node, + oct_rx_node_ctx_t *ctx, u8 *is_single_next) +{ +#ifdef PLATFORM_OCTEON9 + vlib_buffer_enqueue_to_single_next (vm, node, ctx->to_next, ctx->next_index, + ctx->n_rx_pkts); +#else + u32 i; + + for (i = 0; i < ctx->n_rx_pkts; i++) + { + if (ctx->next[i] == VNET_DEV_ETH_RX_PORT_NEXT_DROP) + { + *is_single_next = 0; + break; + } } + vlib_buffer_enqueue_to_next (vm, node, ctx->to_next, ctx->next, + ctx->n_rx_pkts); +#endif } static_always_inline uword oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t 
*node, vlib_frame_t *frame, vnet_dev_port_t *port, - vnet_dev_rx_queue_t *rxq, int with_flows) + vnet_dev_rx_queue_t *rxq, int with_flows, const u64 flags) { vnet_main_t *vnm = vnet_get_main (); u32 thr_idx = vlib_get_thread_index (); oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); - u32 n_desc, head, n, n_enq; + u32 n_desc, head, n, n_enq, n_processed = 0; u32 cq_size = crq->cq.nb_desc; u32 cq_mask = crq->cq.qmask; oct_nix_rx_cqe_desc_t *descs = crq->cq.desc_base; oct_nix_lf_cq_op_status_t status; + u32 to_next[OCT_FRAME_SIZE]; + u16 next[OCT_FRAME_SIZE]; + vlib_buffer_t *buffs[OCT_FRAME_SIZE]; + u8 is_single_next = 1; oct_rx_node_ctx_t _ctx = { .next_index = rxq->next_index, .sw_if_index = port->intf.sw_if_index, .hw_if_index = port->intf.hw_if_index, + .to_next = to_next, + .next = next, + .n_left_to_next = OCT_FRAME_SIZE, + .buffer_start_index = 0, }, *ctx = &_ctx; /* get head and tail from NIX_LF_CQ_OP_STATUS */ @@ -308,21 +1770,23 @@ oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, n_desc = (status.tail - head) & cq_mask; if (n_desc == 0) - return 0; - - vlib_get_new_next_frame (vm, node, ctx->next_index, ctx->to_next, - ctx->n_left_to_next); + goto refill; ctx->trace_count = vlib_get_trace_count (vm, node); while (1) { ctx->next_desc = descs + head; - n = clib_min (cq_size - head, clib_min (n_desc, ctx->n_left_to_next)); - n = oct_rx_batch (vm, ctx, rxq, n); - oct_rx_trace (vm, node, ctx, descs + head, n); + n = + clib_min (cq_size - head, clib_min (n_desc, ctx->n_left_to_next / 4)); + + if (PREDICT_TRUE (ctx->trace_count == 0)) + n_processed += oct_rx_batch (vm, node, ctx, rxq, n, buffs, flags); + else + n_processed += oct_rx_batch (vm, node, ctx, rxq, n, buffs, + OCT_FP_FLAG_TRACE_EN | flags); - if (ctx->n_left_to_next == 0) + if (n_processed >= 256) break; status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status); @@ -335,10 +1799,20 @@ oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, break; } + 
vlib_get_buffer_indices_with_offset (vm, (void **) buffs, ctx->to_next, + n_processed, 0); + + ctx->next = next; + + oct_rx_trace (vm, node, ctx, descs + head, n_processed, buffs); + + oct_rx_enq_to_next (vm, node, ctx, &is_single_next); + if (ctx->n_traced) vlib_set_trace_count (vm, node, ctx->trace_count - ctx->n_traced); - if (PREDICT_TRUE (ctx->next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT)) + if (PREDICT_TRUE (is_single_next && + ctx->next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT)) { vlib_next_frame_t *nf; vlib_frame_t *f; @@ -358,16 +1832,15 @@ oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_no_append (f); } - vlib_put_next_frame (vm, node, ctx->next_index, ctx->n_left_to_next); + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + thr_idx, ctx->hw_if_index, ctx->n_rx_pkts, ctx->n_rx_bytes); +refill: n_enq = crq->n_enq - ctx->n_segs; n_enq += oct_rxq_refill (vm, rxq, rxq->size - n_enq); crq->n_enq = n_enq; - vlib_increment_combined_counter ( - vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - thr_idx, ctx->hw_if_index, ctx->n_rx_pkts, ctx->n_rx_bytes); - return ctx->n_rx_pkts; } @@ -378,7 +1851,29 @@ VNET_DEV_NODE_FN (oct_rx_node) foreach_vnet_dev_rx_queue_runtime (rxq, node) { vnet_dev_port_t *port = rxq->port; - n_rx += oct_rx_node_inline (vm, node, frame, port, rxq, 0); + + if (!rxq->started) + continue; + + n_rx += oct_rx_node_inline (vm, node, frame, port, rxq, 0, 0); + } + + return n_rx; +} + +VNET_DEV_NODE_FN (oct_o20_rx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_rx = 0; + foreach_vnet_dev_rx_queue_runtime (rxq, node) + { + vnet_dev_port_t *port = rxq->port; + + if (!rxq->started) + continue; + + n_rx += + oct_rx_node_inline (vm, node, frame, port, rxq, 0, OCT_FP_FLAG_O20); } return n_rx; diff --git a/src/plugins/dev_octeon/test/configs/pcie.ini b/src/plugins/dev_octeon/test/configs/pcie.ini new 
file mode 100644 index 0000000000..ab109a1720 --- /dev/null +++ b/src/plugins/dev_octeon/test/configs/pcie.ini @@ -0,0 +1 @@ +[default] diff --git a/src/plugins/dev_octeon/test/configs/unittest.ini b/src/plugins/dev_octeon/test/configs/unittest.ini new file mode 100644 index 0000000000..0ce577547a --- /dev/null +++ b/src/plugins/dev_octeon/test/configs/unittest.ini @@ -0,0 +1,45 @@ +[default] +lbk1_ip: 192.168.1.1 +lbk2_ip: 192.168.1.2 +lbk3_ip: 172.66.4.3 +lbk4_ip: 12.0.0.2 +lbk1_intf_name: eth0 +lbk4_intf_name: eth1 +dut_instance: dev_octeon +igw_instance: native_crypto +aes_gcm_128_crypto_key: 4339314b55523947594d6d3547666b45 + +[ipsec] +DEF_IPSEC_TNL_LOCAL_IP4: 192.168.1.1 +DEF_IPSEC_TNL_REMOTE_IP4: 192.168.1.2 +DEF_IPSEC_OUTBOUND_AES_GCM_128_CRYPTO_KEY: C91KUR9GYMm5GfkE +DEF_IPSEC_INBOUND_AES_GCM_128_CRYPTO_KEY: C91KUR9GYMm5GfkE +DEF_IPSEC_OUTBOUND_AES_GCM_192_CRYPTO_KEY: abcdefghijklmnopabcdefgh +DEF_IPSEC_INBOUND_AES_GCM_192_CRYPTO_KEY: abcdefghijklmnopabcdefgh +DEF_IPSEC_OUTBOUND_AES_GCM_256_CRYPTO_KEY: abcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_INBOUND_AES_GCM_256_CRYPTO_KEY: abcdefghijklmnopabcdabcdefghijkl + +DEF_IPSEC_OUTBOUND_AES_CTR_128_CRYPTO_KEY: abcdefghijklmnop +DEF_IPSEC_INBOUND_AES_CTR_128_CRYPTO_KEY: abcdefghijklmnop +DEF_IPSEC_OUTBOUND_AES_CTR_192_CRYPTO_KEY: abcdefghijklmnopabcdefgh +DEF_IPSEC_INBOUND_AES_CTR_192_CRYPTO_KEY: abcdefghijklmnopabcdefgh +DEF_IPSEC_OUTBOUND_AES_CTR_256_CRYPTO_KEY: abcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_INBOUND_AES_CTR_256_CRYPTO_KEY: abcdefghijklmnopabcdabcdefghijkl + +DEF_IPSEC_OUTBOUND_AES_CBC_128_CRYPTO_KEY: abcdefghijklmnop +DEF_IPSEC_INBOUND_AES_CBC_128_CRYPTO_KEY: abcdefghijklmnop +DEF_IPSEC_OUTBOUND_AES_CBC_192_CRYPTO_KEY: abcdefghijklmnopabcdefgh +DEF_IPSEC_INBOUND_AES_CBC_192_CRYPTO_KEY: abcdefghijklmnopabcdefgh +DEF_IPSEC_OUTBOUND_AES_CBC_256_CRYPTO_KEY: abcdefghijklmnopabcdefghijklmnop +DEF_IPSEC_INBOUND_AES_CBC_256_CRYPTO_KEY: abcdefghijklmnopabcdefghijklmnop + 
+DEF_IPSEC_OUTBOUND_SHA1_96_INTEG_KEY: abcdefghijklmnopabcd +DEF_IPSEC_INBOUND_SHA1_96_INTEG_KEY: abcdefghijklmnopabcd +DEF_IPSEC_OUTBOUND_SHA_256_96_INTEG_KEY: abcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_INBOUND_SHA_256_96_INTEG_KEY: abcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_OUTBOUND_SHA_256_128_INTEG_KEY: abcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_INBOUND_SHA_256_128_INTEG_KEY: abcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_OUTBOUND_SHA_384_192_INTEG_KEY: abcdefghijklmnopabcdabcdefghijklabcdefghijklmnop +DEF_IPSEC_INBOUND_SHA_384_192_INTEG_KEY: abcdefghijklmnopabcdabcdefghijklabcdefghijklmnop +DEF_IPSEC_OUTBOUND_SHA_512_256_INTEG_KEY: abcdefghijklmnopabcdabcdefghijklabcdefghijklmnopabcdabcdefghijkl +DEF_IPSEC_INBOUND_SHA_512_256_INTEG_KEY: abcdefghijklmnopabcdabcdefghijklabcdefghijklmnopabcdabcdefghijkl diff --git a/src/plugins/dev_octeon/tm.c b/src/plugins/dev_octeon/tm.c new file mode 100644 index 0000000000..a143985297 --- /dev/null +++ b/src/plugins/dev_octeon/tm.c @@ -0,0 +1,612 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tm.h" + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "octeon", + .subclass_name = "tm", +}; + +static vnet_dev_rv_t +oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) 
+{ + u8 *s = 0; + va_list va; + + va_start (va, fmt); + s = va_format (s, fmt, &va); + va_end (va); + + log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv); + + vec_free (s); + return VNET_DEV_ERR_INTERNAL; +} + +int +oct_tm_sys_node_add (u32 hw_if_idx, u32 node_id, i32 parent_node_id, + u32 priority, u32 weight, u32 lvl, + tm_node_params_t *params) + +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_tm_node *parent_node = NULL; + struct roc_nix_tm_node *tm_node = NULL; + struct roc_nix_tm_shaper_profile *profile = NULL; + int rc = 0; + + /* We don't support dynamic updates */ + if (roc_nix_tm_is_user_hierarchy_enabled (nix)) + { + rc = -ERANGE; + return oct_roc_err (dev, rc, "roc_nix_tm_dynamic update not supported"); + } + if (parent_node_id) + { + parent_node = roc_nix_tm_node_get (nix, (u32) parent_node_id); + } + + /* Find the right level */ + if (lvl != ROC_TM_LVL_ROOT && parent_node) + { + lvl = parent_node->lvl + 1; + } + else if (parent_node_id == ROC_NIX_TM_NODE_ID_INVALID) + { + lvl = ROC_TM_LVL_ROOT; + } + else + { + /* Neither proper parent nor proper level id given */ + rc = -ERANGE; + return oct_roc_err (dev, rc, "roc_nix_tm_invalid_parent-id_err"); + } + + tm_node = plt_zmalloc (sizeof (struct roc_nix_tm_node), 0); + if (!tm_node) + { + rc = -ENOMEM; + return oct_roc_err (dev, rc, "oct_nix_tm_node_alloc_failed"); + } + + tm_node->id = node_id; + tm_node->parent_id = (u32) parent_node_id; + tm_node->lvl = lvl; + tm_node->priority = priority; + tm_node->free_fn = plt_free; + tm_node->weight = weight; + tm_node->shaper_profile_id = (u32) params->shaper_profile_id; + + profile = + roc_nix_tm_shaper_profile_get (nix, (u32) params->shaper_profile_id); + + rc = 
roc_nix_tm_node_add (nix, tm_node); + if (rc < 0) + { + plt_free (tm_node); + return oct_roc_err (dev, rc, "roc_nix_tm_node_add_err"); + } + + roc_nix_tm_shaper_default_red_algo (tm_node, profile); + return 0; +} + +int +oct_tm_sys_node_suspend (u32 hw_if_idx, u32 node_id) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc; + + rc = roc_nix_tm_node_suspend_resume (nix, node_id, true); + if (rc) + { + return oct_roc_err (dev, rc, "roc_nix_tm_node_suspend_failed"); + } + + return rc; +} + +int +oct_tm_sys_node_resume (u32 hw_if_idx, u32 node_id) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc; + + rc = roc_nix_tm_node_suspend_resume (nix, node_id, false); + if (rc) + { + return oct_roc_err (dev, rc, "roc_nix_tm_node_resume_failed"); + } + + return rc; +} + +int +oct_tm_sys_node_delete (u32 hw_if_idx, u32 node_id) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_tm_node *tm_node = NULL; + int rc; + bool free_node = 1; + + if ((rc = roc_nix_tm_is_user_hierarchy_enabled (nix))) + { + rc = -ERANGE; + return oct_roc_err (dev, rc, "roc_nix_tm_dynamic update not supported"); + } + if (node_id == ROC_NIX_TM_NODE_ID_INVALID) + { + rc = -EINVAL; + return oct_roc_err (dev, rc, 
"oct_tm_node_delete_invalid_node-id"); + } + + tm_node = roc_nix_tm_node_get (nix, node_id); + if (!tm_node) + { + rc = -EINVAL; + return oct_roc_err (dev, rc, "oct_tm_node_delete node-id not found"); + } + + rc = roc_nix_tm_node_delete (nix, tm_node->id, free_node); + if (rc) + { + return oct_roc_err (dev, rc, "roc_nix_tm_delete_failed"); + } + return 0; +} + +int +oct_tm_sys_shaper_profile_create (u32 hw_if_idx, tm_shaper_params_t *params) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_tm_shaper_profile *profile; + int rc; + + if (roc_nix_tm_shaper_profile_get (nix, params->shaper_id)) + { + rc = -EINVAL; + return oct_roc_err (dev, rc, "oct_nix_tm_shaper_exists"); + } + + profile = plt_zmalloc (sizeof (struct roc_nix_tm_shaper_profile), 0); + if (!profile) + { + rc = -ENOMEM; + return oct_roc_err (dev, rc, "oct_nix_tm_shaper_create_alloc_failed"); + } + profile->id = params->shaper_id; + profile->commit_rate = params->commit.rate; + profile->commit_sz = params->commit.burst_size; + profile->peak_rate = params->peak.rate; + profile->peak_sz = params->peak.burst_size; + /* If Byte mode, then convert to bps */ + if (!params->pkt_mode) + { + profile->commit_rate *= 8; + profile->peak_rate *= 8; + profile->commit_sz *= 8; + profile->peak_sz *= 8; + } + profile->pkt_len_adj = params->pkt_len_adj; + profile->pkt_mode = params->pkt_mode; + profile->free_fn = plt_free; + + rc = roc_nix_tm_shaper_profile_add (nix, profile); + + /* Fill error information based on return value */ + if (rc) + { + plt_free (profile); + return oct_roc_err (dev, rc, "roc_nix_tm_shaper_creation_failed"); + } + + return rc; +} + +int +oct_tm_sys_node_shaper_update (u32 hw_if_idx, u32 node_id, u32 profile_id) +{ + vnet_main_t 
*vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_tm_shaper_profile *profile; + struct roc_nix_tm_node *node; + int rc; + + rc = roc_nix_tm_node_shaper_update (nix, node_id, profile_id, false); + if (rc) + { + return oct_roc_err (dev, rc, "oct_nix_tm_node_shaper_update_failed"); + } + + node = roc_nix_tm_node_get (nix, node_id); + if (!node) + { + rc = -EINVAL; + return oct_roc_err (dev, rc, + "oct_nix_tm_node_shaper_update_node_failure"); + } + + profile = roc_nix_tm_shaper_profile_get (nix, profile_id); + roc_nix_tm_shaper_default_red_algo (node, profile); + + return 0; +} +int +oct_tm_sys_shaper_profile_delete (u32 hw_if_idx, u32 shaper_id) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc; + + rc = roc_nix_tm_shaper_profile_delete (nix, shaper_id); + if (rc) + { + return oct_roc_err (dev, rc, "roc_nix_tm_shaper_delete_failed"); + } + + return rc; +} + +int +oct_tm_sys_node_sched_weight_update (u32 hw_if_idx, u32 node_id, u32 weight) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_tm_node *node; + int rc = 0; + u32 parent_id, priority; + + node = roc_nix_tm_node_get (nix, node_id); + if (!node) + { + rc = -EINVAL; + return oct_roc_err (dev, rc, "roc_nix_tm_node_get node_id not found"); + 
} + + parent_id = node->parent_id; + priority = node->priority; + + rc = + roc_nix_tm_node_parent_update (nix, node_id, parent_id, priority, weight); + if (rc) + { + return oct_roc_err (dev, rc, "roc_nix_tm_node_parent_update failed"); + } + + return rc; +} + +int +oct_tm_sys_get_capabilities (u32 hw_if_idx, tm_capa_params_t *cap) +{ + + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc, max_nr_nodes = 0, i, n_lvl; + uint16_t schq[ROC_TM_LVL_MAX]; + + memset (cap, 0, sizeof (*cap)); + + rc = roc_nix_tm_rsrc_count (nix, schq); + if (rc) + { + return oct_roc_err (dev, rc, "oct_tm_sys_get_capabilities failed"); + } + + for (i = 0; i < NIX_TXSCH_LVL_TL1; i++) + max_nr_nodes += schq[i]; + + cap->n_nodes_max = max_nr_nodes + port->intf.num_tx_queues; + + n_lvl = roc_nix_tm_lvl_cnt_get (nix); + /* Consider leaf level */ + cap->n_levels_max = n_lvl + 1; + cap->non_leaf_nodes_identical = 1; + cap->leaf_nodes_identical = 1; + + /* Shaper Capabilities */ + cap->shaper_private_n_max = max_nr_nodes; + cap->shaper_n_max = max_nr_nodes; + cap->shaper_private_dual_rate_n_max = max_nr_nodes; + cap->shaper_private_rate_min = NIX_TM_MIN_SHAPER_RATE / 8; + cap->shaper_private_rate_max = NIX_TM_MAX_SHAPER_RATE / 8; + cap->shaper_private_packet_mode_supported = 1; + cap->shaper_private_byte_mode_supported = 1; + cap->shaper_pkt_length_adjust_min = NIX_TM_LENGTH_ADJUST_MIN; + cap->shaper_pkt_length_adjust_max = NIX_TM_LENGTH_ADJUST_MAX; + + /* Schedule Capabilities */ + cap->sched_n_children_max = schq[n_lvl - 1]; + cap->sched_sp_n_priorities_max = NIX_TM_TLX_SP_PRIO_MAX; + cap->sched_wfq_n_children_per_group_max = cap->sched_n_children_max; + cap->sched_wfq_n_groups_max = 1; + cap->sched_wfq_weight_max = roc_nix_tm_max_sched_wt_get 
(); + cap->sched_wfq_packet_mode_supported = 1; + cap->sched_wfq_byte_mode_supported = 1; + + return 0; +} + +int +oct_tm_sys_level_get_capabilities (u32 hw_if_idx, tm_level_capa_params_t *cap, + u32 lvl) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc, n_lvl; + uint16_t schq[ROC_TM_LVL_MAX]; + + memset (cap, 0, sizeof (*cap)); + + rc = roc_nix_tm_rsrc_count (nix, schq); + if (rc) + { + return oct_roc_err (dev, rc, "oct_tm_sys_get_capabilities failed"); + } + + n_lvl = roc_nix_tm_lvl_cnt_get (nix); + + if (roc_nix_tm_lvl_is_leaf (nix, lvl)) + { + /* Leaf */ + cap->n_nodes_max = port->intf.num_tx_queues; + cap->n_nodes_leaf_max = port->intf.num_tx_queues; + cap->leaf_nodes_identical = 1; + } + else if (lvl == ROC_TM_LVL_ROOT) + { + /* Root node, a.k.a. TL2(vf)/TL1(pf) */ + cap->n_nodes_max = 1; + cap->n_nodes_nonleaf_max = 1; + cap->non_leaf_nodes_identical = 1; + + cap->nonleaf.shaper_private_supported = true; + cap->nonleaf.shaper_private_dual_rate_supported = + roc_nix_tm_lvl_have_link_access (nix, lvl) ? 
false : true; + cap->nonleaf.shaper_private_rate_min = NIX_TM_MIN_SHAPER_RATE / 8; + cap->nonleaf.shaper_private_rate_max = NIX_TM_MAX_SHAPER_RATE / 8; + cap->nonleaf.shaper_private_packet_mode_supported = 1; + cap->nonleaf.shaper_private_byte_mode_supported = 1; + + cap->nonleaf.sched_n_children_max = schq[lvl]; + cap->nonleaf.sched_sp_n_priorities_max = + roc_nix_tm_max_prio (nix, lvl) + 1; + cap->nonleaf.sched_wfq_n_groups_max = 1; + cap->nonleaf.sched_wfq_weight_max = roc_nix_tm_max_sched_wt_get (); + cap->nonleaf.sched_wfq_packet_mode_supported = 1; + cap->nonleaf.sched_wfq_byte_mode_supported = 1; + } + else if (lvl < ROC_TM_LVL_MAX) + { + /* TL2, TL3, TL4, MDQ */ + cap->n_nodes_max = schq[lvl]; + cap->n_nodes_nonleaf_max = cap->n_nodes_max; + cap->non_leaf_nodes_identical = 1; + + cap->nonleaf.shaper_private_supported = true; + cap->nonleaf.shaper_private_dual_rate_supported = true; + cap->nonleaf.shaper_private_rate_min = NIX_TM_MIN_SHAPER_RATE / 8; + cap->nonleaf.shaper_private_rate_max = NIX_TM_MAX_SHAPER_RATE / 8; + cap->nonleaf.shaper_private_packet_mode_supported = 1; + cap->nonleaf.shaper_private_byte_mode_supported = 1; + + /* MDQ doesn't support Strict Priority */ + if ((int) lvl == (n_lvl - 1)) + cap->nonleaf.sched_n_children_max = port->intf.num_tx_queues; + else + cap->nonleaf.sched_n_children_max = schq[lvl - 1]; + cap->nonleaf.sched_sp_n_priorities_max = + roc_nix_tm_max_prio (nix, lvl) + 1; + cap->nonleaf.sched_wfq_n_groups_max = 1; + cap->nonleaf.sched_wfq_weight_max = roc_nix_tm_max_sched_wt_get (); + cap->nonleaf.sched_wfq_packet_mode_supported = 1; + cap->nonleaf.sched_wfq_byte_mode_supported = 1; + } + else + { + /* unsupported level: rc is 0 here, so report -EINVAL explicitly */ + return oct_roc_err ( + dev, -EINVAL, "oct_tm_sys_get_capabilities unsupported level,failed"); + } + return 0; +} +/* Read statistics of TM node node_id on interface hw_if_idx into stats. */ +int +oct_tm_sys_node_read_stats (u32 hw_if_idx, u32 node_id, + tm_stats_params_t *stats) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, 
hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc = 0; + int clear = 0; + struct roc_nix_tm_node_stats nix_tm_stats; + struct roc_nix_tm_node *node; + /* An unknown node id is an error, as in the other TM entry points */ + node = roc_nix_tm_node_get (nix, node_id); + if (!node) + { + return oct_roc_err (dev, -EINVAL, "tm_node_read_stats node-id not found"); + } + /* Leaf node: report the Tx queue packet and octet counters */ + if (roc_nix_tm_lvl_is_leaf (nix, node->lvl)) + { + struct roc_nix_stats_queue qstats; + + rc = roc_nix_stats_queue_get (nix, node->id, 0, &qstats); + if (!rc) + { + stats->n_pkts = qstats.tx_pkts; + stats->n_bytes = qstats.tx_octs; + printf (" - STATS for node \n"); + printf (" -- pkts (%" PRIu64 ") bytes (%" PRIu64 ")\n", + stats->n_pkts, stats->n_bytes); + } + goto exit; + } + /* Non-leaf node: report dropped packets/bytes as red-color drops */ + rc = roc_nix_tm_node_stats_get (nix, node_id, clear, &nix_tm_stats); + if (!rc) + { + stats->leaf.n_pkts_dropped[TM_COLOR_RED] = + nix_tm_stats.stats[ROC_NIX_TM_NODE_PKTS_DROPPED]; + stats->leaf.n_bytes_dropped[TM_COLOR_RED] = + nix_tm_stats.stats[ROC_NIX_TM_NODE_BYTES_DROPPED]; + } + +exit: + if (rc) + { + return oct_roc_err (dev, rc, "tm_node_read_stats_err"); + } + return rc; +} +/* Enable the user-defined TM hierarchy on interface hw_if_idx. */ +int +oct_tm_sys_start (u32 hw_if_idx) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc = 0; + + if (roc_nix_tm_is_user_hierarchy_enabled (nix)) + { + rc = -EIO; + return oct_roc_err (dev, rc, "oct_nix_tm_hirearchy_exists"); + } + /* Refuse to start when there are fewer leaf nodes than Tx queues */ + if (roc_nix_tm_leaf_cnt (nix) < port->intf.num_tx_queues) + { + rc = -EINVAL; + return oct_roc_err (dev, rc, "oct_nix_tm_incomplete hierarchy"); + } + /* Disable the current hierarchy before enabling the user one */ + rc = roc_nix_tm_hierarchy_disable (nix); + if (rc) + { + return oct_roc_err (dev, rc, "oct_nix_tm_hierarchy_disable_failed"); + } + + rc = roc_nix_tm_hierarchy_enable (nix, 
ROC_NIX_TM_USER, true); + if (rc) + { + return oct_roc_err (dev, rc, "oct_nix_tm_hierarchy_enabled_failed"); + } + return 0; +} + +int +oct_tm_sys_stop (u32 hw_if_idx) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_dev_port_t *port = + vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rc = 0; + + /* Disable hierarchy */ + rc = roc_nix_tm_hierarchy_disable (nix); + if (rc) + { + rc = -EIO; + return oct_roc_err (dev, rc, "oct_nix_tm_stop_failed"); + } + + return 0; +} + +tm_system_t dev_oct_tm_ops = { + .node_add = oct_tm_sys_node_add, + .node_delete = oct_tm_sys_node_delete, + .node_read_stats = oct_tm_sys_node_read_stats, + .tm_get_capabilities = oct_tm_sys_get_capabilities, + .tm_level_get_capabilities = oct_tm_sys_level_get_capabilities, + .shaper_profile_create = oct_tm_sys_shaper_profile_create, + .node_shaper_update = oct_tm_sys_node_shaper_update, + .shaper_profile_delete = oct_tm_sys_shaper_profile_delete, + .node_sched_weight_update = oct_tm_sys_node_sched_weight_update, + .start_tm = oct_tm_sys_start, + .stop_tm = oct_tm_sys_stop, +}; diff --git a/src/plugins/dev_octeon/tm.h b/src/plugins/dev_octeon/tm.h new file mode 100644 index 0000000000..9d7c008b04 --- /dev/null +++ b/src/plugins/dev_octeon/tm.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2024 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _OCT_TM_H_ +#define _OCT_TM_H_ + +#include +#include +#include + +#define NIX_TM_DFLT_RR_WT 71 +#define OCT_TM_NODE_ID_NULL -1 +#define OCT_TM_INVALID 0 + +#endif /* _OCT_TM_H_ */ diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c index 5deaa82a0c..f2846e82e7 100644 --- a/src/plugins/dev_octeon/tx_node.c +++ b/src/plugins/dev_octeon/tx_node.c @@ -14,6 +14,22 @@ #include #include +#include + +#define OCT_TX_NODE (1 << 0) +#define OCT_TX_IPSEC_TM_NODE (1 << 1) + +#define OCT_LMT_GET_LINE_ADDR(lmt_addr, lmt_num) \ + (void *) ((u64) (lmt_addr) + ((u64) (lmt_num) << ROC_LMT_LINE_SIZE_LOG2)) + +#define OCT_SEND_HDR_DWORDS 1 + +/* + * Encoded number of segments to number of dwords macro, + * each value of nb_segs is encoded as 4bits. + */ +#define NIX_SEGDW_MAGIC 0x76654432210ULL +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF) typedef struct { @@ -22,21 +38,71 @@ typedef struct u32 n_tx_bytes; u32 n_drop; vlib_buffer_t *drop[VLIB_FRAME_SIZE]; + u32 n_exd_mtu; + vlib_buffer_t *exd_mtu[VLIB_FRAME_SIZE]; u32 batch_alloc_not_ready; u32 batch_alloc_issue_fail; + int max_pkt_len; u16 lmt_id; u64 lmt_ioaddr; lmt_line_t *lmt_lines; } oct_tx_ctx_t; +#ifdef PLATFORM_OCTEON9 static_always_inline u32 -oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) +oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, + const u64 flags) { oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); - u8 num_cl; - u64 ah; + u16 off = ctq->hdr_off; + u64 ah = ctq->aura_handle; u32 n_freed = 0, n; + + ah = ctq->aura_handle; + + if ((n = roc_npa_aura_op_available (ah)) >= 32) + { + u64 buffers[n]; + u32 bi[n]; + + if (flags & OCT_TX_NODE) + n = clib_min (n, ctq->n_enq); + n_freed = roc_npa_aura_op_bulk_alloc (ah, buffers, n, 0, 1); + vlib_get_buffer_indices_with_offset (vm, (void **) &buffers, bi, n_freed, 
+ off); + vlib_buffer_free_no_next (vm, bi, n_freed); + } + + return n_freed; +} + +static_always_inline void +oct_lmt_copy (void *lmt_addr, u64 io_addr, void *desc, u64 dwords) +{ + u64 lmt_status; + + do + { + roc_lmt_mov_seg (lmt_addr, desc, dwords); + lmt_status = roc_lmt_submit_ldeor (io_addr); + } + while (lmt_status == 0); +} +#else +static_always_inline u32 +oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, + const u64 flags) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); oct_npa_batch_alloc_cl128_t *cl; + u32 n_freed = 0, n, n_alloc; + u8 num_cl; + u64 ah; + + if (flags & OCT_TX_NODE) + n_alloc = clib_min (ctq->n_enq, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS); + else + n_alloc = ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS; num_cl = ctq->ba_num_cl; if (num_cl) @@ -46,9 +112,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) for (cl = ctq->ba_buffer + ctq->ba_first_cl; num_cl > 0; num_cl--, cl++) { - u8 count; - if (cl->status.ccode == ALLOC_CCODE_INVAL) + oct_npa_batch_alloc_status_t st; + + if ((st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_RELAXED)) == + OCT_BATCH_ALLOC_IOVA0_MASK + ALLOC_CCODE_INVAL) { + cl_not_ready: ctx->batch_alloc_not_ready++; n_freed = bi - (u32 *) ctq->ba_buffer; if (n_freed > 0) @@ -63,11 +132,15 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) return 0; } - count = cl->status.count; + if (st.status.count > 8 && + __atomic_load_n (cl->iova + 8, __ATOMIC_RELAXED) == + OCT_BATCH_ALLOC_IOVA0_MASK) + goto cl_not_ready; + #if (CLIB_DEBUG > 0) - cl->status.count = cl->status.ccode = 0; + cl->iova[0] &= OCT_BATCH_ALLOC_IOVA0_MASK; #endif - if (PREDICT_TRUE (count == 16)) + if (PREDICT_TRUE (st.status.count == 16)) { /* optimize for likely case where cacheline is full */ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, 16, @@ -76,9 +149,9 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) } else { - 
vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, count, - off); - bi += count; + vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, + st.status.count, off); + bi += st.status.count; } } @@ -89,7 +162,8 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) /* clear status bits in each cacheline */ n = cl - ctq->ba_buffer; for (u32 i = 0; i < n; i++) - ctq->ba_buffer[i].iova[0] = 0; + ctq->ba_buffer[i].iova[0] = ctq->ba_buffer[i].iova[8] = + OCT_BATCH_ALLOC_IOVA0_MASK; ctq->ba_num_cl = ctq->ba_first_cl = 0; } @@ -100,7 +174,7 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) { u64 addr, res; - n = clib_min (n, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS); + n = clib_min (n, n_alloc); oct_npa_batch_alloc_compare_t cmp = { .compare_s = { .aura = roc_npa_aura_handle_to_aura (ah), @@ -122,10 +196,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) return n_freed; } +#endif static_always_inline u8 oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, - lmt_line_t *line, u32 flags, int simple, int trace) + lmt_line_t *line, u32 flags, int simple, int trace, u32 *n, + u8 *dpl) { u8 n_dwords = 2; u32 total_len = 0; @@ -140,6 +216,17 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, }, }; + if (PREDICT_FALSE (vlib_buffer_length_in_chain (vm, b) > ctx->max_pkt_len)) + { + ctx->exd_mtu[ctx->n_exd_mtu++] = b; + return 0; + } + +#ifdef PLATFORM_OCTEON9 + /* Override line for Octeon9 */ + line = ctx->lmt_lines; +#endif + if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT) { u8 n_tail_segs = 0; @@ -151,7 +238,7 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, tail_segs[n_tail_segs++] = t; if (n_tail_segs > 5) { - ctx->drop[ctx->n_drop++] = t; + ctx->drop[ctx->n_drop++] = b; return 0; } } @@ -193,19 +280,18 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) { 
d.hdr_w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM; - d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset; - d.hdr_w1.ol4ptr = - vnet_buffer (b)->l3_hdr_offset + sizeof (ip4_header_t); + d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset - b->current_data; + d.hdr_w1.ol4ptr = d.hdr_w1.ol3ptr + sizeof (ip4_header_t); } if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) { d.hdr_w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM; - d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset; + d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; } else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) { d.hdr_w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM; - d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset; + d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; } } @@ -220,8 +306,15 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; } +#ifdef PLATFORM_OCTEON9 + oct_lmt_copy (line, ctx->lmt_ioaddr, &d, n_dwords); +#else for (u32 i = 0; i < n_dwords; i++) line->dwords[i] = d.as_u128[i]; +#endif + + *dpl = n_dwords; + *n = *n + 1; return n_dwords; } @@ -231,8 +324,9 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, vlib_buffer_t **b, u32 n_pkts, int trace) { u8 dwords_per_line[16], *dpl = dwords_per_line; - u64 lmt_arg, ioaddr, n_lines; - u32 n_left, or_flags_16 = 0; + u64 __attribute__ ((unused)) lmt_arg, ioaddr, n_lines; + u32 __attribute__ ((unused)) or_flags_16 = 0; + u32 n_left, n = 0; const u32 not_simple_flags = VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD; lmt_line_t *l = ctx->lmt_lines; @@ -240,7 +334,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, /* Data Store Memory Barrier - outer shareable domain */ asm volatile("dmb oshst" ::: "memory"); - for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8) + for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8) { u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0; vlib_prefetch_buffer_header (b[8], 
LOAD); @@ -261,49 +355,56 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, if ((or_f & not_simple_flags) == 0) { int simple = 1; - oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); - dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2; - dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2; + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } else { int simple = 0; - dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); 
vlib_prefetch_buffer_header (b[14], LOAD); - dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } - dpl += 8; + dpl += n; + l += n; + n = 0; } - for (; n_left > 0; n_left -= 1, b += 1, l += 1) + for (; n_left > 0; n_left -= 1, b += 1) { u32 f0 = b[0]->flags; - dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace, &n, &dpl[n]); or_flags_16 |= f0; + dpl += n; + l += n; + n = 0; } lmt_arg = ctx->lmt_id; ioaddr = ctx->lmt_ioaddr; - n_lines = n_pkts; + n_lines = dpl - dwords_per_line; + + if (PREDICT_FALSE (!n_lines)) + return n_pkts; +#ifndef PLATFORM_OCTEON9 if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT)) { dpl = dwords_per_line; @@ -332,21 +433,1342 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, } roc_lmt_submit_steorl (lmt_arg, ioaddr); +#endif return n_pkts; } +static inline u16 +oct_check_fc_nix (struct roc_nix_sq *sq, i32 *fc_cache, u16 pkts) +{ + i32 val, new_val, depth; + u8 retry_count = 32; + + do + { + /* Reduce the cached count */ + val = (i32) __atomic_sub_fetch (fc_cache, pkts, __ATOMIC_RELAXED); + if (val >= 0) + return pkts; + + depth = sq->nb_sqb_bufs_adj - + __atomic_load_n ((u64 *) sq->fc, __ATOMIC_RELAXED); + + if (depth <= 0) + return 0; + + /* Update cached value (fc_cache) when lower than `pkts` */ + new_val = (depth << sq->sqes_per_sqb_log2) - pkts; + if (PREDICT_FALSE (new_val < 0)) + return 0; + + /* Update fc_cache if there is 
no update done by other cores */ + if (__atomic_compare_exchange_n (fc_cache, &val, new_val, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + return pkts; + } + while (retry_count--); + + return 0; +} + +static inline u16 +oct_check_fc_cpt (struct roc_cpt_lf *cpt_lf, u32 *fc_cache, u16 pkts) +{ + i32 val, new_val, depth; + u8 retry_count = 32; + + do + { + /* Reduce the cached count */ + val = (i32) __atomic_sub_fetch (fc_cache, pkts, __ATOMIC_RELAXED); + if (val >= 0) + return pkts; + + depth = cpt_lf->nb_desc - clib_atomic_load_relax_n (cpt_lf->fc_addr); + + if (depth <= 0) + return 0; + new_val = depth - pkts; + if (PREDICT_FALSE (new_val < 0)) + return 0; + + /* Update fc_cache if there is no update done by other cores */ + if (__atomic_compare_exchange_n (fc_cache, (u32 *) &val, new_val, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + return pkts; + } + while (retry_count--); + return 0; +} + +static_always_inline u64 +oct_add_sg_desc (union nix_send_sg_s *sg, int n_segs, vlib_buffer_t *seg1, + vlib_buffer_t *seg2, vlib_buffer_t *seg3) +{ + sg[0].u = 0; + sg[0].segs = n_segs; + sg[0].subdc = NIX_SUBDC_SG; + + switch (n_segs) + { + case 3: + sg[0].seg3_size = seg3->current_length; + sg[3].u = (u64) vlib_buffer_get_current (seg3); + /* Fall through */ + case 2: + sg[0].seg2_size = seg2->current_length; + sg[2].u = (u64) vlib_buffer_get_current (seg2); + /* Fall through */ + case 1: + sg[0].seg1_size = seg1->current_length; + sg[1].u = (u64) vlib_buffer_get_current (seg1); + break; + default: + ASSERT (0); + return 0; + } + + /* Return number of dwords in sub-descriptor */ + return n_segs == 1 ? 
1 : 2; +} + +static_always_inline u64 +oct_add_sg_list (union nix_send_sg_s *sg, vlib_buffer_t *b, u64 n_segs) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_t *seg1, *seg2, *seg3; + u64 n_dwords; + + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) + return oct_add_sg_desc (sg, 1, b, NULL, NULL); + + seg1 = b; + n_dwords = 0; + while (n_segs > 2) + { + seg2 = vlib_get_buffer (vm, seg1->next_buffer); + seg3 = vlib_get_buffer (vm, seg2->next_buffer); + + n_dwords += oct_add_sg_desc (sg, 3, seg1, seg2, seg3); + + if (seg3->flags & VLIB_BUFFER_NEXT_PRESENT) + { + seg1 = vlib_get_buffer (vm, seg3->next_buffer); + sg += 4; + } + n_segs -= 3; + } + + if (n_segs == 1) + n_dwords += oct_add_sg_desc (sg, 1, seg1, NULL, NULL); + else if (n_segs == 2) + { + seg2 = vlib_get_buffer (vm, seg1->next_buffer); + n_dwords += oct_add_sg_desc (sg, 2, seg1, seg2, NULL); + } + + return n_dwords; +} + +static_always_inline u64 +oct_add_send_hdr (struct nix_send_hdr_s *hdr, vlib_buffer_t *b, + u64 aura_handle, u64 sq, u64 n_dwords) +{ + vnet_buffer_oflags_t oflags; + + hdr->w0.u = 0; + hdr->w1.u = 0; + hdr->w0.sq = sq; + hdr->w0.aura = roc_npa_aura_handle_to_aura (aura_handle); + hdr->w0.total = b->current_length; + hdr->w0.sizem1 = n_dwords + OCT_SEND_HDR_DWORDS - 1; + + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + hdr->w0.total = vlib_buffer_length_in_chain (vlib_get_main (), b); + + if (!(b->flags & VNET_BUFFER_F_OFFLOAD)) + return OCT_SEND_HDR_DWORDS; + + if (b->flags & VNET_BUFFER_F_OFFLOAD) + { + oflags = vnet_buffer (b)->oflags; + if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) + { + hdr->w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM; + hdr->w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset - b->current_data; + hdr->w1.ol4ptr = hdr->w1.ol3ptr + sizeof (ip4_header_t); + } + + if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) + { + hdr->w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM; + hdr->w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; + } + else if (oflags & 
VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) + { + hdr->w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + hdr->w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; + } + } + return OCT_SEND_HDR_DWORDS; +} + +static_always_inline void +oct_ipsec_append_next_buffer (vlib_main_t *vm, vlib_buffer_t *buffer, + uint16_t bytes_to_append) +{ + u32 buffer_index = 0; + vlib_buffer_t *tmp; + + if (vlib_buffer_alloc (vm, &buffer_index, 1) != 1) + { + clib_warning ("buffer allocation failure"); + return; + } + + tmp = vlib_get_buffer (vm, buffer_index); + buffer->next_buffer = buffer_index; + buffer->flags |= VLIB_BUFFER_NEXT_PRESENT; + buffer->total_length_not_including_first_buffer = 0; + tmp->current_length += bytes_to_append; +} + +static_always_inline uint32_t +oct_ipsec_fill_sg2_buf (vlib_main_t *vm, struct roc_sg2list_comp *list, int i, + vlib_buffer_t **lb) +{ + struct roc_sg2list_comp *to; + + to = &list[i / 3]; + to->u.s.len[i % 3] = lb[0]->current_length; + to->ptr[i % 3] = (u64) vlib_buffer_get_current (lb[0]); + to->u.s.valid_segs = (i % 3) + 1; + i++; + + while (lb[0]->flags & VLIB_BUFFER_NEXT_PRESENT) + { + to = &list[i / 3]; + lb[0] = vlib_get_buffer (vm, lb[0]->next_buffer); + to->ptr[i % 3] = (u64) vlib_buffer_get_current (lb[0]); + to->u.s.len[i % 3] = lb[0]->current_length; + to->u.s.valid_segs = (i % 3) + 1; + i++; + } + + return i; +} + +static_always_inline int +oct_ipsec_outb_prepare_sg2_list (vlib_main_t *vm, vlib_buffer_t *b, + struct cpt_inst_s *inst, u32 bytes_to_append, + u32 dlen, + oct_ipsec_outbound_pkt_meta_t **pkt_meta, + u64 *n_dwords, oct_ipsec_session_t *sess) +{ + u16 buffer_data_size = vlib_buffer_get_default_data_size (vm); + struct roc_sg2list_comp *scatter_comp, *gather_comp; + void *m_data = (void *) pkt_meta[0]->sg_buffer; + union nix_send_sg_s *sg; + union cpt_inst_w5 cpt_inst_w5; + union cpt_inst_w6 cpt_inst_w6; + vlib_buffer_t *last_buf = b; + int n_segs; + + /* Input Gather List */ + n_segs = 0; + gather_comp = (struct roc_sg2list_comp *) 
((uint8_t *) m_data + 64); + + n_segs = oct_ipsec_fill_sg2_buf (vm, gather_comp, n_segs, &last_buf); + + cpt_inst_w5.s.gather_sz = ((n_segs + 2) / 3); + + if ((bytes_to_append + last_buf->current_length) > buffer_data_size) + { + /* Need an extra buffer */ + oct_ipsec_append_next_buffer (vm, last_buf, bytes_to_append); + } + else + { + vlib_buffer_put_uninit (last_buf, bytes_to_append); + } + + last_buf = b; + + /* Output Gather List */ + n_segs = 0; + scatter_comp = (struct roc_sg2list_comp *) ((uint8_t *) m_data); + + n_segs = oct_ipsec_fill_sg2_buf (vm, scatter_comp, n_segs, &last_buf); + + cpt_inst_w6.s.scatter_sz = ((n_segs + 2) / 3); + cpt_inst_w5.s.dptr = (uint64_t) gather_comp; + + cpt_inst_w6.s.rptr = (uint64_t) scatter_comp; + + inst->w5.u64 = cpt_inst_w5.u64; + inst->w6.u64 = cpt_inst_w6.u64; + inst->w4.s.dlen = dlen; + inst->w4.s.opcode_major &= (~(ROC_IE_OT_INPLACE_BIT)); + + b->total_length_not_including_first_buffer += bytes_to_append; + + sg = (union nix_send_sg_s *) (pkt_meta[0]->nixtx + 2); + inst->w0.u64 = (uint64_t) vnet_buffer (b)->l3_hdr_offset << 16; + inst->w0.u64 |= NIX_NB_SEGS_TO_SEGDW (n_segs); + inst->w0.u64 |= + (((int64_t) pkt_meta[0]->nixtx - (int64_t) inst->dptr) & 0xFFFFF) << 32; + n_dwords[0] = (n_segs % 3) + (n_segs / 3) * 2; + sg[0].subdc = NIX_SUBDC_SG; + sg[4].subdc = NIX_SUBDC_SG; + + return n_segs; +} + +static_always_inline u32 +oct_get_tx_vlib_buf_segs (vlib_main_t *vm, vlib_buffer_t *b) +{ + /* Each buffer will have atleast 1 segment */ + u32 n_segs = 1; + + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) + return n_segs; + + do + { + b = vlib_get_buffer (vm, b->next_buffer); + n_segs++; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + + return n_segs; +} + +static_always_inline i32 +oct_ipsec_rlen_get (oct_ipsec_encap_len_t *encap, uint32_t plen) +{ + uint32_t enc_payload_len; + + enc_payload_len = round_pow2 (plen + encap->roundup_len - encap->adj_len, + encap->roundup_byte); + + return encap->partial_len 
+ enc_payload_len + encap->adj_len; +} + +static_always_inline u32 +oct_ipsec_esp_add_footer_and_icv (oct_ipsec_encap_len_t *encap, u32 rlen) +{ + /* plain_text len + pad_bytes + ESP_footer size + icv_len */ + return rlen + encap->icv_len - encap->partial_len; +} + +void static_always_inline +oct_prepare_ipsec_inst (vlib_main_t *vm, vlib_buffer_t *b, u64 sq_handle, + u64 aura_handle, + oct_ipsec_outbound_pkt_meta_t **pkt_meta, + struct cpt_inst_s *inst, u64 *n_dwords, + oct_ipsec_session_t *sess) +{ + u16 buffer_data_size = vlib_buffer_get_default_data_size (vm); + struct nix_send_hdr_s *send_hdr; + union nix_send_sg_s *sg; + u64 n_segs; + u16 total_length, dlen_adj; + u16 l3_hdr_offset = vnet_buffer (b)->l3_hdr_offset; + u32 dlen, rlen, sa_bytes; + + send_hdr = (struct nix_send_hdr_s *) pkt_meta[0]->nixtx; + + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + total_length = + b->current_length + b->total_length_not_including_first_buffer; + dlen = total_length - l3_hdr_offset; + rlen = oct_ipsec_rlen_get (&sess->encap, dlen); + dlen_adj = rlen - dlen; + + inst->w4.u64 = sess->inst.w4.u64; + + n_segs = oct_ipsec_outb_prepare_sg2_list ( + vm, b, inst, dlen_adj, total_length, pkt_meta, n_dwords, sess); + } + else + { + dlen = b->current_length - l3_hdr_offset; + + rlen = oct_ipsec_rlen_get (&sess->encap, dlen); + dlen_adj = rlen - dlen; + + if (rlen > buffer_data_size) + { + inst->w4.u64 = sess->inst.w4.u64; + + n_segs = oct_ipsec_outb_prepare_sg2_list (vm, b, inst, dlen_adj, + b->current_length, + pkt_meta, n_dwords, sess); + } + else + { + sg = (union nix_send_sg_s *) (pkt_meta[0]->nixtx + 2); + + inst->dptr = + (u64) ((u8 *) vlib_buffer_get_current (b) + l3_hdr_offset); + inst->rptr = inst->dptr; + /* Set w0 nixtx_offset */ + inst->w0.u64 |= + (((int64_t) pkt_meta[0]->nixtx - (int64_t) inst->dptr) & 0xFFFFF) + << 32; + inst->w0.u64 |= 1; + inst->w4.u64 = sess->inst.w4.u64 | dlen; + + b->current_length += dlen_adj; + n_segs = oct_get_tx_vlib_buf_segs (vm, b); + 
n_dwords[0] = oct_add_sg_list (sg, b, n_segs); + } + } + + oct_add_send_hdr (send_hdr, b, aura_handle, sq_handle, n_dwords[0]); + + sa_bytes = oct_ipsec_esp_add_footer_and_icv (&sess->encap, rlen); + vlib_increment_combined_counter ( + &ipsec_sa_counters, vlib_get_thread_index (), + vnet_buffer (b)->ipsec.sad_index, 1, sa_bytes); +} + +void static_always_inline +oct_submit_quad_packets (u64 lmt_arg, oct_device_t *cd, + struct cpt_inst_s *inst0, struct cpt_inst_s *inst1, + struct cpt_inst_s *inst2, struct cpt_inst_s *inst3, + u64 *n_dwords, u64 **lmt_line) +{ + roc_lmt_mov_seg ((void *) lmt_line[0], inst0, 4); + roc_lmt_mov_seg ((void *) lmt_line[1], inst1, 4); + roc_lmt_mov_seg ((void *) lmt_line[2], inst2, 4); + roc_lmt_mov_seg ((void *) lmt_line[3], inst3, 4); + + /* Count minus one of LMTSTs in the burst */ + lmt_arg |= 3 << 12; + + /* + * Vector of sizes of each LMTST in the burst. Every 3 bits + * represents size - 1 of one LMTST, except first. + */ + lmt_arg |= (n_dwords[1] - 1) << (19 + (3 * 0)); + lmt_arg |= (n_dwords[2] - 1) << (19 + (3 * 1)); + lmt_arg |= (n_dwords[3] - 1) << (19 + (3 * 2)); + + roc_lmt_submit_steorl (lmt_arg, cd->cpt_io_addr); + + asm volatile("dmb oshst" ::: "memory"); +} + +i32 static_always_inline +oct_pkts_send (vlib_main_t *vm, vlib_node_runtime_t *node, oct_tx_ctx_t *ctx, + vnet_dev_tx_queue_t *txq, u16 tx_pkts, vlib_buffer_t **bufs) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + u32 desc_sz = 10 /* Worst case - Send hdr + Two SG with 3 segs each */; + union nix_send_sg_s *sg8, *sg9, *sg10, *sg11, *sg12, *sg13, *sg14, *sg15; + struct nix_send_hdr_s *send_hdr12, *send_hdr13, *send_hdr14, *send_hdr15; + struct nix_send_hdr_s *send_hdr8, *send_hdr9, *send_hdr10, *send_hdr11; + u64 desc12[desc_sz], desc13[desc_sz], desc14[desc_sz], desc15[desc_sz]; + u64 desc8[desc_sz], desc9[desc_sz], desc10[desc_sz], desc11[desc_sz]; + struct nix_send_hdr_s *send_hdr4, *send_hdr5, *send_hdr6, *send_hdr7; + struct nix_send_hdr_s *send_hdr0, 
*send_hdr1, *send_hdr2, *send_hdr3; + union nix_send_sg_s *sg0, *sg1, *sg2, *sg3, *sg4, *sg5, *sg6, *sg7; + u64 desc0[desc_sz], desc1[desc_sz], desc2[desc_sz], desc3[desc_sz]; + u64 desc4[desc_sz], desc5[desc_sz], desc6[desc_sz], desc7[desc_sz]; + u64 io_addr, sq_handle, n_dwords[16], n_packets; + void *lmt_line12, *lmt_line13, *lmt_line14, *lmt_line15; + void *lmt_line8, *lmt_line9, *lmt_line10, *lmt_line11; + void *lmt_line0, *lmt_line1, *lmt_line2, *lmt_line3; + void *lmt_line4, *lmt_line5, *lmt_line6, *lmt_line7; + u64 n_segs[16], aura_handle; + u64 lmt_arg, core_lmt_base_addr, core_lmt_id; + u16 n_drop = 0; + u32 from[VLIB_FRAME_SIZE]; + struct roc_nix_sq *sq; + vlib_buffer_t **b; + + sq = &ctq->sq; + b = bufs; + io_addr = sq->io_addr; + sq_handle = sq->qid; + aura_handle = ctq->aura_handle; + + if (PREDICT_FALSE (ctq->cached_pkts < tx_pkts)) + { + ctq->cached_pkts = (sq->nb_sqb_bufs_adj - *((u64 *) sq->fc)) + << sq->sqes_per_sqb_log2; + + if (PREDICT_FALSE (ctq->cached_pkts < tx_pkts)) + { + if (ctq->cached_pkts < 0) + { + n_drop = tx_pkts; + tx_pkts = 0; + goto free_pkts; + } + n_drop = tx_pkts - ctq->cached_pkts; + tx_pkts = ctq->cached_pkts; + } + } + + send_hdr0 = (struct nix_send_hdr_s *) &desc0[0]; + send_hdr1 = (struct nix_send_hdr_s *) &desc1[0]; + send_hdr2 = (struct nix_send_hdr_s *) &desc2[0]; + send_hdr3 = (struct nix_send_hdr_s *) &desc3[0]; + send_hdr4 = (struct nix_send_hdr_s *) &desc4[0]; + send_hdr5 = (struct nix_send_hdr_s *) &desc5[0]; + send_hdr6 = (struct nix_send_hdr_s *) &desc6[0]; + send_hdr7 = (struct nix_send_hdr_s *) &desc7[0]; + send_hdr8 = (struct nix_send_hdr_s *) &desc8[0]; + send_hdr9 = (struct nix_send_hdr_s *) &desc9[0]; + send_hdr10 = (struct nix_send_hdr_s *) &desc10[0]; + send_hdr11 = (struct nix_send_hdr_s *) &desc11[0]; + send_hdr12 = (struct nix_send_hdr_s *) &desc12[0]; + send_hdr13 = (struct nix_send_hdr_s *) &desc13[0]; + send_hdr14 = (struct nix_send_hdr_s *) &desc14[0]; + send_hdr15 = (struct nix_send_hdr_s *) 
&desc15[0]; + + sg0 = (union nix_send_sg_s *) &desc0[2]; + sg1 = (union nix_send_sg_s *) &desc1[2]; + sg2 = (union nix_send_sg_s *) &desc2[2]; + sg3 = (union nix_send_sg_s *) &desc3[2]; + sg4 = (union nix_send_sg_s *) &desc4[2]; + sg5 = (union nix_send_sg_s *) &desc5[2]; + sg6 = (union nix_send_sg_s *) &desc6[2]; + sg7 = (union nix_send_sg_s *) &desc7[2]; + sg8 = (union nix_send_sg_s *) &desc8[2]; + sg9 = (union nix_send_sg_s *) &desc9[2]; + sg10 = (union nix_send_sg_s *) &desc10[2]; + sg11 = (union nix_send_sg_s *) &desc11[2]; + sg12 = (union nix_send_sg_s *) &desc12[2]; + sg13 = (union nix_send_sg_s *) &desc13[2]; + sg14 = (union nix_send_sg_s *) &desc14[2]; + sg15 = (union nix_send_sg_s *) &desc15[2]; + + core_lmt_base_addr = (u64) sq->lmt_addr; + ROC_LMT_BASE_ID_GET (core_lmt_base_addr, core_lmt_id); + + lmt_line0 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 0); + lmt_line1 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 1); + lmt_line2 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 2); + lmt_line3 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 3); + lmt_line4 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 4); + lmt_line5 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 5); + lmt_line6 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 6); + lmt_line7 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 7); + lmt_line8 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 8); + lmt_line9 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 9); + lmt_line10 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 10); + lmt_line11 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 11); + lmt_line12 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 12); + lmt_line13 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 13); + lmt_line14 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 14); + lmt_line15 = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 15); + + n_packets = tx_pkts; + + while (n_packets > 16) + { + n_segs[0] = oct_get_tx_vlib_buf_segs (vm, b[0]); + n_segs[1] = oct_get_tx_vlib_buf_segs (vm, b[1]); + n_segs[2] = 
oct_get_tx_vlib_buf_segs (vm, b[2]); + n_segs[3] = oct_get_tx_vlib_buf_segs (vm, b[3]); + n_segs[4] = oct_get_tx_vlib_buf_segs (vm, b[4]); + n_segs[5] = oct_get_tx_vlib_buf_segs (vm, b[5]); + n_segs[6] = oct_get_tx_vlib_buf_segs (vm, b[6]); + n_segs[7] = oct_get_tx_vlib_buf_segs (vm, b[7]); + n_segs[8] = oct_get_tx_vlib_buf_segs (vm, b[8]); + n_segs[9] = oct_get_tx_vlib_buf_segs (vm, b[9]); + n_segs[10] = oct_get_tx_vlib_buf_segs (vm, b[10]); + n_segs[11] = oct_get_tx_vlib_buf_segs (vm, b[11]); + n_segs[12] = oct_get_tx_vlib_buf_segs (vm, b[12]); + n_segs[13] = oct_get_tx_vlib_buf_segs (vm, b[13]); + n_segs[14] = oct_get_tx_vlib_buf_segs (vm, b[14]); + n_segs[15] = oct_get_tx_vlib_buf_segs (vm, b[15]); + + n_dwords[0] = oct_add_sg_list (sg0, b[0], n_segs[0]); + n_dwords[1] = oct_add_sg_list (sg1, b[1], n_segs[1]); + n_dwords[2] = oct_add_sg_list (sg2, b[2], n_segs[2]); + n_dwords[3] = oct_add_sg_list (sg3, b[3], n_segs[3]); + n_dwords[4] = oct_add_sg_list (sg4, b[4], n_segs[4]); + n_dwords[5] = oct_add_sg_list (sg5, b[5], n_segs[5]); + n_dwords[6] = oct_add_sg_list (sg6, b[6], n_segs[6]); + n_dwords[7] = oct_add_sg_list (sg7, b[7], n_segs[7]); + n_dwords[8] = oct_add_sg_list (sg8, b[8], n_segs[8]); + n_dwords[9] = oct_add_sg_list (sg9, b[9], n_segs[9]); + n_dwords[10] = oct_add_sg_list (sg10, b[10], n_segs[10]); + n_dwords[11] = oct_add_sg_list (sg11, b[11], n_segs[11]); + n_dwords[12] = oct_add_sg_list (sg12, b[12], n_segs[12]); + n_dwords[13] = oct_add_sg_list (sg13, b[13], n_segs[13]); + n_dwords[14] = oct_add_sg_list (sg14, b[14], n_segs[14]); + n_dwords[15] = oct_add_sg_list (sg15, b[15], n_segs[15]); + + n_dwords[0] += oct_add_send_hdr (send_hdr0, b[0], aura_handle, sq_handle, + n_dwords[0]); + n_dwords[1] += oct_add_send_hdr (send_hdr1, b[1], aura_handle, sq_handle, + n_dwords[1]); + n_dwords[2] += oct_add_send_hdr (send_hdr2, b[2], aura_handle, sq_handle, + n_dwords[2]); + n_dwords[3] += oct_add_send_hdr (send_hdr3, b[3], aura_handle, sq_handle, + 
n_dwords[3]); + n_dwords[4] += oct_add_send_hdr (send_hdr4, b[4], aura_handle, sq_handle, + n_dwords[4]); + n_dwords[5] += oct_add_send_hdr (send_hdr5, b[5], aura_handle, sq_handle, + n_dwords[5]); + n_dwords[6] += oct_add_send_hdr (send_hdr6, b[6], aura_handle, sq_handle, + n_dwords[6]); + n_dwords[7] += oct_add_send_hdr (send_hdr7, b[7], aura_handle, sq_handle, + n_dwords[7]); + + n_dwords[8] += oct_add_send_hdr (send_hdr8, b[8], aura_handle, sq_handle, + n_dwords[8]); + n_dwords[9] += oct_add_send_hdr (send_hdr9, b[9], aura_handle, sq_handle, + n_dwords[9]); + n_dwords[10] += oct_add_send_hdr (send_hdr10, b[10], aura_handle, + sq_handle, n_dwords[10]); + n_dwords[11] += oct_add_send_hdr (send_hdr11, b[11], aura_handle, + sq_handle, n_dwords[11]); + n_dwords[12] += oct_add_send_hdr (send_hdr12, b[12], aura_handle, + sq_handle, n_dwords[12]); + n_dwords[13] += oct_add_send_hdr (send_hdr13, b[13], aura_handle, + sq_handle, n_dwords[13]); + n_dwords[14] += oct_add_send_hdr (send_hdr14, b[14], aura_handle, + sq_handle, n_dwords[14]); + n_dwords[15] += oct_add_send_hdr (send_hdr15, b[15], aura_handle, + sq_handle, n_dwords[15]); + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. 
+ */ + asm volatile("dmb oshst" ::: "memory"); + + /* Clear io_addr[6:0] bits */ + io_addr &= ~0x7FULL; + lmt_arg = core_lmt_id; + + /* Set size-1 of first LMTST at io_addr[6:4] */ + io_addr |= (n_dwords[0] - 1) << 4; + + roc_lmt_mov_seg (lmt_line0, desc0, n_dwords[0]); + roc_lmt_mov_seg (lmt_line1, desc1, n_dwords[1]); + roc_lmt_mov_seg (lmt_line2, desc2, n_dwords[2]); + roc_lmt_mov_seg (lmt_line3, desc3, n_dwords[3]); + roc_lmt_mov_seg (lmt_line4, desc4, n_dwords[4]); + roc_lmt_mov_seg (lmt_line5, desc5, n_dwords[5]); + roc_lmt_mov_seg (lmt_line6, desc6, n_dwords[6]); + roc_lmt_mov_seg (lmt_line7, desc7, n_dwords[7]); + roc_lmt_mov_seg (lmt_line8, desc8, n_dwords[8]); + roc_lmt_mov_seg (lmt_line9, desc9, n_dwords[9]); + roc_lmt_mov_seg (lmt_line10, desc10, n_dwords[10]); + roc_lmt_mov_seg (lmt_line11, desc11, n_dwords[11]); + roc_lmt_mov_seg (lmt_line12, desc12, n_dwords[12]); + roc_lmt_mov_seg (lmt_line13, desc13, n_dwords[13]); + roc_lmt_mov_seg (lmt_line14, desc14, n_dwords[14]); + roc_lmt_mov_seg (lmt_line15, desc15, n_dwords[15]); + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (16 - 1) << 12; + + /* + * Set vector of sizes of next 15 LMTSTs. 
+ * Every 3 bits represent size-1 of one LMTST + */ + lmt_arg |= (n_dwords[1] - 1) << (19 + (3 * 0)); + lmt_arg |= (n_dwords[2] - 1) << (19 + (3 * 1)); + lmt_arg |= (n_dwords[3] - 1) << (19 + (3 * 2)); + lmt_arg |= (n_dwords[4] - 1) << (19 + (3 * 3)); + lmt_arg |= (n_dwords[5] - 1) << (19 + (3 * 4)); + lmt_arg |= (n_dwords[6] - 1) << (19 + (3 * 5)); + lmt_arg |= (n_dwords[7] - 1) << (19 + (3 * 6)); + lmt_arg |= (n_dwords[8] - 1) << (19 + (3 * 7)); + lmt_arg |= (n_dwords[9] - 1) << (19 + (3 * 8)); + lmt_arg |= (n_dwords[10] - 1) << (19 + (3 * 9)); + lmt_arg |= (n_dwords[11] - 1) << (19 + (3 * 10)); + lmt_arg |= (n_dwords[12] - 1) << (19 + (3 * 11)); + lmt_arg |= (n_dwords[13] - 1) << (19 + (3 * 12)); + lmt_arg |= (n_dwords[14] - 1) << (19 + (3 * 13)); + lmt_arg |= (n_dwords[15] - 1) << (19 + (3 * 14)); + + roc_lmt_submit_steorl (lmt_arg, io_addr); + + n_packets -= 16; + b += 16; + } + + while (n_packets > 8) + { + n_segs[0] = oct_get_tx_vlib_buf_segs (vm, b[0]); + n_segs[1] = oct_get_tx_vlib_buf_segs (vm, b[1]); + n_segs[2] = oct_get_tx_vlib_buf_segs (vm, b[2]); + n_segs[3] = oct_get_tx_vlib_buf_segs (vm, b[3]); + n_segs[4] = oct_get_tx_vlib_buf_segs (vm, b[4]); + n_segs[5] = oct_get_tx_vlib_buf_segs (vm, b[5]); + n_segs[6] = oct_get_tx_vlib_buf_segs (vm, b[6]); + n_segs[7] = oct_get_tx_vlib_buf_segs (vm, b[7]); + + n_dwords[0] = oct_add_sg_list (sg0, b[0], n_segs[0]); + n_dwords[1] = oct_add_sg_list (sg1, b[1], n_segs[1]); + n_dwords[2] = oct_add_sg_list (sg2, b[2], n_segs[2]); + n_dwords[3] = oct_add_sg_list (sg3, b[3], n_segs[3]); + n_dwords[4] = oct_add_sg_list (sg4, b[4], n_segs[4]); + n_dwords[5] = oct_add_sg_list (sg5, b[5], n_segs[5]); + n_dwords[6] = oct_add_sg_list (sg6, b[6], n_segs[6]); + n_dwords[7] = oct_add_sg_list (sg7, b[7], n_segs[7]); + + n_dwords[0] += oct_add_send_hdr (send_hdr0, b[0], aura_handle, sq_handle, + n_dwords[0]); + n_dwords[1] += oct_add_send_hdr (send_hdr1, b[1], aura_handle, sq_handle, + n_dwords[1]); + n_dwords[2] += 
oct_add_send_hdr (send_hdr2, b[2], aura_handle, sq_handle, + n_dwords[2]); + n_dwords[3] += oct_add_send_hdr (send_hdr3, b[3], aura_handle, sq_handle, + n_dwords[3]); + n_dwords[4] += oct_add_send_hdr (send_hdr4, b[4], aura_handle, sq_handle, + n_dwords[4]); + n_dwords[5] += oct_add_send_hdr (send_hdr5, b[5], aura_handle, sq_handle, + n_dwords[5]); + n_dwords[6] += oct_add_send_hdr (send_hdr6, b[6], aura_handle, sq_handle, + n_dwords[6]); + n_dwords[7] += oct_add_send_hdr (send_hdr7, b[7], aura_handle, sq_handle, + n_dwords[7]); + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile("dmb oshst" ::: "memory"); + + /* Clear io_addr[6:0] bits */ + io_addr &= ~0x7FULL; + lmt_arg = core_lmt_id; + + /* Set size-1 of first LMTST at io_addr[6:4] */ + io_addr |= (n_dwords[0] - 1) << 4; + + roc_lmt_mov_seg (lmt_line0, desc0, n_dwords[0]); + roc_lmt_mov_seg (lmt_line1, desc1, n_dwords[1]); + roc_lmt_mov_seg (lmt_line2, desc2, n_dwords[2]); + roc_lmt_mov_seg (lmt_line3, desc3, n_dwords[3]); + roc_lmt_mov_seg (lmt_line4, desc4, n_dwords[4]); + roc_lmt_mov_seg (lmt_line5, desc5, n_dwords[5]); + roc_lmt_mov_seg (lmt_line6, desc6, n_dwords[6]); + roc_lmt_mov_seg (lmt_line7, desc7, n_dwords[7]); + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (8 - 1) << 12; + + /* + * Set vector of sizes of next 7 LMTSTs. 
+ * Every 3 bits represent size-1 of one LMTST + */ + lmt_arg |= (n_dwords[1] - 1) << (19 + (3 * 0)); + lmt_arg |= (n_dwords[2] - 1) << (19 + (3 * 1)); + lmt_arg |= (n_dwords[3] - 1) << (19 + (3 * 2)); + lmt_arg |= (n_dwords[4] - 1) << (19 + (3 * 3)); + lmt_arg |= (n_dwords[5] - 1) << (19 + (3 * 4)); + lmt_arg |= (n_dwords[6] - 1) << (19 + (3 * 5)); + lmt_arg |= (n_dwords[7] - 1) << (19 + (3 * 6)); + + roc_lmt_submit_steorl (lmt_arg, io_addr); + + n_packets -= 8; + b += 8; + } + + while (n_packets > 4) + { + n_segs[0] = oct_get_tx_vlib_buf_segs (vm, b[0]); + n_segs[1] = oct_get_tx_vlib_buf_segs (vm, b[1]); + n_segs[2] = oct_get_tx_vlib_buf_segs (vm, b[2]); + n_segs[3] = oct_get_tx_vlib_buf_segs (vm, b[3]); + + n_dwords[0] = oct_add_sg_list (sg0, b[0], n_segs[0]); + n_dwords[1] = oct_add_sg_list (sg1, b[1], n_segs[1]); + n_dwords[2] = oct_add_sg_list (sg2, b[2], n_segs[2]); + n_dwords[3] = oct_add_sg_list (sg3, b[3], n_segs[3]); + + n_dwords[0] += oct_add_send_hdr (send_hdr0, b[0], aura_handle, sq_handle, + n_dwords[0]); + n_dwords[1] += oct_add_send_hdr (send_hdr1, b[1], aura_handle, sq_handle, + n_dwords[1]); + n_dwords[2] += oct_add_send_hdr (send_hdr2, b[2], aura_handle, sq_handle, + n_dwords[2]); + n_dwords[3] += oct_add_send_hdr (send_hdr3, b[3], aura_handle, sq_handle, + n_dwords[3]); + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile("dmb oshst" ::: "memory"); + + /* Clear io_addr[6:0] bits */ + io_addr &= ~0x7FULL; + lmt_arg = core_lmt_id; + + /* Set size-1 of first LMTST at io_addr[6:4] */ + io_addr |= (n_dwords[0] - 1) << 4; + + roc_lmt_mov_seg (lmt_line0, desc0, n_dwords[0]); + roc_lmt_mov_seg (lmt_line1, desc1, n_dwords[1]); + roc_lmt_mov_seg (lmt_line2, desc2, n_dwords[2]); + roc_lmt_mov_seg (lmt_line3, desc3, n_dwords[3]); + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (4 - 1) << 12; + + /* + * Set vector of sizes of next 3 LMTSTs. 
+ * Every 3 bits represent size-1 of one LMTST + */ + lmt_arg |= (n_dwords[1] - 1) << (19 + (3 * 0)); + lmt_arg |= (n_dwords[2] - 1) << (19 + (3 * 1)); + lmt_arg |= (n_dwords[3] - 1) << (19 + (3 * 2)); + + roc_lmt_submit_steorl (lmt_arg, io_addr); + + n_packets -= 4; + b += 4; + } + + while (n_packets) + { + lmt_arg = core_lmt_id; + + if (n_packets > 2) + vlib_prefetch_buffer_header (b[2], LOAD); + + n_segs[0] = oct_get_tx_vlib_buf_segs (vm, b[0]); + + n_dwords[0] = oct_add_sg_list (sg0, b[0], n_segs[0]); + n_dwords[0] += oct_add_send_hdr (send_hdr0, b[0], aura_handle, sq_handle, + n_dwords[0]); + + /* Clear io_addr[6:0] bits */ + io_addr &= ~0x7FULL; + + /* Set size-1 of first LMTST at io_addr[6:4] */ + io_addr |= (n_dwords[0] - 1) << 4; + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile("dmb oshst" ::: "memory"); + + roc_lmt_mov_seg (lmt_line0, desc0, n_dwords[0]); + + roc_lmt_submit_steorl (lmt_arg, io_addr); + + n_packets -= 1; + b += 1; + } + + ctq->cached_pkts -= tx_pkts; + +free_pkts: + if (PREDICT_FALSE (n_drop)) + { + vlib_get_buffer_indices_with_offset (vm, (void **) b, from, n_drop, 0); + vlib_buffer_free (vm, from, n_drop); + } + + return tx_pkts; +} + +i32 static_always_inline +oct_pkts_send_ipsec (vlib_main_t *vm, vlib_node_runtime_t *node, + oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, u16 tx_pkts, + vlib_buffer_t **bufs) +{ + oct_ipsec_main_t *im = &oct_ipsec_main; + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + u16 num_tx_queues = txq->port->intf.num_tx_queues; + u64 aura_handle = ctq->aura_handle; + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + u32 current_sq0, current_sq1, current_sq2, current_sq3; + u64 sq_handle0, sq_handle1, sq_handle2, sq_handle3; + u32 sa0_index, sa1_index, sa2_index, sa3_index; + u32 current_sa0_index = ~0, current_sa1_index = ~0; + u32 current_sa2_index = ~0, current_sa3_index = ~0; + 
oct_ipsec_session_t *sess0 = NULL, *sess1 = NULL; + oct_ipsec_session_t *sess2 = NULL, *sess3 = NULL; + struct cpt_inst_s inst0 = { 0 }, inst1 = { 0 }, inst2 = { 0 }, inst3 = { 0 }; + u64 core_lmt_base_addr, lmt_arg, core_lmt_id; + oct_ipsec_outbound_pkt_meta_t *pkt_meta[4]; + u16 n_cpt_fc_drop = 0, n_nix_fc_drop = 0; + u16 n_left0, n_left1, n_left2, n_left3; + u16 n_packets; + struct roc_cpt_lf *cpt_lf = NULL; + u32 failed_buff[VLIB_FRAME_SIZE]; + u32 from[VLIB_FRAME_SIZE]; + u16 sq0, sq1, sq2, sq3; + struct roc_nix_sq *sq; + u32 quad_bit, count; + vlib_buffer_t **b; + u64 *lmt_line[4]; + u64 n_dwords[4]; + + b = bufs; + + sq_handle0 = 0; + sq_handle1 = 0; + sq_handle2 = 0; + sq_handle3 = 0; + + current_sq0 = ~0; + current_sq1 = ~0; + current_sq2 = ~0; + current_sq3 = ~0; + + core_lmt_base_addr = (uintptr_t) ctq->lmt_addr; + ROC_LMT_BASE_ID_GET (core_lmt_base_addr, core_lmt_id); + + lmt_line[0] = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 0); + lmt_line[1] = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 1); + lmt_line[2] = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 2); + lmt_line[3] = OCT_LMT_GET_LINE_ADDR (core_lmt_base_addr, 3); + + /* Check CPT flow control */ + cpt_lf = roc_nix_inl_outb_lf_base_get (cd->nix); + n_left0 = oct_check_fc_cpt (cpt_lf, (u32 *) &cd->cached_cpt_pkts, tx_pkts); + n_cpt_fc_drop = tx_pkts - n_left0; + + if (!n_left0) + goto cpt_fc_drop; + + /* Process packets up to CPT queue depth */ + n_packets = n_left0; + + n_left0 = 0; + n_left1 = 0; + n_left2 = 0; + n_left3 = 0; + + while (n_packets > 3) + { + pkt_meta[0] = + (oct_ipsec_outbound_pkt_meta_t *) OCT_EXT_HDR_FROM_VLIB_BUFFER (b[0]); + pkt_meta[1] = + (oct_ipsec_outbound_pkt_meta_t *) OCT_EXT_HDR_FROM_VLIB_BUFFER (b[1]); + pkt_meta[2] = + (oct_ipsec_outbound_pkt_meta_t *) OCT_EXT_HDR_FROM_VLIB_BUFFER (b[2]); + pkt_meta[3] = + (oct_ipsec_outbound_pkt_meta_t *) OCT_EXT_HDR_FROM_VLIB_BUFFER (b[3]); + + sa0_index = vnet_buffer (b[0])->ipsec.sad_index; + if (sa0_index != current_sa0_index) + { + 
sess0 = pool_elt_at_index (im->inline_ipsec_sessions, sa0_index); + if (!sess0->inst.w7.s.cptr) + { + sess0->inst.w7.s.cptr = (u64) sess0->out_sa[cd->nix_idx]; + sess0->sq = + ((sa0_index % vlib_num_workers ()) + 1) % num_tx_queues; + } + current_sa0_index = sa0_index; + ALWAYS_ASSERT (current_sa0_index < + vec_len (im->inline_ipsec_sessions)); + } + + sa1_index = vnet_buffer (b[1])->ipsec.sad_index; + if (sa1_index != current_sa1_index) + { + sess1 = pool_elt_at_index (im->inline_ipsec_sessions, sa1_index); + if (!sess1->inst.w7.s.cptr) + { + sess1->sq = + ((sa1_index % vlib_num_workers ()) + 1) % num_tx_queues; + sess1->inst.w7.s.cptr = (u64) sess1->out_sa[cd->nix_idx]; + } + current_sa1_index = sa1_index; + ALWAYS_ASSERT (current_sa0_index < + vec_len (im->inline_ipsec_sessions)); + } + + sa2_index = vnet_buffer (b[2])->ipsec.sad_index; + if (sa2_index != current_sa2_index) + { + sess2 = pool_elt_at_index (im->inline_ipsec_sessions, sa2_index); + if (!sess2->inst.w7.s.cptr) + { + sess2->sq = + ((sa2_index % vlib_num_workers ()) + 1) % num_tx_queues; + sess2->inst.w7.s.cptr = (u64) sess2->out_sa[cd->nix_idx]; + } + current_sa2_index = sa2_index; + ALWAYS_ASSERT (current_sa2_index < + vec_len (im->inline_ipsec_sessions)); + } + + sa3_index = vnet_buffer (b[3])->ipsec.sad_index; + if (sa3_index != current_sa3_index) + { + sess3 = pool_elt_at_index (im->inline_ipsec_sessions, sa3_index); + if (!sess3->inst.w7.s.cptr) + { + sess3->sq = + ((sa3_index % vlib_num_workers ()) + 1) % num_tx_queues; + sess3->inst.w7.s.cptr = (u64) sess3->out_sa[cd->nix_idx]; + } + current_sa3_index = sa3_index; + ALWAYS_ASSERT (current_sa3_index < + vec_len (im->inline_ipsec_sessions)); + } + + oct_ipsec_outb_data (b[0])->res.cn10k.compcode = CPT_COMP_NOT_DONE; + oct_ipsec_outb_data (b[1])->res.cn10k.compcode = CPT_COMP_NOT_DONE; + oct_ipsec_outb_data (b[2])->res.cn10k.compcode = CPT_COMP_NOT_DONE; + oct_ipsec_outb_data (b[3])->res.cn10k.compcode = CPT_COMP_NOT_DONE; + + inst0.res_addr = 
(u64) &oct_ipsec_outb_data (b[0])->res; + inst1.res_addr = (u64) &oct_ipsec_outb_data (b[1])->res; + inst2.res_addr = (u64) &oct_ipsec_outb_data (b[2])->res; + inst3.res_addr = (u64) &oct_ipsec_outb_data (b[3])->res; + + inst0.w2.u64 = sess0->inst.w2.u64; + inst1.w2.u64 = sess1->inst.w2.u64; + inst2.w2.u64 = sess2->inst.w2.u64; + inst3.w2.u64 = sess3->inst.w2.u64; + + inst0.w3.u64 = (uintptr_t) (b[0]); + inst1.w3.u64 = (uintptr_t) (b[1]); + inst2.w3.u64 = (uintptr_t) (b[2]); + inst3.w3.u64 = (uintptr_t) (b[3]); + + inst0.w3.u64 |= 0x1ULL; + inst1.w3.u64 |= 0x1ULL; + inst2.w3.u64 |= 0x1ULL; + inst3.w3.u64 |= 0x1ULL; + + inst0.w7.u64 = sess0->inst.w7.u64; + inst1.w7.u64 = sess1->inst.w7.u64; + inst2.w7.u64 = sess2->inst.w7.u64; + inst3.w7.u64 = sess3->inst.w7.u64; + + sq0 = sess0->sq; + sq1 = sess1->sq; + sq2 = sess2->sq; + sq3 = sess3->sq; + + quad_bit = 0; + count = 0; + + if (current_sq0 != sq0) + { + ctq = cd->ctqs[sq0]; + sq = &ctq->sq; + sq_handle0 = sq->qid; + n_left0 = oct_check_fc_nix (sq, &ctq->cached_pkts, n_packets >> 2); + current_sq0 = sq0; + } + if (current_sq1 != sq1) + { + ctq = cd->ctqs[sq1]; + sq = &ctq->sq; + sq_handle1 = sq->qid; + n_left1 = oct_check_fc_nix (sq, &ctq->cached_pkts, n_packets >> 2); + current_sq1 = sq1; + } + if (current_sq2 != sq2) + { + ctq = cd->ctqs[sq2]; + sq = &ctq->sq; + sq_handle2 = sq->qid; + n_left2 = oct_check_fc_nix (sq, &ctq->cached_pkts, n_packets >> 2); + current_sq2 = sq2; + } + if (current_sq3 != sq3) + { + ctq = cd->ctqs[sq3]; + sq = &ctq->sq; + sq_handle3 = sq->qid; + n_left3 = oct_check_fc_nix (sq, &ctq->cached_pkts, n_packets >> 2); + current_sq3 = sq3; + } + quad_bit |= !(!n_left0) << 0; + quad_bit |= !(!n_left1) << 1; + quad_bit |= !(!n_left2) << 2; + quad_bit |= !(!n_left3) << 3; + + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (uint64_t) core_lmt_id; + if (quad_bit == 0x0F) + { + oct_prepare_ipsec_inst (vm, b[0], sq_handle0, aura_handle, + &pkt_meta[0], &inst0, &n_dwords[0], sess0); + oct_prepare_ipsec_inst (vm, 
b[1], sq_handle1, aura_handle, + &pkt_meta[1], &inst1, &n_dwords[1], sess1); + oct_prepare_ipsec_inst (vm, b[2], sq_handle2, aura_handle, + &pkt_meta[2], &inst2, &n_dwords[2], sess2); + oct_prepare_ipsec_inst (vm, b[3], sq_handle3, aura_handle, + &pkt_meta[3], &inst3, &n_dwords[3], sess3); + + oct_submit_quad_packets (lmt_arg, cd, &inst0, &inst1, &inst2, &inst3, + n_dwords, lmt_line); + + n_left0 -= 1; + n_left1 -= 1; + n_left2 -= 1; + n_left3 -= 1; + count += 4; + } + else if (quad_bit != 0x0) + { + if (n_left0) + { + oct_prepare_ipsec_inst (vm, b[0], sq_handle0, aura_handle, + &pkt_meta[0], &inst0, &n_dwords[0], + sess0), + roc_lmt_mov_seg ((void *) lmt_line[count], &inst0, 4); + count++; + n_left0 -= 1; + } + else + { + failed_buff[n_nix_fc_drop] = vlib_get_buffer_index (vm, b[0]); + n_nix_fc_drop++; + } + if (n_left1) + { + oct_prepare_ipsec_inst (vm, b[1], sq_handle1, aura_handle, + &pkt_meta[1], &inst1, &n_dwords[1], + sess1); + roc_lmt_mov_seg ((void *) lmt_line[count], &inst1, 4); + if (count) + lmt_arg |= (n_dwords[1] - 1) << (19 + (3 * (count - 1))); + count++; + n_left1 -= 1; + } + else + { + failed_buff[n_nix_fc_drop] = vlib_get_buffer_index (vm, b[1]); + n_nix_fc_drop++; + } + if (n_left2) + { + oct_prepare_ipsec_inst (vm, b[2], sq_handle2, aura_handle, + &pkt_meta[2], &inst2, &n_dwords[2], + sess2); + roc_lmt_mov_seg ((void *) lmt_line[count], &inst2, 4); + if (count) + lmt_arg |= (n_dwords[2] - 1) << (19 + (3 * (count - 1))); + count++; + n_left2 -= 1; + } + else + { + failed_buff[n_nix_fc_drop] = vlib_get_buffer_index (vm, b[2]); + n_nix_fc_drop++; + } + if (n_left3) + { + oct_prepare_ipsec_inst (vm, b[3], sq_handle3, aura_handle, + &pkt_meta[3], &inst3, &n_dwords[3], + sess3); + roc_lmt_mov_seg ((void *) lmt_line[count], &inst3, 4); + if (count) + lmt_arg |= (n_dwords[3] - 1) << (19 + (3 * (count - 1))); + count++; + n_left3 -= 1; + } + else + { + failed_buff[n_nix_fc_drop] = vlib_get_buffer_index (vm, b[3]); + n_nix_fc_drop++; + } + if (count == 
1) + lmt_arg = ROC_CN10K_CPT_LMT_ARG | core_lmt_id; + else + lmt_arg |= (count - 1) << 12; + roc_lmt_submit_steorl (lmt_arg, cd->cpt_io_addr); + asm volatile("dmb oshst" ::: "memory"); + } + else if (quad_bit == 0x0) + { + failed_buff[n_nix_fc_drop] = vlib_get_buffer_index (vm, b[0]); + failed_buff[n_nix_fc_drop + 1] = vlib_get_buffer_index (vm, b[1]); + failed_buff[n_nix_fc_drop + 2] = vlib_get_buffer_index (vm, b[2]); + failed_buff[n_nix_fc_drop + 3] = vlib_get_buffer_index (vm, b[3]); + n_nix_fc_drop += 4; + } + + b += 4; + n_packets -= 4; + } + + current_sq0 = ~0; + sq_handle0 = 0; + n_left0 = 0; + + while (n_packets) + { + pkt_meta[0] = + (oct_ipsec_outbound_pkt_meta_t *) OCT_EXT_HDR_FROM_VLIB_BUFFER (b[0]); + sa0_index = vnet_buffer (b[0])->ipsec.sad_index; + if (sa0_index != current_sa0_index) + { + sess0 = pool_elt_at_index (im->inline_ipsec_sessions, sa0_index); + if (!sess0->inst.w7.s.cptr) + { + sess0->sq = + ((sa0_index % vlib_num_workers ()) + 1) % num_tx_queues; + sess0->inst.w7.s.cptr = (u64) sess0->out_sa[cd->nix_idx]; + } + current_sa0_index = sa0_index; + ALWAYS_ASSERT (current_sa0_index < + vec_len (im->inline_ipsec_sessions)); + } + + oct_ipsec_outb_data (b[0])->res.cn10k.compcode = CPT_COMP_NOT_DONE; + inst0.res_addr = (u64) &oct_ipsec_outb_data (b[0])->res; + inst0.w2.u64 = sess0->inst.w2.u64; + inst0.w3.u64 = (uintptr_t) (b[0]); + inst0.w3.u64 |= 0x1ULL; + inst0.w7.u64 = sess0->inst.w7.u64; + + sq0 = sess0->sq; + + if (current_sq0 != sq0) + { + ctq = cd->ctqs[sq0]; + sq = &ctq->sq; + sq_handle0 = sq->qid; + n_left0 = oct_check_fc_nix (sq, &ctq->cached_pkts, n_packets); + current_sq0 = sq0; + } + if (!n_left0) + { + failed_buff[n_nix_fc_drop] = vlib_get_buffer_index (vm, b[0]); + n_nix_fc_drop++; + goto next; + } + + oct_prepare_ipsec_inst (vm, b[0], sq_handle0, aura_handle, &pkt_meta[0], + &inst0, &n_dwords[0], sess0); + + roc_lmt_mov_seg ((void *) lmt_line[0], &inst0, 4); + + lmt_arg = ROC_CN10K_CPT_LMT_ARG | core_lmt_id; + + 
roc_lmt_submit_steorl (lmt_arg, cd->cpt_io_addr); + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile("dmb oshst" ::: "memory"); + + n_left0 -= 1; + next: + n_packets -= 1; + b += 1; + } + + /* + * Free packets which failed in nix_fc_check. + * These packet indices are stored in failed_buff, + * as they may not be contiguous when received. + */ + if (PREDICT_FALSE (n_nix_fc_drop)) + vlib_buffer_free (vm, failed_buff, n_nix_fc_drop); + +cpt_fc_drop: + if (PREDICT_FALSE (n_cpt_fc_drop)) + { + vlib_get_buffer_indices_with_offset (vm, (void **) b, from, + n_cpt_fc_drop, 0); + vlib_buffer_free (vm, from, n_cpt_fc_drop); + } + + return tx_pkts - n_cpt_fc_drop - n_nix_fc_drop; +} + +VNET_DEV_NODE_FN (oct_tx_ipsec_tm_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); + vnet_dev_tx_queue_t *txq = rt->tx_queue; + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + u32 node_index = node->node_index; + u32 n_left, n_pkts = frame->n_vectors; + vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers; + vlib_buffer_t *ipsec_buff[VLIB_FRAME_SIZE + 8]; + vlib_buffer_t *buff[VLIB_FRAME_SIZE + 8]; + int ipsec_cnt = 0, pkt_cnt = 0; +#ifdef PLATFORM_OCTEON9 + u64 lmt_id = 0; +#else + u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2; +#endif + + oct_tx_ctx_t ctx = { + .node = node, + .hdr_w0_teplate = { + .aura = roc_npa_aura_handle_to_aura (cd->ctqs[0]->aura_handle), + .sq = ctq->sq.qid, + .sizem1 = 1, + }, + .max_pkt_len = roc_nix_max_pkt_len (cd->nix), + .lmt_id = lmt_id, + .lmt_ioaddr = ctq->io_addr, + .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2), + }; + + oct_batch_free (vm, &ctx, txq, OCT_TX_IPSEC_TM_NODE); + + vlib_get_buffers (vm, vlib_frame_vector_args (frame), b, n_pkts); + 
n_left = n_pkts; + while (n_pkts) + { + if (vnet_buffer (b[0])->oflags & VNET_BUFFER_OFFLOAD_F_IPSEC_OFFLOAD) + ipsec_buff[ipsec_cnt++] = b[0]; + else + buff[pkt_cnt++] = b[0]; + + b++; + n_pkts--; + } + + if (ipsec_cnt) + ipsec_cnt = + oct_pkts_send_ipsec (vm, node, &ctx, txq, ipsec_cnt, ipsec_buff); + + if (pkt_cnt) + pkt_cnt = oct_pkts_send (vm, node, &ctx, txq, pkt_cnt, buff); + + if (PREDICT_FALSE (n_left != (ipsec_cnt + pkt_cnt))) + { + vlib_error_count (vm, node_index, OCT_TX_NODE_CTR_NO_FREE_SLOTS, + (n_left - ipsec_cnt - pkt_cnt)); + } + + return (ipsec_cnt + pkt_cnt); +} + VNET_DEV_NODE_FN (oct_tx_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); vnet_dev_tx_queue_t *txq = rt->tx_queue; oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); u32 node_index = node->node_index; u32 *from = vlib_frame_vector_args (frame); u32 n, n_enq, n_left, n_pkts = frame->n_vectors; vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers; +#ifdef PLATFORM_OCTEON9 + u64 lmt_id = 0; +#else u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2; +#endif oct_tx_ctx_t ctx = { .node = node, @@ -355,6 +1777,7 @@ VNET_DEV_NODE_FN (oct_tx_node) .sq = ctq->sq.qid, .sizem1 = 1, }, + .max_pkt_len = roc_nix_max_pkt_len (cd->nix), .lmt_id = lmt_id, .lmt_ioaddr = ctq->io_addr, .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2), @@ -367,7 +1790,7 @@ VNET_DEV_NODE_FN (oct_tx_node) vnet_dev_tx_queue_lock_if_needed (txq); n_enq = ctq->n_enq; - n_enq -= oct_batch_free (vm, &ctx, txq); + n_enq -= oct_batch_free (vm, &ctx, txq, OCT_TX_NODE); if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) { @@ -388,21 +1811,25 @@ VNET_DEV_NODE_FN (oct_tx_node) n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0); } - ctq->n_enq = n_enq + n; + ctq->n_enq = n_enq + n - ctx.n_drop - ctx.n_exd_mtu; if (n < 
n_pkts) { - n = n_pkts - n; - vlib_buffer_free (vm, from + n, n); + u32 n_free = n_pkts - n; + vlib_buffer_free (vm, from + n, n_free); vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_NO_FREE_SLOTS, - n); - n_pkts -= ctx.n_drop; + n_free); + n_pkts -= n_free; } if (ctx.n_drop) vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG, ctx.n_drop); + if (PREDICT_FALSE (ctx.n_exd_mtu)) + vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_MTU_EXCEEDED, + ctx.n_exd_mtu); + if (ctx.batch_alloc_not_ready) vlib_error_count (vm, node_index, OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY, @@ -423,5 +1850,13 @@ VNET_DEV_NODE_FN (oct_tx_node) n_pkts -= ctx.n_drop; } + if (PREDICT_FALSE (ctx.n_exd_mtu)) + { + u32 bi[VLIB_FRAME_SIZE]; + vlib_get_buffer_indices (vm, ctx.exd_mtu, bi, ctx.n_exd_mtu); + vlib_buffer_free (vm, bi, ctx.n_exd_mtu); + n_pkts -= ctx.n_exd_mtu; + } + return n_pkts; } diff --git a/src/plugins/dev_octeon/virtio.c b/src/plugins/dev_octeon/virtio.c new file mode 100644 index 0000000000..1337decffd --- /dev/null +++ b/src/plugins/dev_octeon/virtio.c @@ -0,0 +1,435 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. 
/*
 * Device arguments accepted by the octeon virtio driver.
 *
 * Each entry pairs an id from enum oct_virtio_dev_args_types with its
 * user-visible name, description and value type.  The table is terminated
 * by the VNET_DEV_ARG_END entry.
 */
static vnet_dev_arg_t oct_virtio_dev_args[] = {
  {
    /* Unsigned count; defaults to 1 when not given. */
    .id = DEV_ARG_VIRT_NB_VIRTIO_DEVICES,
    .name = "nb_virtio",
    .desc = "Number of virtio device",
    .type = VNET_DEV_ARG_TYPE_UINT32,
    .default_val.uint32 = 1,
  },
  {
    /* String argument; no default value. */
    .id = DEV_ARG_VIRT_DMA_DEVICE_LIST,
    .name = "dma",
    .desc = "DMA device list",
    .type = VNET_DEV_ARG_TYPE_STRING,
  },
  {
    /* String argument; no default value. */
    .id = DEV_ARG_VIRT_MISC_DEVICE,
    .name = "misc",
    .desc = "Miscellaneous device list",
    .type = VNET_DEV_ARG_TYPE_STRING,
  },
  {
    /* Boolean switch; off by default. */
    .id = DEV_ARG_VIRT_CSUM_OFFLD_EN,
    .name = "enable_csum_offld",
    .desc = "Enable Host checksum offload",
    .type = VNET_DEV_ARG_TYPE_BOOL,
    .default_val.boolean = 0,
  },
  {
    /* Table terminator. */
    .id = DEV_ARG_VIRT_END,
    .name = "end",
    .desc = "Argument end",
    .type = VNET_DEV_ARG_END,
  },
};
vlib_buffer_pool_t *bp; + int iova_mode = rte_eal_iova_mode (); + + vec_foreach (bp, vm->buffer_main->buffer_pools) + { + if (bp->start) + { + pm = vlib_physmem_get_map (vm, bp->physmem_map_index); + page_sz = 1ULL << pm->log2_page_size; + for (i = 0; i < pm->n_pages; i++) + { + char *va = ((char *) pm->base) + i * page_sz; + uword pa = (iova_mode == RTE_IOVA_VA) ? pointer_to_uword (va) : + pm->page_table[i]; + + dao_pal_vfio_dma_map (pointer_to_uword (va), pa, page_sz); + } + } + } +} + +static clib_error_t * +dao_log_read (clib_file_t *uf) +{ + unformat_input_t input; + u8 *line, *s = 0; + int n, n_try; + + n = n_try = 4096; + while (n == n_try) + { + uword len = vec_len (s); + vec_resize (s, len + n_try); + + n = read (uf->file_descriptor, s + len, n_try); + if (n < 0 && errno != EAGAIN) + return clib_error_return_unix (0, "read"); + vec_set_len (s, len + (n < 0 ? 0 : n)); + } + + unformat_init_vector (&input, s); + + while (unformat_user (&input, unformat_line, &line)) + { + vec_add1 (line, 0); + vec_pop (line); + clib_warning ("%v", line); + vec_free (line); + } + + unformat_free (&input); + return 0; +} + +static void +dao_lib_logging (void) +{ + int log_fds[2] = { 0 }; + + if (pipe (log_fds) == 0) + { + if (fcntl (log_fds[0], F_SETFL, O_NONBLOCK) == 0 && + fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0) + { + FILE *f = fdopen (log_fds[1], "a"); + if (f && rte_openlog_stream (f) == 0) + { + clib_file_t t = { 0 }; + t.read_function = dao_log_read; + t.file_descriptor = log_fds[0]; + t.description = format (0, "DAO logging pipe"); + clib_file_add (&file_main, &t); + } + } + else + { + close (log_fds[0]); + close (log_fds[1]); + } + } +} + +static u8 * +oct_virtio_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, + void *dev_info) +{ + oct_dev_bus_virtio_device_info_t *di = dev_info; + + if (di->vendor_id != 0x177d || di->device_id != OCT_VIRTIO_DEVICE_ID) + return 0; + + return format (0, "%s", OCTEON_VIRTIO_DEV); +} + +static char ** 
+oct_populate_dma_device_list (u16 *nb_elem, u8 *dma_list) +{ + char *device = NULL; + char **vec = NULL; + char *saveptr; + u16 count = 0; + + device = strtok_r ((char *) dma_list, ",", &saveptr); + while (device) + { + vec = reallocarray (vec, count + 1, sizeof (vec)); + vec[count] = strdup (device); + count++; + device = strtok_r (saveptr, ",", &saveptr); + } + *nb_elem = count; + return vec; +} + +static void +oct_virtio_parse_arguments (dao_pal_global_conf_t *conf, vnet_dev_arg_t *args) +{ + vnet_dev_arg_t *a = args; + + for (; a < vec_end (args) && a->val_set; a++) + { + switch (a->id) + { + case DEV_ARG_VIRT_NB_VIRTIO_DEVICES: + conf->nb_virtio_devs = vnet_dev_arg_get_uint32 (a); + break; + case DEV_ARG_VIRT_DMA_DEVICE_LIST: + conf->dma_devices = oct_populate_dma_device_list ( + &conf->nb_dma_devs, vnet_dev_arg_get_string (a)); + break; + case DEV_ARG_VIRT_MISC_DEVICE: + conf->misc_devices = oct_populate_dma_device_list ( + &conf->nb_misc_devices, vnet_dev_arg_get_string (a)); + break; + case DEV_ARG_VIRT_CSUM_OFFLD_EN: + oct_virtio_main->ip4_csum_offload_enable = vnet_dev_arg_get_bool (a); + break; + default: + log_info ("Invalid virtio device arguments received\n"); + } + + } +} + +static vnet_dev_rv_t +oct_virtio_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + u8 mac_addr[6]; + vnet_dev_rv_t rv; + uint64_t lcore_mask; + oct_virtio_port_t ovp = {}; + dao_pal_global_conf_t conf = { 0 }; + struct dao_virtio_netdev_cbs cbs = {}; + oct_dev_bus_virtio_device_data_t *bus_data; + oct_virtio_device_t *device_data = vnet_dev_get_data (dev); + + bus_data = oct_get_bus_virtio_device_data (dev); + + if (!oct_virtio_main->dao_lib_initialized) + { + /** + * The initialization of the DAO library will be carried out using the + * arguments provided during the first initialization of the virtio + * interface. Any arguments provided from the second virtio device + * onwards will be disregarded. 
+ */ + oct_virtio_parse_arguments (&conf, dev->args); + + if (dao_pal_global_init (&conf)) + { + log_err ("dao_pal_global_init failed\n"); + return VNET_DEV_ERR_UNSUPPORTED_CONFIG; + } + + /* Update lcore_mask with main core */ + lcore_mask = DAO_BIT_ULL (vm->cpu_id) | oct_virtio_main->wrkr_cpu_mask; + + log_debug ("lcore_mask %lu\n", lcore_mask); + if (dao_pal_dma_dev_setup (lcore_mask)) + { + log_err ("dao_pal_dma_dev_setup failed\n"); + rv = VNET_DEV_ERR_UNSUPPORTED_CONFIG; + goto finish; + } + + /* Set main core DMA devices for virtio control */ + if (dao_pal_dma_ctrl_dev_set (vm->cpu_id)) + { + log_err ("dao_pal_dma_dev_setup failed\n"); + rv = VNET_DEV_ERR_UNSUPPORTED_CONFIG; + goto finish; + } + + oct_virt_buffer_pool_dma_map (vm); + + cbs.status_cb = oct_virtio_dev_status_cb; + cbs.rss_cb = oct_virito_rss_reta_configure; + cbs.promisc_cb = oct_virtio_configure_promisc; + cbs.allmulti_cb = oct_virtio_configure_allmulti; + cbs.mac_set = oct_virtio_mac_addr_set; + cbs.mac_add = oct_virtio_mac_addr_add; + cbs.mq_configure = oct_virtio_mq_configure; + cbs.extbuf_get = oct_virtio_vlib_buffer_alloc; + cbs.extbuf_put = oct_virtio_vlib_buffer_free; + + dao_virtio_netdev_cb_register (&cbs); + + oct_virtio_main->dao_lib_initialized = 1; + } + + ethernet_mac_address_generate (mac_addr); + + device_data->virtio_id = bus_data->virtio_dev.virtio_id; + ovp.virtio_id = bus_data->virtio_dev.virtio_id; + ovp.reta_size = VIRTIO_NET_RSS_RETA_SIZE; + + vnet_dev_port_add_args_t port_add_args = { + .port = { + .attr = { + .type = VNET_DEV_PORT_TYPE_ETHERNET, + .max_rx_queues = DAO_VIRTIO_MAX_QUEUES, + .max_tx_queues = DAO_VIRTIO_MAX_QUEUES, + .max_supported_rx_frame_size = MAX_JUMBO_PKT_LEN, + .caps = { + .rss = 1, + }, + .rx_offloads = { + .ip4_cksum = 1, + }, + }, + .ops = { + .init = oct_virtio_port_init, + .deinit = oct_virtio_port_deinit, + .start = oct_virtio_port_start, + .stop = oct_virtio_port_stop, + .config_change = NULL, + .format_status = format_oct_virt_port_status, 
+ }, + .data_size = sizeof (oct_virtio_port_t), + .initial_data = &ovp, + }, + .rx_node = &oct_virtio_rx_node, + .tx_node = &oct_virtio_tx_node, + .rx_queue = { + .config = { + .data_size = 0, + .default_size = 1024, + .multiplier = 32, + .min_size = 256, + .max_size = 16384, + }, + .ops = { + .alloc = NULL, + .free = NULL, + .format_info = NULL, + }, + }, + .tx_queue = { + .config = { + .data_size = 0, + .default_size = 1024, + .multiplier = 32, + .min_size = 256, + .max_size = 16384, + }, + .ops = { + .alloc = NULL, + .free = NULL, + .format_info = NULL, + }, + }, + }; + + vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr, mac_addr); + + log_info ("MAC address is %U", format_ethernet_address, mac_addr); + + rv = vnet_dev_port_add (vm, dev, 0, &port_add_args); + + return rv; + +finish: + dao_pal_global_fini (); + return rv; +} + +static clib_error_t * +oct_virtio_worker_init (vlib_main_t *vm) +{ + u16 cpu_id = clib_get_current_cpu_id (); + + oct_virtio_main->wrkr_cpu_mask |= DAO_BIT_ULL (cpu_id); + + return 0; +} + +static clib_error_t * +oct_virtio_exit (vlib_main_t *vm) +{ + dao_pal_global_fini (); + return 0; +} + +static clib_error_t * +oct_virtio_plugin_init (vlib_main_t *vm) +{ + dao_lib_logging (); + vec_validate (virtio_port_map, DAO_VIRTIO_DEV_MAX); + vec_validate (oct_virt_thread_data, DAO_PAL_MAX_WORKERS); + vec_validate_aligned (oct_virtio_main, 1, CLIB_CACHE_LINE_BYTES); + return NULL; +} + +static void +oct_virtio_deinit (vlib_main_t *vm, vnet_dev_t *dev) +{ + log_info ("Device unlinitialized\n"); +} + +VLIB_INIT_FUNCTION (oct_virtio_plugin_init); + +VLIB_WORKER_INIT_FUNCTION (oct_virtio_worker_init); + +VLIB_MAIN_LOOP_EXIT_FUNCTION (oct_virtio_exit); + +VNET_DEV_REGISTER_DRIVER (octeon_virtio) = { + .name = "octeon_virtio", + .bus = "virtio", + .device_data_sz = sizeof (oct_virtio_device_t), + .ops = { + .init = oct_virtio_init, + .deinit = oct_virtio_deinit, + .probe = oct_virtio_probe, + }, + .args = oct_virtio_dev_args, +}; + 
+VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "OCTEON virtio device", + .default_disabled = 1, +}; diff --git a/src/plugins/dev_octeon/virtio_bus.c b/src/plugins/dev_octeon/virtio_bus.c new file mode 100644 index 0000000000..1a515c9399 --- /dev/null +++ b/src/plugins/dev_octeon/virtio_bus.c @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +oct_dev_bus_virtio_device_data_t * +oct_get_bus_virtio_device_data (vnet_dev_t *dev) +{ + return (void *) dev->bus_data; +} + +static int +oct_dev_bus_virtio_device_id_to_virtio_id (u32 *addr, char *str) +{ + unformat_input_t input; + uword rv; + unformat_init_string (&input, str, strlen (str)); + + rv = + unformat (&input, "virtio" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%u", addr); + unformat_free (&input); + return rv; +} + +static void * +oct_dev_bus_virtio_get_device_info (vlib_main_t *vm, char *device) +{ + oct_dev_bus_virtio_device_info_t *info; + u32 device_id = 0; + + if (oct_dev_bus_virtio_device_id_to_virtio_id (&device_id, device) == 0) + return 0; + + info = clib_mem_alloc (sizeof (oct_dev_bus_virtio_device_info_t)); + info->virtio_id = device_id; + info->vendor_id = 0x177d; + info->device_id = OCT_VIRTIO_DEVICE_ID; + + return info; +} + +static void +oct_dev_bus_virtio_free_device_info (vlib_main_t *vm, void *dev_info) +{ + clib_mem_free (dev_info); +} + +static vnet_dev_rv_t +oct_dev_bus_virtio_dev_open (vlib_main_t *vm, vnet_dev_t *dev) +{ + oct_dev_bus_virtio_device_info_t *info; + oct_dev_bus_virtio_device_data_t *pd = oct_get_bus_virtio_device_data (dev); + + if ((info = oct_dev_bus_virtio_get_device_info (vm, dev->device_id)) == 0) + return VNET_DEV_ERR_INVALID_DEVICE_ID; + + dev->numa_node = 0; + dev->va_dma = 1; + pd->virtio_dev.device_id = info->device_id; + pd->virtio_dev.vendor_id = info->vendor_id; + pd->virtio_dev.virtio_id = info->virtio_id; + + 
clib_mem_free (info); + + return VNET_DEV_OK; +} + +static void +oct_bus_virtio_dev_close (vlib_main_t *vm, vnet_dev_t *dev) +{ +} + +static u8 * +format_oct_virtio_device_info (u8 *s, va_list *args) +{ + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + oct_dev_bus_virtio_device_data_t *pdd = oct_get_bus_virtio_device_data (dev); + + s = format (s, "Virtio ID is %u", pdd->virtio_dev.device_id); + + return s; +} + +static u8 * +format_oct_virtio_device_addr (u8 *s, va_list *args) +{ + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + oct_dev_bus_virtio_device_data_t *pdd; + + pdd = oct_get_bus_virtio_device_data (dev); + return format (s, "virtio/%u", pdd->virtio_dev.virtio_id); +} + +VNET_DEV_REGISTER_BUS (virtio) = { + .name = "virtio", + .device_data_size = sizeof (oct_dev_bus_virtio_device_info_t), + .ops = { + .device_open = oct_dev_bus_virtio_dev_open, + .device_close = oct_bus_virtio_dev_close, + .get_device_info = oct_dev_bus_virtio_get_device_info, + .free_device_info = oct_dev_bus_virtio_free_device_info, + .dma_mem_alloc_fn = NULL, + .dma_mem_free_fn = NULL, + .format_device_info = format_oct_virtio_device_info, + .format_device_addr = format_oct_virtio_device_addr, + }, +}; diff --git a/src/plugins/dev_octeon/virtio_bus.h b/src/plugins/dev_octeon/virtio_bus.h new file mode 100644 index 0000000000..c039ca96b2 --- /dev/null +++ b/src/plugins/dev_octeon/virtio_bus.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. 
+ */ +#ifndef _VIRTIO_BUS_H_ +#define _VIRTIO_BUS_H_ +#include + +typedef struct +{ + u16 device_id; + u16 vendor_id; + u16 virtio_id; + u16 reserved; +} oct_dev_bus_virtio_device_info_t; + +typedef struct +{ + oct_dev_bus_virtio_device_info_t virtio_dev; +} oct_dev_bus_virtio_device_data_t; + +oct_dev_bus_virtio_device_data_t * +oct_get_bus_virtio_device_data (vnet_dev_t *dev); + +#endif //_VIRTIO_BUS_H diff --git a/src/plugins/dev_octeon/virtio_ctrl.c b/src/plugins/dev_octeon/virtio_ctrl.c new file mode 100644 index 0000000000..520e3754b8 --- /dev/null +++ b/src/plugins/dev_octeon/virtio_ctrl.c @@ -0,0 +1,370 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. + */ + +#include +#include +#include +#include + +#define OCT_VIRTIO_MAX_WRKS 24 +#define OCT_VIRTIO_CHECKSUM_OFFLOAD_MASK 0x3 +#define OCT_VIRTIO_TSO_OFFLOAD_MASK 0xFFFF + +extern oct_virtio_main_t *oct_virtio_main; +extern oct_virtio_port_map_t *virtio_port_map; +extern oct_virtio_per_thread_data_t *oct_virt_thread_data; + +VLIB_REGISTER_LOG_CLASS (oct_virt_log, static) = { + .class_name = "octeon", + .subclass_name = "virtio_ctl", +}; + +int +oct_virtio_vlib_buffer_free (u16 devid, void *buffs[], u16 nb_buffs) +{ + int i = 0; + u16 hdr_len; + u32 bi[nb_buffs]; + vlib_buffer_t *b[nb_buffs]; + u32 cpu_id = clib_get_current_cpu_id (); + vlib_main_t *vm = vlib_get_first_main (); + oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data; + + hdr_len = ptd[cpu_id].q_map[devid].virtio_hdr_sz; + for (i = 0; i < nb_buffs; i++) + b[i] = oct_virt_to_bp (buffs[i], hdr_len); + + vlib_get_buffer_indices (vm, b, bi, nb_buffs); + vlib_buffer_free_no_next (vm, bi, nb_buffs); + + return 0; +} + +int +oct_virtio_vlib_buffer_alloc (u16 devid, void *buffs[], u16 nb_buffs) +{ + int i = 0; + u16 hdr_len; + u16 allocated; + u32 vbuf_idxs[nb_buffs]; + vlib_buffer_t *b[nb_buffs]; + u32 cpu_id = clib_get_current_cpu_id (); + vlib_main_t *vm = vlib_get_first_main (); + oct_virtio_per_thread_data_t *ptd = 
oct_virt_thread_data; + + hdr_len = ptd[cpu_id].q_map[devid].virtio_hdr_sz; + allocated = vlib_buffer_alloc (vm, vbuf_idxs, nb_buffs); + if (allocated != nb_buffs) + { + vlib_buffer_free_no_next (vm, vbuf_idxs, allocated); + return -1; + } + vlib_get_buffers (vm, vbuf_idxs, b, nb_buffs); + + for (i = 0; i < nb_buffs; i++) + buffs[i] = oct_bp_to_virt (b[i], hdr_len); + + return 0; +} + +int +oct_virtio_mac_addr_add (u16 virtio_devid, struct virtio_net_ctrl_mac *mac_tbl, + u8 type) +{ + /* Not supported */ + return 0; +} + +int +oct_virtio_mac_addr_set (u16 virtio_devid, u8 *mac) +{ + /* Not supported */ + return 0; +} + +int +oct_virtio_configure_allmulti (u16 virtio_devid, u8 enable) +{ + /* Not supported */ + return 0; +} + +int +oct_virtio_configure_promisc (u16 virtio_devid, u8 enable) +{ + /* Not supported */ + return 0; +} + +static_always_inline void +oct_virtio_clear_lcore_queue_mapping (u16 virtio_devid) +{ + u32 cpu_id = 0; + oct_virtio_main_t *ovm = oct_virtio_main; + oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data; + u64 wrkr_cpu_mask = ovm->wrkr_cpu_mask; + + ovm->netdev_map &= ~(DAO_BIT (virtio_devid)); + while (wrkr_cpu_mask) + { + if (!(wrkr_cpu_mask & (1 << cpu_id))) + { + cpu_id++; + continue; + } + ptd[cpu_id].netdev_map &= ~(DAO_BIT (virtio_devid)); + ptd[cpu_id].q_map[virtio_devid].qmap = 0; + wrkr_cpu_mask &= ~(1 << cpu_id); + cpu_id++; + } + + ovm->netdev_qp_count[virtio_devid] = 0; +} + +static_always_inline u16 +oct_virtio_netdev_hdrlen_get (u16 virtio_devid) +{ + struct virtio_net_hdr vnet_hdr; + u16 virtio_hdr_sz = 0; + u64 feature_bits = 0; + + feature_bits = dao_virtio_netdev_feature_bits_get (virtio_devid); + + if (feature_bits & DAO_BIT_ULL (VIRTIO_NET_F_HASH_REPORT)) + virtio_hdr_sz = offsetof (struct virtio_net_hdr, padding_reserved) + + sizeof (vnet_hdr.padding_reserved); + else + virtio_hdr_sz = offsetof (struct virtio_net_hdr, num_buffers) + + sizeof (vnet_hdr.num_buffers); + return virtio_hdr_sz; +} + +static int 
+chksum_offload_configure (uint16_t virtio_devid, u64 *tx_offloads, + u64 *rx_offloads) +{ + u64 csum_offload, tso_offload; + + csum_offload = dao_virtio_netdev_feature_bits_get (virtio_devid) & + OCT_VIRTIO_CHECKSUM_OFFLOAD_MASK; + tso_offload = dao_virtio_netdev_feature_bits_get (virtio_devid) & + OCT_VIRTIO_TSO_OFFLOAD_MASK; + + if (csum_offload & DAO_BIT_ULL (VIRTIO_NET_F_CSUM)) + *tx_offloads |= OCT_ETH_TX_OFFLOAD_IPV4_CKSUM; + + if (tso_offload & DAO_BIT_ULL (VIRTIO_NET_F_HOST_TSO4) || + tso_offload & DAO_BIT_ULL (VIRTIO_NET_F_HOST_TSO6)) + { + *tx_offloads |= OCT_ETH_TX_OFFLOAD_TCP_TSO; + log_err ("TSO offload is not supported\n"); + } + + if (csum_offload & DAO_BIT_ULL (VIRTIO_NET_F_GUEST_CSUM)) + *rx_offloads |= OCT_ETH_RX_OFFLOAD_CHECKSUM; + + /** + * We need to configure out interface, but by default, OCTEON interfaces are + * enabled with RX and TX checksum enabled, and currently, we don’t have + * control to enable or disable them. For now, based on these flags, the + * correct flags will be set for the HOST. 
/**
 * Distribute a virtio device's RX queues over the CPUs in the worker CPU
 * mask and publish the device in the per-CPU and global maps.
 *
 * Queues 0 .. virt_q_count / 2 - 1 are handed out round-robin: each queue
 * goes to the next CPU whose bit is set in the mask, and the scan restarts
 * from CPU 0 once every CPU in the mask has received a queue.
 *
 * @param virtio_devid virtio device id being configured.
 * @param virt_q_count number of virtqueues; half of it is used as the
 *                     number of queue pairs.
 * @return always 0.
 */
static_always_inline int
oct_virtio_setup_worker_queue_mapping (u16 virtio_devid, u16 virt_q_count)
{
  u32 cpu_id = 0;
  u16 virt_rx_q, q_id;
  u64 tx_offloads = 0, rx_offloads = 0;
  oct_virtio_main_t *ovm = oct_virtio_main;
  u64 wrkr_cpu_mask = ovm->wrkr_cpu_mask;
  oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data;
  u16 virtio_hdr_sz = 0;

  virtio_hdr_sz = oct_virtio_netdev_hdrlen_get (virtio_devid);

  chksum_offload_configure (virtio_devid, &tx_offloads, &rx_offloads);

  virt_rx_q = virt_q_count / 2;
  q_id = 0;
  /* Nothing is assigned when no CPU is present in the shared mask. */
  for (q_id = 0; q_id < virt_rx_q && ovm->wrkr_cpu_mask; q_id++)
    {
      /* Advance to the next CPU still set in the local working copy. */
      while (!(wrkr_cpu_mask & DAO_BIT_ULL (cpu_id)))
        cpu_id++;

      /*
       * The queue bit is written before the device bit, with a barrier in
       * between -- presumably so a CPU that sees the device in its
       * netdev_map also sees the queue map; confirm against the readers.
       */
      ptd[cpu_id].q_map[virtio_devid].qmap |= DAO_BIT_ULL (q_id);
      CLIB_MEMORY_BARRIER ();
      ptd[cpu_id].netdev_map |= DAO_BIT (virtio_devid);

      /* Offload flags are only recorded when IPv4 csum offload is on. */
      if (oct_virtio_main->ip4_csum_offload_enable)
        {
          ptd[cpu_id].intf[virtio_devid].tx_offloads = tx_offloads;
          ptd[cpu_id].intf[virtio_devid].rx_offloads = rx_offloads;
        }
      wrkr_cpu_mask &= ~DAO_BIT_ULL (cpu_id);
      cpu_id++;
      /* Every CPU has a queue: reload the mask and wrap around. */
      if (!wrkr_cpu_mask)
        {
          cpu_id = 0;
          wrkr_cpu_mask = ovm->wrkr_cpu_mask;
        }
    }

  /* The header size is stored for every per-thread slot, mapped or not. */
  for (cpu_id = 0; cpu_id < DAO_PAL_MAX_WORKERS; cpu_id++)
    ptd[cpu_id].q_map[virtio_devid].virtio_hdr_sz = virtio_hdr_sz;

  /* Queue-pair count is written before the device bit is made visible. */
  ovm->netdev_qp_count[virtio_devid] = virt_q_count / 2;
  CLIB_MEMORY_BARRIER ();
  ovm->netdev_map |= DAO_BIT (virtio_devid);

  return 0;
}
(virtio_devid, virt_q_count); + + return 0; +} + +int +oct_virito_rss_reta_configure (u16 virtio_devid, + struct virtio_net_ctrl_rss *rss) +{ + u16 virt_q_count; + + oct_virtio_clear_lcore_queue_mapping (virtio_devid); + + if (rss == NULL) + return 0; + + /* Get active virt queue count */ + virt_q_count = dao_virtio_netdev_queue_count (virtio_devid); + + if (virt_q_count <= 0 || virt_q_count & 0x1 || + virt_q_count >= (DAO_VIRTIO_MAX_QUEUES - 1)) + { + log_err ("virtio_dev=%d: invalid virt_q_count=%d\n", virtio_devid, + virt_q_count); + return -EIO; + } + + oct_virtio_setup_worker_queue_mapping (virtio_devid, virt_q_count); + + return 0; +} + +int +oct_virtio_dev_status_cb (u16 virtio_devid, u8 status) +{ + u16 virt_q_count; + + log_debug ("[%s] virtio_dev=%d: status=%s\n", __func__, virtio_devid, + dao_virtio_dev_status_to_str (status)); + + switch (status) + { + case VIRTIO_DEV_RESET: + case VIRTIO_DEV_NEEDS_RESET: + virtio_port_map[virtio_devid].state = 0; + CLIB_MEMORY_BARRIER (); + oct_virtio_clear_lcore_queue_mapping (virtio_devid); + break; + case VIRTIO_DEV_DRIVER_OK: + /* Get active virt queue count */ + virt_q_count = dao_virtio_netdev_queue_count (virtio_devid); + + if (virt_q_count <= 0 || virt_q_count & 0x1 || + virt_q_count >= (DAO_VIRTIO_MAX_QUEUES - 1)) + { + log_err ("virtio_dev=%d: invalid virt_q_count=%d\n", virtio_devid, + virt_q_count); + return -EIO; + } + + oct_virtio_setup_worker_queue_mapping (virtio_devid, virt_q_count); + virtio_port_map[virtio_devid].state = 1; + CLIB_MEMORY_BARRIER (); + break; + default: + break; + } + + return 0; +} + +static_always_inline void +oct_virtio_desc_process (u64 netdev_map, u16 *netdev_qp_count) +{ + u16 dev_id = 0; + + while (netdev_map) + { + if (!(netdev_map & 0x1)) + { + netdev_map >>= 1; + dev_id++; + continue; + } + dao_virtio_net_desc_manage (dev_id, netdev_qp_count[dev_id]); + netdev_map >>= 1; + dev_id++; + } +} + +void +virtio_ctrl_thread_fn (void *args) +{ + vlib_worker_thread_t *w = 
(vlib_worker_thread_t *) args; + oct_virtio_main_t *ovm = oct_virtio_main; + oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data; + u32 cpu_id = clib_get_current_cpu_id (); + + vlib_worker_thread_init (w); + ovm->wrkr_cpu_mask |= DAO_BIT (cpu_id); + ptd->service_core = cpu_id; + /* Wait till Octeon virtio DAO lib init is complete */ + while (!ovm || !ovm->dao_lib_initialized) + CLIB_PAUSE (); + + /* Assign DMA devices per lcore */ + dao_pal_thread_init (cpu_id); + ovm->wrkr_cpu_mask &= ~(DAO_BIT (cpu_id)); + + while (1) + { + /* Process virtio descriptors */ + oct_virtio_desc_process (ovm->netdev_map, ovm->netdev_qp_count); + + /* Flush and submit DMA ops */ + dao_dma_flush_submit (); + } +} + +VLIB_REGISTER_THREAD (virtio_ctrl_thread_reg, static) = { + .name = "virtio-ctrl", + .short_name = "virt-ctl", + .function = virtio_ctrl_thread_fn, + .no_data_structure_clone = 1, +}; diff --git a/src/plugins/dev_octeon/virtio_format.c b/src/plugins/dev_octeon/virtio_format.c new file mode 100644 index 0000000000..f21e634882 --- /dev/null +++ b/src/plugins/dev_octeon/virtio_format.c @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. 
+ */ + +#include "vlib/pci/pci.h" +#include "vnet/error.h" +#include "vppinfra/error.h" +#include +#include +#include + +u8 * +format_oct_virt_port_status (u8 *s, va_list *args) +{ + return s; +} + +u8 * +format_oct_virt_rx_trace (u8 *s, va_list *args) +{ + va_arg (*args, vlib_main_t *); + va_arg (*args, vlib_node_t *); + oct_virt_rx_trace_t *t = va_arg (*args, oct_virt_rx_trace_t *); + + s = format (s, "octeon-virt-rx: virtio_id %u sw_if_index %u rx_q_map %lu", + t->virtio_id, t->sw_if_index, t->rx_q_map); + return s; +} + +u8 * +format_oct_virt_tx_trace (u8 *s, va_list *args) +{ + va_arg (*args, vlib_main_t *); + va_arg (*args, vlib_node_t *); + oct_virt_tx_trace_t *t = va_arg (*args, oct_virt_tx_trace_t *); + + s = format (s, "octeon-virt-tx: virtio_id %u sw_if_index %u tx_q_map %lu ", + t->virtio_id, t->sw_if_index, t->tx_q_map); + return s; +} diff --git a/src/plugins/dev_octeon/virtio_port.c b/src/plugins/dev_octeon/virtio_port.c new file mode 100644 index 0000000000..c7f00bb7c0 --- /dev/null +++ b/src/plugins/dev_octeon/virtio_port.c @@ -0,0 +1,171 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define VIRTO_NETDEV_SPEED_NUM_UNKNOWN UINT32_MAX /**< Unknown */ +#define OCT_VIRTIO_NIX_RSS_KEY_LEN 48 + +static u64 vchan_bitmap[2] = { 0 }; +extern oct_virtio_port_map_t *virtio_port_map; +extern oct_virtio_main_t *oct_virtio_main; + +VLIB_REGISTER_LOG_CLASS (oct_virt_log, static) = { + .class_name = "octeon", + .subclass_name = "virtio_port", +}; + +int +oct_virtio_dma_vchan_id_allocate (void) +{ + int idx; + int pos; + + for (int i = 0; i < DAO_VIRTIO_DEV_MAX; i++) + { + idx = i / 64; + pos = i % 64; + if (!(vchan_bitmap[idx] & (1ULL << pos))) + { + vchan_bitmap[idx] |= (1ULL << pos); + return i; + } + } + return -1; +} + +void +oct_virtio_dma_vchan_id_free (int id) +{ + int idx; + int pos; + + if (id >= 0 && id < DAO_VIRTIO_DEV_MAX) + { + idx = id / 64; + pos = id % 64; + vchan_bitmap[idx] &= ~(1ULL << pos); + } +} + +vnet_dev_rv_t +oct_virtio_port_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + int rrv; + u16 virtio_devid; + u8 buffer_pool_index; + vlib_buffer_pool_t *bp; + vnet_dev_t *dev = port->dev; + struct dao_virtio_netdev_conf netdev_conf = { 0 }; + oct_virtio_device_t *ovd = vnet_dev_get_data (dev); + oct_virtio_port_t *ovp = vnet_dev_get_port_data (port); + + buffer_pool_index = + vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node); + bp = vlib_get_buffer_pool (vm, buffer_pool_index); + virtio_devid = ovd->virtio_id; + + netdev_conf.pem_devid = ovd->pem_devid; + netdev_conf.flags |= DAO_VIRTIO_NETDEV_EXTBUF; + netdev_conf.dataroom_size = bp->data_size; + netdev_conf.reta_size = VIRTIO_NET_RSS_RETA_SIZE; + netdev_conf.link_info.status = 0; + netdev_conf.link_info.speed = VIRTO_NETDEV_SPEED_NUM_UNKNOWN; + netdev_conf.link_info.duplex = 0xFF; + netdev_conf.hash_key_size = OCT_VIRTIO_NIX_RSS_KEY_LEN; + netdev_conf.dma_vchan = oct_virtio_dma_vchan_id_allocate (); + netdev_conf.csum_en = oct_virtio_main->ip4_csum_offload_enable; + memcpy (netdev_conf.mac, 
port->attr.hw_addr.eth_mac, + sizeof (netdev_conf.mac)); + log_debug ("port start: port %u, virtio_id %u, vchan_id %d\n", port->port_id, + virtio_devid, netdev_conf.dma_vchan); + + dao_pal_dma_vchan_setup (virtio_devid, netdev_conf.dma_vchan, NULL); + /* Initialize virtio net device */ + rrv = dao_virtio_netdev_init (virtio_devid, &netdev_conf); + if (rrv) + { + log_err ("[%s] dao_virtio_netdev_init failed \n", __func__); + oct_virtio_dma_vchan_id_free (netdev_conf.dma_vchan); + return VNET_DEV_ERR_INTERNAL; + } + ovp->vchan_id = netdev_conf.dma_vchan; + + return VNET_DEV_OK; +} + +void +oct_virtio_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_virtio_device_t *ovd = vnet_dev_get_data (dev); + oct_virtio_port_t *ovp = vnet_dev_get_port_data (port); + + log_debug ("clear data for virtio id %u\n", ovd->virtio_id); + dao_virtio_netdev_fini (ovd->virtio_id); + oct_virtio_dma_vchan_id_free (ovp->vchan_id); +} + +void +oct_virt_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + vnet_dev_port_state_changes_t changes = {}; + oct_virtio_device_t *ovd = vnet_dev_get_data (dev); + u16 virtio_devid = ovd->virtio_id; + + if (ovd->status != virtio_port_map[virtio_devid].state) + { + changes.change.link_state = 1; + changes.link_state = virtio_port_map[virtio_devid].state; + ovd->status = virtio_port_map[virtio_devid].state; + } + else + return; + + vnet_dev_port_state_change (vm, port, changes); +} + +vnet_dev_rv_t +oct_virtio_port_start (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_virtio_device_t *ovd = vnet_dev_get_data (dev); + struct dao_virtio_netdev_link_info link_info = { 0 }; + + log_debug ("State up for virtio device %u\n", ovd->virtio_id); + link_info.status = 0x1; + link_info.duplex = 0xFF; + link_info.speed = VIRTO_NETDEV_SPEED_NUM_UNKNOWN; + dao_virtio_netdev_link_sts_update (ovd->virtio_id, &link_info); + + vnet_dev_poll_port_add (vm, port, 0.5, 
oct_virt_port_poll); + + return VNET_DEV_OK; +} + +void +oct_virtio_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_virtio_device_t *ovd = vnet_dev_get_data (dev); + struct dao_virtio_netdev_link_info link_info = { 0 }; + + log_debug ("[%s] received dev stop port id %d virtio_id %u\n", __func__, + port->port_id, ovd->virtio_id); + + link_info.status = 0x0; + link_info.duplex = 0xFF; + link_info.speed = VIRTO_NETDEV_SPEED_NUM_UNKNOWN; + dao_virtio_netdev_link_sts_update (ovd->virtio_id, &link_info); + vnet_dev_poll_port_remove (vm, port, oct_virt_port_poll); +} diff --git a/src/plugins/dev_octeon/virtio_rx_node.c b/src/plugins/dev_octeon/virtio_rx_node.c new file mode 100644 index 0000000000..e94a6930ff --- /dev/null +++ b/src/plugins/dev_octeon/virtio_rx_node.c @@ -0,0 +1,359 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. + */ + +#include +#include +#include +#include + +#define OCT_VIRT_LENGTH(h) (h->desc_data[1] & 0x00000000FFFFFFFF) +#define OCT_VIRT_NEXT_FLAG(h) \ + (h->desc_data[1] & DAO_BIT_ULL (VRING_DESC_F_NEXT)) + +extern oct_virtio_main_t *oct_virtio_main; +extern oct_virtio_per_thread_data_t *oct_virt_thread_data; + +static_always_inline u32 +oct_virt_trace_rx_buffers (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t **b, u16 nb_pkts, u32 nb_trace, + u64 rx_q_map, u16 virtio_id, u16 next_index) +{ + int idx = 0; + u32 n_traced = 0; + for (idx = 0; idx < nb_pkts && idx < nb_trace; idx++) + { + if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[idx], 0))) + { + oct_virt_rx_trace_t *tr = + vlib_add_trace (vm, node, b[idx], sizeof (*tr)); + tr->rx_q_map = rx_q_map; + tr->virtio_id = virtio_id; + n_traced++; + } + } + + return n_traced; +} + +static_always_inline vlib_buffer_t * +oct_virt_populate_inner_segments (vlib_main_t *vm, vlib_buffer_t *head, + void *p, u32 pool_idx, u16 hdr_len) +{ + vlib_buffer_t *prev, *b; + struct dao_virtio_net_hdr *v_hdr; + + v_hdr = 
(struct dao_virtio_net_hdr *) p; + b = oct_virt_to_bp (p, hdr_len); + head->total_length_not_including_first_buffer += b->current_length; + prev = b; + while (v_hdr->desc_data[0]) + { + v_hdr = (struct dao_virtio_net_hdr *) v_hdr->desc_data[0]; + b = oct_virt_to_bp ((void *) v_hdr->desc_data[0], hdr_len); + b->current_length = OCT_VIRT_LENGTH (v_hdr) - hdr_len; + b->current_data = 0; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + head->total_length_not_including_first_buffer += b->current_length; + prev->buffer_pool_index = pool_idx; + prev->next_buffer = vlib_get_buffer_index (vm, b); + prev = b; + } + head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + return b; +} + +static_always_inline vlib_buffer_t * +oct_virt_process_chained_packets (vlib_main_t *vm, void **pkts, + u32 *nb_pkts_chain, u32 *n_rx_bytes, + u16 hdr_len) +{ + u32 vhdr_len = sizeof (struct virtio_net_hdr); + struct dao_virtio_net_hdr *v_hdr; + vlib_buffer_t *b, *head, *prev = NULL; + u32 buffer_index = 0, len; + u8 pool_idx = 0; + int idx = 0; + + pool_idx = vlib_buffer_pool_get_default_for_numa (vm, 0); + v_hdr = (struct dao_virtio_net_hdr *) pkts[idx]; + len = OCT_VIRT_LENGTH (v_hdr) - hdr_len; + head = oct_virt_to_bp (pkts[idx], hdr_len); + + /** + * If Host uses linux virtio interface skip first buffer as it contains + * only virtio header details + */ + if (len == vhdr_len) + { + buffer_index = vlib_get_buffer_index (vm, head); + vlib_buffer_free_no_next (vm, &buffer_index, 1); + + idx++; + head = oct_virt_to_bp (pkts[idx], hdr_len); + head->buffer_pool_index = pool_idx; + } + + do + { + v_hdr = (struct dao_virtio_net_hdr *) pkts[idx]; + b = oct_virt_to_bp (pkts[idx], hdr_len); + b->current_length = OCT_VIRT_LENGTH (v_hdr) - hdr_len; + b->current_data = 0; + /* Check for DPU side segmentation */ + if (PREDICT_FALSE ((v_hdr->desc_data[0]))) + b = oct_virt_populate_inner_segments (vm, head, pkts[idx], pool_idx, + hdr_len); + + if (prev) + { + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + head->flags 
|= VLIB_BUFFER_TOTAL_LENGTH_VALID; + head->total_length_not_including_first_buffer += b->current_length; + prev->buffer_pool_index = pool_idx; + prev->next_buffer = vlib_get_buffer_index (vm, b); + prev = b; + } + prev = b; + idx++; + } + while (OCT_VIRT_NEXT_FLAG (v_hdr)); + + *nb_pkts_chain = idx; + *n_rx_bytes = + head->current_length + head->total_length_not_including_first_buffer; + + return head; +} + +static_always_inline u32 +oct_virtio_process_virtio_packets (vlib_main_t *vm, void **pkts, + vlib_buffer_t **b, u32 *n_rx_pkts, + u32 *to_next, vnet_dev_rx_queue_t *rxq, + u16 hdr_len) +{ + u8 flags = 0; + int idx = 0; + u32 nb_pkts = *n_rx_pkts, next_nb_pkts = 0; + u32 n_rx_bytes = 0, nb_pkts_chain = 0; + struct dao_virtio_net_hdr *v_hdr[4]; + vlib_buffer_template_t bt = rxq->buffer_template; + + while (nb_pkts >= 8) + { + v_hdr[0] = (struct dao_virtio_net_hdr *) pkts[idx + 0]; + v_hdr[1] = (struct dao_virtio_net_hdr *) pkts[idx + 1]; + v_hdr[2] = (struct dao_virtio_net_hdr *) pkts[idx + 2]; + v_hdr[3] = (struct dao_virtio_net_hdr *) pkts[idx + 3]; + + flags |= OCT_VIRT_NEXT_FLAG (v_hdr[0]); + flags |= OCT_VIRT_NEXT_FLAG (v_hdr[1]); + flags |= OCT_VIRT_NEXT_FLAG (v_hdr[2]); + flags |= OCT_VIRT_NEXT_FLAG (v_hdr[3]); + + if (PREDICT_FALSE (flags)) + break; + + b[0] = oct_virt_to_bp (pkts[idx + 0], hdr_len); + b[1] = oct_virt_to_bp (pkts[idx + 1], hdr_len); + b[2] = oct_virt_to_bp (pkts[idx + 2], hdr_len); + b[3] = oct_virt_to_bp (pkts[idx + 3], hdr_len); + + clib_prefetch_store (oct_virt_to_bp (pkts[idx + 4], hdr_len)); + clib_prefetch_store (oct_virt_to_bp (pkts[idx + 5], hdr_len)); + clib_prefetch_store (oct_virt_to_bp (pkts[idx + 6], hdr_len)); + clib_prefetch_store (oct_virt_to_bp (pkts[idx + 7], hdr_len)); + + b[0]->template = bt; + b[1]->template = bt; + b[2]->template = bt; + b[3]->template = bt; + + b[0]->current_length = OCT_VIRT_LENGTH (v_hdr[0]) - hdr_len; + b[1]->current_length = OCT_VIRT_LENGTH (v_hdr[1]) - hdr_len; + b[2]->current_length = 
OCT_VIRT_LENGTH (v_hdr[2]) - hdr_len; + b[3]->current_length = OCT_VIRT_LENGTH (v_hdr[3]) - hdr_len; + + n_rx_bytes += b[0]->current_length; + n_rx_bytes += b[1]->current_length; + n_rx_bytes += b[2]->current_length; + n_rx_bytes += b[3]->current_length; + + to_next[0] = vlib_get_buffer_index (vm, b[0]); + to_next[1] = vlib_get_buffer_index (vm, b[1]); + to_next[2] = vlib_get_buffer_index (vm, b[2]); + to_next[3] = vlib_get_buffer_index (vm, b[3]); + + b += 4; + idx += 4; + to_next += 4; + nb_pkts -= 4; + next_nb_pkts += 4; + } + + while (nb_pkts) + { + nb_pkts_chain = 1; + v_hdr[0] = (struct dao_virtio_net_hdr *) pkts[idx]; + b[0] = oct_virt_to_bp (pkts[idx], hdr_len); + b[0]->template = bt; + + if (OCT_VIRT_NEXT_FLAG (v_hdr[0])) + b[0] = oct_virt_process_chained_packets ( + vm, &pkts[idx], &nb_pkts_chain, &n_rx_bytes, hdr_len); + else + { + b[0]->current_length = OCT_VIRT_LENGTH (v_hdr[0]) - hdr_len; + n_rx_bytes += b[0]->current_length; + b[0]->current_data = 0; + } + + to_next[0] = vlib_get_buffer_index (vm, b[0]); + b++; + idx += nb_pkts_chain; + to_next++; + nb_pkts -= nb_pkts_chain; + next_nb_pkts++; + } + + *n_rx_pkts = next_nb_pkts; + + return n_rx_bytes; +} + +static_always_inline uword +oct_virtio_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, vnet_dev_port_t *port, + vnet_dev_rx_queue_t *rxq) +{ + u64 q_map; + u64 rx_q_map; + u32 trace_count; + u32 trace_enabled; + oct_virtio_port_t *ovp; + u32 cpu_id = vm->cpu_id; + void *pkts[VLIB_FRAME_SIZE]; + u32 *to_next, n_left_to_next; + vlib_buffer_t *b[VLIB_FRAME_SIZE]; + vnet_main_t *vnm = vnet_get_main (); + u16 queue, virt_q, virtio_id, hdr_len; + u32 thr_idx = vlib_get_thread_index (); + oct_virtio_main_t *ovm = oct_virtio_main; + u32 n_rx_pkts, n_rx_bytes = 0, rx_pkts_total = 0; + oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data; + + trace_enabled = trace_count = vlib_get_trace_count (vm, node); + ovp = vnet_dev_get_port_data (port); + virtio_id = 
ovp->virtio_id; + + if (PREDICT_FALSE (!ovm || !ovm->dao_lib_initialized)) + return 0; + /* Assign DMA devices per lcore */ + if (PREDICT_FALSE (!ptd[cpu_id].initialized)) + { + dao_pal_thread_init (cpu_id); + dao_pal_dma_lcore_mem2dev_autofree_set (cpu_id, false); + ptd[cpu_id].initialized = 1; + } + + rx_q_map = ptd[cpu_id].q_map[virtio_id].qmap; + q_map = ptd[cpu_id].q_map[virtio_id].qmap; + + if (!(ptd[cpu_id].netdev_map & (DAO_BIT (virtio_id))) || !q_map) + return 0; + + /* Flush and submit DMA ops */ + dao_dma_flush_submit (); + + queue = ptd[cpu_id].q_map[virtio_id].last_rx_q; + hdr_len = ptd[cpu_id].q_map[virtio_id].virtio_hdr_sz; + + while (rx_q_map) + { + if (!(rx_q_map & DAO_BIT (queue))) + goto next; + + rx_q_map &= ~DAO_BIT (queue); + virt_q = (queue << 1) + 1; + + n_rx_pkts = dao_virtio_net_dequeue_burst_ext (virtio_id, virt_q, pkts, + VLIB_FRAME_SIZE); + + if (!n_rx_pkts) + goto next; + + vlib_get_new_next_frame (vm, node, rxq->next_index, to_next, + n_left_to_next); + + n_rx_bytes += oct_virtio_process_virtio_packets (vm, pkts, b, &n_rx_pkts, + to_next, rxq, hdr_len); + + if (PREDICT_FALSE (trace_count)) + trace_count -= + oct_virt_trace_rx_buffers (vm, node, b, n_rx_pkts, trace_count, + q_map, virtio_id, rxq->next_index); + + if (PREDICT_TRUE (rxq->next_index == + VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT)) + { + vlib_next_frame_t *nf; + vlib_frame_t *f; + ethernet_input_frame_t *ef; + nf = vlib_node_runtime_get_next_frame (vm, node, rxq->next_index); + f = vlib_get_frame (vm, nf->frame); + f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + /** + * We can set the checksum as OK because, in the host checksum + * offload case, OCTEON Tx will perform the checksum computation. In + * the host non-checksum offload case, the host computes the checksum + * and provides it to OCTEON. 
+ */ + f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK; + + ef = vlib_frame_scalar_args (f); + ef->sw_if_index = port->intf.sw_if_index; + ef->hw_if_index = port->intf.hw_if_index; + + vlib_frame_no_append (f); + } + + n_left_to_next -= n_rx_pkts; + + vlib_put_next_frame (vm, node, rxq->next_index, n_left_to_next); + rx_pkts_total += n_rx_pkts; + + if (rx_pkts_total == VLIB_FRAME_SIZE) + break; + next: + queue = queue + 1; + if (DAO_BIT (queue) > q_map) + queue = 0; + } + + ptd[cpu_id].q_map[virtio_id].last_rx_q = queue; + + if (PREDICT_FALSE (trace_enabled)) + vlib_set_trace_count (vm, node, trace_count); + + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + thr_idx, port->intf.hw_if_index, rx_pkts_total, n_rx_bytes); + + return rx_pkts_total; +} + +VNET_DEV_NODE_FN (oct_virtio_rx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_rx = 0; + + foreach_vnet_dev_rx_queue_runtime (rxq, node) + { + vnet_dev_port_t *port = rxq->port; + n_rx += oct_virtio_rx_node_inline (vm, node, frame, port, rxq); + } + + return n_rx; +} diff --git a/src/plugins/dev_octeon/virtio_tx_node.c b/src/plugins/dev_octeon/virtio_tx_node.c new file mode 100644 index 0000000000..3afebe935b --- /dev/null +++ b/src/plugins/dev_octeon/virtio_tx_node.c @@ -0,0 +1,228 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Marvell. 
+ */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#define OCT_VIRT_MAX_FRAGS 6 + +extern oct_virtio_main_t *oct_virtio_main; +extern oct_virtio_per_thread_data_t *oct_virt_thread_data; + +static_always_inline void +oct_virt_free_to_vlib (vlib_main_t *vm, vlib_node_runtime_t *node, + void *virt_b[], u16 nb_free, u16 hdr_len) +{ + u16 idx = 0; + vlib_buffer_t *b; + u32 b_index; + + while (idx < nb_free) + { + b = oct_virt_to_bp (virt_b[idx], hdr_len); + b_index = vlib_get_buffer_index (vm, b); + vlib_buffer_free_no_next (vm, &b_index, 1); + idx++; + } + + vlib_error_count (vm, node->node_index, OCT_VIRT_TX_NODE_CTR_ENQUE_FAIL, + nb_free); +} + +static_always_inline u32 +oct_virtio_enqueue (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t **b, u16 nb_pkts, u16 virtio_devid) +{ + u64 rx_offload; + vlib_buffer_t *bp; + bool next_present; + u64 tx_q_map, q_map; + u16 idx = 0, nb_frags = 0; + u32 cpu_id = vm->cpu_id; + u16 nb_pkts_left = nb_pkts, hdr_len; + u16 queue, virt_q, sent = 0, cur_sent = 0; + void *virt_b[VLIB_FRAME_SIZE * OCT_VIRT_MAX_FRAGS]; + struct dao_virtio_net_hdr *v_hdr[4], *head; + struct dao_virtio_net_hdr vhdr_init = { 0 }; + oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data; + + tx_q_map = ptd[cpu_id].q_map[virtio_devid].qmap; + q_map = ptd[cpu_id].q_map[virtio_devid].qmap; + hdr_len = ptd[cpu_id].q_map[virtio_devid].virtio_hdr_sz; + rx_offload = ptd[cpu_id].intf[virtio_devid].rx_offloads; + + /* Packets reaching to tx node means we can assume the checksum is good. 
*/ + if (rx_offload & OCT_ETH_RX_OFFLOAD_CHECKSUM) + vhdr_init.hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; + + while (nb_pkts >= 8) + { + next_present = b[0]->flags & VLIB_BUFFER_NEXT_PRESENT || + b[1]->flags & VLIB_BUFFER_NEXT_PRESENT || + b[2]->flags & VLIB_BUFFER_NEXT_PRESENT || + b[3]->flags & VLIB_BUFFER_NEXT_PRESENT; + if (PREDICT_FALSE (next_present)) + break; + + v_hdr[0] = oct_bp_to_virt (b[0], hdr_len); + v_hdr[1] = oct_bp_to_virt (b[1], hdr_len); + v_hdr[2] = oct_bp_to_virt (b[2], hdr_len); + v_hdr[3] = oct_bp_to_virt (b[3], hdr_len); + + *v_hdr[0] = vhdr_init; + *v_hdr[1] = vhdr_init; + *v_hdr[2] = vhdr_init; + *v_hdr[3] = vhdr_init; + + virt_b[idx + 0] = (void *) v_hdr[0]; + virt_b[idx + 1] = (void *) v_hdr[1]; + virt_b[idx + 2] = (void *) v_hdr[2]; + virt_b[idx + 3] = (void *) v_hdr[3]; + + clib_prefetch_store (oct_bp_to_virt (b[4], hdr_len)); + clib_prefetch_store (oct_bp_to_virt (b[5], hdr_len)); + clib_prefetch_store (oct_bp_to_virt (b[6], hdr_len)); + clib_prefetch_store (oct_bp_to_virt (b[7], hdr_len)); + + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + + v_hdr[0]->desc_data[1] = b[0]->current_length; + v_hdr[1]->desc_data[1] = b[1]->current_length; + v_hdr[2]->desc_data[1] = b[2]->current_length; + v_hdr[3]->desc_data[1] = b[3]->current_length; + + /* Number of bytes deviates (+/-) from vlib buffer current data */ + v_hdr[0]->desc_data[0] = ~b[0]->current_data + 1; + v_hdr[1]->desc_data[0] = ~b[1]->current_data + 1; + v_hdr[2]->desc_data[0] = ~b[2]->current_data + 1; + v_hdr[3]->desc_data[0] = ~b[3]->current_data + 1; + + v_hdr[0]->hdr.num_buffers = 1; + v_hdr[1]->hdr.num_buffers = 1; + v_hdr[2]->hdr.num_buffers = 1; + v_hdr[3]->hdr.num_buffers = 1; + + b += 4; + idx += 4; + nb_pkts -= 4; + } + + while (nb_pkts) + { + bp = b[0]; + head = oct_bp_to_virt (bp, hdr_len); + do + { + v_hdr[0] = oct_bp_to_virt (bp, hdr_len); + 
*v_hdr[0] = vhdr_init; + virt_b[idx] = (void *) v_hdr[0]; + v_hdr[0]->desc_data[1] = bp->current_length; + /* Number of bytes deviates (+/-) from vlib buffer current data */ + v_hdr[0]->desc_data[0] = ~bp->current_data + 1; + next_present = bp->flags & VLIB_BUFFER_NEXT_PRESENT; + v_hdr[0]->hdr.num_buffers = 1; + idx++; + nb_frags++; + } + while (next_present && (bp = vlib_get_buffer (vm, bp->next_buffer))); + + head->hdr.num_buffers = nb_frags; + b++; + nb_pkts--; + nb_frags = 0; + } + + queue = ptd[cpu_id].q_map[virtio_devid].last_tx_q; + nb_pkts_left = idx; + + while (tx_q_map && nb_pkts_left) + { + if (!(tx_q_map & DAO_BIT (queue))) + goto next; + + tx_q_map &= ~(DAO_BIT (queue)); + virt_q = queue << 1; + cur_sent = dao_virtio_net_enqueue_burst_ext ( + virtio_devid, virt_q, &virt_b[sent], nb_pkts_left); + nb_pkts_left -= cur_sent; + sent += cur_sent; + + next: + queue = queue + 1; + if (DAO_BIT (queue) > q_map) + queue = 0; + } + + ptd[cpu_id].q_map[virtio_devid].last_tx_q = queue; + + if (PREDICT_FALSE (nb_pkts_left)) + oct_virt_free_to_vlib (vm, node, &virt_b[sent], nb_pkts_left, hdr_len); + + /* Flush and submit DMA ops */ + dao_dma_flush_submit (); + + return sent; +} + +static_always_inline void +oct_virtio_trace_buffers (vlib_main_t *vm, vlib_node_runtime_t *node, + oct_virtio_port_t *ovp, vlib_buffer_t **b, + u16 n_pkts, u16 virtio_id) +{ + u32 i; + u64 tx_q_map; + u32 cpu_id = clib_get_current_cpu_id (); + oct_virtio_per_thread_data_t *ptd = oct_virt_thread_data; + + tx_q_map = ptd[cpu_id].q_map[virtio_id].qmap; + + for (i = 0; i < n_pkts; i++) + { + if (!(b[i]->flags & VLIB_BUFFER_IS_TRACED)) + continue; + oct_virt_tx_trace_t *t = vlib_add_trace (vm, node, b[i], sizeof (*t)); + t->virtio_id = virtio_id; + t->sw_if_index = vnet_buffer (b[i])->sw_if_index[VLIB_TX]; + t->tx_q_map = tx_q_map; + } +} + +VNET_DEV_NODE_FN (oct_virtio_tx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_tx_pkts; + u16 virtio_id; + 
oct_virtio_port_t *ovp; + u32 *from = vlib_frame_vector_args (frame); + u16 n_pkts = frame->n_vectors; + vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers; + vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); + vnet_dev_tx_queue_t *txq = rt->tx_queue; + + if (!txq) + return 0; + + ovp = vnet_dev_get_port_data (txq->port); + virtio_id = ovp->virtio_id; + + vlib_get_buffers (vm, from, b, n_pkts); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + oct_virtio_trace_buffers (vm, node, ovp, b, n_pkts, virtio_id); + + n_tx_pkts = oct_virtio_enqueue (vm, node, b, n_pkts, virtio_id); + + return n_tx_pkts; +} diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c index ec0e8fb7ff..9d36a51e1b 100644 --- a/src/plugins/dpdk/main.c +++ b/src/plugins/dpdk/main.c @@ -94,7 +94,8 @@ VLIB_INIT_FUNCTION (dpdk_main_init) = }; VLIB_PLUGIN_REGISTER () = { - .version = VPP_BUILD_VER, - .description = "Data Plane Development Kit (DPDK)", + .version = VPP_BUILD_VER, + .description = "Data Plane Development Kit (DPDK)", + .default_disabled = 1, }; /* *INDENT-ON* */ diff --git a/src/plugins/flowprobe/node.c b/src/plugins/flowprobe/node.c index 194664962e..cf03449e13 100644 --- a/src/plugins/flowprobe/node.c +++ b/src/plugins/flowprobe/node.c @@ -105,6 +105,9 @@ vlib_node_registration_t flowprobe_input_l2_node; vlib_node_registration_t flowprobe_output_ip4_node; vlib_node_registration_t flowprobe_output_ip6_node; vlib_node_registration_t flowprobe_output_l2_node; +vlib_node_registration_t flowprobe_flush_ip4_node; +vlib_node_registration_t flowprobe_flush_ip6_node; +vlib_node_registration_t flowprobe_flush_l2_node; /* No counters at the moment */ #define foreach_flowprobe_error \ @@ -945,18 +948,57 @@ flush_record (flowprobe_variant_t which) void flowprobe_flush_callback_ip4 (void) { + vlib_main_t *worker_vm; + u32 i; + + /* Flush for each worker thread */ + for (i = 1; i < vlib_get_n_threads (); i++) + { + worker_vm = vlib_get_main_by_index (i); 
+ if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + flowprobe_flush_ip4_node.index); + } + + /* Flush for the main thread */ flush_record (FLOW_VARIANT_IP4); } void flowprobe_flush_callback_ip6 (void) { + vlib_main_t *worker_vm; + u32 i; + + /* Flush for each worker thread */ + for (i = 1; i < vlib_get_n_threads (); i++) + { + worker_vm = vlib_get_main_by_index (i); + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + flowprobe_flush_ip6_node.index); + } + + /* Flush for the main thread */ flush_record (FLOW_VARIANT_IP6); } void flowprobe_flush_callback_l2 (void) { + vlib_main_t *worker_vm; + u32 i; + + /* Flush for each worker thread */ + for (i = 1; i < vlib_get_n_threads (); i++) + { + worker_vm = vlib_get_main_by_index (i); + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + flowprobe_flush_l2_node.index); + } + + /* Flush for the main thread */ flush_record (FLOW_VARIANT_L2); flush_record (FLOW_VARIANT_L2_IP4); flush_record (FLOW_VARIANT_L2_IP6); @@ -1062,6 +1104,32 @@ flowprobe_walker_process (vlib_main_t * vm, return 0; } +static uword +flowprobe_flush_ip4 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) +{ + flush_record (FLOW_VARIANT_IP4); + + return 0; +} + +static uword +flowprobe_flush_ip6 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) +{ + flush_record (FLOW_VARIANT_IP6); + + return 0; +} + +static uword +flowprobe_flush_l2 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) +{ + flush_record (FLOW_VARIANT_L2); + flush_record (FLOW_VARIANT_L2_IP4); + flush_record (FLOW_VARIANT_L2_IP6); + + return 0; +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (flowprobe_input_ip4_node) = { .function = flowprobe_input_ip4_node_fn, @@ -1135,6 +1203,24 @@ VLIB_REGISTER_NODE (flowprobe_walker_node) = { .type = VLIB_NODE_TYPE_INPUT, .state = VLIB_NODE_STATE_INTERRUPT, }; +VLIB_REGISTER_NODE (flowprobe_flush_ip4_node) = { + .function = flowprobe_flush_ip4, + .name = "flowprobe-flush-ip4", + .type = 
VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, +}; +VLIB_REGISTER_NODE (flowprobe_flush_ip6_node) = { + .function = flowprobe_flush_ip6, + .name = "flowprobe-flush-ip6", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, +}; +VLIB_REGISTER_NODE (flowprobe_flush_l2_node) = { + .function = flowprobe_flush_l2, + .name = "flowprobe-flush-l2", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, +}; /* *INDENT-ON* */ /* diff --git a/src/plugins/http_static_l4/CMakeLists.txt b/src/plugins/http_static_l4/CMakeLists.txt new file mode 100644 index 0000000000..845a849907 --- /dev/null +++ b/src/plugins/http_static_l4/CMakeLists.txt @@ -0,0 +1,26 @@ + +# Copyright (c) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(http_static_l4 + SOURCES + http_static.c + static_server.c + http_static.h + + API_FILES + http_static_l4.api + + API_TEST_SOURCES + http_static_test.c +) diff --git a/src/plugins/http_static_l4/FEATURE.yaml b/src/plugins/http_static_l4/FEATURE.yaml new file mode 100644 index 0000000000..b99f2419db --- /dev/null +++ b/src/plugins/http_static_l4/FEATURE.yaml @@ -0,0 +1,10 @@ +--- +name: Static http https l4 server +maintainer: Dave Barach +features: + - An extensible static http/https server with caching +description: "A simple caching static http / https server + A built-in vpp host stack application. + Supports HTTP GET and HTTP POST methods." 
+state: production +properties: [API, CLI, MULTITHREAD] diff --git a/src/plugins/http_static_l4/http_static.c b/src/plugins/http_static_l4/http_static.c new file mode 100644 index 0000000000..5a87af507e --- /dev/null +++ b/src/plugins/http_static_l4/http_static.c @@ -0,0 +1,84 @@ +/* + * http_static.c - skeleton vpp engine plug-in + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +/* define message IDs */ +#include +#include + +#include + +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) + +#define REPLY_MSG_ID_BASE hmp->msg_id_base +#include + +http_static_l4_main_t http_static_l4_main; + +/* API message handler */ +static void +vl_api_http_static_l4_enable_t_handler (vl_api_http_static_l4_enable_t *mp) +{ + vl_api_http_static_l4_enable_reply_t *rmp; + http_static_l4_main_t *hmp = &http_static_l4_main; + int rv; + + mp->uri[ARRAY_LEN (mp->uri) - 1] = 0; + mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0; + + rv = http_static_l4_server_enable ( + ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), + ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, + mp->uri); + + REPLY_MACRO (VL_API_HTTP_STATIC_L4_ENABLE_REPLY); +} + +#include +static clib_error_t * +http_static_l4_init (vlib_main_t *vm) +{ + http_static_l4_main_t *hmp = &http_static_l4_main; + + hmp->vlib_main = vm; + hmp->vnet_main = vnet_get_main (); + + /* Ask for a correctly-sized block of API message decode slots */ + hmp->msg_id_base = setup_message_id_table (); + + return 0; +} + +VLIB_INIT_FUNCTION (http_static_l4_init); + +VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, + .description = "HTTP Static l4 Server" }; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/http_static_l4/http_static.h b/src/plugins/http_static_l4/http_static.h new file mode 100644 index 0000000000..5fe92b5325 --- /dev/null +++ b/src/plugins/http_static_l4/http_static.h @@ -0,0 +1,236 @@ + +/* + * http_static.h - skeleton vpp engine plug-in header file + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_http_static_l4_h__ +#define __included_http_static_l4_h__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/** @file http_static.h + * Static http server definitions + */ + +#define HTTP_STAT_LINE_LEN 64 +#define HTTP_PATH_LEN 10 + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} http_static_l4_main_t; + +extern http_static_l4_main_t http_static_l4_main; + +/** \brief Session States + */ + +typedef enum +{ + /** Session is closed */ + HTTP_STATE_CLOSED, + /** Session is established */ + HTTP_STATE_ESTABLISHED, + /** Session has more data to receive */ + HTTP_STATE_RECEIVE_MORE_DATA, + /** Session has sent an OK response */ + HTTP_STATE_OK_SENT, + /** Session has sent an HTML response */ + HTTP_STATE_SEND_MORE_DATA, + /** Session has sent file, closes the connection */ + HTTP_STATE_CLOSING, + /** Number of states */ + HTTP_STATE_N_STATES, +} http_session_state_t; + +typedef enum +{ + CALLED_FROM_RX, + CALLED_FROM_TX, + CALLED_FROM_TIMER, +} http_state_machine_called_from_t; + +typedef enum +{ + HTTP_BUILTIN_METHOD_GET = 0, + HTTP_BUILTIN_METHOD_POST, +} http_builtin_method_type_t; + +/** \brief Application session + */ +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** Base class instance variables */ +#define _(type, name) type name; + foreach_app_session_field +#undef _ + /** rx thread index */ + u32 thread_index; + /** rx buffer */ + u8 *rx_buf; 
+ /** vpp session index, handle */ + u32 vpp_session_index; + u64 vpp_session_handle; + /** Timeout timer handle */ + u32 timer_handle; + /** Fully-resolved file path */ + u8 path[HTTP_PATH_LEN]; + /** File data, a vector */ + u8 *data; + /** Current data send offset */ + u32 data_offset; + /** Need to free data in detach_cache_entry */ + int free_data; + + /** File cache pool index */ + u32 cache_pool_index; + /** state machine called from... */ + http_state_machine_called_from_t called_from; +} http_session_t; + +/** \brief In-memory file data cache entry + */ +typedef struct +{ + /** Name of the file */ + u8 filename[HTTP_PATH_LEN]; + /** Contents of the file, as a u8 * vector */ + u8 *data; + /** Last time the cache entry was used */ + f64 last_used; + /** Cache LRU links */ + u32 next_index; + u32 prev_index; + /** Reference count, so we don't recycle while referenced */ + int inuse; +} file_data_cache_t; + +/** \brief Main data structure + */ + +typedef struct +{ + /** Per thread vector of session pools */ + http_session_t **sessions; + /** Per thread Session pool reader writer lock */ + clib_rwlock_t *thr_sessions_lock; + /** vpp session to http session index map */ + u32 **session_to_http_session; + + u8 ***rx_buf_pool; + /** Enable debug messages */ + int debug_level; + + /** vpp message/event queue */ + svm_msg_q_t **vpp_queue; + + /** Unified file data cache pool */ + file_data_cache_t *cache_pool; + /** Hash table which maps file name to file data */ + BVT (clib_bihash) name_to_data; + + /** Hash tables for built-in GET and POST handlers */ + uword *get_url_handlers; + uword *post_url_handlers; + + /** Current cache size */ + u64 cache_size; + /** Max cache size in bytes */ + u64 cache_limit; + /** Number of cache evictions */ + u64 cache_evictions; + + /** Cache LRU listheads */ + u32 first_index; + u32 last_index; + + /** root path to be served */ + u8 *www_root; + + /** filename for index.html redirect */ + char redirect_file_name[HTTP_PATH_LEN]; + + 
/** Server's event queue */ + svm_queue_t *vl_input_queue; + + /** API client handle */ + u32 my_client_index; + + /** Application index */ + u32 app_index; + + /** Process node index for event scheduling */ + u32 node_index; + + /** Cert and key pair for tls */ + u32 ckpair_index; + + /** Session cleanup timer wheel */ + tw_timer_wheel_2t_1w_2048sl_t tw; + clib_spinlock_t tw_lock; + + /** Time base, so we can generate browser cache control http spew */ + clib_timebase_t timebase; + + /** Number of preallocated fifos, usually 0 */ + u32 prealloc_fifos; + /** Private segment size, usually 0 */ + u64 private_segment_size; + /** Size of the allocated rx, tx fifos, roughly 8K or so */ + u32 fifo_size; + /** fifo dequeue threshold */ + u32 fifo_deq_thresh; + /** The bind URI, defaults to tcp://0.0.0.0/80 */ + u8 *uri; + /** Keep Connection Alive after file fetch is done, default=0 */ + u8 keepalive; + vlib_main_t *vlib_main; +} http_static_l4_server_main_t; + +extern http_static_l4_server_main_t http_static_l4_server_main; + +int http_static_l4_server_enable (u32 fifo_size, u32 cache_limit, + u32 prealloc_fifos, u32 private_segment_size, + u8 *www_root, u8 *uri); + +void http_static_l4_server_register_builtin_handler (void *fp, char *url, + int type); + +#endif /* __included_http_static_l4_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/http_static_l4/http_static_l4.api b/src/plugins/http_static_l4/http_static_l4.api new file mode 100644 index 0000000000..a43bb0079d --- /dev/null +++ b/src/plugins/http_static_l4/http_static_l4.api @@ -0,0 +1,35 @@ + +/** \file + This file defines static http l4 server control-plane API messages +*/ +option version = "2.1.0"; + +/** \brief Configure and enable the static http l4 server + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param fifo_size - size (in 
bytes) of the session FIFOs + @param cache_size_limit - size (in bytes) of the in-memory file data cache + @param prealloc_fifos - number of preallocated fifos (usually 0) + @param private_segment_size - fifo segment size (usually 0) + @param www_root - html root path + @param uri - bind URI, defaults to "tcp://0.0.0.0/80" +*/ + +autoreply define http_static_l4_enable { + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + /* Typical options */ + u32 fifo_size; + u32 cache_size_limit; + /* Unusual options */ + u32 prealloc_fifos; + u32 private_segment_size; + + /* Root of the html path */ + string www_root[256]; + /* The bind URI */ + string uri[256]; +}; diff --git a/src/plugins/http_static_l4/http_static_test.c b/src/plugins/http_static_l4/http_static_test.c new file mode 100644 index 0000000000..7d784a2810 --- /dev/null +++ b/src/plugins/http_static_l4/http_static_test.c @@ -0,0 +1,135 @@ +/* + * http_static.c - skeleton vpp-api-test plug-in + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include + +uword unformat_sw_if_index (unformat_input_t *input, va_list *args); + +/* Declare message IDs */ +#include +#include + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} http_static_l4_test_main_t; + +http_static_l4_test_main_t http_static_l4_test_main; + +#define __plugin_msg_base http_static_l4_test_main.msg_id_base +#include + +static int +api_http_static_l4_enable (vat_main_t *vam) +{ + unformat_input_t *line_input = vam->input; + vl_api_http_static_l4_enable_t *mp; + u64 tmp; + u8 *www_root = 0; + u8 *uri = 0; + u32 prealloc_fifos = 0; + u32 private_segment_size = 0; + u32 fifo_size = 8 << 10; + u32 cache_size_limit = 1 << 20; + int ret; + + /* Parse args required to build the message; size options must fit in u32 */ + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "www-root %s", &www_root)) + ; + else if (unformat (line_input, "prealloc-fifos %d", &prealloc_fifos)) + ; + else if (unformat (line_input, "private-segment-size %U", + unformat_memory_size, &tmp)) + { + if (tmp >= 0x100000000ULL) + { + errmsg ("private segment size %llu, too large", tmp); + return -99; + } + private_segment_size = (u32) tmp; + } + else if (unformat (line_input, "fifo-size %U", unformat_memory_size, + &tmp)) + { + if (tmp >= 0x100000000ULL) + { + errmsg ("fifo-size %llu, too large", tmp); + return -99; + } + fifo_size = (u32) tmp; + } + else if (unformat (line_input, "cache-size %U", unformat_memory_size, + &tmp)) + { + if (tmp < (128ULL << 10) || tmp >= 0x100000000ULL) + { + errmsg ("cache-size must be at least 128kb and below 4gb"); + return -99; + } + cache_size_limit = (u32) tmp; + } + + else if (unformat (line_input, "uri %s", &uri)) + ; + else + { + errmsg ("unknown input `%U'", format_unformat_error, line_input); + return -99; + } + } + + if (www_root == 0) + { + errmsg ("Must specify www-root"); + return -99; + } + + if (uri == 0) + uri = format (0, "tcp://0.0.0.0/80%c", 0); + + /* Construct the API
message */ + M (HTTP_STATIC_L4_ENABLE, mp); + strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); + strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); + mp->fifo_size = ntohl (fifo_size); + mp->cache_size_limit = ntohl (cache_size_limit); + mp->prealloc_fifos = ntohl (prealloc_fifos); + mp->private_segment_size = ntohl (private_segment_size); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +#include + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/http_static_l4/static_server.c b/src/plugins/http_static_l4/static_server.c new file mode 100644 index 0000000000..718e1d8216 --- /dev/null +++ b/src/plugins/http_static_l4/static_server.c @@ -0,0 +1,2072 @@ +/* + * Copyright (c) 2017-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define HTTP_CONNECT "Connection:" + +#define HTTP_CONN_LEN (sizeof (HTTP_CONNECT) - 1) +#define HTTP_CONN_KA "Keep-Alive" +#define HTTP_CONN_CL "Close" +#define HTTP_CONN_CT_L "Content-Length: " +#define HTTP_VER_1_0 "HTTP/1.0" + +#define HTTP_BODY_PREFIX "\r\n\r\n" +#define HTTP_201 "HTTP/1.1 201 OK\r\n" +#define HTTP_201_LEN (sizeof (HTTP_201) - 1) +#define HTTP_200 "HTTP/1.1 200 OK\r\n" +#define HTTP_200_LEN (sizeof (HTTP_200) - 1) + +#define IND_HTML "index.html" +#define IND_HTML_LEN (sizeof (IND_HTML) - 1) + +#define HTTP_SESSION_CLOSED INT_MAX + +/** @file static_server.c + * Static http server, sufficient to + * serve .html / .css / .js content. + */ +/*? %%clicmd:group_label Static HTTP Server %% ?*/ + +#define HTTP_FIFO_DEF_THRESH (64 << 10) + +http_static_l4_server_main_t http_static_l4_server_main; + +/** \brief Format the called-from enum + */ + +static u8 * +format_state_machine_called_from (u8 *s, va_list *args) +{ + http_state_machine_called_from_t cf = + va_arg (*args, http_state_machine_called_from_t); + char *which = "bogus!"; + + switch (cf) + { + case CALLED_FROM_RX: + which = "from rx"; + break; + case CALLED_FROM_TX: + which = "from tx"; + break; + case CALLED_FROM_TIMER: + which = "from timer"; + break; + + default: + break; + } + + s = format (s, "%s", which); + return s; +} + +/** \brief Acquire reader lock on the sessions pools + */ +static void +http_static_server_thr_sessions_reader_lock (u32 thread_index) +{ + clib_rwlock_reader_lock ( + &http_static_l4_server_main.thr_sessions_lock[thread_index]); +} + +/** \brief Drop reader lock on the sessions pools + */ +static void +http_static_server_thr_sessions_reader_unlock (u32 thread_index) +{ + clib_rwlock_reader_unlock ( + &http_static_l4_server_main.thr_sessions_lock[thread_index]); +} + +/** \brief Acquire writer lock on the sessions pools + */ +static 
void +http_static_server_thr_sessions_writer_lock (u32 thread_index) +{ + clib_rwlock_writer_lock ( + &http_static_l4_server_main.thr_sessions_lock[thread_index]); +} + +/** \brief Drop writer lock on the sessions pools + */ +static void +http_static_server_thr_sessions_writer_unlock (u32 thread_index) +{ + clib_rwlock_writer_unlock ( + &http_static_l4_server_main.thr_sessions_lock[thread_index]); +} + +static void +http_static_server_all_sessions_writer_unlock (void) +{ + u32 thr_index; + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + /* Unlock sessions of all threads; valid indices are 0 .. vec_len - 1 */ + for (thr_index = 0; thr_index < vec_len (hsm->thr_sessions_lock); + thr_index++) + http_static_server_thr_sessions_writer_unlock (thr_index); +} + +static void +http_static_server_all_sessions_writer_lock (void) +{ + u32 thr_index; + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + /* Lock sessions of all threads; valid indices are 0 .. vec_len - 1 */ + for (thr_index = 0; thr_index < vec_len (hsm->thr_sessions_lock); + thr_index++) + http_static_server_thr_sessions_writer_lock (thr_index); +} + +static void +http_static_server_all_sessions_reader_unlock (void) +{ + u32 thr_index; + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + /* Unlock sessions of all threads; valid indices are 0 .. vec_len - 1 */ + for (thr_index = 0; thr_index < vec_len (hsm->thr_sessions_lock); + thr_index++) + http_static_server_thr_sessions_reader_unlock (thr_index); +} + +static void +http_static_server_all_sessions_reader_lock (void) +{ + u32 thr_index; + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + /* Lock sessions of all threads; valid indices are 0 .. vec_len - 1 */ + for (thr_index = 0; thr_index < vec_len (hsm->thr_sessions_lock); + thr_index++) + http_static_server_thr_sessions_reader_lock (thr_index); +} + +/** \brief Start a session cleanup timer + */ +static void +http_static_server_session_timer_start (http_session_t *hs) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + u32 hs_handle; + + /* The session
layer may fire a callback at a later date... */ + if (!pool_is_free (hsm->sessions[hs->thread_index], hs)) + { + hs_handle = hs->thread_index << 24 | hs->session_index; + clib_spinlock_lock (&http_static_l4_server_main.tw_lock); + hs->timer_handle = tw_timer_start_2t_1w_2048sl ( + &http_static_l4_server_main.tw, hs_handle, 0, 60); + clib_spinlock_unlock (&http_static_l4_server_main.tw_lock); + } +} + +/** \brief stop a session cleanup timer + */ +static void +http_static_server_session_timer_stop (http_session_t *hs) +{ + if (hs->timer_handle == ~0) + return; + clib_spinlock_lock (&http_static_l4_server_main.tw_lock); + tw_timer_stop_2t_1w_2048sl (&http_static_l4_server_main.tw, + hs->timer_handle); + clib_spinlock_unlock (&http_static_l4_server_main.tw_lock); +} + +/** \brief Allocate an http session + */ +static http_session_t * +http_static_server_session_alloc (u32 thread_index) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + http_session_t *hs = NULL; + + pool_get_aligned_zero (hsm->sessions[thread_index], hs, 0); + hs->session_index = hs - hsm->sessions[thread_index]; + hs->thread_index = thread_index; + hs->timer_handle = ~0; + hs->cache_pool_index = ~0; + return hs; +} + +/** \brief Get an http session by index + */ +static http_session_t * +http_static_server_session_get (u32 thread_index, u32 hs_index) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + if (pool_is_free_index (hsm->sessions[thread_index], hs_index)) + return 0; + return pool_elt_at_index (hsm->sessions[thread_index], hs_index); +} + +/** \brief Free an http session + */ +static void +http_static_server_session_free (http_session_t *hs) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + /* Make sure the timer is stopped... 
*/ + http_static_server_session_timer_stop (hs); + pool_put (hsm->sessions[hs->thread_index], hs); + + if (CLIB_DEBUG) + { + u32 save_thread_index; + save_thread_index = hs->thread_index; + /* Poison the entry, preserve timer state and thread index */ + memset (hs, 0xfa, sizeof (*hs)); + hs->timer_handle = ~0; + hs->thread_index = save_thread_index; + } +} + +/** \brief add a session to the vpp < -- > http session index map + */ +static void +http_static_server_session_lookup_add (u32 thread_index, u32 s_index, + u32 hs_index) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + vec_validate (hsm->session_to_http_session[thread_index], s_index); + hsm->session_to_http_session[thread_index][s_index] = hs_index; +} + +/** \brief Remove a session from the vpp < -- > http session index map + */ +static void +http_static_server_session_lookup_del (u32 thread_index, u32 s_index) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + hsm->session_to_http_session[thread_index][s_index] = ~0; +} + +/** \brief lookup a session in the vpp < -- > http session index map + */ + +static http_session_t * +http_static_server_session_lookup (u32 thread_index, u32 s_index) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + u32 hs_index; + + if (s_index < vec_len (hsm->session_to_http_session[thread_index])) + { + hs_index = hsm->session_to_http_session[thread_index][s_index]; + return http_static_server_session_get (thread_index, hs_index); + } + return 0; +} + +/** \brief Detach cache entry from session + */ + +static void +http_static_server_detach_cache_entry (http_session_t *hs) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + file_data_cache_t *ep; + + /* + * Decrement cache pool entry reference count + * Note that if e.g. 
a file lookup fails, the cache pool index + * won't be set + */ + if (hs->cache_pool_index != ~0) + { + ep = pool_elt_at_index (hsm->cache_pool, hs->cache_pool_index); + ep->inuse--; + if (hsm->debug_level > 1) + clib_warning ("index %d refcnt now %d", hs->cache_pool_index, + ep->inuse); + } + hs->cache_pool_index = ~0; + if (hs->free_data) + vec_free (hs->data); + hs->data = 0; + hs->data_offset = 0; + hs->free_data = 0; + hs->path[0] = 0; +} + +/** \brief Disconnect a session + */ +static void +http_static_server_session_disconnect (http_session_t *hs) +{ + vnet_disconnect_args_t _a = { 0 }, *a = &_a; + a->handle = hs->vpp_session_handle; + a->app_index = http_static_l4_server_main.app_index; + vnet_disconnect_session (a); +} + +/** \brief http error boilerplate + */ +static const char *http_error_template = "HTTP/1.1 %s\r\n" + "Date: %U GMT\r\n" + "Content-Type: text/html\r\n" + "Connection: close\r\n" + "Pragma: no-cache\r\n" + "Content-Length: 0\r\n\r\n"; + +#define HTTP_RESPONSE_STR_MAX_SZ 512 +/** \brief http response boilerplate + */ +static const char *http_response_template = "Date: %s GMT\r\n" + "Expires: %s GMT\r\n" + "Server: VPP Static\r\n" + "Content-Type: %s\r\n" + "Content-Length: %d\r\n\r\n"; + +/** \brief receive http data + @param hs - http session + @return -1 failed, 0 for success, 1 partly successful. 
+*/ +static u32 +static_receive_data (http_session_t *hs) +{ + u32 max_dequeue; + int n_read; + + max_dequeue = svm_fifo_max_dequeue (hs->rx_fifo); + if (PREDICT_FALSE (max_dequeue == 0)) + { + return -1; /* return type is u32, so callers receive this as ~0 */ + } + + n_read = app_recv_stream_raw (hs->rx_fifo, &hs->data[hs->data_offset], + max_dequeue, 0, 0 /* peek */); + hs->data_offset += n_read; + if (n_read != max_dequeue) + clib_warning ("WARNING: max_dequeue %d bytes while read only %d bytes", + max_dequeue, n_read); + if (!svm_fifo_is_empty (hs->rx_fifo)) + return 1; + + svm_fifo_unset_event (hs->rx_fifo); + return 0; +} + +/** \brief send http data + @param hs - http session + @param data - the data vector to transmit + @param length - length of data + @param offset - transmit offset for this operation + @return offset for next transmit operation, may be unchanged w/ full fifo; ~0 when length == offset +*/ + +static u32 +static_send_data (http_session_t *hs, u8 *data, u32 length, u32 offset) +{ + u32 bytes_to_send; + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + bytes_to_send = length - offset; + + while (bytes_to_send > 0) + { + int actual_transfer; + + actual_transfer = svm_fifo_enqueue ( + hs->tx_fifo, clib_min (bytes_to_send, 4 << 20), data + offset); + + /* Made any progress? Both branches below return, so each call makes one enqueue attempt */ + if (actual_transfer <= 0) + { + if (hsm->debug_level > 0 && bytes_to_send > 0) + clib_warning ("WARNING: still %d bytes to send", bytes_to_send); + return offset; + } + else + { + offset += actual_transfer; + bytes_to_send -= actual_transfer; + + if (hsm->debug_level && bytes_to_send > 0) + clib_warning ("WARNING: still %d bytes to send", bytes_to_send); + + if (svm_fifo_set_event (hs->tx_fifo)) + session_send_io_evt_to_thread (hs->tx_fifo, + SESSION_IO_EVT_TX_FLUSH); + return offset; + } + } + /* Reached only when length == offset, i.e. the loop body never ran */ + return ~0; +} + +/** \brief Send an http error string + @param hs - the http session + @param str - the error string, e.g.
"404 Not Found" +*/ +static void +send_error (http_session_t *hs, char *str) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + u8 *data; + f64 now; + + now = clib_timebase_now (&hsm->timebase); + data = format (0, http_error_template, str, format_clib_timebase_time, now); + static_send_data (hs, data, vec_len (data), 0); + vec_free (data); +} + +/** \brief Retrieve data from the application layer + */ +static int +session_rx_request (http_session_t *hs) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + u32 max_dequeue, cursize; + int n_read; + + max_dequeue = svm_fifo_max_dequeue (hs->rx_fifo); + if (PREDICT_FALSE (max_dequeue == 0)) + return -1; + + cursize = vec_len (hs->rx_buf); + if (vec_mem_size (hs->rx_buf) == 0) + vec_pop2 (hsm->rx_buf_pool[hs->thread_index], hs->rx_buf); + + vec_validate (hs->rx_buf, cursize + max_dequeue - 1); + n_read = app_recv_stream_raw (hs->rx_fifo, hs->rx_buf + cursize, max_dequeue, + 0, 0 /* peek */); + ASSERT (n_read == max_dequeue); + if (svm_fifo_is_empty (hs->rx_fifo)) + svm_fifo_unset_event (hs->rx_fifo); + + vec_set_len (hs->rx_buf, cursize + n_read); + return 0; +} + +/** \brief Sanity-check the forward and reverse LRU lists + */ +static inline void +lru_validate (http_static_l4_server_main_t *hsm) +{ +#if CLIB_DEBUG > 0 + f64 last_timestamp; + u32 index; + int i; + file_data_cache_t *ep; + + last_timestamp = 1e70; + for (i = 1, index = hsm->first_index; index != ~0;) + { + ep = pool_elt_at_index (hsm->cache_pool, index); + index = ep->next_index; + /* Timestamps should be smaller (older) as we walk the fwd list */ + if (ep->last_used > last_timestamp) + { + clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f", + ep - hsm->cache_pool, i, ep->last_used, + last_timestamp); + } + last_timestamp = ep->last_used; + i++; + } + + last_timestamp = 0.0; + for (i = 1, index = hsm->last_index; index != ~0;) + { + ep = pool_elt_at_index (hsm->cache_pool, index); + index = ep->prev_index; 
+      /* Timestamps should be larger (newer) as we walk the rev list */
+      if (ep->last_used < last_timestamp)
+        {
+          clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f",
+                        ep - hsm->cache_pool, i, ep->last_used,
+                        last_timestamp);
+        }
+      last_timestamp = ep->last_used;
+      i++;
+    }
+#endif
+}
+
+/** \brief Unlink a data cache entry from both LRU lists
+    @param hsm - server main structure owning the cache pool and list heads
+    @param ep - cache pool element to unlink
+
+    The entry's own next/prev indices are left untouched.
+ */
+static inline void
+lru_remove (http_static_l4_server_main_t *hsm, file_data_cache_t *ep)
+{
+  u32 self;
+
+  lru_validate (hsm);
+
+  self = ep - hsm->cache_pool;
+
+  /* The entry may be the head of either list */
+  if (hsm->first_index == self)
+    hsm->first_index = ep->next_index;
+  if (hsm->last_index == self)
+    hsm->last_index = ep->prev_index;
+
+  /* Successor now points back at our predecessor */
+  if (ep->next_index != ~0)
+    {
+      file_data_cache_t *succ =
+        pool_elt_at_index (hsm->cache_pool, ep->next_index);
+      succ->prev_index = ep->prev_index;
+    }
+
+  /* Predecessor now points forward at our successor */
+  if (ep->prev_index != ~0)
+    {
+      file_data_cache_t *pred =
+        pool_elt_at_index (hsm->cache_pool, ep->prev_index);
+      pred->next_index = ep->next_index;
+    }
+
+  lru_validate (hsm);
+}
+
+/** \brief Add an entry to the LRU lists, tag w/ supplied timestamp
+ */
+#if 0
+static inline void
+lru_add (http_static_l4_server_main_t *hsm, file_data_cache_t *ep, f64 now)
+{
+  file_data_cache_t *next_ep;
+  u32 ep_index;
+
+  lru_validate (hsm);
+
+  ep_index = ep - hsm->cache_pool;
+
+  /*
+   * Re-add at the head of the forward LRU list,
+   * tail of the reverse LRU list
+   */
+  if (hsm->first_index != ~0)
+    {
+      next_ep = pool_elt_at_index (hsm->cache_pool, hsm->first_index);
+      next_ep->prev_index = ep_index;
+    }
+
+  ep->prev_index = ~0;
+
+  /* ep now the new head of the LRU forward list */
+  ep->next_index = hsm->first_index;
+  hsm->first_index = ep_index;
+
+  /* single session case: also the tail of the reverse LRU list */
+  if (hsm->last_index == ~0)
+    hsm->last_index = ep_index;
+  ep->last_used = now;
+
+  lru_validate (hsm);
+}
+#endif
+/** \brief Remove and re-add a cache entry
from/to the LRU lists
+ */
+#if 0
+static inline void
+lru_update (http_static_l4_server_main_t *hsm, file_data_cache_t *ep, f64 now)
+{
+  lru_remove (hsm, ep);
+  lru_add (hsm, ep, now);
+}
+#endif
+/** \brief Session-layer (main) data rx callback.
+    Parse the http request, and reply to it.
+    Future extensions might include POST processing, active content, etc.
+*/
+
+/* svm_fifo_add_want_deq_ntf (tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL)
+get shoulder-tap when transport dequeues something, set in
+xmit routine. */
+
+/** \brief closed state - should never really get here
+    @param s - vpp session (unused here)
+    @param hs - http session, only its index is logged
+    @param cf - which callback drove the state machine, logged
+    @return always -1
+ */
+static int
+state_closed (session_t *s, http_session_t *hs,
+              http_state_machine_called_from_t cf)
+{
+  clib_warning ("WARNING: http session %d, called from %U", hs->session_index,
+                format_state_machine_called_from, cf);
+  return -1;
+}
+/** \brief Stop the session timer, mark the timer handle invalid (~0) and
+    disconnect the session
+    @param hs - http session to close
+ */
+static void
+close_session (http_session_t *hs)
+{
+  http_static_server_session_timer_stop (hs);
+  hs->timer_handle = ~0;
+  http_static_server_session_disconnect (hs);
+}
+
+/** \brief Register a builtin GET or POST handler
+    @param fp - handler, stored as the hash value for the url
+    @param url - key under which the handler is registered
+    @param request_type - HTTP_BUILTIN_METHOD_GET selects the GET table,
+           any other value selects the POST table
+
+    An url that already has a handler is left untouched and a warning is
+    logged.
+ */
+__clib_export void
+http_static_l4_server_register_builtin_handler (void *fp, char *url,
+                                                int request_type)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  uword *p, *builtin_table;
+
+  builtin_table = (request_type == HTTP_BUILTIN_METHOD_GET) ?
+                    hsm->get_url_handlers :
+                    hsm->post_url_handlers;
+
+  p = hash_get_mem (builtin_table, url);
+
+  if (p)
+    {
+      clib_warning ("WARNING: attempt to replace handler for %s '%s' ignored",
+                    (request_type == HTTP_BUILTIN_METHOD_GET) ? "GET" : "POST",
+                    url);
+      return;
+    }
+
+  hash_set_mem (builtin_table, url, (uword) fp);
+
+  /*
+   * Need to update the hash table pointer in http_static_l4_server_main
+   * in case we just expanded it...
+   */
+  if (request_type == HTTP_BUILTIN_METHOD_GET)
+    hsm->get_url_handlers = builtin_table;
+  else
+    hsm->post_url_handlers = builtin_table;
+}
+
+/** \brief Find the first occurrence of a C string inside a u8 vector
+    @param vec - vector to search, its length comes from vec_len
+    @param str - NUL-terminated pattern, at most its first 16 bytes are used
+    @return index of the first match, -1 when there is none
+ */
+static int
+v_find_index (u8 *vec, char *str)
+{
+  u32 slen = (u32) strnlen_s_inline (str, 16);
+  u32 vlen = vec_len (vec);
+  u32 pos;
+
+  ASSERT (slen > 0);
+
+  if (slen == 0 || vlen < slen)
+    return -1;
+
+  /* pos + slen <= vlen: the last possible position is tested as well */
+  for (pos = 0; pos + slen <= vlen; pos++)
+    {
+      if (!memcmp (&vec[pos], str, slen))
+        return (int) pos;
+    }
+
+  return -1;
+}
+
+/** \brief Same func as above (v_find_index) just case-insensitive.
+    @param vec - vector to search
+    @param str - NUL-terminated pattern, at most its first 16 bytes are used
+    @param start_pos - index in vec where the search starts
+    @return index (from the start of vec) of the first match, -1 if none
+ */
+static int
+v_find_index_insensitive (u8 *vec, char *str, u32 start_pos)
+{
+  u32 slen = (u32) strnlen_s_inline (str, 16);
+  u32 vlen = vec_len (vec);
+  u32 pos;
+
+  ASSERT (slen > 0);
+
+  /* Test start_pos first so the subtraction below cannot wrap */
+  if (PREDICT_FALSE (slen == 0 || start_pos >= vlen ||
+                     vlen - start_pos < slen))
+    return -1;
+
+  for (pos = start_pos; pos + slen <= vlen; pos++)
+    {
+      if (!strncasecmp ((const char *) &vec[pos], str, slen))
+        return (int) pos;
+    }
+
+  return -1;
+}
+
+/* Support files in following format : N[NN]c c=B|K|M, i.e. 1B,22K,789M
+ * Returns the size in bytes and stores the number of digits in *digits,
+ * or returns 0 when the name is not valid or the size does not fit a u32.
+ */
+static u32
+parse_fileformat (u8 *vec, int *digits)
+{
+  char *endptr;
+  long value;
+  u32 n_bytes, shift;
+
+  if (vec == NULL)
+    {
+      clib_warning ("vec is null");
+      return 0;
+    }
+
+  /* Avoid vpp format() function, it uses spinlocks. */
+  value = strtol ((char *) vec, &endptr, 10);
+  /* No digits, zero, negative or too big for a u32: not a valid size */
+  if (value <= 0 || (u64) value > (u64) ~(u32) 0)
+    return 0;
+  n_bytes = (u32) value;
+
+  *digits = endptr - (char *) vec;
+
+  switch (vec[*digits])
+    {
+    case 'B':
+      shift = 0;
+      break;
+    case 'K':
+      shift = 10;
+      break;
+    case 'M':
+      shift = 20;
+      break;
+    default:
+      return 0;
+    }
+
+  /* The scaled value must still fit in 32 bits */
+  if (n_bytes > (~(u32) 0 >> shift))
+    return 0;
+
+  return n_bytes << shift;
+}
+
+/** \brief established state - waiting for GET, POST, etc.
+ *
+ * Pulls the pending request into hs->rx_buf, answers "400 Bad Request"
+ * for requests shorter than 8 bytes and "405 Method Not Allowed" when
+ * neither "GET " nor "POST " is found, records the keepalive choice in
+ * hsm->keepalive and, for POST, parses the content length (pcl) and the
+ * start of the body (nrp / pfc). When the session has no data attached
+ * yet, the path is looked up in hsm->name_to_data; on a miss a GET builds
+ * a synthetic "file" of the requested size and a POST allocates pcl bytes,
+ * and a new cache entry is created either way.
+ *
+ * @param s - vpp session (not used directly here)
+ * @param hs - http session being served
+ * @param cf - which callback drove the state machine (not used here)
+ * @return 1 when the request was handled and the state machine should run
+ *         again, 0 when session_rx_request reported no data, -1 on the
+ *         error paths (the session is closed first)
+ */
+static int
+state_established (session_t *s, http_session_t *hs,
+                   http_state_machine_called_from_t cf)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  u8 *request = 0;
+  int i, c, p, rv;
+  u8 *nrp = NULL; /* new request pointer */
+  u8 request_type = HTTP_BUILTIN_METHOD_GET;
+  int digits = 0;
+  u32 n_bytes = 0, num_lines, remainder;
+  u32 pcl = 0; /* Post Content Length */
+  u32 pfc = 0; /* Post First Content */
+  const char *line =
+    "001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016\n";
+  u8 line_len = HTTP_STAT_LINE_LEN;
+  u8 *v = NULL;
+  u8 *key = NULL;
+  u8 *endptr;
+
+  ASSERT (strlen (line) == HTTP_STAT_LINE_LEN);
+
+  /* Read data from the session layer */
+  rv = session_rx_request (hs);
+
+  /* No data? Odd, but stay in this state and await further instructions */
+  if (rv)
+    return 0;
+
+  /* Process the client request */
+  request = hs->rx_buf;
+  if (vec_len (request) < 8)
+    {
+      if (hsm->debug_level > 1)
+        clib_warning ("http request:%s too short", request);
+      send_error (hs, "400 Bad Request");
+      close_session (hs);
+      return -1;
+    }
+
+  if ((i = v_find_index (request, "GET ")) >= 0)
+    goto find_end;
+  else if ((i = v_find_index (request, "POST ")) >= 0)
+    {
+      request_type = HTTP_BUILTIN_METHOD_POST;
+      goto find_end;
+    }
+
+  if (hsm->debug_level > 1)
+    clib_warning ("Unknown http method");
+
+  send_error (hs, "405 Method Not Allowed");
+  close_session (hs);
+  return -1;
+
+find_end:
+  /* Lose "GET /" or "POST /" */
+  vec_delete (request,
+              (request_type == HTTP_BUILTIN_METHOD_GET) ?
+                (i + sizeof ("GET /") - 1) :
+                (i + sizeof ("POST /") - 1),
+              0);
+  /* Check the HTTP version, for keepalive enabling needs. */
+  if (v_find_index (request, HTTP_VER_1_0) >= 0)
+    hsm->keepalive = 0;
+  else
+    hsm->keepalive = 1;
+
+  /* The header connection request, define the respond */
+  if ((c = v_find_index (request, HTTP_CONNECT)) >= 0)
+    {
+      if (v_find_index_insensitive (request, HTTP_CONN_KA,
+                                    c + HTTP_CONN_LEN) >= 0)
+        hsm->keepalive = 1;
+      else if (v_find_index_insensitive (request, HTTP_CONN_CL,
+                                         c + HTTP_CONN_LEN) >= 0)
+        hsm->keepalive = 0;
+    }
+
+  /* Lose "GET /" or "POST /"
+   * NOTE(review): the method prefix was already deleted right after
+   * find_end, so this looks like a second strip of the same number of
+   * bytes - confirm it is intended.
+   */
+  vec_delete (request, i + (sizeof ("GET /") - 1) + request_type, 0);
+
+  /* Replace 'index.html' file with redirect_file_name
+   * Relies on sizeof(redirect_file_name) < "index.html"
+   * NOTE(review): the if below has no statement of its own, so as written
+   * it guards the whole POST block that follows - confirm the intent.
+   */
+  if (v_find_index (request, "index.html") == 0 &&
+      vec_len (hsm->redirect_file_name))
+
+    if (request_type == HTTP_BUILTIN_METHOD_POST)
+      {
+        /* keep size of file by Content-Length for next rx queue msg read */
+        if ((p = v_find_index_insensitive (request, HTTP_CONN_CT_L,
+                                           i + sizeof ("POST "))) >= 0)
+          {
+            nrp = &request[p + sizeof (HTTP_CONN_CT_L) - 1];
+            pcl = (int) strtol ((char *) nrp, (char **) &endptr, 10);
+            digits = endptr - nrp;
+            if ((pcl == 0) || (digits > 8)) /* max support ~95MB. */
+              {
+                clib_warning ("Requested file :%s failed length parsing)",
+                              nrp);
+                close_session (hs);
+                return -1;
+              }
+          }
+        else
+          {
+            clib_warning ("POST length file:%s is invalid\n", nrp);
+            close_session (hs);
+            return -1;
+          }
+        if ((p = v_find_index_insensitive (nrp, HTTP_BODY_PREFIX,
+                                           (u32) (nrp - request))) < 0)
+          { /* NOTE(review): nrp points into request and is not itself a
+             * vector, yet the helper takes vec_len() of it - confirm */
+            clib_warning ("POST body request was not found:%s is invalid\n",
+                          request);
+            close_session (hs);
+            return -1;
+          }
+        else
+          {
+            nrp = nrp + p + sizeof (HTTP_BODY_PREFIX) - 1;
+          }
+        /* bytes of body that arrived together with the headers */
+        pfc = (vec_len (request) - (nrp - request));
+      }
+
+  /* find or read the file if we haven't done so yet. */
+  if (hs->data == 0)
+    {
+      BVT (clib_bihash_kv) kv;
+      file_data_cache_t *dp;
+      u8 pl = v_find_index (request, " "); /* pl = path length
+                                            * NOTE(review): a -1 result
+                                            * wraps to 255 here - confirm */
+      pl = clib_min (pl, sizeof (hs->path));
+
+      memset (hs->path, 0, sizeof (hs->path));
+      strncpy ((char *) hs->path, (const char *) request, pl);
+      /* key must be vector for hash purpose */
+      vec_resize (key, pl);
+      clib_memcpy_fast (key, hs->path, pl);
+      kv.key = (u64) key;
+      if (hsm->debug_level > 1)
+        clib_warning ("Using '%s' key for lookup table.\n", kv.key);
+
+      if (hsm->debug_level > 1)
+        clib_warning ("hs->path:%s, Post Content Length:%d", hs->path, pcl);
+
+      /* If the path name already exists in the cache, or not. */
+      if (BV (clib_bihash_search) (&hsm->name_to_data, &kv, &kv) == 0)
+        {
+          if (hsm->debug_level > 1)
+            clib_warning ("lookup '%s' returned %lld", kv.key, kv.value);
+
+          /* found the data.. */
+          dp = pool_elt_at_index (hsm->cache_pool, kv.value);
+          hs->data = dp->data;
+          /* Update the cache entry, mark it in-use */
+          hs->cache_pool_index = dp - hsm->cache_pool;
+          dp->inuse++;
+          if (hsm->debug_level > 1)
+            clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
+                          dp->inuse);
+          /* If POST request, need to put the replace info */
+          if (request_type == HTTP_BUILTIN_METHOD_POST)
+            {
+              u32 min, max = 0;
+              min = clib_min (pcl, vec_bytes (dp->data));
+              max = clib_max (pcl, vec_bytes (dp->data));
+              vec_delete (dp->data, (u32) (max - min), min);
+              clib_memcpy_fast (dp->data, nrp, pfc);
+              /* check if all data has been copied or if there is more data to
+               * receive */
+              if (pcl == pfc)
+                {
+                  static_send_data (hs, (u8 *) "HTTP/1.1 201 OK\r\n", 17, 0);
+                  hs->session_state = HTTP_STATE_OK_SENT;
+                }
+              else
+                {
+                  hs->data_offset = pfc;
+                  hs->session_state = HTTP_STATE_RECEIVE_MORE_DATA;
+                }
+            }
+        }
+      else
+        {
+          if (hsm->debug_level > 1)
+            clib_warning ("lookup '%s' failed", kv.key);
+
+          /* if file not found in cache, GET request with format N[NN]C can be
+           * accepted. */
+          if (request_type == HTTP_BUILTIN_METHOD_GET)
+            {
+              n_bytes = (int) strtol ((char *) request, (char **) &endptr, 10);
+              digits = endptr - request;
+              if ((n_bytes == 0) || (digits > 3))
+                {
+                  clib_warning ("Requested file:%s is invalid. valid format "
+                                "is \"N[NN]C\", N{0-9} C{B|K|M}.\n",
+                                request);
+                  close_session (hs);
+                  return -1;
+                }
+
+              switch (request[digits])
+                {
+                case 'B':
+                  break;
+                case 'K':
+                  n_bytes <<= 10;
+                  break;
+                case 'M':
+                  n_bytes <<= 20;
+                  break;
+                default:
+                  clib_warning ("Requested file:%s is invalid", request);
+                  close_session (hs);
+                  return -1;
+                }
+
+              /* No recycling, fail if exceeding limit */
+              if ((hsm->cache_size + n_bytes) > hsm->cache_limit)
+                {
+                  clib_warning ("ERROR: cache-size:%llu + file-size:%u may "
+                                "not exceed cache-limit:%llu",
+                                hsm->cache_size, n_bytes, hsm->cache_limit);
+                  close_session (hs);
+                  return -1;
+                }
+              /* Read the "file" into memory, 64B lines */
+              vec_resize (v, n_bytes);
+              num_lines = n_bytes / line_len;
+              remainder = n_bytes % line_len;
+              for (i = 0; i < num_lines; i++)
+                clib_memcpy_fast (v + i * line_len, line, strlen (line));
+              /* partial last line, i == num_lines here */
+              clib_memcpy_fast (v + i * line_len, line, remainder);
+            }
+          else if (request_type == HTTP_BUILTIN_METHOD_POST)
+            {
+              /* pcl is the total content to be received by the POST request,
+                 pfc is the current available data (not includes headers). */
+              vec_resize (v, pcl);
+              clib_memcpy_fast (v, nrp, pfc);
+              if (pcl == pfc)
+                {
+                  static_send_data (hs, (u8 *) "HTTP/1.1 201 OK\r\n", 17, 0);
+                  hs->session_state = HTTP_STATE_OK_SENT;
+                }
+              else
+                {
+                  hs->session_state = HTTP_STATE_RECEIVE_MORE_DATA;
+                }
+            }
+
+          hs->data = v;
+
+          /* Create a cache entry for it */
+          pool_get (hsm->cache_pool, dp);
+          memset (dp, 0, sizeof (*dp));
+          strcpy ((char *) dp->filename, (char *) hs->path);
+          dp->data = hs->data;
+          hs->cache_pool_index = dp - hsm->cache_pool;
+          dp->inuse++;
+          if (hsm->debug_level > 1)
+            clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
+                          dp->inuse);
+          /* clib_bihash_kv_vec8_8_t compares vecs, so in current code 'key'
+           * must be a vec. Creating a vec here does not effect performance,
+           * since this is the cache-miss slowpath.
+           */
+          kv.value = dp - hsm->cache_pool;
+          /* Add to the lookup table */
+          if (hsm->debug_level > 1)
+            clib_warning ("add '%s' value %lld", kv.key, kv.value);
+
+          if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
+                                        1 /* is_add */) < 0)
+            {
+              clib_warning ("BUG: add failed!");
+            }
+          hsm->cache_size += vec_len (dp->data);
+        }
+      if (hs->session_state == HTTP_STATE_OK_SENT)
+        {
+          hs->data_offset = 0;
+        }
+      else if (hs->session_state == HTTP_STATE_RECEIVE_MORE_DATA)
+        {
+          hs->data_offset = pfc;
+        }
+    }
+  /* Keep vec-len-reset, because it was done in original code. */
+  vec_reset_length (hs->rx_buf);
+  if (hs->session_state == HTTP_STATE_ESTABLISHED ||
+      hs->session_state == HTTP_STATE_OK_SENT)
+    {
+      hs->session_state = HTTP_STATE_OK_SENT;
+      /* send 200 OK first */
+      static_send_data (hs, (u8 *) "HTTP/1.1 200 OK\r\n", 17, 0);
+    }
+
+  return 1;
+}
+
+/** \brief receive more data state - aggregate more data from Rx fifo into the
+ * allocated data cache. In this state we manage data receive progress. In case
+ * no more data has been received after a while, we reattach the data cache
+ * page. To receive more data we will use the static_receive_data function.
+ * */
+static int
+state_receive_more_data (session_t *s, http_session_t *hs,
+                         http_state_machine_called_from_t cf)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  int status = static_receive_data (hs);
+
+  /* Anything but a clean, complete read is handed back to the caller */
+  if (status)
+    return status;
+
+  /* Body still incomplete: wait for the rx fifo to signal more data */
+  if (hs->data_offset < vec_len (hs->data))
+    {
+      svm_fifo_add_want_deq_ntf (hs->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+      hs->session_state = HTTP_STATE_RECEIVE_MORE_DATA;
+      return 0;
+    }
+
+  /* Exactly the announced amount arrived: acknowledge with 201 */
+  if (hs->data_offset == vec_len (hs->data))
+    {
+      static_send_data (hs, (u8 *) "HTTP/1.1 201 OK\r\n", 17, 0);
+      hs->session_state = HTTP_STATE_OK_SENT;
+    }
+
+  /* The cache entry is no longer needed by this session */
+  http_static_server_detach_cache_entry (hs);
+
+  /* Receive finished: ESTABLISHED again with keepalive, CLOSING without */
+  if (!hsm->keepalive)
+    {
+      svm_fifo_add_want_deq_ntf (hs->tx_fifo,
+                                 SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY);
+      hs->session_state = HTTP_STATE_CLOSING;
+    }
+  else
+    hs->session_state = HTTP_STATE_ESTABLISHED;
+
+  return 0;
+}
+
+static int
+state_send_more_data (session_t *s, http_session_t *hs,
+                      http_state_machine_called_from_t cf)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  /* Start sending data */
+  hs->data_offset =
+    static_send_data (hs, hs->data, vec_len (hs->data), hs->data_offset);
+
+  /* Did we finish? */
+  if (hs->data_offset < vec_len (hs->data))
+    {
+      /* No: ask for a shoulder-tap when the tx fifo has space */
+      svm_fifo_add_want_deq_ntf (hs->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+      hs->session_state = HTTP_STATE_SEND_MORE_DATA;
+      return 0;
+    }
+  /* Finished with this transaction, move to CLOSING (no keepalive),
+   * or back to ESTABLISHED (keepalive).
+   */
+
+  /* Let go of the file cache entry */
+  http_static_server_detach_cache_entry (hs);
+  if (hsm->keepalive)
+    hs->session_state = HTTP_STATE_ESTABLISHED;
+  else
+    {
+      svm_fifo_add_want_deq_ntf (hs->tx_fifo,
+                                 SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY);
+      hs->session_state = HTTP_STATE_CLOSING;
+    }
+
+  return 0;
+}
+
+/** \brief OK-sent state - build and send the response header
+    @param s - vpp session (unused here)
+    @param hs - http session, hs->data must already hold the payload
+    @param cf - which callback drove the state machine (unused here)
+    @return 1 when the header was sent and the state machine should go on
+            with HTTP_STATE_SEND_MORE_DATA, 0 after a failure (the session
+            has been closed)
+ */
+static int
+state_sent_ok (session_t *s, http_session_t *hs,
+               http_state_machine_called_from_t cf)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  char *http_type;
+  u8 http_response[HTTP_RESPONSE_STR_MAX_SZ];
+  f64 now;
+  u32 offset;
+  int hdr_len;
+  u8 date[CLIB_TIMEBASE_STR_MAX_SZ], expire[CLIB_TIMEBASE_STR_MAX_SZ];
+
+  http_type = "text/html";
+
+  if (hs->data == 0)
+    {
+      clib_warning ("BUG: hs->data not set for session %d", hs->session_index);
+      close_session (hs);
+      return 0;
+    }
+
+  /*
+   * Send an http response, which needs the current time,
+   * the expiration time, and the data length
+   */
+  now = clib_timebase_now (&hsm->timebase);
+  sprintf_clib_timebase_time (date, now);
+  sprintf_clib_timebase_time (expire, now + 600.0);
+
+  /* Bounded write; the length is cast because the template uses %d */
+  hdr_len = snprintf ((char *) http_response, sizeof (http_response),
+                      http_response_template, (char *) date, (char *) expire,
+                      http_type, (int) vec_len (hs->data));
+  if (hdr_len < 0 || hdr_len >= (int) sizeof (http_response))
+    {
+      clib_warning ("BUG: response header does not fit in %d bytes",
+                    HTTP_RESPONSE_STR_MAX_SZ);
+      close_session (hs);
+      return 0;
+    }
+
+  offset = static_send_data (hs, http_response, (u32) hdr_len, 0);
+  if (offset != (u32) hdr_len)
+    {
+      clib_warning ("BUG: couldn't send response header!");
+      close_session (hs);
+      return 0;
+    }
+
+  /* Send data from the beginning... */
+  hs->data_offset = 0;
+  hs->session_state = HTTP_STATE_SEND_MORE_DATA;
+  return 1;
+}
+
+/** \brief closing state - close the session
+    @return HTTP_SESSION_CLOSED, which stops the state machine
+ */
+static int
+state_closing (session_t *s, http_session_t *hs,
+               http_state_machine_called_from_t cf)
+{
+  close_session (hs);
+  return HTTP_SESSION_CLOSED;
+}
+
+static void *state_funcs[HTTP_STATE_N_STATES] = {
+  state_closed,
+  /* Waiting for GET, POST, etc.
*/
+  state_established,
+  /* Received more data */
+  state_receive_more_data,
+  /* Sent OK */
+  state_sent_ok,
+  /* Send more data */
+  state_send_more_data,
+  /* Sent all data, closing connection */
+  state_closing
+};
+
+/** \brief Run the http state machine for a vpp session
+    @param s - vpp session that got an rx or tx event
+    @param cf - CALLED_FROM_RX or CALLED_FROM_TX, passed to the handlers
+    @return always 0
+
+    Handlers are called until one returns 0. A negative value or
+    HTTP_SESSION_CLOSED stops the loop without re-arming the session
+    timer.
+ */
+static inline int
+http_static_server_rx_tx_callback (session_t *s,
+                                   http_state_machine_called_from_t cf)
+{
+  http_session_t *hs;
+  int (*fp) (session_t *, http_session_t *, http_state_machine_called_from_t);
+  int rv;
+
+  /* Acquire a reader lock on the session table */
+  http_static_server_thr_sessions_reader_lock (s->thread_index);
+  hs = http_static_server_session_lookup (s->thread_index, s->session_index);
+
+  if (!hs)
+    {
+      clib_warning ("No http session for thread %d session_index %d",
+                    s->thread_index, s->session_index);
+      http_static_server_thr_sessions_reader_unlock (s->thread_index);
+      return 0;
+    }
+
+  /* Execute state machine for this session */
+  do
+    {
+      /* Never index the handler table with a corrupt state */
+      if (PREDICT_FALSE ((u32) hs->session_state >= HTTP_STATE_N_STATES))
+        {
+          clib_warning ("BUG: http session %d in unknown state %d",
+                        hs->session_index, hs->session_state);
+          goto session_closed;
+        }
+      fp = state_funcs[hs->session_state];
+      rv = (*fp) (s, hs, cf);
+      if (rv < 0 || rv == HTTP_SESSION_CLOSED)
+        goto session_closed;
+    }
+  while (rv);
+
+  /* Reset the session expiration timer */
+  http_static_server_session_timer_stop (hs);
+  http_static_server_session_timer_start (hs);
+
+session_closed:
+  http_static_server_thr_sessions_reader_unlock (s->thread_index);
+  return 0;
+}
+
+static int
+http_static_server_rx_callback (session_t *s)
+{
+  return http_static_server_rx_tx_callback (s, CALLED_FROM_RX);
+}
+
+static int
+http_static_server_tx_callback (session_t *s)
+{
+  return http_static_server_rx_tx_callback (s, CALLED_FROM_TX);
+}
+
+/** \brief Session accept callback
+    @param s - newly accepted vpp session
+    @return always 0
+ */
+
+static int
+http_static_server_session_accept_callback (session_t *s)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  http_session_t *hs;
+  u32 tx_size, thresh;
+
+  hsm->vpp_queue[s->thread_index] =
+    session_main_get_vpp_event_queue (s->thread_index);
+
+  http_static_server_thr_sessions_writer_lock (s->thread_index);
+
+  hs = http_static_server_session_alloc (s->thread_index);
+  http_static_server_session_lookup_add (s->thread_index, s->session_index,
+                                         hs->session_index);
+  hs->rx_fifo = s->rx_fifo;
+  hs->tx_fifo = s->tx_fifo;
+  hs->vpp_session_index = s->session_index;
+  hs->vpp_session_handle = session_handle (s);
+  hs->session_state = HTTP_STATE_ESTABLISHED;
+  http_static_server_session_timer_start (hs);
+
+  http_static_server_thr_sessions_writer_unlock (s->thread_index);
+
+  /* The application sets a threshold for it's fifo to get notified when
+   * additional data can be enqueued. We want to keep the TX fifo reasonably
+   * full, however avoid entering a state where the fifo is full all the time
+   * and small chunks of data are being enqueued each time.
+   * If the fifo and threshold use the same size, this means that a
+   * notification will be given when the fifo empties.
+   *
+   * The fifo is taken from s: hs is not touched once the lock is released.
+   * A configured threshold larger than the fifo is clamped, not wrapped.
+   */
+  tx_size = svm_fifo_size (s->tx_fifo);
+  thresh =
+    (hsm->fifo_deq_thresh < tx_size) ? tx_size - hsm->fifo_deq_thresh : 0;
+  svm_fifo_set_deq_thresh (s->tx_fifo, thresh);
+
+  s->session_state = SESSION_STATE_READY;
+  return 0;
+}
+
+/** \brief Session disconnect callback
+ */
+
+static void
+http_static_server_session_disconnect_callback (session_t *s)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+  a->handle = session_handle (s);
+  a->app_index = hsm->app_index;
+  vnet_disconnect_session (a);
+}
+
+/** \brief Session reset callback
+ */
+
+static void
+http_static_server_session_reset_callback (session_t *s)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+  a->handle = session_handle (s);
+  a->app_index = hsm->app_index;
+  vnet_disconnect_session (a);
+}
+
+static int
+http_static_server_session_connected_callback (u32 app_index, u32 api_context,
+                                               session_t *s,
+                                               session_error_t err)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+static int
+http_static_server_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+  return 0;
+}
+
+static void
+http_static_session_cleanup (session_t *s, session_cleanup_ntf_t ntf)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  http_session_t *hs;
+
+  /* Transport-only cleanup notifications are ignored */
+  if (ntf == SESSION_CLEANUP_TRANSPORT)
+    return;
+
+  http_static_server_thr_sessions_writer_lock (s->thread_index);
+
+  hs = http_static_server_session_lookup (s->thread_index, s->session_index);
+  if (hs)
+    {
+      http_static_server_detach_cache_entry (hs);
+      http_static_server_session_lookup_del (hs->thread_index,
+                                             hs->vpp_session_index);
+
+      /* Hand the rx buffer back to this thread's pool for reuse */
+      vec_reset_length (hs->rx_buf);
+      vec_add1 (hsm->rx_buf_pool[hs->thread_index], hs->rx_buf);
+      http_static_server_session_free (hs);
+    }
+
+  http_static_server_thr_sessions_writer_unlock (s->thread_index);
+}
+
+/** \brief Session-layer virtual function table
+ */
+static session_cb_vft_t http_static_server_session_cb_vft = {
+  .session_accept_callback = http_static_server_session_accept_callback,
+  .session_disconnect_callback =
+    http_static_server_session_disconnect_callback,
+  .session_connected_callback = http_static_server_session_connected_callback,
+  .add_segment_callback = http_static_server_add_segment_callback,
+  .builtin_app_rx_callback = http_static_server_rx_callback,
+  .builtin_app_tx_callback = http_static_server_tx_callback,
+  .session_reset_callback = http_static_server_session_reset_callback,
+  .session_cleanup_callback = http_static_session_cleanup,
+};
+
+/** \brief Attach the server application to the session layer
+    @return 0 on success, -1 when the attach failed
+
+    Segment and fifo sizes come from the server main structure, with
+    built-in defaults when they are 0. On success the application index
+    and the index of the test certificate/key pair are stored there.
+ */
+static int
+http_static_server_attach ()
+{
+  vnet_app_add_cert_key_pair_args_t *ck_pair =
+    vnet_app_tls_get_test_srv_key_pair ();
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  u64 options[APP_OPTIONS_N_OPTIONS];
+  vnet_app_attach_args_t args;
+  u64 segment_size;
+  int failed;
+
+  clib_memset (&args, 0, sizeof (args));
+  clib_memset (options, 0, sizeof (options));
+
+  /* 128MB segments unless a private segment size was configured */
+  segment_size = 128 << 20;
+  if (hsm->private_segment_size)
+    segment_size = hsm->private_segment_size;
+
+  options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+  options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+  options[APP_OPTIONS_RX_FIFO_SIZE] =
+    hsm->fifo_size ? hsm->fifo_size : 8 << 10;
+  options[APP_OPTIONS_TX_FIFO_SIZE] =
+    hsm->fifo_size ? hsm->fifo_size : 32 << 10;
+  options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+  options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hsm->prealloc_fifos;
+  options[APP_OPTIONS_TLS_ENGINE] = CRYPTO_ENGINE_OPENSSL;
+
+  args.api_client_index = ~0;
+  args.name = format (0, "test_http_static_server");
+  args.session_cb_vft = &http_static_server_session_cb_vft;
+  args.options = options;
+
+  /* The name vector is only needed for the duration of the call */
+  failed = vnet_application_attach (&args) != 0;
+  vec_free (args.name);
+  if (failed)
+    {
+      clib_warning ("failed to attach server");
+      return -1;
+    }
+  hsm->app_index = args.app_index;
+
+  vnet_app_add_cert_key_pair (ck_pair);
+  hsm->ckpair_index = ck_pair->index;
+
+  return 0;
+}
+
+/** \brief Tell whether a transport protocol needs a crypto configuration
+    @return non-zero for TLS, DTLS and QUIC, 0 otherwise
+ */
+static int
+http_static_transport_needs_crypto (transport_proto_t proto)
+{
+  switch (proto)
+    {
+    case TRANSPORT_PROTO_TLS:
+    case TRANSPORT_PROTO_DTLS:
+    case TRANSPORT_PROTO_QUIC:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+/** \brief Start listening on the configured uri
+    @return -1 when the uri cannot be parsed, otherwise the vnet_listen
+            result
+ */
+static int
+http_static_server_listen ()
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+  vnet_listen_args_t args;
+  char *uri;
+  int rv;
+
+  clib_memset (&args, 0, sizeof (args));
+  args.app_index = hsm->app_index;
+
+  /* Fall back to plain http on every address */
+  uri = hsm->uri ? (char *) hsm->uri : "tcp://0.0.0.0/80";
+
+  if (parse_uri (uri, &sep))
+    return -1;
+
+  clib_memcpy (&args.sep_ext, &sep, sizeof (sep));
+  if (http_static_transport_needs_crypto (args.sep_ext.transport_proto))
+    {
+      session_endpoint_alloc_ext_cfg (&args.sep_ext,
+                                      TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+      args.sep_ext.ext_cfg->crypto.ckpair_index = hsm->ckpair_index;
+    }
+
+  rv = vnet_listen (&args);
+  /* The extended config is ours to release once listen has returned */
+  if (args.sep_ext.ext_cfg)
+    clib_mem_free (args.sep_ext.ext_cfg);
+  return rv;
+}
+
+static void
+http_static_server_session_close_cb (void *hs_handlep)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  http_session_t *hs;
+  uword hs_handle;
+  hs_handle =
pointer_to_uword (hs_handlep);
+  hs =
+    http_static_server_session_get (hs_handle >> 24, hs_handle & 0x00FFFFFF);
+
+  if (hsm->debug_level > 1)
+    clib_warning ("terminate thread %d index %d hs %llx", hs_handle >> 24,
+                  hs_handle & 0x00FFFFFF, hs);
+  if (!hs)
+    return;
+  hs->timer_handle = ~0;
+  http_static_server_session_disconnect (hs);
+}
+
+/** \brief Expired session timer-wheel callback
+    @param expired_timers - vector of expired timer handles
+
+    For every handle an rpc is queued to the thread encoded in bits 24 and
+    up; the close callback receives the handle without its top bit.
+ */
+static void
+http_expired_timers_dispatch (u32 *expired_timers)
+{
+  u32 *timer;
+
+  vec_foreach (timer, expired_timers)
+    {
+      /* Get session handle. The first bit is the timer id */
+      u32 hs_handle = *timer & 0x7FFFFFFF;
+
+      session_send_rpc_evt_to_thread (hs_handle >> 24,
+                                      http_static_server_session_close_cb,
+                                      uword_to_pointer (hs_handle, void *));
+    }
+}
+
+/** \brief Timer-wheel expiration process
+
+    Wakes up on an event or after the timeout, then expires timers on the
+    wheel while holding tw_lock. Never returns.
+ */
+static uword
+http_static_server_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+                            vlib_frame_t *f)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  uword __clib_unused event_type;
+  uword *event_data = 0;
+  f64 timeout = 1.0;
+  f64 now;
+
+  for (;;)
+    {
+      vlib_process_wait_for_event_or_clock (vm, timeout);
+      now = vlib_time_now (vm);
+      event_type = vlib_process_get_events (vm, (uword **) &event_data);
+
+      /* expire timers */
+      clib_spinlock_lock (&hsm->tw_lock);
+      tw_timer_expire_timers_2t_1w_2048sl (&hsm->tw, now);
+      clib_spinlock_unlock (&hsm->tw_lock);
+
+      /* Events carry no payload we use, just drop them */
+      vec_reset_length (event_data);
+    }
+  return 0;
+}
+
+VLIB_REGISTER_NODE (http_static_server_process_node) = {
+  .function = http_static_server_process,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "static-http-server-process",
+  .state = VLIB_NODE_STATE_DISABLED,
+};
+
+static int
+http_static_server_create (vlib_main_t *vm)
+{
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  u32 num_threads, i;
+  vlib_node_t *n;
+
+  num_threads = 1 /* main thread */
+ vtm->n_threads;
+  vec_validate (hsm->vpp_queue, num_threads - 1);
+  vec_validate (hsm->sessions, num_threads - 1);
+  vec_validate (hsm->session_to_http_session, num_threads - 1);
+  vec_validate (hsm->thr_sessions_lock, num_threads - 1);
+  vec_validate (hsm->rx_buf_pool, num_threads - 1);
+
+  /* Strictly below vec_len: the vector holds exactly num_threads locks */
+  for (i = 0; i < vec_len (hsm->thr_sessions_lock); i++)
+    clib_rwlock_init (&hsm->thr_sessions_lock[i]);
+
+  clib_spinlock_init (&hsm->tw_lock);
+
+  if (http_static_server_attach ())
+    {
+      clib_warning ("failed to attach server");
+      return -1;
+    }
+  if (http_static_server_listen ())
+    {
+      clib_warning ("failed to start listening");
+      return -1;
+    }
+
+  /* Init path-to-cache hash table */
+  BV (clib_bihash_init) (&hsm->name_to_data, "http cache", 128, 32 << 20);
+
+  hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
+  hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
+
+  /* Init timer wheel and process */
+  tw_timer_wheel_init_2t_1w_2048sl (&hsm->tw, http_expired_timers_dispatch,
+                                    1.0 /* timer interval */, ~0);
+  vlib_node_set_state (vm, http_static_server_process_node.index,
+                       VLIB_NODE_STATE_POLLING);
+  n = vlib_get_node (vm, http_static_server_process_node.index);
+  vlib_start_process (vm, n->runtime_index);
+
+  return 0;
+}
+
+/** \brief API helper function for vl_api_http_static_enable_t messages
+    @param fifo_size - rx/tx fifo size, 0 keeps the attach defaults
+    @param cache_limit - upper bound for the data cache, in bytes
+    @param prealloc_fifos - number of fifo pairs to preallocate
+    @param private_segment_size - segment size, 0 keeps the default
+    @param www_root - server root, must not be empty
+    @param uri - uri to listen on
+    @return 0 on success, VNET_API_ERROR_INVALID_VALUE,
+            VNET_API_ERROR_APP_ALREADY_ATTACHED or
+            VNET_API_ERROR_INIT_FAILED otherwise
+ */
+int
+http_static_l4_server_enable (u32 fifo_size, u32 cache_limit,
+                              u32 prealloc_fifos, u32 private_segment_size,
+                              u8 *www_root, u8 *uri)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  u8 *root_copy;
+  int rv;
+
+  /*
+   * Validate on a private copy first: a rejected request must neither
+   * leak memory nor overwrite the configuration of a running server.
+   */
+  root_copy = format (0, "%s%c", www_root, 0);
+  if (vec_len (root_copy) < 2)
+    {
+      vec_free (root_copy);
+      return VNET_API_ERROR_INVALID_VALUE;
+    }
+
+  if (hsm->my_client_index != ~0)
+    {
+      vec_free (root_copy);
+      return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+    }
+
+  hsm->fifo_size = fifo_size;
+  hsm->cache_limit = cache_limit;
+  hsm->prealloc_fifos = prealloc_fifos;
+  hsm->private_segment_size = private_segment_size;
+  hsm->www_root = root_copy;
+  hsm->uri = format (0, "%s%c", uri, 0);
+
+  vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. */);
+
+  rv = http_static_server_create (hsm->vlib_main);
+  switch (rv)
+    {
+    case 0:
+      break;
+    default:
+      vec_free (hsm->www_root);
+      vec_free (hsm->uri);
+      return VNET_API_ERROR_INIT_FAILED;
+    }
+  return 0;
+}
+
+/** \brief CLI handler that configures and starts the server
+    @return 0 on success, a clib error describing the problem otherwise
+
+    Every error raised after the input line was obtained goes through one
+    exit path that releases the line input and the parsed www-root.
+ */
+static clib_error_t *
+http_static_l4_server_create_command_fn (vlib_main_t *vm,
+                                         unformat_input_t *input,
+                                         vlib_cli_command_t *cmd)
+{
+  http_static_l4_server_main_t *hsm = &http_static_l4_server_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+  u64 seg_size;
+  u64 fifo_size;
+  u64 fifo_deq_thresh;
+  u32 tx_fifo_size;
+  u8 *www_root = 0;
+  int rv;
+
+  hsm->prealloc_fifos = 0;
+  hsm->private_segment_size = 0;
+  hsm->fifo_size = 0;
+  hsm->fifo_deq_thresh = HTTP_FIFO_DEF_THRESH;
+  /* 10mb cache limit, before LRU occurs */
+  hsm->cache_limit = 10 << 20;
+
+  clib_warning ("Starting http_static_l4_server ...");
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Must specify www-root ");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "www-root %s", &www_root))
+        ;
+      else if (unformat (line_input, "prealloc-fifos %d",
+                         &hsm->prealloc_fifos))
+        ;
+      else if (unformat (line_input, "private-segment-size %U",
+                         unformat_memory_size, &seg_size))
+        hsm->private_segment_size = seg_size;
+      else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+                         &fifo_size))
+        {
+          if (fifo_size > UINT_MAX)
+            {
+              error = clib_error_return (0, "fifo-size can't be over 4gb");
+              break;
+            }
+          hsm->fifo_size =
+            (fifo_size < 1024) ? fifo_size << 10 : (u32) fifo_size;
+          vlib_cli_output (vm, "fifo-size set to :%u", hsm->fifo_size);
+        }
+      else if (unformat (line_input, "fifo-deq-thresh %U",
+                         unformat_memory_size, &fifo_deq_thresh))
+        {
+          hsm->fifo_deq_thresh = (fifo_deq_thresh < 1024) ?
+                                   fifo_deq_thresh << 10 :
+                                   (u32) fifo_deq_thresh;
+          vlib_cli_output (vm, "fifo-deq-thresh set to :%u",
+                           hsm->fifo_deq_thresh);
+        }
+      else if (unformat (line_input, "cache-size %U", unformat_memory_size,
+                         &hsm->cache_limit))
+        {
+          if (hsm->cache_limit < (128 << 10))
+            {
+              error = clib_error_return (
+                0, "cache-size must be at least 128kb");
+              break;
+            }
+        }
+
+      else if (unformat (line_input, "uri %s", &hsm->uri))
+        ;
+      else if (unformat (line_input, "debug %d", &hsm->debug_level))
+        ;
+      else if (unformat (line_input, "debug"))
+        hsm->debug_level = 1;
+      else
+        {
+          /* Build the message before the input is released */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+  if (error)
+    goto fail;
+
+  /* The threshold applies to the tx fifo, whose size defaults to 32kb */
+  tx_fifo_size = hsm->fifo_size ? hsm->fifo_size : 32 << 10;
+  if (hsm->fifo_deq_thresh > tx_fifo_size)
+    {
+      error = clib_error_return (
+        0, "fifo-deq-thresh:%u can't be bigger than fifo-size:%u",
+        hsm->fifo_deq_thresh, tx_fifo_size);
+      goto fail;
+    }
+
+  if (www_root == 0)
+    return clib_error_return (0, "Must specify www-root ");
+
+  if (hsm->my_client_index != (u32) ~0)
+    {
+      error = clib_error_return (0, "http server already running...");
+      goto fail;
+    }
+
+  hsm->www_root = www_root;
+
+  vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */);
+
+  rv = http_static_server_create (vm);
+  switch (rv)
+    {
+    case 0:
+      break;
+    default:
+      vec_free (hsm->www_root);
+      return clib_error_return (0, "server_create returned %d", rv);
+    }
+  return 0;
+
+fail:
+  vec_free (www_root);
+  return error;
+}
+
+/*?
+ * Enable the static http server
+ *
+ * @cliexpar
+ * This command enables the static http server.
Only the www-root + * parameter is required + * @clistart + * http static l4 server www-root /tmp/www uri tcp://0.0.0.0/80 cache-size 2m + * @cliend + * @cliexcmd{http static l4 server www-root [prealloc-fifos ] + * [private-segment-size ] [fifo-size ] [uri ]} +?*/ +VLIB_CLI_COMMAND (http_static_l4_server_create_command, static) = { + .path = "http static l4 server", + .short_help = + "http static l4 server www-root [prealloc-fifos ]\n" + "[private-segment-size ] [fifo-size ] [uri ]\n" + "[debug [nn]]\n", + .function = http_static_l4_server_create_command_fn, +}; + +/** \brief format a file cache entry + * + * va_args: file_data_cache_t *entry, f64 now. A null entry pointer prints + * the column header instead of an entry; the age column is now minus the + * entry's last_used time. + */ +u8 * +format_hsm_l4_cache_entry (u8 *s, va_list *args) +{ + file_data_cache_t *ep = va_arg (*args, file_data_cache_t *); + f64 now = va_arg (*args, f64); + + /* Header */ + if (ep == 0) + { + s = format (s, "%40s%12s%20s", "File", "Size", "Age"); + return s; + } + s = format (s, "%40s%12lld%20.2f", ep->filename, vec_len (ep->data), + now - ep->last_used); + return s; +} + +/** \brief format an http session state as a short string + * + * va_args: http_session_state_t state. States without a case below are + * printed as "bogus!". + */ +u8 * +format_http_l4_session_state (u8 *s, va_list *args) +{ + http_session_state_t state = va_arg (*args, http_session_state_t); + char *state_string = "bogus!"; + + switch (state) + { + case HTTP_STATE_CLOSED: + state_string = "closed"; + break; + case HTTP_STATE_ESTABLISHED: + state_string = "established"; + break; + case HTTP_STATE_RECEIVE_MORE_DATA: + state_string = "received"; + break; + case HTTP_STATE_OK_SENT: + state_string = "ok sent"; + break; + case HTTP_STATE_SEND_MORE_DATA: + state_string = "send more data"; + break; + case HTTP_STATE_CLOSING: + state_string = "closing"; + break; + default: + break; + } + + return format (s, "%s", state_string); +} + +/** \brief format one http session + * + * va_args: http_session_t *session, int verbose. With verbose > 0 the + * path and data length/offset are appended on a second line. + */ +u8 * +format_http_l4_session (u8 *s, va_list *args) +{ + http_session_t *hs = va_arg (*args, http_session_t *); + int verbose = va_arg (*args, int); + + s = format (s, "[%d]: state %U", hs->session_index, + format_http_l4_session_state, hs->session_state); + if (verbose > 0) + { + s = format (s, "\n path %s, data length %u, data_offset
%u", hs->path, + vec_len (hs->data), hs->data_offset); + } + return s; +} + +/** \brief CLI handler for "show http static l4 server" + * + * Accepts any combination of "cache", "sessions" and "verbose [nn]". + * Without verbose the cache part is a one-line summary, with verbose it + * lists every cache entry by following next_index from first_index. + * Both parts are printed when both are requested. + */ +static clib_error_t * +http_show_static_l4_server_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + file_data_cache_t *ep; + int verbose = 0; + int show_cache = 0; + int show_sessions = 0; + u32 index; + f64 now; + + if (hsm->www_root == 0) + return clib_error_return (0, "Static server disabled"); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "cache")) + show_cache = 1; + else if (unformat (input, "sessions")) + show_sessions = 1; + else + break; + } + + if ((show_cache + show_sessions) == 0) + return clib_error_return (0, "specify one or more of cache, sessions"); + + if (show_cache) + { + if (verbose == 0) + { + /* Summary only; do not return here so "sessions" is still served */ + vlib_cli_output ( + vm, + "www_root %s, cache size %lld bytes, limit %lld bytes, " + "evictions %lld", + hsm->www_root, hsm->cache_size, hsm->cache_limit, + hsm->cache_evictions); + } + else + { + now = vlib_time_now (vm); + + vlib_cli_output (vm, "%U", format_hsm_l4_cache_entry, 0 /* header */, + now); + + for (index = hsm->first_index; index != ~0;) + { + ep = pool_elt_at_index (hsm->cache_pool, index); + index = ep->next_index; + vlib_cli_output (vm, "%U", format_hsm_l4_cache_entry, ep, now); + } + + vlib_cli_output (vm, "%40s%12lld", "Total Size", hsm->cache_size); + } + } + + if (show_sessions) + { + u32 *session_indices = 0; + http_session_t *hs; + int i, j; + + /* Lock for all threads */ + http_static_server_all_sessions_reader_lock (); + + for (i = 0; i < vec_len (hsm->sessions); i++) + { + /* Collect the pool indices first, then print them */ + pool_foreach (hs, hsm->sessions[i]) + { + vec_add1 (session_indices, hs - hsm->sessions[i]); + } + + for (j = 0; j < vec_len (session_indices); j++) + { + vlib_cli_output ( + vm, "%U",
format_http_l4_session, + pool_elt_at_index (hsm->sessions[i], session_indices[j]), + verbose); + } + vec_reset_length (session_indices); + } + http_static_server_all_sessions_reader_unlock (); + vec_free (session_indices); + } + return 0; +} + +/*? + * Display static http server l4 cache statistics + * + * @cliexpar + * This command shows the contents of the static http l4 server cache + * @clistart + * show http static l4 server cache + * @cliend + * @cliexcmd{show http static l4 server sessions cache [verbose [nn]]} +?*/ +VLIB_CLI_COMMAND (http_show_static_l4_server_command, static) = { + .path = "show http static l4 server", + .short_help = "show http static l4 server sessions cache [verbose []]", + .function = http_show_static_l4_server_command_fn, +}; + +static clib_error_t * +http_clear_static_cache_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + file_data_cache_t *dp; + u32 free_index; + u32 busy_items = 0; + BVT (clib_bihash_kv) kv; + + if (hsm->www_root == 0) + return clib_error_return (0, "Static server disabled"); + + /* Lock for all threads */ + http_static_server_all_sessions_reader_lock (); + + /* Walk the LRU list to find active entries */ + free_index = hsm->last_index; + while (free_index != ~0) + { + dp = pool_elt_at_index (hsm->cache_pool, free_index); + free_index = dp->prev_index; + /* Which could be in use...
*/ + if (dp->inuse) + { + busy_items++; + /* NOTE(review): this overrides the prev_index step taken just above; + * if next_index of the entry at last_index is ~0 the walk stops at + * the first busy entry - confirm that is intended */ + free_index = dp->next_index; + continue; + } + kv.key = (u64) (dp->filename); + kv.value = ~0ULL; + if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv, 0 /* is_add */) < + 0) + { + clib_warning ("BUG: cache clear delete '%s' FAILED!", dp->filename); + } + + lru_remove (hsm, dp); + hsm->cache_size -= vec_len (dp->data); + hsm->cache_evictions++; + dp->filename[0] = 0; + vec_free (dp->data); + if (hsm->debug_level > 1) + clib_warning ("pool put index %d", dp - hsm->cache_pool); + pool_put (hsm->cache_pool, dp); + free_index = hsm->last_index; + } + http_static_server_all_sessions_reader_unlock (); + if (busy_items > 0) + vlib_cli_output (vm, "Note: %d busy items still in cache...", busy_items); + else + vlib_cli_output (vm, "Cache cleared..."); + return 0; +} + +/*? + * Clear the static http l4 server cache, to force the server to + * reload content from backing files + * + * @cliexpar + * This command clears the static http l4 server cache + * @clistart + * clear http static l4 cache + * @cliend + * @cliexcmd{clear http static l4 cache} +?*/ +VLIB_CLI_COMMAND (clear_http_static_l4_cache_command, static) = { + .path = "clear http static l4 cache", + .short_help = "clear http static l4 cache", + .function = http_clear_static_cache_command_fn, +}; + +/** \brief CLI handler for "set http static l4 redirect file ..." + * + * Expects exactly one "file" argument, validates it with + * parse_fileformat () and copies it into hsm->redirect_file_name under the + * all-sessions writer lock. Every exit after the line has been read goes + * through the common cleanup, which frees the line input and the parsed + * file name. + */ +static clib_error_t * +http_set_static_l4_redirect_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + u8 *redirect_file = NULL; + int digits = 0; + u32 n_bytes, num_m_args = 0; + + if (hsm->www_root == 0) + return clib_error_return (0, "Static server disabled"); + + /* Get a line of input.
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "file %s", &redirect_file)) + num_m_args++; + else + { + /* Build the message while line_input is still valid */ + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (num_m_args == 0 || redirect_file == NULL) + { + error = clib_error_return (0, "No file name"); + goto done; + } + if (num_m_args != 1) + { + error = clib_error_return (0, "Too many params"); + goto done; + } + + clib_warning ("File Name : %s", redirect_file); + + n_bytes = parse_fileformat (redirect_file, &digits); + if (n_bytes == 0 || digits > 3) + { + error = clib_error_return (0, "Bad File : %s, n_bytes=%u, digits=%u", + redirect_file, n_bytes, digits); + goto done; + } + + redirect_file[digits + 1] = '\0'; + + /* Lock for all threads */ + http_static_server_all_sessions_writer_lock (); + strcpy (hsm->redirect_file_name, (char *) redirect_file); + http_static_server_all_sessions_writer_unlock (); + vlib_cli_output (vm, "redirect index.html to File Name : %s", redirect_file); + +done: + unformat_free (line_input); + vec_free (redirect_file); + return error; +} + +VLIB_CLI_COMMAND (http_set_static_l4_redirect_command, static) = { + .path = "set http static l4 redirect", + .short_help = "set http static l4 redirect file ", + .function = http_set_static_l4_redirect_command_fn, +}; + +static clib_error_t * +http_static_l4_server_main_init (vlib_main_t *vm) +{ + http_static_l4_server_main_t *hsm = &http_static_l4_server_main; + + hsm->my_client_index = ~0; + hsm->vlib_main = vm; + hsm->first_index = hsm->last_index = ~0; + + clib_timebase_init (&hsm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE, + &vm->clib_time /* share the system clock */); + + return 0; +} + +VLIB_INIT_FUNCTION (http_static_l4_server_main_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt index
c891689b4b..9ffb7e29fa 100644 --- a/src/plugins/linux-cp/CMakeLists.txt +++ b/src/plugins/linux-cp/CMakeLists.txt @@ -26,6 +26,7 @@ endif() vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so) vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200) +vpp_plugin_find_library(linux-cp LIBNL3_XFRM_LIB libnl-xfrm-3.so.200) include_directories(${LIBNL3_INCLUDE_DIR}/libnl3) include_directories(${LIBMNL_INCLUDE_DIR}) @@ -72,7 +73,10 @@ add_vpp_plugin(linux_nl SOURCES lcp_router.c lcp_nl.c + lcp_ipsec.c + lcp_xfrm_nl.c LINK_LIBRARIES + ${LIBNL3_XFRM_LIB} lcp ) diff --git a/src/plugins/linux-cp/README b/src/plugins/linux-cp/README new file mode 100644 index 0000000000..9e63646dd6 --- /dev/null +++ b/src/plugins/linux-cp/README @@ -0,0 +1,169 @@ +XFRM NETLINK SUPPORT IN LINUX NL PLUGIN: +======================================== + +Introduction: +------------- + +The purpose of introducing Linux XFRM netlink support in the linux_nl_plugin +is to mirror Linux XFRM configurations to the VPP IPsec subsystem. These +configurations can be manually set using ip commands or via keying daemons +like StrongSwan. In both cases, the netlink notifications generated from +Linux are read by this XFRM module and translated into VPP's IPsec configuration. + +Highlights: +=========== + +1. The XFRM module is part of the existing linux-nl plugin, added as a new XFRM + process type node. From now on, it will be referred to as the XFRM node/plugin. +2. The XFRM node piggybacks on the libnl-xfrm system library for parsing/extracting + netlink messages. +3. The XFRM node will depend on Kernel XFRM netlink notifications. To read these + messages, the XFRM node registers to XFRMGRP_SA, XFRMGRP_POLICY, and XFRMGRP_EXPIRE + groups of the NETLINK_XFRM protocol of the AF_NETLINK family. +4. The XFRM node will support both policy-based and tunnel/route-based IPsec. The mode + can be selected via startup.conf. +5.
The plugin will support packet and byte-based soft life and hard life expiry as the + datapath will be handled in VPP. + +Design: +======= + +The XFRM module is divided into three components: reading XFRM NL notifications, +configuring VPP IPsec based on NL notifications, and handling SA expiry. + +1. Reading the XFRM NL notifications: +===================================== + + In this design, a process similar to the Linux netlink plugin is followed. It + includes creating a netlink socket of type NETLINK_XFRM and registering it to + multicast groups such as XFRMGRP_SA, XFRMGRP_POLICY, and XFRMGRP_EXPIRE. Once + the netlink (NL) messages are read from the NETLINK_XFRM socket, they are handled + based on their message type. This handling process is where the second part of the + plugin comes into play: configuring VPP IPsec. + +2. Configuring VPP IPsec: +======================== + + Based on the startup configuration, VPP IPsec will be configured to run in one of + the two IPsec modes. + + Policy based IPsec: + ------------------- + + a.In VPP IPsec, the Security Association (SA) and Policy are closely linked + during configuration. Each SA has a unique SA ID. However, the XFRM kernel + does not inherently recognize SA identifiers. In this plugin, we generate a + unique 32-bit SA ID based on the Security Parameter Index (SPI), Destination + IP (DIP), and Protocol (Proto) for a given SA. This ID is then used to associate + the SA with a policy. + + b.Because of a limitation (as described in point 3 of the limitations), adding + an SA message will also handle inbound policy addition. However, outbound policy + handling is part of the Policy notification process. + + + c.Using the tunnel endpoint IP address, we determine the VPP interface on which + IPsec/SPD (Security Policy Database) needs to be enabled. + + d.After creating and enabling the Security Policy Database (SPD) on a specific + interface, all packets passing through that interface undergo policy lookup.
+ If a packet doesn't match any existing policies, we internally add "allow all" + bypass policies. These bypass policies allow essential messages such as IKE (Internet + Key Exchange), Neighbor Discovery, and keep-alive messages to pass through. + + e.The configuration of bypass policies is handled internally, and there won't be a Netlink + (NL) notification for it. These bypass policies are added when the first Security + Association (SA) or protect policy notification is handled. They are subsequently + deleted when there are no more protect policies in the system. + + + Route based IPsec: + ------------------ + + a.In a manner similar to the policy-based scheme, we derive Security Association (SA) + IDs here. Additionally, as part of the SA notification process, we create an IPIP + tunnel or an IPsec interface. This tunnel is then protected with inbound (inb) and + outbound (outb) SAs. It's important to note that a maximum of 4 inbound SAs and + 1 outbound SA can be bound to a tunnel. + + b.In the context of Policy notifications, we only handle FIB (Forwarding Information Base) + entries via tunnel interfaces. We do not add policies in route mode. + +3. SA Expiry: +============= + +VPP (Vector Packet Processing) does not support soft/hard byte or packet-based expiries directly. +To address this limitation, we've implemented a process node that continuously polls all +available SA (Security Association) counters at fixed intervals. Here's how it works: + + a. Expiry Values and Message Creation: + + 1. The plugin receives expiry values from the XFRM SA (Linux Security Association) Netlink (NL) + notifications. + 2. Based on these values, the plugin constructs an NL SA expiry message (XFRM_MSG_EXPIRE). + 3. The message specifies whether the expiry is hard or soft. + + b. Kernel Interaction: + + 1. The plugin sends the NL SA expiry message to the kernel. + 2. The kernel validates this message against its database. + 3. 
If the expiry is soft, the kernel initiates rekeying automatically. In this case, our plugin + doesn't need to handle the message further. + 4. If the expiry is hard, there is no separate notification to delete the SA. + + c.Handling Hard Expiry: + + When the plugin receives a hard expiry notification, it deletes the corresponding SA. +This approach ensures that SA management remains consistent even without direct soft/hard expiry support in VPP. + +Limitations: +=========== + +1.The plugin does not support on-demand Security Association (SA) creation by installing trap policies. + This limitation arises because VPP cannot install a policy without a valid SA ID. Consequently, the + strongSwan configuration option "auto = route" cannot be supported. + +2.Given that VPP does not allow configuring the anti-replay window size (which remains fixed at 64), + the plugin does not take into account the replay-window size configured in strongSwan. However, it + does handle enabling or disabling the use-anti-replay flag in VPP. + +3.During negotiations between peers, there's a possibility that a peer sends an ESP packet with an old + Security Association (SA) (SPI X). Meanwhile, the strongSwan on the device under test (DUT) has updated + its inbound (INB) policy to point to a newly negotiated SA (SPI Y). This situation could lead to packet + drops in the DUT's inbound direction until the peer updates its policy to use the new SA (SPI Y). + + To avoid this behavior, the plugin handles INB policy addition and deletion as part of SA addition and + deletion. As long as there is an SA in VPP, all INB packets matching that SA/SPI are accepted. Whenever + we receive a delete SA notification from the kernel, we remove the SA and its associated INB policy + +4. a.We do not handle inbound (INB) policy notifications from the kernel. + b.Forward (FWD) policies are not handled in the plugin because there is no use case for them. 
+ c.The plugin intentionally avoids handling XFRM Policy notifications for BYPASS and DROP actions. + This design optimization ensures compatibility with daemons like strongSwan. + d.In NL (Netlink) policy notifications, we expect only one user template to be present. + +5. The plugin has been tested with AES-GCM and AES-CBC encryption algorithms only. + +6. In route/tunnel mode, VPP supports a maximum of 4 inbound (INB) SAs and 1 outbound (OUTB) SA + bound to the tunnel. + +7. In route/tunnel mode, if StrongSwan is configured such that more than one connection uses the same + tunnel endpoint, it leads to undefined behavior. This is because the tunnel (IPIP/IPsec) instance + is created based on tunnel endpoints and tunnel type. Since the endpoints and type of tunnel are + the same for both connections, it could happen that the second connection (tunnel creation) fails in VPP. + +Startup.conf section: +==================== + +linux-xfrm-nl{ + # Following parameter enables route mode IPsec. + enable-route-mode-ipsec, + # Specifies Ipsec interface type "ipsec" or "ipip". + interface <"interface_type">, + # Set the RX buffer size to be used on the netlink socket. + nl-rx-buffer-size <>, + # Set the batch size - maximum netlink messages to process at one time. + nl-batch-size <>, + # Set the batch delay - how long to wait in ms between processing batches. 
+ nl-batch-delay-ms <> +} diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api index adef1b8515..e7eaa5a366 100644 --- a/src/plugins/linux-cp/lcp.api +++ b/src/plugins/linux-cp/lcp.api @@ -103,6 +103,27 @@ define lcp_itf_pair_add_del_v2_reply i32 retval; vl_api_interface_index_t host_sw_if_index; }; +autoendian define lcp_itf_pair_add_del_v3 +{ + option in_progress; + + u32 client_index; + u32 context; + bool is_add; + vl_api_interface_index_t sw_if_index; + string host_if_name[16]; /* IFNAMSIZ */ + vl_api_lcp_itf_host_type_t host_if_type; + string netns[32]; /* LCP_NS_LEN */ +}; +define lcp_itf_pair_add_del_v3_reply +{ + option in_progress; + + u32 context; + i32 retval; + u32 vif_index; + vl_api_interface_index_t host_sw_if_index; +}; /** \brief Dump Linux Control Plane interface pair data @param client_index - opaque cookie to identify the sender @@ -121,6 +142,19 @@ autoendian define lcp_itf_pair_get_reply i32 retval; u32 cursor; }; +autoendian define lcp_itf_pair_get_v2 +{ + u32 client_index; + u32 context; + u32 cursor; + vl_api_interface_index_t sw_if_index; +}; +autoendian define lcp_itf_pair_get_v2_reply +{ + u32 context; + i32 retval; + u32 cursor; +}; /** \brief Linux Control Plane interface pair dump response @param context - sender context which was passed in the request @@ -148,6 +182,11 @@ service { stream lcp_itf_pair_details; }; +service { + rpc lcp_itf_pair_get_v2 returns lcp_itf_pair_get_v2_reply + stream lcp_itf_pair_details; +}; + /** \brief Replace end/begin */ autoreply define lcp_itf_pair_replace_begin diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c index a217aa708f..991516a3ec 100644 --- a/src/plugins/linux-cp/lcp_api.c +++ b/src/plugins/linux-cp/lcp_api.c @@ -45,7 +45,7 @@ static int vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type, u8 *mp_host_if_name, size_t sizeof_host_if_name, u8 *mp_namespace, size_t sizeof_mp_namespace, - u32 *host_sw_if_index_p) + u32 
*host_sw_if_index_p, u32 *vif_index_p) { u8 *host_if_name, *netns; int host_len, netns_len, rv; @@ -64,6 +64,13 @@ vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type, rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type, netns, host_sw_if_index_p); + if (!rv && (vif_index_p != NULL)) + { + lcp_itf_pair_t *pair = + lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index)); + *vif_index_p = pair->lip_vif_index; + } + vec_free (host_if_name); vec_free (netns); @@ -86,7 +93,7 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp) { rv = vl_api_lcp_itf_pair_add ( phy_sw_if_index, lip_host_type, mp->host_if_name, - sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), NULL); + sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), NULL, NULL); } else { @@ -111,10 +118,10 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp) lip_host_type = api_decode_host_type (mp->host_if_type); if (mp->is_add) { - rv = vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type, - mp->host_if_name, - sizeof (mp->host_if_name), mp->netns, - sizeof (mp->netns), &host_sw_if_index); + rv = vl_api_lcp_itf_pair_add ( + phy_sw_if_index, lip_host_type, mp->host_if_name, + sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), + &host_sw_if_index, NULL); } else { @@ -126,6 +133,37 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp) { rmp->host_sw_if_index = host_sw_if_index; }); } +static void +vl_api_lcp_itf_pair_add_del_v3_t_handler (vl_api_lcp_itf_pair_add_del_v3_t *mp) +{ + u32 phy_sw_if_index, host_sw_if_index = ~0, vif_index = ~0; + vl_api_lcp_itf_pair_add_del_v3_reply_t *rmp; + lip_host_type_t lip_host_type; + int rv; + + VALIDATE_SW_IF_INDEX_END (mp); + + phy_sw_if_index = mp->sw_if_index; + lip_host_type = api_decode_host_type (mp->host_if_type); + if (mp->is_add) + { + rv = vl_api_lcp_itf_pair_add ( + phy_sw_if_index, lip_host_type, mp->host_if_name, + 
sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), + &host_sw_if_index, &vif_index); + } + else + { + rv = lcp_itf_pair_delete (phy_sw_if_index); + } + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V3_REPLY, ({ + rmp->host_sw_if_index = host_sw_if_index; + rmp->vif_index = vif_index; + })); +} + static void send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp, u32 context) @@ -161,6 +199,31 @@ vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp) ({ send_lcp_itf_pair_details (cursor, rp, mp->context); })); } +static void +vl_api_lcp_itf_pair_get_v2_t_handler (vl_api_lcp_itf_pair_get_v2_t *mp) +{ + vl_api_lcp_itf_pair_get_v2_reply_t *rmp; + i32 rv = 0; + + if (mp->sw_if_index == ~0) /* ~0: dump every pair in the pool */ + { + REPLY_AND_DETAILS_MACRO_END ( + VL_API_LCP_ITF_PAIR_GET_V2_REPLY, lcp_itf_pair_pool, /* rmp is a v2 reply, so use the v2 reply id */ + ({ send_lcp_itf_pair_details (cursor, rp, mp->context); })); + } + else /* otherwise: details for the pair of this one interface */ + { + VALIDATE_SW_IF_INDEX_END (mp); + send_lcp_itf_pair_details ( + lcp_itf_pair_find_by_phy (mp->sw_if_index), /* NOTE(review): confirm the callee copes with an interface that has no pair */ + vl_api_client_index_to_registration (mp->client_index), mp->context); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_GET_V2_REPLY, + ({ rmp->cursor = ~0; })); + } +} + static void vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp) { diff --git a/src/plugins/linux-cp/lcp_ipsec.c b/src/plugins/linux-cp/lcp_ipsec.c new file mode 100644 index 0000000000..ff6e79649c --- /dev/null +++ b/src/plugins/linux-cp/lcp_ipsec.c @@ -0,0 +1,1647 @@ +/* + * Copyright (c) 2022 Cisco and/or its affiliates. + * Copyright (c) 2022 Marvell Technology, Inc and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define NL_XFRM_DBG(...) vlib_log_debug (lcp_xfrm_logger, __VA_ARGS__) +#define NL_XFRM_INFO(...) vlib_log_notice (lcp_xfrm_logger, __VA_ARGS__) +#define NL_XFRM_WARN(...) vlib_log_warn (lcp_xfrm_logger, __VA_ARGS__) +#define NL_XFRM_ERR(...) vlib_log_err (lcp_xfrm_logger, __VA_ARGS__) + +/* Keeping size in sync with libnl lib */ +#define ALGO_NAME 64 +#define INB_PROTECT_POL_PRIO 9999 +#define IS_ROUTE_MODE_ENABLED !!nm->is_route_mode +/* size in bytes */ +#define GCM_SALT_SIZE 4 + +#define cpu_to_be(x, bits) \ + if ((bits) == 16) \ + x = clib_host_to_net_u16 (x); \ + else if ((bits) == 32) \ + x = clib_host_to_net_u32 (x); \ + else \ + x = clib_host_to_net_u64 (x); + +/* Random seq number */ +static u32 g_seq = 0; +static vlib_log_class_t lcp_xfrm_logger; +uword *lifetime_by_sa_id; +static int config_bypass = 0; +uword *tun_idx_by_sel_daddr; + +typedef struct sa_life_limits +{ + u64 soft_byte_limit; + u64 hard_byte_limit; + u64 soft_packet_limit; + u64 hard_packet_limit; + u32 sa_id; + + /* Used in tunnel mode */ + int tun_sw_if_idx; + u8 sa_in_tunnel; +} sa_life_limits_t; + +typedef struct policy_db +{ + int tun_sw_if_idx; +} policy_db_t; + +typedef struct sa_expire_req +{ + struct nlmsghdr nlmsg_hdr; + struct xfrm_user_expire xfrm_expire; +} sa_expire_req_nl_t; + +static inline int +lcp_xfrm_is_ipsec_intf_exist (u8 *if_name, u32 *sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + uword *p; + vnet_hw_interface_t *hi; + + p = hash_get (vnm->interface_main.hw_interface_by_name, if_name); + if (!p) + return 0; + + hi = 
vnet_get_hw_interface (vnm, p[0]); + sw_if_index[0] = hi->sw_if_index; + + return 1; +} + +/* + * Build the IPv6 multicast address ff02::1:ffXX:XXXX (the solicited-node + * layout) from the low 24 bits of ip6addr[3]. ip6addr[3] is read as a + * network-order word and every word of the result is converted back to + * network order before returning. The 16-byte buffer comes from malloc () + * and is handed to the caller; the malloc () result is not checked here. + */ +u32 * +get_mcast_addr (u32 *ip6addr) +{ + u32 *maddr = malloc (sizeof (ip6_address_t)); + + maddr[0] = 0xff020000; + maddr[1] = 0x0; + maddr[2] = 0x1; + maddr[3] = (0xff000000) | (0x00ffffff & clib_net_to_host_u32 (ip6addr[3])); + for (int i = 0; i < 4; i++) + cpu_to_be (maddr[i], 32); + return maddr; +} + +/* + * Derive a 32-bit SA id by XOR-folding the SPI, the address family and the + * address words, then mixing the upper half into the lower half. This is a + * hash, so different (spi, addr) inputs can produce the same id. + */ +static inline u32 +lcp_xfrm_ipsec_sa_id_table (u32 spi, ip_address_t *addr) +{ + u32 hash = addr->version ^ spi; + + switch (addr->version) + { + case AF_IP4: + hash ^= addr->ip.ip4.as_u32; + break; + case AF_IP6: + hash ^= addr->ip.ip6.as_u32[0] ^ addr->ip.ip6.as_u32[1] ^ + addr->ip.ip6.as_u32[2] ^ addr->ip.ip6.as_u32[3]; + break; + } + hash ^= (hash >> 16); + NL_XFRM_DBG ("### sa_id : %x", hash); + return hash; +} + +/* Map a socket address family to a FIB protocol; anything that is not + * AF_INET6 is treated as IPv4. */ +static inline fib_protocol_t +lcp_xfrm_mk_proto (uint32_t k) +{ + if (AF_INET6 == k) + return (FIB_PROTOCOL_IP6); + return (FIB_PROTOCOL_IP4); +} + +/* Convert a libnl address into a VPP ip_address_t (ia is reset first) */ +static inline void +lcp_xfrm_mk_ipaddr (const struct nl_addr *xa, ip_address_t *ia) +{ + fib_protocol_t fproto; + + ip_address_reset (ia); + fproto = lcp_xfrm_mk_proto (nl_addr_get_family (xa)); + + ip_address_set (ia, nl_addr_get_binary_addr (xa), + FIB_PROTOCOL_IP4 == fproto ?
AF_IP4 : AF_IP6); +} + +/* + * Map a kernel integrity algorithm name to the VPP integrity algorithm. + * Unknown names yield IPSEC_INTEG_N_ALG. len is not used. + */ +void +get_auth_algo (char *alg, int len, ipsec_integ_alg_t *val) +{ + if (!strcmp (alg, "hmac(md5)")) + *val = IPSEC_INTEG_ALG_MD5_96; + else if (!strcmp (alg, "hmac(sha1)")) + *val = IPSEC_INTEG_ALG_SHA1_96; + else if (!strcmp (alg, "hmac(sha256)")) + *val = IPSEC_INTEG_ALG_SHA_256_128; + else if (!strcmp (alg, "hmac(sha384)")) + *val = IPSEC_INTEG_ALG_SHA_384_192; + else if (!strcmp (alg, "hmac(sha512)")) + *val = IPSEC_INTEG_ALG_SHA_512_256; + else + *val = IPSEC_INTEG_N_ALG; +} + +/* + * Map a kernel cipher name plus key length in bits to the VPP crypto + * algorithm. Unknown names or unsupported key lengths yield + * IPSEC_CRYPTO_N_ALG. + */ +void +get_crypto_algo (char *alg, int len, ipsec_crypto_alg_t *val) +{ + if (!strcmp (alg, "cbc(aes)")) + { + if (len == 128) + *val = IPSEC_CRYPTO_ALG_AES_CBC_128; + else if (len == 192) + *val = IPSEC_CRYPTO_ALG_AES_CBC_192; + else if (len == 256) + *val = IPSEC_CRYPTO_ALG_AES_CBC_256; + else + *val = IPSEC_CRYPTO_N_ALG; + } + + else if (!strcmp (alg, "rfc4106(gcm(aes))")) + { + /* + * Len includes 4Bsalt as well. So remove it to get actual cipher keylen + */ + len -= GCM_SALT_SIZE * 8; + if (len == 128) + *val = IPSEC_CRYPTO_ALG_AES_GCM_128; + else if (len == 192) + *val = IPSEC_CRYPTO_ALG_AES_GCM_192; + else if (len == 256) + *val = IPSEC_CRYPTO_ALG_AES_GCM_256; + else + *val = IPSEC_CRYPTO_N_ALG; + } + + else if (!strcmp (alg, "ctr(aes)")) + { + if (len == 128) + *val = IPSEC_CRYPTO_ALG_AES_CTR_128; + else if (len == 192) + *val = IPSEC_CRYPTO_ALG_AES_CTR_192; + else if (len == 256) + *val = IPSEC_CRYPTO_ALG_AES_CTR_256; + else + *val = IPSEC_CRYPTO_N_ALG; + } + + else if (!strcmp (alg, "cbc(des)")) + *val = IPSEC_CRYPTO_ALG_DES_CBC; + + else if (!strcmp (alg, "cbc(des3_ede)")) /* kernel name for 3DES-CBC */ + *val = IPSEC_CRYPTO_ALG_3DES_CBC; + else + *val = IPSEC_CRYPTO_N_ALG; +} + +static inline void +update_port_details (ipsec_policy_t *p, u16 sport, u16 dport, u16 sportmask, + u16 dportmask) +{ + if (!sportmask) + { + p->lport.start = 0; + p->lport.stop = 65535; + } + else if (sportmask == 0xffff) + { + /* Linux XFRM doesn't support port ranges */ + p->lport.start = sport; +
p->lport.stop = sport; + } + + if (!dportmask) + { + p->rport.start = 0; + p->rport.stop = 65535; + } + else if (dportmask == 0xffff) + { + p->rport.start = dport; + p->rport.stop = dport; + } +} + +/* Set both address ranges of a policy to all-zeroes .. all-ones, i.e. + * match every local and remote address. */ +static inline void +update_bypass_policy_addrs (ipsec_policy_t *policy) +{ + ip46_address_t start; + ip46_address_t stop; + + clib_memset (&start, (u8) 0, sizeof (ip46_address_t)); + clib_memset (&stop, (u8) ~0, sizeof (ip46_address_t)); + + clib_memcpy_fast (&policy->laddr.start, &start, sizeof (ip46_address_t)); + clib_memcpy_fast (&policy->laddr.stop, &stop, sizeof (ip46_address_t)); + clib_memcpy_fast (&policy->raddr.start, &start, sizeof (ip46_address_t)); + clib_memcpy_fast (&policy->raddr.stop, &stop, sizeof (ip46_address_t)); +} + +/* + * Add or delete the pair of catch-all BYPASS policies for an SPD. The + * policy is built once, copied, and the two copies are given the two + * direction types before being installed. + */ +static inline void +lcp_xfrm_config_bypass_policies (u32 spd_id, u8 is_add, u8 is_ip6) +{ + int rv; + u32 p_idx; + vlib_main_t *vm = vlib_get_main (); + ipsec_policy_t policy, policy1; + + /* Bypass policies configured only once across all connections */ + if (config_bypass == 1 && is_add) + return; + + /* Start from a known state: fields not assigned below must not carry + * stack garbage into the copy and into ipsec_add_del_policy () */ + clib_memset (&policy, 0, sizeof (policy)); + + /* + * Adding bypass policy one in inb and one in outb direction + * allowing all ranges 0.0.0.0 - 255.255.255.255 + */ + update_bypass_policy_addrs (&policy); + update_port_details (&policy, 0, 65535, 0, 0); + policy.policy = IPSEC_POLICY_ACTION_BYPASS; + policy.protocol = IPSEC_POLICY_PROTOCOL_ANY; + policy.sa_id = 0; + policy.is_ipv6 = is_ip6; + policy.id = spd_id; + /* + * Setting the least priority for the bypass, which means in outb + * if packet doesn't match any PROTECT policies, then it will always + * hit the BYPASS.
+ */ + policy.priority = 0; + + clib_memcpy_fast (&policy1, &policy, sizeof (policy1)); + ipsec_policy_mk_type (1, policy.is_ipv6, policy.policy, &policy.type); + + rv = ipsec_add_del_policy (vm, &policy, is_add, &p_idx); + if (!rv) + NL_XFRM_DBG ("bypass policy-index:%d", p_idx); + else + NL_XFRM_ERR ("bypass policy error:%d", rv); + + ipsec_policy_mk_type (0, policy1.is_ipv6, policy1.policy, &policy1.type); + rv = ipsec_add_del_policy (vm, &policy1, is_add, &p_idx); + if (!rv) + NL_XFRM_DBG ("bypass policy-index:%d", p_idx); + else + NL_XFRM_ERR ("bypass policy error:%d", rv); + + config_bypass = !!is_add; + + NL_XFRM_INFO ("Bypass policies %s successfull", + (is_add == 1) ? "addition" : "deletion"); +} + +static inline void +lcp_xfrm_inb_policy_cfg (ip_address_t *t_saddr, ip_address_t *t_daddr, + u32 sa_id, u32 spd_id, u8 is_add) +{ + int rv; + u32 p_idx; + vlib_main_t *vm = vlib_get_main (); + ipsec_policy_t policy; + + if (t_saddr->version == AF_IP4) + { + clib_memcpy_fast (&policy.laddr.start.ip4.as_u32, + &t_saddr->ip.ip4.as_u32, sizeof (ip4_address_t)); + clib_memcpy_fast (&policy.laddr.stop.ip4.as_u32, &t_saddr->ip.ip4.as_u32, + sizeof (ip4_address_t)); + clib_memcpy_fast (&policy.raddr.start.ip4.as_u32, + &t_daddr->ip.ip4.as_u32, sizeof (ip4_address_t)); + clib_memcpy_fast (&policy.raddr.stop.ip4.as_u32, &t_daddr->ip.ip4.as_u32, + sizeof (ip4_address_t)); + policy.is_ipv6 = 0; + } + else + { + clib_memcpy_fast (&policy.laddr.start.ip6.as_u32, + &t_saddr->ip.ip6.as_u32, sizeof (ip6_address_t)); + clib_memcpy_fast (&policy.laddr.stop.ip6.as_u32, &t_saddr->ip.ip6.as_u32, + sizeof (ip6_address_t)); + clib_memcpy_fast (&policy.raddr.start.ip6.as_u32, + &t_daddr->ip.ip6.as_u32, sizeof (ip6_address_t)); + clib_memcpy_fast (&policy.raddr.stop.ip6.as_u32, &t_daddr->ip.ip6.as_u32, + sizeof (ip6_address_t)); + policy.is_ipv6 = 1; + } + + policy.policy = IPSEC_POLICY_ACTION_PROTECT; + /*SA doesn't have details of inner protocol. 
So set 0 (means accept any)*/ + policy.protocol = IPSEC_POLICY_PROTOCOL_ANY; + policy.sa_id = sa_id; + policy.id = spd_id; + policy.priority = INB_PROTECT_POL_PRIO; + update_port_details (&policy, 0, 65535, 0, 0); + + ipsec_policy_mk_type (0, policy.is_ipv6, policy.policy, &policy.type); + rv = ipsec_add_del_policy (vm, &policy, is_add, &p_idx); + if (!rv) + NL_XFRM_INFO ("ipsec inb policy %s success %U -> %U sa_id: %x spd_id: %x", + ((is_add) ? "add" : "del"), format_ip_address, t_saddr, + format_ip_address, t_daddr, sa_id, spd_id); + else + NL_XFRM_ERR ( + "ipsec inb policy %s fail(err: %d) %U -> %U sa_id: %x spd_id: %x", + ((is_add) ? "add" : "del"), rv, format_ip_address, t_saddr, + format_ip_address, t_daddr, sa_id, spd_id); +} + +static inline int +lcp_xfrm_get_matching_iface (ip46_address_t *addr, u8 is_ipv6) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sif; + u8 iface_found; + + pool_foreach (sif, vnm->interface_main.sw_interfaces) + { + iface_found = ip_interface_has_address (sif->sw_if_index, addr, is_ipv6); + if (iface_found) + { + NL_XFRM_DBG ("Found matching IP on interface index :%u", + sif->sw_if_index); + return sif->sw_if_index; + } + } + return ~0; +} + +static inline u32 +lcp_xfrm_create_spd (ip_address_t *saddr, ip_address_t *daddr, u32 spi, + int *sw_if_index, u8 is_outb) +{ + vlib_main_t *vm = vlib_get_main (); + ipsec_main_t *im = &ipsec_main; + int rv, is_ipv6; + ipsec_spd_t *spd0; + u32 spd_id = 0; + uword *ptr; + ip46_address_t *ip46 = NULL; + + ip46 = (is_outb) ? (&saddr->ip) : (&daddr->ip); + is_ipv6 = (saddr->version == AF_IP4) ? 
0 : 1; + + *sw_if_index = lcp_xfrm_get_matching_iface (ip46, !is_ipv6); + if (*sw_if_index == ~0) + return ~0; + + ptr = hash_get (im->spd_index_by_sw_if_index, *sw_if_index); + if (ptr) + { + spd0 = pool_elt_at_index (im->spds, ptr[0]); + NL_XFRM_DBG ("Interface already bound to spd_id: %x", spd0->id); + spd_id = spd0->id; + } + else + { + spd_id = lcp_xfrm_ipsec_sa_id_table (spi, daddr); + rv = ipsec_add_del_spd (vm, spd_id, 1); + if (rv) + { + NL_XFRM_ERR ("spd creation failed"); + return ~0; + } + rv = ipsec_set_interface_spd (vm, *sw_if_index, spd_id, 1); + switch (rv) + { + case VNET_API_ERROR_SYSCALL_ERROR_1: + NL_XFRM_ERR ("no such spd-id"); + return ~0; + case VNET_API_ERROR_SYSCALL_ERROR_2: + NL_XFRM_DBG ("spd already assigned"); + break; + } + NL_XFRM_INFO ("Adding bypass for src %U dst %U", format_ip_address, + saddr, format_ip_address, daddr); + lcp_xfrm_config_bypass_policies (spd_id, 1, is_ipv6); + } + return spd_id; +} + +static inline void +lcp_xfrm_delete_spd (u32 spd_id, ip_address_t *saddr, ip_address_t *daddr, + u8 is_ip6, u8 is_outb) +{ + vlib_main_t *vm = vlib_get_main (); + ipsec_main_t *im = &ipsec_main; + u32 spd_index, *policies; + ipsec_spd_t *spd; + ipsec_policy_t *p0 = NULL; + uword *p; + + p = hash_get (im->spd_index_by_spd_id, spd_id); + if (!p) + return; + spd_index = p[0]; + spd = pool_elt_at_index (im->spds, spd_index); +#define _(t, v) \ + vec_foreach (policies, spd->policies[IPSEC_SPD_POLICY_##t]) \ + { \ + p0 = pool_elt_at_index (im->policies, *policies); \ + if (p0) \ + { \ + if (IPSEC_POLICY_ACTION_BYPASS != p0->policy) \ + return; \ + } \ + } + foreach_ipsec_spd_policy_type +#undef _ + if (!p0) return; + /* + * There will not be a notification indicating the termination of a + * tunnel. So once we detect that no more PROTECT policies are left + * in our database, we delete the bypass policies (which was addded + * internally by us) and delete the SPD as well. 
+ */ + lcp_xfrm_config_bypass_policies (spd->id, 0, is_ip6); + ipsec_add_del_spd (vm, spd->id, 0); +} + +static inline u8 +lcp_xfrm_get_sa_direction (ip46_address_t *saddr, ip46_address_t *daddr, + int *sw_if_index, u8 is_ipv6) +{ + u8 is_outb = 0; + + *sw_if_index = lcp_xfrm_get_matching_iface (saddr, !is_ipv6); + if (~0 == *sw_if_index) + { + *sw_if_index = lcp_xfrm_get_matching_iface (daddr, !is_ipv6); + if (~0 == *sw_if_index) + NL_XFRM_DBG ("SA notification doesn't belong to VPP iface"); + } + else + is_outb = 1; + + return is_outb; +} + +static inline int +find_tunnel_db (ip_address_t *saddr, ip_address_t *daddr, u8 dir, u8 is_ipv6) +{ + ipip_tunnel_key_t key; + ipip_tunnel_t *tun; + + key.mode = IPIP_MODE_P2P; + key.fib_index = fib_table_find (fib_ip_proto (is_ipv6), 0); + + if (is_ipv6) + key.transport = IPIP_TRANSPORT_IP6; + else + key.transport = IPIP_TRANSPORT_IP4; + + if (dir) + { + clib_memcpy_fast (&key.src, &saddr->ip, sizeof (ip46_address_t)); + clib_memcpy_fast (&key.dst, &daddr->ip, sizeof (ip46_address_t)); + } + else + { + clib_memcpy_fast (&key.src, &daddr->ip, sizeof (ip46_address_t)); + clib_memcpy_fast (&key.dst, &saddr->ip, sizeof (ip46_address_t)); + } + + tun = ipip_tunnel_db_find (&key); + if (!tun) + { + NL_XFRM_ERR ("Tunnel iface not found"); + return ~0; + } + + NL_XFRM_DBG ("Tunnel iface found in tunnel DB iterface index: %x", + tun->sw_if_index); + return tun->sw_if_index; +} + +static inline void +lcp_xfrm_update_tunnel (ip_address_t *saddr, ip_address_t *daddr, u8 dir, + u32 sa_id, u8 is_ipv6, struct xfrmnl_sa *sa) +{ + u32 sa_out = 0, *sa_ins = NULL; + ipsec_sa_t *sai = NULL, *sao = NULL; + index_t itpi; + u32 sw_if_index = ~0; + int rv; + u8 *s = NULL; + u8 instance = xfrmnl_sa_get_reqid (sa); + + if (dir) + return; + + if (nm->interface_type == NL_INTERFACE_TYPE_IPIP) + sw_if_index = find_tunnel_db (saddr, daddr, dir, is_ipv6); + else + { + s = format (s, "ipsec%d", instance); + lcp_xfrm_is_ipsec_intf_exist (s, 
&sw_if_index); + vec_free (s); + } + if (sw_if_index == ~0) + return; + + pool_foreach_index (itpi, ipsec_tun_protect_pool) + { + ipsec_tun_protect_t *itp = + pool_elt_at_index (ipsec_tun_protect_pool, itpi); + if (!itp || (itp->itp_sw_if_index != sw_if_index)) + continue; + sao = ipsec_sa_get (itp->itp_out_sa); + sa_out = sao->id; + FOR_EACH_IPSEC_PROTECT_INPUT_SA ( + itp, sai, if (sa_id != sai->id) vec_add1 (sa_ins, sai->id);) + } + + rv = ipsec_tun_protect_update (sw_if_index, NULL, sa_out, sa_ins); + if (rv) + { + NL_XFRM_ERR ("SA del: Tunnel protect update failure (err: %d)", rv); + return; + } + NL_XFRM_INFO ("Tunnel protect update success (index: %x)", sw_if_index); +} + +void +nl_xfrm_sa_del (struct xfrmnl_sa *sa) +{ + int is_hard = xfrmnl_sa_is_hardexpiry_reached (sa); + u8 fam = xfrmnl_sa_get_family (sa); + u32 spi = xfrmnl_sa_get_spi (sa); + struct nl_addr *dst = xfrmnl_sa_get_daddr (sa); + struct nl_addr *src = xfrmnl_sa_get_saddr (sa); + u8 is_ip6 = (fam == AF_INET) ? 0 : 1; + ip_address_t daddr, saddr; + u8 is_outb = 1; + u32 id = 0; + int sw_if_index, rv = 0; + u32 spd_id = ~0; + + /* + * Dont need to handle EXPIRE due to soft limit as the rekeying will take + * care of installing new and deleting old one. 
But for hard limit, we + * need to delete SA as part of EXPIRE notification + */ + if ((nl_object_get_msgtype ((struct nl_object *) sa) == XFRM_MSG_EXPIRE) && + (!is_hard)) + return; + + lcp_xfrm_mk_ipaddr (dst, &daddr); + lcp_xfrm_mk_ipaddr (src, &saddr); + + id = lcp_xfrm_ipsec_sa_id_table (spi, &daddr); + + is_outb = + lcp_xfrm_get_sa_direction (&saddr.ip, &daddr.ip, &sw_if_index, is_ip6); + + if (IS_ROUTE_MODE_ENABLED) + lcp_xfrm_update_tunnel (&saddr, &daddr, is_outb, id, is_ip6, sa); + else if ((sw_if_index != ~0) && (is_outb == 0)) + { + is_outb = 0; + spd_id = lcp_xfrm_create_spd (&saddr, &daddr, spi, &rv, is_outb); + lcp_xfrm_inb_policy_cfg (&saddr, &daddr, id, spd_id, 0); + } + + rv = ipsec_sa_unlock_id (id); + if (rv) + { + NL_XFRM_ERR ("ipsec sa %x del failure(err: %d) %U -> %U", id, rv, + format_ip_address, &saddr, format_ip_address, &daddr); + } + else + { + hash_unset (lifetime_by_sa_id, id); + NL_XFRM_INFO ("ipsec sa %x del success %U -> %U", id, format_ip_address, + &saddr, format_ip_address, &daddr); + } + + if (!IS_ROUTE_MODE_ENABLED) + lcp_xfrm_delete_spd (spd_id, &saddr, &daddr, is_ip6, is_outb); + return; +} + +ipsec_sa_t * +get_sa_by_sa_id (u32 sa_id) +{ + ipsec_sa_t *sa; + u32 sai; + + pool_foreach_index (sai, ipsec_sa_pool) + { + sa = ipsec_sa_get (sai); + if (!sa) + return NULL; + if (sa_id == sa->id) + return sa; + } + return NULL; +} + +ipsec_sa_t * +get_reverse_sa_by_tun_ip (ip_address_t *saddr, ip_address_t *daddr, u8 is_ipv6, + u8 dir) +{ + sa_life_limits_t *life = NULL; + ipsec_sa_t *sa; + u8 found = 0; + uword *p = NULL; + u32 sai; + + pool_foreach_index (sai, ipsec_sa_pool) + { + sa = ipsec_sa_get (sai); + if (!sa) + return NULL; + + p = hash_get (lifetime_by_sa_id, sa->id); + if (!p) + continue; + + life = (sa_life_limits_t *) p[0]; + + if (!is_ipv6 && + !ip4_address_compare (&daddr->ip.ip4, &sa->tunnel.t_src.ip.ip4) && + !ip4_address_compare (&saddr->ip.ip4, &sa->tunnel.t_dst.ip.ip4) && + !life->sa_in_tunnel) + found = 1; + else 
if (!ip6_address_compare (&daddr->ip.ip6, + &sa->tunnel.t_src.ip.ip6) && + !ip6_address_compare (&saddr->ip.ip6, + &sa->tunnel.t_dst.ip.ip6) && + !life->sa_in_tunnel) + found = 1; + + if (found) + { + if (dir) + life->sa_in_tunnel = !dir; + else + /* Its enabled only for inb sa */ + life->sa_in_tunnel = 1; + return sa; + } + } + return NULL; +} + +static inline int +lcp_xfrm_create_ipsec_tunnel (struct xfrmnl_sa *sa, int *sw_if_index, + u32 phy_sw_if_index) +{ + clib_error_t *ret; + int rv = 0; + u8 *s = NULL; + int instance = xfrmnl_sa_get_reqid (sa); + + s = format (s, "ipsec%d", instance); + rv = lcp_xfrm_is_ipsec_intf_exist (s, (u32 *) sw_if_index); + vec_free (s); + + if (rv) + return 0; + + rv = ipsec_itf_create (instance, TUNNEL_MODE_P2P, (u32 *) sw_if_index); + if (rv == VNET_API_ERROR_INVALID_REGISTRATION) + { + NL_XFRM_ERR ("Tunnel instance %x exists sw_if_idx: %x", instance, + sw_if_index[0]); + return 0; + } + + NL_XFRM_DBG ("Tunnel instance %x created succesfully.. index: %x", instance, + *sw_if_index); + + ret = vnet_sw_interface_set_flags (vnet_get_main (), *sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + if (ret) + { + NL_XFRM_ERR ("Error setting flags on tunnel"); + return -1; + } + + vnet_sw_interface_update_unnumbered (*sw_if_index, phy_sw_if_index, 1); + return 0; +} + +static inline int +lcp_xfrm_create_ipip_tunnel (struct xfrmnl_sa *sa, int *sw_if_index, + u8 is_ipv6, u8 dir, ip_address_t *saddr, + ip_address_t *daddr, u32 phy_sw_if_index) +{ + tunnel_encap_decap_flags_t tflags = TUNNEL_ENCAP_DECAP_FLAG_NONE; + u8 fib_index = 0, instance = 0; + clib_error_t *ret; + int rv = 0; + int reqid = xfrmnl_sa_get_reqid (sa); + + /* + * Reqid will be unique and constant for a given connection. + * Hence using the same as tunnel instance + */ + instance = reqid; + + fib_index = fib_table_find (fib_ip_proto (is_ipv6), 0); + + /* If inb sa, then swap the IPs */ + if (!dir) + rv = ipip_add_tunnel (is_ipv6 ? 
IPIP_TRANSPORT_IP6 : IPIP_TRANSPORT_IP4, + instance, &daddr->ip, &saddr->ip, fib_index, tflags, + IP_DSCP_CS0, TUNNEL_MODE_P2P, (u32 *) sw_if_index); + + else + rv = ipip_add_tunnel (is_ipv6 ? IPIP_TRANSPORT_IP6 : IPIP_TRANSPORT_IP4, + instance, &saddr->ip, &daddr->ip, fib_index, tflags, + IP_DSCP_CS0, TUNNEL_MODE_P2P, (u32 *) sw_if_index); + + if (rv == VNET_API_ERROR_IF_ALREADY_EXISTS) + { + NL_XFRM_DBG ("Tunnel instance %x exists sw_if_idx: %x", instance, + sw_if_index[0]); + return 0; + } + else if (rv == VNET_API_ERROR_INSTANCE_IN_USE) + { + NL_XFRM_ERR ("Tunnel instance %x already in use", instance); + return -1; + } + else if (rv < 0) + { + NL_XFRM_ERR ("Tunnel addition failed(err: %d) for %U->%U", rv, + format_ip_address, saddr, format_ip_address, daddr); + return -1; + } + + NL_XFRM_INFO ("Tunnel instance %x created succesfully.. index: %x", instance, + *sw_if_index); + ret = vnet_sw_interface_set_flags (vnet_get_main (), *sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + if (ret) + { + NL_XFRM_ERR ("Error setting flags on tunnel"); + return -1; + } + + vnet_sw_interface_update_unnumbered (*sw_if_index, phy_sw_if_index, 1); + return 0; +} + +static inline void +lcp_xfrm_protect_tunnel (int sw_if_index, u8 is_ipv6, u32 sa_id, u8 dir, + ip_address_t *saddr, ip_address_t *daddr) +{ + u32 sa_out = 0, sa_in = 0, *sa_ins = NULL; + ipsec_sa_t *sa; + + if (dir == 0) + { + sa_in = sa_id; + sa = get_reverse_sa_by_tun_ip (saddr, daddr, is_ipv6, dir); + /* Outb sa is not yet configured */ + if (!sa) + { + return; + } + sa_out = sa->id; + } + else + { + sa_out = sa_id; + sa = get_reverse_sa_by_tun_ip (saddr, daddr, is_ipv6, dir); + /* Inb sa is not yet configured */ + if (!sa) + { + return; + } + sa_in = sa->id; + } + + index_t itpi; + ipsec_sa_t *sai = NULL; + pool_foreach_index (itpi, ipsec_tun_protect_pool) + { + ipsec_tun_protect_t *itp = + pool_elt_at_index (ipsec_tun_protect_pool, itpi); + if (!itp || (itp->itp_sw_if_index != sw_if_index)) + continue; + 
FOR_EACH_IPSEC_PROTECT_INPUT_SA ( + itp, sai, if (sai && (sa_in != sai->id)) { + if (vec_len (sa_ins) < ITP_MAX_N_SA_IN) + vec_add1 (sa_ins, sai->id); + }) + } + + /* Adding the curent inb sa to tunnel */ + vec_add1 (sa_ins, sa_in); + + int rv = ipsec_tun_protect_update (sw_if_index, NULL, sa_out, sa_ins); + if (rv) + NL_XFRM_ERR ("Tunnel protect update failure (err: %d)", rv); + else + NL_XFRM_INFO ("Tunnel protect update success for index : %d)", + sw_if_index); +} + +static inline void +lcp_xfrm_configure_route_mode (struct xfrmnl_sa *sa, u8 is_ipv6, + ip_address_t *saddr, ip_address_t *daddr, + u32 sa_id, u8 dir, u32 phy_sw_if_idx) +{ + int ret, sw_if_index = -1; + uword *p = NULL; + + if (nm->interface_type == NL_INTERFACE_TYPE_IPIP) + ret = lcp_xfrm_create_ipip_tunnel (sa, &sw_if_index, is_ipv6, dir, saddr, + daddr, phy_sw_if_idx); + else + ret = lcp_xfrm_create_ipsec_tunnel (sa, &sw_if_index, phy_sw_if_idx); + + if (ret < 0) + return; + + p = hash_get (lifetime_by_sa_id, sa_id); + if (!p) + return; + ((sa_life_limits_t *) p[0])->tun_sw_if_idx = sw_if_index; + + lcp_xfrm_protect_tunnel (sw_if_index, is_ipv6, sa_id, dir, saddr, daddr); +} + +static inline void +nl_xfrm_sa_add (struct xfrmnl_sa *sa) +{ + struct xfrmnl_ltime_cfg *lifetimes = xfrmnl_sa_get_lifetime_cfg (sa); + ipsec_crypto_alg_t crypto_alg = IPSEC_CRYPTO_ALG_NONE; + ipsec_integ_alg_t integ_alg = IPSEC_INTEG_ALG_NONE; + ipsec_sa_flags_t flags = IPSEC_SA_FLAG_NONE; + char key[IPSEC_KEY_MAX_LEN], auth_key[IPSEC_KEY_MAX_LEN]; + char alg_name[ALGO_NAME], auth_alg_name[ALGO_NAME]; + struct nl_addr *dst = xfrmnl_sa_get_daddr (sa); + struct nl_addr *src = xfrmnl_sa_get_saddr (sa); + unsigned int udp_src, udp_dst, encap_type; + sa_life_limits_t *life = NULL, lifetime; + unsigned int key_len, auth_key_len; + ipsec_key_t ck = { 0 }, ik = { 0 }; + u32 spi = xfrmnl_sa_get_spi (sa); + struct nl_addr *encap_oa = NULL; + u8 ip_family, mode, is_ipv6, dir; + u32 salt = 0, icv = 0, sai = 0, id = 0; + 
ipsec_protocol_t proto = 0; + ip_address_t saddr, daddr; + int if_idx, sw_if_index; + tunnel_t tun = {}; + u32 spd_id; + int rv; + + lcp_xfrm_mk_ipaddr (dst, &daddr); + lcp_xfrm_mk_ipaddr (src, &saddr); + + id = lcp_xfrm_ipsec_sa_id_table (spi, &daddr); + + /* + * Ideal case, this scenaio will never be hit. But when reading SA + * notification from sk_xfrm socket fails and we initate a sync,then there is + * a possibility that we get notification for the one already present in VPP. + * Hence the check + */ + if (get_sa_by_sa_id (id)) + goto error; + + if (xfrmnl_sa_get_flags (sa) & XFRM_STATE_ESN) + { + flags |= IPSEC_SA_FLAG_USE_ESN | IPSEC_SA_FLAG_USE_ANTI_REPLAY; + } + /* + * Kernel SA XFRM doesnt have a flag for AR config. So a non-zero + * replay window size indicates AR is enabled.Also replay window + * size is fixed to 64 in VPP (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE). + * So its not configurable + */ + + if (xfrmnl_sa_get_replay_window (sa)) + { + flags |= IPSEC_SA_FLAG_USE_ANTI_REPLAY; + } + + xfrmnl_sa_get_encap_tmpl (sa, &encap_type, &udp_src, &udp_dst, &encap_oa); + if (encap_type == UDP_ENCAP_ESPINUDP) + { + flags |= IPSEC_SA_FLAG_UDP_ENCAP; + } + else + udp_src = udp_dst = IPSEC_UDP_PORT_NONE; + + lifetime.soft_byte_limit = xfrmnl_ltime_cfg_get_soft_bytelimit (lifetimes); + lifetime.hard_byte_limit = xfrmnl_ltime_cfg_get_hard_bytelimit (lifetimes); + lifetime.soft_packet_limit = + xfrmnl_ltime_cfg_get_soft_packetlimit (lifetimes); + lifetime.hard_packet_limit = + xfrmnl_ltime_cfg_get_hard_packetlimit (lifetimes); + lifetime.sa_id = id; + lifetime.sa_in_tunnel = 0; + lifetime.tun_sw_if_idx = ~0; + + proto = + (50 == xfrmnl_sa_get_proto (sa)) ? 
IPSEC_PROTOCOL_ESP : IPSEC_PROTOCOL_AH; + ip_family = xfrmnl_sa_get_family (sa); + + if (-1 != xfrmnl_sa_get_aead_params (sa, alg_name, &key_len, &icv, key)) + flags |= IPSEC_SA_FLAG_IS_AEAD; + else + { + if (-1 == xfrmnl_sa_get_crypto_params (sa, alg_name, &key_len, key)) + { + NL_XFRM_ERR ("crypto param extraction failed"); + goto error; + } + if (-1 == xfrmnl_sa_get_auth_params (sa, auth_alg_name, &auth_key_len, + NULL, auth_key)) + { + NL_XFRM_ERR ("auth param extraction failed"); + goto error; + } + + get_auth_algo (auth_alg_name, auth_key_len, &integ_alg); + if (integ_alg == IPSEC_INTEG_N_ALG) + { + NL_XFRM_ERR ("Invalid/Unsupported integ algo: %s keylen: %u", + auth_alg_name, auth_key_len); + goto error; + } + ik.len = auth_key_len / 8; + clib_memcpy_fast (ik.data, (u8 *) auth_key, (auth_key_len / 8)); + } + + get_crypto_algo (alg_name, key_len, &crypto_alg); + if (crypto_alg == IPSEC_CRYPTO_N_ALG) + { + NL_XFRM_ERR ("Invalid/Unsupported crypto algo: %s keylen: %u", alg_name, + key_len); + goto error; + } + + /* + * Key_len/key here includes salt size/value. As per rfc5282 + * GCM salt size will be 4B which will be after cipher key + */ + if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg)) + { + key_len -= GCM_SALT_SIZE * 8; + clib_memcpy_fast (&salt, ((u8 *) key) + (key_len / 8), GCM_SALT_SIZE); + } + /* + * Else for CCM if supported, salt size would be 3B and needs + * to be handled here accordingly + */ + ck.len = key_len / 8; + clib_memcpy_fast (ck.data, (u8 *) key, (key_len / 8)); + + is_ipv6 = (ip_family == AF_INET) ? 0 : 1; + + dir = + lcp_xfrm_get_sa_direction (&saddr.ip, &daddr.ip, &sw_if_index, is_ipv6); + + if (!dir) + flags |= IPSEC_SA_FLAG_IS_INBOUND; + + if (nm->interface_type == NL_INTERFACE_TYPE_IPIP) + { + /* + * Other tunnel flags of VPP defined under + * foreach_tunnel_encap_decap_flag are not supported by Strongswan/XFRM + * NLs. 
+ */ + if (!(xfrmnl_sa_get_flags (sa) & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)) + { + tun.t_encap_decap_flags |= TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP; + } + if (xfrmnl_sa_get_flags (sa) & XFRM_STATE_NOPMTUDISC) + { + tun.t_encap_decap_flags |= TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DF; + } + if (xfrmnl_sa_get_flags (sa) & XFRM_STATE_NOECN) + { + if (!dir) + tun.t_encap_decap_flags |= TUNNEL_ENCAP_DECAP_FLAG_DECAP_COPY_ECN; + else + tun.t_encap_decap_flags |= TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN; + } + } + else + { + mode = (XFRM_MODE_TRANSPORT == xfrmnl_sa_get_mode (sa)) ? 0 : 1; + if (mode) + { + flags |= IPSEC_SA_FLAG_IS_TUNNEL; + if (AF_INET6 == ip_family) + flags |= IPSEC_SA_FLAG_IS_TUNNEL_V6; + } + flags |= IPSEC_SA_FLAG_IS_PROTECT; + } + if (ip_family == AF_INET) + { + is_ipv6 = 0; + tun.t_src.version = tun.t_dst.version = AF_IP4; + clib_memcpy_fast (&tun.t_src.ip.ip4.as_u32, &saddr.ip.ip4.as_u32, + sizeof (ip4_address_t)); + clib_memcpy_fast (&tun.t_dst.ip.ip4.as_u32, &daddr.ip.ip4.as_u32, + sizeof (ip4_address_t)); + } + else + { + is_ipv6 = 1; + tun.t_src.version = tun.t_dst.version = AF_IP6; + clib_memcpy_fast (tun.t_src.ip.ip6.as_u32, &saddr.ip.ip6.as_u32, + sizeof (ip6_address_t)); + clib_memcpy_fast (tun.t_dst.ip.ip6.as_u32, &daddr.ip.ip6.as_u32, + sizeof (ip6_address_t)); + } + + rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg, &ik, + flags, salt, udp_src, udp_dst, 0, &tun, &sai); + if (rv) + { + NL_XFRM_ERR ("ipsec sa add %x failure(err: %d) %U -> %U", id, rv, + format_ip_address, &saddr, format_ip_address, &daddr); + goto error; + } + vec_add1 (life, lifetime); + hash_set (lifetime_by_sa_id, id, life); + + NL_XFRM_INFO ("ipsec sa add %x success %U -> %U", id, format_ip_address, + &saddr, format_ip_address, &daddr); + + if (IS_ROUTE_MODE_ENABLED) + lcp_xfrm_configure_route_mode (sa, is_ipv6, &saddr, &daddr, id, dir, + sw_if_index); + + else if ((sw_if_index != ~0) && (dir == 0)) + { + spd_id = lcp_xfrm_create_spd (&saddr, &daddr, spi, 
&if_idx, dir); + lcp_xfrm_inb_policy_cfg (&saddr, &daddr, id, spd_id, 1); + } + +error: + return; +} + +static inline void +get_max_addresses_by_prefix (ip_address_t *orig, u8 prefix, ip_address_t *max, + u8 is_ip6) +{ + if (!is_ip6) + { + ip4_prefix_max_address_host_order (&orig->ip.ip4, prefix, &max->ip.ip4); + cpu_to_be (max->ip.ip4.as_u32, 32); + } + else + { + ip6_preflen_to_mask (prefix, &max->ip.ip6); + + for (int i = 0; i < 4; i++) + { + max->ip.ip6.as_u32[i] = ~(max->ip.ip6.as_u32[i]); + max->ip.ip6.as_u32[i] |= orig->ip.ip6.as_u32[i]; + } + } +} + +static inline void +lcp_xfrm_update_addr_ranges (ipsec_policy_t *p, ip_address_t *sel_saddr, + ip_address_t *sel_daddr, u8 sel_saddr_prefix, + u8 sel_daddr_prefix) +{ + ip_address_t sel_stop_saddr, sel_stop_daddr; + + get_max_addresses_by_prefix (sel_saddr, sel_saddr_prefix, &sel_stop_saddr, + p->is_ipv6); + get_max_addresses_by_prefix (sel_daddr, sel_daddr_prefix, &sel_stop_daddr, + p->is_ipv6); + + if (!p->is_ipv6) + { + clib_memcpy_fast (&p->laddr.start.ip4.as_u32, &sel_saddr->ip.ip4.as_u32, + sizeof (ip4_address_t)); + clib_memcpy_fast (&p->laddr.stop.ip4.as_u32, + &sel_stop_saddr.ip.ip4.as_u32, sizeof (ip4_address_t)); + clib_memcpy_fast (&p->raddr.start.ip4.as_u32, &sel_daddr->ip.ip4.as_u32, + sizeof (ip4_address_t)); + clib_memcpy_fast (&p->raddr.stop.ip4.as_u32, + &sel_stop_daddr.ip.ip4.as_u32, sizeof (ip4_address_t)); + } + else + { + clib_memcpy_fast (&p->laddr.start.ip6.as_u32, &sel_saddr->ip.ip6.as_u32, + sizeof (ip6_address_t)); + clib_memcpy_fast (&p->laddr.stop.ip6.as_u32, + &sel_stop_saddr.ip.ip6.as_u32, sizeof (ip6_address_t)); + clib_memcpy_fast (&p->raddr.start.ip6.as_u32, &sel_daddr->ip.ip6.as_u32, + sizeof (ip6_address_t)); + clib_memcpy_fast (&p->raddr.stop.ip6.as_u32, + &sel_stop_daddr.ip.ip6.as_u32, sizeof (ip6_address_t)); + } +} + +static inline int +find_matching_sp (ipsec_policy_t *p, ip_address_t *saddr, ip_address_t *daddr, + u8 sprefix, u8 dprefix, ipsec_spd_policy_type_t type) +{ 
+ ip_address_t saddr_stop, daddr_stop; + u8 matched = 0; + u8 is_ip6 = (saddr->version == AF_IP6) ? 1 : 0; + + get_max_addresses_by_prefix (saddr, sprefix, &saddr_stop, is_ip6); + get_max_addresses_by_prefix (daddr, dprefix, &daddr_stop, is_ip6); + + if (!is_ip6 && !ip4_address_compare (&p->laddr.start.ip4, &saddr->ip.ip4) && + !ip4_address_compare (&p->laddr.stop.ip4, &saddr_stop.ip.ip4) && + !ip4_address_compare (&p->raddr.start.ip4, &daddr->ip.ip4) && + !ip4_address_compare (&p->raddr.stop.ip4, &daddr_stop.ip.ip4) && + (p->type == type)) + matched = 1; + + else if (!ip6_address_compare (&p->laddr.start.ip6, &saddr->ip.ip6) && + !ip6_address_compare (&p->laddr.stop.ip6, &saddr_stop.ip.ip6) && + !ip6_address_compare (&p->raddr.start.ip6, &daddr->ip.ip6) && + !ip6_address_compare (&p->raddr.stop.ip6, &daddr_stop.ip.ip6) && + (p->type == type)) + matched = 1; + if (matched) + { + NL_XFRM_DBG ("Found Matchin policy. Delete it"); + return 1; + } + return 0; +} + +static inline void +lcp_xfrm_del_old_sp (ip_address_t *s_saddr, ip_address_t *s_daddr, + u8 s_sprefix, u8 s_dprefix, ipsec_spd_policy_type_t type) +{ + ipsec_main_t *im = &ipsec_main; + u32 spd_idx, *policies; + ipsec_spd_t *spd; + ipsec_policy_t *p0 = NULL; + int r = 0; + u32 p_idx; + vlib_main_t *vm = vlib_get_main (); + + pool_foreach_index (spd_idx, im->spds) + { + spd = pool_elt_at_index (im->spds, spd_idx); + if (!spd) + return; +#define _(t, v) \ + vec_foreach (policies, spd->policies[IPSEC_SPD_POLICY_##t]) \ + { \ + p0 = pool_elt_at_index (im->policies, *policies); \ + if (!p0) \ + return; \ + if (p0->policy == IPSEC_POLICY_ACTION_BYPASS) \ + continue; \ + r = \ + find_matching_sp (p0, s_saddr, s_daddr, s_sprefix, s_dprefix, type); \ + if (r) \ + goto found; \ + } + foreach_ipsec_spd_policy_type +#undef _ + } + +found: + if (!r) + return; + NL_XFRM_DBG ("Deleting Pol wth spdid:%x and sa_id:%x", p0->id, p0->sa_id); + r = ipsec_add_del_policy (vm, p0, 0, &p_idx); + if (!r) + NL_XFRM_INFO ("ipsec inb 
policy del success %U -> %U", format_ip_address, + s_saddr, format_ip_address, s_daddr); + else + NL_XFRM_ERR ("ipsec inb policy del fail(err: %d) %U -> %U", r, + format_ip_address, s_saddr, format_ip_address, s_daddr); +} + +static inline void +fib_entry_cfg (ip_address_t *sel_daddr, u8 sel_daddr_prefix, u32 if_idx, + u8 ip6, u8 is_add) +{ + u8 fib_index = 0; + fib_route_path_t *rpath = NULL, path; + fib_prefix_t rpfx; + fib_source_t fib_src = FIB_SOURCE_API; + + clib_memset (&path, 0, sizeof (path)); + path.frp_weight = 1; + path.frp_sw_if_index = if_idx; + vec_add1 (rpath, path); + + rpfx.fp_len = sel_daddr_prefix; + if (!ip6) + { + rpfx.fp_proto = FIB_PROTOCOL_IP4; + memcpy (&rpfx.fp_addr.ip4, &sel_daddr->ip.ip4, sizeof (ip4_address_t)); + } + else + { + rpfx.fp_proto = FIB_PROTOCOL_IP6; + memcpy (&rpfx.fp_addr.ip6, &sel_daddr->ip.ip6, sizeof (ip6_address_t)); + } + + if (!is_add) + fib_table_entry_path_remove2 (fib_index, &rpfx, fib_src, rpath); + else + fib_table_entry_path_add2 (fib_index, &rpfx, fib_src, FIB_ENTRY_FLAG_NONE, + rpath); + vec_free (rpath); +} + +static inline void +lcp_xfrm_tun_cfg_destroy (ip_address_t *sel_daddr, u8 sel_daddr_prefix, + u8 is_ipv6) +{ + int sw_if_index; + uword *p = NULL; + int rv; + + if (!is_ipv6) + p = hash_get (tun_idx_by_sel_daddr, sel_daddr->ip.ip4.as_u32); + else + p = hash_get (tun_idx_by_sel_daddr, + ip6_address_hash_to_u32 (&sel_daddr->ip.ip6)); + + if (!p) + return; + sw_if_index = ((policy_db_t *) p[0])->tun_sw_if_idx; + + if (sw_if_index == ~0) + return; + fib_entry_cfg (sel_daddr, sel_daddr_prefix, sw_if_index, is_ipv6, 0); + + if (!is_ipv6) + hash_unset (tun_idx_by_sel_daddr, sel_daddr->ip.ip4.as_u32); + else + hash_unset (tun_idx_by_sel_daddr, ip6_address_hash_to_u32 (&sel_daddr->ip.ip6)); + + rv = ipsec_tun_protect_del (sw_if_index, NULL); + if (rv) + { + NL_XFRM_ERR ("Tunnel protect del failure (err: %d)", rv); + return; + } + + if (nm->interface_type == NL_INTERFACE_TYPE_IPIP) + rv = ipip_del_tunnel (sw_if_index); + else + rv 
= ipsec_itf_delete (sw_if_index); + if (rv) + NL_XFRM_ERR ("Tunnel deletion failure (err: %d)", rv); + return; +} + +static inline void +nl_xfrm_sp_del (struct xfrmnl_sp *sp) +{ + struct xfrmnl_sel *sel = xfrmnl_sp_get_sel (sp); + struct nl_addr *sel_src = xfrmnl_sel_get_saddr (sel); + struct nl_addr *sel_dst = xfrmnl_sel_get_daddr (sel); + u8 fam = xfrmnl_sel_get_family (sel); + u8 sel_saddr_prefix = xfrmnl_sel_get_prefixlen_s (sel); + u8 sel_daddr_prefix = xfrmnl_sel_get_prefixlen_d (sel); + ip_address_t sel_saddr, sel_daddr; + u8 dir = xfrmnl_sp_get_dir (sp); + ipsec_spd_policy_type_t type; + u8 is_ip6; + + if (dir != XFRM_POLICY_OUT) + return; + + is_ip6 = (fam == AF_INET6) ? 1 : 0; + + lcp_xfrm_mk_ipaddr (sel_dst, &sel_daddr); + lcp_xfrm_mk_ipaddr (sel_src, &sel_saddr); + + if (IS_ROUTE_MODE_ENABLED) + { + lcp_xfrm_tun_cfg_destroy (&sel_daddr, sel_daddr_prefix, is_ip6); + } + else + { + ipsec_policy_mk_type (1, is_ip6, IPSEC_POLICY_ACTION_PROTECT, &type); + + lcp_xfrm_del_old_sp (&sel_saddr, &sel_daddr, sel_saddr_prefix, + sel_daddr_prefix, type); + } + + return; +} + +static inline void +lcp_xfrm_tun_update_fib (u32 sa_id, ip_address_t *sel_daddr, + u8 sel_daddr_prefix, u8 is_ipv6) +{ + int sw_if_index = -1; + uword *p = NULL; + policy_db_t *pols = NULL, pol; + + p = hash_get (lifetime_by_sa_id, sa_id); + if (!p) + return; + sw_if_index = ((sa_life_limits_t *) p[0])->tun_sw_if_idx; + + pol.tun_sw_if_idx = sw_if_index; + vec_add1 (pols, pol); + if (!is_ipv6) + hash_set (tun_idx_by_sel_daddr, sel_daddr->ip.ip4.as_u32, pols); + else + hash_set (tun_idx_by_sel_daddr, + ip6_address_hash_to_u32 (&sel_daddr->ip.ip6), pols); + + if (sw_if_index != ~0) + fib_entry_cfg (sel_daddr, sel_daddr_prefix, sw_if_index, is_ipv6, 1); +} + +static inline void +nl_xfrm_sp_add (struct xfrmnl_sp *sp, u8 num) +{ + /* User template(tunnel) variables */ + struct xfrmnl_user_tmpl *u_tmpl = xfrmnl_sp_usertemplate_n (sp, (num - 1)); + struct nl_addr *src = xfrmnl_user_tmpl_get_saddr 
(u_tmpl); + struct nl_addr *dst = xfrmnl_user_tmpl_get_daddr (u_tmpl); + u8 fam = xfrmnl_user_tmpl_get_family (u_tmpl); + u32 spi = xfrmnl_user_tmpl_get_spi (u_tmpl); + ip_address_t saddr, daddr; + u32 sa_id; + + /* Selector variables */ + struct xfrmnl_sel *sel = xfrmnl_sp_get_sel (sp); + struct nl_addr *sel_src = xfrmnl_sel_get_saddr (sel); + struct nl_addr *sel_dst = xfrmnl_sel_get_daddr (sel); + u8 sel_saddr_prefix = xfrmnl_sel_get_prefixlen_s (sel); + u8 sel_daddr_prefix = xfrmnl_sel_get_prefixlen_d (sel); + u16 sel_sportmask = xfrmnl_sel_get_sportmask (sel); + u16 sel_dportmask = xfrmnl_sel_get_dportmask (sel); + u16 sel_dport = xfrmnl_sel_get_dport (sel); + u16 sel_sport = xfrmnl_sel_get_sport (sel); + u8 proto = xfrmnl_sel_get_proto (sel); + ip_address_t sel_saddr, sel_daddr; + + u32 prio = xfrmnl_sp_get_priority (sp); + u8 dir = xfrmnl_sp_get_dir (sp); + u8 is_ipv6 = 0, is_outbound = 0; + vlib_main_t *vm = vlib_get_main (); + ipsec_spd_policy_type_t type; + u32 p_idx, spd_id = 0; + int rv, sw_if_index; + ipsec_policy_t p; + + /* + * Inbound policy additions are handled as part of SA addition. + * Hence we ignore inbound policy notifications from kernel + */ + if (dir != XFRM_POLICY_OUT) + return; + + proto = !proto ? IPSEC_POLICY_PROTOCOL_ANY : proto; + is_ipv6 = (fam == AF_INET6) ? 1 : 0; + + lcp_xfrm_mk_ipaddr (sel_dst, &sel_daddr); + lcp_xfrm_mk_ipaddr (sel_src, &sel_saddr); + lcp_xfrm_mk_ipaddr (dst, &daddr); + lcp_xfrm_mk_ipaddr (src, &saddr); + + sa_id = lcp_xfrm_ipsec_sa_id_table (spi, &daddr); + + if (IS_ROUTE_MODE_ENABLED) + { + /* + * Add a fib entry for dest tun selectors via ipipX interface. 
+ */ + lcp_xfrm_tun_update_fib (sa_id, &sel_daddr, sel_daddr_prefix, is_ipv6); + return; + } + + is_outbound = 1; + spd_id = + lcp_xfrm_create_spd (&saddr, &daddr, spi, &sw_if_index, is_outbound); + + if (sw_if_index == ~0) + { + NL_XFRM_ERR ("SP add Notfn is not for vpp interfaces"); + return; + } + + ipsec_policy_mk_type (is_outbound, is_ipv6, IPSEC_POLICY_ACTION_PROTECT, + &type); + lcp_xfrm_del_old_sp (&sel_saddr, &sel_daddr, sel_saddr_prefix, + sel_daddr_prefix, type); + + p.id = spd_id; + p.priority = prio; + p.is_ipv6 = is_ipv6; + + lcp_xfrm_update_addr_ranges (&p, &sel_saddr, &sel_daddr, sel_saddr_prefix, + sel_daddr_prefix); + p.protocol = proto; + update_port_details (&p, sel_sport, sel_dport, sel_sportmask, sel_dportmask); + p.sa_id = sa_id; + p.policy = IPSEC_POLICY_ACTION_PROTECT; + + ipsec_policy_mk_type (is_outbound, p.is_ipv6, p.policy, &p.type); + rv = ipsec_add_del_policy (vm, &p, 1, &p_idx); + if (!rv) + NL_XFRM_INFO ("ipsec %s policy add success %U -> %U sa_id: %x spd_id: %x", + (!is_outbound ? "inb" : "outb"), format_ip_address, + &sel_saddr, format_ip_address, &sel_daddr, sa_id, spd_id); + else + NL_XFRM_ERR ( + "ipsec %s policy add fail(err: %d) %U -> %U sa_id: %x spd_id: %x", + (!is_outbound ? "inb" : "outb"), rv, format_ip_address, &sel_saddr, + format_ip_address, &sel_daddr, sa_id, spd_id); + + return; +} + +void +nl_xfrm_sa_cfg (struct xfrmnl_sa *sa) +{ + switch (nl_object_get_msgtype ((struct nl_object *) sa)) + { + case XFRM_MSG_UPDSA: + case XFRM_MSG_NEWSA: + nl_xfrm_sa_add (sa); + break; + + case XFRM_MSG_EXPIRE: + case XFRM_MSG_DELSA: + nl_xfrm_sa_del (sa); + break; + } +} + +void +nl_xfrm_sp_cfg (struct xfrmnl_sp *sp) +{ + u8 num_user_tmpl = 0; + + switch (nl_object_get_msgtype ((struct nl_object *) sp)) + { + case XFRM_MSG_UPDPOLICY: + case XFRM_MSG_NEWPOLICY: + num_user_tmpl = xfrmnl_sp_get_nusertemplates (sp); + if (!num_user_tmpl) + { + NL_XFRM_DBG ( + "Don't support allow/drop policies notification from Kernel. 
\ + Number of user template (%u) should be 1. If more than 1 \ + template is found, we choose only the first one", + num_user_tmpl); + return; + } + nl_xfrm_sp_add (sp, num_user_tmpl); + break; + + case XFRM_MSG_DELPOLICY: + /* DEL notification will not have the user template */ + nl_xfrm_sp_del (sp); + break; + } +} + +static inline u8 +build_nl_expire_msg (ipsec_sa_t *sa, u8 is_hard) +{ + sa_expire_req_nl_t expire_req; + + memset (&expire_req, 0, sizeof (expire_req)); + + /* Fill up the nl header */ + expire_req.nlmsg_hdr.nlmsg_len = + NLMSG_LENGTH (sizeof (expire_req.xfrm_expire)); + expire_req.nlmsg_hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + expire_req.nlmsg_hdr.nlmsg_type = XFRM_MSG_EXPIRE; + expire_req.nlmsg_hdr.nlmsg_seq = ++g_seq; + expire_req.nlmsg_hdr.nlmsg_pid = nl_socket_get_local_port (nm->sk_xfrm); + + /* Fill up the xfrm_user_expire with the SA info that expired */ + expire_req.xfrm_expire.hard = is_hard; + expire_req.xfrm_expire.state.flags = XFRM_STATE_AF_UNSPEC; + expire_req.xfrm_expire.state.id.spi = clib_host_to_net_u32 (sa->spi); + expire_req.xfrm_expire.state.mode = (sa->flags & IPSEC_SA_FLAG_IS_TUNNEL) ? + XFRM_MODE_TUNNEL : + XFRM_MODE_TRANSPORT; + expire_req.xfrm_expire.state.id.proto = + (sa->protocol == IPSEC_PROTOCOL_ESP) ? IPPROTO_ESP : IPPROTO_AH; + + expire_req.xfrm_expire.state.family = + (sa->tunnel.t_dst.version == AF_IP4) ? AF_INET : AF_INET6; + + (sa->tunnel.t_dst.version == AF_IP4) ? + clib_memcpy_fast (&expire_req.xfrm_expire.state.id.daddr.a4, + &sa->tunnel.t_dst.ip.ip4.as_u32, + sizeof (ip4_address_t)) : + clib_memcpy_fast (&expire_req.xfrm_expire.state.id.daddr.a6, + &sa->tunnel.t_dst.ip.ip6.as_u32, sizeof (ip6_address_t)); + + (sa->tunnel.t_src.version == AF_IP4) ? 
+ clib_memcpy_fast (&expire_req.xfrm_expire.state.saddr.a4, + &sa->tunnel.t_src.ip.ip4.as_u32, + sizeof (ip4_address_t)) : + clib_memcpy_fast (&expire_req.xfrm_expire.state.saddr.a6, + &sa->tunnel.t_src.ip.ip6.as_u32, sizeof (ip6_address_t)); + + return send_nl_msg (&expire_req.nlmsg_hdr, XFRMGRP_EXPIRE, XFRM_MSG_EXPIRE); +} + +u8 +check_for_expiry () +{ + sa_life_limits_t *life = NULL; + vlib_counter_t count; + uword *p = NULL; + ipsec_sa_t *sa = NULL; + int rv = 0; + + pool_foreach (sa, ipsec_sa_pool) + { + p = hash_get (lifetime_by_sa_id, sa->id); + if (!p) + continue; + life = (sa_life_limits_t *) p[0]; + vlib_get_combined_counter (&ipsec_sa_counters, sa->stat_index, &count); + + if ((count.packets >= life->hard_packet_limit) || + (count.bytes >= life->hard_byte_limit)) + { + NL_XFRM_INFO ( + "HARD EXPIRY said : %x CntPkt: %u SoftPkt: %u HardPkt: %u", sa->id, + count.packets, life->soft_packet_limit, life->hard_packet_limit); + rv = build_nl_expire_msg (sa, 1); + } + else if ((count.packets >= life->soft_packet_limit) || + (count.bytes >= life->soft_byte_limit)) + { + NL_XFRM_INFO ( + "SOFT EXPIRY said : %x CntPkt: %u SoftPkt: %u HardPkt: %u", sa->id, + count.packets, life->soft_packet_limit, life->hard_packet_limit); + rv = build_nl_expire_msg (sa, 0); + } + if (rv) + vlib_zero_combined_counter (&ipsec_sa_counters, sa->stat_index); + } + return 0; +} + +const nl_xfrm_vft_t lcp_xfrm_vft = { + .nvl_rt_xfrm_sa_cfg = { .is_mp_safe = 0, .cb = nl_xfrm_sa_cfg }, + .nvl_rt_xfrm_sp_cfg = { .is_mp_safe = 0, .cb = nl_xfrm_sp_cfg }, +}; + +static clib_error_t * +lcp_xfrm_init (vlib_main_t *vm) +{ + lcp_xfrm_logger = vlib_log_register_class ("linux-cp", "ipsec"); + + nl_xfrm_register_vft (&lcp_xfrm_vft); + + lifetime_by_sa_id = hash_create (0, sizeof (uword)); + tun_idx_by_sel_daddr = hash_create (0, sizeof (uword)); + + return (NULL); +} + +VLIB_INIT_FUNCTION (lcp_xfrm_init) = { + .runs_before = VLIB_INITS ("lcp_nl_xfrm_init"), +}; + +uword +ipsec_xfrm_expire_process 
(vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + /* init will wake it up */ + vlib_process_wait_for_event (vm); + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, 2); + vlib_process_get_events (vm, NULL); + check_for_expiry (); + } + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp_xfrm.h b/src/plugins/linux-cp/lcp_xfrm.h new file mode 100644 index 0000000000..e7257e431f --- /dev/null +++ b/src/plugins/linux-cp/lcp_xfrm.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2022 Cisco and/or its affiliates. + * Copyright (c) 2022 Marvell Technology, Inc and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define NL_RT_COMMON uword is_mp_safe +#define NL_RX_BUF_SIZE_DEF (1 << 28) /* 256 MB */ +#define NL_TX_BUF_SIZE_DEF (1 << 20) /* 1 MB */ +#define NL_BATCH_SIZE_DEF (1 << 11) /* 2048 */ +#define NL_BATCH_DELAY_MS_DEF 50 /* 50 ms, max 20 batch/s */ +#define NL_SYNC_BATCH_LIMIT_DEF (1 << 10) /* 1024 */ +#define NL_SYNC_BATCH_DELAY_MS_DEF 20 /* 20ms, max 50 batch/s */ +#define NL_SYNC_ATTEMPT_DELAY_MS_DEF 2000 /* 2s */ + +#define DAY_F64 (1.0 * (24 * 60 * 60)) + +#define NL_DBG(...) vlib_log_debug (nl_xfrm_main.nl_logger, __VA_ARGS__); +#define NL_WARN(...) vlib_log_warn (nl_xfrm_main.nl_logger, __VA_ARGS__); +#define NL_INFO(...) vlib_log_notice (nl_xfrm_main.nl_logger, __VA_ARGS__); +#define NL_ERROR(...) vlib_log_err (nl_xfrm_main.nl_logger, __VA_ARGS__); + +#define FOREACH_XFRM_VFT(__func, __arg) \ + { \ + nl_xfrm_main_t *nm = &nl_xfrm_main; \ + nl_xfrm_vft_t *__nv; \ + vec_foreach (__nv, nm->nl_xfrm_vfts) \ + { \ + if (!__nv->__func.cb) \ + continue; \ + \ + if (!__nv->__func.is_mp_safe) \ + vlib_worker_thread_barrier_sync (vlib_get_main ()); \ + \ + __nv->__func.cb (__arg); \ + \ + if (!__nv->__func.is_mp_safe) \ + vlib_worker_thread_barrier_release (vlib_get_main ()); \ + } \ + } + +typedef void (*nl_rt_sa_cb_t) (struct xfrmnl_sa *sa); +typedef void (*nl_rt_sp_cb_t) (struct xfrmnl_sp *sp); + +typedef struct nl_rt_sa_cfg_t_ +{ + NL_RT_COMMON; + + nl_rt_sa_cb_t cb; +} nl_rt_sa_cfg_t; + +typedef struct nl_rt_sp_cfg_t_ +{ + NL_RT_COMMON; + + nl_rt_sp_cb_t cb; +} nl_rt_sp_cfg_t; + +typedef struct nl_xfrm_vft_t_ +{ + nl_rt_sa_cfg_t nvl_rt_xfrm_sa_cfg; + nl_rt_sp_cfg_t nvl_rt_xfrm_sp_cfg; +} nl_xfrm_vft_t; + +typedef enum nl_status_t_ +{ + NL_STATUS_NOTIF_PROC, + NL_STATUS_SYNC, +} 
nl_status_t; + +typedef enum nl_event_type_t_ +{ + NL_EVENT_READ, + NL_EVENT_ERR, +} nl_event_type_t; + +typedef enum nl_interface_type_t_ +{ + NL_INTERFACE_TYPE_IPIP = 1, + NL_INTERFACE_TYPE_IPSEC +} nl_interface_type_t; + +typedef struct nl_msg_info +{ + struct nl_msg *msg; +} nl_msg_info_t; + +typedef struct nl_xfrm_main +{ + + nl_status_t nl_status; + struct nl_sock *sk_xfrm; + u8 xfrm_fd; + u8 is_route_mode; + nl_interface_type_t interface_type; + vlib_log_class_t nl_logger; + nl_xfrm_vft_t *nl_xfrm_vfts; + nl_msg_info_t *nl_msg_queue; + uword clib_file_index; + + u32 rx_buf_size; + u32 tx_buf_size; + u32 batch_size; + u32 batch_delay_ms; + + u32 sync_batch_limit; + u32 sync_batch_delay_ms; + u32 sync_attempt_delay_ms; +} nl_xfrm_main_t; + +extern nl_xfrm_main_t *nm; + +extern void nl_xfrm_register_vft (const nl_xfrm_vft_t *nv); +void nl_xfrm_sa_cfg (struct xfrmnl_sa *sa); +void nl_xfrm_sp_cfg (struct xfrmnl_sp *sp); +u8 check_for_expiry (); +int send_nl_msg (struct nlmsghdr *nl_hdr, unsigned int groups, u8 msg_type); +uword ipsec_xfrm_expire_process (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp_xfrm_nl.c b/src/plugins/linux-cp/lcp_xfrm_nl.c new file mode 100644 index 0000000000..f17ff54325 --- /dev/null +++ b/src/plugins/linux-cp/lcp_xfrm_nl.c @@ -0,0 +1,672 @@ +/* + * Copyright (c) 2022 Cisco and/or its affiliates. + * Copyright (c) 2022 Marvell Technology, Inc and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include +#include + +#include +#include +#include +#include +#include + +static nl_xfrm_main_t nl_xfrm_main = { + .rx_buf_size = NL_RX_BUF_SIZE_DEF, + .tx_buf_size = NL_TX_BUF_SIZE_DEF, + .batch_size = NL_BATCH_SIZE_DEF, + .batch_delay_ms = NL_BATCH_DELAY_MS_DEF, + .sync_batch_limit = NL_SYNC_BATCH_LIMIT_DEF, + .sync_batch_delay_ms = NL_SYNC_BATCH_DELAY_MS_DEF, + .sync_attempt_delay_ms = NL_SYNC_ATTEMPT_DELAY_MS_DEF, +}; + +nl_xfrm_main_t *nm = &nl_xfrm_main; + +static void lcp_xfrm_nl_open_sync_socket (); +static void lcp_xfrm_nl_close_sync_socket (); +static void lcp_xfrm_nl_open_socket (); +static void lcp_xfrm_nl_close_socket (); + +void +nl_xfrm_register_vft (const nl_xfrm_vft_t *nv) +{ + vec_add1 (nm->nl_xfrm_vfts, *nv); +} + +static void +nl_sp_cfg (struct xfrmnl_sp *sp, void *arg) +{ + FOREACH_XFRM_VFT (nvl_rt_xfrm_sp_cfg, sp); +} +static void +nl_sa_cfg (struct xfrmnl_sa *sa, void *arg) +{ + FOREACH_XFRM_VFT (nvl_rt_xfrm_sa_cfg, sa); +} + +static void +nl_xfrm_dispatch (struct nl_object *obj, void *arg) +{ + /* nothing can be done without interface mappings */ + if (!lcp_itf_num_pairs ()) + return; + + switch (nl_object_get_msgtype (obj)) + { + case XFRM_MSG_EXPIRE: + case XFRM_MSG_UPDSA: + case XFRM_MSG_NEWSA: + case XFRM_MSG_DELSA: + NL_DBG ("######### SA Notification ######### "); + nl_sa_cfg ((struct xfrmnl_sa *) obj, arg); + break; + + case XFRM_MSG_UPDPOLICY: + case XFRM_MSG_NEWPOLICY: + case XFRM_MSG_DELPOLICY: + NL_DBG ("######### SP Notification ######### "); + nl_sp_cfg ((struct xfrmnl_sp *) obj, 
arg); + break; + + default: + NL_ERROR ("unhandled xfrm notfn: %s %x", nl_object_get_type (obj)); + break; + } +} + +int +send_nl_msg (struct nlmsghdr *nl_hdr, unsigned int groups, u8 msg_type) +{ + int status; + struct nl_sock *sk_xfrm = nm->sk_xfrm; + struct sockaddr_nl nl_addr; + struct iovec iov; + struct msghdr msg; + struct nl_msg *nlmsg; + + nlmsg = nlmsg_alloc_simple (msg_type, NLM_F_REQUEST); + + memset (&msg, 0, sizeof (struct msghdr)); + memset (&iov, 0, sizeof (struct iovec)); + + iov.iov_base = (void *) nl_hdr; + iov.iov_len = nl_hdr->nlmsg_len; + + msg.msg_name = &nl_addr; + msg.msg_namelen = sizeof (nl_addr); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + memset (&nl_addr, 0, sizeof (nl_addr)); + nl_addr.nl_family = AF_NETLINK; + nl_addr.nl_groups = groups; + nl_addr.nl_pid = 0; + + status = nl_sendmsg (sk_xfrm, nlmsg, &msg); + if (status < 0) + { + NL_ERROR ("Expiry send failed"); + return 0; + } + + return 1; +} + +static int +nl_xfrm_process_msgs (void) +{ + nl_msg_info_t *msg_info; + int err, n_msgs = 0; + + /* process a batch of messages. 
break if we hit our limit */ + vec_foreach (msg_info, nm->nl_msg_queue) + { + if ((err = nl_msg_parse (msg_info->msg, nl_xfrm_dispatch, msg_info)) < 0) + NL_INFO ("Unable to parse object: %s", nl_geterror (err)); + nlmsg_free (msg_info->msg); + if (++n_msgs >= nm->batch_size) + break; + } + + /* remove the messages we processed from the head of the queue */ + if (n_msgs) + vec_delete (nm->nl_msg_queue, n_msgs, 0); + + NL_INFO ("Processed %u messages", n_msgs); + + return n_msgs; +} + +static int +lcp_xfrm_nl_send_dump_req (int msg_type) +{ + struct nl_sock *sk_xfrm = nm->sk_xfrm; + int err; + struct rtgenmsg rt_hdr = { + .rtgen_family = AF_UNSPEC, + }; + + err = + nl_send_simple (sk_xfrm, msg_type, NLM_F_DUMP, &rt_hdr, sizeof (rt_hdr)); + + if (err < 0) + { + NL_ERROR ("Unable to send a dump request: %s", nl_geterror (err)); + } + else + NL_INFO ("Dump request sent via socket %d ", nl_socket_get_fd (sk_xfrm)); + + return err; +} + +static int +lcp_xfrm_nl_dump_cb (struct nl_msg *msg, void *arg) +{ + int err; + + if ((err = nl_msg_parse (msg, nl_xfrm_dispatch, NULL)) < 0) + NL_ERROR ("Unable to parse object: %s", nl_geterror (err)); + + return NL_OK; +} + +static int +lcp_xfrm_nl_read (int msg_limit, int *is_done_rcvd) +{ + struct nl_sock *sk_xfrm = nm->sk_xfrm; + struct sockaddr_nl nla; + uint8_t *buf = NULL; + int n_bytes; + struct nlmsghdr *hdr; + struct nl_msg *msg = NULL; + int err = 0; + int done = 0; + int n_msgs = 0; + +continue_reading: + n_bytes = nl_recv (sk_xfrm, &nla, &buf, /* creds */ NULL); + if (n_bytes <= 0) + return n_bytes; + + hdr = (struct nlmsghdr *) buf; + while (nlmsg_ok (hdr, n_bytes)) + { + nlmsg_free (msg); + msg = nlmsg_convert (hdr); + if (!msg) + { + err = -NLE_NOMEM; + goto out; + } + + n_msgs++; + + nlmsg_set_proto (msg, NETLINK_XFRM); + nlmsg_set_src (msg, &nla); + + /* Message that terminates a multipart message. 
Finish parsing and signal + * the caller that all dump replies have been received + */ + if (hdr->nlmsg_type == NLMSG_DONE) + { + done = 1; + goto out; + } + /* Message to be ignored. Continue parsing */ + else if (hdr->nlmsg_type == NLMSG_NOOP) + ; + /* Message that indicates data was lost. Finish parsing and return an + * error + */ + else if (hdr->nlmsg_type == NLMSG_OVERRUN) + { + err = -NLE_MSG_OVERFLOW; + goto out; + } + /* Message that indicates an error. Finish parsing, extract the error + * code, and return it */ + else if (hdr->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr *e = nlmsg_data (hdr); + + if (hdr->nlmsg_len < nlmsg_size (sizeof (*e))) + { + err = -NLE_MSG_TRUNC; + goto out; + } + else if (e->error) + { + err = -nl_syserr2nlerr (e->error); + goto out; + } + /* Message is an acknowledgement (err_code = 0). Continue parsing */ + else + ; + } + /* Message that contains the requested data. Pass it for processing and + * continue parsing + */ + else + { + lcp_xfrm_nl_dump_cb (msg, NULL); + } + + hdr = nlmsg_next (hdr, &n_bytes); + } + + nlmsg_free (msg); + free (buf); + msg = NULL; + buf = NULL; + + if (!done && n_msgs < msg_limit) + goto continue_reading; + +out: + nlmsg_free (msg); + free (buf); + + if (err) + return err; + + *is_done_rcvd = done; + + return n_msgs; +} + +static void +lcp_xfrm_nl_close_sync_socket () +{ + struct nl_sock *sk_xfrm = nm->sk_xfrm; + + if (sk_xfrm) + { + NL_INFO ("Closing netlink synchronization socket %d", + nl_socket_get_fd (sk_xfrm)); + nl_socket_free (sk_xfrm); + nm->sk_xfrm = NULL; + } +} + +static void +lcp_xfrm_nl_open_sync_socket () +{ + struct nl_sock *sk_xfrm; + + /* Allocate a new blocking socket for XFRM that will be used for dump + * requests. 
Buffer sizes are left default because replies to dump requests + * are flow-controlled and the kernel will not overflow the socket by sending + * these + */ + + nm->sk_xfrm = sk_xfrm = nl_socket_alloc (); + + nl_connect (sk_xfrm, NETLINK_XFRM); + + NL_INFO ("Opened netlink synchronization socket %d", + nl_socket_get_fd (sk_xfrm)); +} + +static inline void +lcp_xfrm_nl_recv_dump_replies () +{ + int is_done = 0, n_msgs; + + do + { + n_msgs = lcp_xfrm_nl_read (nm->sync_batch_limit, &is_done); + if (n_msgs < 0) + { + NL_ERROR ("Error receiving dump replies " + ": %s (%d)", + nl_geterror (n_msgs), n_msgs); + break; + } + else if (n_msgs == 0) + { + NL_ERROR ("EOF while receiving dump replies"); + break; + } + else + NL_INFO ("Processed %u dump replies", n_msgs); + } + while (!is_done); +} + +static inline void +lcp_xfrm_nl_sync () +{ + /* close the xfrm socket listening on XFRM notifications */ + lcp_xfrm_nl_close_socket (); + /* create a new xfrm sync socket only to initiate a DUMP request */ + lcp_xfrm_nl_open_sync_socket (); + + /* get all xfrm cfgs from linux and cfg the same here*/ + lcp_xfrm_nl_send_dump_req (XFRM_MSG_GETSA); + lcp_xfrm_nl_recv_dump_replies (); + lcp_xfrm_nl_send_dump_req (XFRM_MSG_GETPOLICY); + lcp_xfrm_nl_recv_dump_replies (); + + /* close the xfrm sync socket since dump request is handled by now */ + lcp_xfrm_nl_close_sync_socket (); + /* create the xfrm socket to handle XFRM notifications */ + lcp_xfrm_nl_open_socket (); +} + +static uword +nl_xfrm_process (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + uword event_type; + uword *event_data = 0; + f64 wait_time = DAY_F64; + + while (1) + { + if (nm->nl_status == NL_STATUS_NOTIF_PROC) + { + /* If we process a batch of messages and stop because we reached the + * batch size limit, we want to wake up after the batch delay and + * process more. Otherwise we just want to wait for a read event. 
+ */ + vlib_process_wait_for_event_or_clock (vm, wait_time); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + switch (event_type) + { + /* Process batch of queued messages on timeout or read event + * signal + */ + case ~0: + case NL_EVENT_READ: + nl_xfrm_process_msgs (); + wait_time = (vec_len (nm->nl_msg_queue) != 0) ? + nm->batch_delay_ms * 1e-3 : + DAY_F64; + + break; + case NL_EVENT_ERR: + nm->nl_status = NL_STATUS_SYNC; + break; + default: + NL_ERROR ("Unknown event type: %u", (u32) event_type); + } + } + else if (nm->nl_status == NL_STATUS_SYNC) + { + NL_INFO ("Start sync"); + lcp_xfrm_nl_sync (); + nm->nl_status = NL_STATUS_NOTIF_PROC; + NL_INFO ("Sync done"); + } + else + NL_ERROR ("Unknown status: %d", nm->nl_status); + } + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (nl_xfrm_process_node, static) = { + .function = nl_xfrm_process, + .name = "linux-cp-netlink-xfrm-process", + .type = VLIB_NODE_TYPE_PROCESS, + .process_log2_n_stack_bytes = 17, +}; + +VLIB_REGISTER_NODE (ipsec_xfrm_expire_process_node, static) = { + .function = ipsec_xfrm_expire_process, + .name = "ipsec-xfrm-expire-process", + .type = VLIB_NODE_TYPE_PROCESS, + .process_log2_n_stack_bytes = 17, +}; + +static int +nl_xfrm_cb (struct nl_msg *msg, void *arg) +{ + nl_msg_info_t *msg_info = 0; + + /* queue for later */ + vec_add2 (nm->nl_msg_queue, msg_info, 1); + + msg_info->msg = msg; + nlmsg_get (msg); + + return 0; +} + +int +lcp_nl_xfrm_drain_messages (void) +{ + int err; + + /* Read until there's an error */ + while ((err = nl_recvmsgs_default (nm->sk_xfrm)) > -1) + ; + + /* If there was an error other then EAGAIN, signal process node */ + if (err != -NLE_AGAIN) + vlib_process_signal_event (vlib_get_main (), nl_xfrm_process_node.index, + NL_EVENT_ERR, 0); + else + { + /* If netlink notification processing is active, signal process node + * there were notifications read + */ + if (nm->nl_status == NL_STATUS_NOTIF_PROC) + { + 
vlib_process_signal_event ( + vlib_get_main (), nl_xfrm_process_node.index, NL_EVENT_READ, 0); + } + } + + return err; +} + +static clib_error_t * +nl_xfrm_read_cb (clib_file_t *f) +{ + int err; + + err = lcp_nl_xfrm_drain_messages (); + if (err < 0 && err != -NLE_AGAIN) + NL_ERROR ("Error reading netlink socket (fd %d): %s (%d)", + f->file_descriptor, nl_geterror (err), err); + + return 0; +} + +static clib_error_t * +nl_xfrm_error_cb (clib_file_t *f) +{ + NL_ERROR ("Error polling netlink socket (fd %d)", f->file_descriptor); + + /* notify process node */ + vlib_process_signal_event (vlib_get_main (), nl_xfrm_process_node.index, + NL_EVENT_ERR, 0); + + return clib_error_return (0, "Error polling netlink socket %d", + f->file_descriptor); +} + +/* Set the RX buffer size to be used on the netlink socket */ +void +lcp_xfrm_nl_set_buffer_size (u32 buf_size) +{ + nm->rx_buf_size = buf_size; + + if (nm->sk_xfrm) + nl_socket_set_buffer_size (nm->sk_xfrm, nm->rx_buf_size, nm->tx_buf_size); +} + +/* Set the batch size - maximum netlink messages to process at one time */ +void +lcp_xfrm_nl_set_batch_size (u32 batch_size) +{ + nm->batch_size = batch_size; +} + +/* Set the batch delay - how long to wait in ms between processing batches */ +void +lcp_xfrm_nl_set_batch_delay (u32 batch_delay_ms) +{ + nm->batch_delay_ms = batch_delay_ms; +} + +static clib_error_t * +lcp_xfrm_itf_pair_config (vlib_main_t *vm, unformat_input_t *input) +{ + u32 buf_size, batch_size, batch_delay_ms; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "enable-route-mode-ipsec")) + nm->is_route_mode = 1; + else if (unformat (input, "nl-rx-buffer-size %u", &buf_size)) + lcp_xfrm_nl_set_buffer_size (buf_size); + else if (unformat (input, "nl-batch-size %u", &batch_size)) + lcp_xfrm_nl_set_batch_size (batch_size); + else if (unformat (input, "nl-batch-delay-ms %u", &batch_delay_ms)) + lcp_xfrm_nl_set_batch_delay (batch_delay_ms); + else if (unformat (input, 
"interface ipsec")) + nm->interface_type = NL_INTERFACE_TYPE_IPSEC; + else if (unformat (input, "interface ipip")) + nm->interface_type = NL_INTERFACE_TYPE_IPIP; + else + return clib_error_return (0, "invalid netlink option: %U", + format_unformat_error, input); + } + + if (nm->interface_type && !nm->is_route_mode) + return clib_error_return ( + 0, "enable-route-mode-ipsec configuration is missing"); + + return NULL; +} + +VLIB_CONFIG_FUNCTION (lcp_xfrm_itf_pair_config, "linux-xfrm-nl"); + +static void +lcp_xfrm_nl_close_socket (void) +{ + /* delete existing fd from epoll fd set */ + if (nm->clib_file_index != ~0) + { + clib_file_main_t *fm = &file_main; + clib_file_t *f = clib_file_get (fm, nm->clib_file_index); + + if (f) + { + NL_INFO ("Stopping poll of fd %u", f->file_descriptor); + fm->file_update (f, UNIX_FILE_UPDATE_DELETE); + } + else + /* stored index was not a valid file, reset stored index to ~0 */ + nm->clib_file_index = ~0; + } + /* If we already created a socket, close/free it */ + if (nm->sk_xfrm) + { + NL_INFO ("Closing netlink socket %d", nl_socket_get_fd (nm->sk_xfrm)); + nl_socket_free (nm->sk_xfrm); + nm->sk_xfrm = NULL; + } +} + +static void +lcp_xfrm_nl_open_socket (void) +{ + int dest_ns_fd = 0, curr_ns_fd = 0; + /* + * Allocate a new socket for xfrm. Notifications do not use sequence + * numbers, disable sequence number checking. 
+ * Define a callback function, which will be called for each + * notification received + */ + dest_ns_fd = lcp_get_default_ns_fd (); + if (dest_ns_fd) + { + curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY); + setns (dest_ns_fd, CLONE_NEWNET); + } + + nm->sk_xfrm = nl_socket_alloc (); + nm->xfrm_fd = nl_socket_get_fd (nm->sk_xfrm); + nl_socket_disable_seq_check (nm->sk_xfrm); + nl_join_groups (nm->sk_xfrm, XFRMGRP_SA | XFRMGRP_POLICY | XFRMGRP_EXPIRE); + nl_connect (nm->sk_xfrm, NETLINK_XFRM); + + /* Set socket in nonblocking mode and increase buffer sizes */ + nl_socket_set_nonblocking (nm->sk_xfrm); + nl_socket_set_buffer_size (nm->sk_xfrm, nm->rx_buf_size, nm->tx_buf_size); + + if (dest_ns_fd && curr_ns_fd >= 0) + { + setns (curr_ns_fd, CLONE_NEWNET); + close (curr_ns_fd); + } + if (nm->clib_file_index == ~0) + { + clib_file_t rt_file = { + .read_function = nl_xfrm_read_cb, + .error_function = nl_xfrm_error_cb, + .file_descriptor = nl_socket_get_fd (nm->sk_xfrm), + .description = format (0, "linux-cp netlink route socket"), + }; + + nm->clib_file_index = clib_file_add (&file_main, &rt_file); + NL_INFO ("Added file %u", nm->clib_file_index); + } + else + /* clib file already created and socket was closed due to error */ + { + clib_file_main_t *fm = &file_main; + clib_file_t *f = clib_file_get (fm, nm->clib_file_index); + + f->file_descriptor = nl_socket_get_fd (nm->sk_xfrm); + fm->file_update (f, UNIX_FILE_UPDATE_ADD); + NL_INFO ("Starting poll of %d", f->file_descriptor); + } + + nl_socket_modify_cb (nm->sk_xfrm, NL_CB_VALID, NL_CB_CUSTOM, nl_xfrm_cb, + NULL); + NL_INFO ("Opened netlink socket %d", nl_socket_get_fd (nm->sk_xfrm)); +} + +clib_error_t * +lcp_nl_xfrm_init (vlib_main_t *vm) +{ + + nm->nl_status = NL_STATUS_NOTIF_PROC; + nm->clib_file_index = ~0; + nm->nl_logger = vlib_log_register_class ("nl", "xfrm"); + + lcp_xfrm_nl_open_socket (); + vlib_process_signal_event (vlib_get_main (), + ipsec_xfrm_expire_process_node.index, 0, 0); + return NULL; +} + 
+VLIB_INIT_FUNCTION (lcp_nl_xfrm_init) = { + .runs_after = VLIB_INITS ("ipsec_init"), +}; +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/CMakeLists.txt b/src/plugins/octep_cp/CMakeLists.txt new file mode 100644 index 0000000000..6043618677 --- /dev/null +++ b/src/plugins/octep_cp/CMakeLists.txt @@ -0,0 +1,54 @@ +# Copyright (c) 2023 Marvell. +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.org/licenses/Apache-2.0.html +# + +unset(OCTEP_INCLUDE_DIR) +unset(OCTEP_LINK_FLAGS) +unset(OCTEP_LIB_DIR) + +set(OCTEP_LIB_DIR_HINT "/usr/marvell") +set(OCTEP_INC_DIR_HINT "/usr/marvell/include") +set(PLAT "aarch64") + +if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64.*") + return() +endif() + +vpp_find_path(OCTEP_INCLUDE_DIR octep_cp_lib.h HINTS ${OCTEP_INC_DIR_HINT}) +vpp_plugin_find_library(octep_cp OCTEP_LIB "liboctep_cp.a" HINTS ${OCTEP_LIB_DIR_HINT}) + +if (NOT OCTEP_INCLUDE_DIR) + message(WARNING "-- Octep headers not found - Octep-cp plugin disabled") + return() +endif() + +if (NOT OCTEP_LIB) + message(WARNING "-- Octep lib not found - Octep-cp plugin disabled") + return() +else() + message(WARNING "-- Octep lib found - Octep-cp plugin enabled") +endif() + +set(CMAKE_C_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}/compat/${PLAT}") + +get_filename_component(OCTEP_LIB_DIR ${OCTEP_LIB} DIRECTORY) +string_append(OCTEP_LINK_FLAGS "-L${OCTEP_LIB_DIR}") +string_append(OCTEP_LINK_FLAGS "-Wl,--whole-archive,${OCTEP_LIB},-lconfig,--no-whole-archive") + +include_directories (${OCTEP_INCLUDE_DIR}) + +message(STATUS "OCTEP header ${OCTEP_INCLUDE_DIR}") +message(STATUS "OCTEP lib ${OCTEP_LIB_DIR}") +message(STATUS "OCTEP link flag ${OCTEP_LINK_FLAGS}") + +add_vpp_plugin(octep_cp + SOURCES + octep_ctrl.c + octep_config.c + octep_action.c + octep_input.c + LINK_FLAGS + "${OCTEP_LINK_FLAGS}" +) +install(FILES octep_cp_cn10kxx.cfg DESTINATION etc/vpp/ COMPONENT vpp-plugin-octep_cp) diff 
--git a/src/plugins/octep_cp/compat/aarch64/cp_compat.h b/src/plugins/octep_cp/compat/aarch64/cp_compat.h new file mode 100644 index 0000000000..a1c1db004d --- /dev/null +++ b/src/plugins/octep_cp/compat/aarch64/cp_compat.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef __CP_COMPAT_H__ +#define __CP_COMPAT_H__ + +#include +#include + +#define CP_ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ +#define CP_ETHER_GROUP_ADDR 0x01 /**< Mcast or bcast Eth. addr. */ +#define CP_ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. addr. */ + +#define __cp_always_inline inline __attribute__ ((always_inline)) + +#define cp_rmb() asm volatile("dmb oshld" : : : "memory") +#define cp_io_rmb() cp_rmb () +#define cp_wmb() asm volatile("dmb oshst" : : : "memory") +#define cp_io_wmb() cp_wmb () + +static __cp_always_inline uint32_t +cp_read32_relaxed (const volatile void *addr) +{ + uint32_t val; + + asm volatile("ldr %w[val], [%x[addr]]" : [val] "=r"(val) : [addr] "r"(addr)); + return val; +} + +static __cp_always_inline uint32_t +cp_read32 (const volatile void *addr) +{ + uint32_t val; + val = cp_read32_relaxed (addr); + cp_io_rmb (); + return val; +} + +static __cp_always_inline uint64_t +cp_read64_relaxed (const volatile void *addr) +{ + uint64_t val; + + asm volatile("ldr %x[val], [%x[addr]]" : [val] "=r"(val) : [addr] "r"(addr)); + return val; +} + +static __cp_always_inline uint64_t +cp_read64 (const volatile void *addr) +{ + uint64_t val; + val = cp_read64_relaxed (addr); + cp_io_rmb (); + return val; +} + +static __cp_always_inline void +cp_write32_relaxed (uint32_t val, volatile void *addr) +{ + asm volatile("str %w[val], [%x[addr]]" : : [val] "r"(val), [addr] "r"(addr)); +} + +static __cp_always_inline void +cp_write32 (uint32_t value, volatile void *addr) +{ + cp_io_wmb (); + cp_write32_relaxed (value, addr); +} + +static __cp_always_inline void 
+cp_write64_relaxed (uint64_t val, volatile void *addr) +{ + asm volatile("str %x[val], [%x[addr]]" : : [val] "r"(val), [addr] "r"(addr)); +} + +static __cp_always_inline void +cp_write64 (uint64_t value, volatile void *addr) +{ + cp_io_wmb (); + cp_write64_relaxed (value, addr); +} + +static inline void +cp_eth_random_addr (uint8_t *addr) +{ + uint64_t r = rand (); + uint8_t *p = (uint8_t *) &r; + + memcpy (addr, p, CP_ETHER_ADDR_LEN); + addr[0] &= (uint8_t) ~CP_ETHER_GROUP_ADDR; /* clear multicast bit */ + addr[0] |= CP_ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ +} + +#endif /* __CP_COMPAT_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/docs/octep_cp_plugin_doc.md b/src/plugins/octep_cp/docs/octep_cp_plugin_doc.md new file mode 100644 index 0000000000..dc32602f50 --- /dev/null +++ b/src/plugins/octep_cp/docs/octep_cp_plugin_doc.md @@ -0,0 +1,87 @@ +# OCTEON end point control plain plugin (OCTEP-CP) for VPP {#octep_cp_plugin_doc} + +## Overview +This plugin implements Marvell OCTEON PCIe end point control plane protocol. +Marvell OCTEON firmware provides convenience user library liboctep.so to +setup and interact with the host over mailbox. This octep_cp plugin uses +liboctep.so library to read/send control message from/to host over mailbox. + +For the host checksum offload feature, a 24-byte header is added to each +packet. The dev OCTEON plugin’s h2d-input and d2h-output components handle the +checksum computation and verification. + +## Supported SoC +- OCTEON CN10KXX + +## Usage +The following steps demonstrate how you may bring up VPP with octep_cp, on the +OCTEON connected to host. +1. Enable octep_cp plugin in VPP startup.conf file +2. octep_cp plugin initializes liboctep.so library which initializes SDP firmware. +3. If there are any messages from host firmware puts them into mailbox. +4. 
octep_cp plugin regularly calls liboctep.so API's to check mailbox. +5. octep_cp plugin applies configuration action requested by host and replies + success or failure to host. + +### Setup +1. OCTEON should be connected to host via SDP interface. +2. Determine SDP interface on OCTEON + "lspci | grep SDP" OR "dmesg | grep sdp" + 0002:01:00.1 Ethernet controller: Cavium, Inc. Octeon Tx2 SDP Physical Function (rev 51) + 0002:01:00.2 Ethernet controller: Cavium, Inc. Octeon Tx2 SDP Virtual Function (rev 51) + 0002:01:00.3 Ethernet controller: Cavium, Inc. Octeon Tx2 SDP Virtual Function (rev 51) +3. Bind SDP VF to vfio-pci driver + dpdk-devbind.py -b vfio-pci 0002:01:00.1 +4. Modify startup.conf + - Enable octep_cp plugin + plugins { + plugin octep_cp_plugin.so { enable } + } + - Device bringup using startup.conf device section + devices { + dev pci/0002:01:00.1 + { + driver octeon + port 0 + { + name eth0 + num-rx-queues 4 + num-tx-queues 4 + } + } + dev pci/0002:01:00.2 + { + driver octeon + port 0 + { + name eth1 + num-rx-queues 5 + num-tx-queues 5 + } + } + } +5. Determine SDP interface on HOST side + - lspci | grep Cavium + 17:00.0 Network controller: Cavium, Inc. Device b900 + - load OCTEON PF and VF driver, insmod octeon_ep.ko octeon_ep_vf.ko + - create required VF's with 'echo 1 > /sys/bus/pci/devices/0000\:17\:00.0/sriov_numvfs' + +#### Configuration +This plugin uses /usr/bin/cn10kxx.cfg configuration file to configure +PCIe end point. + +1. Checksum offload configuration on DPU + - Enable checksum offload in /usr/bin/cn10kxx.cfg file + ``` + pkind=1 + ``` + - CLI + ``` + vppctl set int feature eth1 h2d-input arc port-rx-eth + vppctl set int feature eth1 d2h-output arc interface-output + ``` +2. 
Checksum offload configuration on host + - Enable checksum offload on the SDP VF with ethtool (VF_IFNAME is the VF's network interface name) + ``` + ethtool -K VF_IFNAME rx on tx on + ``` diff --git a/src/plugins/octep_cp/octep_action.c b/src/plugins/octep_cp/octep_action.c new file mode 100644 index 0000000000..1c6a04a358 --- /dev/null +++ b/src/plugins/octep_cp/octep_action.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ +#include +#include "octep_action.h" +#include "octep_ctrl_net.h" +#include +#include + +void +octep_update_pktio (uint8_t cmd, uint32_t value) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + vnet_sw_interface_t *si; + + pool_foreach (si, im->sw_interfaces) + { + vnet_hw_interface_set_mtu (vnm, si->hw_if_index, value); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/octep_action.h b/src/plugins/octep_cp/octep_action.h new file mode 100644 index 0000000000..3cf9e01522 --- /dev/null +++ b/src/plugins/octep_cp/octep_action.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Marvell.
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef __OCTEP_ACTION_H__ +#define __OCTEP_ACTION_H__ + +#include +#include +#include +#include +#include + +struct pf_config +{ + int n_vfs; +}; + +struct pem_config +{ + int n_pfs; + struct pf_config pfconfig[64]; +}; + +struct octep_pf_vf_cfg +{ + struct pem_config pemconfig[4]; + int n_pems; + int pem_idx; + int pf_idx; + int is_vf; + int vf_idx; +}; + +extern struct octep_pf_vf_cfg cfg_idx; + +void octep_update_pktio (uint8_t cmd, uint32_t value); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/octep_config.c b/src/plugins/octep_cp/octep_config.c new file mode 100644 index 0000000000..349989d136 --- /dev/null +++ b/src/plugins/octep_cp/octep_config.c @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2023 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include + +#include "octep_cp_lib.h" +#include "octep_config.h" + +struct app_cfg cfg; + +/** + * Object hierarchy + * *(0 or more), +(1 or more) + * + * soc = { pem* }; + * pem = { idx, pf* }; + * pf = { idx, if, info, vf* }; + * vf = { idx, if, info }; + * if = { mtu, mac_addr, link_state, rx_state, autoneg, pause_mode, speed, + * supported_modes, advertised_modes, if_name }; + * info = { pkind, hb_interval, hb_miss_count }; + */ + +#define CFG_TOKEN_SOC "soc" +#define CFG_TOKEN_BASE_SOC "base_soc" +#define CFG_TOKEN_PEMS "pems" +#define CFG_TOKEN_PFS "pfs" +#define CFG_TOKEN_VFS "vfs" +#define CFG_TOKEN_IDX "idx" +#define CFG_TOKEN_IF_MTU "mtu" +#define CFG_TOKEN_IF_MAC_ADDR "mac_addr" +#define CFG_TOKEN_IF_LSTATE "link_state" +#define CFG_TOKEN_IF_RSTATE "rx_state" +#define CFG_TOKEN_IF_AUTONEG "autoneg" +#define CFG_TOKEN_IF_PMODE "pause_mode" +#define CFG_TOKEN_IF_SPEED "speed" +#define CFG_TOKEN_IF_SMODES "supported_modes" +#define
CFG_TOKEN_IF_AMODES "advertised_modes" +#define CFG_TOKEN_IF_NAME "if_name" +#define CFG_TOKEN_INFO_PKIND "pkind" +#define CFG_TOKEN_INFO_HB_INTERVAL "hb_interval" +#define CFG_TOKEN_INFO_HB_MISS_COUNT "hb_miss_count" + +static void +print_if (struct if_cfg *iface) +{ + clib_warning ("mac_addr: %02x:%02x:%02x:%02x:%02x:%02x\n", + iface->mac_addr[0], iface->mac_addr[1], iface->mac_addr[2], + iface->mac_addr[3], iface->mac_addr[4], iface->mac_addr[5]); + clib_warning ("mtu: %d, link: %d, rx: %d, autoneg: 0x%x\n", iface->mtu, + iface->link_state, iface->rx_state, iface->autoneg); + clib_warning ("pause_mode: 0x%x, speed: %d\n", iface->pause_mode, + iface->speed); + clib_warning ("supported_modes: 0x%lx, advertised_modes: 0x%lx\n", + iface->supported_modes, iface->advertised_modes); +} + +static void +print_info (struct octep_fw_info *info) +{ + clib_warning ("pkind: %u, hbi: %u, hbmc: %u\n", info->pkind, + info->hb_interval, info->hb_miss_count); +} + +static void +print_config () +{ + struct pem_cfg *pem; + struct pf_cfg *pf; + struct vf_cfg *vf; + + pem = cfg.pems; + while (pem) + { + pf = pem->pfs; + while (pf) + { + clib_warning ("[%d]:[%d]\n", pem->idx, pf->idx); + print_if (&pf->iface); + print_info (&pf->info); + vf = pf->vfs; + while (vf) + { + clib_warning ("[%d]:[%d]:[%d]\n", pem->idx, pf->idx, vf->idx); + print_if (&vf->iface); + print_info (&vf->info); + vf = vf->next; + } + pf = pf->next; + } + pem = pem->next; + } +} + +static struct pem_cfg * +create_pem (int idx) +{ + struct pem_cfg *pem, *p; + + pem = calloc (sizeof (struct pem_cfg), 1); + if (!pem) + return NULL; + + pem->idx = idx; + if (cfg.pems) + { + p = cfg.pems; + while (p->next) + p = p->next; + + p->next = pem; + } + else + cfg.pems = pem; + + cfg.npem++; + + return pem; +} + +static struct pem_cfg * +get_pem (int idx) +{ + struct pem_cfg *pem; + + if (!cfg.pems) + return NULL; + + pem = cfg.pems; + while (pem) + { + if (pem->idx == idx) + return pem; + pem = pem->next; + } + + return NULL; 
+} + +static struct pf_cfg * +create_pf (struct pem_cfg *pemcfg, int idx) +{ + struct pf_cfg *pf, *p; + + pf = calloc (sizeof (struct pf_cfg), 1); + if (!pf) + return NULL; + + pf->idx = idx; + if (pemcfg->pfs) + { + p = pemcfg->pfs; + while (p->next) + p = p->next; + + p->next = pf; + } + else + pemcfg->pfs = pf; + + pemcfg->npf++; + + return pf; +} + +static struct pf_cfg * +get_pf (struct pem_cfg *pemcfg, int idx) +{ + struct pf_cfg *pf; + + if (!pemcfg->pfs) + return NULL; + + pf = pemcfg->pfs; + while (pf) + { + if (pf->idx == idx) + return pf; + pf = pf->next; + } + + return NULL; +} + +static struct vf_cfg * +create_vf (struct pf_cfg *pfcfg, int idx) +{ + struct vf_cfg *vf, *p; + + vf = calloc (sizeof (struct vf_cfg), 1); + if (!vf) + return NULL; + + vf->idx = idx; + if (pfcfg->vfs) + { + p = pfcfg->vfs; + while (p->next) + p = p->next; + + p->next = vf; + } + else + pfcfg->vfs = vf; + + pfcfg->nvf++; + + return vf; +} + +static struct vf_cfg * +get_vf (struct pf_cfg *pfcfg, int idx) +{ + struct vf_cfg *vf; + + if (!pfcfg->vfs) + return NULL; + + vf = pfcfg->vfs; + while (vf) + { + if (vf->idx == idx) + return vf; + vf = vf->next; + } + + return vf; +} + +int +get_max_rx_pktlen (void) +{ + struct octep_cp_lib_info info; + + octep_cp_lib_get_info (&info); + if (info.soc_model.flag & + (OCTEP_CP_SOC_MODEL_CN96xx_Ax | OCTEP_CP_SOC_MODEL_CNF95xxN_A0)) + return (16 * 1024); + + return ((64 * 1024) - 1); +} + +static int +parse_if (config_setting_t *lcfg, struct if_cfg *iface) +{ + config_setting_t *mac; + int ival, i, n; + char *if_name = NULL; + + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_MTU, &ival)) + iface->mtu = ival; + + mac = config_setting_get_member (lcfg, CFG_TOKEN_IF_MAC_ADDR); + if (mac) + { + n = config_setting_length (mac); + if (n > ETH_ALEN) + n = ETH_ALEN; + for (i = 0; i < n; i++) + iface->mac_addr[i] = config_setting_get_int_elem (mac, i); + } + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_LSTATE, &ival)) + iface->link_state = 
ival; + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_RSTATE, &ival)) + iface->rx_state = ival; + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_AUTONEG, &ival)) + iface->autoneg = ival; + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_PMODE, &ival)) + iface->pause_mode = ival; + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_SPEED, &ival)) + iface->speed = ival; + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_SMODES, &ival)) + iface->supported_modes = ival; + if (config_setting_lookup_int (lcfg, CFG_TOKEN_IF_AMODES, &ival)) + iface->advertised_modes = ival; + if (config_setting_lookup_string (lcfg, CFG_TOKEN_IF_NAME, + (const char **) &if_name)) + { + /* + * The name comes from the config file: clamp it to the buffer and + * always NUL-terminate, so an over-long name cannot overflow if_name + * and a shorter override does not keep the tail of an earlier name. + */ + size_t n_name = strlen (if_name); + + if (n_name >= sizeof (iface->if_name)) + n_name = sizeof (iface->if_name) - 1; + clib_memcpy (iface->if_name, if_name, n_name); + iface->if_name[n_name] = 0; + } + + iface->max_rx_pktlen = get_max_rx_pktlen (); + + return 0; +} + +static int +parse_info (config_setting_t *lcfg, struct octep_fw_info *info) +{ + int ival = 0, ret; + + ret = config_setting_lookup_int (lcfg, CFG_TOKEN_INFO_PKIND, &ival); + if (ival) + { + info->pkind = OCTEP_PKIND_OL_SUPPORTED; + info->fsz = OCTEP_FSZ_OL_SUPPORTED; + info->rx_offloads = OCTEP_RX_OFFLOAD_CKSUM; + info->tx_offloads = OCTEP_TX_OFFLOAD_CKSUM; + } + + ret = config_setting_lookup_int (lcfg, CFG_TOKEN_INFO_HB_INTERVAL, &ival); + info->hb_interval = (ret == CONFIG_TRUE) ? ival : DEFAULT_HB_INTERVAL_MSECS; + + ret = config_setting_lookup_int (lcfg, CFG_TOKEN_INFO_HB_MISS_COUNT, &ival); + info->hb_miss_count = (ret == CONFIG_TRUE) ?
ival : DEFAULT_HB_MISS_COUNT; + + return 0; +} + +/* + * Parse one "pfs" element: the PF's own interface/info settings followed by + * every entry of its optional "vfs" list. VF entries are created on first + * sight and updated in place when they already exist. + * + * return value: 0 on success, -errno on failure. + */ +static int +parse_pf (config_setting_t *pf, struct pf_cfg *pfcfg) +{ + config_setting_t *vfs, *vf; + int nvfs, i, idx, err; + struct vf_cfg *vfcfg; + + err = parse_if (pf, &pfcfg->iface); + if (err) + return err; + + err = parse_info (pf, &pfcfg->info); + if (err) + return err; + + vfs = config_setting_get_member (pf, CFG_TOKEN_VFS); + if (!vfs) + return 0; + nvfs = config_setting_length (vfs); + /* pf idx is read from the config file; keep it inside pfconfig[] */ + if (pfcfg->idx < 0 || + pfcfg->idx >= (int) (sizeof (cfg_idx.pemconfig[0].pfconfig) / + sizeof (cfg_idx.pemconfig[0].pfconfig[0]))) + { + clib_warning ("pf idx %d out of range\n", pfcfg->idx); + return -EINVAL; + } + cfg_idx.pemconfig[0].pfconfig[pfcfg->idx].n_vfs = nvfs; + for (i = 0; i < nvfs; i++) + { + vf = config_setting_get_elem (vfs, i); + if (!vf) + continue; + if (config_setting_lookup_int (vf, CFG_TOKEN_IDX, &idx) == CONFIG_FALSE) + continue; + vfcfg = get_vf (pfcfg, idx); + if (!vfcfg) + { + vfcfg = create_vf (pfcfg, idx); + if (!vfcfg) + { + clib_warning ("Oom for pf[%d]vf[%d]\n", pfcfg->idx, idx); + continue; + } + } + err = parse_if (vf, &vfcfg->iface); + if (err) + return err; + err = parse_info (vf, &vfcfg->info); + if (err) + return err; + } + + return 0; +} + +static int +parse_pem (config_setting_t *pem, struct pem_cfg *pemcfg) +{ + config_setting_t *pfs, *pf; + int npfs, i, idx, err; + struct pf_cfg *pfcfg; + + pfs = config_setting_get_member (pem, CFG_TOKEN_PFS); + if (!pfs) + return 0; + + npfs = config_setting_length (pfs); + for (i = 0; i < npfs; i++) + { + pf = config_setting_get_elem (pfs, i); + if (!pf) + continue; + if (config_setting_lookup_int (pf, CFG_TOKEN_IDX, &idx) == CONFIG_FALSE) + continue; + pfcfg = get_pf (pemcfg, idx); + if (!pfcfg) + { + pfcfg = create_pf (pemcfg, idx); + if (!pfcfg) + { + clib_warning ("Oom for pem[%d]pf[%d]\n", pemcfg->idx, idx); + continue; + } + } + err = parse_pf (pf, pfcfg); + if (err) + return err; + } + + return 0; +} + +static int +parse_pems (config_setting_t *pems) +{ + config_setting_t *pem; + int npems, i, idx, err; + struct pem_cfg *pemcfg; + + npems = config_setting_length (pems); + for (i = 0; i < npems; i++) + { + pem = config_setting_get_elem (pems, i); + if
(!pem) + continue; + if (config_setting_lookup_int (pem, CFG_TOKEN_IDX, &idx) == CONFIG_FALSE) + continue; + pemcfg = get_pem (idx); + if (!pemcfg) + { + pemcfg = create_pem (idx); + if (!pemcfg) + { + clib_warning ("Oom for pem[%d]\n", idx); + continue; + } + } + err = parse_pem (pem, pemcfg); + if (err) + return err; + } + + return 0; +} + +/* + * Read the file named by the "base_soc" setting and load its "soc.pems" + * list into the global configuration. + * + * return value: 0 on success, -errno on failure (same convention as + * octep_cp_config_init, which returns this value to its caller unchanged). + */ +static int +parse_base_config (const char *cfg_file_path) +{ + config_setting_t *lcfg, *pems; + config_t fcfg; + int err; + + clib_warning ("base config file : %s\n", cfg_file_path); + config_init (&fcfg); + if (!config_read_file (&fcfg, cfg_file_path)) + { + clib_warning ("%s:%d - %s\n", config_error_file (&fcfg), + config_error_line (&fcfg), config_error_text (&fcfg)); + config_destroy (&fcfg); + return -EINVAL; + } + + lcfg = config_lookup (&fcfg, CFG_TOKEN_SOC); + if (!lcfg) + { + config_destroy (&fcfg); + return -EINVAL; + } + + pems = config_setting_get_member (lcfg, CFG_TOKEN_PEMS); + if (pems) + { + err = parse_pems (pems); + if (err) + { + config_destroy (&fcfg); + return err; + } + } + + config_destroy (&fcfg); + + return 0; +} + +int +octep_cp_config_init (const char *cfg_file_path) +{ + config_setting_t *lcfg, *pems; + const char *str; + config_t fcfg; + int err; + + clib_warning ("config file : %s\n", cfg_file_path); + config_init (&fcfg); + if (!config_read_file (&fcfg, cfg_file_path)) + { + clib_warning ("%s:%d - %s\n", config_error_file (&fcfg), + config_error_line (&fcfg), config_error_text (&fcfg)); + config_destroy (&fcfg); + return -EINVAL; + } + + lcfg = config_lookup (&fcfg, CFG_TOKEN_SOC); + if (!lcfg) + { + config_destroy (&fcfg); + return -EINVAL; + } + + if (config_setting_lookup_string (lcfg, CFG_TOKEN_BASE_SOC, &str)) + { + err = parse_base_config (str); + if (err) + { + config_destroy (&fcfg); + return err; + } + } + + pems = config_setting_get_member (lcfg, CFG_TOKEN_PEMS); + if (pems) + { + err = parse_pems (pems); + if (err) + { + config_destroy (&fcfg); + return err; + } + } + +
config_destroy (&fcfg); + + print_config (); + + return 0; +} + +int +app_config_get_if_from_msg_info (union octep_cp_msg_info *ctx_info, + union octep_cp_msg_info *msg_info, + struct if_cfg **iface, + struct if_stats **ifstats, + struct octep_fw_info **info) +{ + struct pem_cfg *pem = cfg.pems; + struct pf_cfg *pf; + struct vf_cfg *vf; + + while (pem) + { + if (pem->idx == ctx_info->s.pem_idx) + { + pf = pem->pfs; + while (pf) + { + if (pf->idx == ctx_info->s.pf_idx) + { + if (!msg_info->s.is_vf) + { + clib_warning ("pem[%u] pf[%u]\n", pem->idx, pf->idx); + cfg_idx.pem_idx = pem->idx; + cfg_idx.pf_idx = pf->idx; + cfg_idx.is_vf = 0; + cfg_idx.vf_idx = 0; + *iface = &pf->iface; + *ifstats = &pf->ifstats; + *info = &pf->info; + return 0; + } + vf = pf->vfs; + while (vf) + { + if (vf->idx == msg_info->s.vf_idx) + { + clib_warning ("pem[%u] pf[%u] vf[%u]\n", pem->idx, + pf->idx, vf->idx); + cfg_idx.pem_idx = pem->idx; + cfg_idx.pf_idx = pf->idx; + cfg_idx.is_vf = 1; + cfg_idx.vf_idx = vf->idx; + *iface = &vf->iface; + *ifstats = &vf->ifstats; + *info = &vf->info; + return 0; + } + vf = vf->next; + } + } + pf = pf->next; + } + } + pem = pem->next; + } + + return -ENOENT; +} + +int +octep_cp_config_uninit () +{ + struct pem_cfg *pem, *pp; + struct pf_cfg *pf, *pfp; + struct vf_cfg *vf, *vfp; + + clib_warning ("config uninit\n"); + pem = cfg.pems; + while (pem) + { + pf = pem->pfs; + while (pf) + { + vf = pf->vfs; + while (vf) + { + vfp = vf->next; + free (vf); + vf = vfp; + } + pfp = pf->next; + free (pf); + pf = pfp; + } + pp = pem->next; + free (pem); + pem = pp; + } + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/octep_config.h b/src/plugins/octep_cp/octep_config.h new file mode 100644 index 0000000000..ee84968f1b --- /dev/null +++ b/src/plugins/octep_cp/octep_config.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2023 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef __OCTEP_CONFIG_H__ +#define __OCTEP_CONFIG_H__ + +#include + +#include +#include "octep_action.h" + +#ifndef ETH_ALEN +#define ETH_ALEN 6 +#endif + +#define MIN_HB_INTERVAL_MSECS 1000 +#define MAX_HB_INTERVAL_MSECS 15000 +#define DEFAULT_HB_INTERVAL_MSECS MIN_HB_INTERVAL_MSECS +#define IF_NAME_MAX_LEN 256 +#define DEFAULT_HB_MISS_COUNT 20 + +/* Network interface stats */ +struct if_stats +{ + struct octep_iface_rx_stats rx_stats; + struct octep_iface_tx_stats tx_stats; +}; + +/* Network interface data */ +struct if_cfg +{ + u16 idx; + u16 host_if_id; + /* Current MTU of the interface */ + u16 mtu; + /* Max Receive packet length of the interface */ + u16 max_rx_pktlen; + u8 mac_addr[ETH_ALEN]; + /* Enum octep_ctrl_net_state */ + u16 link_state; + /* Enum octep_ctrl_net_state */ + u16 rx_state; + /* OCTEP_LINK_MODE_XXX */ + u8 autoneg; + /* OCTEP_LINK_MODE_XXX */ + u8 pause_mode; + /* SPEED_XXX */ + u32 speed; + /* OCTEP_LINK_MODE_XXX */ + u64 supported_modes; + /* OCTEP_LINK_MODE_XXX */ + u64 advertised_modes; + /* Interface name */ + u8 if_name[IF_NAME_MAX_LEN]; +}; + +/* Virtual function configuration */ +struct vf_cfg +{ + /* VF index */ + int idx; + /* Network interface data */ + struct if_cfg iface; + struct if_stats ifstats; + struct octep_fw_info info; + /* Next VF of the same PF (singly linked list, NULL at the end) */ + struct vf_cfg *next; +}; + +/* Physical function configuration */ +struct pf_cfg +{ + /* PF index */ + int idx; + /* Network interface data */ + struct if_cfg iface; + struct if_stats ifstats; + struct octep_fw_info info; + /* Number of vf's */ + int nvf; + /* Configuration for vf's */ + struct vf_cfg *vfs; + /* Next PF of the same PEM (singly linked list, NULL at the end) */ + struct pf_cfg *next; +}; + +/* PEM configuration */ +struct pem_cfg +{ + /* PEM index */ + int idx; + /* Number of pf's */ + int npf; + /* Configuration for pf's */ + struct pf_cfg *pfs; + /* Next PEM (singly linked list, NULL at the end) */ + struct pem_cfg *next; +}; + +/* App configuration */ +struct app_cfg +{ + /* Number of pem's */ + int npem; + /*
Configuration for pem's */ + struct pem_cfg *pems; +}; + +extern struct app_cfg cfg; + +/* + * Parse file and populate configuration. + * + * @param cfg_file_path: Path to configuration file. + * + * return value: 0 on success, -errno on failure. + */ +int octep_cp_config_init (const char *cfg_file_path); + +/* + * Get interface based on information in message header. + * + * @param ctx_info: non-null pointer to message context info. This is the + * pem->pf context used to poll for messages. + * @param msg_info: non-null pointer to message info. This is the info from + * received message. + * @param iface: non-null pointer to struct if_cfg *. + * @param ifstats: non-null pointer to struct if_stats *. + * @param info: non-null pointer to struct octep_fw_info *. + * + * return value: 0 on success, -errno on failure. + */ +int app_config_get_if_from_msg_info (union octep_cp_msg_info *ctx_info, + union octep_cp_msg_info *msg_info, + struct if_cfg **iface, + struct if_stats **ifstats, + struct octep_fw_info **info); + +/* + * Free allocated configuration artifacts. + * return value: 0 on success, -errno on failure.
+ */ +int octep_cp_config_uninit (); + +#endif /* __OCTEP_CONFIG_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/octep_cp_cn10kxx.cfg b/src/plugins/octep_cp/octep_cp_cn10kxx.cfg new file mode 100755 index 0000000000..bd03983039 --- /dev/null +++ b/src/plugins/octep_cp/octep_cp_cn10kxx.cfg @@ -0,0 +1,804 @@ +soc = { + /* 1 pem */ + pems = ( + { + idx = 0; + /* 1 pf per pem */ + pfs = ( + { + idx = 0; + mac_addr = [0x00, 0x00, 0x00, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + hb_interval = 1000; + hb_miss_count = 20; + pkind = 0; + /* 64 vf's per pf */ + vfs = ( + { + idx = 0; + mac_addr = [0x00, 0x00, 0x00, 0x01, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth1"; + }, + { + idx = 1; + mac_addr = [0x00, 0x00, 0x00, 0x02, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth2"; + }, + { + idx = 2; + mac_addr = [0x00, 0x00, 0x00, 0x03, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth3"; + }, + { + idx = 3; + mac_addr = [0x00, 0x00, 0x00, 0x04, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth4"; + }, + { + idx = 4; + mac_addr = [0x00, 0x00, 0x00, 0x05, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = 
"eth5"; + }, + { + idx = 5; + mac_addr = [0x00, 0x00, 0x00, 0x06, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth6"; + }, + { + idx = 6; + mac_addr = [0x00, 0x00, 0x00, 0x07, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth7"; + }, + { + idx = 7; + mac_addr = [0x00, 0x00, 0x00, 0x08, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + if_name = "eth8"; + }, + { + idx = 8; + mac_addr = [0x00, 0x00, 0x00, 0x09, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 9; + mac_addr = [0x00, 0x00, 0x00, 0x0a, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 10; + mac_addr = [0x00, 0x00, 0x00, 0x0b, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 11; + mac_addr = [0x00, 0x00, 0x00, 0x0c, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 12; + mac_addr = [0x00, 0x00, 0x00, 0x0d, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 13; + mac_addr = [0x00, 0x00, 0x00, 0x0e, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + 
speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 14; + mac_addr = [0x00, 0x00, 0x00, 0x0f, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 15; + mac_addr = [0x00, 0x00, 0x00, 0x10, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 16; + mac_addr = [0x00, 0x00, 0x00, 0x11, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 17; + mac_addr = [0x00, 0x00, 0x00, 0x12, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 18; + mac_addr = [0x00, 0x00, 0x00, 0x13, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 19; + mac_addr = [0x00, 0x00, 0x00, 0x14, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 20; + mac_addr = [0x00, 0x00, 0x00, 0x15, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 21; + mac_addr = [0x00, 0x00, 0x00, 0x16, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 22; + mac_addr = [0x00, 0x00, 0x00, 0x17, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + 
pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 23; + mac_addr = [0x00, 0x00, 0x00, 0x18, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 24; + mac_addr = [0x00, 0x00, 0x00, 0x19, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 25; + mac_addr = [0x00, 0x00, 0x00, 0x1a, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 26; + mac_addr = [0x00, 0x00, 0x00, 0x1b, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 27; + mac_addr = [0x00, 0x00, 0x00, 0x1c, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 28; + mac_addr = [0x00, 0x00, 0x00, 0x1d, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 29; + mac_addr = [0x00, 0x00, 0x00, 0x1e, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 30; + mac_addr = [0x00, 0x00, 0x00, 0x1f, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 31; + mac_addr = [0x00, 0x00, 0x00, 0x20, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + 
autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 32; + mac_addr = [0x00, 0x00, 0x00, 0x21, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 33; + mac_addr = [0x00, 0x00, 0x00, 0x22, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 34; + mac_addr = [0x00, 0x00, 0x00, 0x23, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 35; + mac_addr = [0x00, 0x00, 0x00, 0x24, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 36; + mac_addr = [0x00, 0x00, 0x00, 0x25, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 37; + mac_addr = [0x00, 0x00, 0x00, 0x26, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 38; + mac_addr = [0x00, 0x00, 0x00, 0x27, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 39; + mac_addr = [0x00, 0x00, 0x00, 0x28, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 40; + mac_addr = [0x00, 0x00, 0x00, 0x29, 0x01, 0x01]; + link_state = 0; + 
rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 41; + mac_addr = [0x00, 0x00, 0x00, 0x2a, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 42; + mac_addr = [0x00, 0x00, 0x00, 0x2b, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 43; + mac_addr = [0x00, 0x00, 0x00, 0x2c, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 44; + mac_addr = [0x00, 0x00, 0x00, 0x2d, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 45; + mac_addr = [0x00, 0x00, 0x00, 0x2e, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 46; + mac_addr = [0x00, 0x00, 0x00, 0x2f, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 47; + mac_addr = [0x00, 0x00, 0x00, 0x30, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 48; + mac_addr = [0x00, 0x00, 0x00, 0x31, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 49; + mac_addr = [0x00, 0x00, 0x00, 0x32, 0x01, 0x01]; + 
link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 50; + mac_addr = [0x00, 0x00, 0x00, 0x33, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 51; + mac_addr = [0x00, 0x00, 0x00, 0x34, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 52; + mac_addr = [0x00, 0x00, 0x00, 0x35, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 53; + mac_addr = [0x00, 0x00, 0x00, 0x36, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 54; + mac_addr = [0x00, 0x00, 0x00, 0x37, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 55; + mac_addr = [0x00, 0x00, 0x00, 0x38, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 56; + mac_addr = [0x00, 0x00, 0x00, 0x39, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 57; + mac_addr = [0x00, 0x00, 0x00, 0x3a, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 58; + mac_addr = [0x00, 0x00, 0x00, 
0x3b, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 59; + mac_addr = [0x00, 0x00, 0x00, 0x3c, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 60; + mac_addr = [0x00, 0x00, 0x00, 0x3d, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 61; + mac_addr = [0x00, 0x00, 0x00, 0x3e, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 62; + mac_addr = [0x00, 0x00, 0x00, 0x3f, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + }, + { + idx = 63; + mac_addr = [0x00, 0x00, 0x00, 0x40, 0x01, 0x01]; + link_state = 0; + rx_state = 0; + autoneg = 0x3; + pause_mode = 0x3; + speed = 10000; + supported_modes = 0x1; + advertised_modes = 0x1; + pkind = 0; + } + ); + } + ); + } + ); +}; diff --git a/src/plugins/octep_cp/octep_ctrl.c b/src/plugins/octep_cp/octep_ctrl.c new file mode 100644 index 0000000000..ea5f1f75e8 --- /dev/null +++ b/src/plugins/octep_cp/octep_ctrl.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2023 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "octep_cp_lib.h" +#include "octep_input.h" +#include "octep_config.h" +#include +#include +#include + +/* Control plane version */ +#define CP_VERSION_MAJOR 1 +#define CP_VERSION_MINOR 0 +#define CP_VERSION_VARIANT_MIN 0 +#define CP_VERSION_VARIANT_CUR 1 +#define MAX_EVENTS 16 + +#define CP_VERSION_MIN \ + (OCTEP_CP_VERSION (CP_VERSION_MAJOR, CP_VERSION_MINOR, \ + CP_VERSION_VARIANT_MIN)) + +#define CP_VERSION_MAX \ + (OCTEP_CP_VERSION (CP_VERSION_MAJOR, CP_VERSION_MINOR, \ + CP_VERSION_VARIANT_CUR)) + +#define SOC_CFG_PATH "/etc/vpp/octep_cp_cn10kxx.cfg" +struct octep_pf_vf_cfg cfg_idx; + +/* + * The PERST# (PCI Express Reset) signal is an open drain, active low output + * from the root port. It is released when all power rails and the REFCLK + * signal have stabilized. + */ +static volatile int perst = 0; +static int hb_interval = 0; + +struct octep_cp_lib_cfg cp_lib_cfg = { 0 }; + +static int +process_events () +{ + struct octep_cp_event_info e[MAX_EVENTS]; + int n, i; + + n = octep_cp_lib_recv_event (e, MAX_EVENTS); + if (n < 0) + return n; + + for (i = 0; i < n; i++) + { + if (e[i].e == OCTEP_CP_EVENT_TYPE_PERST) + { + clib_warning ("Event: perst on dom[%d]\n", e[i].u.perst.dom_idx); + perst = 1; + } + } + + return 0; +} + +static int +send_heartbeat () +{ + struct octep_cp_event_info info; + int i, j; + + info.e = OCTEP_CP_EVENT_TYPE_HEARTBEAT; + for (i = 0; i < cp_lib_cfg.ndoms; i++) + { + info.u.hbeat.dom_idx = cp_lib_cfg.doms[i].idx; + for (j = 0; j < cp_lib_cfg.doms[i].npfs; j++) + { + info.u.hbeat.pf_idx = cp_lib_cfg.doms[i].pfs[j].idx; + octep_cp_lib_send_event (&info); + } + } + + return 0; +} + +void +sigint_handler (int sig_num) +{ + if (sig_num == SIGALRM) + { + if (perst) + return; + send_heartbeat (); + alarm (hb_interval); + } +} + +static int +set_fw_ready (int ready) +{ + 
struct octep_cp_event_info info; + int i, j; + + info.e = OCTEP_CP_EVENT_TYPE_FW_READY; + info.u.fw_ready.ready = ready; + for (i = 0; i < cp_lib_cfg.ndoms; i++) + { + info.u.fw_ready.dom_idx = cp_lib_cfg.doms[i].idx; + for (j = 0; j < cp_lib_cfg.doms[i].npfs; j++) + { + info.u.fw_ready.pf_idx = cp_lib_cfg.doms[i].pfs[j].idx; + octep_cp_lib_send_event (&info); + } + } + + return 0; +} + +static uword +octep_cp_process (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + int err = 0, i, j; + struct pem_cfg *pem; + struct pf_cfg *pf; + const char *soc_cfg = SOC_CFG_PATH; + + /* init will wake it up */ + vlib_process_wait_for_event (vm); + + err = octep_cp_config_init (soc_cfg); + if (err) + return err; + + // signal(SIGINT, sigint_handler); + signal (SIGALRM, sigint_handler); + + cp_lib_cfg.ndoms = cfg.npem; + cp_lib_cfg.min_version = CP_VERSION_MIN; + cp_lib_cfg.max_version = CP_VERSION_MAX; + cfg_idx.n_pems = cfg.npem; + pem = cfg.pems; + i = 0; + while (pem) + { + cp_lib_cfg.doms[i].idx = pem->idx; + cp_lib_cfg.doms[i].npfs = pem->npf; + cfg_idx.pemconfig[i].n_pfs = pem->npf; + pf = pem->pfs; + j = 0; + while (pf) + { + cp_lib_cfg.doms[i].pfs[j++].idx = pf->idx; + if (hb_interval == 0 || pf->info.hb_interval < hb_interval) + hb_interval = pf->info.hb_interval; + + pf = pf->next; + } + pem = pem->next; + i++; + } + err = octep_cp_lib_init (&cp_lib_cfg); + if (err) + return err; + + err = octep_cp_initialize_receive_vector (); + if (err) + { + octep_cp_lib_uninit (); + return err; + } + + set_fw_ready (1); + clib_warning ("Heartbeat interval : %u msecs\n", hb_interval); + hb_interval /= 1000; + alarm (hb_interval); + while (1) + { + /* + * Host PF driver has a timeout of 500ms, so keeping polling interval + * less than that(100ms). 
Else the host PF driver octeon_ep.ko timesout + */ + vlib_process_wait_for_event_or_clock (vm, 0.1); + vlib_process_get_events (vm, NULL); + loop_process_msgs (); + process_events (); + } + return 0; +} + +/* + * Main-loop exit hook (registered with VLIB_MAIN_LOOP_EXIT_FUNCTION below). + * Sends FW_READY(0) for every configured PF, then tears down the control-plane + * library, the receive buffers and the parsed configuration. + * Always returns 0 (no error). + */ +static clib_error_t * +octep_cp_process_exit (vlib_main_t *vm) +{ + set_fw_ready (0); + octep_cp_lib_uninit (); + /* Free the rx buffers allocated by octep_cp_initialize_receive_vector (). */ + octep_cp_uninitialize_receive_vector (); + octep_cp_config_uninit (); + return 0; +} + +VLIB_REGISTER_NODE (octep_cp_process_node, static) = { + .function = octep_cp_process, + .name = "octep-cp-process", + .type = VLIB_NODE_TYPE_PROCESS, + .process_log2_n_stack_bytes = 17, +}; + +clib_error_t * +octep_cp_init (vlib_main_t *vm) +{ + + vlib_process_signal_event (vlib_get_main (), octep_cp_process_node.index, 0, + 0); + return NULL; +} + +VLIB_INIT_FUNCTION (octep_cp_init); + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "OCTEON PCI End-point Control Agent", + .default_disabled = 1, +}; + +VLIB_MAIN_LOOP_EXIT_FUNCTION (octep_cp_process_exit); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/octep_input.c b/src/plugins/octep_cp/octep_input.c new file mode 100644 index 0000000000..fd1be1e198 --- /dev/null +++ b/src/plugins/octep_cp/octep_input.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2023 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ +#include +#include +#include +#include +#include + +#include "octep_cp_lib.h" +#include "cp_compat.h" +#include "octep_ctrl_net.h" +#include "octep_hw.h" +#include "octep_input.h" +#include "octep_action.h" +#include "octep_config.h" + +#define LOOP_RX_BUF_CNT 6 + +static struct octep_cp_msg rx_msg[LOOP_RX_BUF_CNT]; +static int rx_num = LOOP_RX_BUF_CNT; +static int max_msg_sz = sizeof (union octep_ctrl_net_max_data); + +extern struct octep_cp_lib_cfg cp_lib_cfg; +extern struct octep_pf_vf_cfg cfg_idx; + +static const uint32_t resp_hdr_sz = sizeof (union octep_ctrl_net_resp_hdr); +static const uint32_t mtu_sz = sizeof (struct octep_ctrl_net_h2f_resp_cmd_mtu); +static const uint32_t mac_sz = sizeof (struct octep_ctrl_net_h2f_resp_cmd_mac); +static const uint32_t state_sz = + sizeof (struct octep_ctrl_net_h2f_resp_cmd_state); +static const uint32_t link_info_sz = sizeof (struct octep_ctrl_net_link_info); +static const uint32_t if_stats_sz = + sizeof (struct octep_ctrl_net_h2f_resp_cmd_get_stats); +static const uint32_t info_sz = + sizeof (struct octep_ctrl_net_h2f_resp_cmd_get_info); + +#define CTRL_NET_RESP_OFFLOADS_SZ sizeof (struct octep_ctrl_net_offloads) + +/* + * Initialize max receive burst size and each message size. 
+ * + */ + +int +octep_cp_initialize_receive_vector () +{ + int i, j; + struct octep_cp_msg *msg; + + clib_warning ("Loop: Init\n"); + /* For now only support single buffer messages */ + for (i = 0; i < cp_lib_cfg.ndoms; i++) + { + for (j = 0; j < cp_lib_cfg.doms[i].npfs; j++) + { + /* Every PF must accept a message as large as our single buffer. */ + if (cp_lib_cfg.doms[i].pfs[j].max_msg_sz < max_msg_sz) + return -EINVAL; + } + } + + /* One zeroed max_msg_sz buffer per receive slot. */ + for (i = 0; i < rx_num; i++) + { + msg = &rx_msg[i]; + msg->info.s.sz = max_msg_sz; + msg->sg_num = 1; + msg->sg_list[0].sz = max_msg_sz; + msg->sg_list[0].msg = calloc (1, max_msg_sz); + if (!msg->sg_list[0].msg) + goto mem_alloc_fail; + } + + clib_warning ("Loop: using single buffer with msg sz %u.\n", max_msg_sz); + + return 0; + +mem_alloc_fail: + /* + * Roll back every slot. The pointers are cleared after free () so that a + * later octep_cp_uninitialize_receive_vector () cannot free them again. + */ + for (i = 0; i < LOOP_RX_BUF_CNT; i++) + { + msg = &rx_msg[i]; + free (msg->sg_list[0].msg); + msg->sg_list[0].msg = NULL; + msg->sg_list[0].sz = 0; + msg->sg_num = 0; + } + + return -ENOMEM; +} + +/* + * Handle an MTU request: GET replies with iface->max_rx_pktlen, anything else + * stores the requested MTU and passes it to octep_update_pktio (). + * Returns the payload size to add to the response header (mtu_sz or 0). + */ +static int +process_mtu (struct if_cfg *iface, struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + int ret = 0; + + if (req->mtu.cmd == OCTEP_CTRL_NET_CMD_GET) + { + resp->mtu.val = iface->max_rx_pktlen; + clib_warning ("Cmd: get mtu : %u\n", resp->mtu.val); + ret = mtu_sz; + } + else + { + iface->mtu = req->mtu.val; + clib_warning ("Cmd: set mtu : %u\n", req->mtu.val); + octep_update_pktio (req->mtu.cmd, req->mtu.val); + } + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return ret; +} + +static int +process_mac (struct if_cfg *iface, struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + int ret = 0; + + if (req->mac.cmd == OCTEP_CTRL_NET_CMD_GET) + { + memcpy (&resp->mac.addr, &iface->mac_addr, ETH_ALEN); + ret = mac_sz; + clib_warning ("Cmd: get mac : %02x:%02x:%02x:%02x:%02x:%02x\n", + resp->mac.addr[0], resp->mac.addr[1], resp->mac.addr[2], + resp->mac.addr[3], resp->mac.addr[4], resp->mac.addr[5]); + } + else + { + memcpy (&iface->mac_addr, &req->mac.addr, ETH_ALEN); + clib_warning ("Cmd: set mac : 
%02x:%02x:%02x:%02x:%02x:%02x\n", + req->mac.addr[0], req->mac.addr[1], req->mac.addr[2], + req->mac.addr[3], req->mac.addr[4], req->mac.addr[5]); + } + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return ret; +} + +static int +process_get_if_stats (struct if_stats *ifstats, + struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + /* Struct if_stats = struct octep_ctrl_net_h2f_resp_cmd_get_stats */ + memcpy (&resp->if_stats, ifstats, if_stats_sz); + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + clib_warning ("Cmd: get if stats\n"); + + return if_stats_sz; +} + +static int +process_link_status (struct if_cfg *iface, struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + int ret = 0; + + if (req->link.cmd == OCTEP_CTRL_NET_CMD_GET) + { + resp->link.state = iface->link_state; + ret = state_sz; + } + else + iface->link_state = req->link.state; + + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return ret; +} + +static int +process_rx_state (struct if_cfg *iface, struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + int ret = 0; + + if (req->rx.cmd == OCTEP_CTRL_NET_CMD_GET) + { + resp->rx.state = iface->rx_state; + ret = state_sz; + } + else + iface->rx_state = req->rx.state; + + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return ret; +} + +static int +process_link_info (struct if_cfg *iface, struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + int ret = 0; + + if (req->link_info.cmd == OCTEP_CTRL_NET_CMD_GET) + { + resp->link_info.supported_modes = iface->supported_modes; + resp->link_info.advertised_modes = iface->advertised_modes; + resp->link_info.autoneg = iface->autoneg; + resp->link_info.pause = iface->pause_mode; + resp->link_info.speed = iface->speed; + ret = link_info_sz; + } + else + { + iface->advertised_modes = req->link_info.info.advertised_modes; + iface->autoneg = req->link_info.info.autoneg; + iface->pause_mode = 
req->link_info.info.pause; + iface->speed = req->link_info.info.speed; + } + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return ret; +} + +static int +process_get_info (struct octep_fw_info *info, + struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp) +{ + memcpy (&resp->info.fw_info, info, sizeof (struct octep_fw_info)); + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return info_sz; +} + +clib_error_t * +octep_enable_disable_offload_feature_arc (u8 *if_name, bool enable) +{ + uword *p; + u32 hw_if_index; + clib_error_t *error = NULL; + vnet_hw_interface_t *hi = NULL; + vnet_feature_registration_t *reg; + vnet_main_t *vnm = vnet_get_main (); + + if (!(p = hash_get (vnm->interface_main.hw_interface_by_name, if_name))) + return clib_error_return (0, "Unknown interfacse name (%s)... ", + (const char *) if_name); + + hw_if_index = p[0]; + hi = vnet_get_hw_interface (vnm, hw_if_index); + + reg = vnet_get_feature_reg ((const char *) DEVICE_INPUT, + (const char *) DPU_INPUT_NODE); + if (reg == 0) + { + error = clib_error_return ( + 0, + "Feature (%s) not registered to arc (%s)... See 'show " + "features verbose' for valid feature/arc combinations. ", + DPU_INPUT_NODE, DEVICE_INPUT); + return error; + } + + if (reg->enable_disable_cb) + error = reg->enable_disable_cb (hi->sw_if_index, enable); + + if (error) + return error; + + vnet_feature_enable_disable ((const char *) DEVICE_INPUT, + (const char *) DPU_INPUT_NODE, hi->sw_if_index, + enable, 0, 0); + + reg = vnet_get_feature_reg ((const char *) DEVICE_OUTPUT, + (const char *) DPU_OUTPUT_NODE); + if (reg == 0) + return clib_error_return ( + 0, + "Feature (%s) not registered to arc (%s)... See 'show " + "features verbose' for valid feature/arc combinations. 
", + DPU_OUTPUT_NODE, DEVICE_OUTPUT); + + if (reg->enable_disable_cb) + error = reg->enable_disable_cb (hi->sw_if_index, enable); + + if (error) + return error; + + vnet_feature_enable_disable ((const char *) DEVICE_OUTPUT, + (const char *) DPU_OUTPUT_NODE, hi->sw_if_index, + enable, 0, 0); + + return error; +} + +static int +process_offloads (struct octep_fw_info *info, + struct octep_ctrl_net_h2f_req *req, + struct octep_ctrl_net_h2f_resp *resp, struct if_cfg *iface) +{ + + if (req->offloads.cmd == OCTEP_CTRL_NET_CMD_GET) + { + resp->offloads.rx_offloads = info->rx_offloads; + resp->offloads.tx_offloads = info->tx_offloads; + resp->offloads.ext_offloads = info->ext_offloads; + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + return CTRL_NET_RESP_OFFLOADS_SZ; + } + + /** + * Disable/enable feature arc based on Host request or existing config. + */ + if (!req->offloads.offloads.rx_offloads && + !req->offloads.offloads.tx_offloads) + { + if (octep_enable_disable_offload_feature_arc (iface->if_name, 0)) + return 0; + } + else if (!info->rx_offloads && !info->tx_offloads) + { + if (octep_enable_disable_offload_feature_arc (iface->if_name, 1)) + return 0; + } + + info->rx_offloads = req->offloads.offloads.rx_offloads; + info->tx_offloads = req->offloads.offloads.tx_offloads; + info->ext_offloads = req->offloads.offloads.ext_offloads; + + resp->hdr.s.reply = OCTEP_CTRL_NET_REPLY_OK; + + return CTRL_NET_RESP_OFFLOADS_SZ; +} + +static int +process_msg (union octep_cp_msg_info *ctx, struct octep_cp_msg *msg) +{ + struct octep_ctrl_net_h2f_req *req; + struct octep_ctrl_net_h2f_resp resp = { 0 }; + struct octep_cp_msg resp_msg; + struct if_cfg *iface; + struct if_stats *ifdata; + struct octep_fw_info *info; + int err = 0, resp_sz = 0; + + err = + app_config_get_if_from_msg_info (ctx, &msg->info, &iface, &ifdata, &info); + if (err) + { + clib_warning ("Invalid msg[%lx]\n", msg->info.words[0]); + return err; + } + + req = (struct octep_ctrl_net_h2f_req *) msg->sg_list[0].msg; + 
resp.hdr.words[0] = req->hdr.words[0]; + iface->host_if_id = req->hdr.s.sender; + resp_sz = resp_hdr_sz; + switch (req->hdr.s.cmd) + { + case OCTEP_CTRL_NET_H2F_CMD_MTU: + resp_sz += process_mtu (iface, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_MAC: + resp_sz += process_mac (iface, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_GET_IF_STATS: + resp_sz += process_get_if_stats (ifdata, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS: + resp_sz += process_link_status (iface, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_RX_STATE: + resp_sz += process_rx_state (iface, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_LINK_INFO: + resp_sz += process_link_info (iface, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_GET_INFO: + resp_sz += process_get_info (info, req, &resp); + break; + case OCTEP_CTRL_NET_H2F_CMD_OFFLOADS: + resp_sz += process_offloads (info, req, &resp, iface); + break; + default: + clib_warning ("Unhandled Cmd : %u\n", req->hdr.s.cmd); + resp_sz = 0; + break; + } + + if (resp_sz >= resp_hdr_sz) + { + resp_msg.info = msg->info; + resp_msg.info.s.sz = resp_sz; + resp_msg.sg_num = 1; + resp_msg.sg_list[0].sz = resp_sz; + resp_msg.sg_list[0].msg = &resp; + octep_cp_lib_send_msg_resp (ctx, &resp_msg, 1); + ifdata->tx_stats.pkts++; + ifdata->tx_stats.octs += resp_sz; + } + + ifdata->rx_stats.pkts++; + ifdata->rx_stats.octets += msg->info.s.sz; + + return 0; +} + +int +loop_process_msgs () +{ + union octep_cp_msg_info ctx; + struct octep_cp_msg *msg; + int ret, i, j, m; + + for (i = 0; i < cp_lib_cfg.ndoms; i++) + { + ctx.s.pem_idx = cp_lib_cfg.doms[i].idx; + for (j = 0; j < cp_lib_cfg.doms[i].npfs; j++) + { + ctx.s.pf_idx = cp_lib_cfg.doms[i].pfs[j].idx; + ret = octep_cp_lib_recv_msg (&ctx, rx_msg, rx_num); + for (m = 0; m < ret; m++) + { + msg = &rx_msg[m]; + process_msg (&ctx, msg); + /* Library will overwrite msg size in header so reset it */ + msg->info.s.sz = max_msg_sz; + } + } + } + + return 0; +} + +int 
+octep_cp_uninitialize_receive_vector () +{ + int i; + + clib_warning ("%s\n", __func__); + + /* + * Free every rx buffer and clear the slot, so calling this again (or after + * a failed initialization) never frees the same pointer twice. + */ + for (i = 0; i < rx_num; i++) + { + free (rx_msg[i].sg_list[0].msg); + rx_msg[i].sg_list[0].msg = NULL; + rx_msg[i].sg_list[0].sz = 0; + } + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/octep_cp/octep_input.h b/src/plugins/octep_cp/octep_input.h new file mode 100644 index 0000000000..3f173d024f --- /dev/null +++ b/src/plugins/octep_cp/octep_input.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef __OCTEP_INPUT_H__ +#define __OCTEP_INPUT_H__ + +#define DEVICE_INPUT "port-rx-eth" +#define DPU_INPUT_NODE "h2d-input" +#define DEVICE_OUTPUT "interface-output" +#define DPU_OUTPUT_NODE "d2h-output" +/* + * Initialize loop mode implementation. + * return value: 0 on success, -errno on failure. + */ +int octep_cp_initialize_receive_vector (); + +/* + * Receive and answer pending host messages for every PF of every domain. + * return value: always 0. + */ +int loop_process_msgs (); + +/* + * Process user interrupt signal. + * return value: 0 on success, -errno on failure. + */ +int loop_process_sigusr1 (); + +/* + * Uninitialize loop mode implementation. + * return value: 0 on success, -errno on failure. 
+ */ +int octep_cp_uninitialize_receive_vector (); + +#endif /* __OCTEP_INPUT_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c index 89b4f77e33..79a2151c2e 100644 --- a/src/plugins/tlsopenssl/tls_async.c +++ b/src/plugins/tlsopenssl/tls_async.c @@ -17,28 +17,53 @@ #include #include #include +#include -#define SSL_ASYNC_INFLIGHT 1 -#define SSL_ASYNC_READY 2 -#define SSL_ASYNC_REENTER 3 #define MAX_VECTOR_ASYNC 256 +#define SSL_WANT_NAMES \ + { \ + [0] = "N/A", [SSL_NOTHING] = "SSL_NOTHING", \ + [SSL_WRITING] = "SSL_WRITING", [SSL_READING] = "SSL_READING", \ + [SSL_X509_LOOKUP] = "SSL_X509_LOOKUP", \ + [SSL_ASYNC_PAUSED] = "SSL_ASYNC_PAUSED", \ + [SSL_ASYNC_NO_JOBS] = "SSL_ASYNC_NO_JOBS", \ + [SSL_CLIENT_HELLO_CB] = "SSL_CLIENT_HELLO_CB", \ + } + +const char *ssl_want[] = SSL_WANT_NAMES; + +typedef enum ssl_evt_status_type_ +{ + SSL_ASYNC_INVALID_STATUS = 0, + SSL_ASYNC_INFLIGHT = 1, + SSL_ASYNC_READY = 2, + SSL_ASYNC_REENTER = 3, + SSL_ASYNC_DEQ_DONE = 4, + SSL_ASYNC_CB_EXECUTED = 5, + SSL_ASYNC_MAX_STATUS +} ssl_evt_status_type_t; + typedef struct openssl_tls_callback_arg_ { int thread_index; int event_index; + ssl_async_evt_type_t async_evt_type; + openssl_resume_handler *evt_handler; } openssl_tls_callback_arg_t; typedef struct openssl_event_ { u32 ctx_index; int session_index; - u8 status; + ssl_evt_status_type_t status; - openssl_resume_handler *handler; + transport_send_params_t *tran_sp; openssl_tls_callback_arg_t cb_args; #define thread_idx cb_args.thread_index -#define event_idx cb_args.event_index +#define event_idx cb_args.event_index +#define async_event_type cb_args.async_evt_type +#define async_evt_handler cb_args.evt_handler int next; } openssl_evt_t; @@ -46,12 +71,14 @@ typedef struct openssl_async_queue_ { int evt_run_head; int evt_run_tail; + int depth; } openssl_async_queue_t; typedef 
struct openssl_async_ { openssl_evt_t ***evt_pool; openssl_async_queue_t *queue; + openssl_async_queue_t *queue_in_init; void (*polling) (void); u8 start_polling; ENGINE *engine; @@ -72,10 +99,13 @@ struct engine_polling }; void qat_init_thread (void *arg); +void dpdk_engine_init_thread (void *arg); +void dpdk_engine_polling (); struct engine_polling engine_list[] = { - {"qat", qat_polling, qat_pre_init, qat_init_thread}, - {"dasync", dasync_polling, NULL, NULL} + { "qat", qat_polling, qat_pre_init, qat_init_thread }, + { "dpdk_engine", dpdk_engine_polling, NULL, dpdk_engine_init_thread }, + { "dasync", dasync_polling, NULL, NULL } }; openssl_async_t openssl_async_main; @@ -98,6 +128,7 @@ evt_pool_init (vlib_main_t * vm) vec_validate (om->evt_pool, num_threads - 1); vec_validate (om->queue, num_threads - 1); + vec_validate (om->queue_in_init, num_threads - 1); om->start_polling = 0; om->engine = 0; @@ -106,6 +137,11 @@ evt_pool_init (vlib_main_t * vm) { om->queue[i].evt_run_head = -1; om->queue[i].evt_run_tail = -1; + om->queue[i].depth = 0; + + om->queue_in_init[i].evt_run_head = -1; + om->queue_in_init[i].evt_run_tail = -1; + om->queue_in_init[i].depth = 0; } om->polling = NULL; @@ -237,7 +273,6 @@ openssl_evt_alloc (void) /* In most cases, tls_async_openssl_callback is called by HW to make event active - * When EAGAIN received, VPP will call this callback to retry */ int tls_async_openssl_callback (SSL * s, void *cb_arg) @@ -247,107 +282,175 @@ tls_async_openssl_callback (SSL * s, void *cb_arg) openssl_tls_callback_arg_t *args = (openssl_tls_callback_arg_t *) cb_arg; int thread_index = args->thread_index; int event_index = args->event_index; - int *evt_run_tail = &om->queue[thread_index].evt_run_tail; - int *evt_run_head = &om->queue[thread_index].evt_run_head; + ssl_async_evt_type_t evt_type = args->async_evt_type; + openssl_async_queue_t *queue; + int *evt_run_tail, *evt_run_head; - TLS_DBG (2, "Set event %d to run\n", event_index); event = 
openssl_evt_get_w_thread (event_index, thread_index); + if (evt_type == SSL_ASYNC_EVT_INIT) + queue = om->queue_in_init; + else + queue = om->queue; + + evt_run_tail = &queue[thread_index].evt_run_tail; + evt_run_head = &queue[thread_index].evt_run_head; + /* Happend when a recursive case, especially in SW simulation */ if (PREDICT_FALSE (event->status == SSL_ASYNC_READY)) { + clib_warning ("EVT: %p ASYNC_ALREADY Set", event); event->status = SSL_ASYNC_REENTER; return 0; } event->status = SSL_ASYNC_READY; event->next = -1; - if (*evt_run_tail >= 0) + if (*evt_run_head < 0) + *evt_run_head = event_index; + else if (*evt_run_tail >= 0) { event_tail = openssl_evt_get_w_thread (*evt_run_tail, thread_index); event_tail->next = event_index; } + + queue[thread_index].depth++; + *evt_run_tail = event_index; - if (*evt_run_head < 0) - { - *evt_run_head = event_index; - } return 1; } +/* + * Continue an async SSL_write() call. + * This function is _only_ called when continuing an SSL_write() call + * that returned WANT_ASYNC. + * Since it continues the handling of an existing, paused SSL job + * (ASYNC_JOB*), the 'buf' and 'num' params to SSL_write() have + * already been set in the initial call, and are meaningless here. + * Therefore setting buf=null,num=0, to emphasize the point. + * On successful write, TLS context total_async_write bytes are updated. 
+ */ +int +openssl_async_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, + openssl_ctx_t *ctx) +{ + int wrote = 0; + + wrote = SSL_write (ssl, NULL, 0); + ossl_check_err_is_fatal (ssl, wrote); + + /* + * Nothing was written (job still paused, or a non-fatal error): leave the + * pending byte count and the FIFO untouched. A negative value passed to + * svm_fifo_dequeue_drop () would be taken as a huge unsigned length. + */ + if (wrote <= 0) + return wrote; + + ctx->total_async_write -= wrote; + svm_fifo_dequeue_drop (f, wrote); + + return wrote; +} + +static int +openssl_async_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl) +{ + int read; + + read = SSL_read (ssl, NULL, 0); + if (read <= 0) + return read; + + svm_fifo_enqueue_nocopy (f, read); + + return read; +} + int -vpp_tls_async_init_event (tls_ctx_t * ctx, - openssl_resume_handler * handler, - session_t * session) +vpp_tls_async_init_event (tls_ctx_t *ctx, openssl_resume_handler *handler, + transport_send_params_t *sp, session_t *session, + ssl_async_evt_type_t evt_type, int wr_size) { u32 eidx; - openssl_evt_t *event; + openssl_evt_t *event = NULL; openssl_ctx_t *oc = (openssl_ctx_t *) ctx; u32 thread_id = ctx->c_thread_index; - eidx = openssl_evt_alloc (); + if (oc->evt_alloc_flag[evt_type]) + { + eidx = oc->evt_index[evt_type]; + if (evt_type == SSL_ASYNC_EVT_WR) + { + event = openssl_evt_get (eidx); + goto update_wr_evnt; + } + return 1; + } + else + { + eidx = openssl_evt_alloc (); + oc->evt_alloc_flag[evt_type] = true; + } + event = openssl_evt_get (eidx); event->ctx_index = oc->openssl_ctx_index; + /* async call back args */ event->event_idx = eidx; event->thread_idx = thread_id; - event->handler = handler; + event->async_event_type = evt_type; + event->async_evt_handler = handler; event->session_index = session->session_index; - event->status = 0; - ctx->evt_index = eidx; + event->status = SSL_ASYNC_INVALID_STATUS; + oc->evt_index[evt_type] = eidx; + TLS_DBG (2, "EVT: %p ALLOTED_FOR_TYPE_%d: EIDX: %d TID: %d", event, evt_type, + event->event_idx, event->thread_idx); #ifdef HAVE_OPENSSL_ASYNC SSL_set_async_callback_arg (oc->ssl, &event->cb_args); #endif - +update_wr_evnt: + if (evt_type == SSL_ASYNC_EVT_WR) + { + transport_connection_deschedule 
(&ctx->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + oc->total_async_write = wr_size; + } + event->tran_sp = sp; return 1; } int -vpp_openssl_is_inflight (tls_ctx_t * ctx) +vpp_openssl_is_inflight (tls_ctx_t *ctx) { u32 eidx; openssl_evt_t *event; - eidx = ctx->evt_index; - event = openssl_evt_get (eidx); - - if (event->status == SSL_ASYNC_INFLIGHT) - return 1; - return 0; -} + openssl_ctx_t *oc = (openssl_ctx_t *) ctx; + int i; -int -vpp_tls_async_update_event (tls_ctx_t * ctx, int eagain) -{ - u32 eidx; - openssl_evt_t *event; + for (i = SSL_ASYNC_EVT_INIT; i < SSL_ASYNC_EVT_MAX; i++) + { + eidx = oc->evt_index[i]; + event = openssl_evt_get (eidx); - eidx = ctx->evt_index; - event = openssl_evt_get (eidx); - event->status = SSL_ASYNC_INFLIGHT; - if (eagain) - return tls_async_openssl_callback (0, &event->cb_args); + if (event->status == SSL_ASYNC_INFLIGHT) + return 1; + } - return 1; + return 0; } void -event_handler (void *tls_async) +async_event_handler (void *tls_async) { openssl_resume_handler *handler; openssl_evt_t *event; session_t *session; int thread_index; - tls_ctx_t *ctx; event = (openssl_evt_t *) tls_async; thread_index = event->thread_idx; - ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); - handler = event->handler; + handler = event->async_evt_handler; session = session_get (event->session_index, thread_index); if (handler) { - (*handler) (ctx, session); + (*handler) (event, session); + event->status = SSL_ASYNC_CB_EXECUTED; + TLS_DBG (2, "AFTER_CB_EXECUTION_FOR_EVT: %p for TYPE: %d status: %d", + event, event->async_event_type, event->status); } return; @@ -438,50 +541,120 @@ tls_async_do_job (int eidx, u32 thread_index) if (ctx) { ctx->resume = 1; - session_send_rpc_evt_to_thread (thread_index, event_handler, event); + session_send_rpc_evt_to_thread (thread_index, async_event_handler, + event); } return 1; } int -tls_resume_from_crypto (int thread_index) +handle_async_cb_events (openssl_async_queue_t *queue, int 
thread_index) { int i; - - openssl_async_t *om = &openssl_async_main; openssl_evt_t *event; - int *evt_run_head = &om->queue[thread_index].evt_run_head; - int *evt_run_tail = &om->queue[thread_index].evt_run_tail; + + int *evt_run_head = &queue[thread_index].evt_run_head; + int *evt_run_tail = &queue[thread_index].evt_run_tail; if (*evt_run_head < 0) return 0; - for (i = 0; i < MAX_VECTOR_ASYNC; i++) + int n_events = queue[thread_index].depth; + for (i = 0; i < n_events; i++) { if (*evt_run_head >= 0) { + TLS_DBG (8, "HEAD_EVT_INDEX: %d CUR_DEPTH: %d", *evt_run_head, + queue[thread_index].depth); event = openssl_evt_get_w_thread (*evt_run_head, thread_index); - tls_async_do_job (*evt_run_head, thread_index); if (PREDICT_FALSE (event->status == SSL_ASYNC_REENTER)) { + TLS_DBG (2, "REENTER_TRIGGERED"); /* recusive event triggered */ - event->status = SSL_ASYNC_READY; - continue; + goto deq_event; } + tls_async_do_job (*evt_run_head, thread_index); - event->status = 0; + deq_event: *evt_run_head = event->next; + event->status = SSL_ASYNC_DEQ_DONE; + queue[thread_index].depth--; - if (event->next < 0) + if (*evt_run_head < 0) { *evt_run_tail = -1; + TLS_DBG (8, "EVTS_DEQ_DONE"); break; } } } return 0; +} + +void +resume_handshake_events (int thread_index) +{ + openssl_async_t *om = &openssl_async_main; + + openssl_async_queue_t *queue = om->queue_in_init; + handle_async_cb_events (queue, thread_index); +} + +void +resume_read_write_events (int thread_index) +{ + openssl_async_t *om = &openssl_async_main; + + openssl_async_queue_t *queue = om->queue; + handle_async_cb_events (queue, thread_index); +} + +int +tls_resume_from_crypto (int thread_index) +{ + resume_read_write_events (thread_index); + resume_handshake_events (thread_index); + return 0; +} + +void +dpdk_engine_polling () +{ + openssl_async_t *om = &openssl_async_main; + int poll_status = 0; + if (om->start_polling) + { + ENGINE_ctrl_cmd (om->engine, "POLL", 0, &poll_status, NULL, 0); + } +} + +void 
+dpdk_engine_init_thread (void *arg) +{ + vlib_main_t *vm = vlib_get_main (); + tls_main_t *tm = vnet_tls_get_main (); + void *handle = NULL; + + if (tm && tm->engine_path) + handle = dlopen ((char *) tm->engine_path, RTLD_LAZY); + + if (!handle) + { + clib_warning ("dpdk engline library not found"); + return; + } + + u64 *(*fn) () = dlsym (handle, "dpdk_rte_thread_register"); + + if (fn) + { + if (fn () < 0) + clib_warning ("dpdk: cannot register thread %u", vm->thread_index); + } + else + clib_warning ("dpdk_rte_thread_register symbol not found"); } static clib_error_t * @@ -491,6 +664,207 @@ tls_async_init (vlib_main_t * vm) return 0; } +int +tls_async_handshake_event_handler (void *async_evt, void *unused) +{ + openssl_evt_t *event = (openssl_evt_t *) async_evt; + int thread_index = event->thread_idx; + openssl_ctx_t *oc; + tls_ctx_t *ctx; + int rv, err; + + ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); + oc = (openssl_ctx_t *) ctx; + session_t *tls_session = session_get_from_handle (ctx->tls_session_handle); + + if (!SSL_in_init (oc->ssl)) + { + TLS_DBG (8, "[!SSL_in_init]==>CTX: %p EVT: %p EIDX: %d", ctx, event, + event->event_idx); + return 0; + } + + if (ctx->resume) + ctx->resume = 0; + else if (!svm_fifo_max_dequeue_cons (tls_session->rx_fifo)) + return 0; + + rv = SSL_do_handshake (oc->ssl); + err = SSL_get_error (oc->ssl, rv); + + /* Do not remove session from tail */ + if (err == SSL_ERROR_WANT_ASYNC) + return 0; + + if (err == SSL_ERROR_SSL) + { + char buf[512]; + ERR_error_string (ERR_get_error (), buf); + TLS_DBG (2, "[SSL_ERROR_SSL]==>CTX: %p EVT: %p EIDX: %d Buf: %s", ctx, + event, event->event_idx, buf); + openssl_handle_handshake_failure (ctx); + return 0; + } + + if (err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) + return 0; + + /* client not supported */ + if (!SSL_is_server (oc->ssl)) + return 0; + + /* Need to check transport status */ + if (ctx->is_passive_close) + { + openssl_handle_handshake_failure (ctx); + 
return 0; + } + + if (tls_notify_app_accept (ctx)) + { + ctx->c_s_index = SESSION_INVALID_INDEX; + tls_disconnect_transport (ctx); + } + + TLS_DBG (1, + "<=====Handshake for %u complete. TLS cipher is %s EVT: %p =====>", + oc->openssl_ctx_index, SSL_get_cipher (oc->ssl), event); + + return 1; +} + +int +tls_async_read_event_handler (void *async_evt, void *unused) +{ + openssl_evt_t *event = (openssl_evt_t *) async_evt; + int thread_index = event->thread_idx; + session_t *app_session, *tls_session; + openssl_ctx_t *oc; + tls_ctx_t *ctx; + SSL *ssl; + + ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); + oc = (openssl_ctx_t *) ctx; + ssl = oc->ssl; + + ctx->in_async_read = true; + /* read event */ + svm_fifo_t *app_rx_fifo, *tls_rx_fifo; + int read, err; + + app_session = session_get_from_handle (ctx->app_session_handle); + app_rx_fifo = app_session->rx_fifo; + + tls_session = session_get_from_handle (ctx->tls_session_handle); + tls_rx_fifo = tls_session->rx_fifo; + + /* continue the paused job */ + read = openssl_async_read_from_ssl_into_fifo (app_rx_fifo, ssl); + err = SSL_get_error (ssl, read); + + /* Unrecoverable protocol error. Reset connection */ + if (PREDICT_FALSE (read <= 0)) + { + if (err == SSL_ERROR_SSL) + tls_notify_app_io_error (ctx); + + if (SSL_want_async (ssl)) + return 0; + + goto ev_rd_done; + } + + /* + * Managed to read some data. If handshake just completed, session + * may still be in accepting state. 
+ */ + if (app_session->session_state >= SESSION_STATE_READY) + tls_notify_app_enqueue (ctx, app_session); + +ev_rd_done: + /* read done */ + ctx->in_async_read = false; + + if ((SSL_pending (ssl) > 0) || svm_fifo_max_dequeue_cons (tls_rx_fifo)) + tls_add_vpp_q_builtin_rx_evt (tls_session); + + return 1; +} + +int +tls_async_write_event_handler (void *async_evt, void *unused) +{ + openssl_evt_t *event = (openssl_evt_t *) async_evt; + int thread_index = event->thread_idx; + session_t *app_session, *tls_session; + openssl_ctx_t *oc; + tls_ctx_t *ctx; + SSL *ssl; + + ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); + oc = (openssl_ctx_t *) ctx; + ssl = oc->ssl; + + /* write event */ + int wrote = 0; + u32 space, enq_buf; + svm_fifo_t *app_tx_fifo, *tls_tx_fifo; + transport_send_params_t *sp = event->tran_sp; + + app_session = session_get_from_handle (ctx->app_session_handle); + app_tx_fifo = app_session->tx_fifo; + + /* continue the paused job */ + wrote = openssl_async_write_from_fifo_into_ssl (app_tx_fifo, ssl, oc); + if (!wrote) + { + /* paused job not ready, wait */ + if (SSL_want_async (ssl)) + return 0; + } + + /* Unrecoverable protocol error. 
Reset connection */ + if (PREDICT_FALSE (wrote < 0)) + { + tls_notify_app_io_error (ctx); + return 0; + } + + tls_session = session_get_from_handle (ctx->tls_session_handle); + tls_tx_fifo = tls_session->tx_fifo; + + /* prepare for remaining write(s) */ + space = svm_fifo_max_enqueue_prod (tls_tx_fifo); + /* Leave a bit of extra space for tls ctrl data, if any needed */ + space = clib_max ((int) space - TLSO_CTRL_BYTES, 0); + + if (svm_fifo_needs_deq_ntf (app_tx_fifo, wrote)) + session_dequeue_notify (app_session); + + /* we got here, async write is done or not possible */ + oc->total_async_write = 0; + + if (PREDICT_FALSE (ctx->app_closed && BIO_ctrl_pending (oc->rbio) <= 0)) + openssl_confirm_app_close (ctx); + + /* Deschedule and wait for deq notification if fifo is almost full */ + enq_buf = clib_min (svm_fifo_size (tls_tx_fifo) / 2, TLSO_MIN_ENQ_SPACE); + if (space < wrote + enq_buf) + { + svm_fifo_add_want_deq_ntf (tls_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + transport_connection_deschedule (&ctx->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + } + else + { + /* Request tx reschedule of the app session */ + app_session->flags |= SESSION_F_CUSTOM_TX; + transport_connection_reschedule (&ctx->connection); + } + + return 1; +} + static uword tls_async_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c index e63413a390..9d80b9e6c6 100644 --- a/src/plugins/tlsopenssl/tls_openssl.c +++ b/src/plugins/tlsopenssl/tls_openssl.c @@ -73,9 +73,17 @@ openssl_ctx_free (tls_ctx_t * ctx) SSL_free (oc->ssl); vec_free (ctx->srv_hostname); SSL_CTX_free (oc->client_ssl_ctx); -#ifdef HAVE_OPENSSL_ASYNC - openssl_evt_free (ctx->evt_index, ctx->c_thread_index); -#endif + + if (openssl_main.async) + { + TLS_DBG (2, "FREEING_ASYNC_EVTS: CTX: %p", ctx); + openssl_evt_free (oc->evt_index[SSL_ASYNC_EVT_INIT], + ctx->c_thread_index); + openssl_evt_free 
(oc->evt_index[SSL_ASYNC_EVT_RD], + ctx->c_thread_index); + openssl_evt_free (oc->evt_index[SSL_ASYNC_EVT_WR], + ctx->c_thread_index); + } } pool_put_index (openssl_main.ctx_pool[ctx->c_thread_index], @@ -89,6 +97,7 @@ openssl_ctx_detach (tls_ctx_t *ctx) oc_copy = clib_mem_alloc (sizeof (*oc_copy)); clib_memcpy (oc_copy, oc, sizeof (*oc)); + TLS_DBG (2, "CTX: %p detached"); return oc_copy; } @@ -112,6 +121,7 @@ openssl_ctx_attach (u32 thread_index, void *ctx_ptr) sh = (*oc)->ctx.tls_session_handle; BIO_set_data ((*oc)->rbio, uword_to_pointer (sh, void *)); BIO_set_data ((*oc)->wbio, uword_to_pointer (sh, void *)); + TLS_DBG (2, "OSSL_CTX: %p attached", *oc); return ((*oc)->openssl_ctx_index); } @@ -158,12 +168,8 @@ openssl_lctx_get (u32 lctx_index) return pool_elt_at_index (openssl_main.lctx_pool, lctx_index); } -#define ossl_check_err_is_fatal(_ssl, _rv) \ - if (PREDICT_FALSE (_rv < 0 && SSL_get_error (_ssl, _rv) == SSL_ERROR_SSL)) \ - return -1; - -static int -openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl, u32 max_len) +int +openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl) { int read, rv, n_fs, i; const int n_segs = 2; @@ -174,7 +180,6 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl, u32 max_len) if (!max_enq) return 0; - max_enq = clib_min (max_len, max_enq); n_fs = svm_fifo_provision_chunks (f, fs, n_segs, max_enq); if (n_fs < 0) return 0; @@ -183,8 +188,9 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl, u32 max_len) read = SSL_read (ssl, fs[0].data, fs[0].len); if (read <= 0) { + TLS_DBG (2, "FATAL_ERROR"); ossl_check_err_is_fatal (ssl, read); - return 0; + return read; } if (read == (int) fs[0].len) @@ -202,6 +208,7 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl, u32 max_len) } } svm_fifo_enqueue_nocopy (f, read); + TLS_DBG (2, "READ_ENQ_NOCOPY: %d", read); return read; } @@ -235,32 +242,28 @@ openssl_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, u32 max_len) return wrote; } -#ifdef 
HAVE_OPENSSL_ASYNC -static int -openssl_check_async_status (tls_ctx_t * ctx, openssl_resume_handler * handler, - session_t * session) +int +openssl_handle_want_async (tls_ctx_t *ctx, int evt_type, + transport_send_params_t *sp, int size) { openssl_ctx_t *oc = (openssl_ctx_t *) ctx; - int estatus; - - SSL_get_async_status (oc->ssl, &estatus); - if (estatus == ASYNC_STATUS_EAGAIN) - { - vpp_tls_async_update_event (ctx, 1); - } - else + int ret = 0; + if (evt_type == SSL_ASYNC_EVT_WR) { - vpp_tls_async_update_event (ctx, 0); + /* de-schedule transport connection */ + transport_connection_deschedule (&ctx->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + TLS_DBG (2, "WR_EVT_TOTAL_WRITE: %d", size); + TLS_DBG (2, "CTX_TOTAL_WRITE: %d", oc->total_async_write); + oc->total_async_write = size; } + TLS_DBG (2, "WANT_ASYNC_EVT_TYPE_%d", evt_type); - return 1; - + return ret; } -#endif - -static void -openssl_handle_handshake_failure (tls_ctx_t * ctx) +void +openssl_handle_handshake_failure (tls_ctx_t *ctx) { session_t *app_session; @@ -273,6 +276,7 @@ openssl_handle_handshake_failure (tls_ctx_t * ctx) session_get_if_valid (ctx->c_s_index, ctx->c_thread_index); if (app_session) { + TLS_DBG (2, "FREEING_APP_SESSION: %p", app_session); session_free (app_session); ctx->c_s_index = SESSION_INVALID_INDEX; tls_disconnect_transport (ctx); @@ -284,14 +288,17 @@ openssl_handle_handshake_failure (tls_ctx_t * ctx) /* * Also handles cleanup of the pre-allocated session */ + TLS_DBG (2, "NOTIFY_APP_DISCONNECTED: %p", ctx); tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE); tls_disconnect_transport (ctx); } } int -openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session) +openssl_ctx_handshake_rx (void *hs_ctx, void *hs_tls_session) { + tls_ctx_t *ctx = (tls_ctx_t *) hs_ctx; + session_t *tls_session = (session_t *) hs_tls_session; openssl_ctx_t *oc = (openssl_ctx_t *) ctx; int rv = 0, err; @@ -307,19 +314,19 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * 
tls_session) rv = SSL_do_handshake (oc->ssl); err = SSL_get_error (oc->ssl, rv); -#ifdef HAVE_OPENSSL_ASYNC - if (err == SSL_ERROR_WANT_ASYNC) + if (openssl_main.async && err == SSL_ERROR_WANT_ASYNC) { - openssl_check_async_status (ctx, openssl_ctx_handshake_rx, - tls_session); + TLS_DBG (2, "WANT_ASYNC_HS_RX: CTX: %p", ctx); + vpp_tls_async_init_event (ctx, tls_async_handshake_event_handler, + NULL, tls_session, SSL_ASYNC_EVT_INIT, 0); + return -1; } -#endif + if (err == SSL_ERROR_SSL) { char buf[512]; ERR_error_string (ERR_get_error (), buf); - clib_warning ("Err: %s", buf); - + TLS_DBG (2, "Err: %s", buf); openssl_handle_handshake_failure (ctx); return -1; } @@ -384,13 +391,21 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session) return rv; } -static void -openssl_confirm_app_close (tls_ctx_t * ctx) +void +openssl_confirm_app_close (tls_ctx_t *ctx) { openssl_ctx_t *oc = (openssl_ctx_t *) ctx; SSL_shutdown (oc->ssl); tls_disconnect_transport (ctx); session_transport_closed_notify (&ctx->connection); + session_t *app_session = session_get_from_handle (ctx->app_session_handle); + + /* Shrink FIFOs */ + if (app_session) + { + TLS_DBG (2, "SHRINK_FIFOS"); + session_shrink_fifos (app_session); + } } static int @@ -426,15 +441,26 @@ openssl_ctx_write_tls (tls_ctx_t *ctx, session_t *app_session, wrote = openssl_write_from_fifo_into_ssl (f, oc->ssl, deq_max); - /* Unrecoverable protocol error. Reset connection */ if (PREDICT_FALSE (wrote < 0)) { + int err = SSL_get_error (oc->ssl, wrote); + if (err == SSL_ERROR_WANT_WRITE) + return 0; + /* Unrecoverable protocol error. 
Reset connection */ tls_notify_app_io_error (ctx); return 0; } if (!wrote) - goto check_tls_fifo; + { + if (openssl_main.async && SSL_want_async (oc->ssl)) + { + vpp_tls_async_init_event (ctx, tls_async_write_event_handler, sp, ts, + SSL_ASYNC_EVT_WR, deq_max); + return 0; + } + goto check_tls_fifo; + } if (svm_fifo_needs_deq_ntf (f, wrote)) session_dequeue_notify (app_session); @@ -534,13 +560,13 @@ static inline int openssl_ctx_read_tls (tls_ctx_t *ctx, session_t *tls_session) { openssl_ctx_t *oc = (openssl_ctx_t *) ctx; - const u32 max_len = 128 << 10; session_t *app_session; svm_fifo_t *f; int read; if (PREDICT_FALSE (SSL_in_init (oc->ssl))) { + TLS_DBG (2, "SSL_INIT_FAILED"); if (openssl_ctx_handshake_rx (ctx, tls_session) < 0) return 0; @@ -548,26 +574,49 @@ openssl_ctx_read_tls (tls_ctx_t *ctx, session_t *tls_session) tls_session = session_get_from_handle (ctx->tls_session_handle); } + if (ctx->in_async_read) + { + TLS_DBG (2, "ASYNC_READ_ALREADY_SET"); + return 0; + } + app_session = session_get_from_handle (ctx->app_session_handle); f = app_session->rx_fifo; - read = openssl_read_from_ssl_into_fifo (f, oc->ssl, max_len); + read = openssl_read_from_ssl_into_fifo (f, oc->ssl); + if (read <= 0) + { + if (openssl_main.async && SSL_want_async (oc->ssl)) + { + ctx->in_async_read = true; + TLS_DBG (2, "WANT_ASYNC_FOR_READ"); + vpp_tls_async_init_event (ctx, tls_async_read_event_handler, NULL, + tls_session, SSL_ASYNC_EVT_RD, 0); + return 0; + } + } /* Unrecoverable protocol error. 
Reset connection */ - if (PREDICT_FALSE (read < 0)) + if (PREDICT_FALSE ((read < 0) && + (SSL_get_error (oc->ssl, read) == SSL_ERROR_SSL))) { + TLS_DBG (2, "APP_IO_ERR"); tls_notify_app_io_error (ctx); return 0; } - if (read) - tls_notify_app_enqueue (ctx, app_session); + /* If handshake just completed, session may still be in accepting state */ + if (read > 0 && app_session->session_state >= SESSION_STATE_READY) + { + TLS_DBG (2, "NOTIFY_APP_ENQ"); + tls_notify_app_enqueue (ctx, app_session); + } if ((SSL_pending (oc->ssl) > 0) || svm_fifo_max_dequeue_cons (tls_session->rx_fifo)) tls_add_vpp_q_builtin_rx_evt (tls_session); - return read; + return (read > 0) ? read : 0; } static inline int @@ -752,6 +801,10 @@ openssl_ctx_init_client (tls_ctx_t * ctx) if (om->async) SSL_CTX_set_mode (oc->client_ssl_ctx, SSL_MODE_ASYNC); #endif + + /* Set TLSv1_2 */ + SSL_CTX_set_max_proto_version (oc->client_ssl_ctx, TLS1_2_VERSION); + rv = SSL_CTX_set_cipher_list (oc->client_ssl_ctx, (const char *) om->ciphers); if (rv != 1) @@ -796,7 +849,17 @@ openssl_ctx_init_client (tls_ctx_t * ctx) { TLS_DBG (1, "Couldn't set client certificate-key pair"); } - + /* Set TLS Record size */ + if (om->record_size) + { + rv = SSL_CTX_set_max_send_fragment (oc->client_ssl_ctx, om->record_size); + if (rv != 1) + { + TLS_DBG (1, "Couldn't set TLS record-size"); + return -1; + } + TLS_DBG (1, "Using TLS record-size of %d", om->record_size); + } /* * 2. Do the first steps in the handshake. 
*/ @@ -805,7 +868,7 @@ openssl_ctx_init_client (tls_ctx_t * ctx) #ifdef HAVE_OPENSSL_ASYNC session_t *tls_session = session_get_from_handle (ctx->tls_session_handle); - vpp_tls_async_init_event (ctx, openssl_ctx_handshake_rx, tls_session); + openssl_ctx_handshake_rx (ctx, tls_session); #endif while (1) { @@ -814,8 +877,7 @@ openssl_ctx_init_client (tls_ctx_t * ctx) #ifdef HAVE_OPENSSL_ASYNC if (err == SSL_ERROR_WANT_ASYNC) { - openssl_check_async_status (ctx, openssl_ctx_handshake_rx, - tls_session); + TLS_DBG (2, "WANT_ASYNC"); break; } #endif @@ -889,6 +951,39 @@ openssl_start_listen (tls_ctx_t * lctx) TLS_DBG (1, "Couldn't set temp DH parameters"); return -1; } + /* Set TLSv1_2 */ + SSL_CTX_set_max_proto_version (ssl_ctx, TLS1_2_VERSION); + /* Set TLS Record size */ + if (om->record_size) + { + rv = SSL_CTX_set_max_send_fragment (ssl_ctx, om->record_size); + if (rv != 1) + { + TLS_DBG (1, "Couldn't set TLS record-size"); + return -1; + } + } + /* Set TLS Record Split size */ + if (om->record_split_size) + { + rv = SSL_CTX_set_split_send_fragment (ssl_ctx, om->record_split_size); + if (rv != 1) + { + TLS_DBG (1, "Couldn't set TLS record-split-size"); + return -1; + } + } + + /* Set TLS Max Pipeline count */ + if (om->max_pipelines) + { + rv = SSL_CTX_set_max_pipelines (ssl_ctx, om->max_pipelines); + if (rv != 1) + { + TLS_DBG (1, "Couldn't set TLS max-pipelines"); + return -1; + } + } /* * Set the key and cert @@ -1007,27 +1102,27 @@ openssl_ctx_init_server (tls_ctx_t * ctx) TLS_DBG (1, "Initiating handshake for [%u]%u", ctx->c_thread_index, oc->openssl_ctx_index); -#ifdef HAVE_OPENSSL_ASYNC - session_t *tls_session = session_get_from_handle (ctx->tls_session_handle); - vpp_tls_async_init_event (ctx, openssl_ctx_handshake_rx, tls_session); -#endif + if (openssl_main.async) + { + session_t *tls_session = + session_get_from_handle (ctx->tls_session_handle); + openssl_ctx_handshake_rx (ctx, tls_session); + } + while (1) { rv = SSL_do_handshake (oc->ssl); err = 
SSL_get_error (oc->ssl, rv); -#ifdef HAVE_OPENSSL_ASYNC - if (err == SSL_ERROR_WANT_ASYNC) + if (openssl_main.async && err == SSL_ERROR_WANT_ASYNC) { - openssl_check_async_status (ctx, openssl_ctx_handshake_rx, - tls_session); + TLS_DBG (2, "WANT_ASYNC"); break; } -#endif if (err != SSL_ERROR_WANT_WRITE) break; } - TLS_DBG (2, "tls state for [%u]%u is su", ctx->c_thread_index, + TLS_DBG (2, "tls state for [%u]%u is %s", ctx->c_thread_index, oc->openssl_ctx_index, SSL_state_string_long (oc->ssl)); return 0; } @@ -1038,19 +1133,22 @@ openssl_handshake_is_over (tls_ctx_t * ctx) openssl_ctx_t *mc = (openssl_ctx_t *) ctx; if (!mc->ssl) return 0; + TLS_DBG (2, "HS_FINISHED"); return SSL_is_init_finished (mc->ssl); } static int openssl_transport_close (tls_ctx_t * ctx) { -#ifdef HAVE_OPENSSL_ASYNC - if (vpp_openssl_is_inflight (ctx)) - return 0; -#endif + if (openssl_main.async && vpp_openssl_is_inflight (ctx)) + { + TLS_DBG (2, "EVTS_IN_FLIGHT"); + return 0; + } if (!openssl_handshake_is_over (ctx)) { + TLS_DBG (2, "HS_NOT_OVER"); openssl_handle_handshake_failure (ctx); return 0; } @@ -1221,7 +1319,6 @@ VLIB_INIT_FUNCTION (tls_openssl_init) = }; /* *INDENT-ON* */ -#ifdef HAVE_OPENSSL_ASYNC static clib_error_t * tls_openssl_set_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -1298,7 +1395,45 @@ VLIB_CLI_COMMAND (tls_openssl_set_command, static) = .function = tls_openssl_set_command_fn, }; /* *INDENT-ON* */ -#endif + +static clib_error_t * +tls_openssl_set_tls_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + openssl_main_t *om = &openssl_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "record-size %U", unformat_memory_size, + &om->record_size)) + { + clib_warning ("Using TLS record-size of %d", om->record_size); + } + else if (unformat (input, "record-split-size %U", unformat_memory_size, + &om->record_split_size)) + { + clib_warning ("Using TLS 
record-split-size of %d", + om->record_split_size); + } + else if (unformat (input, "max-pipelines %U", unformat_memory_size, + &om->max_pipelines)) + { + clib_warning ("Using TLS max-pipelines of %d", om->max_pipelines); + } + else + return clib_error_return (0, "failed: unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +VLIB_CLI_COMMAND (tls_openssl_set_tls, static) = { + .path = "tls openssl set-tls", + .short_help = "tls openssl set-tls [record-size ] [record-split-size " + "] [max-pipelines ]", + .function = tls_openssl_set_tls_fn, +}; /* *INDENT-OFF* */ VLIB_PLUGIN_REGISTER () = { diff --git a/src/plugins/tlsopenssl/tls_openssl.h b/src/plugins/tlsopenssl/tls_openssl.h index 1600cd77ab..260e3cf76d 100644 --- a/src/plugins/tlsopenssl/tls_openssl.h +++ b/src/plugins/tlsopenssl/tls_openssl.h @@ -29,11 +29,36 @@ #define DTLSO_MAX_DGRAM 2000 +#define ossl_check_err_is_fatal(_ssl, _rv) \ + if (PREDICT_FALSE (_rv < 0 && SSL_get_error (_ssl, _rv) == SSL_ERROR_SSL)) \ + return -1; + +#define ERRORPR(fmt_str, ...) 
\ + do \ + { \ + FILE *f = fopen ("/tmp/vpp-err.log", "a"); \ + if (f != NULL) \ + { \ + struct timespec ts; \ + clock_gettime (CLOCK_REALTIME, &ts); \ + fprintf (f, "[%ld.%06ld]vpp[%d:%lx:%02ld][%d:%s] " fmt_str, \ + ts.tv_sec, ts.tv_nsec / 1000, getpid (), \ + (unsigned long) pthread_self (), vlib_get_thread_index (), \ + __LINE__, __func__, ##__VA_ARGS__); \ + fflush (f); \ + fclose (f); \ + } \ + } \ + while (0) + typedef struct tls_ctx_openssl_ { tls_ctx_t ctx; /**< First */ u32 openssl_ctx_index; SSL_CTX *client_ssl_ctx; + u32 evt_index[SSL_ASYNC_EVT_MAX]; + bool evt_alloc_flag[SSL_ASYNC_EVT_MAX]; + u32 total_async_write; SSL *ssl; BIO *rbio; BIO *wbio; @@ -63,15 +88,18 @@ typedef struct openssl_main_ u8 *ciphers; int engine_init; int async; + u32 record_size; + u32 record_split_size; + u32 max_pipelines; } openssl_main_t; -typedef int openssl_resume_handler (tls_ctx_t * ctx, session_t * tls_session); +typedef int openssl_resume_handler (void *event, void *session); +typedef int (*async_handlers) (void *event, void *session); tls_ctx_t *openssl_ctx_get_w_thread (u32 ctx_index, u8 thread_index); -int vpp_tls_async_init_event (tls_ctx_t * ctx, - openssl_resume_handler * handler, - session_t * session); -int vpp_tls_async_update_event (tls_ctx_t * ctx, int eagain); +int vpp_tls_async_init_event (tls_ctx_t *ctx, openssl_resume_handler *handler, + transport_send_params_t *sp, session_t *session, + ssl_async_evt_type_t evt_type, int wr_size); int tls_async_openssl_callback (SSL * s, void *evt); int openssl_evt_free (int event_idx, u8 thread_index); void openssl_polling_start (ENGINE * engine); @@ -80,7 +108,13 @@ void openssl_async_node_enable_disable (u8 is_en); clib_error_t *tls_openssl_api_init (vlib_main_t * vm); int tls_openssl_set_ciphers (char *ciphers); int vpp_openssl_is_inflight (tls_ctx_t * ctx); +int openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl); +void openssl_handle_handshake_failure (tls_ctx_t *ctx); +void openssl_confirm_app_close 
/**
 * Shrink fifo allocated in fifo segment
 *
 * If the fifo is empty, hands its current chunk list back to the owning
 * slice and replaces it with a freshly allocated chunk. A fifo that still
 * holds data is left untouched.
 *
 * @param fs	fifo segment the fifo was allocated from
 * @param f	fifo to be shrunk
 */
void
fifo_segment_shrink_fifo (fifo_segment_t *fs, svm_fifo_t *f)
{
  fifo_segment_header_t *fsh = fs->h;
  fifo_segment_slice_t *fss;
  svm_fifo_shared_t *sf;
  svm_fifo_chunk_t *c;

  ASSERT (f->refcnt > 0);

  /* Do not try to shrink buffers if the FIFO contains data */
  if (!svm_fifo_is_empty (f))
    return;

  sf = f->shr;
  fss = fsh_slice_get (fsh, sf->slice_index);

  /* Free fifo chunks */
  fsh_slice_collect_chunks (fsh, fss, fs_chunk_ptr (fsh, f->shr->start_chunk));

  /* Drop all references to the chunks that were just collected */
  sf->start_chunk = sf->end_chunk = 0;
  sf->head_chunk = sf->tail_chunk = 0;

  /* Requested size is 0; presumably yields a minimum-size chunk - TODO
   * confirm against fsh_try_alloc_chunk */
  c = fsh_try_alloc_chunk (fsh, fss, 0);
  /* NOTE(review): on allocation failure the fifo is left with all four
   * chunk pointers zeroed and its size unchanged - confirm callers can
   * cope with a chunk-less fifo */
  if (!c)
    return;

  sf->head_chunk = sf->start_chunk = fs_chunk_sptr (fsh, c);
  /* Walk to the last chunk in case more than one chunk was returned */
  while (c->next)
    c = fs_chunk_ptr (fsh, c->next);
  sf->tail_chunk = sf->end_chunk = fs_chunk_sptr (fsh, c);

  /* NOTE(review): size is set to 0 rather than to the length of the new
   * chunk - confirm this is intended */
  svm_fifo_set_size (f, 0);
}
Free fifo allocated in fifo segment * diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index 7ea114f870..14c395f63a 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -811,6 +811,8 @@ svm_fifo_clear_deq_ntf (svm_fifo_t * f) clib_atomic_store_rel_n (&f->shr->has_deq_ntf, 1); if (want_deq_ntf & SVM_FIFO_WANT_DEQ_NOTIF) svm_fifo_del_want_deq_ntf (f, SVM_FIFO_WANT_DEQ_NOTIF); + if (want_deq_ntf & SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY) + svm_fifo_del_want_deq_ntf (f, SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY); } /** diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index 64155ea3a4..be9e5ed5ca 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -105,6 +105,8 @@ typedef struct ldp_worker_ctx_ } ldp_worker_ctx_t; +__thread ldp_worker_ctx_t _ldp_worker = {}; + /* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure * they are the same size */ STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask), @@ -114,7 +116,6 @@ STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask), typedef struct { - ldp_worker_ctx_t *workers; int init; char app_name[LDP_APP_NAME_MAX]; u32 vlsh_bit_val; @@ -154,7 +155,7 @@ static ldp_main_t *ldp = &ldp_main; static inline ldp_worker_ctx_t * ldp_worker_get_current (void) { - return (ldp->workers + vppcom_worker_index ()); + return &_ldp_worker; } /* @@ -190,14 +191,6 @@ ldp_fd_to_vlsh (int fd) return (fd - ldp->vlsh_bit_val); } -static void -ldp_alloc_workers (void) -{ - if (ldp->workers) - return; - ldp->workers = vec_new (ldp_worker_ctx_t, LDP_MAX_NWORKERS); -} - static void ldp_init_cfg (void) { @@ -285,7 +278,6 @@ ldp_init_cfg (void) static int ldp_init (void) { - ldp_worker_ctx_t *ldpw; int rv; if (ldp->init) @@ -311,10 +303,6 @@ ldp_init (void) return rv; } ldp->vcl_needs_real_epoll = 0; - ldp_alloc_workers (); - - vec_foreach (ldpw, ldp->workers) - clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time)); LDBG (0, "LDP initialization: done!"); @@ -342,13 +330,11 @@ close (int fd) epfd = vls_attr (vlsh, 
VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (epfd > 0) { - ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); u32 size = sizeof (epfd); LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd); libc_close (epfd); - ldpw->mq_epfd_added = 0; epfd = 0; (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size); @@ -2390,14 +2376,9 @@ epoll_create1 (int flags) if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ()) { - /* Make sure workers have been allocated */ - if (!ldp->workers) - { - ldp_alloc_workers (); - ldpw = ldp_worker_get_current (); - } rv = libc_epoll_create1 (flags); ldp->vcl_needs_real_epoll = 0; + /* Assume this is a request to create the mq epfd */ ldpw->vcl_mq_epfd = rv; LDBG (0, "created vcl epfd %u", rv); return rv; @@ -2658,6 +2639,7 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, if (PREDICT_FALSE (!ldpw->mq_epfd_added)) { struct epoll_event e = { 0 }; + ldpw->vcl_mq_epfd = vppcom_mq_epoll_fd (); e.events = EPOLLIN; e.data.fd = ldpw->vcl_mq_epfd; if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) < diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c index 412db8def3..11283099ef 100644 --- a/src/vcl/vcl_locked.c +++ b/src/vcl/vcl_locked.c @@ -374,7 +374,13 @@ vls_worker_get_current (void) return pool_elt_at_index (vlsm->workers, vls_get_worker_index ()); } -static void +static inline u8 +vls_n_workers (void) +{ + return pool_elts (vlsm->workers); +} + +static vls_worker_t * vls_worker_alloc (void) { vls_worker_t *wrk; @@ -385,6 +391,8 @@ vls_worker_alloc (void) wrk->vcl_wrk_index = vcl_get_worker_index (); vec_validate (wrk->pending_vcl_wrk_cleanup, 16); vec_reset_length (wrk->pending_vcl_wrk_cleanup); + + return wrk; } static void @@ -1300,7 +1308,7 @@ vls_mp_checks (vcl_locked_session_t * vls, int is_add) vcl_session_t *s; u32 owner_wrk; - if (vls_mt_wrk_supported ()) + if (vls_mt_wrk_supported () && vls_n_workers () <= 1) return; ASSERT (wrk->wrk_index == vls->vcl_wrk_index); @@ -1775,6 +1783,7 @@ static 
void vls_app_fork_child_handler (void) { vcl_worker_t *parent_wrk; + vls_worker_t *vls_wrk; int parent_wrk_index; parent_wrk_index = vcl_get_worker_index (); @@ -1798,11 +1807,11 @@ vls_app_fork_child_handler (void) /* * Allocate/initialize vls worker and share sessions */ - vls_worker_alloc (); + vls_wrk = vls_worker_alloc (); /* Reset number of threads and set wrk index */ vlsl->vls_mt_n_threads = 0; - vlsl->vls_wrk_index = vcl_get_worker_index (); + vlsl->vls_wrk_index = vls_wrk - vlsm->workers; vlsl->select_mp_check = 0; clib_rwlock_init (&vlsl->vls_pool_lock); vls_mt_locks_init (); diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index d9c3b30afe..5174fe844e 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -2830,7 +2830,7 @@ vppcom_epoll_create (void) static void vcl_epoll_ctl_add_unhandled_event (vcl_worker_t *wrk, vcl_session_t *s, - u8 is_epollet, session_evt_type_t evt) + u32 is_epollet, session_evt_type_t evt) { if (!is_epollet) { diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 82fe641278..b5200ba502 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -473,6 +473,10 @@ vlib_buffer_alloc_size (uword ext_hdr_size, uword data_size) uword alloc_size = ext_hdr_size + sizeof (vlib_buffer_t) + data_size; alloc_size = round_pow2 (alloc_size, VLIB_BUFFER_ALIGN); + /* in case when we have even number of 'cachelines', we add one more for + * better cache occupancy */ + alloc_size |= VLIB_BUFFER_ALIGN; + return alloc_size; } diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 7e6ac25f10..751188f976 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -1057,6 +1057,13 @@ vlib_worker_thread_node_refork (void) VLIB_NODE_RUNTIME_DATA_SIZE); } + for (j = vec_len (old_rt); + j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); j++) + { + rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j]; + nm_clone->input_node_counts_by_state[rt->state] += 1; + } + vec_free (old_rt); /* re-clone pre-input nodes */ diff --git 
a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 3225540165..129af6dbf9 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -1410,6 +1410,36 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES teib/teib.api) +############################################################################## +# TM +############################################################################## + +list(APPEND VNET_SOURCES + tm/tm.c + tm/tm_api.c +) + +list(APPEND VNET_HEADERS + tm/tm.h +) + +list(APPEND VNET_API_FILES tm/tm.api) + +############################################################################## +# PFC +############################################################################## + +list(APPEND VNET_SOURCES + pfc/pfc.c + pfc/pfc_api.c +) + +list(APPEND VNET_HEADERS + pfc/pfc.h +) + +list(APPEND VNET_API_FILES pfc/pfc.api) + ############################################################################## # ARP/ND ############################################################################## @@ -1477,6 +1507,8 @@ add_vat_test_library(vnet session/session_test.c l2/l2_test.c ipsec/ipsec_test.c + tm/tm_test.c + pfc/pfc_test.c ) ############################################################################## diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 144f62ac17..e141e948cd 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -114,7 +114,8 @@ STATIC_ASSERT (((VNET_BUFFER_FLAGS_ALL_AVAIL & VLIB_BUFFER_FLAGS_ALL) == 0), _ (3, OUTER_IP_CKSUM, "offload-outer-ip-cksum", 1) \ _ (4, OUTER_UDP_CKSUM, "offload-outer-udp-cksum", 1) \ _ (5, TNL_VXLAN, "offload-vxlan-tunnel", 1) \ - _ (6, TNL_IPIP, "offload-ipip-tunnel", 1) + _ (6, TNL_IPIP, "offload-ipip-tunnel", 1) \ + _ (7, IPSEC_OFFLOAD, "offload-ipsec-outbound", 1) typedef enum { diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h index 877eb183ea..1f13f80e58 100644 --- a/src/vnet/crypto/crypto.h +++ b/src/vnet/crypto/crypto.h @@ -130,7 +130,16 @@ typedef enum _ (AES_256_CBC, 
SHA512, "aes-256-cbc-hmac-sha-512", 32, 32) \ _ (AES_128_CTR, SHA1, "aes-128-ctr-hmac-sha-1", 16, 12) \ _ (AES_192_CTR, SHA1, "aes-192-ctr-hmac-sha-1", 24, 12) \ - _ (AES_256_CTR, SHA1, "aes-256-ctr-hmac-sha-1", 32, 12) + _ (AES_256_CTR, SHA1, "aes-256-ctr-hmac-sha-1", 32, 12) \ + _ (AES_128_CTR, SHA256, "aes-128-ctr-hmac-sha-256", 16, 16) \ + _ (AES_192_CTR, SHA256, "aes-192-ctr-hmac-sha-256", 24, 16) \ + _ (AES_256_CTR, SHA256, "aes-256-ctr-hmac-sha-256", 32, 16) \ + _ (AES_128_CTR, SHA384, "aes-128-ctr-hmac-sha-384", 16, 24) \ + _ (AES_192_CTR, SHA384, "aes-192-ctr-hmac-sha-384", 24, 24) \ + _ (AES_256_CTR, SHA384, "aes-256-ctr-hmac-sha-384", 32, 24) \ + _ (AES_128_CTR, SHA512, "aes-128-ctr-hmac-sha-512", 16, 32) \ + _ (AES_192_CTR, SHA512, "aes-192-ctr-hmac-sha-512", 24, 32) \ + _ (AES_256_CTR, SHA512, "aes-256-ctr-hmac-sha-512", 32, 32) #define foreach_crypto_async_op_type \ _(ENCRYPT, "async-encrypt") \ diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c index 114b63d666..18bfb88ffe 100644 --- a/src/vnet/dev/api.c +++ b/src/vnet/dev/api.c @@ -205,7 +205,7 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, port->intf.num_rx_queues = args->num_rx_queues; } else - port->intf.num_rx_queues = clib_min (port->attr.max_tx_queues, 1); + port->intf.num_rx_queues = 1; if (args->num_tx_queues) { @@ -273,3 +273,43 @@ vnet_dev_api_remove_port_if (vlib_main_t *vm, return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove); } + +vnet_dev_rv_t +vnet_dev_api_port_set_rss_key (vlib_main_t *vm, + vnet_dev_api_port_set_rss_key_args_t *args) +{ + vnet_dev_port_t *port = 0; + vnet_dev_t *dev = vnet_dev_by_index (args->dev_index); + vnet_dev_rv_t rv = VNET_DEV_OK; + vnet_dev_port_cfg_change_req_t req = { + .type = VNET_DEV_PORT_CFG_SET_RSS_KEY, + .rss_key = args->rss_key, + }; + + if (!dev) + return VNET_DEV_ERR_UNKNOWN_DEVICE; + + log_debug (dev, "port %u rss_key %U", args->port_id, + format_hex_bytes_no_wrap, args->rss_key.key, + args->rss_key.length); + + port = 
vnet_dev_get_port_by_id (dev, args->port_id); + if (!port) + return VNET_DEV_ERR_UNKNOWN_DEVICE; + + rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req); + if (rv != VNET_DEV_OK) + { + log_err (dev, "RSS key cannot be set"); + return rv; + } + + rv = vnet_dev_process_port_cfg_change_req (vm, port, &req); + if (rv != VNET_DEV_OK) + { + log_err (dev, "device failed to set RSS key"); + return rv; + } + + return rv; +} diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h index 1b7bf27d62..92752bfb12 100644 --- a/src/vnet/dev/api.h +++ b/src/vnet/dev/api.h @@ -65,4 +65,15 @@ vnet_dev_rv_t vnet_dev_api_remove_port_if (vlib_main_t *, vnet_dev_api_remove_port_if_args_t *); +typedef struct +{ + u32 dev_index; + vnet_dev_port_id_t port_id; + vnet_dev_rss_key_t rss_key; +} vnet_dev_api_port_set_rss_key_args_t; + +vnet_dev_rv_t +vnet_dev_api_port_set_rss_key (vlib_main_t *, + vnet_dev_api_port_set_rss_key_args_t *); + #endif /* _VNET_DEV_API_H_ */ diff --git a/src/vnet/dev/args.c b/src/vnet/dev/args.c index e302517cc6..07d1078761 100644 --- a/src/vnet/dev/args.c +++ b/src/vnet/dev/args.c @@ -82,7 +82,7 @@ vnet_dev_arg_parse (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_arg_t *args, else if (a->type == VNET_DEV_ARG_TYPE_UINT32) { u32 val, min = 0, max = CLIB_U32_MAX; - if (!unformat (&in, "%u", &val)) + if (!unformat (&in, "0x%x", &val) && !unformat (&in, "%u", &val)) { err = format (0, "unsigned integer in range %u - %u expected for " diff --git a/src/vnet/dev/args.h b/src/vnet/dev/args.h index 0c49d1fcfe..a256cfe8e0 100644 --- a/src/vnet/dev/args.h +++ b/src/vnet/dev/args.h @@ -24,7 +24,7 @@ typedef enum typedef union { u8 boolean; - u8 uint32; + u32 uint32; u8 *string; } vnet_dev_arg_value_t; diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c index 53be448318..aa7659e738 100644 --- a/src/vnet/dev/cli.c +++ b/src/vnet/dev/cli.c @@ -329,3 +329,70 @@ VLIB_CLI_COMMAND (show_device_counters_cmd, static) = { .function = show_device_counters_cmd_fn, .is_mp_safe = 1, }; + 
+static clib_error_t * +device_set_rss_key_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_port_set_rss_key_args_t a = {}; + vnet_dev_rv_t rv; + int device_id_set = 0; + int sw_if_index_set = 0; + vnet_dev_device_id_t device_id = {}; + uint32_t sw_if_index, n; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "port %u", &n)) + a.port_id = n; + else if (unformat (input, "dev %U", unformat_c_string_array, &device_id, + sizeof (device_id))) + device_id_set = 1; + else if (unformat (input, "key %U", unformat_vnet_dev_rss_key, + &a.rss_key)) + ; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + sw_if_index_set = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (sw_if_index_set == device_id_set) + return clib_error_return ( + 0, "please specify either interface name or port id"); + + if (sw_if_index_set) + { + vnet_dev_port_t *port = vnet_dev_get_port_from_sw_if_index (sw_if_index); + + if (port == 0) + return clib_error_return (0, "unsupported interface"); + a.port_id = port->port_id; + a.dev_index = port->dev->index; + } + else + { + vnet_dev_t *dev = vnet_dev_by_id (device_id); + if (!dev) + return clib_error_return (0, "please specify valid device id"); + a.dev_index = dev->index; + } + + rv = vnet_dev_api_port_set_rss_key (vm, &a); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to set_rss_key: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_set_rss_key_cmd, static) = { + .path = "device set-rss-key", + .short_help = "device set-rss-key [] [port ] [dev " + "] [key ]", + .function = device_set_rss_key_cmd_fn, + .is_mp_safe = 1, +}; diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c index 8883e727ac..9cb8e94683 100644 --- a/src/vnet/dev/config.c +++ b/src/vnet/dev/config.c @@ -15,34 +15,66 @@ VLIB_REGISTER_LOG_CLASS 
(dev_log, static) = { .subclass_name = "config", }; +typedef struct +{ + vnet_dev_api_create_port_if_args_t intf; + vnet_dev_api_port_set_rss_key_args_t rss_key_args; +} port_config_t; + static clib_error_t * vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input, - vnet_dev_api_create_port_if_args_t *args) + port_config_t *args) { clib_error_t *err = 0; - log_debug (0, "port %u %U", args->port_id, format_unformat_input, input); + log_debug (0, "port %u %U", args->intf.port_id, format_unformat_input, + input); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { u32 n; - if (unformat (input, "name %U", unformat_c_string_array, args->intf_name, - sizeof (args->intf_name))) + if (unformat (input, "name %U", unformat_c_string_array, + args->intf.intf_name, sizeof (args->intf.intf_name))) ; else if (unformat (input, "num-rx-queues %u", &n)) - args->num_rx_queues = n; + args->intf.num_rx_queues = n; else if (unformat (input, "num-tx-queues %u", &n)) - args->num_tx_queues = n; + args->intf.num_tx_queues = n; else if (unformat (input, "rx-queue-size %u", &n)) - args->rx_queue_size = n; + args->intf.rx_queue_size = n; else if (unformat (input, "tx-queue-size %u", &n)) - args->tx_queue_size = n; + args->intf.tx_queue_size = n; + else if (unformat (input, "rss-key %U", unformat_vnet_dev_rss_key, + &args->rss_key_args.rss_key)) + ; else if (unformat (input, "flags %U", unformat_vnet_dev_port_flags, - &args->flags)) + &args->intf.flags)) ; else if (unformat (input, "args %U", unformat_single_quoted_string, - &args->args)) + &args->intf.args)) + ; + else + { + err = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + break; + } + } + return err; +} + +static clib_error_t * +vnet_dev_config_driver_args (vlib_main_t *vm, unformat_input_t *input, + char *driver_name) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + clib_error_t *err = 0; + u8 *args; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if 
(unformat (input, "args %U", unformat_single_quoted_string, &args)) ; else { @@ -51,6 +83,43 @@ vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input, break; } } + + if (err == 0) + { + vnet_dev_driver_t *driver; + vnet_dev_rv_t rv = VNET_DEV_OK; + + vec_foreach (driver, dm->drivers) + { + if (driver_name[0] && + strcmp (driver_name, driver->registration->name)) + continue; + if (driver->registration->drv_args) + { + for (vnet_dev_arg_t *a = driver->registration->drv_args; + a->type != VNET_DEV_ARG_END; a++) + vec_add1 (driver->args, *a); + + if (args) + { + rv = vnet_dev_arg_parse (vm, NULL, driver->args, args); + if (rv != VNET_DEV_OK) + goto done; + + if (driver->ops.config_args) + rv = driver->ops.config_args (vm, driver); + break; + } + } + } + done: + vec_free (args); + + if (rv != VNET_DEV_OK) + err = clib_error_return (0, "error: %U for driver '%s'", + format_vnet_dev_rv, rv, driver_name); + } + return err; } static clib_error_t * @@ -60,7 +129,7 @@ vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input, log_debug (0, "device %s %U", device_id, format_unformat_input, input); clib_error_t *err = 0; vnet_dev_api_attach_args_t args = {}; - vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args; + port_config_t *ports = 0, *p; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -79,10 +148,9 @@ vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input, else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input, &sub_input)) { - vnet_dev_api_create_port_if_args_t *if_args; - vec_add2 (if_args_vec, if_args, 1); - if_args->port_id = n; - err = vnet_dev_config_one_interface (vm, &sub_input, if_args); + vec_add2 (ports, p, 1); + p->intf.port_id = n; + err = vnet_dev_config_one_interface (vm, &sub_input, p); unformat_free (&sub_input); if (err) break; @@ -105,12 +173,21 @@ vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input, if (rv == VNET_DEV_OK) { - vec_foreach (if_args, 
if_args_vec) + vec_foreach (p, ports) { - if_args->dev_index = args.dev_index; - rv = vnet_dev_api_create_port_if (vm, if_args); + p->intf.dev_index = args.dev_index; + rv = vnet_dev_api_create_port_if (vm, &p->intf); if (rv != VNET_DEV_OK) break; + if (p->rss_key_args.rss_key.length) + { + vnet_dev_api_port_set_rss_key_args_t *rka = &p->rss_key_args; + rka->port_id = p->intf.port_id; + rka->dev_index = p->intf.dev_index; + rv = vnet_dev_api_port_set_rss_key (vm, rka); + if (rv != VNET_DEV_OK) + break; + } } } @@ -119,7 +196,7 @@ vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input, format_vnet_dev_rv, rv, device_id); } - vec_free (if_args_vec); + vec_free (ports); return err; } @@ -128,6 +205,7 @@ dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) { vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_driver_name_t driver_name; unformat_input_t input; clib_error_t *err = 0; @@ -156,6 +234,13 @@ dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt, err = vnet_dev_config_one_device (vm, &no_input, device_id); unformat_free (&no_input); } + else if (unformat (&input, "driver %U %U", unformat_c_string_array, + driver_name, sizeof (driver_name), + unformat_vlib_cli_sub_input, &sub_input)) + { + err = vnet_dev_config_driver_args (vm, &sub_input, driver_name); + unformat_free (&sub_input); + } else err = clib_error_return (0, "unknown input '%U'", format_unformat_error, &input); diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c index 0a1e0a7419..d02839d664 100644 --- a/src/vnet/dev/counters.c +++ b/src/vnet/dev/counters.c @@ -54,7 +54,7 @@ vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm) { for (int i = 0; i < cm->n_counters; i++) { - cm->counter_start[i] = cm->counter_data[i]; + cm->counter_start[i] += cm->counter_data[i]; cm->counter_data[i] = 0; } } diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c index 0e04e9ab64..e04fa161ce 100644 --- a/src/vnet/dev/dev.c +++ 
b/src/vnet/dev/dev.c @@ -399,6 +399,7 @@ vnet_dev_main_init (vlib_main_t *vm) .mac_addr_change_function = vnet_dev_port_mac_change, .mac_addr_add_del_function = vnet_dev_add_del_mac_address, .flow_ops_function = vnet_dev_flow_ops_fn, + .format_flow = format_vnet_dev_flow, .set_rss_queues_function = vnet_dev_interface_set_rss_queues, }; driver->dev_class_index = vnet_register_device_class (vm, dev_class); diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h index e7c6ca45fb..be6fa3ed40 100644 --- a/src/vnet/dev/dev.h +++ b/src/vnet/dev/dev.h @@ -80,6 +80,7 @@ typedef union typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t; typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t; +typedef struct vnet_dev_driver vnet_dev_driver_t; typedef struct vnet_dev vnet_dev_t; typedef struct vnet_dev_port vnet_dev_port_t; typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t; @@ -90,6 +91,7 @@ typedef struct vnet_dev_counter vnet_dev_counter_t; typedef struct vnet_dev_counter_main vnet_dev_counter_main_t; typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t; +typedef vnet_dev_rv_t (vnet_dev_drv_op_t) (vlib_main_t *, vnet_dev_driver_t *); typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *); typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *); typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) ( @@ -115,6 +117,7 @@ typedef struct vnet_dev_rx_queue_op_t *start; vnet_dev_rx_queue_op_no_rv_t *stop; vnet_dev_rx_queue_op_no_rv_t *free; + vnet_dev_rx_queue_op_no_rv_t *clear_counters; format_function_t *format_info; } vnet_dev_rx_queue_ops_t; @@ -124,6 +127,7 @@ typedef struct vnet_dev_tx_queue_op_t *start; vnet_dev_tx_queue_op_no_rv_t *stop; vnet_dev_tx_queue_op_no_rv_t *free; + vnet_dev_tx_queue_op_no_rv_t *clear_counters; format_function_t *format_info; } vnet_dev_tx_queue_ops_t; @@ -144,7 +148,12 @@ typedef struct _ (ADD_SECONDARY_HW_ADDR) \ _ (REMOVE_SECONDARY_HW_ADDR) \ _ 
(RXQ_INTR_MODE_ENABLE) \ - _ (RXQ_INTR_MODE_DISABLE) + _ (RXQ_INTR_MODE_DISABLE) \ + _ (ADD_RX_FLOW) \ + _ (DEL_RX_FLOW) \ + _ (GET_RX_FLOW_COUNTER) \ + _ (RESET_RX_FLOW_COUNTER) \ + _ (SET_RSS_KEY) typedef enum { @@ -164,8 +173,14 @@ typedef struct vnet_dev_port_cfg_change_req { u8 promisc : 1; vnet_dev_hw_addr_t addr; - u16 max_rx_frame_size; + u32 max_rx_frame_size; vnet_dev_queue_id_t queue_id; + vnet_dev_rss_key_t rss_key; + struct + { + u32 flow_index; + uword *private_data; + }; }; } vnet_dev_port_cfg_change_req_t; @@ -217,6 +232,7 @@ typedef struct typedef struct { + vnet_dev_drv_op_t *config_args; vnet_dev_op_t *alloc; vnet_dev_op_t *init; vnet_dev_op_no_rv_t *deinit; @@ -236,7 +252,9 @@ typedef struct vnet_dev_port_op_no_rv_t *stop; vnet_dev_port_op_no_rv_t *deinit; vnet_dev_port_op_no_rv_t *free; + vnet_dev_port_op_no_rv_t *clear_counters; format_function_t *format_status; + format_function_t *format_flow; } vnet_dev_port_ops_t; typedef union @@ -264,6 +282,7 @@ typedef struct vnet_dev_rx_queue u8 enabled : 1; u8 started : 1; u8 suspended : 1; + u8 tc : 4; vnet_dev_queue_id_t queue_id; u16 size; u16 next_index; @@ -346,6 +365,7 @@ typedef struct vnet_dev_port u16 txq_sz; u16 rxq_sz; } intf; + vnet_dev_rss_key_t rss_key; CLIB_CACHE_LINE_ALIGN_MARK (data0); u8 data[]; @@ -422,6 +442,7 @@ struct vnet_dev_driver_registration int priority; vnet_dev_ops_t ops; vnet_dev_arg_t *args; + vnet_dev_arg_t *drv_args; }; typedef struct @@ -431,7 +452,7 @@ typedef struct vnet_dev_bus_ops_t ops; } vnet_dev_bus_t; -typedef struct +typedef struct vnet_dev_driver { u32 index; void *dev_data; @@ -439,6 +460,7 @@ typedef struct u32 dev_class_index; vnet_dev_bus_index_t bus_index; vnet_dev_ops_t ops; + vnet_dev_arg_t *args; } vnet_dev_driver_t; typedef struct @@ -471,6 +493,7 @@ typedef struct vnet_dev_arg_t *args; u16 data_size; void *initial_data; + vnet_dev_rss_key_t default_rss_key; } port; vnet_dev_node_t *rx_node; @@ -535,6 +558,7 @@ void *vnet_dev_get_device_info 
(vlib_main_t *, vnet_dev_device_id_t); /* error.c */ clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *, vnet_dev_rv_t, char *, ...); +int vnet_dev_flow_err (vlib_main_t *, vnet_dev_rv_t); /* handlers.c */ clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *, @@ -654,8 +678,10 @@ format_function_t format_vnet_dev_port_tx_offloads; format_function_t format_vnet_dev_rv; format_function_t format_vnet_dev_rx_queue_info; format_function_t format_vnet_dev_tx_queue_info; +format_function_t format_vnet_dev_flow; unformat_function_t unformat_vnet_dev_flags; unformat_function_t unformat_vnet_dev_port_flags; +unformat_function_t unformat_vnet_dev_rss_key; typedef struct { diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h index 521157abbe..d636f08d4a 100644 --- a/src/vnet/dev/dev_funcs.h +++ b/src/vnet/dev/dev_funcs.h @@ -74,6 +74,37 @@ vnet_dev_get_port_from_hw_if_index (u32 hw_if_index) return port; } +static_always_inline u32 +vnet_dev_get_rx_queue_if_sw_if_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->port->intf.sw_if_index; +} + +static_always_inline u32 +vnet_dev_get_rx_queue_if_hw_if_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->port->intf.hw_if_index; +} + +static_always_inline u32 +vnet_dev_get_port_rx_node_idex (vnet_dev_port_t *port) +{ + return port->intf.rx_node_index; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_sw_if_index (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *si; + + si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!si) + return 0; + + return vnet_dev_get_port_from_hw_if_index (si->hw_if_index); +} + static_always_inline vnet_dev_t * vnet_dev_by_index (u32 index) { @@ -128,12 +159,6 @@ vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port) ASSERT (vm->thread_index == 0); } -static_always_inline u32 -vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port) -{ - return port->intf.sw_if_index; -} - static_always_inline 
vnet_dev_port_t * vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) { @@ -144,7 +169,7 @@ vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) } static_always_inline vnet_dev_rx_queue_t * -vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, +vnet_dev_get_port_rx_queue_by_id (vnet_dev_port_t *port, vnet_dev_queue_id_t queue_id) { foreach_vnet_dev_port_rx_queue (q, port) @@ -154,7 +179,7 @@ vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, } static_always_inline vnet_dev_tx_queue_t * -vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port, +vnet_dev_get_port_tx_queue_by_id (vnet_dev_port_t *port, vnet_dev_queue_id_t queue_id) { foreach_vnet_dev_port_tx_queue (q, port) @@ -199,6 +224,18 @@ vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq) __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE); } +static_always_inline vlib_buffer_template_t +vnet_dev_get_rx_queue_if_buffer_template (vnet_dev_rx_queue_t *rxq) +{ + return rxq->buffer_template; +} + +static_always_inline u16 +vnet_dev_get_rx_queue_if_next_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->next_index; +} + static_always_inline u8 vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq) { diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c index df9c6d364e..4e057010af 100644 --- a/src/vnet/dev/error.c +++ b/src/vnet/dev/error.c @@ -6,6 +6,7 @@ #include #include #include +#include clib_error_t * vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv, @@ -27,3 +28,27 @@ vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv, vec_free (s); return err; } + +int +vnet_dev_flow_err (vlib_main_t *vm, vnet_dev_rv_t rv) +{ + if (rv == VNET_DEV_OK) + return 0; + + switch (rv) + { + /* clang-format off */ +#define _(n, e, s) \ + case VNET_DEV_ERR_##e: \ + return VNET_FLOW_ERROR_##e; + foreach_flow_error; +#undef _ + /* clang-format on */ + default: + ASSERT (0); + } + + ASSERT (0); + + return 0; +} diff 
--git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h index 47e72957da..430a6aef28 100644 --- a/src/vnet/dev/errors.h +++ b/src/vnet/dev/errors.h @@ -39,6 +39,8 @@ _ (UNKNOWN_INTERFACE, "unknown interface") \ _ (UNSUPPORTED_CONFIG, "unsupported config") \ _ (UNSUPPORTED_DEVICE, "unsupported device") \ - _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") + _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \ + _ (ALREADY_DONE, "already done") \ + _ (NO_SUCH_INTERFACE, "no such interface") #endif /* _VNET_DEV_ERRORS_H_ */ diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c index 848cd13a6c..ad739e4d8a 100644 --- a/src/vnet/dev/format.c +++ b/src/vnet/dev/format.c @@ -101,7 +101,7 @@ format_vnet_dev_port_info (u8 *s, va_list *args) u32 indent = format_get_indent (s); s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr, - &port->attr.hw_addr); + &port->primary_hw_addr); s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)", pool_elts (port->rx_queues), port->attr.max_rx_queues, pool_elts (port->tx_queues), port->attr.max_tx_queues); @@ -119,6 +119,10 @@ format_vnet_dev_port_info (u8 *s, va_list *args) s = format (s, " %U", format_vnet_dev_hw_addr, a); } } + if (port->rss_key.length) + s = format (s, "\n%URSS Key is %U", format_white_space, indent, + format_hex_bytes_no_wrap, port->rss_key.key, + port->rss_key.length); s = format (s, "\n%UMax RX frame size is %u (max supported %u)", format_white_space, indent, port->max_rx_frame_size, port->attr.max_supported_rx_frame_size); @@ -490,3 +494,40 @@ format_vnet_dev_port_tx_offloads (u8 *s, va_list *args) return s; } + +u8 * +format_vnet_dev_flow (u8 *s, va_list *args) +{ + u32 dev_instance = va_arg (*args, u32); + u32 flow_index = va_arg (*args, u32); + uword private_data = va_arg (*args, uword); + vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (dev_instance); + + if (port->port_ops.format_flow) + s = format (s, "%U", port->port_ops.format_flow, port, flow_index, + 
private_data); + + return s; +} + +uword +unformat_vnet_dev_rss_key (unformat_input_t *input, va_list *args) +{ + vnet_dev_rss_key_t *k = va_arg (*args, vnet_dev_rss_key_t *); + u8 *v; + u32 len; + + if (!(unformat_user (input, unformat_hex_string, &v))) + return 0; + + len = vec_len (v); + if (len > sizeof (k->key)) + { + vec_free (v); + return 0; + } + clib_memcpy (k->key, v, len); + k->length = len; + vec_free (v); /* key bytes were copied out above */ + return 1; +} diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c index fcaef14221..2a55affe3e 100644 --- a/src/vnet/dev/handlers.c +++ b/src/vnet/dev/handlers.c @@ -146,9 +146,47 @@ int vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op, u32 dev_instance, u32 flow_index, uword *private_data) { + vlib_main_t *vm = vlib_get_main (); vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance); - log_warn (p->dev, "unsupported request for flow_ops received"); - return VNET_FLOW_ERROR_NOT_SUPPORTED; + vnet_dev_port_cfg_change_req_t req = {}; /* no indeterminate fields */ + vnet_dev_rv_t rv; + + switch (op) + { + case VNET_FLOW_DEV_OP_ADD_FLOW: + req.type = VNET_DEV_PORT_CFG_ADD_RX_FLOW; + break; + case VNET_FLOW_DEV_OP_DEL_FLOW: + req.type = VNET_DEV_PORT_CFG_DEL_RX_FLOW; + break; + case VNET_FLOW_DEV_OP_GET_COUNTER: + req.type = VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER; + break; + case VNET_FLOW_DEV_OP_RESET_COUNTER: + req.type = VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER; + break; + default: + log_warn (p->dev, "unsupported request for flow_ops received"); + return VNET_FLOW_ERROR_NOT_SUPPORTED; + } + + req.flow_index = flow_index; + req.private_data = private_data; + + rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); + if (rv != VNET_DEV_OK) + { + log_err (p->dev, "validation failed for flow_ops"); + return VNET_FLOW_ERROR_NOT_SUPPORTED; + } + + if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK) + { + log_err (p->dev, "request for flow_ops failed"); + return vnet_dev_flow_err (vm, rv); + } + + return 0; } clib_error_t * diff --git 
a/src/vnet/dev/port.c b/src/vnet/dev/port.c index 8a6df54cbc..9cffac2277 100644 --- a/src/vnet/dev/port.c +++ b/src/vnet/dev/port.c @@ -267,6 +267,9 @@ vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id, port->rx_node = *args->rx_node; port->tx_node = *args->tx_node; + if (port->attr.caps.rss && args->port.default_rss_key.length) + port->rss_key = args->port.default_rss_key; + if (args->port.args) for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++) vec_add1 (port->args, *a); @@ -337,6 +340,11 @@ vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port, return VNET_DEV_ERR_NO_SUCH_ENTRY; break; + case VNET_DEV_PORT_CFG_SET_RSS_KEY: + if (!port->attr.caps.rss) + return VNET_DEV_ERR_NOT_SUPPORTED; + break; + default: break; } @@ -370,7 +378,7 @@ vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, { if (req->all_queues == 0) { - rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id); + rxq = vnet_dev_get_port_rx_queue_by_id (port, req->queue_id); if (rxq == 0) return VNET_DEV_ERR_BUG; } @@ -453,6 +461,10 @@ vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, } break; + case VNET_DEV_PORT_CFG_SET_RSS_KEY: + port->rss_key = req->rss_key; + break; + default: break; } @@ -516,6 +528,7 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) u16 n_threads = vlib_get_n_threads (); vnet_dev_main_t *dm = &vnet_dev_main; vnet_dev_t *dev = port->dev; + vnet_dev_tx_queue_t *txq, **qp; vnet_dev_port_t **pp; vnet_dev_rv_t rv; u16 ti = 0; @@ -553,13 +566,15 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) VNET_DEV_OK) goto error; - foreach_vnet_dev_port_tx_queue (q, port) + for (ti = 0; ti < n_threads; ti++) { - q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1); + qp = pool_elt_at_index (port->tx_queues, ti % port->intf.num_tx_queues); + txq = qp[0]; + txq->assigned_threads = clib_bitmap_set (txq->assigned_threads, ti, 1); log_debug 
(dev, "port %u tx queue %u assigned to thread %u", - port->port_id, q->queue_id, ti); - if (++ti >= n_threads) - break; + port->port_id, txq->queue_id, ti); + if (clib_bitmap_count_set_bits (txq->assigned_threads) > 1) + txq->lock_needed = 1; } /* pool of port pointers helps us to assign unique dev_instance */ @@ -733,16 +748,26 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) void vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) { - if (port->counter_main) + if (port->port_ops.clear_counters) + port->port_ops.clear_counters (vm, port); + else if (port->counter_main) vnet_dev_counters_clear (vm, port->counter_main); foreach_vnet_dev_port_rx_queue (q, port) - if (q->counter_main) - vnet_dev_counters_clear (vm, q->counter_main); + { + if (port->rx_queue_ops.clear_counters) + port->rx_queue_ops.clear_counters (vm, q); + else if (q->counter_main) + vnet_dev_counters_clear (vm, q->counter_main); + } foreach_vnet_dev_port_tx_queue (q, port) - if (q->counter_main) - vnet_dev_counters_clear (vm, q->counter_main); + { + if (port->tx_queue_ops.clear_counters) + port->tx_queue_ops.clear_counters (vm, q); + else if (q->counter_main) + vnet_dev_counters_clear (vm, q->counter_main); + } log_notice (port->dev, "counters cleared on port %u", port->port_id); } diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h index 006d18e5bc..c4301a135f 100644 --- a/src/vnet/dev/types.h +++ b/src/vnet/dev/types.h @@ -17,6 +17,11 @@ typedef struct vnet_dev vnet_dev_t; typedef struct vnet_dev_port vnet_dev_port_t; typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t; typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t; +typedef struct +{ + u8 key[48]; + u8 length; +} vnet_dev_rss_key_t; typedef enum { diff --git a/src/vnet/interface.h b/src/vnet/interface.h index f0cb540f97..596c3cb564 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -44,6 +44,8 @@ #include #include #include +#include +#include #include struct vnet_main_t; @@ -290,6 
+292,8 @@ typedef struct _vnet_device_class /* Interface to set rss queues of the interface */ vnet_interface_rss_queues_set_t *set_rss_queues_function; + tm_system_t *tm_sys_impl; + pfc_system_t *pfc_sys_impl; } vnet_device_class_t; u32 vnet_register_device_class (vlib_main_t *, vnet_device_class_t *); diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c index c65f349e5b..7f0b8d90c1 100644 --- a/src/vnet/ip/reass/ip4_full_reass.c +++ b/src/vnet/ip/reass/ip4_full_reass.c @@ -1424,11 +1424,11 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = { }; VNET_FEATURE_INIT (ip4_full_reass_feature, static) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-full-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup", - "ipsec4-input-feature"), - .runs_after = 0, + .arc_name = "ip4-unicast", + .node_name = "ip4-full-reassembly-feature", + .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature", + "ip4-sv-reassembly-feature"), + .runs_after = 0, }; VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm, @@ -1453,15 +1453,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = { }, }; -VNET_FEATURE_INIT (ip4_full_reass_custom, static) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-full-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup", - "ipsec4-input-feature"), - .runs_after = 0, -}; - - #ifndef CLIB_MARCH_VARIANT uword ip4_full_reass_custom_register_next_node (uword node_index) diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 8a1ac7e1eb..46f7fe6cd0 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -607,6 +607,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 current_sa_bytes = 0, spi = 0; u8 esp_align = 4, iv_sz = 0, icv_sz = 0; ipsec_sa_t *sa0 = 0; + u8 sa_drop_no_crypto = 0; vlib_buffer_t *lb; vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops; vnet_crypto_op_t **integ_ops = &ptd->integ_ops; @@ -692,16 +693,10 
@@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, sa0 = ipsec_sa_get (sa_index0); current_sa_index = sa_index0; - if (PREDICT_FALSE ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE && - sa0->integ_alg == IPSEC_INTEG_ALG_NONE) && - !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0))) - { - err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION; - esp_encrypt_set_next_index (b[0], node, thread_index, err, - n_noop, noop_nexts, drop_next, - sa_index0); - goto trace; - } + sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE && + sa0->integ_alg == IPSEC_INTEG_ALG_NONE) && + !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0)); + vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index, current_sa_index); @@ -715,6 +710,14 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0); } + if (PREDICT_FALSE (sa_drop_no_crypto != 0)) + { + err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION; + esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop, + noop_nexts, drop_next, sa_index0); + goto trace; + } + if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index)) { /* this is the first packet to use this SA, claim the SA diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index 5aef630a33..0ce186046a 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -168,6 +168,8 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, flags |= IPSEC_SA_FLAG_UDP_ENCAP; else if (unformat (line_input, "async")) flags |= IPSEC_SA_FLAG_IS_ASYNC; + else if (unformat (line_input, "inline-reassembly")) + flags |= IPSEC_SA_FLAG_IS_INL_REASSEMBLY; else { error = clib_error_return (0, "parse error: '%U'", diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h index 4f73f1eab0..7e38925692 100644 --- a/src/vnet/ipsec/ipsec_sa.h +++ b/src/vnet/ipsec/ipsec_sa.h @@ -122,7 +122,8 @@ typedef struct ipsec_key_t_ _ (512, IS_ASYNC, "async") \ _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \ _ (2048, IS_NULL_GMAC, 
"null-gmac") \ - _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge") + _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge") \ + _ (8192, IS_INL_REASSEMBLY, "inline-reassembly") typedef enum ipsec_sad_flags_t_ { diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c index a419d8c4fe..1f8f914275 100644 --- a/src/vnet/ipsec/ipsec_tun_in.c +++ b/src/vnet/ipsec/ipsec_tun_in.c @@ -60,8 +60,8 @@ format_ipsec_tun_protect_input_trace (u8 * s, va_list * args) s = format (s, "IPSec: %U seq %u", format_ipsec6_tunnel_kv, &t->kv6, t->seq); else - s = format (s, "IPSec: %U seq %u sa %d", - format_ipsec4_tunnel_kv, &t->kv4, t->seq); + s = + format (s, "IPSec: %U seq %u", format_ipsec4_tunnel_kv, &t->kv4, t->seq); return s; } diff --git a/src/vnet/pfc/pfc.api b/src/vnet/pfc/pfc.api new file mode 100644 index 0000000000..1e43ca77cb --- /dev/null +++ b/src/vnet/pfc/pfc.api @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2025 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +option version = "0.1.0"; + +/** + * @brief Reply for configuring PFC. + * + * This structure specifies the parameters returned in response for configuring the PFC. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + */ +define pfc_sys_configure_reply +{ + u32 context; + i32 retval; +}; + +/** + * @brief Configure PFC on given Rx/Tx queues with given TC and time quanta. + * + * This structure outlines the necessary parameters to configure PFC for given TC and time quanta. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param mode - PFC mode to be configured. + * @param txq - Tx queue to be paused for given TC. + * @param tx_tc - Traffic class to be applied PFC frame processing. 
+ * @param pause_time - Pause time to filled in PFC frames. + * @param rxq - Rx queue which will trigger PFC frames with given TC and pause time if congested. + * @param rx_tc - Traffic class to filled in PFC frames. + */ +define pfc_sys_configure +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 mode; + u32 txq; + u32 tx_tc; + u32 pause_time; + u32 rxq; + u32 rx_tc; +}; + +/** + * @brief Reply for getting the capabilities of a PFC system. + * + * This structure specifies the parameters returned in response for getting the capabilities of a PFC system. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param tc_max - Represents maximum number of traffic classes supported by H/W. + * @param mode - Represents supported mode of operation i.e. RX_PAUSE, TX_PAUSE or FULL. + */ +define pfc_sys_get_capabilities_reply +{ + u32 context; + i32 retval; + u32 tc_max; + u32 mode; +}; + +/** + * @brief Get the capabilities of a PFC system. + * + * This structure outlines the necessary parameters to get the capabilities of a PFC system. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + */ +define pfc_sys_get_capabilities + { + u32 client_index; + u32 context; + u32 sw_if_idx; + }; + +/** + * @brief Reply for disabling priority flow control. + * + * This structure specifies the parameters returned in response to disable PFC. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + */ +define pfc_sys_disable_pause_frame_flow_ctrl_reply +{ + u32 context; + i32 retval; +}; + +/** + * @brief Disable priority flow control. + * + * This structure outlines the necessary parameters to disable applied flow control mechanism. 
+ * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param disable - Disables priority flow control. + */ +define pfc_sys_disable_pause_frame_flow_ctrl + { + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 disable; + }; diff --git a/src/vnet/pfc/pfc.c b/src/vnet/pfc/pfc.c new file mode 100644 index 0000000000..c5b12e9b54 --- /dev/null +++ b/src/vnet/pfc/pfc.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2025 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include + +pfc_system_t pfc_system_main; + +int +pfc_system_register (pfc_system_t *pfc_sys, u32 hw_if_idx) +{ + vnet_main_t *vnm = vnet_get_main (); + + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, hi->dev_class_index); + + dev_class->pfc_sys_impl = pfc_sys; + + return 0; +} + +int +pfc_sys_configure (u32 hw_if_idx, pfc_params_t *params) +{ + vnet_main_t *vnm = vnet_get_main (); + + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, hi->dev_class_index); + + /* Return the driver callback's status so that a failure is not + * reported to the caller as success. */ + return dev_class->pfc_sys_impl->pfc_configure (hw_if_idx, params); +} + +int +pfc_sys_get_capabilities (u32 hw_if_idx, pfc_capa_params_t *capa) +{ + vnet_main_t *vnm = vnet_get_main (); + + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, hi->dev_class_index); + + /* Return the driver callback's status so that a failure is not + * reported to the caller as success. */ + return dev_class->pfc_sys_impl->pfc_get_capabilities (hw_if_idx, capa); +} + +int +pfc_sys_disable_pause_frame_flow_ctrl (u32 hw_if_idx, u32 disable) +{ + vnet_main_t *vnm = vnet_get_main (); + + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, 
hi->dev_class_index); + + dev_class->pfc_sys_impl->pfc_disable_pause_frame_flow_ctrl (hw_if_idx, + disable); + + return 0; +} diff --git a/src/vnet/pfc/pfc.h b/src/vnet/pfc/pfc.h new file mode 100644 index 0000000000..edb1371546 --- /dev/null +++ b/src/vnet/pfc/pfc.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2025 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _PFC_H_ +#define _PFC_H_ + +#include +#include +#include +#include +#include + +/** + * This enum indicates the flow control mode + */ +typedef enum +{ + /** Disable flow control. */ + PFC_ETH_FC_NONE = 0, + /** Rx pause frame, enable flowctrl on Tx side. */ + PFC_ETH_FC_RX_PAUSE, + /** Tx pause frame, enable flowctrl on Rx side. */ + PFC_ETH_FC_TX_PAUSE, + /**< Enable flow control on both side. */ + PFC_ETH_FC_FULL +} pfc_mode_t; + +/** + * PFC Capabilities params + */ +typedef struct pfc_capa_params_ +{ + /** Maximum supported traffic class as per PFC (802.1Qbb) specification. */ + uint8_t tc_max; + /** PFC mode capabilities. */ + pfc_mode_t mode; +} pfc_capa_params_t; + +/** + * A structure used to configure priority flow control on + * ethernet device for given Rx/Tx queues. + */ +typedef struct pfc_params_ +{ + /** Flow control mode */ + pfc_mode_t mode; + /** Structure shall be used to configure given tx_qid with corresponding tc. + * When device receives PFC frame with mentioned tc, traffic will be paused + * on tx_qid for that tc. + * Valid when (mode == PFC_ETH_FC_RX_PAUSE || mode == PFC_ETH_FC_FULL) + */ + struct + { + /** Tx queue ID */ + uint16_t txq; + /** Traffic class as per PFC (802.1Qbb) spec. The value must be + * in the range [0, max_tx_queues - 1] + */ + uint8_t tc; + } rx_pause; + /** + * Structure shall be used to configure pfc on given rx_qid. + * When rx_qid is congested, PFC frames are generated with tc + * and pause_time to the peer. 
+ * Valid when (mode == PFC_ETH_FC_TX_PAUSE || mode == PFC_ETH_FC_FULL) + */ + struct + { + /** Pause quota in the Pause frame */ + uint16_t pause_time; + /** Rx queue ID */ + uint16_t rxq; + /** Traffic class as per PFC (802.1Qbb) spec. The value must be + * in the range [0, max_rx_queues - 1] + */ + uint8_t tc; + } tx_pause; +} pfc_params_t; + +typedef struct pfc_system_t_ +{ + u32 hw_if_idx; + int (*pfc_configure) (u32 hw_if_idx, pfc_params_t *params); + int (*pfc_get_capabilities) (u32 hw_if_idx, pfc_capa_params_t *capa_param); + int (*pfc_disable_pause_frame_flow_ctrl) (u32 hw_if_idx, u32 disable); +} pfc_system_t; + +/** + * @brief Configure priority flow control on given device. + * @param hw_if_idx - Hardware interface index. + * @param params - Pointer to structure containing pfc parameters. + */ +int pfc_sys_configure (u32 hw_if_idx, pfc_params_t *params); + +/** + * @brief Read capabilities for a pfc system. + * @param hw_if_idx - Hardware interface index. + * @param capa_param - Pointer to structure where capabilities are to be + * filled. + */ +int pfc_sys_get_capabilities (u32 hw_if_idx, pfc_capa_params_t *capa_param); + +/** + * @brief Disable pause flow control. + * @param hw_if_idx - Hardware interface index. + * @param disable - Flag to toggle pause flow control. + */ +int pfc_sys_disable_pause_frame_flow_ctrl (u32 hw_if_idx, u32 disable); + +/** + * @brief Register the Priority Flow Control (PFC) system. + * + * @param pfc_sys - Pointer to the PFC system structure to be registered. + * @param hw_if_idx - Hardware interface index. + * + * @return 0 on success. + */ +int pfc_system_register (pfc_system_t *pfc_sys, u32 hw_if_idx); +#endif diff --git a/src/vnet/pfc/pfc_api.c b/src/vnet/pfc/pfc_api.c new file mode 100644 index 0000000000..248ff0ce99 --- /dev/null +++ b/src/vnet/pfc/pfc_api.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2025 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * Base message ID fot the plugin + */ +static u32 pfc_base_msg_id; +#define REPLY_MSG_ID_BASE pfc_base_msg_id + +#include + +void +vl_api_pfc_sys_configure_t_handler (vl_api_pfc_sys_configure_t *mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + vl_api_pfc_sys_configure_reply_t *rmp; + pfc_params_t params = { 0 }; + int rv = -1; + + params.mode = clib_net_to_host_u32 (mp->mode); + params.rx_pause.txq = clib_net_to_host_u32 (mp->txq); + params.rx_pause.tc = clib_net_to_host_u32 (mp->tx_tc); + params.tx_pause.pause_time = clib_net_to_host_u32 (mp->pause_time); + params.tx_pause.rxq = clib_net_to_host_u32 (mp->rxq); + params.tx_pause.tc = clib_net_to_host_u32 (mp->rx_tc); + + rv = pfc_sys_configure (sw->hw_if_index, ¶ms); + + REPLY_MACRO (VL_API_PFC_SYS_CONFIGURE_REPLY); +} + +void +vl_api_pfc_sys_get_capabilities_t_handler ( + vl_api_pfc_sys_get_capabilities_t *mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + vl_api_pfc_sys_get_capabilities_reply_t *rmp; + pfc_capa_params_t capa = { 0 }; + int rv = -1; + + rv = pfc_sys_get_capabilities (sw->hw_if_index, &capa); + + REPLY_MACRO2 (VL_API_PFC_SYS_GET_CAPABILITIES_REPLY, ({ + rmp->mode = clib_host_to_net_u32 (capa.mode); + rmp->tc_max = clib_host_to_net_u32 (capa.tc_max); + })); +} + +void +vl_api_pfc_sys_disable_pause_frame_flow_ctrl_t_handler ( + vl_api_pfc_sys_disable_pause_frame_flow_ctrl_t *mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + vl_api_pfc_sys_disable_pause_frame_flow_ctrl_reply_t *rmp; + int rv = -1; + + rv = 
pfc_sys_disable_pause_frame_flow_ctrl ( + sw->hw_if_index, clib_host_to_net_u32 (mp->disable)); + + REPLY_MACRO (VL_API_PFC_SYS_DISABLE_PAUSE_FRAME_FLOW_CTRL_REPLY); +} + +#include + +static clib_error_t * +pfc_api_init (vlib_main_t *vm) +{ + /* Ask for a correctly-sized block of API message decode slots */ + pfc_base_msg_id = setup_message_id_table (); + + return 0; +} + +VLIB_INIT_FUNCTION (pfc_api_init); diff --git a/src/vnet/pfc/pfc_test.c b/src/vnet/pfc/pfc_test.c new file mode 100644 index 0000000000..bef899dc33 --- /dev/null +++ b/src/vnet/pfc/pfc_test.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include + +#include + +#include + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} pfc_test_main_t; + +pfc_test_main_t pfc_test_main; + +#define __plugin_msg_base pfc_test_main.msg_id_base +#include +uword unformat_sw_if_index (unformat_input_t *input, va_list *args); + +/* Declare message IDs */ +#include +#include +#include + +static int +api_pfc_sys_configure (vat_main_t *vam) +{ + u32 pause_time = 0, rxq = 0, rx_tc = 0; + u32 mode = 0, txq = 0, tx_tc = 0; + unformat_input_t *i = vam->input; + vl_api_pfc_sys_configure_t *mp; + u32 msg_size = sizeof (*mp); + u8 sw_if_idx_set = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "mode %u", &mode)) + ; + else if (unformat (i, "txq %u", &txq)) + ; + else if (unformat (i, "tx_tc %u", &tx_tc)) + ; + else if (unformat (i, "pause_time %u", &pause_time)) + ; + else if (unformat (i, "rxq %u", &rxq)) + ; + else if (unformat (i, "rx_tc %u", &rx_tc)) + ; + else + { + clib_warning ("Invalid input, unknown parameter"); + 
return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (PFC_SYS_CONFIGURE, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->mode = clib_host_to_net_u32 (mode); + mp->txq = clib_host_to_net_u32 (txq); + mp->tx_tc = clib_host_to_net_u32 (tx_tc); + mp->pause_time = clib_host_to_net_u32 (pause_time); + mp->rxq = clib_host_to_net_u32 (rxq); + mp->rx_tc = clib_host_to_net_u32 (rx_tc); + + S (mp); + W (ret); + return ret; +} + +static int +api_pfc_sys_get_capabilities (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + vl_api_pfc_sys_get_capabilities_t *mp; + u32 msg_size = sizeof (*mp); + u8 sw_if_idx_set = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (PFC_SYS_GET_CAPABILITIES, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + + S (mp); + W (ret); + return ret; +} + +static int +api_pfc_sys_disable_pause_frame_flow_ctrl (vat_main_t *vam) +{ + vl_api_pfc_sys_disable_pause_frame_flow_ctrl_t *mp; + unformat_input_t *i = vam->input; + u32 sw_if_idx = 0, disable = 0; + u32 msg_size = sizeof (*mp); + u8 sw_if_idx_set = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "disable %u", &disable)) + ; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (PFC_SYS_DISABLE_PAUSE_FRAME_FLOW_CTRL, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->disable = clib_host_to_net_u32 (disable); + + S (mp); + W (ret); 
+ return ret; +} + +static void +vl_api_pfc_sys_configure_reply_t_handler (vl_api_pfc_sys_configure_reply_t *mp) +{ + vat_main_t *vam = pfc_test_main.vat_main; + if (mp->retval < 0) + clib_warning ("PFC configure failed: %d", mp->retval); + + vam->result_ready = 1; +} + +static void +vl_api_pfc_sys_get_capabilities_reply_t_handler ( + vl_api_pfc_sys_get_capabilities_reply_t *mp) +{ + vat_main_t *vam = pfc_test_main.vat_main; + if (mp->retval < 0) + clib_warning ("PFC capability get failed: %d", mp->retval); + + vam->result_ready = 1; +} + +static void +vl_api_pfc_sys_disable_pause_frame_flow_ctrl_reply_t_handler ( + vl_api_pfc_sys_disable_pause_frame_flow_ctrl_reply_t *mp) +{ + vat_main_t *vam = pfc_test_main.vat_main; + if (mp->retval < 0) + clib_warning ("Pause frame disable failed: %d", mp->retval); + + vam->result_ready = 1; +} + +#include diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 2c69138931..8d82293a07 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -21,6 +21,24 @@ static app_main_t app_main; +/* + * test Certificate and Private key can be overwritten when using + * test-srv-crt-path and test-srv-key-path tls config section. + * If not specified, RSA Certificate and Private key are in use. 
+ */ +static vnet_app_add_cert_key_pair_args_t test_srv_key_pair = { + .cert = (u8 *) test_srv_crt_rsa, + .cert_len = sizeof (test_srv_crt_rsa), + .key = (u8 *) test_srv_key_rsa, + .key_len = sizeof (test_srv_key_rsa) +}; + +vnet_app_add_cert_key_pair_args_t * +vnet_app_tls_get_test_srv_key_pair (void) +{ + return &test_srv_key_pair; +} + #define app_interface_check_thread_and_barrier(_fn, _arg) \ if (PREDICT_FALSE (!vlib_thread_is_main_w_barrier ())) \ { \ diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index b41f7a4c43..d223021329 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -290,6 +290,7 @@ int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a); int vnet_app_del_cert_key_pair (u32 index); /** Ask for app cb on pair deletion */ int vnet_app_add_cert_key_interest (u32 index, u32 app_index); +vnet_app_add_cert_key_pair_args_t *vnet_app_tls_get_test_srv_key_pair (void); uword unformat_vnet_uri (unformat_input_t *input, va_list *args); diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 8c6cf8a93f..5bd1471b6f 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -1132,10 +1132,10 @@ ct_handle_cleanups (void *args) clib_fifo_sub2 (wrk->pending_cleanups, req); ct = ct_connection_get (req->ct_index, thread_index); s = session_get (ct->c_s_index, ct->c_thread_index); - if (!svm_fifo_has_event (s->tx_fifo)) - ct_session_postponed_cleanup (ct); - else + if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT)) clib_fifo_add1 (wrk->pending_cleanups, *req); + else + ct_session_postponed_cleanup (ct); n_to_handle -= 1; } @@ -1411,6 +1411,7 @@ ct_session_tx (session_t * s) peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index); if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return 0; + peer_s->flags |= SESSION_F_RX_EVT; return 
session_enqueue_notify (peer_s); } diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index d459b73650..e8a20c9fb4 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -853,6 +853,30 @@ segment_manager_alloc_session_fifos (segment_manager_t * sm, return 0; } +void +segment_manager_shrink_fifos (svm_fifo_t *rx_fifo, svm_fifo_t *tx_fifo) +{ + segment_manager_t *sm; + fifo_segment_t *fs; + u32 segment_index; + + if (!rx_fifo || !tx_fifo) + return; + + /* + * It's possible to have no segment manager if the session was removed + * as result of a detach. + */ + if (!(sm = segment_manager_get_if_valid (rx_fifo->segment_manager))) + return; + + segment_index = rx_fifo->segment_index; + fs = segment_manager_get_segment_w_lock (sm, segment_index); + fifo_segment_shrink_fifo (fs, rx_fifo); + fifo_segment_shrink_fifo (fs, tx_fifo); + segment_manager_segment_reader_unlock (sm); +} + void segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo) { diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index 1e99c4605a..947fbbe226 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -148,6 +148,7 @@ int segment_manager_try_alloc_fifos (fifo_segment_t * fs, u32 rx_fifo_size, u32 tx_fifo_size, svm_fifo_t ** rx_fifo, svm_fifo_t ** tx_fifo); +void segment_manager_shrink_fifos (svm_fifo_t *rx_fifo, svm_fifo_t *tx_fifo); void segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo); void segment_manager_detach_fifo (segment_manager_t *sm, svm_fifo_t **f); diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 79681829e5..46aa1844b6 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -284,6 +284,12 @@ session_program_cleanup (session_t *s) session_cleanup_notify (s, SESSION_CLEANUP_SESSION); } +void +session_shrink_fifos (session_t *s) +{ + segment_manager_shrink_fifos 
(s->rx_fifo, s->tx_fifo); +} + /** * Cleans up session and lookup table. * diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 78158d5f3e..2d1e6cf7f9 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -368,6 +368,7 @@ session_t *session_alloc (u32 thread_index); void session_free (session_t * s); void session_cleanup (session_t *s); void session_program_cleanup (session_t *s); +void session_shrink_fifos (session_t *s); void session_cleanup_half_open (session_handle_t ho_handle); u8 session_is_valid (u32 si, u8 thread_index); diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c index 5594fed44f..6a209dc7af 100644 --- a/src/vnet/srv6/sr_api.c +++ b/src/vnet/srv6/sr_api.c @@ -163,7 +163,7 @@ vl_api_sr_policy_add_v2_t_handler (vl_api_sr_policy_add_v2_t *mp) mp->type, ntohl (mp->fib_table), mp->is_encap, 0, NULL); vec_free (segments); - REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY); + REPLY_MACRO (VL_API_SR_POLICY_ADD_V2_REPLY); } static void diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index c069a9b08d..e08437efbd 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -2252,6 +2252,11 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time); session_transport_closed_notify (&tc->connection); + /* Shrink FIFOs */ + session_t *s = session_get_if_valid (tc->connection.s_index, + tc->connection.thread_index); + if (s) + session_shrink_fifos (s); goto drop; break; @@ -2389,6 +2394,11 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_cfg.timewait_time); tcp_program_ack (tc); session_transport_closed_notify (&tc->connection); + /* Shrink FIFOs */ + session_t *s = session_get_if_valid (tc->connection.s_index, + tc->connection.thread_index); + if (s) + session_shrink_fifos (s); break; case TCP_STATE_TIME_WAIT: /* Remain in the TIME-WAIT state. 
Restart the time-wait diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index a27d731aca..b52413cd30 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -15,6 +15,7 @@ #include #include +#include #include static tls_main_t tls_main; @@ -594,12 +595,13 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index, u32 ctx_handle; ho_ctx = tls_ctx_half_open_get (ho_ctx_index); - ho_ctx->flags |= TLS_CONN_F_HO_DONE; ctx_handle = tls_ctx_alloc (ho_ctx->tls_ctx_engine); ctx = tls_ctx_get (ctx_handle); clib_memcpy_fast (ctx, ho_ctx, sizeof (*ctx)); + /* Half-open freed on tcp half-open cleanup notification */ + __atomic_fetch_or (&ho_ctx->flags, TLS_CONN_F_HO_DONE, __ATOMIC_RELEASE); ctx->c_thread_index = vlib_get_thread_index (); ctx->tls_ctx_handle = ctx_handle; @@ -693,8 +695,12 @@ tls_app_session_cleanup (session_t * s, session_cleanup_ntf_t ntf) } ctx = tls_ctx_get (s->opaque); + TLS_DBG (1, "SESSION_CLEANUP_CTX: %p", ctx); if (!ctx->no_app_session) - session_transport_delete_notify (&ctx->connection); + { + session_transport_delete_notify (&ctx->connection); + TLS_DBG (1, "NO_APP_SESSION_CLEANUP_CTX: %p", ctx); + } tls_ctx_free (ctx); } @@ -1387,6 +1393,27 @@ tls_register_engine (const tls_engine_vft_t * vft, crypto_engine_type_t type) tls_vfts[type] = *vft; } +static clib_error_t * +tls_get_test_file_len_content (char *file_name, u32 *len, u8 **content) +{ + clib_error_t *error; + uword n_bytes; + u8 *rv = 0; + + error = clib_file_n_bytes (file_name, &n_bytes); + if (error) + return error; + + error = clib_file_contents (file_name, &rv); + if (error) + return error; + + *len = n_bytes; + *content = rv; + + return error; +} + static clib_error_t * tls_init (vlib_main_t * vm) { @@ -1402,6 +1429,10 @@ tls_init (vlib_main_t * vm) vec_validate (tm->rx_bufs, num_threads - 1); vec_validate (tm->tx_bufs, num_threads - 1); + /* + * first_seg_size default value 32MB + * add_seg_size default value 256 MB + */ tm->first_seg_size = 32 << 20; tm->add_seg_size = 
256 << 20; @@ -1445,6 +1476,38 @@ tls_config_fn (vlib_main_t * vm, unformat_input_t * input) } tm->fifo_size = tmp; } + else if (unformat (input, "engine-path %s", &tm->engine_path)) + ; + else if (unformat (input, "test-srv-crt-path %s", + &tm->test_srv_crt_path)) + { + clib_error_t *err; + vnet_app_add_cert_key_pair_args_t *ck_pair = + vnet_app_tls_get_test_srv_key_pair (); + + err = tls_get_test_file_len_content ( + tm->test_srv_crt_path, &ck_pair->cert_len, &ck_pair->cert); + if (err) + { + vec_free (tm->test_srv_crt_path); + return err; + } + } + else if (unformat (input, "test-srv-key-path %s", + &tm->test_srv_key_path)) + { + clib_error_t *err; + vnet_app_add_cert_key_pair_args_t *ck_pair = + vnet_app_tls_get_test_srv_key_pair (); + + err = tls_get_test_file_len_content ( + tm->test_srv_key_path, &ck_pair->key_len, &ck_pair->key); + if (err) + { + vec_free (tm->test_srv_key_path); + return err; + } + } else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h index 60f96ee5f4..3d3e12fbca 100644 --- a/src/vnet/tls/tls.h +++ b/src/vnet/tls/tls.h @@ -36,6 +36,19 @@ #define TLS_DBG(_lvl, _fmt, _args...) 
#endif +#define foreach_ssl_async_evt_type \ + _ (INIT, "SSL_in_init async event") \ + _ (RD, "Read async event") \ + _ (WR, "Write async event") \ + _ (MAX, "Maximum async event") + +typedef enum ssl_async_evt_type_ +{ +#define _(sym, str) SSL_ASYNC_EVT_##sym, + foreach_ssl_async_evt_type +#undef _ +} ssl_async_evt_type_t; + /* *INDENT-OFF* */ typedef struct tls_cxt_id_ { @@ -57,7 +70,10 @@ typedef struct tls_cxt_id_ STATIC_ASSERT (sizeof (tls_ctx_id_t) <= TRANSPORT_CONN_ID_LEN, "ctx id must be less than TRANSPORT_CONN_ID_LEN"); -#define foreach_tls_conn_flags _ (HO_DONE, "ho done") +#define foreach_tls_conn_flags \ + _ (HO_DONE, "ho-done") \ + _ (ASYNC_WR, "async-write") \ + _ (ASYNC_RD, "async-read") typedef enum tls_conn_flags_bit_ { @@ -100,9 +116,9 @@ typedef struct tls_ctx_ u8 is_migrated; tls_conn_flags_t flags; u8 *srv_hostname; - u32 evt_index; u32 ckpair_index; transport_proto_t tls_type; + bool in_async_read; } tls_ctx_t; typedef struct tls_main_ @@ -123,6 +139,9 @@ typedef struct tls_main_ u64 first_seg_size; u64 add_seg_size; u32 fifo_size; + char *engine_path; + char *test_srv_key_path; + char *test_srv_crt_path; } tls_main_t; typedef struct tls_engine_vft_ diff --git a/src/vnet/tm/tm.api b/src/vnet/tm/tm.api new file mode 100644 index 0000000000..d3e8e81c07 --- /dev/null +++ b/src/vnet/tm/tm.api @@ -0,0 +1,455 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +option version = "0.1.0"; + +/** + * @brief Reply for adding a traffic management node. + * + * This structure specifies the parameters returned in response to add a new TM node. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param node_id - Identifier of the newly created TM node. 
+ */ +define tm_sys_node_add_reply +{ + u32 context; + i32 retval; + u32 node_id; +}; + +/** + * @brief Add a new traffic management node. + * + * This structure outlines the necessary parameters for adding a new TM node within the VPP system. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param parent_node_id - Identifier of the parent node. + * @param node_id - Identifier for the new TM node. + * @param weight - Weight assigned to the new node. + * @param shaper_id - Identifier of the shaper profile to be applied. + * @param lvl - Level of the new node in the hierarchy. + * @param priority - Priority level of the new node. + */ +define tm_sys_node_add +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + i32 parent_node_id; + u32 node_id; + u32 weight; + i32 shaper_id; + u32 lvl; + u32 priority; +}; + +/** + * @brief Reply for suspending a traffic management node. + * + * This structure specifies the parameters returned in response for suspending a TM node. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param node_id - Index of the TM node to be suspended. + */ +define tm_sys_node_suspend_reply +{ + u32 context; + i32 retval; + u32 node_id; +}; + +/** + * @brief Suspend an existing traffic management node. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param tm_node_id - Index of the TM node to be suspended. + */ +define tm_sys_node_suspend +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 tm_node_id; +}; + +/** + * @brief Reply for resuming a suspended traffic management node. + * + * This structure specifies the parameters returned in response for resuming a TM node.
+ * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param node_id - Index of the TM node to be resumed. + */ +define tm_sys_node_resume_reply +{ + u32 context; + i32 retval; + u32 node_id; +}; + +/** + * @brief Resume a suspended traffic management node. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param tm_node_id - Index of the TM node to be resumed. + */ +define tm_sys_node_resume +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 tm_node_id; +}; + +/** + * @brief Reply for deleting a traffic management node. + * + * This structure specifies the parameters returned in response for deleting a TM node. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param node_id - Index of the TM node to be deleted. + */ +define tm_sys_node_delete_reply +{ + u32 context; + i32 retval; + u32 node_id; +}; + +/** + * @brief Delete a traffic management node. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param tm_node_id - Index of the TM node to be deleted. + */ +define tm_sys_node_delete +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 tm_node_id; +}; + +/** + * @brief Reply for creating a shaper profile. + * + * This structure specifies the parameters returned in response to creating a new shaper profile. + * + * @param context - Sender context, to match reply with request. + * @param shaper_id - Identifier of the newly created shaper profile. + * @param retval - Return value indicating success or failure of the operation.
+ */ +define tm_sys_shaper_profile_create_reply +{ + u32 context; + u32 shaper_id; + i32 retval; +}; + +/** + * @brief Create a new shaper profile. + * + * This structure outlines the necessary parameters to create a new shaper profile. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param tm_shaper_id - Identifier for the new shaper profile. + * @param is_pkt_mode - packet_mode or byte_mode + * @param shaper_commit_rate - Committed rate for the shaper. + * @param shaper_commit_burst - Committed burst size for the shaper. + * @param shaper_peak_rate - Peak rate for the shaper. + * @param shaper_peak_burst - Peak burst size for the shaper. + * @param shaper_len_adjust - Length adjustment for the shaper. + */ +define tm_sys_shaper_profile_create +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 tm_shaper_id; + u8 is_pkt_mode; + u64 shaper_commit_rate; + u64 shaper_commit_burst; + u64 shaper_peak_rate; + u64 shaper_peak_burst; + i32 shaper_len_adjust; +}; + +/** + * @brief Reply for updating a node's shaper profile. + * + * This structure specifies the parameters returned in response to updating a shaper profile for a TM node. + * + * @param context - Sender context, to match reply with request. + * @param shaper_id - Identifier of the updated shaper profile. + * @param retval - Return value indicating success or failure of the operation. + */ +define tm_sys_node_shaper_update_reply +{ + u32 context; + u32 shaper_id; + i32 retval; +}; + +/** + * @brief Update a node's shaper profile. + * + * This structure outlines the necessary parameters to update the shaper profile of a TM node. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. 
+ * @param shaper_id - Identifier of the shaper profile to be applied. + * @param node_id - Identifier of the TM node to be updated. + */ +define tm_sys_node_shaper_update +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 shaper_id; + u32 node_id; +}; + +/** + * @brief Reply for deleting a shaper profile. + * + * This structure specifies the parameters returned in response to deleting a shaper profile. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param shaper_id - Identifier of the deleted shaper profile. + */ +define tm_sys_shaper_profile_delete_reply +{ + u32 context; + i32 retval; + u32 shaper_id; +}; + +/** + * @brief Delete a shaper profile. + * + * This structure outlines the necessary parameters to delete a shaper profile. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param shaper_id - Identifier of the shaper profile to be deleted. + */ +define tm_sys_shaper_profile_delete +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 shaper_id; +}; + +/** + * @brief Reply for updating the scheduling weight of a TM node. + * + * This structure specifies the parameters returned in response to updating the scheduling weight of a TM node. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + * @param node_id - Identifier of the TM node whose scheduling weight was updated. + */ +define tm_sys_node_sched_weight_update_reply +{ + u32 context; + i32 retval; + u32 node_id; +}; + +/** + * @brief Update the scheduling weight of a TM node. + * + * This structure outlines the necessary parameters to update the scheduling weight of a TM node. + * + * @param client_index - Opaque cookie to identify the sender. 
+ * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param node_id - Identifier of the TM node to be updated. + * @param weight - New weight to be assigned to the TM node. + */ +define tm_sys_node_sched_weight_update +{ + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 node_id; + u32 weight; +}; + +/** + * @brief Reply for getting the capabilities of a TM system. + * + * This structure specifies the parameters returned in response to getting the capabilities of a TM system. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + */ +define tm_sys_get_capabilities_reply +{ + u32 context; + i32 retval; +}; + +/** + * @brief Get the capabilities of a TM system. + * + * This structure outlines the necessary parameters to get the capabilities of a TM system. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + */ +define tm_sys_get_capabilities + { + u32 client_index; + u32 context; + u32 sw_if_idx; + }; + +/** + * @brief Reply for getting the capabilities of a specific level in a TM system. + * + * This structure specifies the parameters returned in response to getting the capabilities of a specific level in a TM system. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + */ +define tm_sys_level_get_capabilities_reply +{ + u32 context; + i32 retval; +}; + +/** + * @brief Get the capabilities of a specific level in a TM system. + * + * This structure outlines the necessary parameters to get the capabilities of a specific level in a TM system. + * + * @param client_index - Opaque cookie to identify the sender. 
+ * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param level - Level in the TM system whose capabilities are to be read. + */ +define tm_sys_level_get_capabilities + { + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 level; + }; + +/** + * @brief Reply for reading the statistics of a TM node. + * + * This structure specifies the parameters returned in response to reading the statistics of a TM node. + * + * @param context - Sender context, to match reply with request. + * @param node_id - Identifier of the TM node whose statistics are being read. + * @param retval - Return value indicating success or failure of the operation. + */ +define tm_sys_node_read_stats_reply +{ + u32 context; + u32 node_id; + i32 retval; +}; + +/** + * @brief Read the statistics of a TM node. + * + * This structure outlines the necessary parameters to read the statistics of a TM node. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + * @param node_id - Identifier of the TM node whose statistics are to be read. + */ +define tm_sys_node_read_stats + { + u32 client_index; + u32 context; + u32 sw_if_idx; + u32 node_id; +}; + +/** + * @brief Reply for starting the traffic management (TM) system. + * + * This structure specifies the parameters returned in response to starting the TM system. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + */ +define tm_sys_start_tm_reply +{ + u32 context; + i32 retval; +}; + +/** + * @brief Start the traffic management (TM) system. + * + * This structure outlines the necessary parameters to start the TM system. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. 
+ * @param sw_if_idx - Software interface index. + */ +define tm_sys_start_tm +{ + u32 client_index; + u32 context; + u32 sw_if_idx; +}; + +/** + * @brief Reply for stopping the traffic management (TM) system. + * + * This structure specifies the parameters returned in response to stopping the TM system. + * + * @param context - Sender context, to match reply with request. + * @param retval - Return value indicating success or failure of the operation. + */ +define tm_sys_stop_tm_reply +{ + u32 context; + i32 retval; +}; + +/** + * @brief Stop the traffic management (TM) system. + * + * This structure outlines the necessary parameters to stop the TM system. + * + * @param client_index - Opaque cookie to identify the sender. + * @param context - Sender context, to match reply with request. + * @param sw_if_idx - Software interface index. + */ +define tm_sys_stop_tm +{ + u32 client_index; + u32 context; + u32 sw_if_idx; +}; diff --git a/src/vnet/tm/tm.c b/src/vnet/tm/tm.c new file mode 100644 index 0000000000..f1039d96d9 --- /dev/null +++ b/src/vnet/tm/tm.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2024 Marvell.
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include + +tm_system_t tm_system_main; + +/** + * Fetch the TM callback table of the device class that owns hw_if_idx. + * May be NULL when no driver called tm_system_register() for that class. + */ +static tm_system_t * +tm_sys_impl_get (u32 hw_if_idx) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, hi->dev_class_index); + + return dev_class->tm_sys_impl; +} + +/** Install tm_sys as the TM callback table of hw_if_idx's device class. */ +int +tm_system_register (tm_system_t *tm_sys, u32 hw_if_idx) +{ + vnet_main_t *vnm = vnet_get_main (); + + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_idx); + + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, hi->dev_class_index); + + dev_class->tm_sys_impl = tm_sys; + + return 0; +} + +/* + * Each wrapper below forwards to the matching driver callback and returns + * its status. -1 is returned when the device class has no TM callback table + * or leaves the requested callback unset. + */ + +int +tm_sys_node_add (u32 hw_if_idx, u32 node_id, i32 parent_node_id, u32 priority, + u32 weight, u32 lvl, tm_node_params_t *params) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_add) + return -1; + + return tm->node_add (hw_if_idx, node_id, parent_node_id, priority, weight, + lvl, params); +} + +int +tm_sys_node_suspend (u32 hw_if_idx, u32 node_id) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_suspend) + return -1; + + return tm->node_suspend (hw_if_idx, node_id); +} + +int +tm_sys_node_resume (u32 hw_if_idx, u32 node_id) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_resume) + return -1; + + return tm->node_resume (hw_if_idx, node_id); +} + +int +tm_sys_node_delete (u32 hw_if_idx, u32 node_idx) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_delete) + return -1; + + return tm->node_delete (hw_if_idx, node_idx); +} + +int +tm_sys_shaper_profile_create (u32 hw_if_idx, tm_shaper_params_t *param) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->shaper_profile_create) + return -1; + + return tm->shaper_profile_create (hw_if_idx, param); +} + +int +tm_sys_node_shaper_update (u32 hw_if_idx, u32 node_id, u32 shaper_id) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_shaper_update) + return -1; + + return tm->node_shaper_update (hw_if_idx, node_id, shaper_id); +} +
+int +tm_sys_shaper_profile_delete (u32 hw_if_idx, u32 shaper_id) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->shaper_profile_delete) + return -1; + + return tm->shaper_profile_delete (hw_if_idx, shaper_id); +} + +int +tm_sys_node_sched_weight_update (u32 hw_if_idx, u32 node_id, u32 weight) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_sched_weight_update) + return -1; + + return tm->node_sched_weight_update (hw_if_idx, node_id, weight); +} + +int +tm_sys_node_read_stats (u32 hw_if_idx, u32 node_idx, tm_stats_params_t *param) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->node_read_stats) + return -1; + + return tm->node_read_stats (hw_if_idx, node_idx, param); +} + +int +tm_sys_get_capabilities (u32 hw_if_idx, tm_capa_params_t *param) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->tm_get_capabilities) + return -1; + + return tm->tm_get_capabilities (hw_if_idx, param); +} + +int +tm_sys_level_get_capabilities (u32 hw_if_idx, tm_level_capa_params_t *param, + u32 lvl) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->tm_level_get_capabilities) + return -1; + + return tm->tm_level_get_capabilities (hw_if_idx, param, lvl); +} +
+int +tm_sys_start_tm (u32 hw_if_idx) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->start_tm) + return -1; + + return tm->start_tm (hw_if_idx); +} + +int +tm_sys_stop_tm (u32 hw_if_idx) +{ + tm_system_t *tm = tm_sys_impl_get (hw_if_idx); + + if (!tm || !tm->stop_tm) + return -1; + + return tm->stop_tm (hw_if_idx); +} diff --git a/src/vnet/tm/tm.h b/src/vnet/tm/tm.h new file mode 100644 index 0000000000..57d6b7ee64 --- /dev/null +++ b/src/vnet/tm/tm.h @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _TM_H_ +#define _TM_H_ + +#include +#include +#include +#include +#include + +typedef struct tm_node_params_ +{ + /* Shaper profile for the node. */ + i32 shaper_profile_id; + + union + { + struct + { + /* The ingress queue buffer length */ + u32 ingress_q_len; + } leaf; + + struct + { + /** Number of SP priorities. */ + u32 num_sp_priorities; + /* Is scheduling done with pkt mode(1) or byte mode(0).
defined per sp + * priority */ + u8 *sched_pkt_mode; + } nonleaf; + }; + + /** Level Identifier of the node in the tm hierarchy */ + u32 level; + + /** Store Node specific data */ + void *data; + + /** TM Node id */ + u32 id; +} tm_node_params_t; + +typedef struct tm_shaper_params_ +{ + struct + { + /** Committed Information Rate. */ + u64 rate; + /** Max burst size for Committed information rate*/ + u64 burst_size; + } commit; + + struct + { + /** Peak Information Rate. */ + u64 rate; + /** Max burst size for Peak information rate. */ + u64 burst_size; + } peak; + + /** Value to be added to the length of each packet for the + * purpose of shaping. */ + i32 pkt_len_adj; + + /** Byte mode of Packet mode */ + u8 pkt_mode; + + /** Shaper profile ID */ + u32 shaper_id; +} tm_shaper_params_t; + +typedef enum +{ + TM_BYTE_BASED_WEIGHTS, + TM_FRAME_BASED_WEIGHTS +} tm_sched_mode_t; + +/** + * TM Color + */ +enum tm_color +{ + TM_COLOR_GREEN = 0, /**< Green */ + TM_COLOR_YELLOW, /**< Yellow */ + TM_COLOR_RED, /**< Red */ + TM_COLORS /**< Number of colors */ +}; + +/** + * The tm_node_stats_type enumeration lists possible packet or octet + * statistics at a tm node. 
+ */
+typedef enum tm_node_stats_type_t
+{
+  /** Packets dropped by this node after scheduling/shaping at this node */
+  TM_NODE_STATS_PKTS_DROPPED,
+  /** Octets dropped after scheduling/shaping at this node */
+  TM_NODE_STATS_OCTETS_DROPPED,
+  /** Green packets that are sent through this tm node */
+  TM_NODE_STATS_GREEN_PKTS,
+  /** Green octets that are sent through this tm node */
+  TM_NODE_STATS_GREEN_OCTETS,
+  /** Yellow packets that are sent through this tm node */
+  TM_NODE_STATS_YELLOW_PKTS,
+  /** Yellow octets that are sent through this tm node */
+  TM_NODE_STATS_YELLOW_OCTETS,
+  /** Red packets that are sent through this tm node */
+  TM_NODE_STATS_RED_PKTS,
+  /** Red octets that are sent through this tm node */
+  TM_NODE_STATS_RED_OCTETS,
+  /** Node stats max */
+  TM_NODE_STATS_MAX,
+} tm_node_stats_type_t;
+
+/**
+ * Node Capabilities Params
+ */
+typedef struct tm_capa_params_
+{
+  /** Maximum number of nodes. */
+  uint32_t n_nodes_max;
+
+  /** Maximum number of levels (i.e. number of nodes connecting the root
+   * node with any leaf node, including the root and the leaf).
+   */
+  uint32_t n_levels_max;
+
+  /** When non-zero, this flag indicates that all the non-leaf nodes
+   * (with the exception of the root node) have identical capability set.
+   */
+  int non_leaf_nodes_identical;
+
+  /** When non-zero, this flag indicates that all the leaf nodes have
+   * identical capability set.
+   */
+  int leaf_nodes_identical;
+
+  /** Maximum number of shapers, either private or shared. In case the
+   * implementation does not share any resources between private and
+   * shared shapers, it is typically equal to the sum of
+   * *shaper_private_n_max* and the shared shaper maximum (this structure
+   * has no field for the latter). The
+   * value of zero indicates that traffic shaping is not supported.
+   */
+  uint32_t shaper_n_max;
+  /** Maximum number of private shapers. Indicates the maximum number of
+   * nodes that can concurrently have their private shaper enabled. The
+   * value of zero indicates that private shapers are not supported.
+   */
+  uint32_t shaper_private_n_max;
+
+  /** Maximum number of private shapers that support dual rate shaping.
+   * Indicates the maximum number of nodes that can concurrently have
+   * their private shaper enabled with dual rate support. Only valid when
+   * private shapers are supported. The value of zero indicates that dual
+   * rate shaping is not available for private shapers. The maximum value
+   * is *shaper_private_n_max*.
+   */
+  int shaper_private_dual_rate_n_max;
+
+  /** Minimum committed/peak rate (bytes per second) for any private
+   * shaper. Valid only when private shapers are supported.
+   */
+  uint64_t shaper_private_rate_min;
+  /** Maximum committed/peak rate (bytes per second) for any private
+   * shaper. Valid only when private shapers are supported.
+   */
+  uint64_t shaper_private_rate_max;
+
+  /** Shaper private packet mode supported. When non-zero, this parameter
+   * indicates that there is at least one node that can be configured
+   * with packet mode in its private shaper. When shaper is configured
+   * in packet mode, committed/peak rate provided is interpreted
+   * in packets per second.
+   */
+  int shaper_private_packet_mode_supported;
+
+  /** Shaper private byte mode supported. When non-zero, this parameter
+   * indicates that there is at least one node that can be configured
+   * with byte mode in its private shaper. When shaper is configured
+   * in byte mode, committed/peak rate provided is interpreted in
+   * bytes per second.
+   */
+  int shaper_private_byte_mode_supported;
+  /** Minimum value allowed for packet length adjustment for any private
+   * or shared shaper.
+   */
+  int shaper_pkt_length_adjust_min;
+
+  /** Maximum value allowed for packet length adjustment for any private
+   * or shared shaper.
+   */
+  int shaper_pkt_length_adjust_max;
+
+  /** Maximum number of children nodes. This parameter indicates that
+   * there is at least one non-leaf node that can be configured with this
+   * many children nodes, which might not be true for all the non-leaf
+   * nodes.
+   */
+  uint32_t sched_n_children_max;
+
+  /** Maximum number of supported priority levels. This parameter
+   * indicates that there is at least one non-leaf node that can be
+   * configured with this many priority levels for managing its children
+   * nodes, which might not be true for all the non-leaf nodes. The value
+   * of zero is invalid. The value of 1 indicates that only priority 0 is
+   * supported, which essentially means that Strict Priority (SP)
+   * algorithm is not supported.
+   */
+  uint32_t sched_sp_n_priorities_max;
+  /** Maximum number of sibling nodes that can have the same priority at
+   * any given time, i.e. maximum size of the WFQ sibling node group. This
+   * parameter indicates there is at least one non-leaf node that meets
+   * this condition, which might not be true for all the non-leaf nodes.
+   * The value of zero is invalid. The value of 1 indicates that WFQ
+   * algorithm is not supported. The maximum value is
+   * *sched_n_children_max*.
+   */
+  uint32_t sched_wfq_n_children_per_group_max;
+
+  /** Maximum number of priority levels that can have more than one child
+   * node at any given time, i.e. maximum number of WFQ sibling node
+   * groups that have two or more members. This parameter indicates there
+   * is at least one non-leaf node that meets this condition, which might
+   * not be true for all the non-leaf nodes. The value of zero states that
+   * WFQ algorithm is not supported. The value of 1 indicates that
+   * (*sched_sp_n_priorities_max* - 1) priority levels have at most one
+   * child node, so there can be only one priority level with two or
+   * more sibling nodes making up a WFQ group. The maximum value is:
+   * min(floor(*sched_n_children_max* / 2), *sched_sp_n_priorities_max*).
+   */
+  uint32_t sched_wfq_n_groups_max;
+
+  /** Maximum WFQ weight. The value of 1 indicates that all sibling nodes
+   * with same priority have the same WFQ weight, so WFQ is reduced to FQ.
+   */
+  uint32_t sched_wfq_weight_max;
+
+  /** WFQ packet mode supported. When non-zero, this parameter indicates
+   * that there is at least one non-leaf node that supports packet mode
+   * for WFQ among its children. WFQ weights will be applied against
+   * packet count for scheduling children when a non-leaf node
+   * is configured appropriately.
+   */
+  int sched_wfq_packet_mode_supported;
+
+  /** WFQ byte mode supported. When non-zero, this parameter indicates
+   * that there is at least one non-leaf node that supports byte mode
+   * for WFQ among its children. WFQ weights will be applied against
+   * bytes for scheduling children when a non-leaf node is configured
+   * appropriately.
+   */
+  int sched_wfq_byte_mode_supported;
+
+} tm_capa_params_t;
+
+/**
+ * Traffic manager level capabilities
+ */
+typedef struct tm_level_capa_params_
+{
+  /** Maximum number of nodes for the current hierarchy level. */
+  uint32_t n_nodes_max;
+
+  /** Maximum number of non-leaf nodes for the current hierarchy level.
+   * The value of 0 indicates that current level only supports leaf
+   * nodes. The maximum value is *n_nodes_max*.
+   */
+  uint32_t n_nodes_nonleaf_max;
+
+  /** Maximum number of leaf nodes for the current hierarchy level. The
+   * value of 0 indicates that current level only supports non-leaf
+   * nodes. The maximum value is *n_nodes_max*.
+   */
+  uint32_t n_nodes_leaf_max;
+
+  /** When non-zero, this flag indicates that all the non-leaf nodes on
+   * this level have identical capability set. Valid only when
+   * *n_nodes_nonleaf_max* is non-zero.
+   */
+  int non_leaf_nodes_identical;
+
+  /** When non-zero, this flag indicates that all the leaf nodes on this
+   * level have identical capability set. Valid only when
+   * *n_nodes_leaf_max* is non-zero.
+   */
+  int leaf_nodes_identical;
+  union
+  {
+    /** Items valid only for the non-leaf nodes on this level. */
+    struct
+    {
+      /** Private shaper support. When non-zero, it indicates
+       * there is at least one non-leaf node on this level
+       * with private shaper support, which may not be the
+       * case for all the non-leaf nodes on this level.
+       */
+      int shaper_private_supported;
+
+      /** Dual rate support for private shaper. Valid only
+       * when private shaper is supported for the non-leaf
+       * nodes on the current level. When non-zero, it
+       * indicates there is at least one non-leaf node on this
+       * level with dual rate private shaper support, which
+       * may not be the case for all the non-leaf nodes on
+       * this level.
+       */
+      int shaper_private_dual_rate_supported;
+
+      /** Minimum committed/peak rate (bytes per second) for
+       * private shapers of the non-leaf nodes of this level.
+       * Valid only when private shaper is supported on this
+       * level.
+       */
+      uint64_t shaper_private_rate_min;
+
+      /** Maximum committed/peak rate (bytes per second) for
+       * private shapers of the non-leaf nodes on this level.
+       * Valid only when private shaper is supported on this
+       * level.
+       */
+      uint64_t shaper_private_rate_max;
+
+      /** Shaper private packet mode supported. When non-zero,
+       * this parameter indicates there is at least one
+       * non-leaf node at this level that can be configured
+       * with packet mode in its private shaper. When private
+       * shaper is configured in packet mode, committed/peak
+       * rate provided is interpreted in packets per second.
+       */
+      int shaper_private_packet_mode_supported;
+
+      /** Shaper private byte mode supported. When non-zero,
+       * this parameter indicates there is at least one
+       * non-leaf node at this level that can be configured
+       * with byte mode in its private shaper. When private
+       * shaper is configured in byte mode, committed/peak
+       * rate provided is interpreted in bytes per second.
+       */
+      int shaper_private_byte_mode_supported;
+
+      /** Maximum number of children nodes. This parameter
+       * indicates that there is at least one non-leaf node on
+       * this level that can be configured with this many
+       * children nodes, which might not be true for all the
+       * non-leaf nodes on this level.
+       */
+      uint32_t sched_n_children_max;
+      /** Maximum number of supported priority levels. This
+       * parameter indicates that there is at least one
+       * non-leaf node on this level that can be configured
+       * with this many priority levels for managing its
+       * children nodes, which might not be true for all the
+       * non-leaf nodes on this level. The value of zero is
+       * invalid. The value of 1 indicates that only priority
+       * 0 is supported, which essentially means that Strict
+       * Priority (SP) algorithm is not supported on this
+       * level.
+       */
+      uint32_t sched_sp_n_priorities_max;
+
+      /** Maximum number of sibling nodes that can have the
+       * same priority at any given time, i.e. maximum size of
+       * the WFQ sibling node group. This parameter indicates
+       * there is at least one non-leaf node on this level
+       * that meets this condition, which may not be true for
+       * all the non-leaf nodes on this level. The value of
+       * zero is invalid. The value of 1 indicates that WFQ
+       * algorithm is not supported on this level. The maximum
+       * value is *sched_n_children_max*.
+       */
+      uint32_t sched_wfq_n_children_per_group_max;
+
+      /** Maximum number of priority levels that can have
+       * more than one child node at any given time, i.e.
+       * maximum number of WFQ sibling node groups that
+       * have two or more members. This parameter indicates
+       * there is at least one non-leaf node on this level
+       * that meets this condition, which might not be true
+       * for all the non-leaf nodes. The value of zero states
+       * that WFQ algorithm is not supported on this level.
+       * The value of 1 indicates that
+       * (*sched_sp_n_priorities_max* - 1) priority levels on
+       * this level have at most one child node, so there can
+       * be only one priority level with two or more sibling
+       * nodes making up a WFQ group on this level. The
+       * maximum value is:
+       * min(floor(*sched_n_children_max* / 2),
+       * *sched_sp_n_priorities_max*).
+       */
+      uint32_t sched_wfq_n_groups_max;
+      /** Maximum WFQ weight. The value of 1 indicates that
+       * all sibling nodes on this level with same priority
+       * have the same WFQ weight, so on this level WFQ is
+       * reduced to FQ.
+       */
+      uint32_t sched_wfq_weight_max;
+
+      /** WFQ packet mode supported. When non-zero, this
+       * parameter indicates that there is at least one
+       * non-leaf node at this level that supports packet
+       * mode for WFQ among its children. WFQ weights will
+       * be applied against packet count for scheduling
+       * children when a non-leaf node is configured
+       * appropriately.
+       */
+      int sched_wfq_packet_mode_supported;
+
+      /** WFQ byte mode supported. When non-zero, this
+       * parameter indicates that there is at least one
+       * non-leaf node at this level that supports byte
+       * mode for WFQ among its children. WFQ weights will
+       * be applied against bytes for scheduling children
+       * when a non-leaf node is configured appropriately.
+       */
+      int sched_wfq_byte_mode_supported;
+
+      /** Mask of statistics counter types supported by the
+       * non-leaf nodes on this level. Every supported
+       * statistics counter type is supported by at least one
+       * non-leaf node on this level, which may not be true
+       * for all the non-leaf nodes on this level.
+       * @see tm_node_stats_type_t (presumably the bit positions; confirm)
+       */
+      uint64_t stats_mask;
+    } nonleaf;
+
+    /** Items valid only for the leaf nodes on this level. */
+    struct
+    {
+      /** Private shaper support. When non-zero, it indicates
+       * there is at least one leaf node on this level with
+       * private shaper support, which may not be the case for
+       * all the leaf nodes on this level.
+       */
+      int shaper_private_supported;
+
+      /** Dual rate support for private shaper. Valid only
+       * when private shaper is supported for the leaf nodes
+       * on this level. When non-zero, it indicates there is
+       * at least one leaf node on this level with dual rate
+       * private shaper support, which may not be the case for
+       * all the leaf nodes on this level.
+       */
+      int shaper_private_dual_rate_supported;
+
+      /** Minimum committed/peak rate (bytes per second) for
+       * private shapers of the leaf nodes of this level.
+       * Valid only when private shaper is supported for the
+       * leaf nodes on this level.
+       */
+      uint64_t shaper_private_rate_min;
+
+      /** Maximum committed/peak rate (bytes per second) for
+       * private shapers of the leaf nodes on this level.
+       * Valid only when private shaper is supported for the
+       * leaf nodes on this level.
+       */
+      uint64_t shaper_private_rate_max;
+
+      /** Shaper private packet mode supported. When non-zero,
+       * this parameter indicates there is at least one leaf
+       * node at this level that can be configured with
+       * packet mode in its private shaper. When private
+       * shaper is configured in packet mode, committed/peak
+       * rate provided is interpreted in packets per second.
+       */
+      int shaper_private_packet_mode_supported;
+      /** Shaper private byte mode supported. When non-zero,
+       * this parameter indicates there is at least one leaf
+       * node at this level that can be configured with
+       * byte mode in its private shaper. When private shaper
+       * is configured in byte mode, committed/peak rate
+       * provided is interpreted in bytes per second.
+       */
+      int shaper_private_byte_mode_supported;
+
+    } leaf;
+  };
+} tm_level_capa_params_t;
+
+/**
+ * Node statistics counters
+ */
+typedef struct tm_stats_params_
+{
+  /** Number of packets scheduled from current node. */
+  uint64_t n_pkts;
+
+  /** Number of bytes scheduled from current node. */
+  uint64_t n_bytes;
+
+  /** Statistics counters for leaf nodes only. */
+  struct
+  {
+    /** Number of packets dropped by current leaf node per each
+     * color.
+     */
+    uint64_t n_pkts_dropped[TM_COLORS];
+
+    /** Number of bytes dropped by current leaf node per each
+     * color.
+     */
+    uint64_t n_bytes_dropped[TM_COLORS];
+
+    /** Number of packets currently waiting in the packet queue of
+     * current leaf node.
+     */
+    uint64_t n_pkts_queued;
+    /** Number of bytes currently waiting in the packet queue of
+     * current leaf node.
+     */
+    uint64_t n_bytes_queued;
+  } leaf;
+} tm_stats_params_t;
+
+/**
+ * Table of traffic-manager callbacks supplied by a device driver.
+ * tm_system_register() stores a pointer to this table on the device class
+ * of an interface; each tm_sys_*() function then looks the table up from
+ * the hardware interface index and calls the callback of the same purpose
+ * (e.g. tm_sys_node_add() calls node_add).
+ */
+typedef struct tm_system_t_
+{
+  u32 hw_if_idx;
+  int (*node_add) (u32 hw_if_idx, u32 node_id, i32 parent_node_id,
+                   u32 priority, u32 weight, u32 lvl,
+                   tm_node_params_t *params);
+
+  int (*node_suspend) (u32 hw_if_idx, u32 node_idx);
+  int (*node_resume) (u32 hw_if_idx, u32 node_idx);
+  int (*node_delete) (u32 hw_if_idx, u32 node_idx);
+  int (*shaper_profile_create) (u32 hw_if_idx, tm_shaper_params_t *param);
+  int (*shaper_profile_delete) (u32 hw_if_idx, u32 shaper_id);
+  int (*node_shaper_update) (u32 hw_if_idx, u32 node_id,
+                             u32 shaper_profile_id);
+  int (*node_sched_weight_update) (u32 hw_if_idx, u32 node_id, u32 weight);
+  int (*node_read_stats) (u32 hw_if_idx, u32 node_idx,
+                          tm_stats_params_t *param);
+  int (*tm_get_capabilities) (u32 hw_if_idx, tm_capa_params_t *capa_param);
+  int (*tm_level_get_capabilities) (u32 hw_if_idx, tm_level_capa_params_t *cap,
+                                    u32 lvl);
+  int (*start_tm) (u32 hw_if_idx);
+  int (*stop_tm) (u32 hw_if_idx);
+} tm_system_t;
+
+/**
+ * @brief Add a new traffic management node and connect it to an
+ * existing parent node.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_id - Identifier for the new TM node to be created.
+ * @param parent_node_id - Identifier of the existing parent node.
+ * @param priority - Priority level of the new node.
+ * @param weight - Weight assigned to the new node.
+ * @param lvl - Level of the new node in the hierarchy.
+ * @param params - Pointer to the structure containing additional parameters
+ * for the TM node.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_add (u32 hw_if_idx, u32 node_id, i32 parent_node_id,
+                     u32 priority, u32 weight, u32 lvl,
+                     tm_node_params_t *params);
+
+/**
+ * @brief Suspend an existing traffic management node.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_idx - Index of the TM node to be suspended.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_suspend (u32 hw_if_idx, u32 node_idx);
+
+/**
+ * @brief Resume a suspended traffic management node.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_idx - Index of the TM node to be resumed.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_resume (u32 hw_if_idx, u32 node_idx);
+
+/**
+ * @brief Delete an existing traffic management node.
+ * A node can only be deleted if it has no child nodes
+ * connected to it.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_idx - Index of the TM node to be deleted.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_delete (u32 hw_if_idx, u32 node_idx);
+
+/**
+ * @brief Create a new shaper profile for traffic management.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param param - Pointer to the structure containing the shaper parameters.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_shaper_profile_create (u32 hw_if_idx, tm_shaper_params_t *param);
+
+/**
+ * @brief Update the shaper profile id of a TM node.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_id - Identifier of the TM node to be updated.
+ * @param shaper_profile_id - Identifier of the new shaper profile to be
+ * applied.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_shaper_update (u32 hw_if_idx, u32 node_id,
+                               u32 shaper_profile_id);
+
+/**
+ * @brief Delete an existing shaper profile.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param shaper_id - Identifier of the shaper profile to be deleted.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_shaper_profile_delete (u32 hw_if_idx, u32 shaper_id);
+
+/**
+ * @brief Update the scheduling weight of a TM node.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_id - Identifier of the TM node to be updated.
+ * @param weight - New scheduling weight to be assigned to the node.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_sched_weight_update (u32 hw_if_idx, u32 node_id, u32 weight);
+
+/**
+ * @brief Read statistics for a specific traffic management node.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param node_idx - Index of the TM node whose statistics are to be read.
+ * @param param - Pointer to the structure where the statistics will be stored.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_node_read_stats (u32 hw_if_idx, u32 node_idx,
+                            tm_stats_params_t *param);
+/**
+ * @brief Read capabilities for a specific traffic management system.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param capa_param - Pointer to the capabilities structure handed to the
+ * driver callback.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_get_capabilities (u32 hw_if_idx, tm_capa_params_t *capa_param);
+
+/**
+ * @brief Read level capabilities for a specific traffic management system.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ * @param cap - Pointer to the level capabilities structure handed to the
+ * driver callback.
+ * @param lvl - Hierarchy level being queried.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_level_get_capabilities (u32 hw_if_idx, tm_level_capa_params_t *cap,
+                                   u32 lvl);
+
+/**
+ * @brief Start the traffic management system.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_start_tm (u32 hw_if_idx);
+
+/**
+ * @brief Stop the traffic management system.
+ *
+ * @param hw_if_idx - Hardware interface index.
+ *
+ * @return 0 on success.
+ */
+int tm_sys_stop_tm (u32 hw_if_idx);
+
+/**
+ * @brief Register the traffic management (TM) system.
+ *
+ * @param tm_sys - Pointer to the TM system structure to be registered.
+ * @param hw_if_idx - Hardware interface index.
+ *
+ * @return 0 on success.
+ */ +int tm_system_register (tm_system_t *tm_sys, u32 hw_if_idx); +#endif diff --git a/src/vnet/tm/tm_api.c b/src/vnet/tm/tm_api.c new file mode 100644 index 0000000000..cffba4d47c --- /dev/null +++ b/src/vnet/tm/tm_api.c @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * Base message ID fot the plugin + */ +static u32 tm_base_msg_id; +#define REPLY_MSG_ID_BASE tm_base_msg_id + +#include + +void +vl_api_tm_sys_node_add_t_handler (vl_api_tm_sys_node_add_t *mp) +{ + vl_api_tm_sys_node_add_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + tm_node_params_t n_p; + int rv = -1; + u32 node_id = 0; + i32 parent_node_id = 0; + u32 priority = 0; + u32 weight = 0; + u32 lvl = 0; + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + node_id = clib_net_to_host_u32 (mp->node_id); + parent_node_id = clib_net_to_host_i32 (mp->parent_node_id); + n_p.shaper_profile_id = clib_net_to_host_i32 (mp->shaper_id); + weight = clib_net_to_host_u32 (mp->weight); + priority = clib_net_to_host_u32 (mp->priority); + lvl = clib_net_to_host_u32 (mp->lvl); + + rv = tm_sys_node_add (sw->hw_if_index, node_id, parent_node_id, priority, + weight, lvl, &n_p); + + REPLY_MACRO2 (VL_API_TM_SYS_NODE_ADD_REPLY, + ({ rmp->node_id = clib_host_to_net_u32 (node_id); })); +} + +void +vl_api_tm_sys_node_suspend_t_handler (vl_api_tm_sys_node_suspend_t *mp) +{ + vl_api_tm_sys_node_suspend_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + int rv = -1; + u32 node_id = 0; + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + node_id = clib_net_to_host_u32 (mp->tm_node_id); + + rv = tm_sys_node_suspend (sw->hw_if_index, node_id); + + REPLY_MACRO2 (VL_API_TM_SYS_NODE_SUSPEND_REPLY, + ({ rmp->node_id = clib_host_to_net_u32 (node_id); 
})); +} + +void +vl_api_tm_sys_node_resume_t_handler (vl_api_tm_sys_node_resume_t *mp) +{ + vl_api_tm_sys_node_resume_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + int rv = -1; + u32 node_id = 0; + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + node_id = clib_net_to_host_u32 (mp->tm_node_id); + rv = tm_sys_node_resume (sw->hw_if_index, node_id); + + REPLY_MACRO2 (VL_API_TM_SYS_NODE_RESUME_REPLY, + ({ rmp->node_id = clib_host_to_net_u32 (node_id); })); +} + +void +vl_api_tm_sys_node_delete_t_handler (vl_api_tm_sys_node_delete_t *mp) +{ + vl_api_tm_sys_node_delete_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + u32 node_id = 0; + int rv = -1; + + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + node_id = clib_net_to_host_u32 (mp->tm_node_id); + + rv = tm_sys_node_delete (sw->hw_if_index, node_id); + + REPLY_MACRO2 (VL_API_TM_SYS_NODE_DELETE_REPLY, + ({ rmp->node_id = clib_host_to_net_u32 (node_id); })); +} + +void +vl_api_tm_sys_shaper_profile_create_t_handler ( + vl_api_tm_sys_shaper_profile_create_t *mp) +{ + vl_api_tm_sys_shaper_profile_create_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + tm_shaper_params_t s_p; + int rv = -1; + + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + s_p.shaper_id = clib_net_to_host_u32 (mp->tm_shaper_id); + s_p.commit.rate = clib_net_to_host_u64 (mp->shaper_commit_rate); + s_p.commit.burst_size = clib_net_to_host_u64 (mp->shaper_commit_burst); + s_p.peak.rate = clib_net_to_host_u64 (mp->shaper_peak_rate); + s_p.peak.burst_size = clib_net_to_host_u64 (mp->shaper_peak_burst); + s_p.pkt_len_adj = clib_net_to_host_i64 (mp->shaper_len_adjust); + s_p.pkt_mode = mp->is_pkt_mode; + + rv = tm_sys_shaper_profile_create (sw->hw_if_index, &s_p); + + REPLY_MACRO2 (VL_API_TM_SYS_SHAPER_PROFILE_CREATE_REPLY, ({ + if (!rv) + rmp->shaper_id = clib_host_to_net_u32 
(s_p.shaper_id); + })); +} + +void +vl_api_tm_sys_node_shaper_update_t_handler ( + vl_api_tm_sys_node_shaper_update_t *mp) +{ + vl_api_tm_sys_node_shaper_update_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + u32 shaper_profile_id = 0; + u32 node_id = 0; + int rv = -1; + + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + node_id = clib_net_to_host_u32 (mp->node_id); + shaper_profile_id = clib_net_to_host_u32 (mp->shaper_id); + + rv = tm_sys_node_shaper_update (sw->hw_if_index, node_id, shaper_profile_id); + + REPLY_MACRO2 (VL_API_TM_SYS_NODE_SHAPER_UPDATE_REPLY, ({ + if (!rv) + rmp->shaper_id = clib_host_to_net_u32 (shaper_profile_id); + })); +} + +void +vl_api_tm_sys_shaper_profile_delete_t_handler ( + vl_api_tm_sys_shaper_profile_delete_t *mp) +{ + vl_api_tm_sys_shaper_profile_delete_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + u32 shaper_id = 0; + int rv = -1; + + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + shaper_id = clib_net_to_host_u32 (mp->shaper_id); + + rv = tm_sys_shaper_profile_delete (sw->hw_if_index, shaper_id); + + REPLY_MACRO2 (VL_API_TM_SYS_SHAPER_PROFILE_DELETE_REPLY, ({ + if (!rv) + rmp->shaper_id = clib_host_to_net_u32 (shaper_id); + })); +} + +void +vl_api_tm_sys_node_sched_weight_update_t_handler ( + vl_api_tm_sys_node_sched_weight_update_t *mp) +{ + vl_api_tm_sys_node_sched_weight_update_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + u32 node_id = 0, weight = 0; + int rv = -1; + + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + node_id = clib_net_to_host_u32 (mp->node_id); + weight = clib_net_to_host_u32 (mp->weight); + + rv = tm_sys_node_sched_weight_update (sw->hw_if_index, node_id, weight); + + REPLY_MACRO2 (VL_API_TM_SYS_NODE_SCHED_WEIGHT_UPDATE_REPLY, ({ + if (!rv) + rmp->node_id = clib_host_to_net_u32 (node_id); + })); +} + +void 
+vl_api_tm_sys_node_read_stats_t_handler (vl_api_tm_sys_node_read_stats_t *mp)
+{
+  vl_api_tm_sys_node_read_stats_reply_t *rmp;
+  vnet_main_t *vnm = vnet_get_main ();
+  tm_stats_params_t s_p = { 0 };
+  u32 node_id = 0;
+  int rv = -1;
+
+  vnet_sw_interface_t *sw =
+    vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx));
+
+  node_id = clib_net_to_host_u32 (mp->node_id);
+
+  rv = tm_sys_node_read_stats (sw->hw_if_index, node_id, &s_p);
+
+  /* NOTE(review): only node_id is echoed back; the values read into s_p
+   * are not copied into the reply -- confirm this is intended. */
+  REPLY_MACRO2 (VL_API_TM_SYS_NODE_READ_STATS_REPLY,
+                ({ rmp->node_id = clib_host_to_net_u32 (node_id); }));
+}
+
+void
+vl_api_tm_sys_get_capabilities_t_handler (vl_api_tm_sys_get_capabilities_t *mp)
+{
+  vl_api_tm_sys_get_capabilities_reply_t *rmp;
+  vnet_main_t *vnm = vnet_get_main ();
+  tm_capa_params_t s_p = { 0 };
+  int rv = -1;
+
+  vnet_sw_interface_t *sw =
+    vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx));
+
+  rv = tm_sys_get_capabilities (sw->hw_if_index, &s_p);
+
+  REPLY_MACRO (VL_API_TM_SYS_GET_CAPABILITIES_REPLY);
+}
+
+/*
+ * Binary API handler for tm_sys_level_get_capabilities.
+ *
+ * Resolves the request's sw_if_idx to its supervising sw interface, asks
+ * tm_sys_level_get_capabilities () about the level named in the request
+ * and replies with that call's return code.
+ */
+void
+vl_api_tm_sys_level_get_capabilities_t_handler (
+  vl_api_tm_sys_level_get_capabilities_t *mp)
+{
+  vl_api_tm_sys_level_get_capabilities_reply_t *rmp;
+  vnet_main_t *vnm = vnet_get_main ();
+  tm_level_capa_params_t s_p = { 0 };
+  int rv = -1;
+  u32 lvl = 0;
+
+  vnet_sw_interface_t *sw =
+    vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx));
+
+  /* The level is sent in network byte order.  It used to be ignored, so
+   * the query was always issued for level 0 whatever the client asked. */
+  lvl = clib_net_to_host_u32 (mp->level);
+
+  rv = tm_sys_level_get_capabilities (sw->hw_if_index, &s_p, lvl);
+
+  REPLY_MACRO (VL_API_TM_SYS_LEVEL_GET_CAPABILITIES_REPLY);
+}
+
+void
+vl_api_tm_sys_start_tm_t_handler (vl_api_tm_sys_start_tm_t *mp)
+{
+  vl_api_tm_sys_start_tm_reply_t *rmp;
+  vnet_main_t *vnm = vnet_get_main ();
+  int rv = -1;
+
+  vnet_sw_interface_t *sw =
+    vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx));
+
+  rv = tm_sys_start_tm (sw->hw_if_index);
+
+  REPLY_MACRO (VL_API_TM_SYS_START_TM_REPLY);
+}
+
+void
+vl_api_tm_sys_stop_tm_t_handler (vl_api_tm_sys_stop_tm_t *mp)
+{
+  vl_api_tm_sys_stop_tm_reply_t *rmp;
+
vnet_main_t *vnm = vnet_get_main (); + int rv = -1; + + vnet_sw_interface_t *sw = + vnet_get_sup_sw_interface (vnm, clib_net_to_host_u32 (mp->sw_if_idx)); + + rv = tm_sys_stop_tm (sw->hw_if_index); + + REPLY_MACRO (VL_API_TM_SYS_STOP_TM_REPLY); +} + +#include + +static clib_error_t * +tm_api_init (vlib_main_t *vm) +{ + /* Ask for a correctly-sized block of API message decode slots */ + tm_base_msg_id = setup_message_id_table (); + + return 0; +} + +VLIB_INIT_FUNCTION (tm_api_init); diff --git a/src/vnet/tm/tm_test.c b/src/vnet/tm/tm_test.c new file mode 100644 index 0000000000..12287631bd --- /dev/null +++ b/src/vnet/tm/tm_test.c @@ -0,0 +1,711 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include +#include +#include +#include + +#include + +#include + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} tm_test_main_t; + +tm_test_main_t tm_test_main; + +#define __plugin_msg_base tm_test_main.msg_id_base +#include +uword unformat_sw_if_index (unformat_input_t *input, va_list *args); + +/* Declare message IDs */ +#include +#include +#include + +static int +api_tm_sys_node_add (vat_main_t *vam) +{ + u32 level, priority, node_id, weight; + i32 parent_node_id = 0; + i32 shaper_id = 0; + u8 priority_set = 0, level_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_add_t *mp; + u32 msg_size = sizeof (*mp); + u8 sw_if_idx_set = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "node_id %u", &node_id)) + ; + else if (unformat (i, "parent_node_id %d", &parent_node_id)) + ; + else if (unformat (i, "shaper_prof %d", &shaper_id)) + ; + else if (unformat (i, "weight %u", &weight)) + ; + else if (unformat 
(i, "priority %u", &priority)) + priority_set = 1; + else if (unformat (i, "level %u", &level)) + level_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !priority_set || !level_set) + return -EINVAL; + + M (TM_SYS_NODE_ADD, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->node_id = clib_host_to_net_u32 (node_id); + mp->parent_node_id = clib_host_to_net_i32 (parent_node_id); + mp->shaper_id = clib_host_to_net_i32 (shaper_id); + mp->weight = clib_host_to_net_u32 (weight); + mp->priority = clib_host_to_net_u32 (priority); + mp->lvl = clib_host_to_net_u32 (level); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_node_suspend (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0, tm_node_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_suspend_t *mp; + u32 msg_size = sizeof (*mp); + u32 tm_node_id = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "tm_node_id %u", &tm_node_id)) + tm_node_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !tm_node_idx_set) + return -EINVAL; + + M (TM_SYS_NODE_SUSPEND, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->tm_node_id = clib_host_to_net_u32 (tm_node_id); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_node_resume (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0, tm_node_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_resume_t *mp; + u32 msg_size = sizeof (*mp); + u32 tm_node_id = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat 
(i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "tm_node_id %u", &tm_node_id)) + tm_node_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !tm_node_idx_set) + return -EINVAL; + + M (TM_SYS_NODE_RESUME, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->tm_node_id = clib_host_to_net_u32 (tm_node_id); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_node_delete (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0, tm_node_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_delete_t *mp; + u32 msg_size = sizeof (*mp); + u32 tm_node_id = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "tm_node_id %u", &tm_node_id)) + tm_node_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !tm_node_idx_set) + return -EINVAL; + + M (TM_SYS_NODE_DELETE, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->tm_node_id = clib_host_to_net_u32 (tm_node_id); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_shaper_profile_create (vat_main_t *vam) +{ + vl_api_tm_sys_shaper_profile_create_t *mp; + unformat_input_t *i = vam->input; + u32 msg_size = sizeof (*mp); + i32 shaper_len_adjust = 0; + u64 shaper_commit_rate = 0; + u64 shaper_commit_burst = 0; + u64 shaper_peak_rate = 0; + u64 shaper_peak_burst = 0; + u32 tm_shaper_id = 0; + u8 sw_if_idx_set = 0, tm_shaper_id_set = 0; + u32 is_packet_mode = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + 
sw_if_idx_set = 1; + else if (unformat (i, "shaper_id %u", &tm_shaper_id)) + tm_shaper_id_set = 1; + else if (unformat (i, "packet_mode %u", &is_packet_mode)) + ; + else if (unformat (i, "shaper_peak_burst %llu", &shaper_peak_burst)) + ; + else if (unformat (i, "shaper_commit_rate %llu", &shaper_commit_rate)) + ; + else if (unformat (i, "shaper_commit_burst %llu", &shaper_commit_burst)) + ; + else if (unformat (i, "shaper_peak_rate %llu", &shaper_peak_rate)) + ; + else if (unformat (i, "shaper_len_adjust %d", &shaper_len_adjust)) + ; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !tm_shaper_id_set) + return -EINVAL; + + M (TM_SYS_SHAPER_PROFILE_CREATE, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->tm_shaper_id = clib_host_to_net_u32 (tm_shaper_id); + mp->is_pkt_mode = (u8) is_packet_mode; + mp->shaper_commit_rate = clib_host_to_net_u64 (shaper_commit_rate); + mp->shaper_commit_burst = clib_host_to_net_u64 (shaper_commit_burst); + mp->shaper_peak_rate = clib_host_to_net_u64 (shaper_peak_rate); + mp->shaper_peak_burst = clib_host_to_net_u64 (shaper_peak_burst); + mp->shaper_len_adjust = clib_host_to_net_i64 (shaper_len_adjust); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_node_shaper_update (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0, shaper_profile_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_shaper_update_t *mp; + u32 msg_size = sizeof (*mp); + u32 shaper_profile = 0, node_id = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "shaper_profile %d", &shaper_profile)) + shaper_profile_set = 1; + else if (unformat (i, "node_id %u", &node_id)) + ; + else + { + clib_warning ("Invalid input, unknown parameter"); + return 
-EINVAL; + } + } + + if (!sw_if_idx_set || !shaper_profile_set) + return -EINVAL; + + M (TM_SYS_NODE_SHAPER_UPDATE, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->shaper_id = clib_host_to_net_u32 (shaper_profile); + mp->node_id = clib_host_to_net_u32 (node_id); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_shaper_profile_delete (vat_main_t *vam) +{ + vl_api_tm_sys_shaper_profile_delete_t *mp; + unformat_input_t *i = vam->input; + u32 msg_size = sizeof (*mp); + u8 sw_if_idx_set = 0, shaper_id_set = 0; + u32 sw_if_idx = 0; + u32 shaper_id = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "shaper_id %u", &shaper_id)) + shaper_id_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !shaper_id_set) + return -EINVAL; + + M (TM_SYS_SHAPER_PROFILE_DELETE, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->shaper_id = clib_host_to_net_u32 (shaper_id); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_node_sched_weight_update (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_sched_weight_update_t *mp; + u32 msg_size = sizeof (*mp); + u32 node_id = 0, weight = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "node_id %u", &node_id)) + ; + else if (unformat (i, "weight %u", &weight)) + ; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (TM_SYS_NODE_SCHED_WEIGHT_UPDATE, mp); + + 
mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->node_id = clib_host_to_net_u32 (node_id); + mp->weight = clib_host_to_net_u32 (weight); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_node_read_stats (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0, tm_node_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_node_read_stats_t *mp; + u32 msg_size = sizeof (*mp); + u32 tm_node_id = 0; + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "tm_node_id %u", &tm_node_id)) + tm_node_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !tm_node_idx_set) + return -EINVAL; + + M (TM_SYS_NODE_READ_STATS, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->node_id = clib_host_to_net_u32 (tm_node_id); + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_get_capabilities (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_get_capabilities_t *mp; + u32 msg_size = sizeof (*mp); + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (TM_SYS_GET_CAPABILITIES, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_level_get_capabilities (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0, tm_lvl_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_level_get_capabilities_t *mp; + u32 msg_size = sizeof (*mp); 
+ u32 tm_lvl, sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else if (unformat (i, "tm_level %u", &tm_lvl)) + tm_lvl_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set || !tm_lvl_idx_set) + return -EINVAL; + + M (TM_SYS_LEVEL_GET_CAPABILITIES, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + mp->level = clib_host_to_net_u32 (tm_lvl); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_start_tm (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_start_tm_t *mp; + u32 msg_size = sizeof (*mp); + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (TM_SYS_START_TM, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + + S (mp); + W (ret); + return ret; +} + +static int +api_tm_sys_stop_tm (vat_main_t *vam) +{ + u8 sw_if_idx_set = 0; + unformat_input_t *i = vam->input; + vl_api_tm_sys_stop_tm_t *mp; + u32 msg_size = sizeof (*mp); + u32 sw_if_idx = 0; + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client (msg_size); + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_idx %u", &sw_if_idx)) + sw_if_idx_set = 1; + else + { + clib_warning ("Invalid input, unknown parameter"); + return -EINVAL; + } + } + + if (!sw_if_idx_set) + return -EINVAL; + + M (TM_SYS_STOP_TM, mp); + + mp->sw_if_idx = clib_host_to_net_u32 (sw_if_idx); + + S (mp); + W (ret); + return 
ret; +} + +static void +vl_api_tm_sys_node_add_reply_t_handler (vl_api_tm_sys_node_add_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM node_add_id : %u\n", clib_net_to_host_u32 (mp->node_id)); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_node_suspend_reply_t_handler ( + vl_api_tm_sys_node_suspend_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM node_suspend_id : %u\n", + clib_net_to_host_u32 (mp->node_id)); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_node_resume_reply_t_handler ( + vl_api_tm_sys_node_resume_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM node_resume_ id : %u\n", + clib_net_to_host_u32 (mp->node_id)); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_node_delete_reply_t_handler ( + vl_api_tm_sys_node_delete_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM node_delete_id : %u\n", + clib_net_to_host_u32 (mp->node_id)); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_shaper_profile_create_reply_t_handler ( + vl_api_tm_sys_shaper_profile_create_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("Shaper profile id : %u\n", + clib_net_to_host_u32 (mp->shaper_id)); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_node_shaper_update_reply_t_handler ( + vl_api_tm_sys_node_shaper_update_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM node updated shaper id : %d\n", + clib_net_to_host_u32 (mp->shaper_id)); + + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_shaper_profile_delete_reply_t_handler ( + vl_api_tm_sys_shaper_profile_delete_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM shaper profile delete id : %u\n", + clib_net_to_host_u32 (mp->shaper_id)); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_node_sched_weight_update_reply_t_handler ( + 
vl_api_tm_sys_node_sched_weight_update_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM node sched weight updated\n"); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_node_read_stats_reply_t_handler ( + vl_api_tm_sys_node_read_stats_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM stats for node id : %u\n", + clib_net_to_host_u32 (mp->node_id)); + + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_get_capabilities_reply_t_handler ( + vl_api_tm_sys_get_capabilities_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM Capability Passed : %u\n"); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_level_get_capabilities_reply_t_handler ( + vl_api_tm_sys_level_get_capabilities_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + clib_warning ("TM Level Capability Passed : %u\n"); + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_start_tm_reply_t_handler (vl_api_tm_sys_start_tm_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + vam->result_ready = 1; +} + +static void +vl_api_tm_sys_stop_tm_reply_t_handler (vl_api_tm_sys_stop_tm_reply_t *mp) +{ + vat_main_t *vam = tm_test_main.vat_main; + vam->result_ready = 1; +} + +#include diff --git a/src/vppinfra/time_range.c b/src/vppinfra/time_range.c index 4b5e130376..076832b271 100644 --- a/src/vppinfra/time_range.c +++ b/src/vppinfra/time_range.c @@ -272,6 +272,19 @@ format_clib_timebase_time (u8 * s, va_list * args) return (s); } +__clib_export void +sprintf_clib_timebase_time (u8 *s, f64 now) +{ + clib_timebase_component_t _c, *cp = &_c; + + clib_timebase_time_to_components (now, cp); + + sprintf ((char *) s, "%s, %u %s %u %u:%02u:%02u", + day_names_epoch_order[cp->day_name_index], cp->day, + month_short_names[cp->month], cp->year, cp->hour, cp->minute, + cp->second); +} + uword unformat_clib_timebase_range_hms (unformat_input_t * input, va_list * args) { diff --git 
a/src/vppinfra/time_range.h b/src/vppinfra/time_range.h index 993bf9c360..7e95dcb398 100644 --- a/src/vppinfra/time_range.h +++ b/src/vppinfra/time_range.h @@ -68,6 +68,8 @@ unformat_function_t unformat_clib_timebase_range_hms; unformat_function_t unformat_clib_timebase_range_vector; format_function_t format_clib_timebase_time; +void sprintf_clib_timebase_time (u8 *s, f64 now); +#define CLIB_TIMEBASE_STR_MAX_SZ 60 static inline f64 clib_timebase_summer_offset_fastpath (clib_timebase_t * tb, f64 now) diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index 6a7328bd87..e158526ecc 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -50,8 +50,8 @@ __clib_export __thread uword __os_thread_index = 0; __clib_export __thread uword __os_numa_index = 0; -clib_error_t * -clib_file_n_bytes (char *file, uword * result) +__clib_export clib_error_t * +clib_file_n_bytes (char *file, uword *result) { struct stat s; diff --git a/test/test_flowprobe.py b/test/test_flowprobe.py index 609099980f..ac0433abc0 100644 --- a/test/test_flowprobe.py +++ b/test/test_flowprobe.py @@ -1228,7 +1228,6 @@ def test_0002(self): self.logger.info("FFP_TEST_FINISH_0002") -@tag_fixme_vpp_workers class DatapathTx(MethodHolder, DatapathTestsHolder): """Collect info on Ethernet, IP4 and IP6 datapath (TX) (no timers)""" @@ -1309,7 +1308,6 @@ def test_rewritten_traffic(self): ipfix.remove_vpp_config() -@tag_fixme_vpp_workers class DatapathRx(MethodHolder, DatapathTestsHolder): """Collect info on Ethernet, IP4 and IP6 datapath (RX) (no timers)""" diff --git a/test/test_ipsec_tun_if_esp.py b/test/test_ipsec_tun_if_esp.py index 5131fbefe7..a7f91b9e96 100644 --- a/test/test_ipsec_tun_if_esp.py +++ b/test/test_ipsec_tun_if_esp.py @@ -1231,13 +1231,35 @@ def test_tun_44(self): self.config_sa_tra(p) self.config_protect(p) - tx = self.gen_pkts(self.pg1, src=self.pg1.remote_ip4, dst=p.remote_tun_if_host) + tx = self.gen_pkts( + self.pg1, src=self.pg1.remote_ip4, 
dst=p.remote_tun_if_host, count=127 + ) self.send_and_assert_no_replies(self.pg1, tx) self.unconfig_protect(p) self.unconfig_sa(p) self.unconfig_network(p) + def test_tun_44_async(self): + """IPSec SA with NULL algos using async crypto""" + p = self.ipv4_params + + self.vapi.ipsec_set_async_mode(async_enable=True) + self.config_network(p) + self.config_sa_tra(p) + self.config_protect(p) + + tx = self.gen_pkts( + self.pg1, src=self.pg1.remote_ip4, dst=p.remote_tun_if_host, count=127 + ) + self.send_and_assert_no_replies(self.pg1, tx) + + self.unconfig_protect(p) + self.unconfig_sa(p) + self.unconfig_network(p) + + self.vapi.ipsec_set_async_mode(async_enable=False) + @tag_fixme_vpp_workers class TestIpsec6MultiTunIfEsp(TemplateIpsec6TunProtect, TemplateIpsec, IpsecTun6):