Step-by-step guide to install the test-bench

python3 -m pip install pip --upgrade
pip install pyopenssl --upgrade

pip install -U "dvc[s3]"

git clone https://github.com/dataforgoodfr/12_genai_impact_methodo.git
cd 12_genai_impact_methodo

# Set the credentials of the DVC remote; --local writes them to .dvc/config.local, which is not tracked by Git.

dvc remote modify --local genai-impact-remote access_key_id <SCW_ACCESS_KEY_ID>
dvc remote modify --local genai-impact-remote secret_access_key <SCW_SECRET_ACCESS_KEY>

dvc pull

Build trtllm + triton

cd tensorrtllm_engine
BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.03-py3-min
TRT_VERSION=9.3.0.1
TRT_URL_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/9.3.0/tensorrt-9.3.0.1.linux.x86_64-gnu.cuda-12.2.tar.gz
TRT_URL_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/9.3.0/tensorrt-9.3.0.1.Ubuntu-22.04.aarch64-gnu.cuda-12.2.tar.gz

docker build -t trtllm_base \
             --build-arg BASE_IMAGE="${BASE_IMAGE}" \
             --build-arg TRT_VER="${TRT_VERSION}" \
             --build-arg RELEASE_URL_TRT_x86="${TRT_URL_x86}" \
             --build-arg RELEASE_URL_TRT_ARM="${TRT_URL_ARM}" \
             -f dockerfile/Dockerfile.triton.trt_llm_backend .

# Run the build script from Triton Server repo. The flags for some features or
# endpoints can be removed if not needed. Please refer to the support matrix to
# see the aligned versions: <https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html>
TRTLLM_BASE_IMAGE=trtllm_base
TENSORRTLLM_BACKEND_REPO_TAG=v0.9.0
PYTHON_BACKEND_REPO_TAG=r24.03

cd server
./build.py -v --no-container-interactive --enable-logging --enable-stats --enable-tracing \
              --enable-metrics --enable-gpu-metrics --enable-cpu-metrics \
              --backend=ensemble --enable-gpu --endpoint=http --endpoint=grpc \
              --no-container-pull \
              --image=base,${TRTLLM_BASE_IMAGE} \
              --backend=tensorrtllm:${TENSORRTLLM_BACKEND_REPO_TAG} \
              --backend=python:${PYTHON_BACKEND_REPO_TAG}

docker login rg.fr-par.scw.cloud/test-bench-public-registry -u nologin --password-stdin <<< "<SCW_SECRET_KEY>"

docker tag tritonserver:latest rg.fr-par.scw.cloud/test-bench-public-registry/tritonserver:latest

docker push rg.fr-par.scw.cloud/test-bench-public-registry/tritonserver:latest