#!/usr/bin/env bash
# Step-by-step guide to install the test bench.
# Upgrade pip and install DVC with S3 support (used to pull the test data).
python3 -m pip install --upgrade pip
python3 -m pip install --upgrade pyopenssl
# Quote the extras spec so '[s3]' is not treated as a shell glob.
python3 -m pip install --upgrade 'dvc[s3]'

# Clone the methodology repository and enter it.
git clone https://github.com/dataforgoodfr/12_genai_impact_methodo.git
cd 12_genai_impact_methodo || exit 1

# Store the Scaleway credentials in the local (non-committed) DVC config.
# Replace the <...> placeholders with your actual keys; they are quoted so the
# shell does not interpret '<' / '>' as redirections.
dvc remote modify --local genai-impact-remote access_key_id "<SCW_ACCESS_KEY_ID>"
dvc remote modify --local genai-impact-remote secret_access_key "<SCW_SECRET_ACCESS_KEY>"

# Fetch the DVC-tracked data from the remote.
dvc pull
# Build the TensorRT-LLM base image and the Triton server image.
# Build the TensorRT-LLM base image from the Triton minimal base.
cd tensorrtllm_engine || exit 1

# Pin the base image and the TensorRT version/download URLs (CUDA 12.2 builds).
BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.03-py3-min
TRT_VERSION=9.3.0.1
TRT_URL_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/9.3.0/tensorrt-9.3.0.1.linux.x86_64-gnu.cuda-12.2.tar.gz
TRT_URL_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/9.3.0/tensorrt-9.3.0.1.Ubuntu-22.04.aarch64-gnu.cuda-12.2.tar.gz

# NOTE: continuations use a single '\' — a doubled '\\' would escape the
# backslash itself and split the command into broken fragments.
docker build -t trtllm_base \
  --build-arg BASE_IMAGE="${BASE_IMAGE}" \
  --build-arg TRT_VER="${TRT_VERSION}" \
  --build-arg RELEASE_URL_TRT_x86="${TRT_URL_x86}" \
  --build-arg RELEASE_URL_TRT_ARM="${TRT_URL_ARM}" \
  -f dockerfile/Dockerfile.triton.trt_llm_backend .
# Run the build script from Triton Server repo. The flags for some features or
# endpoints can be removed if not needed. Please refer to the support matrix to
# see the aligned versions: <https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html>
# Build the Triton server image on top of the trtllm_base image built above.
# Keep these tags aligned per the NVIDIA support matrix (see link above).
TRTLLM_BASE_IMAGE=trtllm_base
TENSORRTLLM_BACKEND_REPO_TAG=v0.9.0
PYTHON_BACKEND_REPO_TAG=r24.03

cd server || exit 1

# Single '\' line continuations (a doubled '\\' would break the command);
# variable expansions are quoted to survive any unusual characters.
./build.py -v --no-container-interactive --enable-logging --enable-stats --enable-tracing \
  --enable-metrics --enable-gpu-metrics --enable-cpu-metrics \
  --backend=ensemble --enable-gpu --endpoint=http --endpoint=grpc \
  --no-container-pull \
  --image=base,"${TRTLLM_BASE_IMAGE}" \
  --backend=tensorrtllm:"${TENSORRTLLM_BACKEND_REPO_TAG}" \
  --backend=python:"${PYTHON_BACKEND_REPO_TAG}"
# Log in to the Scaleway container registry. The secret key is piped on stdin
# (via --password-stdin) so it never appears in shell history or `ps` output.
# Export SCW_SECRET_KEY in your environment before running this step.
: "${SCW_SECRET_KEY:?SCW_SECRET_KEY must be set}"
printf '%s' "${SCW_SECRET_KEY}" \
  | docker login rg.fr-par.scw.cloud/test-bench-public-registry -u nologin --password-stdin

# Tag the freshly built image and push it to the public registry.
docker tag tritonserver:latest rg.fr-par.scw.cloud/test-bench-public-registry/tritonserver:latest
docker push rg.fr-par.scw.cloud/test-bench-public-registry/tritonserver:latest