# Base stage: pulls the quay.io/dmaa/vllm-openai image.
# NOTE(review): {{VERSION}} is presumably a template placeholder that build
# tooling replaces with a concrete image tag before `docker build` — confirm.
FROM quay.io/dmaa/vllm-openai:{{VERSION}} AS vllm-base

# Final stage: starts from vllm-base; the extra Python dependencies and the
# runtime settings declared below are layered on top of it.
FROM vllm-base AS sagemaker-serving

# Disabled step: would make the serve script executable. Presumably
# /usr/bin/serve already ships with the executable bit set — TODO confirm.
# RUN chmod +x /usr/bin/serve

# Install the extra Python dependencies for Qwen3-VL in a single layer.
#
# transformers: vLLM v0.11.0+ requires transformers >= 4.57.0 for qwen3_vl_moe
# support, so any version already present is upgraded first. The upgrade is a
# separate pip invocation so that --upgrade applies to transformers only.
#
# qwen-vl-utils (pinned) is installed for better video/image processing;
# boto3, hf_transfer and modelscope are installed alongside it.
# NOTE(review): boto3, hf_transfer and modelscope are unpinned, so rebuilds can
# pick up different versions — consider pinning once known-good versions are
# chosen.
RUN pip install --no-cache-dir --upgrade "transformers>=4.57.0" \
    && pip install --no-cache-dir \
        boto3 \
        hf_transfer \
        modelscope \
        "qwen-vl-utils==0.0.14"

# Default working directory for the container process.
WORKDIR /opt/ml/code

# Document the port the server is expected to listen on. EXPOSE is metadata
# only; actually publishing the port is up to the container runtime.
EXPOSE 8080

# Start the serve script using exec (JSON array) form, so no shell wraps it.
# NOTE(review): no instruction visible here adds /usr/bin/serve, so it is
# presumably provided by the base image — confirm.
ENTRYPOINT ["/usr/bin/serve"]