Commits (25)
dccea3c
NUMA mirroring implementation with inference performance boost
dbsanfte Sep 14, 2025
06a46ce
numa mirroring
dbsanfte Sep 14, 2025
435f095
copilot instructions
dbsanfte Sep 14, 2025
c665d3c
1) fix CPU detection of physical cores 2) fix tensor_data() access in…
dbsanfte Sep 15, 2025
d357ef5
Merge branch 'master' into numa-mirror
dbsanfte Sep 15, 2025
6d309d5
cleanup refs and logging
dbsanfte Sep 15, 2025
48d8d59
cleanup more logging, add impl details for LLM agent
dbsanfte Sep 15, 2025
4f7562d
optimisation: force all cplan work buffers to allocate on Numa node 0
dbsanfte Sep 15, 2025
a665a0c
remove unnecessary ifdef
dbsanfte Sep 15, 2025
4b016f7
tidy up compiler warnings
dbsanfte Sep 15, 2025
166b978
tidy up formatting
dbsanfte Sep 15, 2025
c951357
add guard clause: --numa mirror requires OpenMP
dbsanfte Sep 15, 2025
34a5017
fix cuda
dbsanfte Sep 15, 2025
b8bf5fa
don't try to mirror weights when we're not in `--numa mirror` mode. A…
dbsanfte Sep 15, 2025
4da24f7
all tensors we load in llama-model-loader.cpp are model weights.
dbsanfte Sep 15, 2025
b41a837
rename instructions file
dbsanfte Sep 17, 2025
98135c9
experimental - interleave work buffers
dbsanfte Sep 17, 2025
fa3a5b4
update docs
dbsanfte Sep 17, 2025
23c9784
add thread-local to tell threads how many numas are active in mirror …
dbsanfte Sep 17, 2025
6ad6795
update instructions
dbsanfte Sep 17, 2025
c19cd80
check in devcontainer
dbsanfte Sep 18, 2025
8c00fb0
update devcontainer json
dbsanfte Sep 18, 2025
313bf8a
update devcontainer to package git-lfs
dbsanfte Sep 18, 2025
e227c75
fix vulkan builds
dbsanfte Sep 18, 2025
d99fb3f
check in mul_mat optimisation analysis for further work
dbsanfte Sep 18, 2025
147 changes: 147 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,147 @@
FROM ubuntu:24.04

# Build arguments for optional components (default: disabled)
ARG INSTALL_CUDA=false
ARG INSTALL_ROCM=false
ARG INSTALL_VULKAN=false
ARG INSTALL_PYTHON_DEPS=false

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive

# Copy in a zscaler.crt if one exists
# This allows the container to access the internet on corporate laptops
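# (the [t] glob makes this COPY a no-op rather than a build failure when the file is absent)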
COPY zscaler.cr[t] /usr/local/share/ca-certificates/

# This tells various tools to use the system CA certificates
ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
ENV NODE_OPTIONS=--use-openssl-ca

# Update and install system dependencies
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        ca-certificates \
        cmake \
        git \
        git-lfs \
        curl \
        wget \
        jq \
        pkg-config \
        python3 \
        python3-pip \
        python3-venv \
        libcurl4-openssl-dev \
        libnuma-dev \
        libomp-dev \
        linux-tools-generic \
        linux-tools-common \
        numactl \
        hwloc-nox \
        libhwloc-dev \
        ccache \
        ninja-build \
        gdb \
        valgrind \
        strace \
        sudo \
        bc \
        gh && \
    update-ca-certificates && \
    apt-get autoremove -y && \
    apt-get clean

# Install CUDA 13.0 (conditional)
RUN if [ "$INSTALL_CUDA" = "true" ]; then \
        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb -O cuda-keyring.deb && \
        dpkg -i cuda-keyring.deb && \
        apt-get update && \
        apt-get -y install cuda-toolkit-13-0 cuda-drivers && \
        rm cuda-keyring.deb; \
    else \
        echo "Skipping CUDA installation"; \
    fi

# Install ROCm 6.4 (conditional)
RUN if [ "$INSTALL_ROCM" = "true" ]; then \
        mkdir -p --mode=0755 /etc/apt/keyrings && \
        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
            gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.4.2 noble main" \
            | tee /etc/apt/sources.list.d/rocm.list && \
        echo 'Package: *' \
            | tee /etc/apt/preferences.d/rocm-pin-600 && \
        echo 'Pin: release o=repo.radeon.com' \
            | tee -a /etc/apt/preferences.d/rocm-pin-600 && \
        echo 'Pin-Priority: 600' \
            | tee -a /etc/apt/preferences.d/rocm-pin-600 && \
        apt-get update && \
        apt-get install -y rocm && \
        apt-get autoremove -y && \
        apt-get clean; \
    else \
        echo "Skipping ROCm installation"; \
    fi

# Install Vulkan SDK (conditional)
RUN if [ "$INSTALL_VULKAN" = "true" ]; then \
        wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | tee /etc/apt/trusted.gpg.d/lunarg.asc && \
        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
        apt-get update && \
        apt-get install -y vulkan-sdk && \
        apt-get autoremove -y && \
        apt-get clean; \
    else \
        echo "Skipping Vulkan SDK installation"; \
    fi

# Install Python dependencies for gguf conversion tools (conditional)
RUN if [ "$INSTALL_PYTHON_DEPS" = "true" ]; then \
        python3 -m pip install --break-system-packages \
            numpy \
            torch \
            transformers \
            sentencepiece \
            protobuf \
            gguf; \
    else \
        echo "Skipping Python dependencies installation"; \
    fi

# Set up ccache for faster compilation
ENV PATH="/usr/lib/ccache:${PATH}"
ENV CCACHE_DIR="/tmp/ccache"
ENV CMAKE_C_COMPILER="/usr/lib/ccache/gcc"
ENV CMAKE_CXX_COMPILER="/usr/lib/ccache/g++"
ENV CMAKE_C_COMPILER_LAUNCHER="ccache"
ENV CMAKE_CXX_COMPILER_LAUNCHER="ccache"
ENV CC="/usr/lib/ccache/gcc"
ENV CXX="/usr/lib/ccache/g++"
RUN mkdir -p /tmp/ccache

# Create a non-root user
RUN useradd -m -s /bin/bash developer && \
    usermod -aG sudo developer && \
    echo "developer ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Fix ownership of ccache directory for developer user
RUN chown -R developer:developer /tmp/ccache

# Set working directory
WORKDIR /workspace

# Switch to non-root user
USER developer

# Set up shell environment
RUN echo 'export PS1="\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ "' >> ~/.bashrc && \
    echo 'alias ll="ls -alF"' >> ~/.bashrc && \
    echo 'alias la="ls -A"' >> ~/.bashrc && \
    echo 'alias l="ls -CF"' >> ~/.bashrc

# Expose common ports
EXPOSE 8080 8081

CMD ["/bin/bash"]
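
For reference, the image can also be built directly with Docker outside VS Code; a minimal sketch using the build arguments defined above (the `llamacpp-dev` tag is arbitrary):

```bash
# Build the dev image with CUDA and Python tooling enabled
docker build -t llamacpp-dev \
  --build-arg INSTALL_CUDA=true \
  --build-arg INSTALL_PYTHON_DEPS=true \
  .devcontainer
```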
211 changes: 211 additions & 0 deletions .devcontainer/README.md
@@ -0,0 +1,211 @@
# llama.cpp Development Container

This dev container provides a complete Ubuntu 24.04 environment for building and testing llama.cpp with NUMA support and optional GPU acceleration.

## Quick Start

1. Open the project in VS Code
2. When prompted, click "Reopen in Container" or use `Ctrl+Shift+P` → "Dev Containers: Reopen in Container"
3. The container will build with the basic development tools (no GPU support by default)

## Optional Components

By default, the container includes only the essential build tools. You can enable additional components by editing `.devcontainer/devcontainer.json`:

### CUDA Support (NVIDIA GPUs)
```json
"INSTALL_CUDA": "true"
```
Installs the CUDA 13.0 toolkit for NVIDIA GPU acceleration.

### ROCm Support (AMD GPUs)
```json
"INSTALL_ROCM": "true"
```
Installs ROCm 6.4 for AMD GPU acceleration.
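
### Vulkan Support
```json
"INSTALL_VULKAN": "true"
```
Installs the LunarG Vulkan SDK for Vulkan GPU builds (controlled by the `INSTALL_VULKAN` build argument in the Dockerfile).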

### Python Dependencies
```json
"INSTALL_PYTHON_DEPS": "true"
```
Installs Python packages for model conversion tools:
- numpy, torch, transformers, sentencepiece, protobuf, gguf

## Example Configurations

### Full GPU Development (NVIDIA + Python)
```json
"build": {
  "args": {
    "INSTALL_CUDA": "true",
    "INSTALL_ROCM": "false",
    "INSTALL_PYTHON_DEPS": "true"
  }
}
```

### AMD GPU Development
```json
"build": {
  "args": {
    "INSTALL_CUDA": "false",
    "INSTALL_ROCM": "true",
    "INSTALL_PYTHON_DEPS": "true"
  }
}
```

### CPU-only with Python tools
```json
"build": {
  "args": {
    "INSTALL_CUDA": "false",
    "INSTALL_ROCM": "false",
    "INSTALL_PYTHON_DEPS": "true"
  }
}
```

## Making Changes

### Method 1: Interactive Configuration Script (Recommended)
```bash
# Run the configuration helper
chmod +x .devcontainer/configure.sh
./.devcontainer/configure.sh
```

### Method 2: Manual Configuration
1. Edit `.devcontainer/devcontainer.json`
2. Set the desired components to `"true"` or `"false"`
3. Rebuild the container: `Ctrl+Shift+P` → "Dev Containers: Rebuild Container"

## Features

- **Ubuntu 24.04 LTS** base image
- **Complete build toolchain**: gcc, cmake, ninja, ccache
- **NUMA support**: libnuma-dev, numactl, hwloc for CPU topology detection
- **Optional GPU acceleration**: CUDA 13.0, ROCm 6.4, and/or Vulkan SDK support
- **Optional Python environment**: with packages for GGUF conversion tools
- **VS Code integration**: with C/C++, CMake, and Python extensions
- **Development tools**: gdb, valgrind for debugging

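A quick way to spot-check which optional components landed in a given container build (a sketch; `hipcc` stands in for ROCm):

```bash
# Report presence or absence of the optional toolchains
for tool in nvcc hipcc vulkaninfo; do
  command -v "$tool" >/dev/null && echo "$tool: present" || echo "$tool: absent"
done
python3 -c "import gguf" 2>/dev/null && echo "gguf: present" || echo "gguf: absent"
```
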
## Building and Testing

1. **Open in VS Code**: Make sure you have the "Dev Containers" extension installed, then:
- Open the llama.cpp folder in VS Code
- Press `Ctrl+Shift+P` (or `Cmd+Shift+P` on Mac)
- Type "Dev Containers: Reopen in Container"
- Select it and wait for the container to build and start

2. **Build the project**:
```bash
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build --parallel
```

3. **Test NUMA functionality**:
```bash
# Check NUMA topology
numactl --hardware

# Run with specific NUMA settings
numactl --cpunodebind=0 --membind=0 ./build/bin/llama-server --model path/to/model.gguf
```

## Available Tools

### System Tools
- `numactl`: NUMA policy control
- `hwloc-info`: Hardware locality information
- `lscpu`: CPU information
- `ccache`: Compiler cache for faster rebuilds

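For example, to inspect the topology these tools report from inside the container:

```bash
# NUMA layout as seen by the container
lscpu | grep -i numa
hwloc-info
```
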
### Build Configurations

#### Debug Build (default post-create)
```bash
cmake -B build -DCMAKE_BUILD_TYPE=Debug
cmake --build build --parallel
```

#### Release Build (optimized)
```bash
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build --parallel
```

#### With Additional Options
```bash
# Enable OpenBLAS
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS

# Static build
cmake -B build -DBUILD_SHARED_LIBS=OFF

# Disable CURL if not needed
cmake -B build -DLLAMA_CURL=OFF
```
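
Since the image also ships `ninja-build`, the Ninja generator is an option for faster builds (a suggestion, not something the project requires):

```bash
cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release
cmake --build build
```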

## Testing NUMA Improvements

The container includes tools to test the NUMA improvements:

### NUMA Topology Detection
```bash
# Check current NUMA configuration
numactl --show

# Display NUMA hardware topology
numactl --hardware
```
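
On a two-node machine the hardware report looks roughly like this (illustrative values only):

```bash
$ numactl --hardware
available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7
node 0 size: 64215 MB
node 1 cpus: 8 9 10 11 12 13 14 15
node 1 size: 64504 MB
node distances:
node   0   1
  0:  10  21
  1:  21  10
```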

### Performance Testing
```bash
# Test with default settings (hyperthreading disabled)
./build/bin/llama-bench -m model.gguf

# Test with hyperthreading
./build/bin/llama-bench -m model.gguf --cpu-use-hyperthreading

# Test with specific thread count
./build/bin/llama-bench -m model.gguf --threads 8

# Test with NUMA binding
numactl --cpunodebind=0 --membind=0 ./build/bin/llama-bench -m model.gguf

# Test with NUMA mirroring of model weights
./build/bin/llama-bench -m model.gguf --numa mirror
```

### Environment Variables
```bash
# Disable hyperthreading via environment
LLAMA_CPU_NO_HYPERTHREADING=1 ./build/bin/llama-server --model model.gguf

# Disable efficiency cores
LLAMA_CPU_NO_EFFICIENCY_CORES=1 ./build/bin/llama-server --model model.gguf
```
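
To compare placement modes side by side, a small harness along these lines can help (a sketch; it assumes a built `llama-bench`, a local `model.gguf`, and that the upstream `--numa distribute` mode is available alongside the new `mirror` mode):

```bash
#!/usr/bin/env bash
# Benchmark the same model under different NUMA placement strategies
MODEL=model.gguf
./build/bin/llama-bench -m "$MODEL"                    # default placement
./build/bin/llama-bench -m "$MODEL" --numa distribute  # spread pages across nodes
./build/bin/llama-bench -m "$MODEL" --numa mirror      # per-node copies of the weights
```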

## Development Workflow

1. **Code changes**: Edit files in VS Code with full IntelliSense support
2. **Build**: Use `Ctrl+Shift+P` → "CMake: Build" or terminal commands
3. **Debug**: Set breakpoints and use the integrated debugger
4. **Test**: Run executables directly or through the testing framework

## Troubleshooting

### Container Build Issues
- Ensure Docker Desktop is running
- Try rebuilding: `Ctrl+Shift+P` → "Dev Containers: Rebuild Container"

### NUMA Issues
- Check if running on a NUMA system: `numactl --hardware`
- Verify CPU topology detection: `lscpu` and `hwloc-info`
- Test CPU affinity: `taskset -c 0-3 ./your-program`

### Build Issues
- Clear build cache: `rm -rf build && cmake -B build`
- Check ccache stats: `ccache -s`
- Use verbose build: `cmake --build build --verbose`