# helm/h2ogpt-chart/values.yaml

# Name / namespace overrides. All three default to the empty string.
# NOTE(review): presumably consumed by the chart's naming helpers - confirm in templates.
nameOverride: ''
fullnameOverride: ''
namespaceOverride: ''

# Settings for the h2oGPT workload. Keys written as `null` are intentionally
# unset; booleans use canonical lowercase `true` / `false` throughout.
h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1
  imagePullSecrets: null
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    # Unset here. NOTE(review): presumably the templates supply a default tag - confirm.
    tag: null
    pullPolicy: IfNotPresent
  initImage:
    repository: null
    tag: null
    pullPolicy: null

  # extra volumes, for more certs, mount under /etc/ssl/more-certs
  extraVolumes: []
  extraVolumeMounts: []

  # Null until one of the commented keys below is enabled.
  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  storage:
    size: 128Gi
    class: null
    useEphemeral: true

  externalLLM:
    enabled: false
    secret: null

    modelLock: null

    openAIAzure:
      enabled: false

    openAI:
      enabled: false

    replicate:
      enabled: false

  visionModels:
    enabled: false
    # -- Visible vision models, the vision model itself needs to be set via modelLock or base_model
    # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5']
    visibleModels: []
    rotateAlignResizeImage: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: true
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: false
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024

  overrideConfig:
    visible_login_tab: false
    visible_system_tab: false
    visible_models_tab: false
    visible_hosts_tab: false
    # change below to valid vision model or remove this entry
    # visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']"
    rotate_align_resize_image: false
    concurrency_count: 100
    top_k_docs_max_show: 100
    num_async: 10
    # change below to valid directory or remove this entry
    # save_dir: "/docker_logs"
    # Quoted on purpose: the value is the string "None", not a YAML null.
    score_model: "None"
    enable_tts: false
    enable_stt: false
    enable_transcriptions: false
    embedding_gpu_id: "cpu"
    hf_embedding_model: "fake"
    openai_server: true
    share: false
    enforce_h2ogpt_api_key: true
    enforce_h2ogpt_ui_key: false
    # change to something secure for ui access to backend
    # h2ogpt_api_keys: "['api_key_change_me']"
    metadata_in_context: ""
    # change or remove if using model hub
    # use_auth_token: "hf_xxxxx"
    # change below to first visible model or remove this entry
    # visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']"
    # change so ui or api cannot access without this password
    # admin_pass: "admin_password_change_me"

  # Service type and the port numbers it exposes.
  service:
    type: NodePort
    webPort: 80
    openaiPort: 5000
    functionPort: 5002
    agentsPort: 5004
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  # Pod-level security context; user / group ids are left unset.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: null
    runAsGroup: null
    fsGroup: null

  # Container-level security context.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# TGI backend (image: huggingface/text-generation-inference); disabled by default.
# Keys written as `null` are intentionally unset.
tgi:
  enabled: false
  replicaCount: 1

  image:
    repository: 'ghcr.io/huggingface/text-generation-inference'
    tag: '0.9.3'
    pullPolicy: 'IfNotPresent'

  # Null until one of the commented keys below is enabled.
  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  storage:
    size: '512Gi'
    class: null
    useEphemeral: true

  overrideConfig: null
  hfSecret: null
  containerArgs: null

  service:
    type: 'ClusterIP'
    port: 8080

  updateStrategy:
    type: 'RollingUpdate'

  # No security contexts are set for this backend by default.
  podSecurityContext: null
  securityContext: null

  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# vLLM backend (image: vllm/vllm-openai); disabled by default.
# Keys written as `null` are intentionally unset.
vllm:
  enabled: false
  replicaCount: 1

  image:
    repository: vllm/vllm-openai
    # NOTE(review): a floating `latest` tag with pullPolicy IfNotPresent means a
    # node keeps whatever image it cached first - consider pinning a version.
    tag: latest
    pullPolicy: IfNotPresent

  # Null until one of the commented keys below is enabled.
  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  imagePullSecrets: null

  storage:
    size: 512Gi
    class: null
    useEphemeral: true

  overrideConfig: null

  # Every entry is a string: Kubernetes container args are a list of strings,
  # so numeric flag values are quoted rather than left as YAML integers.
  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  # Pod-level security context; user / group ids are left unset.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: null
    runAsGroup: null
    fsGroup: null

  # Container-level security context.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # NOTE(review): unset here, whereas h2ogpt.securityContext sets
    # `type: RuntimeDefault` - confirm this difference is intentional.
    seccompProfile: null

  # Values are quoted so they stay strings.
  env:
    VLLM_NO_USAGE_STATS: "1"
    DO_NOT_TRACK: "1"

  resources: null

  nodeSelector: null

  tolerations: null

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# LMDeploy backend; disabled by default.
# Keys written as `null` are intentionally unset.
lmdeploy:
  enabled: false
  replicaCount: 1

  image:
    repository: 'gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy'
    tag: null
    pullPolicy: 'IfNotPresent'

  # Null until one of the commented keys below is enabled.
  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  storage:
    size: '512Gi'
    class: null
    useEphemeral: true

  overrideConfig: null
  hfSecret: null
  # Single default argument. NOTE(review): looks like a model identifier - confirm.
  containerArgs:
    - 'OpenGVLab/InternVL-Chat-V1-5'

  service:
    type: 'ClusterIP'
    port: 23333

  updateStrategy:
    type: 'RollingUpdate'

  # No security contexts are set for this backend by default.
  podSecurityContext: null
  securityContext: null

  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# -- CA certs (empty string by default)
caCertificates: ''