[tokens]
; Enter your API keys here.
; E.g., OPENAI_API_KEY = sk-xxxxxxx
OPENAI_API_KEY = sk-
ANTHROPIC_API_KEY = sk-
VERSION = 1.0.0

[directory]
; Directory for source files.
DOCS_DIR = ./data/
; Directory to store embeddings and Langchain documents.
EMB_DIR = database_store

; The parameters below are optional to modify:
; --------------------------------------------
[parameters]
; Supported models: llm_chat_gpt3, llm_chat_gpt4, or llm_chat_anthropic.
LLM_MODEL = llm_chat_gpt3
; Maximum tokens reserved for chat history (set this according to your LLM's token limit).
MAX_CHAT_HISTORY = 1200
; Maximum tokens of retrieved context passed to the LLM.
MAX_LLM_CONTEXT = 1800
; Maximum tokens for LLM generation.
MAX_LLM_GENERATION = 1000
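; Rough budget check under these defaults (an illustrative sum, ignoring prompt overhead):
; MAX_CHAT_HISTORY + MAX_LLM_CONTEXT + MAX_LLM_GENERATION = 1200 + 1800 + 1000 = 4000 tokens,
; which fits within a 4k-context model such as gpt-3.5-turbo.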
; Supported embeddings: openAIEmbeddings or hkunlpInstructorLarge (EMBEDDING_NAME is used to name the Vectorstore, so avoid special symbols in it).
EMBEDDING_NAME = openAIEmbeddings
; The base (small) chunk size for first-stage document retrieval.
BASE_CHUNK_SIZE = 50
; Set to 0 for no overlap.
CHUNK_OVERLAP = 0
; The final retrieval (medium) chunk size will be BASE_CHUNK_SIZE * CHUNK_SCALE.
CHUNK_SCALE = 3
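; Worked example with these defaults: each medium chunk is BASE_CHUNK_SIZE * CHUNK_SCALE = 50 * 3 = 150 tokens.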
WINDOW_STEPS = 3
; The token count of a window chunk will be BASE_CHUNK_SIZE * WINDOW_SCALE.
WINDOW_SCALE = 18
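; Worked example: each window chunk is BASE_CHUNK_SIZE * WINDOW_SCALE = 50 * 18 = 900 tokens.
; (Assumption, inferred from the name: WINDOW_STEPS is the stride, in base chunks, between
; consecutive windows, so WINDOW_STEPS = 3 advances the window 50 * 3 = 150 tokens at a time.)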
; Weights of the BM25 retriever vs. the Chroma Vectorstore retriever in the ensemble.
RETRIEVER_WEIGHTS = 0.5, 0.5
; Number of retrieved chunks will range from FIRST_RETRIEVAL_K to 2*FIRST_RETRIEVAL_K due to the ensemble retriever.
FIRST_RETRIEVAL_K = 3
; Number of retrieved chunks will range from SECOND_RETRIEVAL_K to 2*SECOND_RETRIEVAL_K due to the ensemble retriever.
SECOND_RETRIEVAL_K = 2
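; Worked example: with FIRST_RETRIEVAL_K = 3, the ensemble returns 3 chunks if the BM25 and
; Chroma results fully overlap, and up to 2 * 3 = 6 when they are disjoint; likewise,
; SECOND_RETRIEVAL_K = 2 yields between 2 and 4 chunks.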
; Number of windows (large chunks) for the third retriever.
NUM_WINDOWS = 2
; (The third retrieval produces the final chunks passed to the LLM QA chain; its 'k' is dynamic, derived from MAX_LLM_CONTEXT and the number of rephrased questions and retrieved documents.)

[logging]
; Set enabled to False to disable logging.
enabled = True
level = INFO
filename = IncarnaMind.log
format = %(asctime)s [%(levelname)s] %(name)s: %(message)s
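; Example log line produced by this format (illustrative timestamp, logger name, and message):
; 2023-09-01 12:00:00,000 [INFO] IncarnaMind: Loaded 2 documents from ./data/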