File size: 1,732 Bytes
f51bb92
 
 
 
 
f2beb6a
 
ce9ef3e
f51bb92
 
f2daaee
e029e22
f51bb92
6158da4
b83cc65
f0018f2
f51bb92
 
 
 
 
 
 
 
e5cd1d3
 
 
6158da4
e029e22
6d056d5
db6b619
e029e22
 
6158da4
e029e22
f2daaee
e029e22
f2daaee
 
7f989d6
f2daaee
f51bb92
6158da4
 
 
 
 
b83cc65
 
6158da4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Top-level settings. The two paths are relative; what they are resolved
# against is not visible in this file (presumably the consumer's working
# directory -- confirm).

# str
log_dir: '../storage/logs'

# str
log_chunk_dir: '../storage/logs/chunks'

# str, one of [cuda, cpu]
device: 'cpu'

# Vector store settings: index source, backend selection, embedding model,
# and retrieval limits.
# NOTE(review): per-key meaning is inferred from the key names and the type
# hints below; confirm against the code that reads this file.
# Key spellings (including `embedd_files`) are kept exactly as-is because
# consumers look them up by name.
vectorstore:
  load_from_HF: true  # bool
  HF_path: 'XThomasBU/Colbert_Index'  # str
  embedd_files: false  # bool
  data_path: '../storage/data'  # str
  url_file_path: '../storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'FAISS'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: '../vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float

  # Not used as of now
  faiss_params:
    index_path: '../vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    index_dimension: 384  # int
    index_nlist: 100  # int
    index_nprobe: 10  # int

  colbert_params:
    index_name: 'new_idx'  # str

# LLM settings: architecture, conversation history, answer style, and
# which model loader to use, plus per-loader sampling parameters.
# NOTE(review): which of openai_params / local_llm_params is read presumably
# depends on llm_loader -- confirm against the consumer.
llm_params:
  llm_arch: 'langchain'  # str [langchain, langgraph_agentic]
  use_history: true  # bool
  memory_window: 3  # int
  llm_style: 'Normal'  # str [Normal, ELI5, Socratic]
  llm_loader: 'gpt-3.5-turbo-1106'  # str [local_llm, gpt-3.5-turbo-1106, gpt-4]
  openai_params:
    temperature: 0.7  # float
  local_llm_params:
    temperature: 0.7  # float

# Chat logging settings.
# NOTE(review): presumably `platform` is only consulted when `log_chat` is
# true -- confirm against the consumer.
chat_logging:
  log_chat: false  # bool
  platform: 'literalai'  # str

# Text splitter settings: chunk sizing, separators, and post-split cleanup.
# NOTE(review): per-key meaning is inferred from the key names and type
# hints; confirm against the code that reads this file.
splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # list of strings; double-quoted, so "\n" here is a real newline character.
  chunk_separators: ["\n\n", "\n", " ", ""]
  front_chunks_to_remove: null  # int or None
  last_chunks_to_remove: null  # int or None
  # list of strings; single-quoted, so '\t' and '\n' are the literal
  # two-character sequences backslash-t / backslash-n, not control
  # characters. Presumably the consumer interprets them itself (e.g. as
  # regex patterns) -- TODO confirm before changing the quoting.
  delimiters_to_remove: ['\t', '\n', '   ', '  ']