forked from unclecode/crawl4ai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_llmtxt.py
49 lines (41 loc) · 1.61 KB
/
test_llmtxt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from crawl4ai.llmtxt import AsyncLLMTextManager # Changed to AsyncLLMTextManager
from crawl4ai.async_logger import AsyncLogger
from pathlib import Path
import asyncio
async def main():
    """Smoke-test ``AsyncLLMTextManager`` against the local docs folder.

    Steps, in order:

    1. Resolve ``local/_docs/llm.txt`` two directory levels above this
       file and create it if it is missing.
    2. Print the name of every ``*.md`` file found directly in that folder.
    3. Await ``manager.generate_index_files`` with both flags set to False.
    4. Run each sample query through ``manager.search`` and print the
       result length plus a one-line preview of the first 200 characters.
    """
    script_path = Path(__file__).resolve()
    docs_root = script_path.parent.parent / "local/_docs/llm.txt"

    # Make sure the folder exists so the glob and the manager have a target.
    docs_root.mkdir(parents=True, exist_ok=True)

    # Only batch_size is overridden here; every other manager setting is
    # left at whatever default the constructor provides.
    manager = AsyncLLMTextManager(docs_root, AsyncLogger(), batch_size=2)

    # Show which markdown sources are present before indexing.
    print("\nAvailable files:")
    for md_file in docs_root.glob("*.md"):
        print(f"- {md_file.name}")

    # Both flags are off — presumably so existing facts and the BM25 cache
    # are reused rather than rebuilt; confirm against the manager's docs.
    print("\nGenerating index files...")
    await manager.generate_index_files(
        force_generate_facts=False,
        clear_bm25_cache=False,
    )

    # Sample questions about Crawl4AI used to exercise the search path.
    sample_queries = [
        "How is using the `arun_many` method?",
    ]

    print("\nTesting search functionality:")
    for question in sample_queries:
        print(f"\nQuery: {question}")
        hits = manager.search(question, top_k=2)
        print(f"Results length: {len(hits)} characters")

        if not hits:
            print("No results found")
            continue

        # Flatten newlines so the preview stays on a single console line.
        preview = hits[:200].replace('\n', ' ')
        print("First 200 chars of results:", preview, "...")
# Script entry point: only when this file is executed directly (not imported),
# hand the main() coroutine to asyncio.run, which runs it to completion.
if __name__ == "__main__":
    asyncio.run(main())