Skip to content

Commit

Permalink
qdrant embed and store fn
Browse files Browse the repository at this point in the history
  • Loading branch information
nathan0x6C1 committed Mar 29, 2024
1 parent fd69066 commit c08a5d6
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 125 deletions.
3 changes: 2 additions & 1 deletion imessage-exporter/src/exporters/cypher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,11 @@ impl<'a> Exporter<'a> for CYPHER<'a> {
}
}


// TODO: could this be made non-blocking?
let _hydrated_message_result = rt.block_on(index_to_cypher(hydrated_message, &self.graph));



// @mentat replace this line with an invocation of async fn store_data(embedding: Vec<f32>, sentence: &str, guid: String) -> anyhow::Result<PointsOperationResponse> { ... }

// // Render the announcement in-line
Expand Down
130 changes: 130 additions & 0 deletions retrieval/src/embed_and_insert_qdrant_curl.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use serde::Serialize;
use uuid::Uuid;

/// Serializable description of a single point: an id, a vector and a payload.
///
/// NOTE(review): `embed_and_store` in this file builds its request body with
/// `serde_json::json!` rather than this struct, and the vector here is
/// `Vec<f64>` — confirm whether this type is still needed and whether the
/// element type matches what the embedder returns.
#[derive(Serialize)]
struct PointInsert {
/// Identifier of the point.
id: Uuid,
/// Vector components of the point.
vector: Vec<f64>,
/// Extra data serialized alongside the vector.
payload: Payload,
}

/// Serializable payload carried by a `PointInsert`.
#[derive(Serialize)]
struct Payload {
/// The text associated with the point.
sentence: String,
}

/// Issues a `PUT` against the Qdrant instance on `localhost:6333` asking it to
/// create the collection named `test_collection`, configured for 768-dimensional
/// vectors compared with the `Dot` distance.
///
/// The HTTP status of the reply is printed but not inspected, so a non-2xx
/// answer does not make this function fail.
///
/// # Panics
///
/// Panics if the request cannot be sent or no response is received.
async fn create_collection(test_collection: String) {
    let endpoint = format!("http://localhost:6333/collections/{}", test_collection);
    let collection_config = r#"{ "vectors": { "size": 768, "distance": "Dot" } }"#;

    // Attempt the creation; the outcome is only reported, not acted upon.
    let response = reqwest::Client::new()
        .put(endpoint)
        .header("Content-Type", "application/json")
        .body(collection_config)
        .send()
        .await
        .expect("Failed to create or verify collection existence");

    let status = response.status();
    println!("Collection creation or verification response: {:?}", status);
}
/// Embeds `text_chunk` and stores it in Qdrant as one point.
///
/// The chunk is embedded with `crate::embed::embed_text_chunk`; the resulting
/// vector is sent with a freshly generated v4 UUID as the point id and the
/// original text under the `sentence` payload key, via a `PUT` to `qdrant_url`.
///
/// * `text_chunk` - text to embed and store.
/// * `client` - HTTP client used for the Qdrant request.
/// * `qdrant_url` - full URL of the collection's points endpoint.
///
/// An embedding failure is reported on stdout and the chunk is skipped. A
/// non-success HTTP status from Qdrant is likewise reported on stdout instead
/// of being announced as a successful store.
///
/// # Panics
///
/// Panics if the request to Qdrant cannot be sent or no response is received.
async fn embed_and_store(text_chunk: &str, client: &reqwest::Client, qdrant_url: &str) {
    let vector_embedding = match crate::embed::embed_text_chunk(text_chunk).await {
        Ok(embedding) => embedding,
        Err(e) => {
            println!("Failed to embed sentence: {}. Error: {}", text_chunk, e);
            return;
        }
    };

    let point_insert = serde_json::json!({
        "points": [{
            "id": Uuid::new_v4(),
            "vector": vector_embedding,
            "payload": {
                "sentence": text_chunk,
            }
        }]
    });

    let response = client
        .put(qdrant_url)
        .json(&point_insert)
        .send()
        .await
        .expect("Failed to insert point into Qdrant");

    // `send()` yields `Ok` for any HTTP response, including 4xx/5xx, so the
    // status has to be checked before claiming the point was stored.
    let status = response.status();
    if status.is_success() {
        println!(
            "text chunk stored successfully {} ==> dimensions: {}",
            text_chunk,
            vector_embedding.len()
        );
    } else {
        println!(
            "Failed to store text chunk: {}. Qdrant responded with status: {}",
            text_chunk, status
        );
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check: creates `test_collection`, then embeds and stores the
    /// first 50 sample sentences.
    ///
    /// Requires a Qdrant instance reachable on `localhost:6333`, plus whatever
    /// backend `crate::embed::embed_text_chunk` talks to.
    #[tokio::test]
    async fn test_embed_and_store() {
        let text_chunks = [
            "i really love turtles. And water.",
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
            "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
            "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.",
            "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.",
            "The quick brown fox jumps over the lazy dog.",
            "I love the smell of freshly brewed coffee in the morning.",
            "The Eiffel Tower is an iconic landmark in Paris, France.",
            "Python is a popular high-level programming language.",
            "The Universe is full of countless galaxies and stars.",
            "Artificial intelligence is transforming various industries.",
            "Climate change is one of the biggest challenges facing humanity.",
            "The Great Wall of China is a marvel of ancient engineering.",
            "Music has the power to evoke emotions and memories.",
            "The Internet has revolutionized the way we communicate and access information.",
            "Quantum computing has the potential to solve complex problems.",
            "The Mona Lisa is a renowned painting by Leonardo da Vinci.",
            "Yoga is a practice that combines physical postures, breathing techniques, and meditation.",
            "The Amazon rainforest is home to an incredible diversity of flora and fauna.",
            "Blockchain technology enables secure and decentralized transactions.",
            "The Taj Mahal is a stunning example of Mughal architecture.",
            "Virtual reality is creating immersive experiences in gaming and beyond.",
            "The Great Barrier Reef is the world's largest coral reef system.",
            "Machine learning algorithms can learn from data and make predictions.",
            "The Statue of Liberty is a symbol of freedom and democracy.",
            "Renewable energy sources like solar and wind power are crucial for a sustainable future.",
            "The Louvre Museum in Paris houses an extensive collection of art and artifacts.",
            "Mindfulness meditation can help reduce stress and improve well-being.",
            "The International Space Station is a collaborative effort among multiple countries.",
            "The Pyramids of Giza are ancient wonders that continue to captivate us.",
            "Robotics is advancing rapidly, with applications in manufacturing, healthcare, and more.",
            "The Great Barrier Reef is facing threats from climate change and ocean acidification.",
            "The Hubble Space Telescope has provided stunning images of the cosmos.",
            "The Guggenheim Museum in New York is known for its unique architectural design.",
            "Augmented reality overlays digital information onto the real world.",
            "The Grand Canyon is a natural wonder carved by the Colorado River.",
            "Nanotechnology involves manipulating matter at the nanoscale.",
            "The Colosseum in Rome is an iconic symbol of ancient Roman architecture.",
            "Sustainable agriculture practices aim to minimize environmental impact.",
            "The Large Hadron Collider is the world's largest particle accelerator.",
            "The Great Barrier Reef supports a rich diversity of marine life.",
            "3D printing technology enables the creation of complex objects layer by layer.",
            "The Acropolis in Athens is a testament to ancient Greek civilization.",
            "Quantum entanglement is a strange phenomenon in quantum mechanics.",
            "The Panama Canal is an engineering marvel that connects the Atlantic and Pacific oceans.",
            "Bioinformatics combines biology, computer science, and statistics to analyze biological data.",
            "The Northern Lights, or Aurora Borealis, are a mesmerizing natural light display.",
            "The Terracotta Army in China consists of thousands of life-sized clay soldiers.",
            "Artificial neural networks are inspired by the structure and function of the human brain.",
            "The Serengeti National Park in Tanzania is known for its annual wildebeest migration.",
            "Genetic engineering involves modifying the DNA of organisms.",
            "The Golden Gate Bridge is an iconic suspension bridge in San Francisco.",
            "The Higgs boson is a fundamental particle in the Standard Model of particle physics.",
            "The Galapagos Islands are known for their unique and diverse wildlife.",
            "Quantum cryptography uses principles of quantum mechanics for secure communication.",
            "The Chichen Itza is an ancient Mayan city in Mexico, known for its impressive pyramids.",
            "The Human Genome Project aimed to sequence the entire human genome.",
        ];

        let client = reqwest::Client::new();
        let qdrant_url = "http://localhost:6333/collections/test_collection/points?wait=true";

        // Futures are lazy: without `.await` the creation request is never sent
        // and the inserts below would target a collection that may not exist.
        create_collection("test_collection".to_string()).await;

        // Only the first 50 sentences are stored, matching the original cap.
        for text_chunk in text_chunks.iter().take(50) {
            embed_and_store(text_chunk, &client, qdrant_url).await;
        }
    }
}
123 changes: 0 additions & 123 deletions retrieval/src/embed_insert_qdrant_curl.rs

This file was deleted.

2 changes: 1 addition & 1 deletion retrieval/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pub mod embed;
mod qdrant_rs_client;
mod embed_insert_qdrant_curl;
mod embed_and_insert_qdrant_curl;

0 comments on commit c08a5d6

Please sign in to comment.