qdrant vector database for rag.py, with custom collections for experts? #355

cocobeach · 2024-04-04T15:03:11Z

cocobeach
Apr 4, 2024

I was looking into Qdrant: it's free, it's local, and it's easy to install and run with Docker. From what I gather, rag.py could look something like the code below, but it needs to be integrated into the actions, and that would require the dev to look into it:

from flask import Flask, request, jsonify
from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import VectorEntry, Distance

# Flask application object; the routes below register themselves on it.
app = Flask(__name__)

# Configuration
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Length of the vectors produced by MODEL_NAME. The Qdrant collection must be
# created with the same size, so update this whenever MODEL_NAME changes.
EMBEDDING_DIM = 384
QDRANT_URL = "localhost:6333"
COLLECTION_NAME = "pdf_data"

# Initialize Qdrant client
client = QdrantClient(url=QDRANT_URL)

# Create Qdrant vector store
# VectorParams is imported next to its single use; it describes the vector
# size and distance metric of the collection.
from qdrant_client.models import VectorParams

# NOTE(review): recreate_collection drops an existing collection of the same
# name before creating it, so previously stored embeddings are lost each time
# this module is loaded -- confirm that is intended.
client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
)

# Create embeddings
embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

@app.route("/embed-pdf", methods=["POST"])
def embed_pdf():
    """Embed an uploaded PDF and store its text chunks in Qdrant.

    Reads the upload from the ``pdf_file`` multipart field, splits the
    extracted text into overlapping chunks, embeds every chunk and upserts
    one point per chunk into ``COLLECTION_NAME``. Each point carries the
    chunk text in its payload under the ``"text"`` key.

    Returns:
        A JSON success message, or a JSON error with HTTP status 400 when no
        file was uploaded or no text could be extracted from it.
    """
    import os
    import tempfile
    import uuid

    from qdrant_client.models import PointStruct

    # Get the PDF file from the request
    pdf_file = request.files.get("pdf_file")
    if pdf_file is None or not pdf_file.filename:
        return jsonify({"error": "No PDF file provided"}), 400

    # PyPDFLoader is given a filesystem path, so write the upload to a
    # temporary file first and remove it once the pages are loaded.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        pdf_file.save(tmp)
        tmp_path = tmp.name
    try:
        data = PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(data)
    if not texts:
        return jsonify({"error": "No text could be extracted from the PDF"}), 400

    # Create embeddings (one batched call) and store them in Qdrant
    contents = [doc.page_content for doc in texts]
    vectors = embeddings.embed_documents(contents)
    points = [
        # Every point needs an id; random UUIDs avoid overwriting earlier uploads.
        PointStruct(id=str(uuid.uuid4()), vector=vector, payload={"text": content})
        for content, vector in zip(contents, vectors)
    ]
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )

    return jsonify({"message": "PDF embeddings created successfully"})

@app.route("/embed-text", methods=["POST"])
def embed_text():
    """Embed raw text from the request and store its chunks in Qdrant.

    Reads the ``text`` form field, splits it into overlapping chunks, embeds
    every chunk and upserts one point per chunk into ``COLLECTION_NAME``.
    Each point carries the chunk text in its payload under the ``"text"`` key.

    Returns:
        A JSON success message, or a JSON error with HTTP status 400 when the
        ``text`` field is missing or blank.
    """
    import uuid

    from qdrant_client.models import PointStruct

    # Get the text from the request
    text = request.form.get("text")
    if not text or not text.strip():
        return jsonify({"error": "No text provided"}), 400

    # Split text into chunks. The text is already in memory, so it is split
    # directly instead of going through a file loader and a shared temp file.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(text)
    if not chunks:
        return jsonify({"error": "No text provided"}), 400

    # Create embeddings (one batched call) and store them in Qdrant
    vectors = embeddings.embed_documents(chunks)
    points = [
        # Every point needs an id; random UUIDs avoid overwriting earlier entries.
        PointStruct(id=str(uuid.uuid4()), vector=vector, payload={"text": chunk})
        for chunk, vector in zip(chunks, vectors)
    ]
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )

    return jsonify({"message": "Text embeddings created successfully"})

@app.route("/search", methods=["POST"])
def search():
    """Return the stored chunks most similar to the submitted query.

    Reads the ``query`` form field, embeds it and asks Qdrant for the five
    nearest points in ``COLLECTION_NAME``.

    Returns:
        JSON of the form ``{"results": [{"score": ..., "text": ...}, ...]}``,
        or a JSON error with HTTP status 400 when ``query`` is missing or
        blank.
    """
    query = request.form.get("query")
    if not query or not query.strip():
        return jsonify({"error": "No query provided"}), 400

    query_embedding = embeddings.embed_query(query)
    hits = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding,
        limit=5,
    )
    results = [
        # A point stored without a payload yields text=None instead of raising.
        {"score": hit.score, "text": (hit.payload or {}).get("text")}
        for hit in hits
    ]
    return jsonify({"results": results})

# Start the Flask development server only when this file is run as a script.
if __name__ == "__main__":
    # debug=True turns on Flask's debug mode; use it for local development only.
    app.run(debug=True, port=5000)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

qdrant vector database for rag.py, with custom collections for experts? #355

{{title}}

Replies: 0 comments

Select a reply

qdrant vector database for rag.py, with custom collections for experts? #355

cocobeach Apr 4, 2024

Configuration

Initialize Qdrant client

Create Qdrant vector store

Create embeddings

Replies: 0 comments

cocobeach
Apr 4, 2024