You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I was looking into Qdrant: it's free, it runs locally, and it's easy to install and run with Docker. From what I gather, rag.py could look something like this, but it needs to be integrated into the actions, and that would require the dev to look into it:
from flask import Flask, request, jsonify
from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import VectorEntry, Distance
@app.route("/embed-pdf", methods=["POST"])
def embed_pdf():
    """Embed an uploaded PDF and store its text chunks in Qdrant.

    Expects a multipart upload under the ``pdf_file`` form field. The PDF is
    split into overlapping chunks (1000 characters, 200 overlap), each chunk
    is embedded, and the vectors are upserted into ``COLLECTION_NAME`` with
    the chunk text stored in the ``text`` payload key (the key ``search``
    reads back).

    Returns:
        A JSON success message, or a JSON error with HTTP 400 when no file
        was uploaded or no text could be extracted.
    """
    # Imported locally so this handler is self-contained.
    import os
    import tempfile
    import uuid

    from qdrant_client.models import PointStruct

    pdf_file = request.files.get("pdf_file")
    if pdf_file is None or not pdf_file.filename:
        return jsonify({"error": "No PDF file provided"}), 400

    # PyPDFLoader takes a filesystem path, not an uploaded file object, so
    # persist the upload to a temporary file for the duration of the load.
    fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)
    try:
        pdf_file.save(tmp_path)
        data = PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    contents = [doc.page_content for doc in text_splitter.split_documents(data)]
    if not contents:
        return jsonify({"error": "No text could be extracted from the PDF"}), 400

    # Embed all chunks in one batch and wrap them as Qdrant points; every
    # point needs an id, and the text goes in the payload.
    vectors = embeddings.embed_documents(contents)
    points = [
        PointStruct(id=str(uuid.uuid4()), vector=vector, payload={"text": content})
        for content, vector in zip(contents, vectors)
    ]
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )
    return jsonify({"message": "PDF embeddings created successfully"})
@app.route("/embed-text", methods=["POST"])
def embed_text():
    """Embed raw text from the request and store its chunks in Qdrant.

    Expects the text under the ``text`` form field. The text is split into
    overlapping chunks (1000 characters, 200 overlap), each chunk is
    embedded, and the vectors are upserted into ``COLLECTION_NAME`` with the
    chunk text stored in the ``text`` payload key.

    Returns:
        A JSON success message, or a JSON error with HTTP 400 when the
        ``text`` field is missing or blank.
    """
    # Imported locally so this handler is self-contained.
    import uuid

    from qdrant_client.models import PointStruct

    text = request.form.get("text")
    if not text or not text.strip():
        return jsonify({"error": "No text provided"}), 400

    # The text is already in memory, so split it directly instead of going
    # through a file loader (UnstructuredFileLoader has no write() method).
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    contents = text_splitter.split_text(text)

    # Embed all chunks in one batch and wrap them as Qdrant points; every
    # point needs an id, and the text goes in the payload.
    vectors = embeddings.embed_documents(contents)
    points = [
        PointStruct(id=str(uuid.uuid4()), vector=vector, payload={"text": content})
        for content, vector in zip(contents, vectors)
    ]
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )
    return jsonify({"message": "Text embeddings created successfully"})
@app.route("/search", methods=["POST"])
def search():
    """Return the five stored chunks most similar to the query.

    Expects the query under the ``query`` form field. The query is embedded
    and used for a vector search against ``COLLECTION_NAME``.

    Returns:
        JSON of the form ``{"results": [{"score": ..., "text": ...}, ...]}``,
        or a JSON error with HTTP 400 when the ``query`` field is missing or
        blank.
    """
    query = request.form.get("query")
    if not query or not query.strip():
        # Reject early rather than handing None/blank text to the embedder.
        return jsonify({"error": "No query provided"}), 400

    query_embedding = embeddings.embed_query(query)
    hits = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding,
        limit=5,
    )
    results = [{"score": hit.score, "text": hit.payload["text"]} for hit in hits]
    return jsonify({"results": results})
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
I was looking into Qdrant: it's free, it runs locally, and it's easy to install and run with Docker. From what I gather, rag.py could look something like this, but it needs to be integrated into the actions, and that would require the dev to look into it:
from flask import Flask, request, jsonify
from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import VectorEntry, Distance
# Flask needs the module's import name; the dunder underscores were lost
# when the snippet was pasted as markdown.
app = Flask(__name__)

# Configuration
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
QDRANT_URL = "localhost:6333"
COLLECTION_NAME = "pdf_data"

# Create embeddings (built first so the collection can be sized from the model)
embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

# Initialize Qdrant client
client = QdrantClient(url=QDRANT_URL)

# Create Qdrant vector store.
# The vector config must state the vector size as well as the distance
# metric; the size is taken from the model so it stays correct if MODEL_NAME
# changes.
# NOTE: recreate_collection drops any existing collection with this name, so
# previously stored embeddings are discarded on every start.
from qdrant_client.models import VectorParams  # noqa: E402

client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=len(embeddings.embed_query("dimension probe")),
        distance=Distance.COSINE,
    ),
)
@app.route("/embed-pdf", methods=["POST"])
def embed_pdf():
    """Embed an uploaded PDF and store its text chunks in Qdrant.

    Expects a multipart upload under the ``pdf_file`` form field. The PDF is
    split into overlapping chunks (1000 characters, 200 overlap), each chunk
    is embedded, and the vectors are upserted into ``COLLECTION_NAME`` with
    the chunk text stored in the ``text`` payload key (the key ``search``
    reads back).

    Returns:
        A JSON success message, or a JSON error with HTTP 400 when no file
        was uploaded or no text could be extracted.
    """
    # Imported locally so this handler is self-contained.
    import os
    import tempfile
    import uuid

    from qdrant_client.models import PointStruct

    # Get the PDF file from the request
    pdf_file = request.files.get("pdf_file")
    if pdf_file is None or not pdf_file.filename:
        return jsonify({"error": "No PDF file provided"}), 400

    # PyPDFLoader takes a filesystem path, not an uploaded file object, so
    # persist the upload to a temporary file for the duration of the load.
    fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)
    try:
        pdf_file.save(tmp_path)
        data = PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    contents = [doc.page_content for doc in text_splitter.split_documents(data)]
    if not contents:
        return jsonify({"error": "No text could be extracted from the PDF"}), 400

    # Embed all chunks in one batch and wrap them as Qdrant points; every
    # point needs an id, and the text goes in the payload.
    vectors = embeddings.embed_documents(contents)
    points = [
        PointStruct(id=str(uuid.uuid4()), vector=vector, payload={"text": content})
        for content, vector in zip(contents, vectors)
    ]
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )
    return jsonify({"message": "PDF embeddings created successfully"})
@app.route("/embed-text", methods=["POST"])
def embed_text():
    """Embed raw text from the request and store its chunks in Qdrant.

    Expects the text under the ``text`` form field. The text is split into
    overlapping chunks (1000 characters, 200 overlap), each chunk is
    embedded, and the vectors are upserted into ``COLLECTION_NAME`` with the
    chunk text stored in the ``text`` payload key.

    Returns:
        A JSON success message, or a JSON error with HTTP 400 when the
        ``text`` field is missing or blank.
    """
    # Imported locally so this handler is self-contained.
    import uuid

    from qdrant_client.models import PointStruct

    # Get the text from the request
    text = request.form.get("text")
    if not text or not text.strip():
        return jsonify({"error": "No text provided"}), 400

    # The text is already in memory, so split it directly rather than
    # round-tripping through a temporary file and a document loader.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    contents = text_splitter.split_text(text)

    # Embed all chunks in one batch and wrap them as Qdrant points; every
    # point needs an id, and the text goes in the payload.
    vectors = embeddings.embed_documents(contents)
    points = [
        PointStruct(id=str(uuid.uuid4()), vector=vector, payload={"text": content})
        for content, vector in zip(contents, vectors)
    ]
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )
    return jsonify({"message": "Text embeddings created successfully"})
@app.route("/search", methods=["POST"])
def search():
    """Return the five stored chunks most similar to the query.

    Expects the query under the ``query`` form field. The query is embedded
    and used for a vector search against ``COLLECTION_NAME``.

    Returns:
        JSON of the form ``{"results": [{"score": ..., "text": ...}, ...]}``,
        or a JSON error with HTTP 400 when the ``query`` field is missing or
        blank.
    """
    query = request.form.get("query")
    if not query or not query.strip():
        # Reject early rather than handing None/blank text to the embedder.
        return jsonify({"error": "No query provided"}), 400

    query_embedding = embeddings.embed_query(query)
    hits = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding,
        limit=5,
    )
    results = [{"score": hit.score, "text": hit.payload["text"]} for hit in hits]
    return jsonify({"results": results})
# Start the development server only when this file is run as a script.
# The dunder underscores were lost in the paste; the bare `name` would raise
# NameError.
if __name__ == "__main__":
    # debug=True enables the reloader and interactive debugger; intended for
    # local development only.
    app.run(debug=True, port=5000)
Beta Was this translation helpful? Give feedback.
All reactions