nicht funktionierende version
This commit is contained in:
50
RAG-Demo.py
50
RAG-Demo.py
@@ -3,9 +3,12 @@ import glob
|
|||||||
import lancedb
|
import lancedb
|
||||||
from PyPDF2 import PdfReader
|
from PyPDF2 import PdfReader
|
||||||
import openai
|
import openai
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import filedialog
|
||||||
|
from config import OPENAI_API_KEY
|
||||||
|
|
||||||
# Setze deinen OpenAI API-Schlüssel
|
# Setze deinen OpenAI API-Schlüssel
|
||||||
openai.api_key = 'DEIN_OPENAI_API_KEY'
|
openai.api_key = OPENAI_API_KEY
|
||||||
|
|
||||||
def extract_text_from_pdf(file_path):
|
def extract_text_from_pdf(file_path):
|
||||||
reader = PdfReader(file_path)
|
reader = PdfReader(file_path)
|
||||||
@@ -19,14 +22,14 @@ def extract_text_from_pdf(file_path):
|
|||||||
def get_embedding(text):
|
def get_embedding(text):
|
||||||
# OpenAI Embeddings API verwenden
|
# OpenAI Embeddings API verwenden
|
||||||
response = openai.Embedding.create(
|
response = openai.Embedding.create(
|
||||||
input=[text],
|
input=text,
|
||||||
model="text-embedding-ada-002"
|
model="text-embedding-ada-002"
|
||||||
)
|
)
|
||||||
embedding = response['data'][0]['embedding']
|
embedding = response['data'][0]['embedding']
|
||||||
return embedding
|
return embedding
|
||||||
|
|
||||||
def load_pdfs_to_lancedb(directory, db_path):
|
def load_pdfs_to_lancedb(directory, db_path):
|
||||||
# Verbindung zur LanceDB herstellen
|
# Verbindung zur LanceDB herstellen (synchroner Client)
|
||||||
db = lancedb.connect(db_path)
|
db = lancedb.connect(db_path)
|
||||||
table_name = 'pdf_embeddings'
|
table_name = 'pdf_embeddings'
|
||||||
data = []
|
data = []
|
||||||
@@ -47,7 +50,7 @@ def load_pdfs_to_lancedb(directory, db_path):
|
|||||||
table = db.open_table(table_name)
|
table = db.open_table(table_name)
|
||||||
table.add(data)
|
table.add(data)
|
||||||
else:
|
else:
|
||||||
table = db.create_table(table_name, data=data, mode='overwrite')
|
table = db.create_table(table_name, data=data)
|
||||||
|
|
||||||
def semantic_search(query, db_path, top_k=5):
|
def semantic_search(query, db_path, top_k=5):
|
||||||
db = lancedb.connect(db_path)
|
db = lancedb.connect(db_path)
|
||||||
@@ -60,14 +63,37 @@ def semantic_search(query, db_path, top_k=5):
|
|||||||
return file_links
|
return file_links
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Ersetze durch deine tatsächlichen Pfade
|
import sys
|
||||||
pdf_directory = 'pfad_zum_pdf_verzeichnis'
|
import argparse
|
||||||
lancedb_path = 'pfad_zum_lancedb_verzeichnis'
|
|
||||||
|
# Argument Parser für Kommandozeilenargumente
|
||||||
|
parser = argparse.ArgumentParser(description='PDF-Semantische Suche mit LanceDB')
|
||||||
|
parser.add_argument('--db_dir', type=str, help='Pfad zum LanceDB-Verzeichnis', default='lancedb_data')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
lancedb_path = args.db_dir
|
||||||
|
|
||||||
|
# Tkinter GUI initialisieren
|
||||||
|
root = tk.Tk()
|
||||||
|
root.withdraw() # Hauptfenster ausblenden
|
||||||
|
|
||||||
|
# Ordnerauswahl-Dialog öffnen
|
||||||
|
pdf_directory = filedialog.askdirectory(title="Bitte wählen Sie das PDF-Verzeichnis aus")
|
||||||
|
|
||||||
|
# Überprüfen, ob das Verzeichnis existiert
|
||||||
|
if not pdf_directory or not os.path.isdir(pdf_directory):
|
||||||
|
print("Kein gültiges Verzeichnis ausgewählt. Das Programm wird beendet.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
# PDFs in LanceDB laden
|
# PDFs in LanceDB laden
|
||||||
load_pdfs_to_lancedb(pdf_directory, lancedb_path)
|
load_pdfs_to_lancedb(pdf_directory, lancedb_path)
|
||||||
|
|
||||||
# Semantische Suche durchführen
|
# Semantische Suche durchführen
|
||||||
query = input("Gib deine Suchanfrage ein: ")
|
while True:
|
||||||
results = semantic_search(query, lancedb_path)
|
query = input("Gib deine Suchanfrage ein (oder 'exit' zum Beenden): ")
|
||||||
print("Passende Dateien:")
|
if query.lower() == 'exit':
|
||||||
for file_link in results:
|
break
|
||||||
print(file_link)
|
results = semantic_search(query, lancedb_path)
|
||||||
|
print("Passende Dateien:")
|
||||||
|
for file_link in results:
|
||||||
|
print(file_link)
|
||||||
|
|||||||
BIN
__pycache__/config.cpython-312.pyc
Normal file
BIN
__pycache__/config.cpython-312.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user