funktionierender Prototyp
This commit is contained in:
102
filesearch.py
102
filesearch.py
@@ -1,70 +1,104 @@
|
|||||||
import os
|
import os
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from tkinter import simpledialog, Tk, Label, Entry, Button, Text, filedialog
|
from tkinter import Tk, Label, Entry, Button, Text, filedialog
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# Load environment variables from a .env file, if one is present, before any
# code reads them (the OpenAI API key is later read from os.environ).
load_dotenv()


def get_user_input():
    """Ask the user for a search query in a modal dialog.

    Returns:
        The string typed by the user, or None if the dialog was cancelled.
    """
    # Imported locally so this function does not depend on the module-level
    # tkinter import list, which may not include simpledialog.
    from tkinter import simpledialog

    # No parent window is passed, for simplicity.
    return simpledialog.askstring("Input", "Enter search query:", parent=None)
|
|
||||||
|
|
||||||
def list_files(directory):
    """Recursively collect the .docx and .pdf files below *directory*.

    Hidden directories (names starting with '.') are pruned from the walk,
    so nothing inside them is visited; files that merely sit next to a
    hidden directory are still returned. The extension check is
    case-insensitive, so 'REPORT.PDF' is found as well.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        A list of matching file paths, in os.walk order. Empty if the
        directory does not exist or contains no matching files.
    """
    files = []
    for root, dirs, filenames in os.walk(directory):
        # Prune in place: os.walk only honours changes made to the very
        # list object it yielded, so a rebinding (dirs = ...) would not work.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        files.extend(
            os.path.join(root, filename)
            for filename in filenames
            if filename.lower().endswith(('.docx', '.pdf'))
        )
    return files
|
|
||||||
|
|
||||||
def compare_with_openai(content, search_query):
    """Ask the OpenAI chat API whether *content* is relevant to *search_query*.

    Only the first 1000 characters of *content* are sent. The model is asked
    to answer with 'Ja' or 'Nein'; the reply is limited to 5 tokens and
    requested with temperature 0.0.

    Args:
        content: Text extracted from a file.
        search_query: The user's search query.

    Returns:
        True if the reply contains 'ja' as a whole word (case-insensitive).
        False for any other reply, an empty reply, or any error while
        creating the client or calling the API (the error is printed).
    """
    import re  # local import: the module's top-level imports do not include re

    try:
        # Client construction is inside the try block because it can fail
        # too (e.g. when no API key is configured).
        client = OpenAI(
            api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
        )
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": f"Vergleiche den folgenden Text mit der Suchanfrage '{search_query}':\n\n{content[:1000]}...\n\nIst der Text relevant für die Suchanfrage? Antworte mit 'Ja' oder 'Nein'."
                },
            ],
            max_tokens=5,
            temperature=0.0,
        )
        # The message content may be None; treat that as an empty answer.
        answer = (completion.choices[0].message.content or "").strip().lower()
    except Exception as e:
        print(f"Fehler bei der OpenAI-API-Anfrage: {e}")
        return False

    # Whole-word match so that words merely containing "ja" (e.g. "jahr")
    # are not mistaken for an affirmative answer.
    return "ja" in re.findall(r"\w+", answer)
|
||||||
|
|
||||||
|
def read_file_content(file_path):
    """Extract the plain text of a .docx or .pdf file.

    The file type is chosen from the file name suffix, compared
    case-insensitively. The parser libraries (docx, PyPDF2) are imported
    lazily inside the try blocks, so a missing library is reported like any
    other read error instead of raising.

    Args:
        file_path: Path of the file to read.

    Returns:
        For .docx: the paragraph texts joined by newlines.
        For .pdf: the extracted text of all pages, concatenated without a
        separator (pages yielding no text are skipped).
        An empty string for any other suffix or if reading fails (the
        error is printed).
    """
    lowered_path = file_path.lower()

    if lowered_path.endswith('.docx'):
        try:
            import docx
            document = docx.Document(file_path)
            return '\n'.join(paragraph.text for paragraph in document.paragraphs)
        except Exception as e:
            print(f"Fehler beim Lesen der DOCX-Datei {file_path}: {e}")
            return ''

    if lowered_path.endswith('.pdf'):
        try:
            import PyPDF2
            with open(file_path, 'rb') as pdf_file:
                reader = PyPDF2.PdfReader(pdf_file)
                # extract_text() may yield nothing for a page; substitute ''.
                return ''.join(page.extract_text() or '' for page in reader.pages)
        except Exception as e:
            print(f"Fehler beim Lesen der PDF-Datei {file_path}: {e}")
            return ''

    return ''
|
||||||
|
|
||||||
def search_files():
    """Run a search and show the relevant files in the result widget.

    Reads the query from ``query_entry``, asks the user to pick a directory,
    extracts the text of every .docx/.pdf file found by ``list_files`` and
    keeps the files that ``compare_with_openai`` judges relevant. The outcome
    is written to ``result_text``.

    ``query_entry`` and ``result_text`` are module-level names defined
    outside this function (presumably the Entry and Text widgets of the GUI
    section; verify against the GUI setup code).

    Returns early, after printing a message, if the query is empty or only
    whitespace, if no directory was chosen, or if the chosen path is not an
    existing directory.
    """
    # Strip so that a whitespace-only query does not trigger API calls.
    search_query = query_entry.get().strip()
    if not search_query:
        print("Keine Suchanfrage eingegeben.")
        return

    directory = filedialog.askdirectory()
    if not directory:
        print("Kein Verzeichnis ausgewählt.")
        return

    # isdir rather than exists: a regular file is not a valid search root.
    if not os.path.isdir(directory):
        print("Verzeichnis existiert nicht.")
        return

    found_files = []
    for file_path in list_files(directory):
        file_content = read_file_content(file_path)
        # Files without extractable text are skipped without an API call.
        if file_content and compare_with_openai(file_content, search_query):
            found_files.append(file_path)

    # Replace whatever a previous search left in the widget.
    result_text.delete('1.0', 'end')
    if found_files:
        result_text.insert('end', "Dateien relevant für die Suchanfrage:\n")
        for file in found_files:
            result_text.insert('end', file + '\n')
    else:
        result_text.insert('end', "Keine relevanten Dateien gefunden.")
|
||||||
|
|
||||||
# GUI erstellen
|
# GUI erstellen
|
||||||
root = Tk()
|
root = Tk()
|
||||||
|
|||||||
Reference in New Issue
Block a user