From bae4932d12319c7cb5c5d39b42e4ed327a0f95f6 Mon Sep 17 00:00:00 2001 From: Martin Rattensberger Date: Sat, 23 Nov 2024 00:38:11 +0100 Subject: [PATCH] funktionierender Prototyp --- filesearch.py | 104 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 35 deletions(-) diff --git a/filesearch.py b/filesearch.py index ef71aa5..3451342 100644 --- a/filesearch.py +++ b/filesearch.py @@ -1,70 +1,104 @@ import os from openai import OpenAI -from tkinter import simpledialog, Tk, Label, Entry, Button, Text, filedialog +from tkinter import Tk, Label, Entry, Button, Text, filedialog + from dotenv import load_dotenv -def get_user_input(): - root = None # No parent window for simplicity - search_query = simpledialog.askstring("Input", "Enter search query:", parent=root) - return search_query +load_dotenv() def list_files(directory): files = [] for root, dirs, filenames in os.walk(directory): - if not any(d.startswith('.') for d in dirs): - for filename in filenames: + # Versteckte Verzeichnisse ausschließen + dirs[:] = [d for d in dirs if not d.startswith('.')] + for filename in filenames: + if filename.endswith(('.docx', '.pdf')): file_path = os.path.join(root, filename) - if filename.endswith(('.docx', '.pdf')): - files.append(file_path) - return files + files.append(file_path) + return files def compare_with_openai(content, search_query): client = OpenAI( - api_key=os.environ.get("OPENAI_API_KEY") + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted ) - chat_completion = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": f"Vergleiche den folgenden Text mit der Suchanfrage '{search_query}':\n\n{content[:1000]}...\n\nIst der Text relevant für die Suchanfrage?"}, - ], - model="gpt-4o-mini", - ) - return "ja" in chat_completion.choices[0].message.content.lower() + try: + completion = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "user", + "content": f"Vergleiche den folgenden Text mit der Suchanfrage '{search_query}':\n\n{content[:1000]}...\n\nIst der Text relevant für die Suchanfrage? Antworte mit 'Ja' oder 'Nein'." + }, + ], + max_tokens=5, + temperature=0.0, + ) + answer = completion.choices[0].message.content.strip().lower() + return "ja" in answer + except Exception as e: + print(f"Fehler bei der OpenAI-API-Anfrage: {e}") + return False + +def read_file_content(file_path): + if file_path.endswith('.docx'): + try: + import docx + doc = docx.Document(file_path) + fullText = [] + for para in doc.paragraphs: + fullText.append(para.text) + return '\n'.join(fullText) + except Exception as e: + print(f"Fehler beim Lesen der DOCX-Datei {file_path}: {e}") + return '' + elif file_path.endswith('.pdf'): + try: + import PyPDF2 + with open(file_path, 'rb') as pdf_file: + reader = PyPDF2.PdfReader(pdf_file) + text = '' + for page in reader.pages: + page_text = page.extract_text() + if page_text: + text += page_text + return text + except Exception as e: + print(f"Fehler beim Lesen der PDF-Datei {file_path}: {e}") + return '' + else: + return '' + def search_files(): - search_query = query_entry.get() # Get the search query from the Entry widget + search_query = query_entry.get() if not search_query: - print("No search query provided.") + print("Keine Suchanfrage eingegeben.") return - directory = filedialog.askdirectory() # Auswahldialog für das Verzeichnis + directory = filedialog.askdirectory() if not directory: - print("No directory selected.") + print("Kein Verzeichnis ausgewählt.") return if not os.path.exists(directory): - print("Directory does not exist.") + print("Verzeichnis existiert nicht.") return files = list_files(directory) found_files = [] for file_path in files: - try: - with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: - file_content = file.read() - if compare_with_openai(file_content, search_query): - found_files.append(file_path) - except Exception as e: - print(f"Error reading {file_path}: {e}") + file_content = read_file_content(file_path) + if file_content: + if compare_with_openai(file_content, search_query): + found_files.append(file_path) result_text.delete(1.0, 'end') if found_files: - result_text.insert('end', "Files containing the search query:\n") + result_text.insert('end', "Dateien relevant für die Suchanfrage:\n") for file in found_files: result_text.insert('end', file + '\n') else: - result_text.insert('end', "No files containing the search query found.") + result_text.insert('end', "Keine relevanten Dateien gefunden.") # GUI erstellen root = Tk() @@ -85,4 +119,4 @@ search_button.pack(pady=10) result_text = Text(root, height=15, width=70) result_text.pack(pady=10) -root.mainloop() \ No newline at end of file +root.mainloop()