@app.route('/process_url', methods=['POST'])
def process_url():
    """Validate a submitted YouTube URL and prepare the assistant worker.

    Reads the ``url`` form field, extracts the video ID, fetches the
    transcript and, when both succeed, stores a not-yet-started
    ``threading.Thread`` targeting ``create_and_stream`` in the module-level
    ``thread`` variable. Every step is appended to the log via ``log``.

    Returns:
        A plain string describing the failure when the ID or the transcript
        cannot be obtained, otherwise a ``text/plain`` 200 ``Response``
        telling the client to poll ``/stream_output``.
    """
    global thread

    # Timestamp is rendered in US Eastern time for the log heading.
    stamp = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')
    log(f"\n\n\n## New Entry at {stamp}\n\n")

    url = request.form['url']
    log(f"URL: {url}\n")

    # Step 1: the URL must yield a video ID.
    video_id = get_video_id(url)
    if not video_id:
        log(f"Could not parse video id from URL: {url}")
        return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
    log(f"Video ID: {video_id}\n\n")

    # Step 2: the video must have a retrievable transcript.
    transcript = get_auto_transcript(video_id)
    if not transcript:
        log("## Error: could not retrieve transcript, Assistant won't be called.")
        return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."

    # Step 3: build (but do not start) the worker. ``args`` has to be a
    # one-element tuple; without the trailing comma the transcript string
    # itself would be iterated character by character.
    thread = threading.Thread(
        target=create_and_stream,
        name="create_stream",
        args=(transcript,),
    )
    log("Stream preperation complete, sending reply...\n\n")
    return Response(
        "Processing started. Check /stream_output for updates.",
        status=200,
        content_type='text/plain',
    )
\nStarting stream thread...\n\n") + thread.start() + # Start streaming output from output_stream + log("Starting to stream output.") + while not output_stream.done: + if output_stream.buffer != []: + delta = output_stream.buffer.pop(0) + yield bytes(delta, encoding="utf-8") + else: + awaiter(sleep(0.05)) + log(f"\nStream successfully completely.\n\n
class StreamOutput:
    """Hand-off buffer between the assistant event handler and the HTTP streamer.

    Attributes:
        delta: The most recent text fragment passed to ``send_delta``.
        response: Concatenation of every fragment received since the last reset.
        done: Flag set by the producer once no more fragments will arrive.
        buffer: FIFO list of fragments waiting to be sent; readers consume it
            with ``buffer.pop(0)``, so it must remain a plain ``list``.
    """

    def __init__(self):
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    def reset(self):
        """Return the object to its freshly-constructed state."""
        self.delta = ""
        self.response = ""
        self.done = False
        # A new list is bound rather than clearing in place, as before.
        self.buffer = []

    async def send_delta(self, delta):
        """Record ``delta`` and queue it for streaming.

        The fragment is always stored in ``delta`` and appended to
        ``response``. It is queued in ``buffer`` unless it equals the newest
        entry still waiting there.

        Args:
            delta: Text fragment to record.
        """
        self.delta = delta
        self.response += delta

        # EAFP: read the newest pending entry in one step. An empty buffer
        # (including one emptied by a reader between two calls) simply takes
        # the fragment, so no separate emptiness check, index helper or
        # error logging is needed.
        try:
            newest_pending = self.buffer[-1]
        except IndexError:
            self.buffer.append(delta)
            return

        # NOTE(review): a fragment identical to the newest *unconsumed* entry
        # is not queued even though it was added to ``response``; whether a
        # genuine repeat is dropped therefore depends on how fast the reader
        # drains the buffer. Behaviour kept as-is - confirm it is intended.
        if newest_pending != delta:
            self.buffer.append(delta)
def create_and_stream(transcript):
    """Run the assistant on ``transcript`` and mark the output stream finished.

    Creates a new thread containing a single user message with the
    transcript, runs the configured assistant with ``EventHandler`` as the
    event handler, and blocks until the run is over.

    ``output_stream.done`` is set in a ``finally`` clause so that it becomes
    ``True`` even when creating or consuming the stream raises; the
    ``/stream_output`` generator loops on ``while not output_stream.done``
    and would otherwise never terminate after a failure. Any exception still
    propagates to the caller.

    Args:
        transcript: Transcript text sent as the user message content.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler(),
        ) as stream:
            stream.until_done()
    finally:
        # Always release the consumer, success or failure.
        output_stream.done = True
else{ - response_area.innerText = "Sending URL and retriving transcript." + else { + document.getElementById('url_box').value = ""; } - + + // First, process the URL fetch('/process_url', { method: 'POST', headers: { @@ -19,13 +20,61 @@ document.addEventListener("DOMContentLoaded", (event) => { }, body: new URLSearchParams({ url: url }) }) - .then(response => response.text()) - .then(data => { - response_area.innerText = data; + .then(response => { + if (!response.ok) { + throw new Error('Network response was not ok'); + } + // Extract the text from the response body + return response.text(); // Use .json() if the response is JSON + }) + .then(text => { + submit_button.style.display = "none"; + if (text === "Processing started. Check /stream_output for updates.") { + streamOutput(response_area); + } else { + response_area.innerText = text; // Show any other response message + submit_button.style.display = "flex"; + } }) .catch(error => { - console.error('Error:', error); - response_area.innerText = 'An error occurred. 
/**
 * Stream the body of GET /stream_output into `response_area` as it arrives.
 *
 * Chunks are decoded as UTF-8 and appended as text nodes, so markup-looking
 * characters in the (text/plain) response are displayed literally instead of
 * being parsed as HTML. The submit button is shown again once streaming has
 * finished, whether it succeeded or failed.
 *
 * @param {HTMLElement} response_area Element that receives the streamed text.
 */
function streamOutput(response_area) {
    const submit_button = document.getElementById('submit');
    const decoder = new TextDecoder("utf-8");

    // Append plain text and keep the newest output scrolled into view.
    const appendText = (text) => {
        if (!text) {
            return;
        }
        response_area.appendChild(document.createTextNode(text));
        response_area.scrollTop = response_area.scrollHeight;
    };

    response_area.textContent = "";

    fetch('/stream_output')
        .then(response => {
            if (!response.ok) {
                throw new Error('Network response was not ok');
            }
            const reader = response.body.getReader();

            // Each step returns its promise so that a failed read() rejects
            // the outer chain and reaches the .catch() below.
            function readStream() {
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush bytes the decoder may still be holding from a
                        // multi-byte character split across chunks.
                        appendText(decoder.decode());
                        return undefined;
                    }
                    appendText(decoder.decode(value, { stream: true }));
                    return readStream();
                });
            }

            return readStream();
        })
        .catch(error => {
            console.error('Error fetching stream:', error);
            response_area.innerText = 'Error fetching stream: ' + error.message;
        })
        .then(() => {
            // Runs after success and after a handled error alike; the caller
            // hid the button before starting the stream.
            submit_button.style.display = "flex";
        });
}