From 5601e8a874da79fd254b94b499a4320f6e56cb13 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Sat, 5 Oct 2024 16:55:53 -0400 Subject: [PATCH 1/6] Streaming Update (NEEDS OPTIMIZATIONS AND FAILSAFES!!!!!!!!!!!!!!!!!!!!!!) --- app.py | 37 ++++++++++++++++++++++++++++++------ main.py | 8 +++++++- screw-bardo.code-workspace | 10 ++++++++++ website/static/script.js | 39 +++++++++++++++++++++++++++++--------- 4 files changed, 78 insertions(+), 16 deletions(-) create mode 100644 screw-bardo.code-workspace diff --git a/app.py b/app.py index a9f7a94..c3497d9 100644 --- a/app.py +++ b/app.py @@ -1,18 +1,43 @@ -from flask import Flask, render_template, request -from main import get_auto_transcript, get_video_id, create_and_stream +from flask import Flask, render_template, Response, request +from main import get_auto_transcript, get_video_id, create_and_stream, EventHandler from datetime import datetime +import sys +import io import pytz +import time app = Flask(__name__, static_folder="website/static", template_folder="website") +class StreamToLogger(io.StringIO): + def __init__(self): + super().__init__() + + def write(self, message): + # I could probably log stuff here + print(message, end='') # Print to standard output (console) + # You could also log this message or handle it differently. + + @app.route('/') def home(): return render_template('index.html') +@app.route('/streamtest', methods=['POST']) +def streaming(): + def generate(): + for i in range(10): + yield f"Data chunk {i}\n" + time.sleep(1) # Simulating a delay in data generation + + return Response(generate(), content_type='text/plain') + @app.route('/process_url', methods=['POST']) def process_url(): + old_stdout = sys.stdout + new_stdout = StreamToLogger() + sys.stdout = new_stdout # Opens a file to log the video id and the assistants respone to see if I can further improve instructions: - log = open("log.txt", "at", 1) + #log = open("log.txt", "at", 1) url = request.form['url'] # Extract the video ID from the URL @@ -26,10 +51,10 @@ def process_url(): return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." # Process the transcript and stream the result. - response = create_and_stream(transcript) - log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}") + # response = create_and_stream(transcript) + # log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}") # Return a response - return response # Add more detailed output if needed + return Response(create_and_stream(transcript), content_type="text/plain", status=200, direct_passthrough=True) # Add more detailed output if needed if __name__ == '__main__': # Change this line to properly check for main app.run(debug=True) \ No newline at end of file diff --git a/main.py b/main.py index 71a7eda..cd59220 100644 --- a/main.py +++ b/main.py @@ -26,23 +26,29 @@ class EventHandler(AssistantEventHandler): @override def on_text_created(self, text) -> None: print(f"\nassistant > ", end="", flush=True) + @override def on_text_delta(self, delta, snapshot): print(delta.value, end="", flush=True) + def on_tool_call_created(self, tool_call): print(f"\nassistant > {tool_call.type}\n", flush=True) + def on_tool_call_delta(self, delta, snapshot): if delta.type == 'code_interpreter': if delta.code_interpreter.input: print(delta.code_interpreter.input, end="", flush=True) + if delta.code_interpreter.outputs: print(f"\n\noutput >", flush=True) + for output in delta.code_interpreter.outputs: if output.type == "logs": print(f"\n{output.logs}", flush=True) + # Setting up OpenAI Client with API Key @@ -69,7 +75,7 @@ def create_and_stream(transcript): }, event_handler=EventHandler() ) as stream: - stream.until_done() + stream.until_done() messages = stream.get_final_messages() return messages[0].content[0].text.value diff --git a/screw-bardo.code-workspace b/screw-bardo.code-workspace new file mode 100644 index 0000000..79a7cdb --- /dev/null +++ b/screw-bardo.code-workspace @@ -0,0 +1,10 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "html.format.enable": true + } +} \ No newline at end of file diff --git a/website/static/script.js b/website/static/script.js index 474c14c..e35feb3 100644 --- a/website/static/script.js +++ b/website/static/script.js @@ -8,10 +8,6 @@ document.addEventListener("DOMContentLoaded", (event) => { response_area.innerText = 'Please enter a URL.'; return; } - else{ - response_area.innerText = "Sending URL and retriving transcript." - } - fetch('/process_url', { method: 'POST', headers: { @@ -19,13 +15,38 @@ document.addEventListener("DOMContentLoaded", (event) => { }, body: new URLSearchParams({ url: url }) }) - .then(response => response.text()) - .then(data => { - response_area.innerText = data; + .then(response => { + const reader = response.body.getReader(); + const decoder = new TextDecoder("utf-8"); + + function readStream() { + reader.read().then(({ done, value }) => { + if (done) { + console.log("Stream finished."); + return; + } + + // Decode and process the chunk + const chunk = decoder.decode(value, { stream: true }); + + // Split the received chunk by new line to handle multiple lines (if any) + chunk.split('\n').forEach(data => { + if (data.trim()) { // Avoid empty strings + // Update the inner HTML of the output div + response_area.innerHTML += `

${data}

`; + } + }); + + // Continue reading + readStream(); + }); + } + + // Start reading the stream + readStream(); }) .catch(error => { - console.error('Error:', error); - response_area.innerText = 'An error occurred. Please try again.'; + console.error('Error fetching stream:', error); }); }); }); From 3ea5681f86b061965a8e1f571197102fec3de4e8 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Sat, 5 Oct 2024 16:57:03 -0400 Subject: [PATCH 2/6] Streaming Update (NEEDS OPTIMIZATIONS AND FAILSAFES!!!!!!!!!!!!!!!!!!!!!!) --- .gitignore | 1 - app.py | 40 ++++++++++++--------- main.py | 77 ++++++++++++++++------------------------ website/index.html | 2 +- website/static/script.js | 38 +++++++++++++------- 5 files changed, 81 insertions(+), 77 deletions(-) diff --git a/.gitignore b/.gitignore index afe8238..87d114f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -thread-killer.py log.txt .env diff --git a/app.py b/app.py index c3497d9..1553329 100644 --- a/app.py +++ b/app.py @@ -1,23 +1,12 @@ from flask import Flask, render_template, Response, request -from main import get_auto_transcript, get_video_id, create_and_stream, EventHandler +from main import get_auto_transcript, get_video_id, create_and_stream, output_buffer, output_lock from datetime import datetime -import sys -import io +import threading import pytz import time app = Flask(__name__, static_folder="website/static", template_folder="website") -class StreamToLogger(io.StringIO): - def __init__(self): - super().__init__() - - def write(self, message): - # I could probably log stuff here - print(message, end='') # Print to standard output (console) - # You could also log this message or handle it differently. - - @app.route('/') def home(): return render_template('index.html') @@ -33,9 +22,6 @@ def streaming(): @app.route('/process_url', methods=['POST']) def process_url(): - old_stdout = sys.stdout - new_stdout = StreamToLogger() - sys.stdout = new_stdout # Opens a file to log the video id and the assistants respone to see if I can further improve instructions: #log = open("log.txt", "at", 1) url = request.form['url'] @@ -50,11 +36,31 @@ def process_url(): if (not transcript): return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." + + # Start processing in a separate thread + threading.Thread(target=create_and_stream, args=(transcript,)).start() + # Process the transcript and stream the result. # response = create_and_stream(transcript) # log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}") # Return a response - return Response(create_and_stream(transcript), content_type="text/plain", status=200, direct_passthrough=True) # Add more detailed output if needed + return Response("Processing started. Check /stream_output for updates.", status=200) # Add more detailed output if needed + +@app.route('/stream_output') +def stream_output(): + def yoink(): + while True: + with output_lock: + if output_buffer: + message = output_buffer.pop(0) + yield f"{message}" + + time.sleep(0.005) # Adjust as necessary for your application + + return Response(yoink(), content_type='text/plain') + + + if __name__ == '__main__': # Change this line to properly check for main app.run(debug=True) \ No newline at end of file diff --git a/main.py b/main.py index cd59220..f97dd3c 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,12 @@ from openai import OpenAI # For streaming from typing_extensions import override +import threading + +# Output buffer and thread lock +output_buffer = [] +output_lock = threading.Lock() + # To get the env var from dotenv import load_dotenv import os @@ -21,36 +27,6 @@ load_dotenv() ### OpenAI Config -# This is copy and pasted straight up from the quickstart guide: -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text) -> None: - print(f"\nassistant > ", end="", flush=True) - - - @override - def on_text_delta(self, delta, snapshot): - print(delta.value, end="", flush=True) - - - def on_tool_call_created(self, tool_call): - print(f"\nassistant > {tool_call.type}\n", flush=True) - - - def on_tool_call_delta(self, delta, snapshot): - if delta.type == 'code_interpreter': - if delta.code_interpreter.input: - print(delta.code_interpreter.input, end="", flush=True) - - if delta.code_interpreter.outputs: - print(f"\n\noutput >", flush=True) - - for output in delta.code_interpreter.outputs: - if output.type == "logs": - print(f"\n{output.logs}", flush=True) - - - # Setting up OpenAI Client with API Key api_key = os.getenv("OPENAI_API_KEY") client = OpenAI( @@ -62,23 +38,32 @@ client = OpenAI( # screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" -# uhh no we need a new thread each time tf -# make sure to call the function after the transcript is confirmed to work, it would be very stupid to call the function and make a new thread this early -def create_and_stream(transcript): - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages" : [ - {"role": "user", - "content": transcript} - ] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - messages = stream.get_final_messages() - return messages[0].content[0].text.value +# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: +class EventHandler(AssistantEventHandler): + @override + def on_text_created(self, text) -> None: + with output_lock: + output_buffer.append(f"\nassistant > {text}") + + @override + def on_text_delta(self, delta, snapshot): + with output_lock: + output_buffer.append(delta.value) + def on_tool_call_created(self, tool_call): + with output_lock: + output_buffer.append(f"\nassistant > {tool_call.type}\n") + +def create_and_stream(transcript): + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + def get_video_id(url): youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' diff --git a/website/index.html b/website/index.html index 24cc215..f283176 100644 --- a/website/index.html +++ b/website/index.html @@ -17,7 +17,7 @@
-

Response will appear here.

+
Response will appear here.        
diff --git a/website/static/script.js b/website/static/script.js index e35feb3..633ae33 100644 --- a/website/static/script.js +++ b/website/static/script.js @@ -1,13 +1,14 @@ - document.addEventListener("DOMContentLoaded", (event) => { + const response_area = document.getElementById('response-area'); document.getElementById('submit').addEventListener('click', function() { var url = document.getElementById('url_box').value; - const response_area = document.getElementById('response-area'); if (!url) { response_area.innerText = 'Please enter a URL.'; return; } + + // First, process the URL fetch('/process_url', { method: 'POST', headers: { @@ -15,6 +16,26 @@ document.addEventListener("DOMContentLoaded", (event) => { }, body: new URLSearchParams({ url: url }) }) + .then(response => { + if (!response.ok) { + throw new Error('Network response was not ok'); + } + // Start streaming once processing is started + streamOutput(response_area); + }) + .catch(error => { + console.error('Error processing URL:', error); + response_area.innerText = 'Error processing URL: ' + error.message; + }); + }); +}); + +function streamOutput(response_area) { + // Fetch the streaming output + const streamResponsePromise = fetch('/stream_output'); + response_area.innerHTML = "" + + streamResponsePromise .then(response => { const reader = response.body.getReader(); const decoder = new TextDecoder("utf-8"); @@ -28,14 +49,7 @@ document.addEventListener("DOMContentLoaded", (event) => { // Decode and process the chunk const chunk = decoder.decode(value, { stream: true }); - - // Split the received chunk by new line to handle multiple lines (if any) - chunk.split('\n').forEach(data => { - if (data.trim()) { // Avoid empty strings - // Update the inner HTML of the output div - response_area.innerHTML += `

${data}

`; - } - }); + response_area.innerHTML += chunk; // Continue reading readStream(); @@ -47,6 +61,6 @@ document.addEventListener("DOMContentLoaded", (event) => { }) .catch(error => { console.error('Error fetching stream:', error); + response_area.innerText = 'Error fetching stream: ' + error.message; }); - }); -}); +} From 05b7c247a7088636186ddc13cff82b0153f3b4cf Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 15 Oct 2024 14:26:59 -0400 Subject: [PATCH 3/6] =?UTF-8?q?streaming=20actually=20works=20=F0=9F=A4=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 52 +++++++++++----------- main.py | 94 +++++++++++++++++++++++++++------------- website/static/script.js | 12 ++--- website/static/style.css | 4 +- 4 files changed, 97 insertions(+), 65 deletions(-) diff --git a/app.py b/app.py index 1553329..02ffb50 100644 --- a/app.py +++ b/app.py @@ -1,30 +1,28 @@ from flask import Flask, render_template, Response, request -from main import get_auto_transcript, get_video_id, create_and_stream, output_buffer, output_lock +from main import get_auto_transcript, get_video_id, create_and_stream, output_stream, fake_stream, awaiter +from asyncio import sleep from datetime import datetime import threading import pytz import time + + app = Flask(__name__, static_folder="website/static", template_folder="website") @app.route('/') def home(): return render_template('index.html') -@app.route('/streamtest', methods=['POST']) -def streaming(): - def generate(): - for i in range(10): - yield f"Data chunk {i}\n" - time.sleep(1) # Simulating a delay in data generation - - return Response(generate(), content_type='text/plain') - @app.route('/process_url', methods=['POST']) def process_url(): # Opens a file to log the video id and the assistants respone to see if I can further improve instructions: #log = open("log.txt", "at", 1) url = request.form['url'] + if url == "test": + global thread + thread = threading.Thread(name="test_thread", target=fake_stream) + return Response("teehee", status=200) # Extract the video ID from the URL video_id = get_video_id(url) # Modify this function to accept the URL @@ -36,31 +34,31 @@ def process_url(): if (not transcript): return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." + thread = threading.Thread(name="create_stream", target=create_and_stream, args=(transcript,)) # The comma here is very intentional, it's so that it iterates it as a tuple rather than iterateing the string. - # Start processing in a separate thread - threading.Thread(target=create_and_stream, args=(transcript,)).start() - - # Process the transcript and stream the result. - # response = create_and_stream(transcript) - # log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}") - # Return a response return Response("Processing started. Check /stream_output for updates.", status=200) # Add more detailed output if needed @app.route('/stream_output') def stream_output(): def yoink(): - while True: - with output_lock: - if output_buffer: - message = output_buffer.pop(0) - yield f"{message}" - - time.sleep(0.005) # Adjust as necessary for your application - - return Response(yoink(), content_type='text/plain') + print("Starting stream thread.") + thread.start() + # Start streaming output from output_stream + print("Starting to stream output...") + most_recent = "" + while not output_stream.done: + if output_stream.buffer != []: + delta = output_stream.buffer.pop(0) + yield bytes(delta, encoding="utf-8") + else: + awaiter(sleep(0.05)) + output_stream.reset() + thread.join() + return + return Response(yoink(), content_type='text/plain', status=200) -if __name__ == '__main__': # Change this line to properly check for main +if __name__ == '__main__': app.run(debug=True) \ No newline at end of file diff --git a/main.py b/main.py index f97dd3c..6c7e477 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,3 @@ - # To parse video ids import re @@ -10,14 +9,45 @@ from youtube_transcript_api.formatters import TextFormatter # OpenAI API stuff import from openai import AssistantEventHandler from openai import OpenAI -# For streaming + +### For streaming from typing_extensions import override +import asyncio +awaiter = asyncio.run -import threading - -# Output buffer and thread lock -output_buffer = [] -output_lock = threading.Lock() +# The StreamOutput class to handle streaming +class StreamOutput: + + def __init__(self): + self.delta: str = "" + self.response: str = "" + self.done: bool = False + self.buffer: list = [] + + def reset(self): + self.delta = "" + self.response = "" + self.done = False + self.buffer: list = [] + print("Reset stream output obj") + + async def send_delta(self, delta): + self.delta = delta + self.response += delta + def get_index(list): + if len(list) == 0: + return 0 + else: + return len(list)-1 + if self.buffer != []: + try: + if self.delta != self.buffer[get_index(self.buffer)]: + self.buffer.append(delta) + except IndexError as index_error: + print(index_error) + self.buffer.append(delta) + + else: self.buffer.append(delta) # To get the env var from dotenv import load_dotenv @@ -25,6 +55,7 @@ import os load_dotenv() + ### OpenAI Config # Setting up OpenAI Client with API Key @@ -42,28 +73,37 @@ asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" class EventHandler(AssistantEventHandler): @override def on_text_created(self, text) -> None: - with output_lock: - output_buffer.append(f"\nassistant > {text}") + awaiter(output_stream.send_delta("Response Recieved:\nScrew-Bardo: ")) @override def on_text_delta(self, delta, snapshot): - with output_lock: - output_buffer.append(delta.value) + awaiter(output_stream.send_delta(delta.value)) def on_tool_call_created(self, tool_call): - with output_lock: - output_buffer.append(f"\nassistant > {tool_call.type}\n") + raise Exception("Assistant shouldn't be calling tools.") def create_and_stream(transcript): - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages": [{"role": "user", "content": transcript}] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + output_stream.done = True + +def fake_stream(): + i = 0 + STREAM = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18] + print("Starting fake stream.") + while i <= len(STREAM)-1: + awaiter(asyncio.sleep(0.05)) + awaiter(output_stream.send_delta(str(STREAM[i]))) + i += 1 + output_stream.done = True + return + def get_video_id(url): youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' @@ -91,12 +131,4 @@ def get_auto_transcript(video_id): txt_transcript = formatter.format_transcript(transcript) return txt_transcript - -# Stores the video id imputted by the user -""" -video_id = get_video_id() - -transcript = get_auto_transcript(video_id) - -create_and_stream(transcript) -""" \ No newline at end of file +output_stream = StreamOutput() \ No newline at end of file diff --git a/website/static/script.js b/website/static/script.js index 633ae33..28a9479 100644 --- a/website/static/script.js +++ b/website/static/script.js @@ -1,6 +1,7 @@ document.addEventListener("DOMContentLoaded", (event) => { const response_area = document.getElementById('response-area'); - document.getElementById('submit').addEventListener('click', function() { + const submit_button = document.getElementById('submit') + submit_button.addEventListener('click', function() { var url = document.getElementById('url_box').value; if (!url) { @@ -21,11 +22,13 @@ document.addEventListener("DOMContentLoaded", (event) => { throw new Error('Network response was not ok'); } // Start streaming once processing is started + submit_button.style.display = "none"; streamOutput(response_area); }) .catch(error => { console.error('Error processing URL:', error); response_area.innerText = 'Error processing URL: ' + error.message; + submit_button.style.display = "flex"; }); }); }); @@ -42,11 +45,10 @@ function streamOutput(response_area) { function readStream() { reader.read().then(({ done, value }) => { - if (done) { - console.log("Stream finished."); - return; + if(done) { + document.getElementById('submit').style.display = "flex"; + return } - // Decode and process the chunk const chunk = decoder.decode(value, { stream: true }); response_area.innerHTML += chunk; diff --git a/website/static/style.css b/website/static/style.css index afc363b..04c2e74 100644 --- a/website/static/style.css +++ b/website/static/style.css @@ -37,10 +37,10 @@ body .content { } #response-area { - display: block; + display: flex; height: 90%; min-height: 90vh; - overflow: auto; + text-wrap: wrap; flex-wrap: wrap; align-content: flex-end; } From d66fe2155c54e7248b479b83b559ce1297502cf0 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 15 Oct 2024 20:45:01 -0400 Subject: [PATCH 4/6] Somewhat messy, but ready for production. --- app.py | 30 +++--- log.md | 195 +++++++++++++++++++++++++++++++++++++++ main.py | 29 +++--- website/static/script.js | 18 +++- website/static/style.css | 3 +- 5 files changed, 238 insertions(+), 37 deletions(-) create mode 100644 log.md diff --git a/app.py b/app.py index 02ffb50..e65d7b1 100644 --- a/app.py +++ b/app.py @@ -1,10 +1,8 @@ from flask import Flask, render_template, Response, request -from main import get_auto_transcript, get_video_id, create_and_stream, output_stream, fake_stream, awaiter +from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter from asyncio import sleep from datetime import datetime -import threading -import pytz -import time +import threading, pytz @@ -16,44 +14,44 @@ def home(): @app.route('/process_url', methods=['POST']) def process_url(): - # Opens a file to log the video id and the assistants respone to see if I can further improve instructions: - #log = open("log.txt", "at", 1) + global thread + log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n") url = request.form['url'] - if url == "test": - global thread - thread = threading.Thread(name="test_thread", target=fake_stream) - return Response("teehee", status=200) - + log(f"URL: {url}\n") # Extract the video ID from the URL video_id = get_video_id(url) # Modify this function to accept the URL if not video_id: + log(f"Could not parse video id from URL: {url}") return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)" + log(f"Video ID: {video_id}\n\n") # Get the transcript for that video ID transcript = get_auto_transcript(video_id) if (not transcript): + log("## Error: could not retrieve transcript, Assistant won't be called.") return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." thread = threading.Thread(name="create_stream", target=create_and_stream, args=(transcript,)) # The comma here is very intentional, it's so that it iterates it as a tuple rather than iterateing the string. - - return Response("Processing started. Check /stream_output for updates.", status=200) # Add more detailed output if needed + log("Stream preperation complete, sending reply...\n\n") + return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) # Add more detailed output if needed @app.route('/stream_output') def stream_output(): def yoink(): - print("Starting stream thread.") + log("
\nStarting stream thread...\n\n") thread.start() # Start streaming output from output_stream - print("Starting to stream output...") - most_recent = "" + log("Starting to stream output.") while not output_stream.done: if output_stream.buffer != []: delta = output_stream.buffer.pop(0) yield bytes(delta, encoding="utf-8") else: awaiter(sleep(0.05)) + log(f"\nStream successfully completely.\n\n
\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n") output_stream.reset() thread.join() + log("\n### Task completed sucessfully without errors!") return return Response(yoink(), content_type='text/plain', status=200) diff --git a/log.md b/log.md new file mode 100644 index 0000000..348d788 --- /dev/null +++ b/log.md @@ -0,0 +1,195 @@ + + + +## New Entry at 2024-10-15 20:39:11 + +URL: https://www.youtube.com/watch?v=dQw4w9WgXcQ +Video ID: dQw4w9WgXcQ + +Stream preperation complete, sending reply... + +
+Starting stream thread... + +Starting to stream output. +Stream successfully completely. + +
+ +--- + +### Completed Assistant Response: +Response Recieved: + +Screw-Bardo: + +It appears that the transcript provided contains lyrics from a popular song rather than a social studies lecture. Given this, I will proceed with the task of taking notes and generating questions based on what is present in the text, even though it does not relate to social studies. + +**Notes:** +- Love + - Involves rules and commitments + - Emphasizes feelings and understanding +- Relationship + - Characters have known each other for a long time + - Heartache without expression + - Mutual understanding of feelings +- Commitment + - Pledges never to give up, let down, or desert + - Promises made to avoid causing pain or sadness + +**Questions:** + +Q: What is the central theme of the lyrics? +A: The central theme is love and commitment. + +Q: What has been shared between the characters? +A: They have known each other for a long time. + +Q: What emotions are expressed in the relationship? +A: There is heartache and mutual understanding. + +Q: What promises are made in the lyrics? +A: Promises are made to never give up, let down, or desert the other. + +Q: How do the characters feel about their current situation? +A: They feel a need to express unspoken feelings. + +--- + + +### Task completed sucessfully without errors! + + +## New Entry at 2024-10-15 20:39:30 + +URL: https://www.youtube.com/watch?v=ivBcfQDtMhQ +Video ID: ivBcfQDtMhQ + +Stream preperation complete, sending reply... + +
+Starting stream thread... + +Starting to stream output. +Stream successfully completely. + +
+ +--- + +### Completed Assistant Response: +Response Recieved: + +Screw-Bardo: + +### Notes + +- **U.S. Response to World War II** + - Initially isolated and neutral + - Disillusionment from World War I + - 260,000 American deaths + - No territorial or colonial gain + - Lack of belief in the League of Nations + - Did not join or sign Treaty of Versailles + - War profiteering concerns + - General Gerald Nye + - Formed the Nye Committee to investigate war profiteering + - International debts not paid + - Countries not repaying war debts to the U.S. + - Geographical isolation + - Distance from Europe (3,000 miles away) + - Domestic issues + - Focus on Great Depression recovery + - Neutrality Acts (1935, 1936, 1937) + - Made it illegal to choose sides in foreign conflicts + - Included arms embargo against warring nations + - **FDR's Role** + - Circumvented Neutrality Acts with Cash and Carry + - Saw war as an opportunity to lift the U.S. out of the Great Depression + - Restricted foreign travel for Americans + - Response to sinking of the Lusitania + +- **America First Committee** + - Promoted by Charles Lindbergh + - Advocated for prioritizing American interests over European affairs + +- **Neutrality Renouncement (1937)** + - FDR began military buildup + - First peacetime draft (Selective Service Act of 1940) + - Ages 21 to 35 + - Aimed to create an army of several million + +- **Aid to Allies** + - Lend-Lease Act (1941) + - U.S. supported Allies by supplying war materials + - Atlantic Charter (1941) + - Agreement between Roosevelt and Churchill outlining war aims + +- **Industrial Mobilization** + - Massive increase in production (e.g., arms and materials) + - 1940: 6,000 planes + - 1945: 60,000 planes + - Weapons production increased significantly + - Women's contribution to the workforce + - Example: Rosie the Riveter + +- **Government Agencies** + - War Production Board + - Restricted civilian goods production + - Office of Price Administration (OPA) + - Set prices and rents + - Office of War Information + - Censored information to control public messaging + - War financing through bonds and taxes + - 49 billion dollars in bonds by 1945 + +- **Labor Relations** + - AFL and CIO agreements to avoid strikes + - Wage increases followed by freezes + - Managing strikes, like coal strike in 1943 + +### Questions and Answers + +Q: Why did the U.S. initially maintain an isolationist stance during World War II? +A: The U.S. was disillusioned from World War I and focused on domestic economic recovery. + +Q: What were the Neutrality Acts? +A: Laws that made it illegal for the U.S. to choose sides in foreign conflicts or export arms to warring nations. + +Q: Who was Charles Lindbergh, and what did he advocate for? +A: A prominent leader of the America First Committee who promoted prioritizing American interests over involvement in Europe. + +Q: What was the Lend-Lease Act? +A: A U.S. program that supplied Allied nations with war materials during World War II. + +Q: How did the U.S. government finance the war effort? +A: Through the sale of war bonds and increased taxes. + +--- + + +### Task completed sucessfully without errors! + + +## New Entry at 2024-10-15 20:40:02 + +URL: https://www.youtube.com/watch?v=ivBc5QDtMhQ +Video ID: ivBc5QDtMhQ + + + +# Exception while fetching transcript: + + +Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ivBc5QDtMhQ! This is most likely caused by: + +Subtitles are disabled for this video + +If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem! +## Error: could not retrieve transcript, Assistant won't be called. + + +## New Entry at 2024-10-15 20:43:26 + +URL: https://www.youtube.com/watch?v=ivBfQDtMhQ +Could not parse video id from URL: https://www.youtube.com/watch?v=ivBfQDtMhQ \ No newline at end of file diff --git a/main.py b/main.py index 6c7e477..c4c8ead 100644 --- a/main.py +++ b/main.py @@ -29,7 +29,6 @@ class StreamOutput: self.response = "" self.done = False self.buffer: list = [] - print("Reset stream output obj") async def send_delta(self, delta): self.delta = delta @@ -44,9 +43,8 @@ class StreamOutput: if self.delta != self.buffer[get_index(self.buffer)]: self.buffer.append(delta) except IndexError as index_error: - print(index_error) + log(f"\nCaught IndexError: {str(index_error)}") self.buffer.append(delta) - else: self.buffer.append(delta) # To get the env var @@ -55,6 +53,13 @@ import os load_dotenv() +# For logging +import pytz +from datetime import datetime + +def log(str): + with open("log.md", "at") as file: + file.write(str) ### OpenAI Config @@ -73,7 +78,7 @@ asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" class EventHandler(AssistantEventHandler): @override def on_text_created(self, text) -> None: - awaiter(output_stream.send_delta("Response Recieved:\nScrew-Bardo: ")) + awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) @override def on_text_delta(self, delta, snapshot): @@ -93,17 +98,6 @@ def create_and_stream(transcript): stream.until_done() output_stream.done = True -def fake_stream(): - i = 0 - STREAM = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18] - print("Starting fake stream.") - while i <= len(STREAM)-1: - awaiter(asyncio.sleep(0.05)) - awaiter(output_stream.send_delta(str(STREAM[i]))) - i += 1 - output_stream.done = True - return - def get_video_id(url): youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' @@ -113,7 +107,7 @@ def get_video_id(url): id = re.search(youtube_com, url) if not id: - print("Couldn't parse video ID from URL") + # Couldn't parse video ID from URL return None return id.group(1) @@ -123,7 +117,8 @@ def get_auto_transcript(video_id): trans_api_errors = youtube_transcript_api._errors try: transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) - except trans_api_errors.TranscriptsDisabled: + except trans_api_errors.TranscriptsDisabled as e: + log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') return None formatter = TextFormatter() # Ensure that you create an instance of TextFormatter diff --git a/website/static/script.js b/website/static/script.js index 28a9479..d28aab1 100644 --- a/website/static/script.js +++ b/website/static/script.js @@ -8,6 +8,9 @@ document.addEventListener("DOMContentLoaded", (event) => { response_area.innerText = 'Please enter a URL.'; return; } + else { + document.getElementById('url_box').value = ""; + } // First, process the URL fetch('/process_url', { @@ -21,9 +24,17 @@ document.addEventListener("DOMContentLoaded", (event) => { if (!response.ok) { throw new Error('Network response was not ok'); } - // Start streaming once processing is started - submit_button.style.display = "none"; - streamOutput(response_area); + // Extract the text from the response body + return response.text(); // Use .json() if the response is JSON + }) + .then(text => { + submit_button.style.display = "none"; + if (text === "Processing started. Check /stream_output for updates.") { + streamOutput(response_area); + } else { + response_area.innerText = text; // Show any other response message + submit_button.style.display = "flex"; + } }) .catch(error => { console.error('Error processing URL:', error); @@ -52,6 +63,7 @@ function streamOutput(response_area) { // Decode and process the chunk const chunk = decoder.decode(value, { stream: true }); response_area.innerHTML += chunk; + response_area.scrollTop = response_area.scrollHeight // Continue reading readStream(); diff --git a/website/static/style.css b/website/static/style.css index 04c2e74..eacb116 100644 --- a/website/static/style.css +++ b/website/static/style.css @@ -37,12 +37,13 @@ body .content { } #response-area { - display: flex; + display: block; height: 90%; min-height: 90vh; text-wrap: wrap; flex-wrap: wrap; align-content: flex-end; + overflow-y: auto; } .form_box { From 3687f5af728a2c011936b62e3d1d14cef34990c2 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 15 Oct 2024 20:45:58 -0400 Subject: [PATCH 5/6] why was this ever here --- screw-bardo.code-workspace | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 screw-bardo.code-workspace diff --git a/screw-bardo.code-workspace b/screw-bardo.code-workspace deleted file mode 100644 index 79a7cdb..0000000 --- a/screw-bardo.code-workspace +++ /dev/null @@ -1,10 +0,0 @@ -{ - "folders": [ - { - "path": "." - } - ], - "settings": { - "html.format.enable": true - } -} \ No newline at end of file From 4bc7722162709f2ed4c96fdc0a9459af05c3fb77 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 15 Oct 2024 20:48:03 -0400 Subject: [PATCH 6/6] okay I'm sobbing I forgot to do the not python thigns --- .gitignore | 2 +- log.md | 195 ----------------------------------------------------- 2 files changed, 1 insertion(+), 196 deletions(-) diff --git a/.gitignore b/.gitignore index 87d114f..9d83d26 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -log.txt +log.md .env # Byte-compiled / optimized / DLL files diff --git a/log.md b/log.md index 348d788..e69de29 100644 --- a/log.md +++ b/log.md @@ -1,195 +0,0 @@ - - - -## New Entry at 2024-10-15 20:39:11 - -URL: https://www.youtube.com/watch?v=dQw4w9WgXcQ -Video ID: dQw4w9WgXcQ - -Stream preperation complete, sending reply... - -
-Starting stream thread... - -Starting to stream output. -Stream successfully completely. - -
- ---- - -### Completed Assistant Response: -Response Recieved: - -Screw-Bardo: - -It appears that the transcript provided contains lyrics from a popular song rather than a social studies lecture. Given this, I will proceed with the task of taking notes and generating questions based on what is present in the text, even though it does not relate to social studies. - -**Notes:** -- Love - - Involves rules and commitments - - Emphasizes feelings and understanding -- Relationship - - Characters have known each other for a long time - - Heartache without expression - - Mutual understanding of feelings -- Commitment - - Pledges never to give up, let down, or desert - - Promises made to avoid causing pain or sadness - -**Questions:** - -Q: What is the central theme of the lyrics? -A: The central theme is love and commitment. - -Q: What has been shared between the characters? -A: They have known each other for a long time. - -Q: What emotions are expressed in the relationship? -A: There is heartache and mutual understanding. - -Q: What promises are made in the lyrics? -A: Promises are made to never give up, let down, or desert the other. - -Q: How do the characters feel about their current situation? -A: They feel a need to express unspoken feelings. - ---- - - -### Task completed sucessfully without errors! - - -## New Entry at 2024-10-15 20:39:30 - -URL: https://www.youtube.com/watch?v=ivBcfQDtMhQ -Video ID: ivBcfQDtMhQ - -Stream preperation complete, sending reply... - -
-Starting stream thread... - -Starting to stream output. -Stream successfully completely. - -
- ---- - -### Completed Assistant Response: -Response Recieved: - -Screw-Bardo: - -### Notes - -- **U.S. Response to World War II** - - Initially isolated and neutral - - Disillusionment from World War I - - 260,000 American deaths - - No territorial or colonial gain - - Lack of belief in the League of Nations - - Did not join or sign Treaty of Versailles - - War profiteering concerns - - General Gerald Nye - - Formed the Nye Committee to investigate war profiteering - - International debts not paid - - Countries not repaying war debts to the U.S. - - Geographical isolation - - Distance from Europe (3,000 miles away) - - Domestic issues - - Focus on Great Depression recovery - - Neutrality Acts (1935, 1936, 1937) - - Made it illegal to choose sides in foreign conflicts - - Included arms embargo against warring nations - - **FDR's Role** - - Circumvented Neutrality Acts with Cash and Carry - - Saw war as an opportunity to lift the U.S. out of the Great Depression - - Restricted foreign travel for Americans - - Response to sinking of the Lusitania - -- **America First Committee** - - Promoted by Charles Lindbergh - - Advocated for prioritizing American interests over European affairs - -- **Neutrality Renouncement (1937)** - - FDR began military buildup - - First peacetime draft (Selective Service Act of 1940) - - Ages 21 to 35 - - Aimed to create an army of several million - -- **Aid to Allies** - - Lend-Lease Act (1941) - - U.S. supported Allies by supplying war materials - - Atlantic Charter (1941) - - Agreement between Roosevelt and Churchill outlining war aims - -- **Industrial Mobilization** - - Massive increase in production (e.g., arms and materials) - - 1940: 6,000 planes - - 1945: 60,000 planes - - Weapons production increased significantly - - Women's contribution to the workforce - - Example: Rosie the Riveter - -- **Government Agencies** - - War Production Board - - Restricted civilian goods production - - Office of Price Administration (OPA) - - Set prices and rents - - Office of War Information - - Censored information to control public messaging - - War financing through bonds and taxes - - 49 billion dollars in bonds by 1945 - -- **Labor Relations** - - AFL and CIO agreements to avoid strikes - - Wage increases followed by freezes - - Managing strikes, like coal strike in 1943 - -### Questions and Answers - -Q: Why did the U.S. initially maintain an isolationist stance during World War II? -A: The U.S. was disillusioned from World War I and focused on domestic economic recovery. - -Q: What were the Neutrality Acts? -A: Laws that made it illegal for the U.S. to choose sides in foreign conflicts or export arms to warring nations. - -Q: Who was Charles Lindbergh, and what did he advocate for? -A: A prominent leader of the America First Committee who promoted prioritizing American interests over involvement in Europe. - -Q: What was the Lend-Lease Act? -A: A U.S. program that supplied Allied nations with war materials during World War II. - -Q: How did the U.S. government finance the war effort? -A: Through the sale of war bonds and increased taxes. - ---- - - -### Task completed sucessfully without errors! - - -## New Entry at 2024-10-15 20:40:02 - -URL: https://www.youtube.com/watch?v=ivBc5QDtMhQ -Video ID: ivBc5QDtMhQ - - - -# Exception while fetching transcript: - - -Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ivBc5QDtMhQ! This is most likely caused by: - -Subtitles are disabled for this video - -If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem! -## Error: could not retrieve transcript, Assistant won't be called. - - -## New Entry at 2024-10-15 20:43:26 - -URL: https://www.youtube.com/watch?v=ivBfQDtMhQ -Could not parse video id from URL: https://www.youtube.com/watch?v=ivBfQDtMhQ \ No newline at end of file