From 0696ed21e977fb378394f323213123cefc091253 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Wed, 20 Nov 2024 15:23:13 -0500 Subject: [PATCH 1/9] accidently removed .vscode from .gitignore at some point... --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9d83d26..606b1bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ log.md .env +.vscode # Byte-compiled / optimized / DLL files __pycache__/ From c363f4c2464a6c8bf9f2979223a07ad0febbfca3 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Wed, 20 Nov 2024 15:50:41 -0500 Subject: [PATCH 2/9] i read PEP conventions --- app/app.py | 68 +++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/app/app.py b/app/app.py index e65d7b1..8bd1ab3 100644 --- a/app/app.py +++ b/app/app.py @@ -1,11 +1,9 @@ from flask import Flask, render_template, Response, request -from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter +from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter from asyncio import sleep from datetime import datetime import threading, pytz - - app = Flask(__name__, static_folder="website/static", template_folder="website") @app.route('/') @@ -14,49 +12,51 @@ def home(): @app.route('/process_url', methods=['POST']) def process_url(): - global thread - log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n") - url = request.form['url'] + url = request.form.get('url', '').strip() + if not url: + log("No URL provided.\n") + return "No URL provided.", 400 + + log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n") log(f"URL: {url}\n") + # Extract the video ID from the URL - video_id = get_video_id(url) # Modify this function to accept the URL + video_id = get_video_id(url) if not video_id: - log(f"Could not parse video id from URL: {url}") - return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)" - log(f"Video ID: {video_id}\n\n") - + log(f"Could not parse video id from URL: {url}\n") + return "Couldn't parse video ID from URL. (Ensure it's a valid YouTube.com or YouTu.be URL.)", 400 + log(f"Video ID: {video_id}\n") + # Get the transcript for that video ID transcript = get_auto_transcript(video_id) - if (not transcript): - log("## Error: could not retrieve transcript, Assistant won't be called.") - return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." - - thread = threading.Thread(name="create_stream", target=create_and_stream, args=(transcript,)) # The comma here is very intentional, it's so that it iterates it as a tuple rather than iterateing the string. - log("Stream preperation complete, sending reply...\n\n") - return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) # Add more detailed output if needed + if not transcript: + log("## Error: Could not retrieve transcript. Assistant won't be called.\n") + return "Parsed video ID, but transcript retrieval failed (might be disabled by the video owner).", 400 + + # Start the stream thread + thread = threading.Thread(target=create_and_stream, args=(transcript,)) + thread.start() + log("Stream preparation complete, sending reply...\n") + return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) @app.route('/stream_output') -def stream_output(): - def yoink(): +def stream_output_route(): + def generate(): log("
\nStarting stream thread...\n\n") - thread.start() + # Start streaming output from output_stream - log("Starting to stream output.") + log("Starting to stream output.\n") while not output_stream.done: - if output_stream.buffer != []: + if output_stream.buffer: delta = output_stream.buffer.pop(0) - yield bytes(delta, encoding="utf-8") + yield delta.encode("utf-8") else: awaiter(sleep(0.05)) - log(f"\nStream successfully completely.\n\n
\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n") + log(f"\nStream completed.\n\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n") output_stream.reset() - thread.join() - log("\n### Task completed sucessfully without errors!") - return - return Response(yoink(), content_type='text/plain', status=200) + log("\n### Task completed successfully without errors!\n") + + return Response(generate(), content_type='text/plain', status=200) - - - -if __name__ == '__main__': - app.run(debug=True) \ No newline at end of file +if __name__ == '__main__': + app.run(debug=True, host='0.0.0.0', port=1986) From cde37492af61ac57709684361e5024b0f67752cc Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Wed, 20 Nov 2024 15:51:21 -0500 Subject: [PATCH 3/9] i forgot to stage these for the last commit rip --- app/main.py | 186 +++++++++++++++++++-------------------------- docker-compose.yml | 2 - 2 files changed, 80 insertions(+), 108 deletions(-) diff --git a/app/main.py b/app/main.py index edce716..c1f5d58 100644 --- a/app/main.py +++ b/app/main.py @@ -1,128 +1,102 @@ -# To parse video ids +import os import re - -# Youtube Transcript stuff import -import youtube_transcript_api._errors -from youtube_transcript_api import YouTubeTranscriptApi +import threading +import pytz +from datetime import datetime +from dotenv import load_dotenv +from youtube_transcript_api import YouTubeTranscriptApi, _errors from youtube_transcript_api.formatters import TextFormatter - -# OpenAI API stuff import -from openai import AssistantEventHandler -from openai import OpenAI - -### For streaming +from openai import AssistantEventHandler, OpenAI from typing_extensions import override import asyncio -awaiter = asyncio.run - -# The StreamOutput class to handle streaming -class StreamOutput: - - def __init__(self): - self.delta: str = "" - self.response: str = "" - self.done: bool = False - self.buffer: list = [] - - def reset(self): - self.delta = "" - self.response = "" - self.done = False - self.buffer: list = [] - - async def send_delta(self, delta): - self.delta = delta - self.response += delta - def get_index(list): - if len(list) == 0: - return 0 - else: - return len(list)-1 - if self.buffer != []: - try: - if self.delta != self.buffer[get_index(self.buffer)]: - self.buffer.append(delta) - except IndexError as index_error: - log(f"\nCaught IndexError: {str(index_error)}") - self.buffer.append(delta) - else: self.buffer.append(delta) - -# To get the env var -from dotenv import load_dotenv -import os +# Load environment variables load_dotenv() # For logging -import pytz -from datetime import datetime +def log(message: str): + timestamp = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S') + with open("logs/log.md", "a") as file: + file.write(f"{timestamp} - {message}\n") -def log(str): - with open("logs/log.md", "at") as file: - file.write(str) +# StreamOutput class to handle streaming +class StreamOutput: + def __init__(self): + self.response = "" + self.done = False + self.buffer = [] + self.lock = threading.Lock() -### OpenAI Config + def reset(self): + with self.lock: + self.response = "" + self.done = False + self.buffer = [] -# Setting up OpenAI Client with API Key + def add_to_buffer(self, delta: str): + with self.lock: + self.response += delta + self.buffer.append(delta) + +output_stream = StreamOutput() + +# OpenAI Client Configuration client = OpenAI( - organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', - project="proj_NGz8Kux8CSka7DRJucAlDCz6", - api_key=os.getenv("OPENAI_API_KEY") + organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', + project="proj_NGz8Kux8CSka7DRJucAlDCz6", + api_key=os.getenv("OPENAI_API_KEY") ) -# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" -# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text) -> None: - awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) - - @override - def on_text_delta(self, delta, snapshot): - awaiter(output_stream.send_delta(delta.value)) +# Async helper +def awaiter(coro): + asyncio.run(coro) - def on_tool_call_created(self, tool_call): - raise Exception("Assistant shouldn't be calling tools.") +# EventHandler for OpenAI Assistant +class EventHandler(AssistantEventHandler): + @override + def on_text_created(self, text) -> None: + awaiter(output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n")) -def create_and_stream(transcript): - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages": [{"role": "user", "content": transcript}] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - output_stream.done = True + @override + def on_text_delta(self, delta, snapshot): + awaiter(output_stream.send_delta(delta.value)) -def get_video_id(url): - youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' - youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' - - id = re.search(youtu_be, url) - if not id: - id = re.search(youtube_com, url) - - if not id: - # Couldn't parse video ID from URL - return None - - return id.group(1) + def on_tool_call_created(self, tool_call): + raise Exception("Assistant shouldn't be calling tools.") -# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt -def get_auto_transcript(video_id): - trans_api_errors = youtube_transcript_api._errors - try: - transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) - except trans_api_errors.TranscriptsDisabled as e: - log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') +def create_and_stream(transcript: str): + try: + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + output_stream.done = True + except Exception as e: + log(f"Error in create_and_stream: {e}") + output_stream.done = True + +def get_video_id(url: str) -> str: + youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' + youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' + + match = re.search(youtu_be, url) or re.search(youtube_com, url) + if match: + return match.group(1) return None - - formatter = TextFormatter() # Ensure that you create an instance of TextFormatter - txt_transcript = formatter.format_transcript(transcript) - return txt_transcript - -output_stream = StreamOutput() \ No newline at end of file +def get_auto_transcript(video_id: str) -> str: + try: + transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) + formatter = TextFormatter() + return formatter.format_transcript(transcript) + except _errors.TranscriptsDisabled as e: + log(f'Exception while fetching transcript: {e}') + except Exception as e: + log(f'Unexpected error while fetching transcript: {e}') + return None \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 403d03b..83defbb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: app: build: . From 613cfabb5ae57a0498e78fd660262e9d1ae04c80 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Sat, 21 Dec 2024 11:47:53 -0500 Subject: [PATCH 4/9] google cloud? --- docker-compose.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 83defbb..c543524 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,9 +3,7 @@ services: build: . container_name: screw-bardo-container ports: - - "1986:1986" - env_file: - - .env + - "$PORT:1986" volumes: - ./logs:/app/logs restart: unless-stopped \ No newline at end of file From 8aae2cea1c7346ac6ef6db6cf80958043c11cfb3 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 7 Jan 2025 16:00:16 -0500 Subject: [PATCH 5/9] idk how an AI could do better front end experience than me but aight. --- app/main.py | 206 ++++++++++++++++++++--------------- app/website/index.html | 26 ++--- app/website/static/script.js | 115 ++++++++++--------- app/website/static/style.css | 122 +++++++++++++-------- 4 files changed, 268 insertions(+), 201 deletions(-) diff --git a/app/main.py b/app/main.py index c1f5d58..1628239 100644 --- a/app/main.py +++ b/app/main.py @@ -1,102 +1,134 @@ -import os +# To parse video ids import re -import threading -import pytz -from datetime import datetime -from dotenv import load_dotenv -from youtube_transcript_api import YouTubeTranscriptApi, _errors + +# Youtube Transcript stuff import +import youtube_transcript_api._errors +from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api.formatters import TextFormatter -from openai import AssistantEventHandler, OpenAI + +# OpenAI API stuff import +from openai import AssistantEventHandler +from openai import OpenAI + +### For streaming from typing_extensions import override import asyncio +awaiter = asyncio.run + +# The StreamOutput class to handle streaming +class StreamOutput: + + def __init__(self): + self.delta: str = "" + self.response: str = "" + self.done: bool = False + self.buffer: list = [] + + def reset(self): + self.delta = "" + self.response = "" + self.done = False + self.buffer: list = [] + + async def send_delta(self, delta): + self.delta = delta + self.response += delta + def get_index(list): + if len(list) == 0: + return 0 + else: + return len(list)-1 + if self.buffer != []: + try: + if self.delta != self.buffer[get_index(self.buffer)]: + self.buffer.append(delta) + except IndexError as index_error: + log(f"\nCaught IndexError: {str(index_error)}") + self.buffer.append(delta) + else: self.buffer.append(delta) + +# To get the env var +from dotenv import load_dotenv +import os -# Load environment variables load_dotenv() # For logging -def log(message: str): - timestamp = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S') +import pytz +from datetime import datetime + +def log(message): + try: with open("logs/log.md", "a") as file: - file.write(f"{timestamp} - {message}\n") + file.write(message) + except FileNotFoundError: + with open("logs/log.md", "x+"): + log(message) -# StreamOutput class to handle streaming -class StreamOutput: - def __init__(self): - self.response = "" - self.done = False - self.buffer = [] - self.lock = threading.Lock() +### OpenAI Config - def reset(self): - with self.lock: - self.response = "" - self.done = False - self.buffer = [] +# Setting up OpenAI Client with API Key +client = OpenAI( + organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', + project="proj_NGz8Kux8CSka7DRJucAlDCz6", + api_key=os.getenv("OPENAI_API_KEY") +) - def add_to_buffer(self, delta: str): - with self.lock: - self.response += delta - self.buffer.append(delta) +# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript +asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" + +# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: +class EventHandler(AssistantEventHandler): + @override + def on_text_created(self, text) -> None: + awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) + + @override + def on_text_delta(self, delta, snapshot): + awaiter(output_stream.send_delta(delta.value)) + + def on_tool_call_created(self, tool_call): + raise Exception("Assistant shouldn't be calling tools.") + +def create_and_stream(transcript): + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + output_stream.done = True + +def get_video_id(url): + youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' + youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' + + id = re.search(youtu_be, url) + if not id: + id = re.search(youtube_com, url) + + if not id: + # Couldn't parse video ID from URL + return None + + return id.group(1) + +# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt +def get_auto_transcript(video_id): + trans_api_errors = youtube_transcript_api._errors + try: + transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) + except trans_api_errors.TranscriptsDisabled as e: + log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') + return None + + formatter = TextFormatter() # Ensure that you create an instance of TextFormatter + + txt_transcript = formatter.format_transcript(transcript) + return txt_transcript output_stream = StreamOutput() -# OpenAI Client Configuration -client = OpenAI( - organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', - project="proj_NGz8Kux8CSka7DRJucAlDCz6", - api_key=os.getenv("OPENAI_API_KEY") -) - -asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" - -# Async helper -def awaiter(coro): - asyncio.run(coro) - -# EventHandler for OpenAI Assistant -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text) -> None: - awaiter(output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n")) - - @override - def on_text_delta(self, delta, snapshot): - awaiter(output_stream.send_delta(delta.value)) - - def on_tool_call_created(self, tool_call): - raise Exception("Assistant shouldn't be calling tools.") - -def create_and_stream(transcript: str): - try: - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages": [{"role": "user", "content": transcript}] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - output_stream.done = True - except Exception as e: - log(f"Error in create_and_stream: {e}") - output_stream.done = True - -def get_video_id(url: str) -> str: - youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' - youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' - - match = re.search(youtu_be, url) or re.search(youtube_com, url) - if match: - return match.group(1) - return None - -def get_auto_transcript(video_id: str) -> str: - try: - transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) - formatter = TextFormatter() - return formatter.format_transcript(transcript) - except _errors.TranscriptsDisabled as e: - log(f'Exception while fetching transcript: {e}') - except Exception as e: - log(f'Unexpected error while fetching transcript: {e}') - return None \ No newline at end of file +log(f"\n\n# Main initilized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumeably application starting.\n") \ No newline at end of file diff --git a/app/website/index.html b/app/website/index.html index f283176..a03369a 100644 --- a/app/website/index.html +++ b/app/website/index.html @@ -2,29 +2,27 @@ - - Screw You Bardo - - - + +
+
+
Response will appear here.
+
-
-
Response will appear here.        
-
- - -
- - -
+
+
+ + +
+
+
\ No newline at end of file diff --git a/app/website/static/script.js b/app/website/static/script.js index d28aab1..ec2793a 100644 --- a/app/website/static/script.js +++ b/app/website/static/script.js @@ -1,18 +1,24 @@ -document.addEventListener("DOMContentLoaded", (event) => { - const response_area = document.getElementById('response-area'); - const submit_button = document.getElementById('submit') - submit_button.addEventListener('click', function() { - var url = document.getElementById('url_box').value; +document.addEventListener("DOMContentLoaded", () => { + const responseArea = document.getElementById('response-area'); + const submitButton = document.getElementById('submit'); + const urlForm = document.getElementById('url-form'); + const urlBox = document.getElementById('url_box'); + + urlForm.addEventListener('submit', function(event) { + event.preventDefault(); // Prevent form from submitting the traditional way + const url = urlBox.value.trim(); if (!url) { - response_area.innerText = 'Please enter a URL.'; + responseArea.innerText = 'Please enter a URL.'; return; } - else { - document.getElementById('url_box').value = ""; - } - - // First, process the URL + + // Clear the input and update UI + urlBox.value = ""; + submitButton.disabled = true; + responseArea.innerText = 'Processing...'; + + // Process the URL fetch('/process_url', { method: 'POST', headers: { @@ -24,57 +30,58 @@ document.addEventListener("DOMContentLoaded", (event) => { if (!response.ok) { throw new Error('Network response was not ok'); } - // Extract the text from the response body - return response.text(); // Use .json() if the response is JSON + return response.text(); }) .then(text => { - submit_button.style.display = "none"; if (text === "Processing started. Check /stream_output for updates.") { - streamOutput(response_area); + streamOutput(responseArea); } else { - response_area.innerText = text; // Show any other response message - submit_button.style.display = "flex"; + responseArea.innerText = text; + submitButton.disabled = false; } }) .catch(error => { console.error('Error processing URL:', error); - response_area.innerText = 'Error processing URL: ' + error.message; - submit_button.style.display = "flex"; + responseArea.innerText = 'Error processing URL: ' + error.message; + submitButton.disabled = false; }); }); + + function streamOutput(responseArea) { + // Fetch the streaming output + fetch('/stream_output') + .then(response => { + if (!response.ok) { + throw new Error('Network response was not ok'); + } + const reader = response.body.getReader(); + const decoder = new TextDecoder("utf-8"); + + responseArea.innerHTML = ""; + + function readStream() { + reader.read().then(({ done, value }) => { + if (done) { + submitButton.disabled = false; + return; + } + const chunk = decoder.decode(value, { stream: true }); + responseArea.innerHTML += chunk; + responseArea.scrollTop = responseArea.scrollHeight; + readStream(); + }).catch(error => { + console.error('Error reading stream:', error); + responseArea.innerText = 'Error reading stream: ' + error.message; + submitButton.disabled = false; + }); + } + + readStream(); + }) + .catch(error => { + console.error('Error fetching stream:', error); + responseArea.innerText = 'Error fetching stream: ' + error.message; + submitButton.disabled = false; + }); + } }); - -function streamOutput(response_area) { - // Fetch the streaming output - const streamResponsePromise = fetch('/stream_output'); - response_area.innerHTML = "" - - streamResponsePromise - .then(response => { - const reader = response.body.getReader(); - const decoder = new TextDecoder("utf-8"); - - function readStream() { - reader.read().then(({ done, value }) => { - if(done) { - document.getElementById('submit').style.display = "flex"; - return - } - // Decode and process the chunk - const chunk = decoder.decode(value, { stream: true }); - response_area.innerHTML += chunk; - response_area.scrollTop = response_area.scrollHeight - - // Continue reading - readStream(); - }); - } - - // Start reading the stream - readStream(); - }) - .catch(error => { - console.error('Error fetching stream:', error); - response_area.innerText = 'Error fetching stream: ' + error.message; - }); -} diff --git a/app/website/static/style.css b/app/website/static/style.css index eacb116..541c0d4 100644 --- a/app/website/static/style.css +++ b/app/website/static/style.css @@ -1,7 +1,5 @@ - - @font-face { - font-family: 'nimbus_sans_d_otlight'; + font-family: 'NimbusSansD'; src: url('font-files/nimbus-sans-d-ot-light.woff2') format('woff2'), url('font-files/nimbus-sans-d-ot-light.woff') format('woff'); font-weight: normal; @@ -9,70 +7,102 @@ } * { - font-family: 'nimbus_sans_d_otlight'; - color: white; + box-sizing: border-box; + margin: 0; + padding: 0; + font-family: 'NimbusSansD', sans-serif; + color: #FFFFFF; } body { display: flex; - flex-direction: column; - width: 100%; - max-width: 100vw; - height: 100%; - min-height: 100vh; - max-height: 100vh; - margin: 0; - background-color: rgb(31, 31, 31); + justify-content: center; + align-items: center; + height: 100vh; + background-color: #1F1F1F; } -body .content { +.container { display: flex; flex-direction: column; - align-self: center; - width: 75%; - max-width: 65vw; - height: 100%; - min-height: 100vh; - max-height: 100vh; + width: 85vw; + height: 90vh; + background-color: #2E2E2E; + border-radius: 10px; + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2); + overflow: hidden; +} + +.response-section { + flex: 1; + padding: 20px; + background-color: #1E1E1E; + overflow-y: auto; + font-size: 1rem; + line-height: 1.5; +} + +.form-section { + padding: 15px 20px; + background-color: #3A3A3A; } #response-area { - display: block; - height: 90%; - min-height: 90vh; - text-wrap: wrap; - flex-wrap: wrap; - align-content: flex-end; - overflow-y: auto; + white-space: pre-wrap; } -.form_box { +#url-form { display: flex; - width: 100%; - justify-content: space-between; - align-content: space-around; + gap: 10px; } #url_box { - display: flex; - height: 5%; - min-height: 5vh; - width: 90%; - min-width: 80vh; - background-color: rgb(31, 31, 31); + flex: 1; + padding: 10px 15px; + border: none; + border-radius: 5px; + background-color: #4A4A4A; + color: #FFFFFF; + font-size: 1rem; + outline: none; +} + +#url_box::placeholder { + color: #B0B0B0; } #submit { - display: flex; - width: 5%; - min-width: 3vw; - background-color: rgb(49, 49, 49); -} -#submit:hover { + padding: 10px 20px; + border: none; + border-radius: 5px; + background-color: #5A5A5A; + color: #FFFFFF; + font-size: 1rem; cursor: pointer; - background-color: rgb(31, 31, 31); + transition: background-color 0.3s ease; } -input { - border-radius: 15px; +#submit:hover { + background-color: #7A7A7A; } + +#submit:disabled { + background-color: #3A3A3A; + cursor: not-allowed; +} + +/* Responsive Adjustments */ +@media (max-width: 600px) { + .container { + height: 95vh; + } + + #url_box { + font-size: 0.9rem; + } + + #submit { + font-size: 0.9rem; + padding: 10px; + } +} \ No newline at end of file From 5a09efd5b0cc4f57c54c42f9b69d23525aff3eaa Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 7 Jan 2025 16:03:12 -0500 Subject: [PATCH 6/9] .env does not go in dockerignore, dear god it's like putting .git in .gitignore again --- .dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index fb3a569..354ea43 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,7 +4,6 @@ __pycache__ *.pyd *.env *venv/ -.env *.git .gitignore Dockerfile From b33a2a3bd7b80ef6013f523687639f88309a694e Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 7 Jan 2025 16:57:44 -0500 Subject: [PATCH 7/9] Fixed scrolling on overflow --- app/website/index.html | 2 +- app/website/static/script.js | 3 ++- app/website/static/style.css | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/website/index.html b/app/website/index.html index a03369a..fd97afb 100644 --- a/app/website/index.html +++ b/app/website/index.html @@ -12,7 +12,7 @@
-
+
Response will appear here.
diff --git a/app/website/static/script.js b/app/website/static/script.js index ec2793a..891ad5e 100644 --- a/app/website/static/script.js +++ b/app/website/static/script.js @@ -1,5 +1,6 @@ document.addEventListener("DOMContentLoaded", () => { const responseArea = document.getElementById('response-area'); + const responseSection = document.getElementById('response-section'); const submitButton = document.getElementById('submit'); const urlForm = document.getElementById('url-form'); const urlBox = document.getElementById('url_box'); @@ -67,7 +68,7 @@ document.addEventListener("DOMContentLoaded", () => { } const chunk = decoder.decode(value, { stream: true }); responseArea.innerHTML += chunk; - responseArea.scrollTop = responseArea.scrollHeight; + responseSection.scrollTop = responseSection.scrollHeight readStream(); }).catch(error => { console.error('Error reading stream:', error); diff --git a/app/website/static/style.css b/app/website/static/style.css index 541c0d4..5d3c6c5 100644 --- a/app/website/static/style.css +++ b/app/website/static/style.css @@ -33,7 +33,7 @@ body { overflow: hidden; } -.response-section { +#response-section { flex: 1; padding: 20px; background-color: #1E1E1E; From 887aaa6c1d8d7f249d607a794fc1cb976495eef2 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 7 Jan 2025 19:16:43 -0500 Subject: [PATCH 8/9] hype streaming take two plus better handling of multiple users (it still doesn't work) --- .vscode/launch.json | 2 +- app/app.py | 74 ++++------ app/main.py | 259 ++++++++++++++++++++++------------- app/website/static/style.css | 1 + 4 files changed, 188 insertions(+), 148 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 6654ef2..fadf42a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,7 +10,7 @@ "request": "launch", "module": "flask", "env": { - "FLASK_APP": "app.py", + "FLASK_APP": "./app/app.py", "FLASK_DEBUG": "1" }, "args": [ diff --git a/app/app.py b/app/app.py index 8bd1ab3..264bda3 100644 --- a/app/app.py +++ b/app/app.py @@ -1,62 +1,40 @@ +import logging from flask import Flask, render_template, Response, request -from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter -from asyncio import sleep -from datetime import datetime -import threading, pytz +from main import yoink, process app = Flask(__name__, static_folder="website/static", template_folder="website") +# Configure logging +logging.basicConfig( + filename='./logs/app.log', + level=logging.INFO, + format='%(asctime)s %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) + @app.route('/') def home(): + logging.info("Home page accessed.") return render_template('index.html') @app.route('/process_url', methods=['POST']) def process_url(): - url = request.form.get('url', '').strip() - if not url: - log("No URL provided.\n") - return "No URL provided.", 400 - - log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n") - log(f"URL: {url}\n") - - # Extract the video ID from the URL - video_id = get_video_id(url) - if not video_id: - log(f"Could not parse video id from URL: {url}\n") - return "Couldn't parse video ID from URL. (Ensure it's a valid YouTube.com or YouTu.be URL.)", 400 - log(f"Video ID: {video_id}\n") - - # Get the transcript for that video ID - transcript = get_auto_transcript(video_id) - if not transcript: - log("## Error: Could not retrieve transcript. Assistant won't be called.\n") - return "Parsed video ID, but transcript retrieval failed (might be disabled by the video owner).", 400 - - # Start the stream thread - thread = threading.Thread(target=create_and_stream, args=(transcript,)) - thread.start() - log("Stream preparation complete, sending reply...\n") - return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) + global most_recent_thread + url = request.form['url'] + logging.info(f"Received URL for processing: {url}") + success, msg, status_code, most_recent_thread = process(url) + if success: + logging.info("Processing started successfully.") + return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) + else: + logging.error(f"Processing failed: {msg}") + return Response(msg, content_type='text/plain', status=status_code) @app.route('/stream_output') -def stream_output_route(): - def generate(): - log("
\nStarting stream thread...\n\n") - - # Start streaming output from output_stream - log("Starting to stream output.\n") - while not output_stream.done: - if output_stream.buffer: - delta = output_stream.buffer.pop(0) - yield delta.encode("utf-8") - else: - awaiter(sleep(0.05)) - log(f"\nStream completed.\n
\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n") - output_stream.reset() - log("\n### Task completed successfully without errors!\n") - - return Response(generate(), content_type='text/plain', status=200) +def stream_output(): + logging.info("Streaming output requested.") + return Response(yoink(most_recent_thread), content_type='text/plain', status=200) if __name__ == '__main__': - app.run(debug=True, host='0.0.0.0', port=1986) + logging.info("Starting Flask application.") + app.run(debug=True) \ No newline at end of file diff --git a/app/main.py b/app/main.py index 1628239..832dfaa 100644 --- a/app/main.py +++ b/app/main.py @@ -1,134 +1,195 @@ -# To parse video ids import re +import threading +import asyncio +from asyncio import sleep +from typing_extensions import override +from datetime import datetime +import pytz +import os +import logging -# Youtube Transcript stuff import +# Youtube Transcript imports import youtube_transcript_api._errors from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api.formatters import TextFormatter -# OpenAI API stuff import +# OpenAI API imports from openai import AssistantEventHandler from openai import OpenAI -### For streaming -from typing_extensions import override -import asyncio +# Load environment variables +from dotenv import load_dotenv +load_dotenv() + +# Handle async outside of async functions awaiter = asyncio.run +# Configure logging +logging.basicConfig( + filename='./logs/main.log', + level=logging.INFO, + format='%(asctime)s %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) + # The StreamOutput class to handle streaming class StreamOutput: - - def __init__(self): - self.delta: str = "" - self.response: str = "" - self.done: bool = False - self.buffer: list = [] - - def reset(self): - self.delta = "" - self.response = "" - self.done = False - self.buffer: list = [] - - async def send_delta(self, delta): - self.delta = delta - self.response += delta - def get_index(list): - if len(list) == 0: - return 0 + def __init__(self): + self.delta: str = "" + self.response: str = "" + self.done: bool = False + self.buffer: list = [] + + def reset(self): + self.delta = "" + self.response = "" + self.done = False + self.buffer = [] + + def send_delta(self, delta): + awaiter(self.process_delta(delta)) + + async def process_delta(self, delta): + self.delta = delta + self.response += delta + + def get_index(lst): + if len(lst) == 0: + return 0 + else: + return len(lst) - 1 + + if self.buffer: + try: + if self.delta != self.buffer[get_index(self.buffer)]: + self.buffer.append(delta) + except IndexError as index_error: + logging.error(f"Caught IndexError: {str(index_error)}") + self.buffer.append(delta) else: - return len(list)-1 - if self.buffer != []: - try: - if self.delta != self.buffer[get_index(self.buffer)]: self.buffer.append(delta) - except IndexError as index_error: - log(f"\nCaught IndexError: {str(index_error)}") - self.buffer.append(delta) - else: self.buffer.append(delta) + return -# To get the env var -from dotenv import load_dotenv -import os - -load_dotenv() - -# For logging -import pytz -from datetime import datetime - -def log(message): - try: - with open("logs/log.md", "a") as file: - file.write(message) - except FileNotFoundError: - with open("logs/log.md", "x+"): - log(message) - -### OpenAI Config +# OpenAI Config # Setting up OpenAI Client with API Key client = OpenAI( - organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', - project="proj_NGz8Kux8CSka7DRJucAlDCz6", - api_key=os.getenv("OPENAI_API_KEY") + organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', + project="proj_NGz8Kux8CSka7DRJucAlDCz6", + api_key=os.getenv("OPENAI_API_KEY") ) -# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript +# Screw Bardo Assistant ID asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" -# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: +# Event Handler for OpenAI Assistant class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text) -> None: - awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) - - @override - def on_text_delta(self, delta, snapshot): - awaiter(output_stream.send_delta(delta.value)) + @override + def on_text_created(self, text) -> None: + output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n") + logging.info("Text created event handled.") - def on_tool_call_created(self, tool_call): - raise Exception("Assistant shouldn't be calling tools.") + @override + def on_text_delta(self, delta, snapshot): + output_stream.send_delta(delta.value) + logging.debug(f"Text delta received: {delta.value}") + + def on_tool_call_created(self, tool_call): + error_msg = "Assistant shouldn't be calling tools." + logging.error(error_msg) + raise Exception(error_msg) def create_and_stream(transcript): - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages": [{"role": "user", "content": transcript}] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - output_stream.done = True + logging.info("Starting OpenAI stream thread.") + try: + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + output_stream.done = True + logging.info("OpenAI stream completed.") + except Exception as e: + logging.exception("Exception occurred during create_and_stream.") + +def yoink(thread: threading.Thread): + logging.info("Starting stream thread...") + thread.start() + logging.info("Stream thread started. Beginning to stream output.") + + try: + while not output_stream.done: + if output_stream.buffer: + delta = output_stream.buffer.pop(0) + yield bytes(delta, encoding="utf-8") + else: + asyncio.run(sleep(0.018)) + except Exception as e: + logging.exception("Exception occurred during streaming output.") + finally: + logging.info("Stream completed successfully.") + logging.info(f"Completed Assistant Response:\n{output_stream.response}") + output_stream.reset() + thread.join() + logging.info("Stream thread joined. Task completed successfully without errors.") + +def process(url): + current_time = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S') + logging.info(f"New Entry at {current_time}") + logging.info(f"URL: {url}") + + video_id = get_video_id(url) + if not video_id: + logging.warning(f"Could not parse video id from URL: {url}") + return (False, "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)", 400, None) + logging.info(f"Parsed Video ID: {video_id}") + + # Get the transcript for that video ID + transcript = get_auto_transcript(video_id) + if not transcript: + logging.error("Error: could not retrieve transcript. Assistant won't be called.") + return (False, "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube.", 200, None) + + thread = threading.Thread( + name="create_stream", + target=create_and_stream, + args=(transcript,) + ) + logging.info("Stream preparation complete, sending reply.") + return (True, None, None, thread) def get_video_id(url): - youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' - youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' - - id = re.search(youtu_be, url) - if not id: - id = re.search(youtube_com, url) - - if not id: - # Couldn't parse video ID from URL - return None - - return id.group(1) + youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' + youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' + + id_match = re.search(youtu_be, url) + if not id_match: + id_match = re.search(youtube_com, url) + + if not id_match: + # Couldn't parse video ID from URL + logging.warning(f"Failed to parse video ID from URL: {url}") + return None + + return id_match.group(1) -# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt def get_auto_transcript(video_id): - trans_api_errors = youtube_transcript_api._errors - try: - transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) - except trans_api_errors.TranscriptsDisabled as e: - log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') - return None - - formatter = TextFormatter() # Ensure that you create an instance of TextFormatter + trans_api_errors = youtube_transcript_api._errors + try: + transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) + except trans_api_errors.TranscriptsDisabled as e: + logging.exception(f"Exception while fetching transcript: {e}") + return None - txt_transcript = formatter.format_transcript(transcript) - return txt_transcript + formatter = TextFormatter() # Ensure that you create an instance of TextFormatter + txt_transcript = formatter.format_transcript(transcript) + logging.info("Transcript successfully retrieved and formatted.") + return txt_transcript +# Initialize output stream output_stream = StreamOutput() -log(f"\n\n# Main initilized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumeably application starting.\n") \ No newline at end of file +logging.info(f"Main initialized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumably application starting.") \ No newline at end of file diff --git a/app/website/static/style.css b/app/website/static/style.css index 5d3c6c5..2db8550 100644 --- a/app/website/static/style.css +++ b/app/website/static/style.css @@ -40,6 +40,7 @@ body { overflow-y: auto; font-size: 1rem; line-height: 1.5; + scroll-behavior: smooth; } .form-section { From 14b320bbdea91c3614bef5721dedf4ac6c8bdac5 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Tue, 7 Jan 2025 22:25:04 -0500 Subject: [PATCH 9/9] lets fucking go --- .vscode/launch.json | 3 +- app/app.py | 56 ++++++++++++++----- app/main.py | 133 ++++++++++++++++++++++++++++++-------------- requirements.txt | Bin 1158 -> 1214 bytes 4 files changed, 135 insertions(+), 57 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index fadf42a..3eefcee 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,9 +8,10 @@ "name": "Python Debugger: Flask", "type": "debugpy", "request": "launch", + "cwd": "./app", "module": "flask", "env": { - "FLASK_APP": "./app/app.py", + "FLASK_APP": "./app.py", "FLASK_DEBUG": "1" }, "args": [ diff --git a/app/app.py b/app/app.py index 264bda3..739ce92 100644 --- a/app/app.py +++ b/app/app.py @@ -1,40 +1,68 @@ import logging -from flask import Flask, render_template, Response, request -from main import yoink, process +import os +from flask import Flask, render_template, Response, request, session +from main import yoink, process, user_streams, stream_lock +import uuid # Import UUID app = Flask(__name__, static_folder="website/static", template_folder="website") +app.secret_key = os.urandom(24) # Necessary for using sessions + # Configure logging logging.basicConfig( filename='./logs/app.log', - level=logging.INFO, + level=logging.DEBUG, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) +def create_session(): + session_id = str(uuid.uuid4()) + # This should never happen but I'm putting the logic there anyways + try: + if user_streams[session_id]: + session_id = create_session() + except KeyError: + pass + return session_id + + @app.route('/') def home(): - logging.info("Home page accessed.") - return render_template('index.html') + session_id = create_session() + session['id'] = session_id + logging.info(f"Home page accessed. Assigned initial session ID: {session_id}") + return render_template('index.html', session_id=session_id) @app.route('/process_url', methods=['POST']) def process_url(): - global most_recent_thread + session_id = session.get('id') + if not session_id: + session_id = create_session() + session['id'] = session_id + logging.info(f"No existing session. Created new session ID: {session_id}") + url = request.form['url'] - logging.info(f"Received URL for processing: {url}") - success, msg, status_code, most_recent_thread = process(url) + logging.info(f"Received URL for processing from session {session_id}: {url}") + success, msg, status_code, = process(url, session_id) + if success: - logging.info("Processing started successfully.") + logging.info(f"Processing started successfully for session {session_id}.") return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) else: - logging.error(f"Processing failed: {msg}") + logging.error(f"Processing failed for session {session_id}: {msg}") return Response(msg, content_type='text/plain', status=status_code) @app.route('/stream_output') def stream_output(): - logging.info("Streaming output requested.") - return Response(yoink(most_recent_thread), content_type='text/plain', status=200) + session_id = session.get('id') + if not session_id or session_id not in user_streams: + logging.warning(f"Stream requested without a valid session ID: {session_id}") + return Response("No active stream for this session.", content_type='text/plain', status=400) + logging.info(f"Streaming output requested for session {session_id}.") + return Response(yoink(session_id), content_type='text/plain', status=200) if __name__ == '__main__': - logging.info("Starting Flask application.") - app.run(debug=True) \ No newline at end of file + logging.info("Starting Flask application.") + app.run(debug=True, threaded=True) # Enable threaded to handle multiple requests + \ No newline at end of file diff --git a/app/main.py b/app/main.py index 832dfaa..f165799 100644 --- a/app/main.py +++ b/app/main.py @@ -7,6 +7,7 @@ from datetime import datetime import pytz import os import logging +import uuid # Youtube Transcript imports import youtube_transcript_api._errors @@ -21,16 +22,34 @@ from openai import OpenAI from dotenv import load_dotenv load_dotenv() +# Initialize user stream dictionary +user_streams = {} + +# Threading lock for thread safe stuff I think, idk it was used in the docs +stream_lock = threading.Lock() + # Handle async outside of async functions awaiter = asyncio.run # Configure logging -logging.basicConfig( - filename='./logs/main.log', - level=logging.INFO, - format='%(asctime)s %(levelname)s: %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' -) +try: + logging.basicConfig( + filename='./logs/main.log', + level=logging.INFO, + format='%(asctime)s %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) +except FileNotFoundError as e: + with open("./logs/main.log", "x"): + pass + logging.basicConfig( + filename='./logs/main.log', + level=logging.INFO, + format='%(asctime)s %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + logging.info(f"No main.log file was found ({e}), so one was created.") + # The StreamOutput class to handle streaming class StreamOutput: @@ -83,15 +102,20 @@ client = OpenAI( asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" # Event Handler for OpenAI Assistant -class EventHandler(AssistantEventHandler): +class EventHandler(AssistantEventHandler): + + def __init__(self, output_stream: StreamOutput): + super().__init__() + self.output_stream = output_stream + @override def on_text_created(self, text) -> None: - output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n") + self.output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n") logging.info("Text created event handled.") @override def on_text_delta(self, delta, snapshot): - output_stream.send_delta(delta.value) + self.output_stream.send_delta(delta.value) logging.debug(f"Text delta received: {delta.value}") def on_tool_call_created(self, tool_call): @@ -99,67 +123,92 @@ class EventHandler(AssistantEventHandler): logging.error(error_msg) raise Exception(error_msg) -def create_and_stream(transcript): - logging.info("Starting OpenAI stream thread.") +def create_and_stream(transcript, session_id): + logging.info(f"Starting OpenAI stream thread for session {session_id}.") + event_handler = EventHandler(user_streams[session_id]['output_stream']) try: with client.beta.threads.create_and_run_stream( assistant_id=asst_screw_bardo_id, thread={ "messages": [{"role": "user", "content": transcript}] }, - event_handler=EventHandler() + event_handler=event_handler ) as stream: stream.until_done() - output_stream.done = True - logging.info("OpenAI stream completed.") + with stream_lock: + user_streams[session_id]['output_stream'].done = True + logging.info(f"OpenAI stream completed for session {session_id}.") except Exception as e: - logging.exception("Exception occurred during create_and_stream.") + logging.exception(f"Exception occurred during create_and_stream for session {session_id}.") -def yoink(thread: threading.Thread): - logging.info("Starting stream thread...") - thread.start() - logging.info("Stream thread started. Beginning to stream output.") +def yoink(session_id): + logging.info(f"Starting stream for session {session_id}...") + with stream_lock: + user_data = user_streams.get(session_id) + if not user_data: + logging.critical(f"User data not found for session id {session_id}?") + return # Session might have ended + output_stream: StreamOutput = user_data.get('output_stream') + thread: threading.Thread = user_data.get('thread') + thread.start() + while True: + + if not output_stream or not thread: + logging.error(f"No output stream/thread for session {session_id}.\nThread: {thread.name if thread else "None"}") + break - try: - while not output_stream.done: + if output_stream.done and not output_stream.buffer: + break + + try: if output_stream.buffer: delta = output_stream.buffer.pop(0) yield bytes(delta, encoding="utf-8") else: asyncio.run(sleep(0.018)) - except Exception as e: - logging.exception("Exception occurred during streaming output.") - finally: - logging.info("Stream completed successfully.") - logging.info(f"Completed Assistant Response:\n{output_stream.response}") - output_stream.reset() - thread.join() - logging.info("Stream thread joined. Task completed successfully without errors.") + except Exception as e: + logging.exception(f"Exception occurred during streaming for session {session_id}: {e}") + break -def process(url): + logging.info(f"Stream completed successfully for session {session_id}.") + logging.info(f"Completed Assistant Response for session {session_id}:\n{output_stream.response}") + with stream_lock: + thread.join() + del user_streams[session_id] + logging.info(f"Stream thread joined and resources cleaned up for session {session_id}.") + +def process(url, session_id): + # Should initialize the key in the dictionary current_time = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S') - logging.info(f"New Entry at {current_time}") + logging.info(f"New Entry at {current_time} for session {session_id}") logging.info(f"URL: {url}") video_id = get_video_id(url) if not video_id: logging.warning(f"Could not parse video id from URL: {url}") - return (False, "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)", 400, None) + return (False, "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)", 400) logging.info(f"Parsed Video ID: {video_id}") # Get the transcript for that video ID transcript = get_auto_transcript(video_id) if not transcript: - logging.error("Error: could not retrieve transcript. Assistant won't be called.") - return (False, "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube.", 200, None) - - thread = threading.Thread( - name="create_stream", - target=create_and_stream, - args=(transcript,) - ) - logging.info("Stream preparation complete, sending reply.") - return (True, None, None, thread) + logging.error(f"Error: could not retrieve transcript for session {session_id}. Assistant won't be called.") + return (False, "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube.", 200) + user_streams[session_id] = { + 'output_stream': None, # Ensure output_stream is per user + 'thread': None + } + # Create a new StreamOutput for the session + with stream_lock: + user_streams[session_id]['output_stream'] = StreamOutput() + thread = threading.Thread( + name=f"create_stream_{session_id}", + target=create_and_stream, + args=(transcript, session_id) + ) + user_streams[session_id]['thread'] = thread + logging.info(f"Stream preparation complete for session {session_id}, sending reply.") + return (True, None, None) def get_video_id(url): youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' diff --git a/requirements.txt b/requirements.txt index 15691711094f3de024d83b7d4c9537336e4a00b5..11688ceba03d120e01db8a95c7abda6c8caacaf6 100644 GIT binary patch delta 60 zcmZqU+{ZZ~N-~k5m?51Zlc9(ql_8D676^?Q^cXA{^fm@xW)?1GCHXH IlNDJ~0iQw)c>n+a delta 14 VcmdnT*~U2`YGc(I=E-p^2>>hK1&{y$