hype streaming take two plus better handling of multiple users (it still doesn't work)

This commit is contained in:
ForeverPyrite
2025-01-07 19:16:43 -05:00
parent b33a2a3bd7
commit 887aaa6c1d
4 changed files with 188 additions and 148 deletions

2
.vscode/launch.json vendored
View File

@@ -10,7 +10,7 @@
"request": "launch", "request": "launch",
"module": "flask", "module": "flask",
"env": { "env": {
"FLASK_APP": "app.py", "FLASK_APP": "./app/app.py",
"FLASK_DEBUG": "1" "FLASK_DEBUG": "1"
}, },
"args": [ "args": [

View File

@@ -1,62 +1,40 @@
import logging
from flask import Flask, render_template, Response, request from flask import Flask, render_template, Response, request
from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter from main import yoink, process
from asyncio import sleep
from datetime import datetime
import threading, pytz
app = Flask(__name__, static_folder="website/static", template_folder="website") app = Flask(__name__, static_folder="website/static", template_folder="website")
# Configure logging
logging.basicConfig(
filename='./logs/app.log',
level=logging.INFO,
format='%(asctime)s %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
@app.route('/') @app.route('/')
def home(): def home():
logging.info("Home page accessed.")
return render_template('index.html') return render_template('index.html')
@app.route('/process_url', methods=['POST']) @app.route('/process_url', methods=['POST'])
def process_url(): def process_url():
url = request.form.get('url', '').strip() global most_recent_thread
if not url: url = request.form['url']
log("No URL provided.\n") logging.info(f"Received URL for processing: {url}")
return "No URL provided.", 400 success, msg, status_code, most_recent_thread = process(url)
if success:
log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n") logging.info("Processing started successfully.")
log(f"URL: {url}\n")
# Extract the video ID from the URL
video_id = get_video_id(url)
if not video_id:
log(f"Could not parse video id from URL: {url}\n")
return "Couldn't parse video ID from URL. (Ensure it's a valid YouTube.com or YouTu.be URL.)", 400
log(f"Video ID: {video_id}\n")
# Get the transcript for that video ID
transcript = get_auto_transcript(video_id)
if not transcript:
log("## Error: Could not retrieve transcript. Assistant won't be called.\n")
return "Parsed video ID, but transcript retrieval failed (might be disabled by the video owner).", 400
# Start the stream thread
thread = threading.Thread(target=create_and_stream, args=(transcript,))
thread.start()
log("Stream preparation complete, sending reply...\n")
return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200)
else:
logging.error(f"Processing failed: {msg}")
return Response(msg, content_type='text/plain', status=status_code)
@app.route('/stream_output') @app.route('/stream_output')
def stream_output_route(): def stream_output():
def generate(): logging.info("Streaming output requested.")
log("<details>\n<summary>Starting stream thread...</summary>\n\n") return Response(yoink(most_recent_thread), content_type='text/plain', status=200)
# Start streaming output from output_stream
log("Starting to stream output.\n")
while not output_stream.done:
if output_stream.buffer:
delta = output_stream.buffer.pop(0)
yield delta.encode("utf-8")
else:
awaiter(sleep(0.05))
log(f"\nStream completed.\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
output_stream.reset()
log("\n### Task completed successfully without errors!\n")
return Response(generate(), content_type='text/plain', status=200)
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True, host='0.0.0.0', port=1986) logging.info("Starting Flask application.")
app.run(debug=True)

View File

@@ -1,23 +1,39 @@
# To parse video ids
import re import re
import threading
import asyncio
from asyncio import sleep
from typing_extensions import override
from datetime import datetime
import pytz
import os
import logging
# Youtube Transcript stuff import # Youtube Transcript imports
import youtube_transcript_api._errors import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter from youtube_transcript_api.formatters import TextFormatter
# OpenAI API stuff import # OpenAI API imports
from openai import AssistantEventHandler from openai import AssistantEventHandler
from openai import OpenAI from openai import OpenAI
### For streaming # Load environment variables
from typing_extensions import override from dotenv import load_dotenv
import asyncio load_dotenv()
# Handle async outside of async functions
awaiter = asyncio.run awaiter = asyncio.run
# Configure logging
logging.basicConfig(
filename='./logs/main.log',
level=logging.INFO,
format='%(asctime)s %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# The StreamOutput class to handle streaming # The StreamOutput class to handle streaming
class StreamOutput: class StreamOutput:
def __init__(self): def __init__(self):
self.delta: str = "" self.delta: str = ""
self.response: str = "" self.response: str = ""
@@ -28,44 +44,33 @@ class StreamOutput:
self.delta = "" self.delta = ""
self.response = "" self.response = ""
self.done = False self.done = False
self.buffer: list = [] self.buffer = []
async def send_delta(self, delta): def send_delta(self, delta):
awaiter(self.process_delta(delta))
async def process_delta(self, delta):
self.delta = delta self.delta = delta
self.response += delta self.response += delta
def get_index(list):
if len(list) == 0: def get_index(lst):
if len(lst) == 0:
return 0 return 0
else: else:
return len(list)-1 return len(lst) - 1
if self.buffer != []:
if self.buffer:
try: try:
if self.delta != self.buffer[get_index(self.buffer)]: if self.delta != self.buffer[get_index(self.buffer)]:
self.buffer.append(delta) self.buffer.append(delta)
except IndexError as index_error: except IndexError as index_error:
log(f"\nCaught IndexError: {str(index_error)}") logging.error(f"Caught IndexError: {str(index_error)}")
self.buffer.append(delta) self.buffer.append(delta)
else: self.buffer.append(delta) else:
self.buffer.append(delta)
return
# To get the env var # OpenAI Config
from dotenv import load_dotenv
import os
load_dotenv()
# For logging
import pytz
from datetime import datetime
def log(message):
try:
with open("logs/log.md", "a") as file:
file.write(message)
except FileNotFoundError:
with open("logs/log.md", "x+"):
log(message)
### OpenAI Config
# Setting up OpenAI Client with API Key # Setting up OpenAI Client with API Key
client = OpenAI( client = OpenAI(
@@ -74,23 +79,29 @@ client = OpenAI(
api_key=os.getenv("OPENAI_API_KEY") api_key=os.getenv("OPENAI_API_KEY")
) )
# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript # Screw Bardo Assistant ID
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: # Event Handler for OpenAI Assistant
class EventHandler(AssistantEventHandler): class EventHandler(AssistantEventHandler):
@override @override
def on_text_created(self, text) -> None: def on_text_created(self, text) -> None:
awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n")
logging.info("Text created event handled.")
@override @override
def on_text_delta(self, delta, snapshot): def on_text_delta(self, delta, snapshot):
awaiter(output_stream.send_delta(delta.value)) output_stream.send_delta(delta.value)
logging.debug(f"Text delta received: {delta.value}")
def on_tool_call_created(self, tool_call): def on_tool_call_created(self, tool_call):
raise Exception("Assistant shouldn't be calling tools.") error_msg = "Assistant shouldn't be calling tools."
logging.error(error_msg)
raise Exception(error_msg)
def create_and_stream(transcript): def create_and_stream(transcript):
logging.info("Starting OpenAI stream thread.")
try:
with client.beta.threads.create_and_run_stream( with client.beta.threads.create_and_run_stream(
assistant_id=asst_screw_bardo_id, assistant_id=asst_screw_bardo_id,
thread={ thread={
@@ -100,35 +111,85 @@ def create_and_stream(transcript):
) as stream: ) as stream:
stream.until_done() stream.until_done()
output_stream.done = True output_stream.done = True
logging.info("OpenAI stream completed.")
except Exception as e:
logging.exception("Exception occurred during create_and_stream.")
def yoink(thread: threading.Thread):
    """Start the worker thread and yield its output as UTF-8 encoded chunks.

    This is a generator: it starts ``thread``, then repeatedly pops text
    deltas from the module-level ``output_stream.buffer`` and yields each one
    as ``bytes`` until the producer is finished and the buffer is empty.

    Args:
        thread: An unstarted ``threading.Thread`` that fills
            ``output_stream``; it is started here and joined before the
            generator finishes.

    Yields:
        bytes: Each buffered delta, UTF-8 encoded, in the order it was queued.
    """
    # Local import so this block does not depend on a file-level change.
    import time

    logging.info("Starting stream thread...")
    thread.start()
    logging.info("Stream thread started. Beginning to stream output.")
    try:
        producer_finished = False
        while True:
            if output_stream.buffer:
                delta = output_stream.buffer.pop(0)
                yield bytes(delta, encoding="utf-8")
            elif producer_finished:
                # Producer is done and nothing is left to drain.
                break
            elif output_stream.done or not thread.is_alive():
                # Either the stream signalled completion or the worker died
                # without signalling (e.g. it raised). Do one more pass so any
                # delta appended just before it finished is still delivered.
                producer_finished = True
            else:
                # Plain blocking sleep; spinning up an asyncio event loop on
                # every idle poll only to block on it is needless overhead.
                time.sleep(0.018)
    except Exception:
        logging.exception("Exception occurred during streaming output.")
    else:
        logging.info("Stream completed successfully.")
    finally:
        # Join before resetting: if the consumer stopped early the worker may
        # still be writing to output_stream, and resetting first would let it
        # pollute the state seen by the next request.
        thread.join()
        logging.info("Stream thread joined.")
        logging.info(f"Completed Assistant Response:\n{output_stream.response}")
        output_stream.reset()
def process(url):
    """Validate a YouTube URL and prepare the thread that will stream a reply.

    The video ID is parsed from ``url`` and its transcript is fetched. On
    success a ``threading.Thread`` targeting ``create_and_stream`` is built
    but NOT started; the caller is expected to start it later.

    Args:
        url: The URL submitted by the user. ``None`` or a blank string is
            treated as "no URL provided".

    Returns:
        A 4-tuple ``(success, message, status_code, thread)``:
        on success ``(True, None, None, thread)``; on failure
        ``(False, <user-facing message>, <HTTP error status>, None)``.
    """
    # Reject missing/blank input up front; passing None on to the regex
    # search in get_video_id would raise a TypeError.
    url = (url or "").strip()
    if not url:
        logging.warning("No URL provided.")
        return (False, "No URL provided.", 400, None)

    current_time = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')
    logging.info(f"New Entry at {current_time}")
    logging.info(f"URL: {url}")

    video_id = get_video_id(url)
    if not video_id:
        logging.warning(f"Could not parse video id from URL: {url}")
        return (False, "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)", 400, None)
    logging.info(f"Parsed Video ID: {video_id}")

    # Get the transcript for that video ID
    transcript = get_auto_transcript(video_id)
    if not transcript:
        logging.error("Error: could not retrieve transcript. Assistant won't be called.")
        # A failure must not be reported with a 2xx status; use 400 like the
        # other failure paths so clients can detect it from the status code.
        return (False, "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube.", 400, None)

    # Built but deliberately not started here.
    thread = threading.Thread(
        name="create_stream",
        target=create_and_stream,
        args=(transcript,)
    )
    logging.info("Stream preparation complete, sending reply.")
    return (True, None, None, thread)
def get_video_id(url): def get_video_id(url):
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})'
id = re.search(youtu_be, url) id_match = re.search(youtu_be, url)
if not id: if not id_match:
id = re.search(youtube_com, url) id_match = re.search(youtube_com, url)
if not id: if not id_match:
# Couldn't parse video ID from URL # Couldn't parse video ID from URL
logging.warning(f"Failed to parse video ID from URL: {url}")
return None return None
return id.group(1) return id_match.group(1)
# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt
def get_auto_transcript(video_id): def get_auto_transcript(video_id):
trans_api_errors = youtube_transcript_api._errors trans_api_errors = youtube_transcript_api._errors
try: try:
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
except trans_api_errors.TranscriptsDisabled as e: except trans_api_errors.TranscriptsDisabled as e:
log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') logging.exception(f"Exception while fetching transcript: {e}")
return None return None
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
txt_transcript = formatter.format_transcript(transcript) txt_transcript = formatter.format_transcript(transcript)
logging.info("Transcript successfully retrieved and formatted.")
return txt_transcript return txt_transcript
# Initialize output stream
output_stream = StreamOutput() output_stream = StreamOutput()
log(f"\n\n# Main initilized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumeably application starting.\n") logging.info(f"Main initialized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumably application starting.")

View File

@@ -40,6 +40,7 @@ body {
overflow-y: auto; overflow-y: auto;
font-size: 1rem; font-size: 1rem;
line-height: 1.5; line-height: 1.5;
scroll-behavior: smooth;
} }
.form-section { .form-section {