hype streaming take two plus better handling of multiple users (it still doesn't work)
This commit is contained in:
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -10,7 +10,7 @@
|
||||
"request": "launch",
|
||||
"module": "flask",
|
||||
"env": {
|
||||
"FLASK_APP": "app.py",
|
||||
"FLASK_APP": "./app/app.py",
|
||||
"FLASK_DEBUG": "1"
|
||||
},
|
||||
"args": [
|
||||
|
||||
74
app/app.py
74
app/app.py
@@ -1,62 +1,40 @@
|
||||
import logging
|
||||
from flask import Flask, render_template, Response, request
|
||||
from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter
|
||||
from asyncio import sleep
|
||||
from datetime import datetime
|
||||
import threading, pytz
|
||||
from main import yoink, process
|
||||
|
||||
app = Flask(__name__, static_folder="website/static", template_folder="website")
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
filename='./logs/app.log',
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s %(levelname)s: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
|
||||
@app.route('/')
|
||||
def home():
|
||||
logging.info("Home page accessed.")
|
||||
return render_template('index.html')
|
||||
|
||||
@app.route('/process_url', methods=['POST'])
|
||||
def process_url():
|
||||
url = request.form.get('url', '').strip()
|
||||
if not url:
|
||||
log("No URL provided.\n")
|
||||
return "No URL provided.", 400
|
||||
|
||||
log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
log(f"URL: {url}\n")
|
||||
|
||||
# Extract the video ID from the URL
|
||||
video_id = get_video_id(url)
|
||||
if not video_id:
|
||||
log(f"Could not parse video id from URL: {url}\n")
|
||||
return "Couldn't parse video ID from URL. (Ensure it's a valid YouTube.com or YouTu.be URL.)", 400
|
||||
log(f"Video ID: {video_id}\n")
|
||||
|
||||
# Get the transcript for that video ID
|
||||
transcript = get_auto_transcript(video_id)
|
||||
if not transcript:
|
||||
log("## Error: Could not retrieve transcript. Assistant won't be called.\n")
|
||||
return "Parsed video ID, but transcript retrieval failed (might be disabled by the video owner).", 400
|
||||
|
||||
# Start the stream thread
|
||||
thread = threading.Thread(target=create_and_stream, args=(transcript,))
|
||||
thread.start()
|
||||
log("Stream preparation complete, sending reply...\n")
|
||||
return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200)
|
||||
global most_recent_thread
|
||||
url = request.form['url']
|
||||
logging.info(f"Received URL for processing: {url}")
|
||||
success, msg, status_code, most_recent_thread = process(url)
|
||||
if success:
|
||||
logging.info("Processing started successfully.")
|
||||
return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200)
|
||||
else:
|
||||
logging.error(f"Processing failed: {msg}")
|
||||
return Response(msg, content_type='text/plain', status=status_code)
|
||||
|
||||
@app.route('/stream_output')
|
||||
def stream_output_route():
|
||||
def generate():
|
||||
log("<details>\n<summary>Starting stream thread...</summary>\n\n")
|
||||
|
||||
# Start streaming output from output_stream
|
||||
log("Starting to stream output.\n")
|
||||
while not output_stream.done:
|
||||
if output_stream.buffer:
|
||||
delta = output_stream.buffer.pop(0)
|
||||
yield delta.encode("utf-8")
|
||||
else:
|
||||
awaiter(sleep(0.05))
|
||||
log(f"\nStream completed.\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
|
||||
output_stream.reset()
|
||||
log("\n### Task completed successfully without errors!\n")
|
||||
|
||||
return Response(generate(), content_type='text/plain', status=200)
|
||||
def stream_output():
|
||||
logging.info("Streaming output requested.")
|
||||
return Response(yoink(most_recent_thread), content_type='text/plain', status=200)
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(debug=True, host='0.0.0.0', port=1986)
|
||||
logging.info("Starting Flask application.")
|
||||
app.run(debug=True)
|
||||
245
app/main.py
245
app/main.py
@@ -1,134 +1,195 @@
|
||||
# To parse video ids
|
||||
import re
|
||||
import threading
|
||||
import asyncio
|
||||
from asyncio import sleep
|
||||
from typing_extensions import override
|
||||
from datetime import datetime
|
||||
import pytz
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Youtube Transcript stuff import
|
||||
# Youtube Transcript imports
|
||||
import youtube_transcript_api._errors
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from youtube_transcript_api.formatters import TextFormatter
|
||||
|
||||
# OpenAI API stuff import
|
||||
# OpenAI API imports
|
||||
from openai import AssistantEventHandler
|
||||
from openai import OpenAI
|
||||
|
||||
### For streaming
|
||||
from typing_extensions import override
|
||||
import asyncio
|
||||
# Load environment variables
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# Handle async outside of async functions
|
||||
awaiter = asyncio.run
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
filename='./logs/main.log',
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s %(levelname)s: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
|
||||
# The StreamOutput class to handle streaming
|
||||
class StreamOutput:
|
||||
def __init__(self):
|
||||
self.delta: str = ""
|
||||
self.response: str = ""
|
||||
self.done: bool = False
|
||||
self.buffer: list = []
|
||||
|
||||
def __init__(self):
|
||||
self.delta: str = ""
|
||||
self.response: str = ""
|
||||
self.done: bool = False
|
||||
self.buffer: list = []
|
||||
def reset(self):
|
||||
self.delta = ""
|
||||
self.response = ""
|
||||
self.done = False
|
||||
self.buffer = []
|
||||
|
||||
def reset(self):
|
||||
self.delta = ""
|
||||
self.response = ""
|
||||
self.done = False
|
||||
self.buffer: list = []
|
||||
def send_delta(self, delta):
|
||||
awaiter(self.process_delta(delta))
|
||||
|
||||
async def send_delta(self, delta):
|
||||
self.delta = delta
|
||||
self.response += delta
|
||||
def get_index(list):
|
||||
if len(list) == 0:
|
||||
return 0
|
||||
async def process_delta(self, delta):
|
||||
self.delta = delta
|
||||
self.response += delta
|
||||
|
||||
def get_index(lst):
|
||||
if len(lst) == 0:
|
||||
return 0
|
||||
else:
|
||||
return len(lst) - 1
|
||||
|
||||
if self.buffer:
|
||||
try:
|
||||
if self.delta != self.buffer[get_index(self.buffer)]:
|
||||
self.buffer.append(delta)
|
||||
except IndexError as index_error:
|
||||
logging.error(f"Caught IndexError: {str(index_error)}")
|
||||
self.buffer.append(delta)
|
||||
else:
|
||||
return len(list)-1
|
||||
if self.buffer != []:
|
||||
try:
|
||||
if self.delta != self.buffer[get_index(self.buffer)]:
|
||||
self.buffer.append(delta)
|
||||
except IndexError as index_error:
|
||||
log(f"\nCaught IndexError: {str(index_error)}")
|
||||
self.buffer.append(delta)
|
||||
else: self.buffer.append(delta)
|
||||
return
|
||||
|
||||
# To get the env var
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# For logging
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
|
||||
def log(message):
|
||||
try:
|
||||
with open("logs/log.md", "a") as file:
|
||||
file.write(message)
|
||||
except FileNotFoundError:
|
||||
with open("logs/log.md", "x+"):
|
||||
log(message)
|
||||
|
||||
### OpenAI Config
|
||||
# OpenAI Config
|
||||
|
||||
# Setting up OpenAI Client with API Key
|
||||
client = OpenAI(
|
||||
organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
|
||||
project="proj_NGz8Kux8CSka7DRJucAlDCz6",
|
||||
api_key=os.getenv("OPENAI_API_KEY")
|
||||
organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
|
||||
project="proj_NGz8Kux8CSka7DRJucAlDCz6",
|
||||
api_key=os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
|
||||
# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
|
||||
# Screw Bardo Assistant ID
|
||||
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
||||
|
||||
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
|
||||
# Event Handler for OpenAI Assistant
|
||||
class EventHandler(AssistantEventHandler):
|
||||
@override
|
||||
def on_text_created(self, text) -> None:
|
||||
awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))
|
||||
@override
|
||||
def on_text_created(self, text) -> None:
|
||||
output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n")
|
||||
logging.info("Text created event handled.")
|
||||
|
||||
@override
|
||||
def on_text_delta(self, delta, snapshot):
|
||||
awaiter(output_stream.send_delta(delta.value))
|
||||
@override
|
||||
def on_text_delta(self, delta, snapshot):
|
||||
output_stream.send_delta(delta.value)
|
||||
logging.debug(f"Text delta received: {delta.value}")
|
||||
|
||||
def on_tool_call_created(self, tool_call):
|
||||
raise Exception("Assistant shouldn't be calling tools.")
|
||||
def on_tool_call_created(self, tool_call):
|
||||
error_msg = "Assistant shouldn't be calling tools."
|
||||
logging.error(error_msg)
|
||||
raise Exception(error_msg)
|
||||
|
||||
def create_and_stream(transcript):
|
||||
with client.beta.threads.create_and_run_stream(
|
||||
assistant_id=asst_screw_bardo_id,
|
||||
thread={
|
||||
"messages": [{"role": "user", "content": transcript}]
|
||||
},
|
||||
event_handler=EventHandler()
|
||||
) as stream:
|
||||
stream.until_done()
|
||||
output_stream.done = True
|
||||
logging.info("Starting OpenAI stream thread.")
|
||||
try:
|
||||
with client.beta.threads.create_and_run_stream(
|
||||
assistant_id=asst_screw_bardo_id,
|
||||
thread={
|
||||
"messages": [{"role": "user", "content": transcript}]
|
||||
},
|
||||
event_handler=EventHandler()
|
||||
) as stream:
|
||||
stream.until_done()
|
||||
output_stream.done = True
|
||||
logging.info("OpenAI stream completed.")
|
||||
except Exception as e:
|
||||
logging.exception("Exception occurred during create_and_stream.")
|
||||
|
||||
def yoink(thread: threading.Thread):
|
||||
logging.info("Starting stream thread...")
|
||||
thread.start()
|
||||
logging.info("Stream thread started. Beginning to stream output.")
|
||||
|
||||
try:
|
||||
while not output_stream.done:
|
||||
if output_stream.buffer:
|
||||
delta = output_stream.buffer.pop(0)
|
||||
yield bytes(delta, encoding="utf-8")
|
||||
else:
|
||||
asyncio.run(sleep(0.018))
|
||||
except Exception as e:
|
||||
logging.exception("Exception occurred during streaming output.")
|
||||
finally:
|
||||
logging.info("Stream completed successfully.")
|
||||
logging.info(f"Completed Assistant Response:\n{output_stream.response}")
|
||||
output_stream.reset()
|
||||
thread.join()
|
||||
logging.info("Stream thread joined. Task completed successfully without errors.")
|
||||
|
||||
def process(url):
|
||||
current_time = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')
|
||||
logging.info(f"New Entry at {current_time}")
|
||||
logging.info(f"URL: {url}")
|
||||
|
||||
video_id = get_video_id(url)
|
||||
if not video_id:
|
||||
logging.warning(f"Could not parse video id from URL: {url}")
|
||||
return (False, "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)", 400, None)
|
||||
logging.info(f"Parsed Video ID: {video_id}")
|
||||
|
||||
# Get the transcript for that video ID
|
||||
transcript = get_auto_transcript(video_id)
|
||||
if not transcript:
|
||||
logging.error("Error: could not retrieve transcript. Assistant won't be called.")
|
||||
return (False, "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube.", 200, None)
|
||||
|
||||
thread = threading.Thread(
|
||||
name="create_stream",
|
||||
target=create_and_stream,
|
||||
args=(transcript,)
|
||||
)
|
||||
logging.info("Stream preparation complete, sending reply.")
|
||||
return (True, None, None, thread)
|
||||
|
||||
def get_video_id(url):
|
||||
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
|
||||
youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})'
|
||||
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
|
||||
youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})'
|
||||
|
||||
id = re.search(youtu_be, url)
|
||||
if not id:
|
||||
id = re.search(youtube_com, url)
|
||||
id_match = re.search(youtu_be, url)
|
||||
if not id_match:
|
||||
id_match = re.search(youtube_com, url)
|
||||
|
||||
if not id:
|
||||
# Couldn't parse video ID from URL
|
||||
return None
|
||||
if not id_match:
|
||||
# Couldn't parse video ID from URL
|
||||
logging.warning(f"Failed to parse video ID from URL: {url}")
|
||||
return None
|
||||
|
||||
return id.group(1)
|
||||
return id_match.group(1)
|
||||
|
||||
# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt
|
||||
def get_auto_transcript(video_id):
|
||||
trans_api_errors = youtube_transcript_api._errors
|
||||
try:
|
||||
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
|
||||
except trans_api_errors.TranscriptsDisabled as e:
|
||||
log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
|
||||
return None
|
||||
trans_api_errors = youtube_transcript_api._errors
|
||||
try:
|
||||
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
|
||||
except trans_api_errors.TranscriptsDisabled as e:
|
||||
logging.exception(f"Exception while fetching transcript: {e}")
|
||||
return None
|
||||
|
||||
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
|
||||
|
||||
txt_transcript = formatter.format_transcript(transcript)
|
||||
return txt_transcript
|
||||
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
|
||||
txt_transcript = formatter.format_transcript(transcript)
|
||||
logging.info("Transcript successfully retrieved and formatted.")
|
||||
return txt_transcript
|
||||
|
||||
# Initialize output stream
|
||||
output_stream = StreamOutput()
|
||||
|
||||
log(f"\n\n# Main initilized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumeably application starting.\n")
|
||||
logging.info(f"Main initialized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumably application starting.")
|
||||
@@ -40,6 +40,7 @@ body {
|
||||
overflow-y: auto;
|
||||
font-size: 1rem;
|
||||
line-height: 1.5;
|
||||
scroll-behavior: smooth;
|
||||
}
|
||||
|
||||
.form-section {
|
||||
|
||||
Reference in New Issue
Block a user