Streaming Update (NEEDS OPTIMIZATIONS AND FAILSAFES!!!!!!!!!!!!!!!!!!!!!!)

This commit is contained in:
ForeverPyrite
2024-10-05 16:57:03 -04:00
parent 5601e8a874
commit 3ea5681f86
5 changed files with 81 additions and 77 deletions

1
.gitignore vendored
View File

@@ -1,4 +1,3 @@
thread-killer.py
log.txt log.txt
.env .env

40
app.py
View File

@@ -1,23 +1,12 @@
from flask import Flask, render_template, Response, request from flask import Flask, render_template, Response, request
from main import get_auto_transcript, get_video_id, create_and_stream, EventHandler from main import get_auto_transcript, get_video_id, create_and_stream, output_buffer, output_lock
from datetime import datetime from datetime import datetime
import sys import threading
import io
import pytz import pytz
import time import time
app = Flask(__name__, static_folder="website/static", template_folder="website") app = Flask(__name__, static_folder="website/static", template_folder="website")
class StreamToLogger(io.StringIO):
def __init__(self):
super().__init__()
def write(self, message):
# I could probably log stuff here
print(message, end='') # Print to standard output (console)
# You could also log this message or handle it differently.
@app.route('/') @app.route('/')
def home(): def home():
return render_template('index.html') return render_template('index.html')
@@ -33,9 +22,6 @@ def streaming():
@app.route('/process_url', methods=['POST']) @app.route('/process_url', methods=['POST'])
def process_url(): def process_url():
old_stdout = sys.stdout
new_stdout = StreamToLogger()
sys.stdout = new_stdout
# Opens a file to log the video id and the assistants respone to see if I can further improve instructions: # Opens a file to log the video id and the assistants respone to see if I can further improve instructions:
#log = open("log.txt", "at", 1) #log = open("log.txt", "at", 1)
url = request.form['url'] url = request.form['url']
@@ -50,11 +36,31 @@ def process_url():
if (not transcript): if (not transcript):
return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."
# Start processing in a separate thread
threading.Thread(target=create_and_stream, args=(transcript,)).start()
# Process the transcript and stream the result. # Process the transcript and stream the result.
# response = create_and_stream(transcript) # response = create_and_stream(transcript)
# log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}") # log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}")
# Return a response # Return a response
return Response(create_and_stream(transcript), content_type="text/plain", status=200, direct_passthrough=True) # Add more detailed output if needed return Response("Processing started. Check /stream_output for updates.", status=200) # Add more detailed output if needed
@app.route('/stream_output')
def stream_output():
def yoink():
while True:
with output_lock:
if output_buffer:
message = output_buffer.pop(0)
yield f"{message}"
time.sleep(0.005) # Adjust as necessary for your application
return Response(yoink(), content_type='text/plain')
if __name__ == '__main__': # Change this line to properly check for main if __name__ == '__main__': # Change this line to properly check for main
app.run(debug=True) app.run(debug=True)

61
main.py
View File

@@ -13,6 +13,12 @@ from openai import OpenAI
# For streaming # For streaming
from typing_extensions import override from typing_extensions import override
import threading
# Output buffer and thread lock
output_buffer = []
output_lock = threading.Lock()
# To get the env var # To get the env var
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
@@ -21,36 +27,6 @@ load_dotenv()
### OpenAI Config ### OpenAI Config
# This is copy and pasted straight up from the quickstart guide:
class EventHandler(AssistantEventHandler):
@override
def on_text_created(self, text) -> None:
print(f"\nassistant > ", end="", flush=True)
@override
def on_text_delta(self, delta, snapshot):
print(delta.value, end="", flush=True)
def on_tool_call_created(self, tool_call):
print(f"\nassistant > {tool_call.type}\n", flush=True)
def on_tool_call_delta(self, delta, snapshot):
if delta.type == 'code_interpreter':
if delta.code_interpreter.input:
print(delta.code_interpreter.input, end="", flush=True)
if delta.code_interpreter.outputs:
print(f"\n\noutput >", flush=True)
for output in delta.code_interpreter.outputs:
if output.type == "logs":
print(f"\n{output.logs}", flush=True)
# Setting up OpenAI Client with API Key # Setting up OpenAI Client with API Key
api_key = os.getenv("OPENAI_API_KEY") api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI( client = OpenAI(
@@ -62,22 +38,31 @@ client = OpenAI(
# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript # screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# uhh no we need a new thread each time tf # This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
# make sure to call the function after the transcript is confirmed to work, it would be very stupid to call the function and make a new thread this early class EventHandler(AssistantEventHandler):
@override
def on_text_created(self, text) -> None:
with output_lock:
output_buffer.append(f"\nassistant > {text}")
@override
def on_text_delta(self, delta, snapshot):
with output_lock:
output_buffer.append(delta.value)
def on_tool_call_created(self, tool_call):
with output_lock:
output_buffer.append(f"\nassistant > {tool_call.type}\n")
def create_and_stream(transcript): def create_and_stream(transcript):
with client.beta.threads.create_and_run_stream( with client.beta.threads.create_and_run_stream(
assistant_id=asst_screw_bardo_id, assistant_id=asst_screw_bardo_id,
thread={ thread={
"messages" : [ "messages": [{"role": "user", "content": transcript}]
{"role": "user",
"content": transcript}
]
}, },
event_handler=EventHandler() event_handler=EventHandler()
) as stream: ) as stream:
stream.until_done() stream.until_done()
messages = stream.get_final_messages()
return messages[0].content[0].text.value
def get_video_id(url): def get_video_id(url):
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'

View File

@@ -17,7 +17,7 @@
<body> <body>
<div class="content"> <div class="content">
<p id="response-area">Response will appear here.</p> <pre id="response-area">Response will appear here. </pre>
<div class="form_box"> <div class="form_box">
<input id="url_box" placeholder="Paste the lecture URL here." autofocus></input> <input id="url_box" placeholder="Paste the lecture URL here." autofocus></input>
<input id="submit" type="submit" onclick=""></input> <input id="submit" type="submit" onclick=""></input>

View File

@@ -1,13 +1,14 @@
document.addEventListener("DOMContentLoaded", (event) => { document.addEventListener("DOMContentLoaded", (event) => {
const response_area = document.getElementById('response-area');
document.getElementById('submit').addEventListener('click', function() { document.getElementById('submit').addEventListener('click', function() {
var url = document.getElementById('url_box').value; var url = document.getElementById('url_box').value;
const response_area = document.getElementById('response-area');
if (!url) { if (!url) {
response_area.innerText = 'Please enter a URL.'; response_area.innerText = 'Please enter a URL.';
return; return;
} }
// First, process the URL
fetch('/process_url', { fetch('/process_url', {
method: 'POST', method: 'POST',
headers: { headers: {
@@ -15,6 +16,26 @@ document.addEventListener("DOMContentLoaded", (event) => {
}, },
body: new URLSearchParams({ url: url }) body: new URLSearchParams({ url: url })
}) })
.then(response => {
if (!response.ok) {
throw new Error('Network response was not ok');
}
// Start streaming once processing is started
streamOutput(response_area);
})
.catch(error => {
console.error('Error processing URL:', error);
response_area.innerText = 'Error processing URL: ' + error.message;
});
});
});
function streamOutput(response_area) {
// Fetch the streaming output
const streamResponsePromise = fetch('/stream_output');
response_area.innerHTML = ""
streamResponsePromise
.then(response => { .then(response => {
const reader = response.body.getReader(); const reader = response.body.getReader();
const decoder = new TextDecoder("utf-8"); const decoder = new TextDecoder("utf-8");
@@ -28,14 +49,7 @@ document.addEventListener("DOMContentLoaded", (event) => {
// Decode and process the chunk // Decode and process the chunk
const chunk = decoder.decode(value, { stream: true }); const chunk = decoder.decode(value, { stream: true });
response_area.innerHTML += chunk;
// Split the received chunk by new line to handle multiple lines (if any)
chunk.split('\n').forEach(data => {
if (data.trim()) { // Avoid empty strings
// Update the inner HTML of the output div
response_area.innerHTML += `<p>${data}</p>`;
}
});
// Continue reading // Continue reading
readStream(); readStream();
@@ -47,6 +61,6 @@ document.addEventListener("DOMContentLoaded", (event) => {
}) })
.catch(error => { .catch(error => {
console.error('Error fetching stream:', error); console.error('Error fetching stream:', error);
response_area.innerText = 'Error fetching stream: ' + error.message;
}); });
}); }
});