Pulling from 'dev' (streaming is here)

This commit is contained in:
ForeverPyrite
2024-10-15 20:53:10 -04:00
6 changed files with 170 additions and 76 deletions

3
.gitignore vendored
View File

@@ -1,5 +1,4 @@
thread-killer.py log.md
log.txt
.env .env
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files

51
app.py
View File

@@ -1,7 +1,10 @@
from flask import Flask, render_template, request from flask import Flask, render_template, Response, request
from main import get_auto_transcript, get_video_id, create_and_stream from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter
from asyncio import sleep
from datetime import datetime from datetime import datetime
import pytz import threading, pytz
app = Flask(__name__, static_folder="website/static", template_folder="website") app = Flask(__name__, static_folder="website/static", template_folder="website")
@@ -11,25 +14,49 @@ def home():
@app.route('/process_url', methods=['POST']) @app.route('/process_url', methods=['POST'])
def process_url(): def process_url():
# Opens a file to log the video id and the assistants respone to see if I can further improve instructions: global thread
log = open("log.txt", "at", 1) log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n")
url = request.form['url'] url = request.form['url']
log(f"URL: {url}\n")
# Extract the video ID from the URL # Extract the video ID from the URL
video_id = get_video_id(url) # Modify this function to accept the URL video_id = get_video_id(url) # Modify this function to accept the URL
if not video_id: if not video_id:
log(f"Could not parse video id from URL: {url}")
return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)" return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
log(f"Video ID: {video_id}\n\n")
# Get the transcript for that video ID # Get the transcript for that video ID
transcript = get_auto_transcript(video_id) transcript = get_auto_transcript(video_id)
if (not transcript): if (not transcript):
log("## Error: could not retrieve transcript, Assistant won't be called.")
return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube." return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."
# Process the transcript and stream the result. thread = threading.Thread(name="create_stream", target=create_and_stream, args=(transcript,)) # The comma here is very intentional, it's so that it iterates it as a tuple rather than iterateing the string.
response = create_and_stream(transcript) log("Stream preperation complete, sending reply...\n\n")
log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}") return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200) # Add more detailed output if needed
# Return a response
return response # Add more detailed output if needed
if __name__ == '__main__': # Change this line to properly check for main @app.route('/stream_output')
def stream_output():
    """Stream the assistant's reply to the client as plain text.

    Starts the worker thread prepared by ``process_url`` (the module-level
    ``thread``), then forwards every delta that the worker pushes into
    ``output_stream.buffer`` until the worker reports completion.

    Returns:
        A Flask ``Response`` wrapping a generator of UTF-8 encoded chunks.
    """
    # Local import so this block does not depend on the file's import header.
    import time

    def yoink():
        log("<details>\n<summary>Starting stream thread...</summary>\n\n")
        thread.start()
        # Start streaming output from output_stream
        log("Starting to stream output.")
        worker_failed = False
        while True:
            # Snapshot liveness first, then completion, and only then look at
            # the buffer: anything the worker appended before it finished (or
            # died) is guaranteed to be visible to the buffer check below.
            worker_alive = thread.is_alive()
            finished = output_stream.done
            if output_stream.buffer:
                delta = output_stream.buffer.pop(0)
                yield bytes(delta, encoding="utf-8")
                continue
            if finished:
                # Worker is done AND the buffer has been fully drained.
                break
            if not worker_alive:
                # The worker exited without ever setting `done` (it raised);
                # without this check the loop would poll forever.
                worker_failed = True
                break
            # A plain blocking sleep is enough here; no event loop is needed.
            time.sleep(0.05)
        if worker_failed:
            log("\nStream aborted: the worker thread stopped before signalling completion.\n\n</details>\n\n")
            output_stream.reset()
            thread.join()
            return
        log(f"\nStream successfully completely.\n\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
        output_stream.reset()
        thread.join()
        log("\n### Task completed sucessfully without errors!")
        return

    return Response(yoink(), content_type='text/plain', status=200)
if __name__ == '__main__':
app.run(debug=True) app.run(debug=True)

0
log.md Normal file
View File

118
main.py
View File

@@ -1,4 +1,3 @@
# To parse video ids # To parse video ids
import re import re
@@ -10,8 +9,43 @@ from youtube_transcript_api.formatters import TextFormatter
# OpenAI API stuff import # OpenAI API stuff import
from openai import AssistantEventHandler from openai import AssistantEventHandler
from openai import OpenAI from openai import OpenAI
# For streaming
### For streaming
from typing_extensions import override from typing_extensions import override
import asyncio
awaiter = asyncio.run
# The StreamOutput class to handle streaming
class StreamOutput:
    """Hand-off state between the assistant event handler and the HTTP stream.

    The event handler pushes text deltas in through ``send_delta``; the
    streaming route pops them off the front of ``buffer`` and sends them to
    the client, and stops once ``done`` is set.
    """

    def __init__(self):
        # Most recent delta received.
        self.delta: str = ""
        # Every delta received so far, concatenated in arrival order.
        self.response: str = ""
        # Completion flag; set from outside once the run has finished.
        self.done: bool = False
        # FIFO queue of deltas that have not been sent to the client yet.
        self.buffer: list = []

    def reset(self):
        """Restore the freshly-constructed state so the object can be reused."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer = []

    async def send_delta(self, delta):
        """Record one text delta and queue it for streaming.

        Every delta is queued, including one identical to the previously
        queued delta: repeated tokens are legitimate output, and dropping
        them would make the streamed text diverge from ``response``.

        Args:
            delta: The piece of text to append.
        """
        self.delta = delta
        self.response += delta
        self.buffer.append(delta)
# To get the env var # To get the env var
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -19,32 +53,16 @@ import os
load_dotenv() load_dotenv()
# For logging
import pytz
from datetime import datetime
def log(str):
    """Append ``str`` verbatim to ``log.md`` in the current working directory.

    The file is always written as UTF-8 so that non-ASCII text does not fail
    on platforms whose default encoding cannot represent it.

    Args:
        str: The text to append. The name shadows the builtin but is kept
            unchanged so existing callers keep working.
    """
    with open("log.md", "at", encoding="utf-8") as file:
        file.write(str)
### OpenAI Config ### OpenAI Config
# This is copy and pasted straight up from the quickstart guide:
class EventHandler(AssistantEventHandler):
    """Event handler that prints the assistant's streamed output to stdout."""

    @override
    def on_text_created(self, text) -> None:
        # Print the prompt prefix once, before the first text delta arrives.
        print(f"\nassistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta, snapshot):
        # Print each piece of text as it arrives, without a trailing newline.
        print(delta.value, end="", flush=True)

    def on_tool_call_created(self, tool_call):
        # Announce which kind of tool the assistant started calling.
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    def on_tool_call_delta(self, delta, snapshot):
        # Only code-interpreter deltas are printed; anything else is ignored.
        if delta.type != 'code_interpreter':
            return
        interpreter = delta.code_interpreter
        if interpreter.input:
            print(interpreter.input, end="", flush=True)
        if not interpreter.outputs:
            return
        print(f"\n\noutput >", flush=True)
        for entry in interpreter.outputs:
            if entry.type == "logs":
                print(f"\n{entry.logs}", flush=True)
# Setting up OpenAI Client with API Key # Setting up OpenAI Client with API Key
api_key = os.getenv("OPENAI_API_KEY") api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI( client = OpenAI(
@@ -56,22 +74,29 @@ client = OpenAI(
# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript # screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# uhh no we need a new thread each time tf # This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
# make sure to call the function after the transcript is confirmed to work, it would be very stupid to call the function and make a new thread this early class EventHandler(AssistantEventHandler):
@override
def on_text_created(self, text) -> None:
awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))
@override
def on_text_delta(self, delta, snapshot):
awaiter(output_stream.send_delta(delta.value))
def on_tool_call_created(self, tool_call):
raise Exception("Assistant shouldn't be calling tools.")
def create_and_stream(transcript): def create_and_stream(transcript):
with client.beta.threads.create_and_run_stream( with client.beta.threads.create_and_run_stream(
assistant_id=asst_screw_bardo_id, assistant_id=asst_screw_bardo_id,
thread={ thread={
"messages" : [ "messages": [{"role": "user", "content": transcript}]
{"role": "user", },
"content": transcript} event_handler=EventHandler()
]
},
event_handler=EventHandler()
) as stream: ) as stream:
stream.until_done() stream.until_done()
messages = stream.get_final_messages() output_stream.done = True
return messages[0].content[0].text.value
def get_video_id(url): def get_video_id(url):
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
@@ -82,7 +107,7 @@ def get_video_id(url):
id = re.search(youtube_com, url) id = re.search(youtube_com, url)
if not id: if not id:
print("Couldn't parse video ID from URL") # Couldn't parse video ID from URL
return None return None
return id.group(1) return id.group(1)
@@ -92,7 +117,8 @@ def get_auto_transcript(video_id):
trans_api_errors = youtube_transcript_api._errors trans_api_errors = youtube_transcript_api._errors
try: try:
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
except trans_api_errors.TranscriptsDisabled: except trans_api_errors.TranscriptsDisabled as e:
log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
return None return None
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
@@ -100,12 +126,4 @@ def get_auto_transcript(video_id):
txt_transcript = formatter.format_transcript(transcript) txt_transcript = formatter.format_transcript(transcript)
return txt_transcript return txt_transcript
output_stream = StreamOutput()
# Stores the video id inputted by the user
"""
video_id = get_video_id()
transcript = get_auto_transcript(video_id)
create_and_stream(transcript)
"""

View File

@@ -1,17 +1,18 @@
document.addEventListener("DOMContentLoaded", (event) => { document.addEventListener("DOMContentLoaded", (event) => {
document.getElementById('submit').addEventListener('click', function() { const response_area = document.getElementById('response-area');
const submit_button = document.getElementById('submit')
submit_button.addEventListener('click', function() {
var url = document.getElementById('url_box').value; var url = document.getElementById('url_box').value;
const response_area = document.getElementById('response-area');
if (!url) { if (!url) {
response_area.innerText = 'Please enter a URL.'; response_area.innerText = 'Please enter a URL.';
return; return;
} }
else{ else {
response_area.innerText = "Sending URL and retriving transcript." document.getElementById('url_box').value = "";
} }
// First, process the URL
fetch('/process_url', { fetch('/process_url', {
method: 'POST', method: 'POST',
headers: { headers: {
@@ -19,13 +20,61 @@ document.addEventListener("DOMContentLoaded", (event) => {
}, },
body: new URLSearchParams({ url: url }) body: new URLSearchParams({ url: url })
}) })
.then(response => response.text()) .then(response => {
.then(data => { if (!response.ok) {
response_area.innerText = data; throw new Error('Network response was not ok');
}
// Extract the text from the response body
return response.text(); // Use .json() if the response is JSON
})
.then(text => {
submit_button.style.display = "none";
if (text === "Processing started. Check /stream_output for updates.") {
streamOutput(response_area);
} else {
response_area.innerText = text; // Show any other response message
submit_button.style.display = "flex";
}
}) })
.catch(error => { .catch(error => {
console.error('Error:', error); console.error('Error processing URL:', error);
response_area.innerText = 'An error occurred. Please try again.'; response_area.innerText = 'Error processing URL: ' + error.message;
submit_button.style.display = "flex";
}); });
}); });
}); });
/**
 * Fetch /stream_output and append the streamed text to the page as it arrives.
 *
 * The submit button is shown again when the stream ends, whether it finished
 * normally or failed, so the user is never left without a way to retry.
 *
 * @param {HTMLElement} response_area Element that receives the streamed output.
 */
function streamOutput(response_area) {
    const submit_button = document.getElementById('submit');
    const decoder = new TextDecoder("utf-8");

    // Append one decoded chunk and keep the newest output scrolled into view.
    function appendChunk(text) {
        if (text) {
            response_area.innerHTML += text;
        }
        response_area.scrollTop = response_area.scrollHeight;
    }

    // Fetch the streaming output
    const streamResponsePromise = fetch('/stream_output');
    response_area.innerHTML = "";

    streamResponsePromise
        .then(response => {
            if (!response.ok) {
                throw new Error('Network response was not ok');
            }
            const reader = response.body.getReader();

            function readStream() {
                // Return the promise so a failure in the middle of the stream
                // reaches the .catch() below instead of being lost.
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush any bytes the decoder is still holding from a
                        // multi-byte character split across chunks.
                        appendChunk(decoder.decode());
                        return;
                    }
                    // Decode and process the chunk
                    appendChunk(decoder.decode(value, { stream: true }));
                    // Continue reading
                    return readStream();
                });
            }

            // Start reading the stream
            return readStream();
        })
        .then(() => {
            submit_button.style.display = "flex";
        })
        .catch(error => {
            console.error('Error fetching stream:', error);
            response_area.innerText = 'Error fetching stream: ' + error.message;
            submit_button.style.display = "flex";
        });
}

View File

@@ -40,9 +40,10 @@ body .content {
display: block; display: block;
height: 90%; height: 90%;
min-height: 90vh; min-height: 90vh;
overflow: auto; text-wrap: wrap;
flex-wrap: wrap; flex-wrap: wrap;
align-content: flex-end; align-content: flex-end;
overflow-y: auto;
} }
.form_box { .form_box {