Pulling from 'dev' (streaming is here)
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,5 +1,4 @@
|
|||||||
thread-killer.py
|
log.md
|
||||||
log.txt
|
|
||||||
.env
|
.env
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
|
|||||||
51
app.py
51
app.py
@@ -1,7 +1,10 @@
|
|||||||
from flask import Flask, render_template, request
|
from flask import Flask, render_template, Response, request
|
||||||
from main import get_auto_transcript, get_video_id, create_and_stream
|
from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter
|
||||||
|
from asyncio import sleep
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import pytz
|
import threading, pytz
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
app = Flask(__name__, static_folder="website/static", template_folder="website")
|
app = Flask(__name__, static_folder="website/static", template_folder="website")
|
||||||
|
|
||||||
@@ -11,25 +14,49 @@ def home():
|
|||||||
|
|
||||||
@app.route('/process_url', methods=['POST'])
def process_url():
    """Validate a submitted YouTube URL and prepare the streaming worker.

    Reads the ``url`` form field, resolves it to a video ID, fetches the
    transcript, and stores a worker thread targeting ``create_and_stream`` in
    the module-level ``thread`` global. The thread is created here but not
    started by this function.

    Returns:
        A plain-text error message when the video ID or the transcript cannot
        be obtained; otherwise a 200 ``Response`` whose body points the client
        at ``/stream_output``.
    """
    global thread

    # Every request opens a new, timestamped section in the log.
    eastern = pytz.timezone('America/New_York')
    stamp = datetime.now(eastern).strftime('%Y-%m-%d %H:%M:%S')
    log(f"\n\n\n## New Entry at {stamp}\n\n")

    url = request.form['url']
    log(f"URL: {url}\n")

    # Step 1: URL -> video ID.
    video_id = get_video_id(url)
    if not video_id:
        log(f"Could not parse video id from URL: {url}")
        return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
    log(f"Video ID: {video_id}\n\n")

    # Step 2: video ID -> transcript text.
    transcript = get_auto_transcript(video_id)
    if not transcript:
        log("## Error: could not retrieve transcript, Assistant won't be called.")
        return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."

    # Step 3: build (but do not start) the worker. `args` must be a one-element
    # tuple; without the trailing comma the transcript string itself would be
    # unpacked character by character.
    thread = threading.Thread(
        name="create_stream",
        target=create_and_stream,
        args=(transcript,),
    )
    log("Stream preperation complete, sending reply...\n\n")

    return Response(
        "Processing started. Check /stream_output for updates.",
        content_type='text/plain',
        status=200,
    )
|
|
||||||
|
|
||||||
if __name__ == '__main__': # Change this line to properly check for main
|
@app.route('/stream_output')
def stream_output():
    """Start the prepared worker thread and stream its output as plain text.

    The worker is the ``thread`` module global assigned by ``process_url``.
    Chunks are taken from ``output_stream.buffer`` in order and sent as UTF-8
    bytes until the worker marks the stream done and the buffer is empty.

    Returns:
        A streaming 200 ``Response``, or a 409 plain-text ``Response`` when no
        unstarted worker is available (no URL was processed yet, or the
        prepared worker was already consumed by an earlier request).
    """
    import time  # local import: only the idle-poll delay below needs it

    # `thread` only exists after process_url() has run, and a Thread object
    # can be started at most once (`ident` stays None until start()).
    worker = globals().get("thread")
    if worker is None or worker.ident is not None:
        return Response(
            "No pending job to stream. Submit a URL to /process_url first.",
            content_type='text/plain',
            status=409,
        )

    def yoink():
        log("<details>\n<summary>Starting stream thread...</summary>\n\n")
        worker.start()
        # Start streaming output from output_stream
        log("Starting to stream output.")
        while True:
            # Read the flag BEFORE draining: anything appended before `done`
            # was set is then guaranteed to be flushed, so the tail of the
            # reply is never dropped.
            finished = output_stream.done
            while output_stream.buffer:
                yield bytes(output_stream.buffer.pop(0), encoding="utf-8")
            if finished:
                break
            # Plain blocking sleep; no event loop is needed just to wait.
            time.sleep(0.05)
        log(f"\nStream successfully completely.\n\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
        output_stream.reset()
        worker.join()
        log("\n### Task completed sucessfully without errors!")

    return Response(yoink(), content_type='text/plain', status=200)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
app.run(debug=True)
|
app.run(debug=True)
|
||||||
108
main.py
108
main.py
@@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
# To parse video ids
|
# To parse video ids
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -10,8 +9,43 @@ from youtube_transcript_api.formatters import TextFormatter
|
|||||||
# OpenAI API stuff import
|
# OpenAI API stuff import
|
||||||
from openai import AssistantEventHandler
|
from openai import AssistantEventHandler
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
# For streaming
|
|
||||||
|
### For streaming
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
import asyncio
|
||||||
|
awaiter = asyncio.run
|
||||||
|
|
||||||
|
# The StreamOutput class to handle streaming
|
||||||
|
class StreamOutput:
    """Mutable holder for the text produced while a reply is streamed.

    Attributes are plain and public:
        delta: the most recent chunk passed to ``send_delta``.
        response: every chunk received so far, concatenated.
        done: completion flag; this class only ever sets it to ``False``.
        buffer: list of chunks queued for a reader to pop from the front.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Return every attribute to its initial empty state."""
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    async def send_delta(self, delta):
        """Record *delta* and queue it for the reader.

        ``delta`` is always appended to ``response``. It is queued in
        ``buffer`` unless it equals the chunk currently last in the buffer.

        NOTE(review): skipping a chunk equal to the previous queued one also
        drops legitimately repeated tokens, so the streamed text can differ
        from ``response``. Behaviour kept as-is; confirm it is intended.

        Args:
            delta: text chunk to record.
        """
        self.delta = delta
        self.response += delta
        try:
            is_repeat = bool(self.buffer) and self.buffer[-1] == delta
        except IndexError as index_error:
            # Presumably guards against the buffer being emptied by a reader
            # between the emptiness check and the index; treat as not repeated.
            log(f"\nCaught IndexError: {str(index_error)}")
            is_repeat = False
        if not is_repeat:
            self.buffer.append(delta)
|
||||||
|
|
||||||
# To get the env var
|
# To get the env var
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -19,32 +53,16 @@ import os
|
|||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
# For logging
|
||||||
|
import pytz
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
def log(str):
    """Append text to ``log.md`` in the current working directory.

    Args:
        str: text written verbatim; no newline is added. The parameter name
            shadows the builtin but is kept so existing callers keep working.
    """
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # non-ASCII text, which would raise UnicodeEncodeError while logging.
    with open("log.md", "at", encoding="utf-8") as file:
        file.write(str)
|
||||||
|
|
||||||
### OpenAI Config
|
### OpenAI Config
|
||||||
|
|
||||||
# This is copy and pasted straight up from the quickstart guide:
|
|
||||||
class EventHandler(AssistantEventHandler):
|
|
||||||
@override
|
|
||||||
def on_text_created(self, text) -> None:
|
|
||||||
print(f"\nassistant > ", end="", flush=True)
|
|
||||||
|
|
||||||
@override
|
|
||||||
def on_text_delta(self, delta, snapshot):
|
|
||||||
print(delta.value, end="", flush=True)
|
|
||||||
|
|
||||||
def on_tool_call_created(self, tool_call):
|
|
||||||
print(f"\nassistant > {tool_call.type}\n", flush=True)
|
|
||||||
|
|
||||||
def on_tool_call_delta(self, delta, snapshot):
|
|
||||||
if delta.type == 'code_interpreter':
|
|
||||||
if delta.code_interpreter.input:
|
|
||||||
print(delta.code_interpreter.input, end="", flush=True)
|
|
||||||
if delta.code_interpreter.outputs:
|
|
||||||
print(f"\n\noutput >", flush=True)
|
|
||||||
for output in delta.code_interpreter.outputs:
|
|
||||||
if output.type == "logs":
|
|
||||||
print(f"\n{output.logs}", flush=True)
|
|
||||||
|
|
||||||
|
|
||||||
# Setting up OpenAI Client with API Key
|
# Setting up OpenAI Client with API Key
|
||||||
api_key = os.getenv("OPENAI_API_KEY")
|
api_key = os.getenv("OPENAI_API_KEY")
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
@@ -56,22 +74,29 @@ client = OpenAI(
|
|||||||
# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
|
# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
|
||||||
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
||||||
|
|
||||||
# uhh no we need a new thread each time tf
|
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
|
||||||
# make sure to call the function after the transcript is confirmed to work, it would be very stupid to call the function and make a new thread this early
|
class EventHandler(AssistantEventHandler):
    """Event handler that pushes assistant text into ``output_stream``.

    Instead of printing, each text event is forwarded to
    ``output_stream.send_delta`` through ``awaiter``.
    """

    @override
    def on_text_created(self, text) -> None:
        """Push the fixed reply header into the output stream."""
        awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))

    @override
    def on_text_delta(self, delta, snapshot):
        """Forward the text of *delta* (``delta.value``) to the output stream."""
        awaiter(output_stream.send_delta(delta.value))

    @override
    def on_tool_call_created(self, tool_call):
        """Reject tool calls; this assistant is expected to answer in text only.

        Raises:
            RuntimeError: always. It is a subclass of ``Exception``, so
                callers that catch ``Exception`` are unaffected.
        """
        raise RuntimeError("Assistant shouldn't be calling tools.")
|
||||||
|
|
||||||
def create_and_stream(transcript):
    """Run the assistant on *transcript* and mark the shared stream finished.

    Creates a thread holding a single user message (the transcript), runs the
    assistant with an ``EventHandler`` and blocks until the run is done.

    Args:
        transcript: text sent as the content of the user message.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler()
        ) as stream:
            stream.until_done()
    finally:
        # Set the flag even when the run fails; otherwise anything polling
        # `output_stream.done` would wait forever. The exception still
        # propagates to the caller.
        output_stream.done = True
|
|
||||||
|
|
||||||
def get_video_id(url):
|
def get_video_id(url):
|
||||||
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
|
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
|
||||||
@@ -82,7 +107,7 @@ def get_video_id(url):
|
|||||||
id = re.search(youtube_com, url)
|
id = re.search(youtube_com, url)
|
||||||
|
|
||||||
if not id:
|
if not id:
|
||||||
print("Couldn't parse video ID from URL")
|
# Couldn't parse video ID from URL
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return id.group(1)
|
return id.group(1)
|
||||||
@@ -92,7 +117,8 @@ def get_auto_transcript(video_id):
|
|||||||
trans_api_errors = youtube_transcript_api._errors
|
trans_api_errors = youtube_transcript_api._errors
|
||||||
try:
|
try:
|
||||||
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
|
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
|
||||||
except trans_api_errors.TranscriptsDisabled:
|
except trans_api_errors.TranscriptsDisabled as e:
|
||||||
|
log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
|
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
|
||||||
@@ -100,12 +126,4 @@ def get_auto_transcript(video_id):
|
|||||||
txt_transcript = formatter.format_transcript(transcript)
|
txt_transcript = formatter.format_transcript(transcript)
|
||||||
return txt_transcript
|
return txt_transcript
|
||||||
|
|
||||||
|
output_stream = StreamOutput()
|
||||||
# Stores the video ID entered by the user
|
|
||||||
"""
|
|
||||||
video_id = get_video_id()
|
|
||||||
|
|
||||||
transcript = get_auto_transcript(video_id)
|
|
||||||
|
|
||||||
create_and_stream(transcript)
|
|
||||||
"""
|
|
||||||
@@ -1,17 +1,18 @@
|
|||||||
|
|
||||||
document.addEventListener("DOMContentLoaded", (event) => {
|
document.addEventListener("DOMContentLoaded", (event) => {
|
||||||
document.getElementById('submit').addEventListener('click', function() {
|
|
||||||
var url = document.getElementById('url_box').value;
|
|
||||||
const response_area = document.getElementById('response-area');
|
const response_area = document.getElementById('response-area');
|
||||||
|
const submit_button = document.getElementById('submit')
|
||||||
|
submit_button.addEventListener('click', function() {
|
||||||
|
var url = document.getElementById('url_box').value;
|
||||||
|
|
||||||
if (!url) {
|
if (!url) {
|
||||||
response_area.innerText = 'Please enter a URL.';
|
response_area.innerText = 'Please enter a URL.';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else{
|
else {
|
||||||
response_area.innerText = "Sending URL and retriving transcript."
|
document.getElementById('url_box').value = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First, process the URL
|
||||||
fetch('/process_url', {
|
fetch('/process_url', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
@@ -19,13 +20,61 @@ document.addEventListener("DOMContentLoaded", (event) => {
|
|||||||
},
|
},
|
||||||
body: new URLSearchParams({ url: url })
|
body: new URLSearchParams({ url: url })
|
||||||
})
|
})
|
||||||
.then(response => response.text())
|
.then(response => {
|
||||||
.then(data => {
|
if (!response.ok) {
|
||||||
response_area.innerText = data;
|
throw new Error('Network response was not ok');
|
||||||
|
}
|
||||||
|
// Extract the text from the response body
|
||||||
|
return response.text(); // Use .json() if the response is JSON
|
||||||
|
})
|
||||||
|
.then(text => {
|
||||||
|
submit_button.style.display = "none";
|
||||||
|
if (text === "Processing started. Check /stream_output for updates.") {
|
||||||
|
streamOutput(response_area);
|
||||||
|
} else {
|
||||||
|
response_area.innerText = text; // Show any other response message
|
||||||
|
submit_button.style.display = "flex";
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.catch(error => {
|
.catch(error => {
|
||||||
console.error('Error:', error);
|
console.error('Error processing URL:', error);
|
||||||
response_area.innerText = 'An error occurred. Please try again.';
|
response_area.innerText = 'Error processing URL: ' + error.message;
|
||||||
|
submit_button.style.display = "flex";
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Stream the plain-text body of `/stream_output` into `response_area`.
 *
 * The area is cleared first; each decoded chunk is appended and the area is
 * scrolled to the bottom. The submit button is shown again when the stream
 * ends or fails.
 *
 * @param {HTMLElement} response_area Element that receives the streamed text.
 */
function streamOutput(response_area) {
    const submit_button = document.getElementById('submit');
    response_area.innerHTML = "";

    // Chunks are inserted as text, never parsed as HTML: the reply is derived
    // from an arbitrary video transcript and must not be able to inject
    // markup. NOTE(review): any HTML in the reply is now shown literally.
    function appendText(text) {
        if (!text) {
            return;
        }
        response_area.insertAdjacentText('beforeend', text);
        response_area.scrollTop = response_area.scrollHeight;
    }

    fetch('/stream_output')
        .then(response => {
            if (!response.ok) {
                throw new Error('Network response was not ok');
            }

            const reader = response.body.getReader();
            const decoder = new TextDecoder("utf-8");

            // Each step returns its promise so a failed read reaches the
            // .catch() below instead of becoming an unhandled rejection.
            function readStream() {
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush bytes of a multi-byte character that were
                        // still buffered in the decoder.
                        appendText(decoder.decode());
                        return;
                    }
                    appendText(decoder.decode(value, { stream: true }));
                    return readStream();
                });
            }

            return readStream();
        })
        .then(() => {
            submit_button.style.display = "flex";
        })
        .catch(error => {
            console.error('Error fetching stream:', error);
            response_area.innerText = 'Error fetching stream: ' + error.message;
            submit_button.style.display = "flex";
        });
}
|
||||||
|
|||||||
@@ -40,9 +40,10 @@ body .content {
|
|||||||
display: block;
|
display: block;
|
||||||
height: 90%;
|
height: 90%;
|
||||||
min-height: 90vh;
|
min-height: 90vh;
|
||||||
overflow: auto;
|
text-wrap: wrap;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
align-content: flex-end;
|
align-content: flex-end;
|
||||||
|
overflow-y: auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
.form_box {
|
.form_box {
|
||||||
|
|||||||
Reference in New Issue
Block a user