Dockerized
This commit is contained in:
62
app/app.py
Normal file
62
app/app.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from flask import Flask, render_template, Response, request
|
||||
from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter
|
||||
from asyncio import sleep
|
||||
from datetime import datetime
|
||||
import threading, pytz
|
||||
|
||||
|
||||
|
||||
# Flask application object: static assets are served from website/static and
# templates (index.html) are looked up in website/.
app = Flask(__name__, static_folder="website/static", template_folder="website")
|
||||
|
||||
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||||
|
||||
@app.route('/process_url', methods=['POST'])
def process_url():
    """Validate a submitted video URL and stage the assistant run.

    Reads the ``url`` form field, extracts the video ID, fetches the
    transcript and stores an un-started worker thread in the module-level
    ``thread`` global; ``/stream_output`` is the route that starts it.

    Returns:
        A plain-text explanation when the ID or the transcript cannot be
        obtained, otherwise a 200 ``Response`` telling the client to poll
        ``/stream_output``.
    """
    global thread

    # Log entries are stamped in US Eastern time.
    eastern_now = datetime.now(pytz.timezone('America/New_York'))
    stamp = eastern_now.strftime('%Y-%m-%d %H:%M:%S')
    log(f"\n\n\n## New Entry at {stamp}\n\n")

    url = request.form['url']
    log(f"URL: {url}\n")

    # Step 1: URL -> video ID.
    video_id = get_video_id(url)
    if not video_id:
        log(f"Could not parse video id from URL: {url}")
        return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
    log(f"Video ID: {video_id}\n\n")

    # Step 2: video ID -> transcript text.
    transcript = get_auto_transcript(video_id)
    if not transcript:
        log("## Error: could not retrieve transcript, Assistant won't be called.")
        return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."

    # Step 3: prepare (but do not start) the worker. The one-element tuple is
    # deliberate: a bare string would be unpacked character by character.
    worker_args = (transcript,)
    thread = threading.Thread(
        name="create_stream",
        target=create_and_stream,
        args=worker_args,
    )
    log("Stream preperation complete, sending reply...\n\n")

    # script.js compares this exact body text to decide whether to stream.
    return Response(
        "Processing started. Check /stream_output for updates.",
        content_type='text/plain',
        status=200,
    )
|
||||
|
||||
@app.route('/stream_output')
def stream_output():
    """Start the staged worker thread and stream its output as plain text.

    The worker is the un-started ``thread`` global prepared by
    ``/process_url``. Deltas are taken from ``output_stream.buffer`` and
    yielded as UTF-8 bytes until the producer has finished *and* the buffer
    is empty.

    Returns:
        A streaming 200 ``Response``, or a 409 plain-text ``Response`` when
        there is no worker waiting to be started.
    """
    import time  # function-scope import: only this handler needs a blocking sleep

    # Capture the worker once so a later /process_url call cannot swap it
    # out from under a stream that is already running.
    worker = globals().get("thread")
    if worker is None or worker.ident is not None:
        # Either /process_url was never called, or this worker was already
        # started by an earlier request (a Thread can only be started once).
        log("\n## Error: /stream_output requested without a pending stream thread.\n")
        return Response(
            "No pending request to stream. Submit a URL first.",
            content_type='text/plain',
            status=409,
        )

    def yoink():
        log("<details>\n<summary>Starting stream thread...</summary>\n\n")
        # Clear anything left behind by a stream that was aborted before it
        # could reset; safe because the producer has not started yet.
        output_stream.reset()
        worker.start()
        log("Starting to stream output.")

        producer_finished = False
        while True:
            # Drain everything currently buffered.
            while output_stream.buffer:
                yield bytes(output_stream.buffer.pop(0), encoding="utf-8")
            if producer_finished:
                # The drain above ran after the producer was seen finished,
                # so nothing can still be pending.
                break
            # A dead worker counts as finished even if it never set `done`
            # (e.g. it raised), otherwise this loop would spin forever.
            producer_finished = output_stream.done or not worker.is_alive()
            if not producer_finished:
                time.sleep(0.05)

        log(f"\nStream successfully completely.\n\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
        worker.join()
        output_stream.reset()
        log("\n### Task completed sucessfully without errors!")

    return Response(yoink(), content_type='text/plain', status=200)
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Development entry point only.
    # NOTE(review): debug=True enables the interactive Werkzeug debugger;
    # confirm the container does not launch the app through this path.
    app.run(debug=True)
|
||||
129
app/main.py
Normal file
129
app/main.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# To parse video ids
|
||||
import re
|
||||
|
||||
# Youtube Transcript stuff import
|
||||
import youtube_transcript_api._errors
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from youtube_transcript_api.formatters import TextFormatter
|
||||
|
||||
# OpenAI API stuff import
|
||||
from openai import AssistantEventHandler
|
||||
from openai import OpenAI
|
||||
|
||||
### For streaming
|
||||
from typing_extensions import override
|
||||
import asyncio
|
||||
# Alias for asyncio.run, used to run a coroutine to completion from
# synchronous code (the event-handler callbacks below call it per delta).
awaiter = asyncio.run
|
||||
|
||||
# The StreamOutput class to handle streaming
|
||||
class StreamOutput:
    """Hand-off buffer between the assistant stream and the HTTP response.

    The producer calls ``send_delta`` for each piece of text; the consumer
    pops items from the front of ``buffer`` and watches ``done``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Most recent delta received.
        self.delta: str = ""
        # Every delta received so far, concatenated.
        self.response: str = ""
        # Set by the producer once the run has finished.
        self.done: bool = False
        # Deltas not yet consumed, oldest first. Kept as a plain list because
        # consumers compare it against ``[]`` and use ``pop(0)``.
        self.buffer: list = []

    def reset(self):
        """Restore the initial state so the instance can serve another run."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer = []

    async def send_delta(self, delta):
        """Record one piece of streamed text.

        Every delta is appended to both ``response`` and ``buffer``. The
        buffer previously skipped a delta that equalled the last unconsumed
        one; because that depended on how far the consumer had drained the
        buffer, legitimately repeated tokens were dropped at random and the
        streamed text could differ from ``response``.

        Args:
            delta: The text fragment to record.
        """
        self.delta = delta
        self.response += delta
        self.buffer.append(delta)
|
||||
|
||||
# To get the env var
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# For logging
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
|
||||
def log(str):
    """Append text to ``log.md`` in the current working directory.

    The file is opened as UTF-8 explicitly so that non-ASCII text is written
    the same way regardless of the locale the process runs under.

    Args:
        str: The text to append verbatim (no newline is added). The name
            shadows the builtin inside this function; it is kept so existing
            keyword callers continue to work.
    """
    with open("log.md", "a", encoding="utf-8") as file:
        file.write(str)
|
||||
|
||||
### OpenAI Config
|
||||
|
||||
# Setting up OpenAI Client with API Key
|
||||
# The key is read from the environment after load_dotenv() has run.
api_key = os.getenv("OPENAI_API_KEY")

# Client bound to a fixed organization and project.
client = OpenAI(
    api_key=api_key,
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
)

# ID of the "screw bardo" assistant, which is configured to produce notes and
# five Q&A items from any given YouTube transcript.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
||||
|
||||
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
|
||||
class EventHandler(AssistantEventHandler):
    """Forward assistant stream events into the shared ``output_stream``.

    Each callback is synchronous, so the ``send_delta`` coroutine is run to
    completion through ``awaiter``.
    """

    @override
    def on_text_created(self, text) -> None:
        """Emit the fixed response header when a text message begins."""
        awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))

    @override
    def on_text_delta(self, delta, snapshot):
        """Forward the text of one incremental delta."""
        awaiter(output_stream.send_delta(delta.value))

    @override
    def on_tool_call_created(self, tool_call):
        """Abort the run: this assistant is not expected to use tools.

        Raises:
            Exception: Always.
        """
        raise Exception("Assistant shouldn't be calling tools.")
|
||||
|
||||
def create_and_stream(transcript):
    """Run the assistant on a transcript and stream its reply.

    Creates a thread containing ``transcript`` as the single user message,
    runs it with ``EventHandler`` and blocks until the stream ends.

    ``output_stream.done`` is set in a ``finally`` block so that a consumer
    waiting on it is released even when the API call raises; the exception
    itself still propagates.

    Args:
        transcript: The transcript text sent as the user message.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler()
        ) as stream:
            stream.until_done()
    finally:
        output_stream.done = True
|
||||
|
||||
# Short links: youtu.be/<id>. The dot is escaped so only a literal "." matches.
_YOUTU_BE_ID = re.compile(r'(?<=youtu\.be/)([A-Za-z0-9_-]{11})')

# Long links: youtube.com/watch?...v=<id> (with `v` in any query position)
# as well as the /shorts/, /embed/ and /live/ path forms.
_YOUTUBE_COM_ID = re.compile(
    r'youtube\.com/(?:watch\?(?:[^#\s]*?&)?v=|shorts/|embed/|live/)'
    r'([A-Za-z0-9_-]{11})'
)


def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: The URL text to search. Empty or ``None`` input is treated as
            "no ID found".

    Returns:
        The video ID, or ``None`` when the URL matches none of the
        recognised forms.
    """
    if not url:
        return None

    match = _YOUTU_BE_ID.search(url) or _YOUTUBE_COM_ID.search(url)
    if match is None:
        # Couldn't parse video ID from URL
        return None
    return match.group(1)
|
||||
|
||||
# Fetches the English transcript for a video and returns it as plain text.
def get_auto_transcript(video_id):
    """Return the English transcript of a video as plain text.

    Any retrieval failure reported by ``youtube_transcript_api`` is logged
    and turned into ``None``, so the caller can show its "could not retrieve
    transcript" message instead of failing. Previously only
    ``TranscriptsDisabled`` was handled.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when it
        could not be retrieved.
    """
    trans_api_errors = youtube_transcript_api._errors
    try:
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except trans_api_errors.CouldNotRetrieveTranscript as e:
        # Catch the shared base class rather than a single failure mode.
        log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
        return None

    formatter = TextFormatter()
    return formatter.format_transcript(transcript)
|
||||
|
||||
# Single module-level StreamOutput instance written to by EventHandler and
# create_and_stream above.
output_stream = StreamOutput()
|
||||
30
app/website/index.html
Normal file
30
app/website/index.html
Normal file
@@ -0,0 +1,30 @@
|
||||
<!DOCTYPE html>
<html lang="en-us">

<head>

    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <title>Screw You Bardo</title>

    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
    <link rel="icon" type="image/x-icon" href="https://www.foreverpyrite.com/favicon.ico">
    <script src="{{ url_for('static', filename='script.js')}}"></script>

</head>

<body>

    <div class="content">
        <pre id="response-area">Response will appear here. </pre>
        <div class="form_box">
            <!-- <input> is a void element, so it takes no end tag. The click
                 handler is attached in script.js, so no inline onclick. -->
            <input id="url_box" placeholder="Paste the lecture URL here." autofocus>
            <input id="submit" type="submit">
        </div>

    </div>
</body>

</html>
|
||||
BIN
app/website/static/font-files/nimbus-sans-d-ot-light.woff
Normal file
BIN
app/website/static/font-files/nimbus-sans-d-ot-light.woff
Normal file
Binary file not shown.
BIN
app/website/static/font-files/nimbus-sans-d-ot-light.woff2
Normal file
BIN
app/website/static/font-files/nimbus-sans-d-ot-light.woff2
Normal file
Binary file not shown.
80
app/website/static/script.js
Normal file
80
app/website/static/script.js
Normal file
@@ -0,0 +1,80 @@
|
||||
// Wire up the submit button once the DOM is ready.
document.addEventListener("DOMContentLoaded", () => {
    // Exact body text the server sends when streaming may begin.
    const STREAM_READY = "Processing started. Check /stream_output for updates.";

    const responseArea = document.getElementById('response-area');
    const submitButton = document.getElementById('submit');

    submitButton.addEventListener('click', async () => {
        const urlBox = document.getElementById('url_box');
        const url = urlBox.value;

        if (!url) {
            responseArea.innerText = 'Please enter a URL.';
            return;
        }
        urlBox.value = "";

        try {
            // First, process the URL
            const response = await fetch('/process_url', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded',
                },
                body: new URLSearchParams({ url: url })
            });
            if (!response.ok) {
                throw new Error('Network response was not ok');
            }

            const text = await response.text();
            submitButton.style.display = "none";

            if (text === STREAM_READY) {
                streamOutput(responseArea);
            } else {
                // Any other body is a message for the user; allow a retry.
                responseArea.innerText = text;
                submitButton.style.display = "flex";
            }
        } catch (error) {
            console.error('Error processing URL:', error);
            responseArea.innerText = 'Error processing URL: ' + error.message;
            submitButton.style.display = "flex";
        }
    });
});
|
||||
|
||||
/**
 * Fetch /stream_output and append the streamed text to `response_area`.
 *
 * Chunks are inserted as text nodes rather than through innerHTML, so any
 * markup in the streamed text is shown literally instead of being parsed.
 * The submit button is shown again when the stream ends, whether it
 * completed or failed.
 *
 * @param {HTMLElement} response_area Element that receives the output.
 */
function streamOutput(response_area) {
    const submitButton = document.getElementById('submit');
    response_area.textContent = "";

    fetch('/stream_output')
        .then(response => {
            if (!response.ok) {
                throw new Error('Stream request failed with status ' + response.status);
            }

            const reader = response.body.getReader();
            const decoder = new TextDecoder("utf-8");

            function readStream() {
                // Returning the promise lets a failed read reach .catch below.
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush bytes held back from an incomplete
                        // multi-byte sequence at the end of the stream.
                        const tail = decoder.decode();
                        if (tail) {
                            response_area.append(tail);
                        }
                        return;
                    }

                    // Decode and append the chunk as plain text.
                    response_area.append(decoder.decode(value, { stream: true }));
                    response_area.scrollTop = response_area.scrollHeight;

                    // Continue reading
                    return readStream();
                });
            }

            // Start reading the stream
            return readStream();
        })
        .catch(error => {
            console.error('Error fetching stream:', error);
            response_area.innerText = 'Error fetching stream: ' + error.message;
        })
        .finally(() => {
            submitButton.style.display = "flex";
        });
}
|
||||
78
app/website/static/style.css
Normal file
78
app/website/static/style.css
Normal file
@@ -0,0 +1,78 @@
|
||||
|
||||
|
||||
@font-face {
    font-family: 'nimbus_sans_d_otlight';
    src: url('font-files/nimbus-sans-d-ot-light.woff2') format('woff2'),
         url('font-files/nimbus-sans-d-ot-light.woff') format('woff');
    font-weight: normal;
    font-style: normal;
}

* {
    /* Generic fallback keeps text sans-serif if the web font fails to load. */
    font-family: 'nimbus_sans_d_otlight', sans-serif;
    color: white;
}

body {
    display: flex;
    flex-direction: column;
    width: 100%;
    max-width: 100vw;
    height: 100%;
    min-height: 100vh;
    max-height: 100vh;
    margin: 0;
    background-color: rgb(31, 31, 31);
}

body .content {
    display: flex;
    flex-direction: column;
    align-self: center;
    width: 75%;
    max-width: 65vw;
    height: 100%;
    min-height: 100vh;
    max-height: 100vh;
}

#response-area {
    display: block;
    height: 90%;
    min-height: 90vh;
    /* Fallback for browsers without `text-wrap`: wrap long lines in the
       <pre> while preserving its whitespace. */
    white-space: pre-wrap;
    text-wrap: wrap;
    flex-wrap: wrap;
    align-content: flex-end;
    overflow-y: auto;
}

.form_box {
    display: flex;
    width: 100%;
    justify-content: space-between;
    align-content: space-around;
}

#url_box {
    display: flex;
    height: 5%;
    min-height: 5vh;
    width: 90%;
    min-width: 80vh;
    background-color: rgb(31, 31, 31);
}

#submit {
    display: flex;
    width: 5%;
    min-width: 3vw;
    background-color: rgb(49, 49, 49);
}

#submit:hover {
    cursor: pointer;
    background-color: rgb(31, 31, 31);
}

input {
    border-radius: 15px;
}
|
||||
Reference in New Issue
Block a user