Merge branch 'dev'

2024-11-20 12:09:28 -05:00
parent 78af3ed15b cb4a781d3f
commit 529099a815
13 changed files with 187 additions and 128 deletions
--- a/app/app.py
+++ b/app/app.py
@@ -0,0 +1,62 @@
+from flask import Flask, render_template, Response, request
+from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter 
+from asyncio import sleep
+from datetime import datetime
+import threading, pytz
+
+
+  
+# Flask application object; static assets are served from website/static and
+# templates are looked up in website/.
+app = Flask(
+    __name__,
+    static_folder="website/static",
+    template_folder="website",
+)
+
+@app.route('/')
+def home():
+    """Serve the landing page by rendering the ``index.html`` template."""
+    landing_page = render_template('index.html')
+    return landing_page
+
+@app.route('/process_url', methods=['POST'])
+def process_url():
+    """Validate a submitted URL, fetch its transcript and prepare the worker thread.
+
+    Reads the ``url`` form field and logs each step. On success an *unstarted*
+    ``threading.Thread`` targeting ``create_and_stream`` is stored in the
+    module-level ``thread`` global; it is not started here.
+
+    Returns:
+        A plain error string when no video ID can be parsed or no transcript
+        comes back; otherwise a 200 ``text/plain`` ``Response``.
+    """
+    global thread
+
+    eastern = pytz.timezone('America/New_York')
+    stamp = datetime.now(eastern).strftime('%Y-%m-%d %H:%M:%S')
+    log(f"\n\n\n## New Entry at {stamp}\n\n")
+
+    submitted_url = request.form['url']
+    log(f"URL: {submitted_url}\n")
+
+    # Pull the video ID out of the submitted URL.
+    video_id = get_video_id(submitted_url)
+    if not video_id:
+        log(f"Could not parse video id from URL: {submitted_url}")
+        return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
+    log(f"Video ID: {video_id}\n\n")
+
+    # Fetch the transcript text for that video ID.
+    transcript_text = get_auto_transcript(video_id)
+    if not transcript_text:
+        log("## Error: could not retrieve transcript, Assistant won't be called.")
+        return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."
+
+    # ``args`` must be a one-element tuple (note the trailing comma); a bare
+    # string would be unpacked character by character.
+    worker_args = (transcript_text,)
+    thread = threading.Thread(
+        name="create_stream",
+        target=create_and_stream,
+        args=worker_args,
+    )
+    log("Stream preperation complete, sending reply...\n\n")
+    return Response(
+        "Processing started. Check /stream_output for updates.",
+        content_type='text/plain',
+        status=200,
+    )
+
+@app.route('/stream_output')
+def stream_output():
+    """Start the prepared worker thread and stream its output to the client.
+
+    The response body is a generator that starts the module-level ``thread``,
+    then relays every string queued in ``output_stream.buffer`` as UTF-8
+    bytes until the worker is finished and the buffer has been drained.
+
+    Returns:
+        A 200 ``text/plain`` streaming ``Response``.
+    """
+    # Local import keeps this block self-contained; a plain blocking sleep is
+    # all the polling loop needs (no event loop has to be created per poll).
+    import time
+
+    def yoink():
+        log("<details>\n<summary>Starting stream thread...</summary>\n\n")
+        thread.start()
+        # Start streaming output from output_stream
+        log("Starting to stream output.")
+        while True:
+            # Sample the finished state BEFORE looking at the buffer: the
+            # worker sets ``done`` only after its last append, so an empty
+            # buffer observed afterwards really means everything was sent.
+            finished = output_stream.done or not thread.is_alive()
+            if output_stream.buffer:
+                delta = output_stream.buffer.pop(0)
+                yield bytes(delta, encoding="utf-8")
+            elif finished:
+                break
+            else:
+                time.sleep(0.05)
+
+        # ``done`` stays False when the worker died before signalling
+        # completion (e.g. it raised); report that instead of claiming success.
+        completed = output_stream.done
+        if completed:
+            log(f"\nStream successfully completely.\n\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
+        else:
+            log(f"\nStream thread exited before signalling completion.\n\n</details>\n\n---\n\n### Partial Assistant Response:\n{output_stream.response}\n\n---\n\n")
+        output_stream.reset()
+        thread.join()
+        if completed:
+            log("\n### Task completed sucessfully without errors!")
+
+    return Response(yoink(), content_type='text/plain', status=200)
+
+
+
+
+# Start Flask's built-in server only when this file is executed directly.
+# NOTE(review): debug=True turns on Flask's debug mode — confirm this entry
+# point is never used for a production deployment.
+if __name__ == '__main__': 
+    app.run(debug=True)
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,129 @@
+# To parse video ids
+import re
+
+# Youtube Transcript stuff import
+import youtube_transcript_api._errors
+from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api.formatters import TextFormatter
+
+# OpenAI API stuff import
+from openai import AssistantEventHandler
+from openai import OpenAI
+
+### For streaming
+from typing_extensions import override
+import asyncio
+# Alias for asyncio.run, used by synchronous code in this project to run a
+# coroutine to completion.
+awaiter = asyncio.run
+
+# The StreamOutput class to handle streaming
+class StreamOutput:
+  """Holder for streamed text: latest delta, accumulated response, pending
+  buffer of deltas, and a ``done`` flag."""
+
+  def __init__(self):
+    # A new instance starts in exactly the state reset() produces.
+    self.reset()
+
+  def reset(self):
+    """Put every field back to its initial, empty state."""
+    self.delta: str = ""
+    self.response: str = ""
+    self.done: bool = False
+    self.buffer: list = []
+
+  async def send_delta(self, delta):
+    """Record ``delta`` and queue it for delivery.
+
+    ``delta`` always becomes ``self.delta`` and is appended to
+    ``self.response``. It is appended to ``self.buffer`` unless the buffer is
+    non-empty and its newest entry already equals the delta.
+    """
+    self.delta = delta
+    self.response += delta
+    try:
+      # Queue when the buffer is empty or the newest entry differs.
+      should_queue = not self.buffer or self.delta != self.buffer[-1]
+    except IndexError as index_error:
+      # The buffer emptied between the emptiness check and the lookup.
+      log(f"\nCaught IndexError: {str(index_error)}")
+      should_queue = True
+    if should_queue:
+      self.buffer.append(delta)
+
+# To get the env var
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+# For logging
+import pytz
+from datetime import datetime
+
+def log(str):
+  """Append ``str`` to ``log.md`` in the current working directory.
+
+  The file is opened with an explicit UTF-8 encoding so that non-ASCII text
+  is written the same way on every platform instead of depending on the
+  locale's default encoding.
+
+  Args:
+    str: Text to append. The parameter name shadows the builtin; it is kept
+      unchanged so existing callers are unaffected.
+  """
+  with open("log.md", "at", encoding="utf-8") as file:
+    file.write(str)
+
+### OpenAI Config 
+
+# Build the OpenAI client. The API key is read from the OPENAI_API_KEY
+# environment variable (load_dotenv() has already run above); the organization
+# and project IDs are hard-coded.
+# NOTE(review): os.getenv returns None when the variable is unset — confirm how
+# the client is expected to behave in that case.
+api_key = os.getenv("OPENAI_API_KEY")
+client = OpenAI(
+  organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
+  project="proj_NGz8Kux8CSka7DRJucAlDCz6",
+  api_key=api_key
+)
+
+# ID of the "screw bardo" assistant, which is configured to make notes and
+# 5 Q&A based on any given YouTube transcript.
+asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
+
+# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
+class EventHandler(AssistantEventHandler):
+  """Event handler that forwards assistant text into ``output_stream``
+  instead of printing it."""
+
+  @override
+  def on_text_created(self, text) -> None:
+    # Queue a fixed header when this callback fires.
+    header = "Response Recieved:\n\nScrew-Bardo:\n\n"
+    awaiter(output_stream.send_delta(header))
+
+  @override
+  def on_text_delta(self, delta, snapshot):
+    # Forward only the fragment carried by this delta.
+    fragment = delta.value
+    awaiter(output_stream.send_delta(fragment))
+
+  def on_tool_call_created(self, tool_call):
+    # Any tool call is treated as an error.
+    raise Exception("Assistant shouldn't be calling tools.")
+
+def create_and_stream(transcript):
+  """Send ``transcript`` to the assistant and stream the reply to completion.
+
+  Creates a thread containing ``transcript`` as a single user message, runs
+  the assistant with ``EventHandler`` and blocks until the stream ends.
+
+  ``output_stream.done`` is set on every exit path, including failures, so a
+  consumer polling that flag can never be left waiting forever. Any exception
+  is logged and then re-raised unchanged.
+
+  Args:
+    transcript: Text used as the content of the user message.
+  """
+  try:
+    with client.beta.threads.create_and_run_stream(
+        assistant_id=asst_screw_bardo_id,
+        thread={
+            "messages": [{"role": "user", "content": transcript}]
+        },
+        event_handler=EventHandler()
+    ) as stream:
+        stream.until_done()
+  except Exception as error:
+    log(f"\n\n# Exception while streaming Assistant response:\n \n{error}\n")
+    raise
+  finally:
+    # Signal completion even on error so the reader stops polling.
+    output_stream.done = True
+
+# URL shapes tried in order; each captures the 11-character video ID in group 1.
+_VIDEO_ID_PATTERNS = tuple(re.compile(pattern) for pattern in (
+  # Short links: youtu.be/<id> (the dot is escaped so only a literal "." matches).
+  r'youtu\.be/([A-Za-z0-9_-]{11})',
+  # Watch links: "v" may be the first query parameter or follow others.
+  r'youtube\.com/watch\?(?:[^#\s&]*&)*?v=([A-Za-z0-9_-]{11})',
+  # Path-style links: /shorts/<id>, /embed/<id>, /live/<id>.
+  r'youtube\.com/(?:shorts|embed|live)/([A-Za-z0-9_-]{11})',
+))
+
+def get_video_id(url):
+  """Extract the 11-character YouTube video ID from ``url``.
+
+  Recognises ``youtu.be/<id>``, ``youtube.com/watch?...v=<id>`` (with ``v``
+  anywhere in the query string) and ``youtube.com/{shorts,embed,live}/<id>``.
+
+  Args:
+    url: The URL text to inspect.
+
+  Returns:
+    The video ID string, or ``None`` when ``url`` is not a string or no
+    supported pattern matches.
+  """
+  if not isinstance(url, str):
+    return None
+
+  for pattern in _VIDEO_ID_PATTERNS:
+    match = pattern.search(url)
+    if match:
+      return match.group(1)
+
+  # Couldn't parse video ID from URL
+  return None
+
+# Fetches the English transcript for a video and returns it as plain text.
+def get_auto_transcript(video_id):
+  """Return the English transcript of ``video_id`` as plain text.
+
+  Any failure the transcript library reports while retrieving the transcript
+  (transcripts disabled, no English transcript, unavailable video, ...) is
+  logged and turned into ``None``, so the caller can show its generic
+  "could not retrieve transcript" message instead of crashing.
+
+  Args:
+    video_id: The YouTube video ID to look up.
+
+  Returns:
+    The transcript formatted by ``TextFormatter``, or ``None`` when it could
+    not be retrieved.
+  """
+  trans_api_errors = youtube_transcript_api._errors
+  try:
+    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
+  # CouldNotRetrieveTranscript is the library's common base class for retrieval
+  # errors, so TranscriptsDisabled is still handled here.
+  except trans_api_errors.CouldNotRetrieveTranscript as e:
+    log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
+    return None
+
+  formatter = TextFormatter()
+  return formatter.format_transcript(transcript)
+
+# Single module-level StreamOutput instance: EventHandler and create_and_stream
+# write to it, and app.py imports it to read the buffered deltas.
+output_stream = StreamOutput()
--- a/app/website/index.html
+++ b/app/website/index.html
@@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<html lang="en-us">
+
+<head>
+
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+
+    <title>Screw You Bardo</title>
+
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+    <link rel="icon" type="image/x-icon" href="https://www.foreverpyrite.com/favicon.ico">
+    <!-- script.js waits for DOMContentLoaded before touching the elements below. -->
+    <script src="{{ url_for('static', filename='script.js')}}"></script>
+
+</head>
+
+<body>
+
+    <div class="content">
+        <!-- script.js writes status messages and the streamed response here. -->
+        <pre id="response-area">Response will appear here.</pre>
+        <div class="form_box">
+            <!-- <input> is a void element and takes no closing tag. The click
+                 handler is attached in script.js, so no inline onclick is needed. -->
+            <input id="url_box" placeholder="Paste the lecture URL here." autofocus>
+            <input id="submit" type="submit">
+        </div>
+
+
+    </div>
+</body>
+
+</html>
--- a/app/website/static/font-files/nimbus-sans-d-ot-light.woff
+++ b/app/website/static/font-files/nimbus-sans-d-ot-light.woff
--- a/app/website/static/font-files/nimbus-sans-d-ot-light.woff2
+++ b/app/website/static/font-files/nimbus-sans-d-ot-light.woff2
--- a/app/website/static/script.js
+++ b/app/website/static/script.js
@@ -0,0 +1,80 @@
+// Wire up the submit button once the DOM is ready: POST the URL to
+// /process_url, then either start streaming or show the server's message.
+document.addEventListener("DOMContentLoaded", () => {
+    const response_area = document.getElementById('response-area');
+    const submit_button = document.getElementById('submit');
+
+    submit_button.addEventListener('click', async () => {
+        const url_box = document.getElementById('url_box');
+        const url = url_box.value;
+
+        // Guard: nothing to submit.
+        if (!url) {
+            response_area.innerText = 'Please enter a URL.';
+            return;
+        }
+        url_box.value = "";
+
+        try {
+            // First, process the URL
+            const response = await fetch('/process_url', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                },
+                body: new URLSearchParams({ url: url })
+            });
+            if (!response.ok) {
+                throw new Error('Network response was not ok');
+            }
+
+            // The endpoint answers with plain text.
+            const text = await response.text();
+            submit_button.style.display = "none";
+
+            if (text !== "Processing started. Check /stream_output for updates.") {
+                // Any other reply is a message for the user; show it and
+                // make the button available again.
+                response_area.innerText = text;
+                submit_button.style.display = "flex";
+                return;
+            }
+            streamOutput(response_area);
+        } catch (error) {
+            console.error('Error processing URL:', error);
+            response_area.innerText = 'Error processing URL: ' + error.message;
+            submit_button.style.display = "flex";
+        }
+    });
+});
+
+/**
+ * Fetch /stream_output and append the streamed text to `response_area`.
+ *
+ * Chunks are appended as plain text nodes, never parsed as HTML, so markup
+ * characters in the streamed text are displayed literally and cannot inject
+ * elements or scripts into the page. The submit button is shown again when
+ * the stream ends, whether it finished normally or failed.
+ *
+ * @param {HTMLElement} response_area Element that receives the streamed text.
+ * @returns {Promise<void>} Resolves when streaming has finished; never rejects.
+ */
+async function streamOutput(response_area) {
+    // Clear the placeholder / previous response.
+    response_area.textContent = "";
+
+    // Append text without re-parsing the existing content, then keep the
+    // newest output scrolled into view.
+    const appendText = (text) => {
+        if (text) {
+            response_area.append(text);
+            response_area.scrollTop = response_area.scrollHeight;
+        }
+    };
+
+    try {
+        const response = await fetch('/stream_output');
+        if (!response.ok) {
+            throw new Error('Network response was not ok');
+        }
+
+        const reader = response.body.getReader();
+        const decoder = new TextDecoder("utf-8");
+
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done) {
+                break;
+            }
+            // stream: true keeps a multi-byte character that is split across
+            // two chunks intact.
+            appendText(decoder.decode(value, { stream: true }));
+        }
+        // Flush whatever the decoder is still holding.
+        appendText(decoder.decode());
+    } catch (error) {
+        console.error('Error fetching stream:', error);
+        response_area.innerText = 'Error fetching stream: ' + error.message;
+    } finally {
+        // Always give the submit button back, even after a failure.
+        document.getElementById('submit').style.display = "flex";
+    }
+}
--- a/app/website/static/style.css
+++ b/app/website/static/style.css
@@ -0,0 +1,78 @@
+
+
+/* Self-hosted Nimbus Sans D OT Light; woff2 is listed first, woff second. */
+@font-face {
+    font-family: 'nimbus_sans_d_otlight';
+    src: url('font-files/nimbus-sans-d-ot-light.woff2') format('woff2'),
+         url('font-files/nimbus-sans-d-ot-light.woff') format('woff');
+    font-weight: normal;
+    font-style: normal;
+}
+
+/* Global defaults. The generic sans-serif fallback keeps the look close to
+   the intended one if the web font fails to load. */
+* {
+    font-family: 'nimbus_sans_d_otlight', sans-serif;
+    color: white;
+}
+
+/* Full-viewport dark page laid out as a vertical flex column. */
+body {
+    display: flex;
+    flex-direction: column;
+    width: 100%;
+    max-width: 100vw;
+    height: 100%;
+    min-height: 100vh;
+    max-height: 100vh;
+    margin: 0;
+    background-color: rgb(31, 31, 31);
+}
+
+/* Centered column holding the response area and the form. */
+body .content {
+    display: flex;
+    flex-direction: column;
+    align-self: center;
+    width: 75%;
+    max-width: 65vw;
+    height: 100%;
+    min-height: 100vh;
+    max-height: 100vh;
+}
+
+/* Scrollable response output. white-space: pre-wrap is the widely supported
+   way to wrap text while preserving line breaks; text-wrap: wrap is kept for
+   browsers that implement it. */
+#response-area {
+    display: block;
+    height: 90%;
+    min-height: 90vh;
+    white-space: pre-wrap;
+    text-wrap: wrap;
+    flex-wrap: wrap;
+    align-content: flex-end;
+    overflow-y: auto;
+}
+
+/* Row containing the URL input and the submit button. */
+.form_box {
+    display: flex;
+    width: 100%;
+    justify-content: space-between;
+    align-content: space-around;
+}
+
+/* NOTE(review): min-width uses vh (viewport height) for a width — confirm
+   whether vw was intended. */
+#url_box {
+    display: flex;
+    height: 5%;
+    min-height: 5vh;
+    width: 90%;
+    min-width: 80vh;
+    background-color: rgb(31, 31, 31);
+}
+
+#submit {
+    display: flex;
+    width: 5%;
+    min-width: 3vw;
+    background-color: rgb(49, 49, 49);
+}
+#submit:hover {
+    cursor: pointer;
+    background-color: rgb(31, 31, 31);
+}
+
+input {
+    border-radius: 15px;
+}