Pulling from 'dev' (streaming is here)

2024-10-15 20:53:10 -04:00
parent 31736b386f 4bc7722162
commit 78af3ed15b
6 changed files with 170 additions and 76 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
-thread-killer.py
+log.md
 log.txt
 .env
 # Byte-compiled / optimized / DLL files
--- a/app.py
+++ b/app.py
@@ -1,7 +1,10 @@
-from flask import Flask, render_template, request
+from flask import Flask, render_template, Response, request
-from main import get_auto_transcript, get_video_id, create_and_stream
+from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter 
 from asyncio import sleep
 from datetime import datetime
-import pytz
+import threading, pytz
 app = Flask(__name__, static_folder="website/static", template_folder="website")
@@ -11,25 +14,49 @@ def home():
@app.route('/process_url', methods=['POST'])
 def process_url():
-    # Opens a file to log the video id and the assistants respone to see if I can further improve instructions:
+    global thread
-    log = open("log.txt", "at", 1)
+    log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n")
    url = request.form['url']
-    
+    log(f"URL: {url}\n")
    # Extract the video ID from the URL
    video_id = get_video_id(url)  # Modify this function to accept the URL
    if not video_id:
        log(f"Could not parse video id from URL: {url}")
        return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
    log(f"Video ID: {video_id}\n\n")
    # Get the transcript for that video ID
    transcript = get_auto_transcript(video_id)
    if (not transcript):
        log("## Error: could not retrieve transcript, Assistant won't be called.")
        return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."
-    # Process the transcript and stream the result.
+    thread = threading.Thread(name="create_stream", target=create_and_stream, args=(transcript,)) # The comma here is very intentional, it's so that it iterates it as a tuple rather than iterateing the string.
-    response = create_and_stream(transcript)
+    log("Stream preperation complete, sending reply...\n\n")
-    log.write(f"\n\n\n### New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n URL: {url}\n Video ID: {video_id}\n\nAssistant Response: \n{response}")
+    return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200)  # Add more detailed output if needed
    # Return a response
    return response  # Add more detailed output if needed
-if __name__ == '__main__':  # Change this line to properly check for main
+@app.route('/stream_output')
 def stream_output():
    """Stream the assistant's reply to the client as plain text.

    Starts the worker thread stored in the module-level ``thread`` global,
    then relays every delta that lands in ``output_stream.buffer`` until the
    worker marks the stream as done AND the buffer has been fully drained.

    Returns:
        A ``text/plain`` ``Response`` whose body is produced lazily by the
        inner generator.
    """
    def yoink():
        log("<details>\n<summary>Starting stream thread...</summary>\n\n")
        thread.start()
        # Start streaming output from output_stream
        log("Starting to stream output.")
        while True:
            # Read the flag BEFORE inspecting the buffer: if the worker had
            # already finished at this point, an empty buffer below really
            # means everything has been sent.
            finished = output_stream.done
            if output_stream.buffer:
                delta = output_stream.buffer.pop(0)
                yield delta.encode("utf-8")
            elif finished:
                # Done and drained. Exiting on ``done`` alone would drop any
                # deltas still queued and truncate the reply.
                break
            else:
                # Nothing queued yet; back off briefly before polling again.
                awaiter(sleep(0.05))
        log(f"\nStream successfully completely.\n\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
        output_stream.reset()
        thread.join()
        log("\n### Task completed sucessfully without errors!")
    return Response(yoink(), content_type='text/plain', status=200)
 if __name__ == '__main__': 
    app.run(debug=True)
--- a/log.md
+++ b/log.md
--- a/main.py
+++ b/main.py
@@ -1,4 +1,3 @@
 # To parse video ids
 import re
@@ -10,8 +9,43 @@ from youtube_transcript_api.formatters import TextFormatter
 # OpenAI API stuff import
 from openai import AssistantEventHandler
 from openai import OpenAI
-# For streaming
+
 ### For streaming
 from typing_extensions import override
 import asyncio
 awaiter = asyncio.run
 # The StreamOutput class to handle streaming
 class StreamOutput:
   """Accumulates streamed text deltas for a single assistant response.

   Attributes:
     delta: the most recent delta passed to ``send_delta``.
     response: concatenation of every delta received since the last reset.
     done: flag set by the producer once the stream has finished.
     buffer: deltas waiting to be consumed, oldest first.
   """

   def __init__(self):
     self.reset()

   def reset(self):
     """Return the object to its freshly-constructed state."""
     self.delta: str = ""
     self.response: str = ""
     self.done: bool = False
     self.buffer: list = []

   async def send_delta(self, delta):
     """Record ``delta`` and queue it for the consumer.

     The delta is always appended to ``response``. It is queued in
     ``buffer`` unless it is identical to the delta currently at the end of
     the buffer (consecutive duplicates are suppressed, as before).
     """
     self.delta = delta
     self.response += delta
     # A slice never raises, even if the buffer is emptied by another thread
     # between the emptiness check and the lookup, so no IndexError handling
     # is needed here.
     tail = self.buffer[-1:]
     if not tail or tail[0] != delta:
       self.buffer.append(delta)
 # To get the env var
 from dotenv import load_dotenv
@@ -19,32 +53,16 @@ import os
 load_dotenv()
 # For logging
 import pytz
 from datetime import datetime
 def log(str):
   """Append the text ``str`` to ``log.md`` in the current working directory.

   The file is opened in append/text mode on every call and closed again
   before returning. UTF-8 is forced so that non-ASCII text is written the
   same way regardless of the platform's default encoding.

   NOTE: the parameter shadows the builtin ``str``; the name is kept so that
   existing callers are unaffected.
   """
   with open("log.md", "at", encoding="utf-8") as file:
     file.write(str)
 ### OpenAI Config 
 # This is copy and pasted straight up from the quickstart guide:
 class EventHandler(AssistantEventHandler):
   """Event handler that echoes assistant activity to standard output."""

   @override
   def on_text_created(self, text) -> None:
     # Print the speaker prefix; the text deltas continue on the same line.
     print("\nassistant > ", end="", flush=True)

   @override
   def on_text_delta(self, delta, snapshot):
     # Emit each text fragment immediately, without a trailing newline.
     print(delta.value, end="", flush=True)

   def on_tool_call_created(self, tool_call):
     # Announce which kind of tool call was started.
     print(f"\nassistant > {tool_call.type}\n", flush=True)

   def on_tool_call_delta(self, delta, snapshot):
     # Only code-interpreter deltas are echoed; everything else is ignored.
     if delta.type != 'code_interpreter':
       return
     interpreter = delta.code_interpreter
     if interpreter.input:
       print(interpreter.input, end="", flush=True)
     if interpreter.outputs:
       print("\n\noutput >", flush=True)
       for item in interpreter.outputs:
         if item.type == "logs":
           print(f"\n{item.logs}", flush=True)
 # Setting up OpenAI Client with API Key
 api_key = os.getenv("OPENAI_API_KEY")
 client = OpenAI(
@@ -56,22 +74,29 @@ client = OpenAI(
 # screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
 asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
-# uhh no we need a new thread each time tf
+# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
-# make sure to call the function after the transcript is confirmed to work, it would be very stupid to call the function and make a new thread this early
+class EventHandler(AssistantEventHandler):    
  @override
  def on_text_created(self, text) -> None:
    awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))
  @override
  def on_text_delta(self, delta, snapshot):
      awaiter(output_stream.send_delta(delta.value))
  def on_tool_call_created(self, tool_call):
      raise Exception("Assistant shouldn't be calling tools.")
 def create_and_stream(transcript):
  with client.beta.threads.create_and_run_stream(
-    assistant_id=asst_screw_bardo_id,
+      assistant_id=asst_screw_bardo_id,
-    thread={
+      thread={
-      "messages" : [
+          "messages": [{"role": "user", "content": transcript}]
-        {"role": "user",
+      },
-         "content": transcript}
+      event_handler=EventHandler()
      ]
    },
    event_handler=EventHandler()
  ) as stream:
-    stream.until_done()
+      stream.until_done()
-  messages = stream.get_final_messages()
+      output_stream.done = True
  return messages[0].content[0].text.value
 def get_video_id(url):
  youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
@@ -82,7 +107,7 @@ def get_video_id(url):
      id = re.search(youtube_com, url)
  if not id:
-      print("Couldn't parse video ID from URL")
+      # Couldn't parse video ID from URL
      return None
  return id.group(1)
@@ -92,7 +117,8 @@ def get_auto_transcript(video_id):
  trans_api_errors = youtube_transcript_api._errors
  try:
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
-  except trans_api_errors.TranscriptsDisabled:
+  except trans_api_errors.TranscriptsDisabled as e:
    log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
    return None
  formatter = TextFormatter()  # Ensure that you create an instance of TextFormatter
@@ -100,12 +126,4 @@ def get_auto_transcript(video_id):
  txt_transcript = formatter.format_transcript(transcript)
  return txt_transcript
-
+output_stream = StreamOutput()
 # Stores the video ID inputted by the user
 """
 video_id = get_video_id()
 transcript = get_auto_transcript(video_id)
 create_and_stream(transcript)
 """
--- a/website/static/script.js
+++ b/website/static/script.js
@@ -1,17 +1,18 @@
 document.addEventListener("DOMContentLoaded", (event) => {
-    document.getElementById('submit').addEventListener('click', function() {
+    const response_area = document.getElementById('response-area');
    const submit_button = document.getElementById('submit')
    submit_button.addEventListener('click', function() {
        var url = document.getElementById('url_box').value;
        const response_area = document.getElementById('response-area');
        if (!url) {
            response_area.innerText = 'Please enter a URL.';
            return;
        }
-        else{
+        else {
-            response_area.innerText = "Sending URL and retriving transcript."
+            document.getElementById('url_box').value = "";
        }
        // First, process the URL
        fetch('/process_url', {
            method: 'POST',
            headers: {
@@ -19,13 +20,61 @@ document.addEventListener("DOMContentLoaded", (event) => {
            },
            body: new URLSearchParams({ url: url })
        })
-        .then(response => response.text())
+        .then(response => {
-        .then(data => {
+            if (!response.ok) {
-            response_area.innerText = data;
+                throw new Error('Network response was not ok');
            }
            // Extract the text from the response body
            return response.text(); // Use .json() if the response is JSON
        })
        .then(text => {
            submit_button.style.display = "none";    
            if (text === "Processing started. Check /stream_output for updates.") {
                streamOutput(response_area);
            } else {
                response_area.innerText = text; // Show any other response message
                submit_button.style.display = "flex";
            }
        })
        .catch(error => {
-            console.error('Error:', error);
+            console.error('Error processing URL:', error);
-            response_area.innerText = 'An error occurred. Please try again.';
+            response_area.innerText = 'Error processing URL: ' + error.message;
            submit_button.style.display = "flex";
        });
    });
 });
 /**
  * Fetch `/stream_output` and append the streamed text to `response_area`
  * as it arrives, keeping the area scrolled to the bottom.
  *
  * The submit button is shown again once the stream ends, whether it
  * finished normally or failed.
  *
  * @param {HTMLElement} response_area element that receives the streamed text
  */
 function streamOutput(response_area) {
    response_area.innerHTML = "";

    // Append as TEXT, not markup: the stream is model output (untrusted), and
    // `innerHTML +=` would also re-parse the whole area on every chunk.
    function appendText(text) {
        if (!text) {
            return;
        }
        response_area.insertAdjacentText('beforeend', text);
        response_area.scrollTop = response_area.scrollHeight;
    }

    fetch('/stream_output')
        .then(response => {
            if (!response.ok) {
                throw new Error('Network response was not ok');
            }
            const reader = response.body.getReader();
            const decoder = new TextDecoder("utf-8");

            function readStream() {
                // Return the promise so read errors reach the .catch() below.
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush any bytes of a multi-byte character still
                        // held by the decoder.
                        appendText(decoder.decode());
                        return;
                    }
                    // Decode and process the chunk
                    appendText(decoder.decode(value, { stream: true }));
                    // Continue reading
                    return readStream();
                });
            }

            // Start reading the stream
            return readStream();
        })
        .catch(error => {
            console.error('Error fetching stream:', error);
            response_area.innerText = 'Error fetching stream: ' + error.message;
        })
        .finally(() => {
            // Always give the user the submit button back.
            document.getElementById('submit').style.display = "flex";
        });
 }
--- a/website/static/style.css
+++ b/website/static/style.css
@@ -40,9 +40,10 @@ body .content {
    display: block;
    height: 90%;
    min-height: 90vh;
-    overflow: auto;
+    text-wrap: wrap;
    flex-wrap: wrap;
    align-content: flex-end;
    overflow-y: auto;
 }
 .form_box {