Multimodal Textbox (Chat Input Component) (gradio-app#7420)

* first pass
* multimodal textbox
* add changeset
* remove file
* more changes
* changes
* add changeset
* revert demo
* doc strings fix
* update demo
* file icons
* more updates
* format
* add story
* remove doc line
* type fixes
* chat interface
* new demo
* image upload fix
* ui changes
* addressing PR comments
* format
* type check
* more pr fixes
* format
* format
* test fixes
* test fixes
* Streaming fixes + other stuff
* optional keys to dict value
* final fixes
* notebook
* format
* Update guides/04_chatbots/01_creating-a-chatbot-fast.md Co-authored-by: Abubakar Abid <[email protected]>
* Update guides/04_chatbots/01_creating-a-chatbot-fast.md Co-authored-by: Abubakar Abid <[email protected]>
* Update guides/04_chatbots/01_creating-a-chatbot-fast.md Co-authored-by: Abubakar Abid <[email protected]>
* merge
* backend fixes
* story fix
* ui test fix
* format
* story
* format
* demo fix
* streaming test fix
* stories fix
* stories fix

---------

Co-authored-by: gradio-pr-bot <[email protected]>
Co-authored-by: Abubakar Abid <[email protected]>
cschin · Mar 19, 2024 · 15da39f · 15da39f
1 parent c9aba8d
commit 15da39f
Show file tree

Hide file tree

Showing 29 changed files with 1,291 additions and 76 deletions.
diff --git a/.changeset/early-sheep-drop.md b/.changeset/early-sheep-drop.md
@@ -0,0 +1,8 @@
+---
+"@gradio/app": minor
+"@gradio/multimodaltextbox": minor
+"@gradio/upload": minor
+"gradio": minor
+---
+
+feat: Multimodal Textbox (Chat Input Component)
diff --git a/demo/chatbot_multimodal/avatar.png → demo/chatbot_multimodal/files/avatar.png b/demo/chatbot_multimodal/avatar.png → demo/chatbot_multimodal/files/avatar.png
diff --git a/demo/chatbot_multimodal/files/lion.jpg b/demo/chatbot_multimodal/files/lion.jpg
diff --git a/demo/chatbot_multimodal/run.ipynb b/demo/chatbot_multimodal/run.ipynb
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/avatar.png"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", "    print(x.index, x.value, x.liked)\n", "\n", "\n", "def add_text(history, text):\n", "    history = history + [(text, None)]\n", "    return history, gr.Textbox(value=\"\", interactive=False)\n", "\n", "\n", "def add_file(history, file):\n", "    history = history + [((file.name,), None)]\n", "    return history\n", "\n", "\n", "def bot(history):\n", "    response = \"**That's cool!**\"\n", "    history[-1][1] = \"\"\n", "    for character in response:\n", "        history[-1][1] += character\n", "        time.sleep(0.05)\n", "        yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", "    chatbot = gr.Chatbot(\n", "        [],\n", "        elem_id=\"chatbot\",\n", "        bubble_full_width=False,\n", "        avatar_images=(None, (os.path.join(os.path.abspath(''), \"avatar.png\"))),\n", "    )\n", "\n", "    with gr.Row():\n", "        txt = gr.Textbox(\n", "            scale=4,\n", "            show_label=False,\n", "            placeholder=\"Enter text and 
press enter, or upload an image\",\n", "            container=False,\n", "        )\n", "        btn = gr.UploadButton(\"\ud83d\udcc1\", file_types=[\"image\", \"video\", \"audio\"])\n", "\n", "    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(\n", "        bot, chatbot, chatbot, api_name=\"bot_response\"\n", "    )\n", "    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)\n", "    file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(\n", "        bot, chatbot, chatbot\n", "    )\n", "\n", "    chatbot.like(print_like_dislike, None, None)\n", "\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", "    demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('files')\n", "!wget -q -O files/avatar.png https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/avatar.png\n", "!wget -q -O files/lion.jpg https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/lion.jpg"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). 
Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", "    print(x.index, x.value, x.liked)\n", "\n", "def add_message(history, message):\n", "    for x in message[\"files\"]:\n", "        history.append(((x[\"path\"],), None))  \n", "    if message[\"text\"] is not None:\n", "        history.append((message[\"text\"], None))\n", "    return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=[\"image\"])\n", "\n", "def bot(history):\n", "    response = \"**That's cool!**\"\n", "    history[-1][1] = \"\"\n", "    for character in response:\n", "        history[-1][1] += character\n", "        time.sleep(0.05)\n", "        yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", "    chatbot = gr.Chatbot(\n", "        [],\n", "        elem_id=\"chatbot\",\n", "        bubble_full_width=False,\n", "        avatar_images=(None, (os.path.join(os.path.abspath(''), \"files/avatar.png\"))),\n", "    )\n", "\n", "    chat_input = gr.MultimodalTextbox(interactive=True, file_types=[\"image\"], placeholder=\"Enter message or upload file...\", show_label=False)\n", "    chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input], queue=False).then(\n", "        bot, chatbot, chatbot, api_name=\"bot_response\"\n", "    )\n", "    chat_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input], queue=False)\n", "    chatbot.like(print_like_dislike, None, None)\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", "    demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
diff --git a/demo/chatbot_multimodal/run.py b/demo/chatbot_multimodal/run.py
@@ -8,16 +8,12 @@
 def print_like_dislike(x: gr.LikeData):
     print(x.index, x.value, x.liked)
 
-
-def add_text(history, text):
-    history = history + [(text, None)]
-    return history, gr.Textbox(value="", interactive=False)
-
-
-def add_file(history, file):
-    history = history + [((file.name,), None)]
-    return history
-
+def add_message(history, message):
+    for x in message["files"]:
+        history.append(((x["path"],), None))  
+    if message["text"] is not None:
+        history.append((message["text"], None))
+    return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=["image"])
 
 def bot(history):
     response = "**That's cool!**"
@@ -33,29 +29,16 @@ def bot(history):
         [],
         elem_id="chatbot",
         bubble_full_width=False,
-        avatar_images=(None, (os.path.join(os.path.dirname(__file__), "avatar.png"))),
+        avatar_images=(None, (os.path.join(os.path.dirname(__file__), "files/avatar.png"))),
     )
 
-    with gr.Row():
-        txt = gr.Textbox(
-            scale=4,
-            show_label=False,
-            placeholder="Enter text and press enter, or upload an image",
-            container=False,
-        )
-        btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])
-
-    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+    chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
+    chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input], queue=False).then(
         bot, chatbot, chatbot, api_name="bot_response"
     )
-    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
-    file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(
-        bot, chatbot, chatbot
-    )
-
+    chat_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input], queue=False)
     chatbot.like(print_like_dislike, None, None)
 
-
 demo.queue()
 if __name__ == "__main__":
     demo.launch()
diff --git a/demo/chatinterface_multimodal/run.ipynb b/demo/chatinterface_multimodal/run.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatinterface_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "def echo(message, history):\n", "    return message[\"text\"]\n", "\n", "demo = gr.ChatInterface(fn=echo, examples=[{\"text\": \"hello\"}, {\"text\": \"hola\"}, {\"text\": \"merhaba\"}], title=\"Echo Bot\", multimodal=True)\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
diff --git a/demo/chatinterface_multimodal/run.py b/demo/chatinterface_multimodal/run.py
@@ -0,0 +1,7 @@
+import gradio as gr
+
+def echo(message, history):
+    return message["text"]
+
+demo = gr.ChatInterface(fn=echo, examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}], title="Echo Bot", multimodal=True)
+demo.launch()
diff --git a/demo/multimodaltextbox_component/run.ipynb b/demo/multimodaltextbox_component/run.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: multimodaltextbox_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "with gr.Blocks() as demo:\n", "    gr.MultimodalTextbox(interactive=True)\n", "\n", "demo.launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
diff --git a/demo/multimodaltextbox_component/run.py b/demo/multimodaltextbox_component/run.py
@@ -0,0 +1,6 @@
+import gradio as gr
+
+with gr.Blocks() as demo:
+    gr.MultimodalTextbox(interactive=True)
+
+demo.launch()
diff --git a/gradio/__init__.py b/gradio/__init__.py
@@ -44,6 +44,7 @@
     LogoutButton,
     Markdown,
     Model3D,
+    MultimodalTextbox,
     Number,
     ParamViewer,
     Plot,
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/avatar.png"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", " print(x.index, x.value, x.liked)\n", "\n", "\n", "def add_text(history, text):\n", " history = history + [(text, None)]\n", " return history, gr.Textbox(value=\"\", interactive=False)\n", "\n", "\n", "def add_file(history, file):\n", " history = history + [((file.name,), None)]\n", " return history\n", "\n", "\n", "def bot(history):\n", " response = \"That's cool!\"\n", " history[-1][1] = \"\"\n", " for character in response:\n", " history[-1][1] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot(\n", " [],\n", " elem_id=\"chatbot\",\n", " bubble_full_width=False,\n", " avatar_images=(None, (os.path.join(os.path.abspath(''), \"avatar.png\"))),\n", " )\n", "\n", " with gr.Row():\n", " txt = gr.Textbox(\n", " scale=4,\n", " show_label=False,\n", " placeholder=\"Enter text and press enter, or upload an image\",\n", " container=False,\n", " )\n", " btn = gr.UploadButton(\"\ud83d\udcc1\", 
file_types=[\"image\", \"video\", \"audio\"])\n", "\n", " txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(\n", " bot, chatbot, chatbot, api_name=\"bot_response\"\n", " )\n", " txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)\n", " file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(\n", " bot, chatbot, chatbot\n", " )\n", "\n", " chatbot.like(print_like_dislike, None, None)\n", "\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
		{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('files')\n", "!wget -q -O files/avatar.png https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/avatar.png\n", "!wget -q -O files/lion.jpg https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/lion.jpg"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). 
Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", " print(x.index, x.value, x.liked)\n", "\n", "def add_message(history, message):\n", " for x in message[\"files\"]:\n", " history.append(((x[\"path\"],), None)) \n", " if message[\"text\"] is not None:\n", " history.append((message[\"text\"], None))\n", " return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=[\"image\"])\n", "\n", "def bot(history):\n", " response = \"That's cool!\"\n", " history[-1][1] = \"\"\n", " for character in response:\n", " history[-1][1] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot(\n", " [],\n", " elem_id=\"chatbot\",\n", " bubble_full_width=False,\n", " avatar_images=(None, (os.path.join(os.path.abspath(''), \"files/avatar.png\"))),\n", " )\n", "\n", " chat_input = gr.MultimodalTextbox(interactive=True, file_types=[\"image\"], placeholder=\"Enter message or upload file...\", show_label=False)\n", " chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input], queue=False).then(\n", " bot, chatbot, chatbot, api_name=\"bot_response\"\n", " )\n", " chat_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input], queue=False)\n", " chatbot.like(print_like_dislike, None, None)\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatinterface_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "def echo(message, history):\n", " return message[\"text\"]\n", "\n", "demo = gr.ChatInterface(fn=echo, examples=[{\"text\": \"hello\"}, {\"text\": \"hola\"}, {\"text\": \"merhaba\"}], title=\"Echo Bot\", multimodal=True)\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}