diff --git a/mito-ai/mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py b/mito-ai/mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py index 81279539e..bb1ddfa88 100644 --- a/mito-ai/mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py +++ b/mito-ai/mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py @@ -56,8 +56,8 @@ def create_agent_smart_debug_prompt(md: AgentSmartDebugMetadata) -> str: RUN_ALL_CELLS: When you want to execute all cells in the notebook from top to bottom, respond with this format: {{ - type: 'run_all_cells', - message: str + "type": "run_all_cells", + "message": "" }} Note that if the name error persists even after using run_all_cells, it means that the variable is not defined in the notebook and you should not reuse this tool. Additionally, this tool could also be used to refresh the notebook state.""")) @@ -128,11 +128,14 @@ def create_agent_smart_debug_prompt(md: AgentSmartDebugMetadata) -> str: {{ - is_finished: false, - cell_update: {{ - type: 'modification' - id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc' - code: "def parse_date(date_str):\n formats = ['%b %d, %Y', '%d %B, %Y']\n\n for fmt in formats:\n try:\n return pd.to_datetime(date_str, format=fmt)\n except ValueError:\n # Try next format\n continue\n\n # If not format worked, return Not a Time\n return pd.NaT\n\ndf['date'] = df['date'].apply(lambda x: parse_date(x))" + "type": "cell_update", + "message": "Fixing the date parsing to handle multiple formats.", + "cell_update": {{ + "type": "modification", + "id": "c68fdf19-db8c-46dd-926f-d90ad35bb3bc", + "code": "def parse_date(date_str):\\n formats = ['%b %d, %Y', '%d %B, %Y']\\n\\n for fmt in formats:\\n try:\\n return pd.to_datetime(date_str, format=fmt)\\n except ValueError:\\n # Try next format\\n continue\\n\\n # If not format worked, return Not a Time\\n return pd.NaT\\n\\ndf['date'] = df['date'].apply(lambda x: parse_date(x))", + "code_summary": "Parsing mixed date formats", + 
"cell_type": "code" }} }} diff --git a/mito-ai/mito_ai/completions/prompt_builders/agent_system_message.py b/mito-ai/mito_ai/completions/prompt_builders/agent_system_message.py index d1b878bff..7c758522f 100644 --- a/mito-ai/mito_ai/completions/prompt_builders/agent_system_message.py +++ b/mito-ai/mito_ai/completions/prompt_builders/agent_system_message.py @@ -51,16 +51,16 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Format: {{ - type: 'cell_update', - message: str, - cell_update: {{ - type: 'modification' - id: str, - code: str - code_summary: str - cell_type: 'code' | 'markdown' + "type": "cell_update", + "message": "", + "cell_update": {{ + "type": "modification", + "id": "", + "code": "", + "code_summary": "", + "cell_type": "code" or "markdown" }}, - analysis_assumptions: Optional[List[str]] + "analysis_assumptions": [""] }} Important information: @@ -78,16 +78,16 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Format: {{ - type: 'cell_update', - message: str, - cell_update: {{ - type: 'new' - after_cell_id: str - code: str - code_summary: str - cell_type: 'code' | 'markdown' + "type": "cell_update", + "message": "", + "cell_update": {{ + "type": "new", + "after_cell_id": "", + "code": "", + "code_summary": "", + "cell_type": "code" or "markdown" }}, - analysis_assumptions: Optional[List[str]] + "analysis_assumptions": [""] }} Important information: @@ -136,14 +136,14 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Output: {{ - type: 'cell_update', - message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.", - cell_update: {{ - type: 'modification', - id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc', - code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier", 
- code_summary: "Converting the transaction_date column", - cell_type: 'code' + "type": "cell_update", + "message": "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.", + "cell_update": {{ + "type": "modification", + "id": "c68fdf19-db8c-46dd-926f-d90ad35bb3bc", + "code": "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier", + "code_summary": "Converting the transaction_date column", + "cell_type": "code" }} }} @@ -184,14 +184,14 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Output: {{ - type: 'cell_update', - message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.", - cell_update: {{ - type: 'new', - after_cell_id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc', - code: "import matplotlib.pyplot as plt\\n\\nplt.bar(sales_df.index, sales_df['total_price'])\\nplt.title('Total Price per Sale')\\nplt.xlabel('Transaction Number')\\nplt.ylabel('Sales Price ($)')\\nplt.show()", - code_summary: "Plotting total_price", - cell_type: 'code' + "type": "cell_update", + "message": "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.", + "cell_update": {{ + "type": "new", + "after_cell_id": "c68fdf19-db8c-46dd-926f-d90ad35bb3bc", + "code": "import matplotlib.pyplot as plt\\n\\nplt.bar(sales_df.index, sales_df['total_price'])\\nplt.title('Total Price per Sale')\\nplt.xlabel('Transaction Number')\\nplt.ylabel('Sales Price ($)')\\nplt.show()", + "code_summary": "Plotting total_price", + "cell_type": "code" }} }} """)) @@ -203,9 +203,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: When you want to get a base64 encoded version of a cell's output, respond with this format: {{ - type: 'get_cell_output', - message: str, 
- get_cell_output_cell_id: str + "type": "get_cell_output", + "message": "", + "get_cell_output_cell_id": "" }} Important information: @@ -218,8 +218,8 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: When you want to execute all cells in the notebook from top to bottom, respond with this format: {{ - type: 'run_all_cells', - message: str + "type": "run_all_cells", + "message": "" }} Important information: @@ -235,10 +235,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Format: {{ - type: 'scratchpad', - message: str, - scratchpad_code: str, - scratchpad_summary: str + "type": "scratchpad", + "message": "", + "scratchpad_code": "", + "scratchpad_summary": "" }} Important information: @@ -254,10 +254,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: {{ - type: 'scratchpad', - message: "I'll check what files are in the current directory to find the data file.", - scratchpad_code: "import os\\nscratch_files = os.listdir('.')\\nprint('Files:', scratch_files)\\nfor scratch_file in scratch_files:\\n if scratch_file.endswith('.csv'):\\n print(f'CSV file found: {scratch_file}')", - scratchpad_summary: "Checking files" + "type": "scratchpad", + "message": "I'll check what files are in the current directory to find the data file.", + "scratchpad_code": "import os\\nscratch_files = os.listdir('.')\\nprint('Files:', scratch_files)\\nfor scratch_file in scratch_files:\\n if scratch_file.endswith('.csv'):\\n print(f'CSV file found: {scratch_file}')", + "scratchpad_summary": "Checking files" }} @@ -269,10 +269,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: When you have a specific question that you the user to answer so that you can figure out how to proceed in your work, you can respond in this format: {{ - type: 'ask_user_question', - message: str, - question: str, - answers: Optional[List[str]] + "type": "ask_user_question", + "message": "", + "question": "", + "answers": 
[""] }} Important information: @@ -293,10 +293,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: {{ - type: 'ask_user_question', - message: "I tried importing apple_prices.csv and confirmed that it does not exist in the current working directory.", - question: "The file apple_prices.csv does not exist. How do you want to proceed?", - answers: ["Pull Apple Stock prices using yfinance API", "Create placeholder data", "Skip this step"] + "type": "ask_user_question", + "message": "I tried importing apple_prices.csv and confirmed that it does not exist in the current working directory.", + "question": "The file apple_prices.csv does not exist. How do you want to proceed?", + "answers": ["Pull Apple Stock prices using yfinance API", "Create placeholder data", "Skip this step"] }} @@ -308,9 +308,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: When you want to create a new Streamlit app from the current notebook, respond with this format: {{ - type: 'create_streamlit_app', - streamlit_app_prompt: str, - message: str + "type": "create_streamlit_app", + "streamlit_app_prompt": "", + "message": "" }} Important information: @@ -326,9 +326,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Output: {{ - type: 'create_streamlit_app', - streamlit_app_prompt: "The app should have a beginning date and end date input field at the top. It should then be followed by two tabs for the user to select between: current performance and projected performance.", - message: "I'll convert your notebook into an app." + "type": "create_streamlit_app", + "streamlit_app_prompt": "The app should have a beginning date and end date input field at the top. It should then be followed by two tabs for the user to select between: current performance and projected performance.", + "message": "I'll convert your notebook into an app." 
}} The user will see a preview of the app and because you fulfilled your task, you can next respond with a FINISHED_TASK tool message. @@ -340,9 +340,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: When you want to edit an existing Streamlit app, respond with this format: {{ - type: 'edit_streamlit_app', - message: str, - streamlit_app_prompt: str + "type": "edit_streamlit_app", + "message": "", + "streamlit_app_prompt": "" }} Important information: @@ -358,13 +358,13 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: When you have completed the user's task, respond with a message in this format: {{ - type: 'finished_task', - message: str, - next_steps: Optional[List[str]] + "type": "finished_task", + "message": "", + "next_steps": [""] }} Important information: -1. The message is a short summary of the ALL the work that you've completed on this task. It should not just refer to the final message. It could be something like "I've completed the sales strategy analysis by exploring key relationships in the data and summarizing creating a report with three recommendations to boost sales."" +1. The message is a short summary of ALL the work that you've completed on this task. It should not just refer to the final message. It could be something like "I've completed the sales strategy analysis by exploring key relationships in the data and summarizing them in a report with three recommendations to boost sales." 2. The message should include citations for any insights that you shared with the user and cell references for whenever you refer to specific cells that you've updated or created. 3. The next_steps is an optional list of 2 or 3 suggested follow-up tasks or analyses that the user might want to perform next. These should be concise, actionable suggestions that build on the work you've just completed. For example: ["Export the cleaned data to CSV", "Analyze revenue per customer", "Convert notebook into an app"]. 
4. The next_steps should be as relevant to the user's actual task as possible. Try your best not to make generic suggestions like "Analyze the data" or "Visualize the results". For example, if the user just asked you to calculate LTV of their customers, you might suggest the following next steps: ["Graph key LTV drivers: churn and average transaction value", "Visualize LTV per age group"]. @@ -375,9 +375,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: {{ - type: 'finished_task', - message: "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].", - next_steps: ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"] + "type": "finished_task", + "message": "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].", + "next_steps": ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"] }} @@ -386,8 +386,8 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Output: {{ - type: 'finished_task', - message: "Hey there! I'm Mito AI. How can I help you today?" + "type": "finished_task", + "message": "Hey there! I'm Mito AI. How can I help you today?" 
}} """)) @@ -463,9 +463,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str: Output: {{ - type: 'finished_task', - message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]", - next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"] + "type": "finished_task", + "message": "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]", + "next_steps": ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"] }} """)) sections.append(SG.Generic("Cell Reference Rules", CELL_REFERENCE_RULES)) diff --git a/mito-ai/mito_ai/utils/open_ai_utils.py b/mito-ai/mito_ai/utils/open_ai_utils.py index 19679b4c5..5aefae507 100644 --- a/mito-ai/mito_ai/utils/open_ai_utils.py +++ b/mito-ai/mito_ai/utils/open_ai_utils.py @@ -159,12 +159,12 @@ def get_open_ai_completion_function_params( # Pydantic models are supported by the OpenAI API, however, we need to be able to # serialize it for requests that are going to be sent to the mito server. # OpenAI expects a very specific schema as seen below. - # Note: Abacus only supports {"type": "json"} format, not the full JSON schema format. + # Note: Abacus only supports {"type": "json_object"} format, not the full JSON schema format. 
if response_format_info: - # Check if we're using Abacus - it only supports simple {"type": "json"} format + # Check if we're using Abacus - it only supports simple {"type": "json_object"} format if is_abacus_configured() or model.lower().startswith('abacus/'): completion_function_params["response_format"] = { - "type": "json" + "type": "json_object" } else: # For OpenAI and other providers, use the full JSON schema format