Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def create_agent_smart_debug_prompt(md: AgentSmartDebugMetadata) -> str:
RUN_ALL_CELLS:
When you want to execute all cells in the notebook from top to bottom, respond with this format:
{{
type: 'run_all_cells',
message: str
"type": "run_all_cells",
"message": "<string>"
}}
Note that if the name error persists even after using run_all_cells, it means that the variable is not defined in the notebook and you should not reuse this tool. Additionally, this tool could also be used to refresh the notebook state."""))

Expand Down Expand Up @@ -128,11 +128,14 @@ def create_agent_smart_debug_prompt(md: AgentSmartDebugMetadata) -> str:
<Output>

{{
is_finished: false,
cell_update: {{
type: 'modification'
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
code: "def parse_date(date_str):\n formats = ['%b %d, %Y', '%d %B, %Y']\n\n for fmt in formats:\n try:\n return pd.to_datetime(date_str, format=fmt)\n except ValueError:\n # Try next format\n continue\n\n # If not format worked, return Not a Time\n return pd.NaT\n\ndf['date'] = df['date'].apply(lambda x: parse_date(x))"
"type": "cell_update",
"message": "Fixing the date parsing to handle multiple formats.",
"cell_update": {{
"type": "modification",
"id": "c68fdf19-db8c-46dd-926f-d90ad35bb3bc",
"code": "def parse_date(date_str):\\n formats = ['%b %d, %Y', '%d %B, %Y']\\n\\n for fmt in formats:\\n try:\\n return pd.to_datetime(date_str, format=fmt)\\n except ValueError:\\n # Try next format\\n continue\\n\\n # If not format worked, return Not a Time\\n return pd.NaT\\n\\ndf['date'] = df['date'].apply(lambda x: parse_date(x))",
"code_summary": "Parsing mixed date formats",
"cell_type": "code"
}}
}}

Expand Down
152 changes: 76 additions & 76 deletions mito-ai/mito_ai/completions/prompt_builders/agent_system_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Format:
{{
type: 'cell_update',
message: str,
cell_update: {{
type: 'modification'
id: str,
code: str
code_summary: str
cell_type: 'code' | 'markdown'
"type": "cell_update",
"message": "<string>",
"cell_update": {{
"type": "modification",
"id": "<string>",
"code": "<string>",
"code_summary": "<string>",
"cell_type": "code" or "markdown"
}},
analysis_assumptions: Optional[List[str]]
"analysis_assumptions": ["<optional list of strings>"]
}}

Important information:
Expand All @@ -78,16 +78,16 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Format:
{{
type: 'cell_update',
message: str,
cell_update: {{
type: 'new'
after_cell_id: str
code: str
code_summary: str
cell_type: 'code' | 'markdown'
"type": "cell_update",
"message": "<string>",
"cell_update": {{
"type": "new",
"after_cell_id": "<string>",
"code": "<string>",
"code_summary": "<string>",
"cell_type": "code" or "markdown"
}},
analysis_assumptions: Optional[List[str]]
"analysis_assumptions": ["<optional list of strings>"]
}}

Important information:
Expand Down Expand Up @@ -136,14 +136,14 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Output:
{{
type: 'cell_update',
message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
cell_update: {{
type: 'modification',
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
code_summary: "Converting the transaction_date column",
cell_type: 'code'
"type": "cell_update",
"message": "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
"cell_update": {{
"type": "modification",
"id": "c68fdf19-db8c-46dd-926f-d90ad35bb3bc",
"code": "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
"code_summary": "Converting the transaction_date column",
"cell_type": "code"
}}
}}

Expand Down Expand Up @@ -184,14 +184,14 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Output:
{{
type: 'cell_update',
message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
cell_update: {{
type: 'new',
after_cell_id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
code: "import matplotlib.pyplot as plt\\n\\nplt.bar(sales_df.index, sales_df['total_price'])\\nplt.title('Total Price per Sale')\\nplt.xlabel('Transaction Number')\\nplt.ylabel('Sales Price ($)')\\nplt.show()",
code_summary: "Plotting total_price",
cell_type: 'code'
"type": "cell_update",
"message": "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
"cell_update": {{
"type": "new",
"after_cell_id": "c68fdf19-db8c-46dd-926f-d90ad35bb3bc",
"code": "import matplotlib.pyplot as plt\\n\\nplt.bar(sales_df.index, sales_df['total_price'])\\nplt.title('Total Price per Sale')\\nplt.xlabel('Transaction Number')\\nplt.ylabel('Sales Price ($)')\\nplt.show()",
"code_summary": "Plotting total_price",
"cell_type": "code"
}}
}}
</Cell Addition Example>"""))
Expand All @@ -203,9 +203,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
When you want to get a base64 encoded version of a cell's output, respond with this format:

{{
type: 'get_cell_output',
message: str,
get_cell_output_cell_id: str
"type": "get_cell_output",
"message": "<string>",
"get_cell_output_cell_id": "<string>"
}}

Important information:
Expand All @@ -218,8 +218,8 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
When you want to execute all cells in the notebook from top to bottom, respond with this format:

{{
type: 'run_all_cells',
message: str
"type": "run_all_cells",
"message": "<string>"
}}

Important information:
Expand All @@ -235,10 +235,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Format:
{{
type: 'scratchpad',
message: str,
scratchpad_code: str,
scratchpad_summary: str
"type": "scratchpad",
"message": "<string>",
"scratchpad_code": "<string>",
"scratchpad_summary": "<string>"
}}

Important information:
Expand All @@ -254,10 +254,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

<Example>
{{
type: 'scratchpad',
message: "I'll check what files are in the current directory to find the data file.",
scratchpad_code: "import os\\nscratch_files = os.listdir('.')\\nprint('Files:', scratch_files)\\nfor scratch_file in scratch_files:\\n if scratch_file.endswith('.csv'):\\n print(f'CSV file found: {scratch_file}')",
scratchpad_summary: "Checking files"
"type": "scratchpad",
"message": "I'll check what files are in the current directory to find the data file.",
"scratchpad_code": "import os\\nscratch_files = os.listdir('.')\\nprint('Files:', scratch_files)\\nfor scratch_file in scratch_files:\\n if scratch_file.endswith('.csv'):\\n print(f'CSV file found: {scratch_file}')",
"scratchpad_summary": "Checking files"
}}
</Example>

Expand All @@ -269,10 +269,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
When you have a specific question that you want the user to answer so that you can figure out how to proceed in your work, you can respond in this format:

{{
type: 'ask_user_question',
message: str,
question: str,
answers: Optional[List[str]]
"type": "ask_user_question",
"message": "<string>",
"question": "<string>",
"answers": ["<optional list of strings>"]
}}

Important information:
Expand All @@ -293,10 +293,10 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

<Example>
{{
type: 'ask_user_question',
message: "I tried importing apple_prices.csv and confirmed that it does not exist in the current working directory.",
question: "The file apple_prices.csv does not exist. How do you want to proceed?",
answers: ["Pull Apple Stock prices using yfinance API", "Create placeholder data", "Skip this step"]
"type": "ask_user_question",
"message": "I tried importing apple_prices.csv and confirmed that it does not exist in the current working directory.",
"question": "The file apple_prices.csv does not exist. How do you want to proceed?",
"answers": ["Pull Apple Stock prices using yfinance API", "Create placeholder data", "Skip this step"]
}}
</Example>

Expand All @@ -308,9 +308,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
When you want to create a new Streamlit app from the current notebook, respond with this format:

{{
type: 'create_streamlit_app',
streamlit_app_prompt: str,
message: str
"type": "create_streamlit_app",
"streamlit_app_prompt": "<string>",
"message": "<string>"
}}

Important information:
Expand All @@ -326,9 +326,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Output:
{{
type: 'create_streamlit_app',
streamlit_app_prompt: "The app should have a beginning date and end date input field at the top. It should then be followed by two tabs for the user to select between: current performance and projected performance.",
message: "I'll convert your notebook into an app."
"type": "create_streamlit_app",
"streamlit_app_prompt": "The app should have a beginning date and end date input field at the top. It should then be followed by two tabs for the user to select between: current performance and projected performance.",
"message": "I'll convert your notebook into an app."
}}

The user will see a preview of the app and because you fulfilled your task, you can next respond with a FINISHED_TASK tool message.
Expand All @@ -340,9 +340,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
When you want to edit an existing Streamlit app, respond with this format:

{{
type: 'edit_streamlit_app',
message: str,
streamlit_app_prompt: str
"type": "edit_streamlit_app",
"message": "<string>",
"streamlit_app_prompt": "<string>"
}}

Important information:
Expand All @@ -358,13 +358,13 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
When you have completed the user's task, respond with a message in this format:

{{
type: 'finished_task',
message: str,
next_steps: Optional[List[str]]
"type": "finished_task",
"message": "<string>",
"next_steps": ["<optional list of strings>"]
}}

Important information:
1. The message is a short summary of the ALL the work that you've completed on this task. It should not just refer to the final message. It could be something like "I've completed the sales strategy analysis by exploring key relationships in the data and summarizing creating a report with three recommendations to boost sales.""
1. The message is a short summary of ALL the work that you've completed on this task. It should not just refer to the final message. It could be something like "I've completed the sales strategy analysis by exploring key relationships in the data and creating a report with three recommendations to boost sales."
2. The message should include citations for any insights that you shared with the user and cell references for whenever you refer to specific cells that you've updated or created.
3. The next_steps is an optional list of 2 or 3 suggested follow-up tasks or analyses that the user might want to perform next. These should be concise, actionable suggestions that build on the work you've just completed. For example: ["Export the cleaned data to CSV", "Analyze revenue per customer", "Convert notebook into an app"].
4. The next_steps should be as relevant to the user's actual task as possible. Try your best not to make generic suggestions like "Analyze the data" or "Visualize the results". For example, if the user just asked you to calculate LTV of their customers, you might suggest the following next steps: ["Graph key LTV drivers: churn and average transaction value", "Visualize LTV per age group"].
Expand All @@ -375,9 +375,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

<Finished Task Example 1>
{{
type: 'finished_task',
message: "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].",
next_steps: ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"]
"type": "finished_task",
"message": "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].",
"next_steps": ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"]
}}
</Finished Task Example 1>

Expand All @@ -386,8 +386,8 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Output:
{{
type: 'finished_task',
message: "Hey there! I'm Mito AI. How can I help you today?"
"type": "finished_task",
"message": "Hey there! I'm Mito AI. How can I help you today?"
}}
</Finished Task Example 2>
"""))
Expand Down Expand Up @@ -463,9 +463,9 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:

Output:
{{
type: 'finished_task',
message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
"type": "finished_task",
"message": "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
"next_steps": ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
}}
</Example>"""))
sections.append(SG.Generic("Cell Reference Rules", CELL_REFERENCE_RULES))
Expand Down
6 changes: 3 additions & 3 deletions mito-ai/mito_ai/utils/open_ai_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,12 @@ def get_open_ai_completion_function_params(
# Pydantic models are supported by the OpenAI API, however, we need to be able to
# serialize it for requests that are going to be sent to the mito server.
# OpenAI expects a very specific schema as seen below.
# Note: Abacus only supports {"type": "json"} format, not the full JSON schema format.
# Note: Abacus only supports {"type": "json_object"} format, not the full JSON schema format.
if response_format_info:
# Check if we're using Abacus - it only supports simple {"type": "json"} format
# Check if we're using Abacus - it only supports simple {"type": "json_object"} format
if is_abacus_configured() or model.lower().startswith('abacus/'):
completion_function_params["response_format"] = {
"type": "json"
"type": "json_object"
}
else:
# For OpenAI and other providers, use the full JSON schema format
Expand Down
Loading