feat: add ReAct agent loop and build-time config (mimi_secrets.h)

Rewrite agent_loop.c with ReAct tool use loop: LLM call → tool
execution → repeat until end_turn (max 10 iterations). Add tool
guidance to system prompt. Add set_search_key CLI command.

Add mimi_secrets.h for build-time credentials with highest priority
over NVS/CLI values. All modules (wifi, telegram, llm, proxy,
web_search) check build-time secrets first, fall back to NVS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
crispyberry
2026-02-07 00:37:43 +08:00
parent 2fe81b8ee1
commit 0e1da79b74
9 changed files with 255 additions and 57 deletions

1
.gitignore vendored
View File

@@ -27,6 +27,7 @@ mmap_generate_*.h
# Environment
.env
main/mimi_secrets.h
# Cache / Compiled
.cache

View File

@@ -4,30 +4,95 @@
#include "bus/message_bus.h"
#include "llm/llm_proxy.h"
#include "memory/session_mgr.h"
#include "tools/tool_registry.h"
#include <string.h>
#include <stdlib.h>
#include "esp_log.h"
#include "esp_heap_caps.h"
#include "cJSON.h"
static const char *TAG = "agent";
#define TOOL_OUTPUT_SIZE (8 * 1024)
/*
 * Turn an LLM response into the "content" array of an assistant message
 * for the messages history.
 *
 * Layout of the returned array, in order:
 *   - one {"type":"text","text":...} block, only when the response has
 *     non-empty text;
 *   - one {"type":"tool_use","id":...,"name":...,"input":{...}} block per
 *     entry in resp->calls.
 *
 * Each call's input string is parsed as JSON; when parsing fails an empty
 * object is attached instead, so every tool_use block gets an "input" item.
 *
 * Returns the array produced by cJSON_CreateArray().
 */
static cJSON *build_assistant_content(const llm_response_t *resp)
{
    cJSON *blocks = cJSON_CreateArray();

    /* Optional leading text block */
    const char *reply_text = resp->text;
    if (reply_text && resp->text_len > 0) {
        cJSON *txt = cJSON_CreateObject();
        cJSON_AddStringToObject(txt, "type", "text");
        cJSON_AddStringToObject(txt, "text", reply_text);
        cJSON_AddItemToArray(blocks, txt);
    }

    /* One tool_use block per requested call */
    int idx = 0;
    while (idx < resp->call_count) {
        const llm_tool_call_t *tc = &resp->calls[idx];
        idx++;

        cJSON *use = cJSON_CreateObject();
        cJSON_AddStringToObject(use, "type", "tool_use");
        cJSON_AddStringToObject(use, "id", tc->id);
        cJSON_AddStringToObject(use, "name", tc->name);

        /* Fall back to {} when the input is not valid JSON */
        cJSON *parsed = cJSON_Parse(tc->input);
        cJSON_AddItemToObject(use, "input", parsed ? parsed : cJSON_CreateObject());

        cJSON_AddItemToArray(blocks, use);
    }

    return blocks;
}
/*
 * Execute every tool call in resp and build the content array of the
 * follow-up user message, one {"type":"tool_result"} block per call.
 *
 * tool_output / tool_output_size: scratch buffer handed to
 * tool_registry_execute() for each call. It is reset to an empty string
 * before every execution and its contents are copied into the block, so
 * the same buffer is reused across calls.
 *
 * NOTE(review): whatever tool_registry_execute() returns is not inspected;
 * the buffer contents are forwarded as the result regardless.
 *
 * Returns the array produced by cJSON_CreateArray().
 */
static cJSON *build_tool_results(const llm_response_t *resp, char *tool_output, size_t tool_output_size)
{
    cJSON *results = cJSON_CreateArray();

    int idx = 0;
    while (idx < resp->call_count) {
        const llm_tool_call_t *tc = &resp->calls[idx];
        idx++;

        /* Run the tool into a freshly emptied buffer */
        *tool_output = '\0';
        tool_registry_execute(tc->name, tc->input, tool_output, tool_output_size);
        ESP_LOGI(TAG, "Tool %s result: %d bytes", tc->name, (int)strlen(tool_output));

        /* Wrap the output, keyed by the originating tool_use id */
        cJSON *entry = cJSON_CreateObject();
        cJSON_AddStringToObject(entry, "type", "tool_result");
        cJSON_AddStringToObject(entry, "tool_use_id", tc->id);
        cJSON_AddStringToObject(entry, "content", tool_output);
        cJSON_AddItemToArray(results, entry);
    }

    return results;
}
static void agent_loop_task(void *arg)
{
ESP_LOGI(TAG, "Agent loop started on core %d", xPortGetCoreID());
/* Allocate large buffers from PSRAM */
char *system_prompt = heap_caps_calloc(1, MIMI_CONTEXT_BUF_SIZE, MALLOC_CAP_SPIRAM);
char *messages_json = heap_caps_calloc(1, MIMI_LLM_STREAM_BUF_SIZE, MALLOC_CAP_SPIRAM);
char *history_json = heap_caps_calloc(1, MIMI_LLM_STREAM_BUF_SIZE, MALLOC_CAP_SPIRAM);
char *response_buf = heap_caps_calloc(1, MIMI_LLM_STREAM_BUF_SIZE, MALLOC_CAP_SPIRAM);
char *tool_output = heap_caps_calloc(1, TOOL_OUTPUT_SIZE, MALLOC_CAP_SPIRAM);
if (!system_prompt || !messages_json || !history_json || !response_buf) {
if (!system_prompt || !history_json || !tool_output) {
ESP_LOGE(TAG, "Failed to allocate PSRAM buffers");
vTaskDelete(NULL);
return;
}
const char *tools_json = tool_registry_get_tools_json();
while (1) {
mimi_msg_t msg;
esp_err_t err = message_bus_pop_inbound(&msg, UINT32_MAX);
@@ -38,36 +103,81 @@ static void agent_loop_task(void *arg)
/* 1. Build system prompt */
context_build_system_prompt(system_prompt, MIMI_CONTEXT_BUF_SIZE);
/* 2. Load session history */
/* 2. Load session history into cJSON array */
session_get_history_json(msg.chat_id, history_json,
MIMI_LLM_STREAM_BUF_SIZE, MIMI_AGENT_MAX_HISTORY);
/* 3. Build messages array (history + current message) */
context_build_messages(history_json, msg.content,
messages_json, MIMI_LLM_STREAM_BUF_SIZE);
cJSON *messages = cJSON_Parse(history_json);
if (!messages) messages = cJSON_CreateArray();
/* 4. Call Claude API */
err = llm_chat(system_prompt, messages_json, response_buf, MIMI_LLM_STREAM_BUF_SIZE);
/* 3. Append current user message */
cJSON *user_msg = cJSON_CreateObject();
cJSON_AddStringToObject(user_msg, "role", "user");
cJSON_AddStringToObject(user_msg, "content", msg.content);
cJSON_AddItemToArray(messages, user_msg);
if (err == ESP_OK && response_buf[0]) {
/* 5. Save to session */
session_append(msg.chat_id, "user", msg.content);
session_append(msg.chat_id, "assistant", response_buf);
/* 4. ReAct loop */
char *final_text = NULL;
int iteration = 0;
/* 6. Push response to outbound */
mimi_msg_t out = {0};
strncpy(out.channel, msg.channel, sizeof(out.channel) - 1);
strncpy(out.chat_id, msg.chat_id, sizeof(out.chat_id) - 1);
out.content = strdup(response_buf);
if (out.content) {
message_bus_push_outbound(&out);
while (iteration < MIMI_AGENT_MAX_TOOL_ITER) {
llm_response_t resp;
err = llm_chat_tools(system_prompt, messages, tools_json, &resp);
if (err != ESP_OK) {
ESP_LOGE(TAG, "LLM call failed: %s", esp_err_to_name(err));
break;
}
} else {
/* Send error response */
if (!resp.tool_use) {
/* Normal completion — save final text and break */
if (resp.text && resp.text_len > 0) {
final_text = strdup(resp.text);
}
llm_response_free(&resp);
break;
}
ESP_LOGI(TAG, "Tool use iteration %d: %d calls", iteration + 1, resp.call_count);
/* Append assistant message with content array */
cJSON *asst_msg = cJSON_CreateObject();
cJSON_AddStringToObject(asst_msg, "role", "assistant");
cJSON_AddItemToObject(asst_msg, "content", build_assistant_content(&resp));
cJSON_AddItemToArray(messages, asst_msg);
/* Execute tools and append results */
cJSON *tool_results = build_tool_results(&resp, tool_output, TOOL_OUTPUT_SIZE);
cJSON *result_msg = cJSON_CreateObject();
cJSON_AddStringToObject(result_msg, "role", "user");
cJSON_AddItemToObject(result_msg, "content", tool_results);
cJSON_AddItemToArray(messages, result_msg);
llm_response_free(&resp);
iteration++;
}
cJSON_Delete(messages);
/* 5. Send response */
if (final_text && final_text[0]) {
/* Save to session (only user text + final assistant text) */
session_append(msg.chat_id, "user", msg.content);
session_append(msg.chat_id, "assistant", final_text);
/* Push response to outbound */
mimi_msg_t out = {0};
strncpy(out.channel, msg.channel, sizeof(out.channel) - 1);
strncpy(out.chat_id, msg.chat_id, sizeof(out.chat_id) - 1);
out.content = strdup(response_buf[0] ? response_buf : "Sorry, I encountered an error.");
out.content = final_text; /* transfer ownership */
message_bus_push_outbound(&out);
} else {
/* Error or empty response */
free(final_text);
mimi_msg_t out = {0};
strncpy(out.channel, msg.channel, sizeof(out.channel) - 1);
strncpy(out.chat_id, msg.chat_id, sizeof(out.chat_id) - 1);
out.content = strdup("Sorry, I encountered an error.");
if (out.content) {
message_bus_push_outbound(&out);
}

View File

@@ -43,7 +43,12 @@ esp_err_t context_build_system_prompt(char *buf, size_t size)
"You are MimiClaw, a personal AI assistant running on an ESP32-S3 device.\n"
"You communicate through Telegram and WebSocket.\n\n"
"## Current Time\n%s\n\n"
"Be helpful, accurate, and concise.\n",
"Be helpful, accurate, and concise.\n\n"
"## Available Tools\n"
"You have access to the following tools:\n"
"- web_search: Search the web for current information. "
"Use this when you need up-to-date facts, news, weather, or anything beyond your training data.\n\n"
"Use tools when needed. Provide your final answer as text after using tools.\n",
time_str);
/* Bootstrap files */

View File

@@ -6,6 +6,7 @@
#include "memory/memory_store.h"
#include "memory/session_mgr.h"
#include "proxy/http_proxy.h"
#include "tools/tool_web_search.h"
#include <string.h>
#include <stdio.h>
@@ -202,6 +203,24 @@ static int cmd_clear_proxy(int argc, char **argv)
return 0;
}
/* --- set_search_key command --- */
/* Argument table for the `set_search_key` console command.
 * Parsed by cmd_set_search_key() via arg_parse(); the members are filled in
 * when the command is registered. */
static struct {
struct arg_str *key; /* required <key> argument: the Brave Search API key */
struct arg_end *end; /* argtable terminator; also used when printing parse errors */
} search_key_args;
/*
 * Console handler for `set_search_key <key>`.
 *
 * Parses argv against search_key_args. On a parse error the argtable
 * errors are printed to stderr and 1 is returned. Otherwise the first
 * <key> value is handed to tool_web_search_set_key(), a confirmation is
 * printed and 0 is returned.
 *
 * NOTE(review): any result from tool_web_search_set_key() is not checked,
 * so the confirmation is printed unconditionally.
 */
static int cmd_set_search_key(int argc, char **argv)
{
    if (arg_parse(argc, argv, (void **)&search_key_args) != 0) {
        arg_print_errors(stderr, search_key_args.end, argv[0]);
        return 1;
    }

    const char *api_key = search_key_args.key->sval[0];
    tool_web_search_set_key(api_key);
    printf("Search API key saved.\n");

    return 0;
}
/* --- restart command --- */
static int cmd_restart(int argc, char **argv)
{
@@ -324,6 +343,17 @@ esp_err_t serial_cli_init(void)
};
esp_console_cmd_register(&heap_cmd);
/* set_search_key */
search_key_args.key = arg_str1(NULL, NULL, "<key>", "Brave Search API key");
search_key_args.end = arg_end(1);
esp_console_cmd_t search_key_cmd = {
.command = "set_search_key",
.help = "Set Brave Search API key for web_search tool",
.func = &cmd_set_search_key,
.argtable = &search_key_args,
};
esp_console_cmd_register(&search_key_cmd);
/* set_proxy */
proxy_args.host = arg_str1(NULL, NULL, "<host>", "Proxy host/IP");
proxy_args.port = arg_int1(NULL, NULL, "<port>", "Proxy port");

View File

@@ -20,6 +20,7 @@
#include "gateway/ws_server.h"
#include "cli/serial_cli.h"
#include "proxy/http_proxy.h"
#include "tools/tool_registry.h"
static const char *TAG = "mimi";
@@ -104,6 +105,7 @@ void app_main(void)
ESP_ERROR_CHECK(http_proxy_init());
ESP_ERROR_CHECK(telegram_bot_init());
ESP_ERROR_CHECK(llm_proxy_init());
ESP_ERROR_CHECK(tool_registry_init());
ESP_ERROR_CHECK(agent_loop_init());
/* Start Serial CLI first (works without WiFi) */

View File

@@ -0,0 +1,29 @@
/*
* MimiClaw Build-time Secrets
*
* Copy this file to mimi_secrets.h and fill in your values.
* Non-empty values here take HIGHEST priority (override NVS/CLI).
* Leave empty ("") to use NVS values set via CLI.
*
* cp mimi_secrets.h.example mimi_secrets.h
*/
#pragma once
/* WiFi
 * The build-time credentials are used only when the SSID is non-empty;
 * the password is read only in that case. With an empty SSID the WiFi
 * manager falls back to the values stored in NVS. */
#define MIMI_SECRET_WIFI_SSID ""
#define MIMI_SECRET_WIFI_PASS ""
/* Telegram Bot
 * Used as the initial contents of a 128-byte token buffer, so the token
 * must be shorter than 128 characters. NVS is consulted only when this
 * is empty. */
#define MIMI_SECRET_TG_TOKEN ""
/* Anthropic API */
#define MIMI_SECRET_API_KEY ""
#define MIMI_SECRET_MODEL ""
/* HTTP Proxy (leave empty or set both)
 * The build-time proxy applies only when BOTH values are non-empty;
 * otherwise the NVS values are used. The port is written as a string
 * (e.g. "8080") and converted with atoi() into a 16-bit port number. */
#define MIMI_SECRET_PROXY_HOST ""
#define MIMI_SECRET_PROXY_PORT ""
/* Brave Search API */
#define MIMI_SECRET_SEARCH_KEY ""

View File

@@ -15,6 +15,12 @@
static const char *TAG = "proxy";
/* Only show warnings/errors by default; reduce polling noise.
 * Lowers the verbosity of this module's log tag to ESP_LOG_WARN.
 * The GCC `constructor` attribute makes this run automatically during
 * startup instead of being called explicitly.
 * NOTE(review): relies on esp_log_level_set() being usable that early in
 * boot — confirm against the ESP-IDF version in use. */
__attribute__((constructor)) static void proxy_log_level(void)
{
esp_log_level_set(TAG, ESP_LOG_WARN);
}
/* ── Config (cached from NVS) ─────────────────────────────────── */
static char s_proxy_host[64] = {0};
@@ -22,6 +28,11 @@ static uint16_t s_proxy_port = 0;
esp_err_t http_proxy_init(void)
{
/* Build-time secrets take highest priority */
if (MIMI_SECRET_PROXY_HOST[0] != '\0' && MIMI_SECRET_PROXY_PORT[0] != '\0') {
strncpy(s_proxy_host, MIMI_SECRET_PROXY_HOST, sizeof(s_proxy_host) - 1);
s_proxy_port = (uint16_t)atoi(MIMI_SECRET_PROXY_PORT);
} else {
nvs_handle_t nvs;
esp_err_t err = nvs_open(MIMI_NVS_PROXY, NVS_READONLY, &nvs);
if (err == ESP_OK) {
@@ -30,6 +41,7 @@ esp_err_t http_proxy_init(void)
nvs_get_u16(nvs, MIMI_NVS_KEY_PROXY_PORT, &s_proxy_port);
nvs_close(nvs);
}
}
if (s_proxy_host[0] && s_proxy_port) {
ESP_LOGI(TAG, "Proxy configured: %s:%d", s_proxy_host, s_proxy_port);

View File

@@ -13,7 +13,7 @@
static const char *TAG = "telegram";
static char s_bot_token[128] = {0};
static char s_bot_token[128] = MIMI_SECRET_TG_TOKEN;
static int64_t s_update_offset = 0;
/* HTTP response accumulator */
@@ -257,7 +257,8 @@ static void telegram_poll_task(void *arg)
esp_err_t telegram_bot_init(void)
{
/* Load token from NVS */
/* Build-time secret takes highest priority */
if (s_bot_token[0] == '\0') {
nvs_handle_t nvs;
esp_err_t err = nvs_open(MIMI_NVS_TG, NVS_READONLY, &nvs);
if (err == ESP_OK) {
@@ -265,6 +266,7 @@ esp_err_t telegram_bot_init(void)
nvs_get_str(nvs, MIMI_NVS_KEY_TG_TOKEN, s_bot_token, &len);
nvs_close(nvs);
}
}
if (s_bot_token[0]) {
ESP_LOGI(TAG, "Telegram bot token loaded (len=%d)", (int)strlen(s_bot_token));

View File

@@ -71,15 +71,21 @@ esp_err_t wifi_manager_init(void)
esp_err_t wifi_manager_start(void)
{
/* Read credentials from NVS */
wifi_config_t wifi_cfg = {0};
/* Build-time secrets take highest priority */
if (MIMI_SECRET_WIFI_SSID[0] != '\0') {
strncpy((char *)wifi_cfg.sta.ssid, MIMI_SECRET_WIFI_SSID, sizeof(wifi_cfg.sta.ssid) - 1);
strncpy((char *)wifi_cfg.sta.password, MIMI_SECRET_WIFI_PASS, sizeof(wifi_cfg.sta.password) - 1);
} else {
/* Fall back to NVS */
nvs_handle_t nvs;
esp_err_t err = nvs_open(MIMI_NVS_WIFI, NVS_READONLY, &nvs);
if (err != ESP_OK) {
ESP_LOGW(TAG, "No WiFi credentials in NVS. Use CLI: wifi_set <SSID> <PASS>");
ESP_LOGW(TAG, "No WiFi credentials. Use CLI: wifi_set <SSID> <PASS>");
return ESP_ERR_NOT_FOUND;
}
wifi_config_t wifi_cfg = {0};
size_t len = sizeof(wifi_cfg.sta.ssid);
err = nvs_get_str(nvs, MIMI_NVS_KEY_SSID, (char *)wifi_cfg.sta.ssid, &len);
if (err != ESP_OK) {
@@ -91,6 +97,7 @@ esp_err_t wifi_manager_start(void)
len = sizeof(wifi_cfg.sta.password);
nvs_get_str(nvs, MIMI_NVS_KEY_PASS, (char *)wifi_cfg.sta.password, &len);
nvs_close(nvs);
}
ESP_LOGI(TAG, "Connecting to SSID: %s", wifi_cfg.sta.ssid);