information-retrieval

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 0e28cdce317175bca9edd6fb04b2513843c319b8
parent 29672f8c1a72cfaf285b4cb9dc1a0529e19b4c18
Author: Andrew Laack <andrew.laack@imbue.com>
Date:   Sun, 11 Jan 2026 05:02:43 -0600

Continued updates

Diffstat:
M llm-search/src/main.cpp | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M llm-search/src/prompts.cpp | 19 ++++++++++---------
2 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/llm-search/src/main.cpp b/llm-search/src/main.cpp @@ -1,4 +1,5 @@ #include <cpr/cpr.h> +#include <ctime> #include "../include/openai.hpp" #include <vector> #include <iostream> @@ -32,7 +33,12 @@ std::string getMessageFromChat(nlohmann::json json){ std::string getResearcherPrompt(){ std::string returnStr(researchPrompt); - std::string dateReplacement = "1-10-2026"; + + std::time_t t = std::time(nullptr); + std::tm* now = std::localtime(&t); + std::string dateReplacement = std::to_string(now->tm_mon + 1) + "-" + + std::to_string(now->tm_mday) + "-" + + std::to_string(now->tm_year + 1900); std::string toReplace = "$DATE"; std::size_t dateLocation = returnStr.find(toReplace); @@ -68,8 +74,10 @@ std::vector<std::pair<std::string, std::string>> parseToolCalls(std::string resp std::string toolcall; std::string remaining; + + // TODO: Do prefix matching if (space != std::string::npos){ - toolcall = current.substr(1, space); + toolcall = current.substr(1, space - 1); std::string unclean = current.substr(space + 1, current.size() - space - 2); for (size_t i = 0; i < unclean.size(); ++i) { if (unclean[i] == '\\' && i + 1 < unclean.size() && unclean[i + 1] == '"') { @@ -80,7 +88,7 @@ std::vector<std::pair<std::string, std::string>> parseToolCalls(std::string resp } else { - toolcall = "UNKNOWN"; + toolcall = "done"; } auto pair = std::pair(toolcall, remaining); @@ -127,34 +135,54 @@ int main() { std::string envVariable = "ANTHROPIC_API_KEY"; std::string model = "claude-opus-4-5-20251101"; std::string baseURL = "https://api.anthropic.com/v1/"; + std::string priorRuns = ""; + int linkNum = 1; connection.setToken(getenv(envVariable.c_str())); connection.setBaseUrl(baseURL); - std::vector<std::pair<std::string, std::string>> toolCallsAndParams; - std::string prefix; - - std::string prompt = getResearcherPrompt(); std::string chatMessage; std::getline(std::cin, chatMessage); - prefix = researchPrompt + chatMessage; - - nlohmann::json response = 
sendUserMessage(&connection, model, prefix); - std::cout << getMessageFromChat(response) << std::endl; - toolCallsAndParams = parseToolCalls(getMessageFromChat(response)); - for(int i = 0 ; i < toolCallsAndParams.size(); ++i){ - std::pair<std::string, std::string> current = toolCallsAndParams[i]; - if(current.first.compare("web_search")){ - std::cout << current.second << std::endl; - std::vector<std::string> params = nlohmann::json::parse(current.second).get<std::vector<std::string>>(); - for(int i = 0 ; i < params.size(); ++i){ - std::cout << "Searching the web for " << params[i] << std::endl; - std::cout << params[i] << std::endl; - std::vector<SearchResult> searchRes = searchSearxng("https://searx.laack.co", params[i]); - for (int i = 0 ; i < searchRes.size(); ++i){ - std::cout << searchRes[i].content << std::endl; - } + while(true){ + std::vector<std::pair<std::string, std::string>> toolCallsAndParams; + std::string message; + + std::string prompt = getResearcherPrompt(); + message = prompt + chatMessage; + + nlohmann::json response = sendUserMessage(&connection, model, priorRuns + message); + toolCallsAndParams = parseToolCalls(getMessageFromChat(response)); + + + for(int i = 0 ; i < toolCallsAndParams.size(); ++i){ + std::pair<std::string, std::string> current = toolCallsAndParams[i]; + if(current.first.compare("web_search") == 0){ + //std::cout << current.second << std::endl; + std::vector<std::string> params = nlohmann::json::parse(current.second).get<std::vector<std::string>>(); + for(int i = 0 ; i < params.size(); ++i){ + std::cout << "Searching the web for " << params[i] << std::endl; + //std::cout << params[i] << std::endl; + // TODO: add citations + std::vector<SearchResult> searchRes = searchSearxng("https://searx.laack.co", params[i]); + + priorRuns += "Searching for " + params[i]; + for (int i = 0 ; i < searchRes.size(); ++i){ + priorRuns += "Results from " + searchRes[i].url + " [" + std::to_string(linkNum) + "]:\n" + searchRes[i].content + "\n"; + 
linkNum += 1; + } + } + } + else{ + if(current.first == "done"){ + // TODO: Add a final summarization prompt + std::string summary = "Consider the above context, and use it to answer the following question:"; + std::string final = "\n Ensure your final answer makes extensive references to resources with [1] syntax."; + + nlohmann::json response = sendUserMessage(&connection, model, priorRuns + summary + chatMessage + final); + std::cout << getMessageFromChat(response) << std::endl; + return 0; + } } } } diff --git a/llm-search/src/prompts.cpp b/llm-search/src/prompts.cpp @@ -2,6 +2,8 @@ // TODO: Tell which iteration on // TODO: Add available tools +// TODO: Done should be its own thing, requesting the summary model to do a final summary. + std::string researchPrompt = R"( return ` You are an action orchestrator. Fulfill user requests by calling tools—no free-form replies. @@ -11,7 +13,7 @@ Today's date is $DATE ## Available Tools - `web_search(queries)` - Search the web. queries: array of 1-3 strings. -- `done(summary)` - Call when finished. summary: brief answer to user's question. +- `done()` - Call when finished for current context to be passed to another model for final question answering ## Output Format @@ -34,7 +36,7 @@ User: "Who won the mass lottery yesterday" web_search ["Massachusetts lottery results yesterday"] [After tool results return the needed info] -done("GPT-5 includes...") +done ## Wrong (NEVER do this) @@ -45,10 +47,9 @@ done("GPT-5 includes...") ## Rules 1. NEVER output normal text. ONLY call tools in the exact format above. -2. Your knowledge is outdated. Always use web_search, even for basic facts. -3. Don't assume things exist or don't exist—just search. -4. If 2-3 searches don't find something, call done and report that. -5. Call done when you have enough information to answer. -6. All information coming from web requests should be cited with the correct tag - - Example: "C++ 23 does not supply a KNN class in the STL. [1]" -)"; +2. 
Always use web_search to verify information. Assume all training data is out of date. +3. Don't assume things exist or don't exist +4. Call done when their is sufficent information in context to answer the user's question. + + +USER QUESTION:)";