From 91049454595c56772dbb8f31cc1451237607dffc Mon Sep 17 00:00:00 2001
From: Keyrxng <106303466+Keyrxng@users.noreply.github.com>
Date: Sun, 3 Nov 2024 23:17:18 +0000
Subject: [PATCH] chore: push old and merge missing ctx fixes

---
 .cspell.json                                  |  27 +--
 package.json                                  |   2 +-
 .../helpers/append-to-base-chat-history.ts    |   4 +-
 src/adapters/openai/helpers/completions.ts    |  87 +++-------
 .../openai/helpers/create-system-msg.ts       |  14 +-
 src/adapters/openai/helpers/prompts.ts        |   2 +-
 src/adapters/openai/types.ts                  |   6 +-
 src/handlers/ask-llm.ts                       | 159 ++++++++++--------
 src/handlers/comment-created-callback.ts      |   4 +-
 src/handlers/llm-query-output.ts              |   6 +-
 src/types/github-types.ts                     |   2 +-
 src/types/llm.ts                              |   8 +-
 tests/main.test.ts                            |   4 +-
 13 files changed, 138 insertions(+), 187 deletions(-)

diff --git a/.cspell.json b/.cspell.json
index ed9a1f5..ff444a7 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -1,14 +1,7 @@
 {
   "$schema": "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json",
   "version": "0.2",
-  "ignorePaths": [
-    "**/*.json",
-    "**/*.css",
-    "node_modules",
-    "**/*.log",
-    "./src/adapters/supabase/**/**.ts",
-    "/supabase/*"
-  ],
+  "ignorePaths": ["**/*.json", "**/*.css", "node_modules", "**/*.log", "./src/adapters/supabase/**/**.ts", "/supabase/*"],
   "useGitignore": true,
   "language": "en",
   "words": [
@@ -48,17 +41,7 @@
     "mobileprovision",
     "icns"
   ],
-  "dictionaries": [
-    "typescript",
-    "node",
-    "software-terms"
-  ],
-  "import": [
-    "@cspell/dict-typescript/cspell-ext.json",
-    "@cspell/dict-node/cspell-ext.json",
-    "@cspell/dict-software-terms"
-  ],
-  "ignoreRegExpList": [
-    "[0-9a-fA-F]{6}"
-  ]
-}
\ No newline at end of file
+  "dictionaries": ["typescript", "node", "software-terms"],
+  "import": ["@cspell/dict-typescript/cspell-ext.json", "@cspell/dict-node/cspell-ext.json", "@cspell/dict-software-terms"],
+  "ignoreRegExpList": ["[0-9a-fA-F]{6}"]
+}
diff --git a/package.json b/package.json
index 66487f1..34a965c 100644
--- a/package.json
+++ b/package.json
@@ -87,4 +87,4 @@
     "@commitlint/config-conventional"
   ]
 }
-}
\ No newline at end of file
+}
diff --git a/src/adapters/openai/helpers/append-to-base-chat-history.ts b/src/adapters/openai/helpers/append-to-base-chat-history.ts
index 472bf39..ca59cd6 100644
--- a/src/adapters/openai/helpers/append-to-base-chat-history.ts
+++ b/src/adapters/openai/helpers/append-to-base-chat-history.ts
@@ -2,7 +2,7 @@ import { createSystemMessage } from "./create-system-msg";
 import { ChatHistory, CreationParams, ToolCallResponse } from "../types";
 
 export function appendToConversation(params: CreationParams, toolCallsToAppend: ToolCallResponse[] = []): ChatHistory {
-  const { systemMessage, prompt, additionalContext, localContext, groundTruths, botName } = params;
+  const { systemMessage, query, additionalContext, localContext, groundTruths, botName } = params;
   const baseChat: ChatHistory = [
     {
       role: "system",
@@ -18,7 +18,7 @@ export function appendToConversation(params: CreationParams, toolCallsToAppend:
         content: [
           {
             type: "text",
-            text: prompt,
+            text: query,
           },
         ],
       },
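Review note: the `prompt` → `query` rename above has to stay in sync with `CreationParams` in `src/adapters/openai/types.ts`, which is updated later in this patch. A minimal sketch of the conversation shape this helper produces, using simplified stand-in types rather than the real `ChatHistory` (which this patch does not show):

```ts
// Stand-in types for illustration only; the real ones live in src/adapters/openai/types.ts.
type TextPart = { type: "text"; text: string };
type Message = { role: "system" | "user"; content: TextPart[] };

// Mirrors appendToConversation: a system message built from context, then the user's query.
function sketchConversation(systemMessage: string, query: string): Message[] {
  return [
    { role: "system", content: [{ type: "text", text: systemMessage }] },
    { role: "user", content: [{ type: "text", text: query }] }, // formerly `prompt`
  ];
}
```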
diff --git a/src/adapters/openai/helpers/completions.ts b/src/adapters/openai/helpers/completions.ts
index bf991d8..bd7c980 100644
--- a/src/adapters/openai/helpers/completions.ts
+++ b/src/adapters/openai/helpers/completions.ts
@@ -5,20 +5,9 @@ import { logger } from "../../../helpers/errors";
 import { appendToConversation } from "./append-to-base-chat-history";
 import { getAnswerAndTokenUsage } from "./get-answer-and-token-usage";
 import { CreationParams, ResponseFromLlm, ToolCallResponse } from "../types";
-import { MAX_COMPLETION_TOKENS } from "../constants";
 import { CompletionsModelHelper, ModelApplications } from "../../../types/llm";
 import { encode } from "gpt-tokenizer";
 
-export interface CompletionsType {
-  answer: string;
-  groundTruths: string[];
-  tokenUsage: {
-    input: number;
-    output: number;
-    total: number;
-  };
-}
-
 export class Completions extends SuperOpenAi {
   protected context: Context;
 
@@ -63,73 +52,38 @@ export class Completions extends SuperOpenAi {
     return this.getModelMaxTokenLimit("o1-mini");
   }
 
   async createCompletion(
-    {
-      query,
-      model,
+    params: {
+      systemMessage: string;
+      query: string;
+      model: string;
+      additionalContext: string[];
+      localContext: string[];
+      groundTruths: string[];
+      botName: string;
+      maxTokens: number;
+    },
+    chatHistory?: OpenAI.Chat.Completions.ChatCompletionMessageParam[]
+  ): Promise<ResponseFromLlm> {
+    const { query, model, additionalContext, localContext, groundTruths, botName, maxTokens } = params;
+    logger.info(`Creating completion for model: ${model} with query: ${query}`);
+    logger.info(`Context for completion:`, {
       additionalContext,
       localContext,
       groundTruths,
       botName,
-      maxTokens,
-    }: {
-      query: string,
-      model: string,
-      additionalContext: string[],
-      localContext: string[],
-      groundTruths: string[],
-      botName: string,
-      maxTokens: number
-    }
-  ): Promise<CompletionsType> {
-    const numTokens = await this.findTokenLength(query, additionalContext, localContext, groundTruths);
-    logger.info(`Number of tokens: ${numTokens}`);
-
-    const sysMsg = [
-      "You Must obey the following ground truths: ",
-      JSON.stringify(groundTruths) + "\n",
-      "You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.",
-      `Your name is: ${botName}`,
-      "\n",
-      "Main Context (Provide additional precedence in terms of information): ",
-      localContext.join("\n"),
-      "Secondary Context: ",
-      additionalContext.join("\n"),
-    ].join("\n");
-
-    logger.info(`System message: ${sysMsg}`);
-    logger.info(`Query: ${query}`);
+    });
 
     const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
       // tools: LLM_TOOLS, might not be a good idea to have this available for the general chatbot
       model: model,
-      messages: [
-        {
-          role: "system",
-          content: [
-            {
-              type: "text",
-              text: sysMsg,
-            },
-          ],
-        },
-        {
-          role: "user",
-          content: [
-            {
-              type: "text",
-              text: query,
-            },
-          ],
-        },
-      ],
+      messages: chatHistory || appendToConversation(params),
       temperature: 0.2,
       // This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
       // max_COMPLETION_tokens: MAX_COMPLETION_TOKENS,
       /**An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. */
-      max_completion_tokens: MAX_COMPLETION_TOKENS,
+      max_completion_tokens: maxTokens,
       top_p: 0.5,
       frequency_penalty: 0,
       presence_penalty: 0,
@@ -144,7 +98,7 @@
   }
 
   async handleFunctionCalling(res: OpenAI.Chat.Completions.ChatCompletion, params: CreationParams) {
-    const { systemMessage, prompt, model, additionalContext, localContext, groundTruths, botName } = params;
+    const { systemMessage, query, model, additionalContext, localContext, groundTruths, botName, maxTokens } = params;
     if (res.choices[0].finish_reason === "function_call") {
       const toolCalls = res.choices[0].message.tool_calls;
       const choiceMessage = res.choices[0]["message"];
@@ -218,12 +172,13 @@
       return await this.createCompletion(
         {
           systemMessage,
-          prompt,
+          query,
           model,
           additionalContext,
           localContext,
           groundTruths,
           botName,
+          maxTokens,
         },
         newChat
       );
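Review note: the signature change above replaces seven positional arguments with a single params object plus an optional chat history, so call sites read unambiguously. A hedged usage sketch (the values and surrounding wiring are illustrative, not lifted from the codebase):

```ts
// Hypothetical call site; assumes `completions` is the adapter instance from context.
const llmResponse = await completions.createCompletion({
  systemMessage: CHATBOT_DEFAULT_SYSTEM_MESSAGE,
  query: "What does this plugin do?", // the user's question (formerly `prompt`)
  model: "o1-mini",
  additionalContext: [], // e.g. reranked similar comments/issues
  localContext: [], // e.g. the formatted chat history
  groundTruths: ["No languages found in the repository"],
  botName: "UbiquityOS",
  maxTokens: 2000,
});
// llmResponse matches ResponseFromLlm: { answer, groundTruths, tokenUsage }
```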
diff --git a/src/adapters/openai/helpers/create-system-msg.ts b/src/adapters/openai/helpers/create-system-msg.ts
index 94ecfca..532f6a0 100644
--- a/src/adapters/openai/helpers/create-system-msg.ts
+++ b/src/adapters/openai/helpers/create-system-msg.ts
@@ -1,17 +1,13 @@
 export function createSystemMessage(systemMessage: string, additionalContext: string[], localContext: string[], groundTruths: string[], botName: string) {
   // safer to use array join than string concatenation
   const parts = [
-    "You Must obey the following ground truths: [",
-    groundTruths.join(":"),
-    "]\n",
+    `You Must obey the following ground truths: ${JSON.stringify(groundTruths)}\n`,
     systemMessage,
-    "Your name is : ",
-    botName,
-    "\n",
-    "Primary Context: ",
-    additionalContext.join("\n"),
-    "\nLocal Context: ",
+    `Your name is: ${botName}`,
+    "Main Context (Provide additional precedence in terms of information): ",
     localContext.join("\n"),
+    "Secondary Context: ",
+    additionalContext.join("\n"),
   ];
 
   return parts.join("\n");
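Review note: with the reordering above, `createSystemMessage` now mirrors the inline `sysMsg` that `createCompletion` used to build: ground truths first, then the system message, the bot name, the local (primary) context, and finally the additional (secondary) context. Roughly, with illustrative inputs:

```ts
// Illustrative only; the trailing comment shows the approximate joined output.
const msg = createSystemMessage(
  "You are a helpful GitHub bot.", // systemMessage
  ["similar issue text"], // additionalContext (secondary)
  ["current conversation text"], // localContext (primary)
  ["No devDependencies found in the repository"], // groundTruths
  "UbiquityOS" // botName
);
// You Must obey the following ground truths: ["No devDependencies found in the repository"]
//
// You are a helpful GitHub bot.
// Your name is: UbiquityOS
// Main Context (Provide additional precedence in terms of information):
// current conversation text
// Secondary Context:
// similar issue text
```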
diff --git a/src/adapters/openai/helpers/prompts.ts b/src/adapters/openai/helpers/prompts.ts
index 3110adc..c1ceed3 100644
--- a/src/adapters/openai/helpers/prompts.ts
+++ b/src/adapters/openai/helpers/prompts.ts
@@ -1,4 +1,4 @@
-export const DEFAULT_SYSTEM_MESSAGE = `You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.
+export const CHATBOT_DEFAULT_SYSTEM_MESSAGE = `You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.
 
 # Steps
 
diff --git a/src/adapters/openai/types.ts b/src/adapters/openai/types.ts
index 1f0497d..7d6fa6f 100644
--- a/src/adapters/openai/types.ts
+++ b/src/adapters/openai/types.ts
@@ -6,22 +6,24 @@ export type TokenUsage = {
   input: number;
   output: number;
   total: number;
-  outputDetails?: OpenAI.Completions.CompletionUsage.CompletionTokensDetails;
+  reasoning_tokens?: number;
 };
 
 export type ResponseFromLlm = {
   answer: string;
+  groundTruths: string[];
   tokenUsage: TokenUsage;
 };
 
 export type CreationParams = {
   systemMessage: string;
-  prompt: string;
+  query: string;
   model: string;
   additionalContext: string[];
   localContext: string[];
   groundTruths: string[];
   botName: string;
+  maxTokens: number;
 };
 
 export type ToolCallResponse = {
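Review note: the `outputDetails` → `reasoning_tokens` change flattens out the one field this plugin cares about from the SDK's `CompletionTokensDetails` (o1-series models bill hidden reasoning tokens as output). The mapping presumably happens in `get-answer-and-token-usage.ts`, which this patch does not touch; a sketch of what it would look like:

```ts
import OpenAI from "openai";

type TokenUsage = { input: number; output: number; total: number; reasoning_tokens?: number };

// Sketch only: flatten the SDK usage object into the plugin's TokenUsage shape.
function toTokenUsage(usage: OpenAI.Completions.CompletionUsage | undefined): TokenUsage {
  return {
    input: usage?.prompt_tokens ?? 0,
    output: usage?.completion_tokens ?? 0,
    total: usage?.total_tokens ?? 0,
    reasoning_tokens: usage?.completion_tokens_details?.reasoning_tokens,
  };
}
```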
diff --git a/src/handlers/ask-llm.ts b/src/handlers/ask-llm.ts
index a9869b3..86c8e9d 100644
--- a/src/handlers/ask-llm.ts
+++ b/src/handlers/ask-llm.ts
@@ -1,96 +1,113 @@
 import { Context } from "../types";
-import { ResponseFromLlm } from "../adapters/openai/helpers/completions";
 import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comment";
 import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
 import { recursivelyFetchLinkedIssues } from "../helpers/issue-fetching";
 import { formatChatHistory } from "../helpers/format-chat-history";
-import { optimizeContext } from "../helpers/issue";
-import { DEFAULT_SYSTEM_MESSAGE } from "../adapters/openai/helpers/prompts";
 import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/chat-bot";
 import { findGroundTruths } from "./ground-truths/find-ground-truths";
 import { bubbleUpErrorComment, logger } from "../helpers/errors";
+import { ResponseFromLlm } from "../adapters/openai/types";
+import { CHATBOT_DEFAULT_SYSTEM_MESSAGE } from "../adapters/openai/helpers/prompts";
 
-/**
- * Asks a question to GPT and returns the response
- * @param context - The context object containing environment and configuration details
- * @param question - The question to ask GPT
- * @returns The response from GPT
- * @throws If no question is provided
- */
 export async function askQuestion(context: Context<"issue_comment.created">, question: string) {
+  if (!question) {
+    throw logger.error("No question provided");
+  }
+  // using any links in comments or issue/pr bodies to fetch more context
+  const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
+    context,
+    owner: context.payload.repository.owner.login,
+    repo: context.payload.repository.name,
+    issueNum: context.payload.issue.number,
+  });
+  // build a nicely structured system message containing a streamlined chat history
+  // includes the current issue, any linked issues, and any linked PRs
+  const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
+  logger.info(`${formattedChat.join("")}`);
+  return await askLlm(context, question, formattedChat);
+}
 
+export async function askLlm(context: Context, question: string, formattedChat: string[]): Promise<ResponseFromLlm> {
+  const {
+    env: { UBIQUITY_OS_APP_NAME },
+    config: { model, similarityThreshold, maxTokens },
+    adapters: {
+      supabase: { comment, issue },
+      voyage: { reranker },
+      openai: { completions },
+    },
+  } = context;
 
- export async function askQuestion(context: Context, question: string) {
-   if (!question) {
-     throw logger.error("No question provided");
-   }
-   // using any links in comments or issue/pr bodies to fetch more context
-   const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
-     context,
-     owner: context.payload.repository.owner.login,
-     repo: context.payload.repository.name,
-     issueNum: context.payload.issue.number,
-   });
-   // build a nicely structure system message containing a streamlined chat history
-   // includes the current issue, any linked issues, and any linked PRs
-   const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
-   logger.info(`${formattedChat.join("")}`);
-   return await askLlm(context, question, formattedChat);
- }
+  try {
+    // using db functions to find similar comments and issues
+    const [similarComments, similarIssues] = await Promise.all([
+      comment.findSimilarComments(question, 1 - similarityThreshold, ""),
+      issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
+    ]);
 
- export async function askLlm(context: Context, question: string, formattedChat: string[]): Promise<ResponseFromLlm> {
-   const {
-     env: { UBIQUITY_OS_APP_NAME },
-     config: { model, similarityThreshold, maxTokens },
-     adapters: {
-       supabase: { comment, issue },
-       voyage: { reranker },
-       openai: { completions },
-     },
-   } = context;
+    // combine the similar comments and issues into a single array
+    const similarText = [
+      ...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
+      ...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
+    ];
 
-   try {
-     // using db functions to find similar comments and issues
-     const [similarComments, similarIssues] = await Promise.all([
-       comment.findSimilarComments(question, 1 - similarityThreshold, ""),
-       issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
-     ]);
+    // filter out any empty strings
+    formattedChat = formattedChat.filter((text) => text);
 
-     // combine the similar comments and issues into a single array
-     const similarText = [
-       ...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
-       ...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
-     ];
+    logger.info(`Found similar texts: pre-rerank`, {
+      similarComments,
+      similarIssues,
+    });
 
-     // filter out any empty strings
-     formattedChat = formattedChat.filter((text) => text);
+    // rerank the similar text using voyageai
+    const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
 
-     // rerank the similar text using voyageai
-     const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
-     // gather structural data about the payload repository
-     const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);
+    logger.info(`Found similar texts: post-rerank`, {
+      rerankedText,
+    });
 
-     let groundTruths: string[] = [];
+    // gather structural data about the payload repository
+    const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);
 
-     if (!languages.length) {
-       groundTruths.push("No languages found in the repository");
-     }
+    let groundTruths: string[] = [];
 
-     if (!Reflect.ownKeys(dependencies).length) {
-       groundTruths.push("No dependencies found in the repository");
-     }
+    if (!languages.length) {
+      groundTruths.push("No languages found in the repository");
+    }
 
-     if (!Reflect.ownKeys(devDependencies).length) {
-       groundTruths.push("No devDependencies found in the repository");
-     }
+    if (!Reflect.ownKeys(dependencies).length) {
+      groundTruths.push("No dependencies found in the repository");
+    }
 
-     if (groundTruths.length === 3) {
-       return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
-     }
+    if (!Reflect.ownKeys(devDependencies).length) {
+      groundTruths.push("No devDependencies found in the repository");
+    }
 
-     groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
-     return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
-   } catch (error) {
-     throw bubbleUpErrorComment(context, error, false);
+    if (groundTruths.length === 3) {
+      return await completions.createCompletion({
+        systemMessage: CHATBOT_DEFAULT_SYSTEM_MESSAGE,
+        query: question,
+        model,
+        additionalContext: rerankedText,
+        localContext: formattedChat,
+        groundTruths,
+        botName: UBIQUITY_OS_APP_NAME,
+        maxTokens,
+      });
     }
+
+    groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
+    return await completions.createCompletion({
+      systemMessage: CHATBOT_DEFAULT_SYSTEM_MESSAGE,
+      query: question,
+      model,
+      additionalContext: rerankedText,
+      localContext: formattedChat,
+      groundTruths,
+      botName: UBIQUITY_OS_APP_NAME,
+      maxTokens,
+    });
+  } catch (error) {
+    throw bubbleUpErrorComment(context, error, false);
   }
+}
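Review note, two observations on the rewritten `askLlm`. First, `findSimilarComments`/`findSimilarIssues` receive `1 - similarityThreshold`, which reads like a similarity-to-distance conversion for the underlying vector search; since the config key says "similarity", a clarifying comment would help in a follow-up. Second, both branches now build an identical params object, so the duplication could be factored out, e.g. (hypothetical helper, not part of this patch; all other names mirror the code above):

```ts
// Hypothetical: build the shared params object once, vary only the ground truths.
function buildCompletionParams(groundTruths: string[], question: string, rerankedText: string[], formattedChat: string[], botName: string, model: string, maxTokens: number) {
  return {
    systemMessage: CHATBOT_DEFAULT_SYSTEM_MESSAGE,
    query: question,
    model,
    additionalContext: rerankedText,
    localContext: formattedChat,
    groundTruths,
    botName,
    maxTokens,
  };
}
// Both call sites would then become:
// return await completions.createCompletion(buildCompletionParams(groundTruths, question, rerankedText, formattedChat, UBIQUITY_OS_APP_NAME, model, maxTokens));
```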
diff --git a/src/handlers/comment-created-callback.ts b/src/handlers/comment-created-callback.ts
index 9a9d641..10784b2 100644
--- a/src/handlers/comment-created-callback.ts
+++ b/src/handlers/comment-created-callback.ts
@@ -31,11 +31,9 @@ export async function issueCommentCreatedCallback(
   }
   logger.info(`Asking question: ${question}`);
   return await handleLlmQueryOutput(context, await askQuestion(context, question));
-
-
 }
 
-function createStructuredMetadata(header: string | undefined, logReturn: LogReturn) {
+export function createStructuredMetadata(header: string | undefined, logReturn: LogReturn) {
   let logMessage, metadata;
   if (logReturn) {
     logMessage = logReturn.logMessage;
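Review note: exporting `createStructuredMetadata` lets `llm-query-output.ts` reuse it (next diff), but it creates a module cycle: `comment-created-callback.ts` already imports `handleLlmQueryOutput` from `llm-query-output.ts`, which now imports back. Both imports are only referenced inside function bodies, so this resolves at runtime, but hoisting the helper into a shared module would be cleaner, roughly (file name and the `LogReturn` import path are assumptions; only the first lines of the real body are visible in this patch, so the return value below is a placeholder):

```ts
// src/helpers/structured-metadata.ts (hypothetical location)
import { LogReturn } from "@ubiquity-os/ubiquity-os-logger"; // assumed import path

export function createStructuredMetadata(header: string | undefined, logReturn: LogReturn) {
  let logMessage, metadata;
  if (logReturn) {
    logMessage = logReturn.logMessage;
    metadata = logReturn.metadata; // assumed field; not shown in this patch
  }
  // Placeholder serialization so the sketch type-checks; the real body differs.
  return `\n<!-- ${header} ${JSON.stringify({ logMessage, metadata })} -->`;
}
```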
from "../helpers/errors"; import { Context } from "../types"; import { CallbackResult } from "../types/proxy"; import { addCommentToIssue } from "./add-comment"; +import { createStructuredMetadata } from "./comment-created-callback"; export async function handleLlmQueryOutput(context: Context, llmResponse: ResponseFromLlm): Promise { const { logger } = context; @@ -12,7 +13,6 @@ export async function handleLlmQueryOutput(context: Context, llmResponse: Respon throw logger.error(`No answer from OpenAI`); } logger.info(`Answer: ${answer}`, { tokenUsage }); - const tokens = `\n\n`; const metadataString = createStructuredMetadata( // don't change this header, it's used for tracking @@ -30,4 +30,4 @@ export async function handleLlmQueryOutput(context: Context, llmResponse: Respon } catch (error) { throw await bubbleUpErrorComment(context, error, false); } -} \ No newline at end of file +} diff --git a/src/types/github-types.ts b/src/types/github-types.ts index af351f3..b5692de 100644 --- a/src/types/github-types.ts +++ b/src/types/github-types.ts @@ -39,4 +39,4 @@ export type FetchedCodes = { org: string; repo: string; issueNumber: number; -}; \ No newline at end of file +}; diff --git a/src/types/llm.ts b/src/types/llm.ts index c92035b..7d5bedf 100644 --- a/src/types/llm.ts +++ b/src/types/llm.ts @@ -15,16 +15,16 @@ type CodeReviewAppParams = { export type AppParamsHelper = TApp extends "code-review" ? CodeReviewAppParams : TApp extends "chat-bot" - ? ChatBotAppParams - : never; + ? ChatBotAppParams + : never; export type CompletionsModelHelper = TApp extends "code-review" ? "gpt-4o" : TApp extends "chat-bot" ? "o1-mini" : never; export type GroundTruthsSystemMessage = TApp extends "code-review" ? (typeof GROUND_TRUTHS_SYSTEM_MESSAGES)["code-review"] : TApp extends "chat-bot" - ? (typeof GROUND_TRUTHS_SYSTEM_MESSAGES)["chat-bot"] - : never; + ? (typeof GROUND_TRUTHS_SYSTEM_MESSAGES)["chat-bot"] + : never; export type GroundTruthsSystemMessageTemplate = { truthRules: string[]; diff --git a/tests/main.test.ts b/tests/main.test.ts index 0c888dd..701b5b0 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -11,7 +11,7 @@ import { runPlugin } from "../src/plugin"; import { TransformDecodeCheckError, Value } from "@sinclair/typebox/value"; import { envSchema } from "../src/types/env"; import { ResponseFromLlm } from "../src/adapters/openai/helpers/completions"; -import { CompletionsType } from "../src/adapters/openai/helpers/completions"; +import { ResponseFromLlm } from "../src/adapters/openai/helpers/completions"; import { logger } from "../src/helpers/errors"; const TEST_QUESTION = "what is pi?"; @@ -412,7 +412,7 @@ function createContext(body = TEST_SLASH_COMMAND) { getModelMaxOutputLimit: () => { return 50000; }, - createCompletion: async (): Promise => { + createCompletion: async (): Promise => { return { answer: MOCK_ANSWER, groundTruths: [MOCK_ANSWER],