forked from ubiquity-os-marketplace/command-ask
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
465 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
OPENAI_API_KEY="MY_SECRET" | ||
SUPABASE_URL="" | ||
SUPABASE_KEY="" | ||
VOYAGEAI_API_KEY="" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import { SupabaseClient } from "@supabase/supabase-js"; | ||
import { Context } from "../types"; | ||
import { VoyageAIClient } from "voyageai"; | ||
import { Embeddings } from "./supabase/helpers/embeddings"; | ||
|
||
/**
 * Assembles the adapter registry for the given clients and context.
 *
 * @param supabaseClient - Supabase client handed to the embeddings adapter.
 * @param voyage - VoyageAI client handed to the embeddings adapter.
 * @param context - Context handed to the embeddings adapter.
 * @returns An object exposing the adapter as `supabase.embeddings`.
 */
export function createAdapters(supabaseClient: SupabaseClient, voyage: VoyageAIClient, context: Context) {
  const embeddings = new Embeddings(voyage, supabaseClient, context);
  const supabase = { embeddings };
  return { supabase };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import { SupabaseClient } from "@supabase/supabase-js"; | ||
import { Super } from "./supabase"; | ||
import { Context } from "../../../types/context"; | ||
import { VoyageAIClient } from "voyageai"; | ||
import { CommentType, EmbeddingClass, IssueSimilaritySearchResult } from "../../../types/embeddings"; | ||
|
||
/**
 * Length of the zero vector returned by `_embedWithVoyage` when it is given `null` text.
 * NOTE(review): presumably the output dimensionality of the Voyage model used below — confirm.
 */
const VECTOR_SIZE = 1024;

/**
 * Embeddings adapter: reads rows from the `content` table, runs similarity
 * searches through Supabase RPC functions, and creates embedding vectors with VoyageAI.
 *
 * Schema is as follows:
 * - `source_id` - The unique identifier for the embedding. (e.g. comment node_id, telegram chat_id, etc.)
 * - `type` - The type of embedding. (e.g. setup_instructions, dao_info, task, comment). Consider this the category.
 * - `plaintext` - The plaintext version of the markdown
 * - `embedding` - The embedding vector for the markdown
 * - `metadata` - Additional metadata for the embedding. (e.g. author_association, author_id, fileChunkIndex, filePath, isPrivate)
 * - `created_at` - The timestamp when the embedding was created
 * - `modified_at` - The timestamp when the embedding was last modified
 */
export class Embeddings extends Super {
  private readonly _voyageClient: VoyageAIClient;

  constructor(voyageClient: VoyageAIClient, supabase: SupabaseClient, context: Context) {
    super(supabase, context);
    this._voyageClient = voyageClient;
  }

  /**
   * Fetches the single `content` row whose `source_id` equals `sourceId`.
   *
   * @param sourceId - Value matched against the `source_id` column.
   * @returns The row data as returned by Supabase. When the query fails the
   * error is logged (except for code PGRST116) and `data` is still returned,
   * so callers should be prepared for a nullish value.
   */
  async getEmbedding(sourceId: string): Promise<CommentType> {
    const { data, error } = await this.supabase.from("content").select("*").eq("source_id", sourceId).single();
    // PGRST116 is deliberately not logged: it is the code `.single()` yields
    // when the query did not resolve to exactly one row.
    if (error && error.code !== "PGRST116") {
      this.context.logger.error("Error getting comment", { err: error, sourceId });
    }
    return data;
  }

  /**
   * Fetches every `content` row whose `type` equals `embeddingClass`.
   *
   * @param embeddingClass - Value matched against the `type` column.
   * @returns The matching rows, or an empty array when the query fails (the error is logged).
   */
  async getEmbeddingsByClass(embeddingClass: EmbeddingClass): Promise<CommentType[]> {
    const { data, error } = await this.supabase.from("content").select("*").eq("type", embeddingClass);
    if (error) {
      this.context.logger.error("Error getting comments", { err: error, embeddingClass });
      return [];
    }
    return data;
  }

  // Working with embeddings

  /**
   * Embeds `markdown` as a query and calls the `find_similar_content` RPC.
   *
   * @param markdown - Text to embed and search with.
   * @param threshold - Forwarded to the RPC as `threshold`.
   * @param currentId - Forwarded to the RPC as `curr_source_id`.
   * @returns The RPC result, or an empty array when the RPC fails (the error is logged).
   * @throws Error when the embedding request fails (see `_embedWithVoyage`).
   */
  async findSimilarContent(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[]> {
    const embedding = await this._embedWithVoyage(markdown, "query");
    const { data, error } = await this.supabase.rpc("find_similar_content", {
      curr_source_id: currentId,
      query_embedding: embedding,
      threshold: threshold,
    });
    if (error) {
      this.context.logger.error("Error finding similar issues", { err: error, currentId, threshold });
      return [];
    }
    return data;
  }

  /**
   * Compares a single query against all embeddings in the database and returns the most similar embeddings.
   *
   * Embeds `markdown` as a query and calls the `find_relevant_context` RPC.
   *
   * @param markdown - Text to embed and search with.
   * @param threshold - Forwarded to the RPC as `threshold`.
   * @returns The RPC result, or an empty array when the RPC fails (the error is logged).
   * @throws Error when the embedding request fails (see `_embedWithVoyage`).
   */
  async findRelevantContext(markdown: string, threshold: number): Promise<IssueSimilaritySearchResult[]> {
    const embedding = await this._embedWithVoyage(markdown, "query");
    const { data, error } = await this.supabase.rpc("find_relevant_context", {
      query_embedding: embedding,
      threshold: threshold,
    });
    if (error) {
      this.context.logger.error("Error finding relevant context", { err: error, threshold });
      return [];
    }
    return data;
  }

  /**
   * Calls the `hybridsearchwithmeta` RPC with a pre-computed query embedding.
   *
   * NOTE: `contentType`, `metadataKey` and `metadataValue` are accepted but are
   * currently NOT forwarded to the RPC (the corresponding arguments are
   * commented out below), so results are not filtered by them.
   *
   * @param queryEmbedding - The embedding vector of the query.
   * @param contentType - The classified content type (e.g. "setup_instructions"); currently unused.
   * @param metadataKey - The key to filter the metadata; currently unused.
   * @param metadataValue - The value to filter the metadata; currently unused.
   * @returns The RPC result, or `null` when the RPC fails (the error is logged).
   */
  async hybridSearchWithMetadata(
    queryEmbedding: number[],
    contentType: string,
    metadataKey?: string,
    metadataValue?: string
  ) {
    const { data, error } = await this.supabase.rpc("hybridsearchwithmeta", {
      query_embedding: queryEmbedding, // The embedding vector of the query
      // content_type: contentType, // The classified content type (e.g., "setup_instructions")
      // metadata_key: metadataKey, // The key to filter the metadata
      // metadata_value: metadataValue // The value to filter the metadata
    });

    if (error) {
      // Use the context logger like every other method of this class.
      this.context.logger.error("Error performing hybrid search", { err: error });
      return null;
    }

    return data; // Results from the hybrid search
  }

  /**
   * Produces an embedding vector for `text` using the VoyageAI client.
   *
   * @param text - Text to embed. `null` short-circuits to a zero vector of length `VECTOR_SIZE`.
   * @param inputType - Forwarded to VoyageAI as `inputType`.
   * @returns The first embedding in the response, or an empty array when the response carries none.
   * @throws Error when the VoyageAI request fails; the failure is logged first.
   */
  async _embedWithVoyage(text: string | null, inputType: "document" | "query"): Promise<number[]> {
    try {
      if (text === null) {
        return new Array<number>(VECTOR_SIZE).fill(0);
      }
      const response = await this._voyageClient.embed({
        input: text,
        model: "voyage-large-2-instruct",
        inputType: inputType,
      });
      return response.data?.[0]?.embedding ?? [];
    } catch (err) {
      throw new Error(this.context.logger.error("Error embedding comment", { err })?.logMessage.raw);
    }
  }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import { SupabaseClient } from "@supabase/supabase-js"; | ||
import { Context } from "../../../types/context"; | ||
|
||
/**
 * Base class that stores the Supabase client and the context so that
 * subclasses can reach them through `this.supabase` and `this.context`.
 */
export class Super {
  /**
   * @param supabase - Supabase client exposed to subclasses.
   * @param context - Context exposed to subclasses.
   */
  constructor(
    protected supabase: SupabaseClient,
    protected context: Context
  ) {}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import OpenAI from "openai"; | ||
import { Context } from "../types"; | ||
import { EmbeddingClass, CommentType } from "../types/embeddings"; | ||
import { createAdapters } from "../adapters"; | ||
import { createClient } from "@supabase/supabase-js"; | ||
import { VoyageAIClient } from "voyageai"; | ||
/**
 * Classifies a query and then searches the stored embeddings for related content.
 *
 * Before the query embedding is used to find related content, the query is
 * first categorised into one of the "EmbeddingClass" types:
 * "setup_instructions" | "dao_info" | "task" | "comment".
 *
 * The intent is to narrow the search to the section of information that is
 * relevant to the user's query. The entire embedding bank could be used, but
 * refining the search to a specific class of embeddings should yield better results.
 *
 * @param context - Provides the logger and the Supabase embeddings adapter.
 * @param query - The user's natural-language query.
 * @returns Whatever `hybridSearchWithMetadata` returns; this may be `null`
 * when the underlying search fails.
 */
export async function queryNlp(context: Context, query: string) {
  const {
    logger,
    adapters: { supabase },
  } = context;

  // Classification and embedding do not depend on each other, so run them concurrently.
  const [classification, queryEmbedding] = await Promise.all([
    zeroShotNlpClassify(context, query),
    supabase.embeddings._embedWithVoyage(query, "query"),
  ]);

  logger.info(`Classification of query`, { classification });
  const embeddings = await supabase.embeddings.hybridSearchWithMetadata(queryEmbedding, classification);

  // The search yields null on failure; reading `.length` unguarded would throw a TypeError.
  const foundCount = embeddings?.length ?? 0;
  logger.info(`Found ${foundCount} embeddings for query`, { query, classification });
  logger.info("Embeddings", { embeddings });

  return embeddings;
}
|
||
/**
 * Asks an OpenAI chat model to classify `query` into one of the embedding classes.
 *
 * The model's reply is free text, so it is normalised (trimmed, lower-cased,
 * whitespace/hyphens turned into underscores, other punctuation dropped) and
 * checked against the known labels before being returned.
 *
 * @param context - Supplies `env.OPENAI_API_KEY`, the optional `config.openAiBaseUrl` and the logger.
 * @param query - The user's natural-language query.
 * @returns The classification label for the query.
 * @throws Error when the model returns no content or a label that is not one of the known classes.
 */
export async function zeroShotNlpClassify(context: Context, query: string) {
  const {
    env: { OPENAI_API_KEY },
    config: { openAiBaseUrl },
    logger,
  } = context;

  const openAi = new OpenAI({
    apiKey: OPENAI_API_KEY,
    // Only override the base URL when one is configured.
    ...(openAiBaseUrl && { baseURL: openAiBaseUrl }),
  });

  const sysMsg = `You are developer onboarding assistant, built by Ubiquity DAO and your name is UbiquityOS.
You are designed to help developers onboard to the Ubiquity DAO ecosystem, all queries will pertain to the Ubiquity DAO ecosystem.
You will classify a query and from that classification, we are able to fetch a category of embeddings to use as context for the query.
There are four classifications of user query:
- setup_instructions: This relates directly to questions which seek to understand how to set up a project.
e.g: "How do I setup the kernel?" "How do I start a plugin?"
- dao_info: This relates to questions which seek to understand the Ubiquity DAO ecosystem.
e.g: "What is the Ubiquity DAO?" "What is the Ubiquity DAO mission?"
- task: Tasks are issue specifications, they cover features, bugs, and other tasks that need to be completed.
e.g: "What is issue xyz about?" "How do I fix issue xyz?"
- comment: Comments are user comments on issues, they can be used to provide context to a query.
e.g: "What are the comments on issue xyz?" "What do people think about issue xyz?"
Reply with a one-word classification of the query.
`;

  const res: OpenAI.Chat.Completions.ChatCompletion = await openAi.chat.completions.create({
    messages: [
      {
        role: "system",
        content: sysMsg,
      },
      {
        role: "user",
        content: query,
      },
    ],
    model: "chatgpt-4o-latest",
  });

  // The four labels listed in the system prompt above.
  // NOTE(review): assumes EmbeddingClass is exactly this string-literal union — confirm against the type.
  const knownClasses: readonly EmbeddingClass[] = ["setup_instructions", "dao_info", "task", "comment"];

  // `content` may be null and `choices` may be empty, so read defensively.
  const rawReply = res.choices[0]?.message?.content ?? "";
  const normalized = rawReply
    .trim()
    .toLowerCase()
    .replace(/[\s-]+/g, "_")
    .replace(/[^a-z_]/g, "");

  const classification = knownClasses.find((label) => label === normalized);
  if (classification === undefined) {
    logger.error("Unexpected classification returned by the model", { rawReply });
    throw new Error(`Unable to classify query: unexpected model reply "${rawReply}"`);
  }

  return classification;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.