-
Notifications
You must be signed in to change notification settings - Fork 8
/
retrieval.py
46 lines (40 loc) · 1.32 KB
/
retrieval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from imports import *
# Module-level setup: everything below runs at import time.
# Load the seed examples from a CSV file. The path is relative, so it is
# resolved against the current working directory of the process.
loader = CSVLoader(file_path=r'Seed_Dataset.csv')
data = loader.load()
# Embedding model built with its default arguments; the same object is reused
# by add_to_vector_store() when new examples are embedded.
embeddings = HuggingFaceEmbeddings()
# Disabled on-disk persistence settings (presumably left over from an earlier
# Chroma-based setup -- TODO confirm and delete if no longer needed):
# persist_directory = 'docs/chroma_db/'
# !rm -rf ./docs/chroma_db
# Build the FAISS vector store from the loaded documents.
vector_db = FAISS.from_documents(
documents=data,
embedding=embeddings,
# persist_directory=persist_directory  (disabled, see note above)
)
# vector_db.persist()  (disabled together with the persistence settings)
def store_to_df(store):
    """Return the contents of a vector store's docstore as a DataFrame.

    Args:
        store: Vector store exposing ``docstore._dict``, a mapping from chunk
            id to a document object that has a ``page_content`` attribute.

    Returns:
        A ``pd.DataFrame`` with one row per stored document and exactly two
        columns, ``chunk_id`` and ``content``. Both columns are present even
        when the store holds no documents.
    """
    # NOTE: ``_dict`` is a private attribute of the docstore, so this may
    # break if the docstore implementation changes.
    rows = [
        {"chunk_id": chunk_id, "content": doc.page_content}
        for chunk_id, doc in store.docstore._dict.items()
    ]
    # Pin the columns so an empty store still yields a frame that can be
    # indexed by "chunk_id"/"content" instead of a column-less DataFrame.
    return pd.DataFrame(rows, columns=["chunk_id", "content"])
def show_vstore(store):
    """Tabulate the contents of ``store`` for inspection.

    The table is built with ``store_to_df`` but is neither rendered nor
    returned, because the ``display`` call below is commented out. The
    function therefore always returns ``None``.
    """
    contents_frame = store_to_df(store)
    # display(contents_frame)
def delete_tool_examples(store, tool_name, arg_name=None):
    """Delete stored examples whose text mentions a tool, and optionally an argument.

    Args:
        store: Vector store to modify. It must provide ``delete(ids)`` and be
            readable by ``store_to_df``.
        tool_name: Text to look for in each chunk's content.
        arg_name: Optional second text; when given, a chunk is deleted only
            if its content contains both ``tool_name`` and ``arg_name``.

    Returns:
        None. The matching chunk ids are printed before deletion. If the
        store's ``delete`` call fails, a message is printed instead of the
        error propagating.
    """
    vector_df = store_to_df(store)
    if vector_df.empty:
        # Nothing is stored, so nothing can match. This also avoids a
        # KeyError on "content" when the frame has no columns at all.
        return

    contents = vector_df['content']
    # regex=False: the names are matched literally, so characters such as
    # "." or "(" in a tool or argument name are not read as regex syntax.
    mask = contents.str.contains(tool_name, regex=False, na=False)
    if arg_name is not None:
        mask = mask & contents.str.contains(arg_name, regex=False, na=False)

    chunk_ids_to_delete = vector_df.loc[mask, 'chunk_id']
    print(chunk_ids_to_delete)
    if chunk_ids_to_delete.empty:
        # No chunk matched; skip the call into the store.
        return

    try:
        # Pass a plain list of ids rather than a pandas Series.
        store.delete(chunk_ids_to_delete.tolist())
    except Exception as exc:
        # Best-effort deletion: report the failure and its cause, but do not
        # swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
        print(f"Unable to delete: {exc}")
def add_to_vector_store(store, examples):
    """Embed new example texts and merge them into an existing vector store.

    Args:
        store: Target vector store; it is extended in place through its
            ``merge_from`` method.
        examples: Iterable of strings, each stored as one document. A single
            string is treated as one example rather than being iterated
            character by character.

    Returns:
        None.
    """
    if isinstance(examples, str):
        examples = [examples]
    docs = [Document(page_content=example) for example in examples]
    if not docs:
        # Nothing to embed, so skip building and merging an empty index.
        return
    # Build one temporary index for the whole batch with the module-level
    # ``embeddings`` and merge it once, instead of one index per example.
    extension = FAISS.from_documents(docs, embeddings)
    store.merge_from(extension)