Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix testing security issue #81

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ export ORIGINS="https://beta.ansari.chat,http://beta.ansari.chat,https://ansari.
export PGPASSWORD="" # Password for PostgreSQL database
export VECTARA_API_KEY="" # Authentication token for Vectara API

# Related to the Quran.com API
export QURAN_DOT_COM_API_KEY=""

# Directory for storing templates
export template_dir="." # Directory path for templates

Expand All @@ -29,3 +32,9 @@ export WHATSAPP_API_VERSION="<<CURRENT-VERSION-AS-MENTIONED-IN-SOURCE-URL-ABOVE>
export WHATSAPP_BUSINESS_PHONE_NUMBER_ID="<<YOUR-WHATSAPP-BUSINESS-PHONE-NUMBER-ID>>"
export WHATSAPP_ACCESS_TOKEN_FROM_SYS_USER="<<YOUR-SYSTEM-USER-ACCESS-TOKEN>>"
export WHATSAPP_VERIFY_TOKEN_FOR_WEBHOOK="<<YOUR-VERIFY-TOKEN-TO-VERIFY-WHATSAPP-WEBHOOK>>"

# Related to internal code logic
export PYTEST_API_KEY="<<SECRET KEY OBTAINED FROM ANSARI TEAM FOR LOCAL PYTESTING>>"
# Leave the values below when locally debugging the application
export LOGGING_LEVEL="DEBUG"
export DEBUG_MODE="True"
1 change: 1 addition & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
WHATSAPP_RECIPIENT_WAID: ${{ secrets.WHATSAPP_RECIPIENT_WAID }}
WHATSAPP_API_VERSION: ${{ secrets.WHATSAPP_API_VERSION }}
WHATSAPP_BUSINESS_PHONE_NUMBER_ID: ${{ secrets.WHATSAPP_BUSINESS_PHONE_NUMBER_ID }}
WHATSAPP_TEST_BUSINESS_PHONE_NUMBER_ID: ${{ secrets.WHATSAPP_TEST_BUSINESS_PHONE_NUMBER_ID }}
WHATSAPP_ACCESS_TOKEN_FROM_SYS_USER: ${{ secrets.WHATSAPP_ACCESS_TOKEN_FROM_SYS_USER }}
WHATSAPP_VERIFY_TOKEN_FOR_WEBHOOK: ${{ secrets.WHATSAPP_VERIFY_TOKEN_FOR_WEBHOOK }}
PYTHONPATH: src
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
.conda/
.venv/
.vscode/
src/ansari_backend.egg-info/*
diskcache_dir/
abandoned/
bin/
data/
datasources/
etc/
example_projects/
Expand Down
8 changes: 6 additions & 2 deletions data/mawsuah/strip_tashkeel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import textract
from tqdm.auto import tqdm

from ansari.ansari_logger import get_logger

logger = get_logger(__name__)


def strip_tashkeel_from_doc(input_file, output_file):
text = textract.process(input_file).decode("utf-8") # Extract text from .doc file
Expand All @@ -24,8 +28,8 @@ def strip_tashkeel_from_doc(input_file, output_file):
# iterate over all files in the directory
for input_file in tqdm(input_dir.glob("*.doc")):
if input_file.is_file() and input_file.suffix == ".doc":
print(f"Processing {input_file.name}...")
logger.info(f"Processing {input_file.name}...")
strip_tashkeel_from_doc(
input_file, output_dir.joinpath(input_file.with_suffix(".txt").name)
)
print(f"Done processing {input_file.name}")
logger.info(f"Done processing {input_file.name}")
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
}
},
"pricing": {
"billable": "True (actually sent as a boolean value, so no quotes)",
"billable": "True/False (actually sent as a boolean value, so no quotes)",
"pricing_model": "CBP",
"category": "service"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
"object": "whatsapp_business_account",
"entry": [
{
"id": "<<unique numeric ID for this specific response>>",
"changes": [
{
"value": {
"messaging_product": "whatsapp",
"metadata": {
"display_phone_number": "<<the WHATSAPP_BUSINESS_PHONE_NUMBER without non-numeric characters (e.g., +1 (555) 555-5555 -> 15555555555)>>",
"phone_number_id": "<<the WHATSAPP_BUSINESS_PHONE_NUMBER_ID (this ID is a numeric value)>>"
},
"contacts": [
{
"profile": {
"name": "<<the whatsapp sender's display name>>"
},
"wa_id": "<<the whatsapp sender's phone number without non-numeric characters>>"
}
],
"messages": [
{
"from": "<<the whatsapp sender's phone number without non-numeric characters>>",
"id": "wamid.<<UNIQUE_NUMERIC_ID_FOR_THIS_SPECIFIC_MSG>>",
"timestamp": "<<Unix timestamp of the message (e.g., 1730548169)>>",
"type": "<<the type of the sent message ('text'/'audio'/'video'/'image'/'sticker'/'document'/'location'/'contacts'/'unsupported')>>",
// The following field is only present if "type" has value "text"
"text": {
"body": "<<the whatsapp sender's sent message>>"
},
// The following field is only present if "type" has value "audio"
"audio": {
"mime_type": "audio/ogg",
"codecs": "opus",
"sha256": "<<SHA256 hash of the audio file>>",
"id": "<<unique numeric ID for this specific audio file>>",
"voice": "True/False (actually sent as a boolean value, so no quotes)"
},
// The following field is only present if "type" has value "image"
"image": {
"mime_type": "image/jpeg",
"sha256": "<<SHA256 hash of the image file>>",
"id": "<<unique numeric ID for this specific image file>>"
},
// The following field is only present if "type" has value "sticker"
"sticker": {
"mime_type": "image/webp",
"sha256": "<<SHA256 hash of the sticker file>>",
"id": "<<unique numeric ID for this specific sticker file>>",
"animated": "True/False (actually sent as a boolean value, so no quotes)"
},
// The following field is only present if "type" has value "video" (when sending a video/gif)
"video": {
"mime_type": "video/mp4",
"sha256": "<<SHA256 hash of the video file>>",
"id": "<<unique numeric ID for this specific video file>>"
},
// the following field is only present if "type" has value "document"
"document": {
"filename": "<<the name of the document file>>",
"mime_type": "application/pdf , application/vnd.openxmlformats-officedocument.wordprocessingml.document , etc.",
"sha256": "<<SHA256 hash of the document file>>",
"id": "<<unique numeric ID for this specific document file>>"
},
// the following field is only present if "type" has value "location"
"location": {
"address": "<<address of the location>>",
"latitude": "<<latitude of the location>>",
"longitude": "<<longitude of the location>>",
"name": "<<name of the location>>",
"url": "<<URL of the location (https://foursquare.com/v/LOCATION_ID)>>"
},
// the following field is only present if "type" has value "contacts"
"contacts":[
{
"name": {
"first_name": "<<first name of the contact>>",
"middle_name": "<<middle name of the contact>>",
"last_name": "<<last name of the contact>>",
"formatted_name": "<<formatted name of the contact (entire name)>>"
},
"phones": [
{
"phone": "<<phone number of the contact (with spaces and special characters)>>",
"wa_id": "<<phone number of the contact (without spaces and special characters)>>",
"type": "<<type of the phone number (e.g., 'MOBILE', 'WORK', 'HOME', etc.)>>"
}
]
}
],
// The following field is only present if "type" has value "unsupported"
// as of 2024-11-12, video notes, gifs sent from giphy (whatsapp keyboard), and polls are not supported
"errors": [
{
"code": 131051,
"title": "Message type unknown",
"message": "Message type unknown",
"error_data": {
"details": "Message type is currently not supported."
}
}
]
}
]
},
"field": "messages"
}
]
}
]
}
12 changes: 8 additions & 4 deletions setup_database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import os

import psycopg2

from config import get_settings
from ansari.ansari_logger import get_logger
from ansari.config import get_settings

logger = get_logger(__name__)


def import_sql_files(directory, db_url):
Expand All @@ -20,7 +24,7 @@ def import_sql_files(directory, db_url):
for filename in sorted_files:
if filename.endswith(".sql"):
file_path = os.path.join(directory, filename)
print("Importing:", file_path)
logger.info(f"Importing: {file_path}")

# Read the SQL file
with open(file_path, "r") as f:
Expand All @@ -29,7 +33,7 @@ def import_sql_files(directory, db_url):
# Execute the SQL query
cursor.execute(sql_query)
except psycopg2.Error as error:
print("Error executing", filename, ":", error)
logger.error(f"Error executing {filename}: {error}")
conn.rollback() # Rollback the transaction in case of error

# Commit changes to the database
Expand All @@ -39,7 +43,7 @@ def import_sql_files(directory, db_url):
cursor.close()

except (Exception, psycopg2.DatabaseError) as error:
print("Error:", error)
logger.error(f"Error: {error}")
finally:
if conn is not None:
conn.close()
Expand Down
14 changes: 3 additions & 11 deletions src/ansari/agents/ansari.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import hashlib
import json
import logging
import os
import time
import traceback
Expand All @@ -10,20 +9,13 @@
import litellm
from langfuse.decorators import langfuse_context, observe

from ansari.config import get_settings
from ansari.ansari_logger import get_logger
from ansari.tools.search_hadith import SearchHadith
from ansari.tools.search_vectara import SearchVectara
from ansari.tools.search_quran import SearchQuran
from ansari.tools.search_vectara import SearchVectara
from ansari.util.prompt_mgr import PromptMgr

logger = logging.getLogger(__name__ + ".Ansari")
logging_level = get_settings().LOGGING_LEVEL.upper()
logger.setLevel(logging_level)

# # Uncomment below when logging above doesn't output to std, and you want to see the logs in the console
# console_handler = logging.StreamHandler()
# console_handler.setLevel(logging_mode)
# logger.addHandler(console_handler)
logger = get_logger(__name__ + ".Ansari")


class Ansari:
Expand Down
18 changes: 6 additions & 12 deletions src/ansari/agents/ansari_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,18 @@

import litellm

from ansari.ansari_logger import get_logger
from ansari.tools.search_hadith import SearchHadith
from ansari.tools.search_vectara import SearchVectara
from ansari.tools.search_quran import SearchQuran
from ansari.tools.search_vectara import SearchVectara
from ansari.util.prompt_mgr import PromptMgr

logger = logging.getLogger(__name__ + ".AnsariWorkflow")

if not sys.argv[0].endswith("main_api.py"):
logging_mode = logging.DEBUG
logging_level = logging.DEBUG
else:
logging_mode = logging.INFO

logger.setLevel(logging_mode)
logging_level = logging.INFO

# # Uncomment below when logging above doesn't output to std, and you want to see the logs in the console
# console_handler = logging.StreamHandler()
# console_handler.setLevel(logging_mode)
# logger.addHandler(console_handler)
logger = get_logger(__name__ + ".AnsariWorkflow", logging_level)


class AnsariWorkflow:
Expand Down Expand Up @@ -118,7 +112,7 @@ def _execute_search_step(self, step_params, prev_outputs):
elif "query_from_prev_output_index" in step_params:
results = tool.run_as_string(
prev_outputs[step_params["query_from_prev_output_index"]],
metadata_filter=step_params.get("metadata_filter")
metadata_filter=step_params.get("metadata_filter"),
)
else:
raise ValueError(
Expand Down
16 changes: 10 additions & 6 deletions src/ansari/ansari_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from typing import Union

import bcrypt
import jwt
import psycopg2
import psycopg2.pool
from fastapi import HTTPException, Request
from jwt import ExpiredSignatureError, InvalidTokenError

from ansari.ansari_logger import get_logger
from ansari.config import Settings, get_settings

logger = logging.getLogger(__name__)
logging_level = get_settings().LOGGING_LEVEL.upper()
logger.setLevel(logging_level)
logger = get_logger(__name__)


class MessageLogger:
Expand Down Expand Up @@ -576,19 +576,23 @@ def convert_message_llm(self, msg):
else:
return {"role": msg[0], "content": msg[1]}

def store_quran_answer(self, surah: int, ayah: int, question: str, ansari_answer: str):
def store_quran_answer(
self, surah: int, ayah: int, question: str, ansari_answer: str
):
with self.get_connection() as conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO quran_answers (surah, ayah, question, ansari_answer, review_result, final_answer)
VALUES (%s, %s, %s, %s, 'pending', NULL)
""",
(surah, ayah, question, ansari_answer)
(surah, ayah, question, ansari_answer),
)
conn.commit()

def get_quran_answer(self, surah: int, ayah: int, question: str) -> Union[str, None]:
def get_quran_answer(
self, surah: int, ayah: int, question: str
) -> Union[str, None]:
"""
Retrieve the stored answer for a given surah, ayah, and question.

Expand Down
31 changes: 31 additions & 0 deletions src/ansari/ansari_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import logging

from ansari.config import get_settings


def get_logger(
    caller_file_name: str, logging_level=None, debug_mode=None
) -> logging.Logger:
    """
    Create (or fetch) and configure the logger for the specified caller.

    Calling this function more than once with the same name is safe: the
    console handler is attached at most once, so log lines are not duplicated.

    Args:
        caller_file_name (str): The name of the file/module requesting the logger.
        logging_level (Optional[Union[str, int]]): The logging level to set, either
            a level name (case-insensitive, e.g. "debug") or a numeric level
            (e.g. logging.INFO). If None, it defaults to the LOGGING_LEVEL
            from settings.
        debug_mode (Optional[bool]): If True, a console handler is attached to
            the logger; if False, no console handler is attached. If None, it
            defaults to the DEBUG_MODE from settings.

    Returns:
        logging.Logger: Configured logger instance.

    Raises:
        ValueError: If `logging_level` is a string that is not a known level name
            (raised by `logging.Logger.setLevel`).
    """
    logger = logging.getLogger(caller_file_name)

    if logging_level is None:
        logging_level = get_settings().LOGGING_LEVEL
    # Level names are registered in upper case; normalise so "debug" works too.
    if isinstance(logging_level, str):
        logging_level = logging_level.upper()
    logger.setLevel(logging_level)

    # Only consult the settings when the caller did not make an explicit choice,
    # so that an explicit True/False always wins over the configured default.
    if debug_mode is None:
        debug_mode = get_settings().DEBUG_MODE
    if not debug_mode:
        return logger

    # logging.getLogger() returns the same object for the same name, so a
    # repeated call would otherwise stack a new StreamHandler each time and
    # every record would be printed once per handler. Tag our handler and
    # reuse it if it is already attached.
    marker = "_ansari_console_handler"
    console_handler = next(
        (handler for handler in logger.handlers if getattr(handler, marker, False)),
        None,
    )
    if console_handler is None:
        console_handler = logging.StreamHandler()
        setattr(console_handler, marker, True)
        logger.addHandler(console_handler)
    # Keep the handler's threshold in sync with the most recent request.
    console_handler.setLevel(logging_level)

    return logger
Loading
Loading