Skip to content

Commit

Permalink
feat: add crunchbase
Browse files Browse the repository at this point in the history
  • Loading branch information
tomtobac committed Dec 12, 2023
1 parent cbfe47a commit fbe48ca
Show file tree
Hide file tree
Showing 8 changed files with 851 additions and 0 deletions.
3 changes: 3 additions & 0 deletions crunchbase/.env-template
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CRUNCHBASE_CONNECTOR_API_KEY=
CRUNCHBASE_API_KEY=
CRUNCHBASE_SEARCH_LIMIT=10
37 changes: 37 additions & 0 deletions crunchbase/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Crunchbase Quick Start Connector

This is a simple quick start connector that searches Crunchbase through its autocomplete API and returns the matching entities. It can also serve as a starting point for creating a brand new connector.

## Configuration

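This connector is configured through environment variables. `CRUNCHBASE_CONNECTOR_API_KEY` is the value used for bearer token authentication to protect this connector from abuse. `CRUNCHBASE_API_KEY` is the Crunchbase API key used to query the upstream autocomplete API and must be set. `CRUNCHBASE_SEARCH_LIMIT` optionally sets the maximum number of results requested per search.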

A `.env-template` file is provided with all the environment variables that are used by this connector.

## Development

Create a virtual environment and install dependencies with poetry. We recommend using in-project virtual environments:

```bash
$ poetry config virtualenvs.in-project true
$ poetry install --no-root
```

Then start the server

```bash
$ poetry run flask --app provider --debug run --port 5000
```

and check with curl to see that everything is working

```bash
$ curl --request POST \
--url http://localhost:5000/search \
--header 'Content-Type: application/json' \
--data '{
"query": "which species of penguin is the tallest?"
}'
```

Alternatively, load up the Swagger UI and try out the API from a browser: http://localhost:5000/ui/
657 changes: 657 additions & 0 deletions crunchbase/poetry.lock

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions crunchbase/provider/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import logging
import os

import connexion # type: ignore
from dotenv import load_dotenv

# Runs at import time, before create_app() reads prefixed environment variables
# into the Flask config.
load_dotenv()


# Despite the name, this is the file name of the OpenAPI specification that
# create_app() hands to connexion's add_api().
API_VERSION = "api.yaml"


class UpstreamProviderError(Exception):
    """Error reported by, or while talking to, the upstream data provider.

    The description passed at construction time is available as ``message``
    and is also what ``str()`` returns.
    """

    def __init__(self, message) -> None:
        # Forward the message to Exception so ``args``, ``repr()`` and
        # pickling behave like they do for any other exception.
        super().__init__(message)
        self.message = message

    def __str__(self) -> str:
        # Coerce so that a non-string message cannot make str() itself raise.
        return str(self.message)


def create_app() -> connexion.FlaskApp:
    """Create the connexion application and return its underlying Flask app.

    The endpoints come from the OpenAPI file named by ``API_VERSION``; their
    handlers are resolved relative to ``provider.app``. Configuration is read
    from environment variables whose prefix is derived from the name of the
    current working directory.

    Returns:
        The ``.app`` attribute of the connexion application, with ``APP_ID``
        set to the environment prefix.
    """
    # Register the endpoints described by the OpenAPI specification.
    connexion_app = connexion.FlaskApp(__name__, specification_dir="../../.openapi")
    handler_resolver = connexion.resolver.RelativeResolver("provider.app")
    connexion_app.add_api(API_VERSION, resolver=handler_resolver)
    logging.basicConfig(level=logging.INFO)

    # The prefix is the working directory's name, upper-cased and with
    # underscores removed (e.g. running from "crunchbase" gives "CRUNCHBASE").
    env_prefix = os.path.basename(os.getcwd()).upper().replace("_", "")

    server = connexion_app.app
    server.config.from_prefixed_env(env_prefix)
    server.config["APP_ID"] = env_prefix
    return server
30 changes: 30 additions & 0 deletions crunchbase/provider/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import logging
from flask import current_app as app
from flask import abort
from connexion.exceptions import Unauthorized
from . import UpstreamProviderError, provider


logger = logging.getLogger(__name__)

def search(body):
    """Handle a search request and return the provider's results.

    Args:
        body: Request payload; its ``"query"`` entry is the search term.

    Returns:
        A ``(payload, status, headers)`` tuple: the results wrapped under
        ``"results"``, status 200, and an ``X-Connector-Id`` header taken
        from the ``APP_ID`` config value.

    Upstream failures and configuration errors are logged and answered with
    HTTP 502 via ``abort``.
    """
    query = body["query"]
    logger.debug(f"Search request: {query}")

    try:
        results = provider.search(query)
        logger.info(f"Found {len(results)} results")
    except UpstreamProviderError as upstream_error:
        logger.error(f"Upstream search error: {upstream_error.message}")
        abort(502, upstream_error.message)
    except AssertionError as config_error:
        detail = f"Crunchbase config error: {config_error}"
        logger.error(detail)
        abort(502, detail)

    response_headers = {"X-Connector-Id": app.config.get("APP_ID")}
    return {"results": results}, 200, response_headers


# This function is run for all endpoints to ensure requests are using a valid API key
def apikey_auth(token):
    """Validate the API key presented with a request.

    Args:
        token: The credential extracted from the incoming request.

    Returns:
        An empty dict when the token matches the ``CONNECTOR_API_KEY``
        config value.

    Raises:
        Unauthorized: If the token does not match, or if no non-empty string
            key is configured (the check fails closed instead of accepting
            an empty credential).
    """
    # Imported locally so this function does not depend on module-level changes.
    import hmac

    expected = app.config.get("CONNECTOR_API_KEY")
    if not isinstance(expected, str) or not expected or not isinstance(token, str):
        raise Unauthorized()

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guessed key were correct. Bytes are compared
    # because compare_digest only accepts ASCII-only str arguments.
    if not hmac.compare_digest(token.encode("utf-8"), expected.encode("utf-8")):
        raise Unauthorized()

    # successfully authenticated
    return {}
46 changes: 46 additions & 0 deletions crunchbase/provider/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import requests
from flask import current_app as app

from . import UpstreamProviderError

# Module-level cache for the CrunchbaseApiClient instance; get_client() creates
# it on first use and returns the same object afterwards.
client = None


class CrunchbaseApiClient:
    """Small client for the Crunchbase v4 autocomplete endpoint."""

    API_URL = "https://api.crunchbase.com/api/v4/autocompletes"
    # Seconds to wait for Crunchbase before giving up. Without a timeout,
    # requests waits indefinitely and a stalled upstream would tie up a worker.
    REQUEST_TIMEOUT = 10

    def __init__(self, api_key, search_limit):
        """Store the Crunchbase user key and the per-search result limit."""
        self.user_key = api_key
        self.search_limit = search_limit

    def get_search_limit(self):
        """Return the result limit this client was created with."""
        return self.search_limit

    def get(self, params=None):
        """Send a GET request to the autocomplete endpoint and decode the JSON reply.

        Args:
            params: Optional mapping of query-string parameters.

        Returns:
            The decoded JSON body of a 200 response.

        Raises:
            UpstreamProviderError: If the request cannot be completed, the
                status is not 200, or the body is not valid JSON.
        """
        try:
            response = requests.get(
                self.API_URL, params=params, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as error:
            # Report only the exception type: the full text can contain the
            # request URL, whose query string carries the user key.
            raise UpstreamProviderError(
                f"Crunchbase request failed: {type(error).__name__}"
            ) from error

        if response.status_code != 200:
            message = response.text or f"Error: HTTP {response.status_code}"
            raise UpstreamProviderError(message)

        try:
            return response.json()
        except ValueError as error:
            # JSON decoding failures are ValueError subclasses.
            raise UpstreamProviderError(
                "Crunchbase returned a response that is not valid JSON"
            ) from error

    def autocomplete(self, term):
        """Query the autocomplete API for ``term`` and return the decoded response."""
        # @see: https://data.crunchbase.com/docs/using-autocomplete-api
        params = {
            "query": term,
            "limit": self.search_limit,
            "user_key": self.user_key,
        }
        return self.get(params)


def get_client():
    """Return the shared CrunchbaseApiClient, creating it on first use.

    The API key is read from the ``API_KEY`` config value on every call; the
    ``SEARCH_LIMIT`` config value (default 20) is used when the client is
    first created.

    Returns:
        The module-level ``CrunchbaseApiClient`` instance.

    Raises:
        AssertionError: If no API key is configured.
    """
    global client

    api_key = app.config.get("API_KEY")
    if not api_key:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``. AssertionError is kept as the type
        # because the request handler catches it to report config errors.
        raise AssertionError("CRUNCHBASE_API_KEY must be set")

    if not client:
        client = CrunchbaseApiClient(api_key, app.config.get("SEARCH_LIMIT", 20))

    return client
27 changes: 27 additions & 0 deletions crunchbase/provider/provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging

from .client import get_client

logger = logging.getLogger(__name__)

def serialize_results(data):
    """Convert a Crunchbase autocomplete response into connector documents.

    Args:
        data: Decoded response mapping; its ``"entities"`` entry is the list
            of matches. The mapping and its nested dicts are only read,
            never modified.

    Returns:
        A list of dicts with the keys ``text``, ``title``, ``url`` and ``id``.
        ``text`` is an empty string when an entity has no
        ``short_description``. Entities whose ``identifier`` lacks a field
        needed for the title, URL or id are skipped with a warning, so one
        incomplete entity does not fail the whole search.
    """
    # Same logger object as the module-level one; fetched here so the
    # function is self-contained.
    log = logging.getLogger(__name__)

    # debug data
    log.debug(f"Raw data: {data}")

    required_fields = ("value", "entity_def_id", "permalink", "uuid")
    serialized_data = []
    for entity in data.get("entities") or []:
        identifier = entity.get("identifier") or {}
        missing = [field for field in required_fields if field not in identifier]
        if missing:
            log.warning("Skipping entity with incomplete identifier, missing: %s", missing)
            continue

        url = (
            "https://www.crunchbase.com/"
            f"{identifier['entity_def_id']}/{identifier['permalink']}"
        )
        serialized_data.append({
            "text": entity.get("short_description", ""),
            "title": identifier["value"],
            "url": url,
            "id": identifier["uuid"],
        })
    return serialized_data


def search(query):
    """Look up ``query`` with the Crunchbase autocomplete client and return the serialized results."""
    raw_response = get_client().autocomplete(query)
    return serialize_results(raw_response)
18 changes: 18 additions & 0 deletions crunchbase/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[tool.poetry]
name = "crunchbase"
version = "1.0.0"
description = ""
authors = ["Tomeu Cabot <tomeu@cohere.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
flask = "2.2.5"
connexion = { extras = ["swagger-ui"], version = "^2.14.2" }
python-dotenv = "^1.0.0"
gunicorn = "^21.2.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

0 comments on commit fbe48ca

Please sign in to comment.