diff --git a/examples/getting_started/example1.py b/examples/getting_started/example1.py
new file mode 100644
index 000000000..30b05085d
--- /dev/null
+++ b/examples/getting_started/example1.py
@@ -0,0 +1,24 @@
+"""Getting started, example 1: add two sentences to a graph and search its index."""
+
+from indexify import IndexifyClient, ExtractionGraph
+
+client = IndexifyClient()
+
+# A single policy runs the 'tensorlake/minilm-l6' extractor on added content.
+extraction_graph_spec = """
+name: 'sportsknowledgebase'
+extraction_policies:
+  - extractor: 'tensorlake/minilm-l6'
+    name: 'minilml6'
+"""
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+print("indexes", client.indexes())
+
+content_ids = client.add_documents("sportsknowledgebase", ["Adam Silver is the NBA Commissioner", "Roger Goodell is the NFL commisioner"])
+# Wait for extraction before searching the index.
+client.wait_for_extraction(content_ids)
+
+# The index name is the graph name, the policy name and 'embedding' joined by dots.
+context = client.search_index(name="sportsknowledgebase.minilml6.embedding", query="NBA commissioner", top_k=1)
+print(context)
diff --git a/examples/getting_started/example2.py b/examples/getting_started/example2.py
new file mode 100644
index 000000000..b8e9ccad6
--- /dev/null
+++ b/examples/getting_started/example2.py
@@ -0,0 +1,41 @@
+"""Getting started, example 2: upload an audio file to a three-policy graph and search the result."""
+
+from indexify import IndexifyClient, ExtractionGraph
+import requests
+
+client = IndexifyClient()
+
+# Three policies; each 'content_source' refers to an earlier policy by name.
+# input_params values are plain YAML scalars, not Python-style annotations.
+extraction_graph_spec = """
+name: 'audiosummary'
+extraction_policies:
+  - extractor: 'tensorlake/whisper-asr'
+    name: 'transcription'
+  - extractor: 'tensorlake/summarization'
+    name: 'summarizer'
+    input_params:
+      max_length: 400
+      min_length: 300
+      chunk_method: 'recursive'
+    content_source: 'transcription'
+  - extractor: 'tensorlake/minilm-l6'
+    name: 'minilml6'
+    content_source: 'summarizer'
+"""
+
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+
+# Fail on an HTTP error instead of saving (and uploading) an error page as audio.
+response = requests.get("https://extractor-files.diptanu-6d5.workers.dev/sample-000009.mp3")
+response.raise_for_status()
+with open("sample.mp3", 'wb') as file:
+    file.write(response.content)
+content_id = client.upload_file("audiosummary", "sample.mp3")
+
+client.wait_for_extraction(content_id)
+print(client.get_extracted_content(content_id))
+
+context = client.search_index(name="audiosummary.minilml6.embedding", query="President of America", top_k=1)
+print(context)
diff --git a/examples/getting_started/example3.py b/examples/getting_started/example3.py
new file mode 100644
index 000000000..72a97079b
--- /dev/null
+++ b/examples/getting_started/example3.py
@@ -0,0 +1,29 @@
+"""Getting started, example 3: run 'tensorlake/yolo-extractor' on an image and query the result with SQL."""
+
+from indexify import IndexifyClient, ExtractionGraph
+import requests
+
+client = IndexifyClient()
+
+extraction_graph_spec = """
+name: 'imageknowledgebase'
+extraction_policies:
+  - extractor: 'tensorlake/yolo-extractor'
+    name: 'object_detection'
+"""
+
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+
+# Fail on an HTTP error instead of saving (and uploading) an error page as an image.
+response = requests.get("https://extractor-files.diptanu-6d5.workers.dev/people-standing.jpg")
+response.raise_for_status()
+with open("sample.jpg", 'wb') as file:
+    file.write(response.content)
+content_id = client.upload_file("imageknowledgebase", "sample.jpg")
+
+client.wait_for_extraction(content_id)
+print(client.get_extracted_content(content_id))
+
+# The query selects from a table named after the extraction graph.
+print(client.sql_query("select * from imageknowledgebase where object_name='skateboard';"))
diff --git a/examples/getting_started/example4.py b/examples/getting_started/example4.py
new file mode 100644
index 000000000..81cdf0fed
--- /dev/null
+++ b/examples/getting_started/example4.py
@@ -0,0 +1,26 @@
+"""Getting started, example 4: run 'tensorlake/pdf-extractor' on a PDF and print the extracted content."""
+
+from indexify import IndexifyClient, ExtractionGraph
+import requests
+
+client = IndexifyClient()
+
+extraction_graph_spec = """
+name: 'pdfqa'
+extraction_policies:
+  - extractor: 'tensorlake/pdf-extractor'
+    name: 'docextractor'
+"""
+
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+
+# Fail on an HTTP error instead of saving (and uploading) an error page as a PDF.
+response = requests.get("https://extractor-files.diptanu-6d5.workers.dev/scientific-paper-example.pdf")
+response.raise_for_status()
+with open("sample.pdf", 'wb') as file:
+    file.write(response.content)
+content_id = client.upload_file("pdfqa", "sample.pdf")
+
+client.wait_for_extraction(content_id)
+print(client.get_extracted_content(content_id))