update getting started code

tensorlakeai · Jun 16, 2024 · 2f21e5e · 2f21e5e
1 parent b160a58
commit 2f21e5e
Show file tree

Hide file tree

Showing 4 changed files with 101 additions and 0 deletions.
diff --git a/examples/getting_started/example1.py b/examples/getting_started/example1.py
@@ -0,0 +1,18 @@
+from indexify import IndexifyClient, ExtractionGraph
+client = IndexifyClient()
+# Text example: a single policy runs the 'tensorlake/minilm-l6' extractor on documents added to the graph.
+extraction_graph_spec = """
+name: 'sportsknowledgebase'
+extraction_policies:
+   - extractor: 'tensorlake/minilm-l6'
+     name: 'minilml6'
+"""
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+print("indexes", client.indexes())
+
+content_ids = client.add_documents("sportsknowledgebase", ["Adam Silver is the NBA Commissioner", "Roger Goodell is the NFL commissioner"])
+client.wait_for_extraction(content_ids)
+# The index name combines the graph name and the policy name from the spec above.
+context = client.search_index(name="sportsknowledgebase.minilml6.embedding", query="NBA commissioner", top_k=1)
+print(context)
diff --git a/examples/getting_started/example2.py b/examples/getting_started/example2.py
@@ -0,0 +1,37 @@
+from indexify import IndexifyClient, ExtractionGraph
+import requests
+client = IndexifyClient()
+# Audio example: later policies name their upstream policy via content_source; input_params are plain YAML values.
+extraction_graph_spec = """
+name: 'audiosummary'
+extraction_policies:
+   - extractor: 'tensorlake/whisper-asr'
+     name: 'transcription'
+   - extractor: 'tensorlake/summarization'
+     name: 'summarizer'
+     input_params:
+        max_length: 400
+        min_length: 300
+        chunk_method: 'recursive'
+     content_source: 'transcription'
+   - extractor: 'tensorlake/minilm-l6'
+     name: 'minilml6'
+     content_source: 'summarizer'
+"""
+
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+
+# Download the sample audio; raise on an HTTP error instead of saving an error page as sample.mp3.
+response = requests.get("https://extractor-files.diptanu-6d5.workers.dev/sample-000009.mp3")
+response.raise_for_status()
+with open("sample.mp3", "wb") as file:
+    file.write(response.content)
+content_id = client.upload_file("audiosummary", "sample.mp3")
+
+client.wait_for_extraction(content_id)
+print(client.get_extracted_content(content_id))
+
+# Search the index named after the graph and the 'minilml6' policy, and show the top match.
+context = client.search_index(name="audiosummary.minilml6.embedding", query="President of America", top_k=1)
+print(context)
diff --git a/examples/getting_started/example3.py b/examples/getting_started/example3.py
@@ -0,0 +1,24 @@
+from indexify import IndexifyClient, ExtractionGraph
+import requests
+
+client = IndexifyClient()
+# Image example: a single policy runs the 'tensorlake/yolo-extractor' extractor on uploaded files.
+extraction_graph_spec = """
+name: 'imageknowledgebase'
+extraction_policies:
+   - extractor: 'tensorlake/yolo-extractor'
+     name: 'object_detection'
+"""
+
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+
+# Download the sample image; raise on an HTTP error instead of saving an error page as sample.jpg.
+response = requests.get("https://extractor-files.diptanu-6d5.workers.dev/people-standing.jpg")
+response.raise_for_status()
+with open("sample.jpg", "wb") as file:
+    file.write(response.content)
+content_id = client.upload_file("imageknowledgebase", "sample.jpg")
+client.wait_for_extraction(content_id)
+print(client.get_extracted_content(content_id))
+print(client.sql_query("select * from imageknowledgebase where object_name='skateboard';"))
diff --git a/examples/getting_started/example4.py b/examples/getting_started/example4.py
@@ -0,0 +1,22 @@
+from indexify import IndexifyClient, ExtractionGraph
+import requests
+client = IndexifyClient()
+# PDF example: a single policy runs the 'tensorlake/pdf-extractor' extractor on uploaded files.
+extraction_graph_spec = """
+name: 'pdfqa'
+extraction_policies:
+   - extractor: 'tensorlake/pdf-extractor'
+     name: 'docextractor'
+"""
+
+extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
+client.create_extraction_graph(extraction_graph)
+
+# Download the sample PDF; raise on an HTTP error instead of saving an error page as sample.pdf.
+response = requests.get("https://extractor-files.diptanu-6d5.workers.dev/scientific-paper-example.pdf")
+response.raise_for_status()
+with open("sample.pdf", "wb") as file:
+    file.write(response.content)
+content_id = client.upload_file("pdfqa", "sample.pdf")
+client.wait_for_extraction(content_id)
+print(client.get_extracted_content(content_id))