Skip to content

Commit

Permalink
update getting started code
Browse files Browse the repository at this point in the history
  • Loading branch information
diptanu committed Jun 16, 2024
1 parent b160a58 commit 2f21e5e
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 0 deletions.
18 changes: 18 additions & 0 deletions examples/getting_started/example1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from indexify import IndexifyClient, ExtractionGraph
client = IndexifyClient()

# Extraction graph with a single policy. The policy's `name` has to stay
# nested under its list item: written at column 0 it becomes a second
# top-level `name` key and replaces the graph name.
extraction_graph_spec = """
name: 'sportsknowledgebase'
extraction_policies:
  - extractor: 'tensorlake/minilm-l6'
    name: 'minilml6'
"""
extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
client.create_extraction_graph(extraction_graph)
print("indexes", client.indexes())

# Add two short documents to the graph and wait for extraction on them.
content_ids = client.add_documents(
    "sportsknowledgebase",
    [
        "Adam Silver is the NBA Commissioner",
        "Roger Goodell is the NFL commissioner",
    ],
)
client.wait_for_extraction(content_ids)

# Query the index named "<graph>.<policy>.embedding" for the closest match.
context = client.search_index(
    name="sportsknowledgebase.minilml6.embedding",
    query="NBA commissioner",
    top_k=1,
)
print(context)
37 changes: 37 additions & 0 deletions examples/getting_started/example2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from indexify import IndexifyClient, ExtractionGraph
import requests
client = IndexifyClient()

# Three chained policies: each `content_source` names the policy whose
# output feeds the next one (transcription -> summarizer -> minilml6).
# `chunk_method` must be a plain YAML value; a pasted Python annotation
# ("str = 'recursive'") would be passed through as that literal string.
extraction_graph_spec = """
name: 'audiosummary'
extraction_policies:
  - extractor: 'tensorlake/whisper-asr'
    name: 'transcription'
  - extractor: 'tensorlake/summarization'
    name: 'summarizer'
    input_params:
      max_length: 400
      min_length: 300
      chunk_method: 'recursive'
    content_source: 'transcription'
  - extractor: 'tensorlake/minilm-l6'
    name: 'minilml6'
    content_source: 'summarizer'
"""

extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
client.create_extraction_graph(extraction_graph)

# Download the sample audio first and fail loudly on an HTTP error, so an
# error page is never written to disk and uploaded as if it were audio.
sample_url = "https://extractor-files.diptanu-6d5.workers.dev/sample-000009.mp3"
response = requests.get(sample_url, timeout=60)
response.raise_for_status()
with open("sample.mp3", "wb") as file:
    file.write(response.content)
content_id = client.upload_file("audiosummary", "sample.mp3")

client.wait_for_extraction(content_id)
print(client.get_extracted_content(content_id))

# Search the index built from the summaries and show the best match.
context = client.search_index(
    name="audiosummary.minilml6.embedding",
    query="President of America",
    top_k=1,
)
print(context)




24 changes: 24 additions & 0 deletions examples/getting_started/example3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from indexify import IndexifyClient, ExtractionGraph
import requests

client = IndexifyClient()

# Extraction graph with a single object-detection policy. The policy's
# `name` stays nested under its list item so it does not replace the
# graph name.
extraction_graph_spec = """
name: 'imageknowledgebase'
extraction_policies:
  - extractor: 'tensorlake/yolo-extractor'
    name: 'object_detection'
"""

extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
client.create_extraction_graph(extraction_graph)

# Download the sample image first and fail loudly on an HTTP error, so an
# error page is never written to disk and uploaded as if it were an image.
sample_url = "https://extractor-files.diptanu-6d5.workers.dev/people-standing.jpg"
response = requests.get(sample_url, timeout=60)
response.raise_for_status()
with open("sample.jpg", "wb") as file:
    file.write(response.content)
content_id = client.upload_file("imageknowledgebase", "sample.jpg")

client.wait_for_extraction(content_id)
print(client.get_extracted_content(content_id))

# Query the extracted rows with SQL, filtering on the object name.
print(client.sql_query("select * from imageknowledgebase where object_name='skateboard';"))

22 changes: 22 additions & 0 deletions examples/getting_started/example4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from indexify import IndexifyClient, ExtractionGraph
import requests
client = IndexifyClient()

# Extraction graph with a single PDF extraction policy. The policy's `name`
# stays nested under its list item so it does not replace the graph name.
extraction_graph_spec = """
name: 'pdfqa'
extraction_policies:
  - extractor: 'tensorlake/pdf-extractor'
    name: 'docextractor'
"""

extraction_graph = ExtractionGraph.from_yaml(extraction_graph_spec)
client.create_extraction_graph(extraction_graph)

# Download the sample PDF first and fail loudly on an HTTP error, so an
# error page is never written to disk and uploaded as if it were a PDF.
sample_url = "https://extractor-files.diptanu-6d5.workers.dev/scientific-paper-example.pdf"
response = requests.get(sample_url, timeout=60)
response.raise_for_status()
with open("sample.pdf", "wb") as file:
    file.write(response.content)
content_id = client.upload_file("pdfqa", "sample.pdf")

client.wait_for_extraction(content_id)
print(client.get_extracted_content(content_id))

0 comments on commit 2f21e5e

Please sign in to comment.