From c1ffae6c03f47768711fd93d2780c0957f38400d Mon Sep 17 00:00:00 2001
From: khustup2 <sasun@activeloop.ai>
Date: Thu, 19 Dec 2024 20:30:46 +0000
Subject: [PATCH] v4.1.1 Release

---
 python/deeplake/__init__.py  |  46 ++--
 python/deeplake/__init__.pyi | 405 +++++++++++++++++++++++++++++++---
 python/deeplake/core.pyi     |  57 +++--
 python/deeplake/tql.pyi      |   4 +-
 python/deeplake/types.pyi    | 416 ++++++++++++++++++++++++-----------
 5 files changed, 721 insertions(+), 207 deletions(-)

diff --git a/python/deeplake/__init__.py b/python/deeplake/__init__.py
index 40a82c9082..5684486de2 100644
--- a/python/deeplake/__init__.py
+++ b/python/deeplake/__init__.py
@@ -1,5 +1,5 @@
 import os
-from typing import Callable, Any, Dict
+from typing import Callable, Any, Dict, Optional
 
 try:
     from tqdm import tqdm as progress_bar
@@ -14,7 +14,7 @@ def progress_bar(iterable, *args, **kwargs):
 import deeplake
 from ._deeplake import *
 
-__version__ = "4.1.0"
+__version__ = "4.1.1"
 
 __all__ = [
     "__version__",
@@ -180,7 +180,12 @@ def empty(*args, **kwargs):
     )
 
 
-def convert(src: str, dst: str, dst_creds: Dict[str, str] = None):
+def convert(
+    src: str,
+    dst: str,
+    dst_creds: Optional[Dict[str, str]] = None,
+    token: Optional[str] = None,
+) -> None:
     """
     Copies the v3 dataset at src into a new dataset in the new v4 format.
     """
@@ -192,41 +197,30 @@ def get_raw_columns(source):
         return [
             col.name
             for col in source.schema.columns
-            if not col.dtype.is_link
-            and col.dtype.kind
-            in {
+            if not col.dtype.is_link and col.dtype.kind in {
                 deeplake.types.TypeKind.Image,
                 deeplake.types.TypeKind.SegmentMask,
-                deeplake.types.TypeKind.BinaryMask,
+                deeplake.types.TypeKind.BinaryMask
             }
         ]
 
-    def transfer_non_link_data(source, dest, batch_size):
-        dl = deeplake._deeplake._Prefetcher(
-            source,
-            batch_size=batch_size,
-            adaptive=True,
-            raw_columns=set(get_raw_columns(source)),
-        )
+    def transfer_non_link_data(source, dest):
+        dl = deeplake._deeplake._Prefetcher(source, raw_columns=set(get_raw_columns(source)))
         for counter, batch in enumerate(progress_bar(dl), start=1):
             dest.append(batch)
             if counter % 100 == 0:
                 commit_data(dest)
         commit_data(dest, "Final commit of non-link data")
 
-    def transfer_with_links(source, dest, links, column_names, batch_size):
+    def transfer_with_links(source, dest, links, column_names):
         iterable_cols = [col for col in column_names if col not in links]
         link_sample_info = {link: source[link]._links_info() for link in links}
         dest.set_creds_key(link_sample_info[links[0]]["key"])
         pref_ds = source.query(f"SELECT {','.join(iterable_cols)}")
-        dl = deeplake._deeplake._Prefetcher(
-            pref_ds,
-            batch_size=batch_size,
-            adaptive=True,
-            raw_columns=set(get_raw_columns(source)),
-        )
+        dl = deeplake._deeplake._Prefetcher(pref_ds, raw_columns=set(get_raw_columns(source)))
 
         for counter, batch in enumerate(progress_bar(dl), start=1):
+            batch_size = len(batch[iterable_cols[0]])
             for link in links:
                 link_data = link_sample_info[link]["data"]
                 start_index = (counter - 1) * batch_size
@@ -238,8 +232,8 @@ def transfer_with_links(source, dest, links, column_names, batch_size):
                 commit_data(dest)
         commit_data(dest, "Final commit of linked data")
 
-    source_ds = deeplake.query(f'select * from "{src}"')
-    dest_ds = deeplake.like(source_ds, dst, dst_creds)
+    source_ds = deeplake.query(f'select * from "{src}"', token=token)
+    dest_ds = deeplake.like(source_ds, dst, dst_creds, token=token)
     commit_data(dest_ds, "Created dataset")
 
     column_names = [col.name for col in source_ds.schema.columns]
@@ -248,13 +242,11 @@ def transfer_with_links(source, dest, links, column_names, batch_size):
         for col in source_ds.schema.columns
         if source_ds.schema[col.name].dtype.is_link
     ]
-    batch_size = 10000
-
     print(f"Transferring {len(source_ds)} rows to {dst}...")
     if not links:
-        transfer_non_link_data(source_ds, dest_ds, batch_size)
+        transfer_non_link_data(source_ds, dest_ds)
     else:
-        transfer_with_links(source_ds, dest_ds, links, column_names, batch_size)
+        transfer_with_links(source_ds, dest_ds, links, column_names)
 
     for column in column_names:
         meta = dict(source_ds[column].metadata)
diff --git a/python/deeplake/__init__.pyi b/python/deeplake/__init__.pyi
index 541eef2078..7fb934332a 100644
--- a/python/deeplake/__init__.pyi
+++ b/python/deeplake/__init__.pyi
@@ -259,35 +259,158 @@ class Metadata(ReadOnlyMetadata):
 
 def query(query: str, token: str | None = None) -> DatasetView:
     """
-    Executes the given TQL query and returns a DatasetView.
+    Executes a TQL (Tensor Query Language) query and returns a filtered DatasetView.
+    
+    TQL provides SQL-like querying capabilities specifically designed for ML datasets, allowing you 
+    to filter, sort, and select data based on various criteria including vector similarity.
 
-    Compared to [deeplake.Dataset.query][], this version of query can join multiple datasets together
-    or query a single dataset without opening it first.
+    Args:
+        query: A TQL query string. The query can:
+            - Filter rows using WHERE clauses
+            - Sort results using ORDER BY
+            - Select specific columns using SELECT
+            - Perform text similarity search using BM25_SIMILARITY or vector similarity search using COSINE_SIMILARITY
+            - Join multiple datasets
+        token: Optional Activeloop token for authentication. Not required if using environment 
+            credentials.
+
+    Returns:
+        DatasetView: A view containing the query results. The view can be:
+            - Used directly for ML training
+            - Further filtered with additional queries
+            - Converted to PyTorch/TensorFlow dataloaders
+            - Materialized into a new dataset
 
     Examples:
+        Basic filtering:
         ```python
-        r = deeplake.query("select * from \\"al://my_org/dataset\\" where id > 30")
+        # Select images with high confidence labels
+        view = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE confidence > 0.9')
+        
+        # Get samples from specific classes
+        cats = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE label IN (\\'cat\\', \\'kitten\\')')
+        ```
+
+        Text similarity search:
+        ```python
+        # Rank rows by BM25 text relevance to the query text
+        similar = deeplake.query(f'''
+            SELECT * FROM "{ds_path}"
+            ORDER BY BM25_SIMILARITY(text_column, 'query text') DESC 
+            LIMIT 100
+        ''')
+        ```
+
+        Vector similarity search:
+        ```python
+        # Find nearest neighbor embeddings
+        neighbors = deeplake.query(f'''
+            SELECT * FROM "{ds_path}"
+            ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, ...]) DESC
+            LIMIT 10
+        ''')
+        ```
+
+        Joins across datasets:
+        ```python
+        # Join images with their metadata
+        results = deeplake.query(f'''
+            SELECT i.image, m.label, m.bbox 
+            FROM "{image_ds_path}" AS i
+            JOIN "{metadata_ds_path}" AS m ON i.id = m.image_id
+            WHERE m.verified = true
+        ''')
         ```
-    """
 
+        Using with ML frameworks:
+        ```python
+        # Filter dataset and create PyTorch dataloader
+        train_data = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE split = \\'train\\'')
+        train_loader = train_data.pytorch().dataloader(batch_size=32)
+        ```
+    """
     ...
 
 def query_async(query: str, token: str | None = None) -> Future:
     """
-    Asynchronously executes the given TQL query and returns a Future that will resolve into DatasetView.
+    Asynchronously executes a TQL (Tensor Query Language) query and returns a Future that will resolve into DatasetView.
+    
+    TQL provides SQL-like querying capabilities specifically designed for ML datasets, allowing you 
+    to filter, sort, and select data based on various criteria including vector similarity.
+
+    Args:
+        query: A TQL query string. The query can:
+            - Filter rows using WHERE clauses
+            - Sort results using ORDER BY
+            - Select specific columns using SELECT
+            - Perform text similarity search using BM25_SIMILARITY or vector similarity search using COSINE_SIMILARITY
+            - Join multiple datasets
+        token: Optional Activeloop token for authentication. Not required if using environment 
+            credentials.
+
+    Returns:
+        Future: A Future object that resolves to a DatasetView. The resulting view can be:
+            - Used directly for ML training
+            - Further filtered with additional queries
+            - Converted to PyTorch/TensorFlow dataloaders
+            - Materialized into a new dataset
 
     Examples:
+        Basic filtering with await:
         ```python
-        future = deeplake.query_async("select * where category == 'active'")
-        result = future.result()
-        for row in result:
-            print("Id is: ", row["id"])
+        # Select images with high confidence labels
+        view = await deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE confidence > 0.9')
+        
+        # Get samples from specific classes
+        cats = await deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE label IN (\\'cat\\', \\'kitten\\')')
+        ```
 
-        # or use the Future in an await expression
-        future = deeplake.query_async("select * where category == 'active'")
-        result = await future
-        for row in result:
-            print("Id is: ", row["id"])
+        Text similarity search with Future.result():
+        ```python
+        # Rank rows by BM25 text relevance to the query text
+        future = deeplake.query_async(f'''
+            SELECT * FROM "{ds_path}"
+            ORDER BY BM25_SIMILARITY(text_column, 'query text') DESC 
+            LIMIT 100
+        ''')
+        similar = future.result()  # Blocks until query completes
+        ```
+
+        Vector similarity search:
+        ```python
+        # Find nearest neighbor embeddings
+        neighbors = await deeplake.query_async(f'''
+            SELECT * FROM "{ds_path}"
+            ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, ...]) DESC
+            LIMIT 10
+        ''')
+        ```
+
+        Joins across datasets:
+        ```python
+        # Join images with their metadata
+        results = await deeplake.query_async(f'''
+            SELECT i.image, m.label, m.bbox 
+            FROM "{image_ds_path}" AS i
+            JOIN "{metadata_ds_path}" AS m ON i.id = m.image_id
+            WHERE m.verified = true
+        ''')
+        ```
+
+        Using with ML frameworks:
+        ```python
+        # Filter dataset and create PyTorch dataloader
+        future = deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE split = \\'train\\'')
+        train_data = future.result()
+        train_loader = train_data.pytorch().dataloader(batch_size=32)
+        ```
+
+        Non-blocking check:
+        ```python
+        # Check if query is complete without blocking
+        future = deeplake.query_async(f'SELECT * FROM "{ds_path}"')
+        if future.is_completed():
+            results = future.result()
         ```
     """
     ...
@@ -503,25 +626,246 @@ class ColumnDefinitionView:
 
 class ColumnView:
     """
-    Provides access to a column in a dataset.
+    Provides read-only access to a column in a dataset. ColumnView is designed for efficient 
+    data access in ML workflows, supporting both synchronous and asynchronous operations.
+
+    The ColumnView class allows you to:
+    - Access column data using integer indices, slices, or lists of indices
+    - Retrieve data asynchronously for better performance in ML pipelines
+    - Access column metadata and properties
+    - Get information about linked data if the column contains references
+
+    Examples:
+        Load image data from a column for training
+        ```python
+        # Access a single image
+        image = dataset["images"][0]
+        
+        # Load a batch of images
+        batch = dataset["images"][0:32]
+        
+        # Async load for better performance
+        images_future = dataset["images"].get_async(slice(0, 32))
+        images = images_future.result()
+        ```
+
+        Access embeddings for similarity search
+        ```python
+        # Get all embeddings
+        embeddings = dataset["embeddings"][:]
+        
+        # Get specific embeddings by indices
+        selected = dataset["embeddings"][[1, 5, 10]]
+        ```
+
+        Check column properties
+        ```python
+        # Get column name
+        name = dataset["images"].name
+        
+        # Access metadata
+        if "mean" in dataset["images"].metadata:
+            mean = dataset["images"].metadata["mean"]
+        ```
     """
 
-    def __getitem__(self, index: int | slice | list | tuple) -> typing.Any: ...
-    def get_async(self, index: int | slice | list | tuple) -> Future: ...
-    def __len__(self) -> int: ...
+    def __getitem__(self, index: int | slice | list | tuple) -> typing.Any:
+        """
+        Retrieve data from the column at the specified index or range.
+
+        Parameters:
+            index: Can be:
+                - int: Single item index
+                - slice: Range of indices (e.g., 0:10)
+                - list/tuple: Multiple specific indices
+
+        Returns:
+            The data at the specified index/indices. Type depends on the column's data type.
+
+        Examples:
+            ```python
+            # Get single item
+            image = column[0]
+            
+            # Get range
+            batch = column[0:32]
+            
+            # Get specific indices
+            items = column[[1, 5, 10]]
+            ```
+        """
+        ...
+
+    def get_async(self, index: int | slice | list | tuple) -> Future:
+        """
+        Asynchronously retrieve data from the column. Useful for large datasets or when 
+        loading multiple items in ML pipelines.
+
+        Parameters:
+            index: Can be:
+                - int: Single item index
+                - slice: Range of indices
+                - list/tuple: Multiple specific indices
+
+        Returns:
+            Future: A Future object that resolves to the requested data.
+
+        Examples:
+            ```python
+            # Async batch load
+            future = column.get_async(slice(0, 32))
+            batch = future.result()
+            
+            # Using with async/await
+            batch = await column.get_async(slice(0, 32))
+            ```
+        """
+        ...
+
+    def __len__(self) -> int:
+        """
+        Get the number of items in the column.
+
+        Returns:
+            int: Number of items in the column.
+        """
+        ...
+
     def __str__(self) -> str: ...
-    def _links_info(self) -> dict: ...
+
+    def _links_info(self) -> dict:
+        """
+        Get information about linked data if this column contains references to other datasets.
+        
+        Internal method used primarily for debugging and advanced operations.
+
+        Returns:
+            dict: Information about linked data.
+        """
+        ...
+
     @property
-    def metadata(self) -> ReadOnlyMetadata: ...
+    def metadata(self) -> ReadOnlyMetadata:
+        """
+        Access the column's metadata. Useful for storing statistics, preprocessing parameters,
+        or other information about the column data.
+
+        Examples:
+            ```python
+            # Access preprocessing parameters
+            mean = column.metadata["mean"]
+            std = column.metadata["std"]
+            
+            # Check available metadata
+            for key in column.metadata.keys():
+                print(f"{key}: {column.metadata[key]}")
+            ```
+        """
+        ...
+
     @property
-    def name(self) -> str: ...
+    def name(self) -> str:
+        """
+        Get the name of the column.
+
+        Returns:
+            str: The column name.
+        """
+        ...
+
 
 class Column(ColumnView):
-    def __setitem__(self, index: int | slice, value: typing.Any) -> None: ...
-    def set_async(self, index: int | slice, value: typing.Any) -> FutureVoid: ...
+    """
+    Provides read-write access to a column in a dataset. Column extends ColumnView with 
+    methods for modifying data, making it suitable for dataset creation and updates in 
+    ML workflows.
+
+    The Column class allows you to:
+    - Read and write data using integer indices, slices, or lists of indices
+    - Modify data asynchronously for better performance
+    - Access and modify column metadata
+    - Handle various data types common in ML: images, embeddings, labels, etc.
+
+    Examples:
+        Update training labels
+        ```python
+        # Update single label
+        dataset["labels"][0] = 1
+        
+        # Update batch of labels
+        dataset["labels"][0:32] = new_labels
+        
+        # Async update for better performance
+        future = dataset["labels"].set_async(slice(0, 32), new_labels)
+        future.wait()
+        ```
+
+        Store image embeddings
+        ```python
+        # Generate and store embeddings
+        embeddings = model.encode(images)
+        dataset["embeddings"][0:len(embeddings)] = embeddings
+        ```
+
+        Manage column metadata
+        ```python
+        # Store preprocessing parameters
+        dataset["images"].metadata["mean"] = [0.485, 0.456, 0.406]
+        dataset["images"].metadata["std"] = [0.229, 0.224, 0.225]
+        ```
+    """
+
+    def __setitem__(self, index: int | slice, value: typing.Any) -> None:
+        """
+        Set data in the column at the specified index or range.
+
+        Parameters:
+            index: Can be:
+                - int: Single item index
+                - slice: Range of indices (e.g., 0:10)
+            value: The data to store. Must match the column's data type.
+
+        Examples:
+            ```python
+            # Update single item
+            column[0] = new_image
+            
+            # Update range
+            column[0:32] = new_batch
+            ```
+        """
+        ...
+
+    def set_async(self, index: int | slice, value: typing.Any) -> FutureVoid:
+        """
+        Asynchronously set data in the column. Useful for large updates or when 
+        modifying multiple items in ML pipelines.
+
+        Parameters:
+            index: Can be:
+                - int: Single item index
+                - slice: Range of indices
+            value: The data to store. Must match the column's data type.
+
+        Returns:
+            FutureVoid: A FutureVoid that completes when the update is finished.
+
+        Examples:
+            ```python
+            # Async batch update
+            future = column.set_async(slice(0, 32), new_batch)
+            future.wait()
+            
+            # Using with async/await
+            await column.set_async(slice(0, 32), new_batch)
+            ```
+        """
+        ...
+
     @property
     def metadata(self) -> Metadata: ...
 
+
 class Version:
     """
     An atomic change within [deeplake.Dataset][]'s history
@@ -855,7 +1199,7 @@ class DatasetView:
             ds.add_column("id", int)
             ds.add_column("name", str)
             ds.append({"id": [1,2,3], "name": ["Mary", "Joe", "Bill"]})
-
+            
             row = ds[1]
             print("Id:", row["id"], "Name:", row["name"]) # Output: 2 Name: Joe
             rows = ds[1:2]
@@ -986,7 +1330,7 @@ class DatasetView:
         Examples:
             ```python
             from torch.utils.data import DataLoader
-
+            
             ds = deeplake.open("path/to/dataset")
             dataloader = DataLoader(ds.pytorch(), batch_size=60,
                                         shuffle=True, num_workers=10)
@@ -1015,6 +1359,7 @@ class DatasetView:
         """
         ...
 
+
 class Dataset(DatasetView):
     """
     Datasets are the primary data structure used in DeepLake. They are used to store and manage data for searching, training, evaluation.
@@ -1122,9 +1467,7 @@ class Dataset(DatasetView):
         """
         ...
 
-    def __getitem__(
-        self, input: int | slice | list | tuple | str
-    ) -> Row | RowRange | Column:
+    def __getitem__(self, input: int | slice | list | tuple | str) -> Row | RowRange | Column:
         """
         Returns a subset of data from the Dataset
 
@@ -1867,7 +2210,7 @@ def create(
         ```python
         import deeplake
         from deeplake import types
-
+        
         # Create a dataset in your local filesystem:
         ds = deeplake.create("directory_path")
         ds.add_column("id", types.Int32())
@@ -1936,7 +2279,7 @@ def create_async(
         ```python
         import deeplake
         from deeplake import types
-
+        
         # Asynchronously create a dataset in your local filesystem:
         ds = await deeplake.create_async("directory_path")
         await ds.add_column("id", types.Int32())
diff --git a/python/deeplake/core.pyi b/python/deeplake/core.pyi
index 6d8facfbb5..c082460cd5 100644
--- a/python/deeplake/core.pyi
+++ b/python/deeplake/core.pyi
@@ -8,24 +8,53 @@ import typing
 
 __all__ = ["Dict", "IndexMapping64", "MemoryBuffer"]
 
+
 class Dict:
-    def __getstate__(self: dict) -> dict: ...
-    def __setstate__(self: dict, arg0: dict) -> None: ...
-    def __eq__(self: dict, other: dict | dict) -> bool: ...
-    def __getitem__(self: dict, key: str) -> typing.Any: ...
-    def __len__(self: dict) -> int: ...
-    def __ne__(self: dict, other: dict | dict) -> bool: ...
-    def __str__(self: dict) -> str: ...
-    def items(self: dict) -> list: ...
-    def keys(self: dict) -> list[str]: ...
-    def to_dict(self: dict) -> dict: ...
+    def __getstate__(self: dict) -> dict:
+        ...
+
+    def __setstate__(self: dict, arg0: dict) -> None:
+        ...
+
+    def __eq__(self: dict, other: dict | dict) -> bool:
+        ...
+
+    def __getitem__(self: dict, key: str) -> typing.Any:
+        ...
+
+    def __len__(self: dict) -> int:
+        ...
+
+    def __ne__(self: dict, other: dict | dict) -> bool:
+        ...
+
+    def __str__(self: dict) -> str:
+        ...
+
+    def items(self: dict) -> list:
+        ...
+
+    def keys(self: dict) -> list[str]:
+        ...
+
+    def to_dict(self: dict) -> dict:
+        ...
+
 
 class IndexMapping64:
     def __getitem__(self, index: int) -> int: ...
-    def __getstate__(self) -> tuple: ...
-    def __iter__(self) -> typing.Iterator[int]: ...
-    def __len__(self) -> int: ...
-    def __setstate__(self, arg0: tuple) -> None: ...
+
+    def __getstate__(self) -> tuple:
+        ...
+
+    def __iter__(self) -> typing.Iterator[int]:
+        ...
+
+    def __len__(self) -> int:
+        ...
+
+    def __setstate__(self, arg0: tuple) -> None:
+        ...
 
 class MemoryBuffer:
     def __buffer__(self, flags):
diff --git a/python/deeplake/tql.pyi b/python/deeplake/tql.pyi
index 42af0b12c7..97f0d1e43c 100644
--- a/python/deeplake/tql.pyi
+++ b/python/deeplake/tql.pyi
@@ -20,9 +20,9 @@ def register_function(function: typing.Callable) -> None:
         ```python
         def next_number(a):
             return a + 1
-
+        
         deeplake.tql.register_function(next_number)
-
+        
         r = ds.query("SELECT * WHERE next_number(column_name) > 10")
         ```
     """
diff --git a/python/deeplake/types.pyi b/python/deeplake/types.pyi
index 1b46326859..896b198fc9 100644
--- a/python/deeplake/types.pyi
+++ b/python/deeplake/types.pyi
@@ -38,14 +38,19 @@ __all__ = [
 ]
 
 class QuantizationType:
-    Binary: typing.ClassVar[QuantizationType]
     """
-    Stores a binary quantized representation of the original embedding in the index rather than the a full copy of the embedding.
-    
-    This slightly decreases accuracy of searches, while significantly improving query time.   
+    Enumeration of available quantization types for embeddings.
+
+    Members:
+        Binary:
+            Stores a binary quantized representation of the original embedding in the index 
+            rather than a full copy of the embedding. This slightly decreases accuracy of 
+            searches, while significantly improving query time.
     """
 
+    Binary: typing.ClassVar[QuantizationType]
     __members__: typing.ClassVar[dict[str, QuantizationType]]
+    
     def __eq__(self, other: typing.Any) -> bool: ...
     def __getstate__(self) -> int: ...
     def __hash__(self) -> int: ...
@@ -56,20 +61,41 @@ class QuantizationType:
     def __repr__(self) -> str: ...
     def __setstate__(self, state: int) -> None: ...
     def __str__(self) -> str: ...
+    
     @property
-    def name(self) -> str: ...
+    def name(self) -> str:
+        """
+        Returns:
+            str: The name of the quantization type.
+        """
+        ...
+    
     @property
-    def value(self) -> int: ...
+    def value(self) -> int:
+        """
+        Returns:
+            int: The integer value of the quantization type.
+        """
+        ...
 
 Binary: QuantizationType
+"""
+Binary quantization type for embeddings.
+
+This slightly decreases accuracy of searches while significantly improving query time
+by storing a binary quantized representation instead of the full embedding.
+"""
 
 class TextIndexType:
     """
-    Members:
+    Enumeration of available text indexing types.
 
-      Inverted
-
-      BM25
+    Members:
+        Inverted:
+            A text index that supports keyword lookup. Can be used with ``CONTAINS(column, 'wanted_value')``.
+        BM25:
+            A BM25-based index of text data. Can be used with ``BM25_SIMILARITY(column, 'search text')`` 
+            in a TQL ``ORDER BY`` clause.
     """
 
     BM25: typing.ClassVar[TextIndexType]
@@ -86,14 +112,28 @@ class TextIndexType:
     def __repr__(self) -> str: ...
     def __setstate__(self, state: int) -> None: ...
     def __str__(self) -> str: ...
+    
     @property
-    def name(self) -> str: ...
+    def name(self) -> str:
+        """
+        Returns:
+            str: The name of the text index type.
+        """
+        ...
+    
     @property
-    def value(self) -> int: ...
+    def value(self) -> int:
+        """
+        Returns:
+            int: The integer value of the text index type.
+        """
+        ...
 
 class DataType:
     """
     The base class all specific types extend from.
+
+    This class provides the foundation for all data types in deeplake.
     """
 
     def __eq__(self, other: DataType) -> bool: ...
@@ -101,67 +141,106 @@ class DataType:
     def __str__(self) -> str: ...
 
 class Type:
-    """ """
+    """
+    Base class for all complex data types in deeplake.
+    
+    This class pairs an underlying DataType (see ``data_type``) with additional information for complex types
+    like images, embeddings, and sequences.
+    """
 
     def __str__(self) -> str: ...
     def __eq__(self, other: Type) -> bool: ...
     def __ne__(self, other: Type) -> bool: ...
-    @property(readonly=True)
-    def data_type(self) -> DataType: ...
-    @property(readonly=True)
-    # Temporary workaround. Need to remove `deeplake._deeplake` from the return type.
-    def default_format(self) -> deeplake._deeplake.formats.DataFormat: ...
+
+    @property
+    def data_type(self) -> DataType:
+        """
+        Returns:
+            DataType: The underlying data type of this type.
+        """
+        ...
+
+    @property
+    def default_format(self) -> deeplake._deeplake.formats.DataFormat:
+        """
+        Returns:
+            DataFormat: The default format used for this type.
+        """
+        ...
+
     @property
     def id(self) -> str:
         """
-        The id (name) of the data type
+        Returns:
+            str: The id (name) of the data type.
         """
         ...
 
     @property
-    def is_sequence(self) -> bool: ...
+    def is_sequence(self) -> bool:
+        """
+        Returns:
+            bool: True if this type is a sequence, False otherwise.
+        """
+        ...
+
     @property
-    def is_link(self) -> bool: ...
+    def is_link(self) -> bool:
+        """
+        Returns:
+            bool: True if this type is a link, False otherwise.
+        """
+        ...
+
     @property
-    def is_image(self) -> bool: ...
+    def is_image(self) -> bool:
+        """
+        Returns:
+            bool: True if this type is an image, False otherwise.
+        """
+        ...
+
     @property
-    def is_segment_mask(self) -> bool: ...
+    def is_segment_mask(self) -> bool:
+        """
+        Returns:
+            bool: True if this type is a segment mask, False otherwise.
+        """
+        ...
+
     @property
-    def kind(self) -> TypeKind: ...
+    def kind(self) -> TypeKind:
+        """
+        Returns:
+            TypeKind: The kind of this type.
+        """
+        ...
+
     @property
     def shape(self) -> list[int] | None:
         """
-        The shape of the data type if applicable. Otherwise none
+        Returns:
+            list[int] | None: The shape of the data type if applicable, otherwise None.
         """
         ...
 
 class TypeKind:
     """
-    Members:
-
-      Generic
-
-      Text
-
-      Dict
-
-      Embedding
-
-      Sequence
-
-      Image
-
-      BoundingBox
-
-      BinaryMask
+    Enumeration of all available type kinds in deeplake.
 
-      SegmentMask
-
-      Polygon
-
-      ClassLabel
-
-      Link
+    Members:
+        Generic: Generic data type
+        Text: Text data type
+        Dict: Dictionary data type
+        Embedding: Embedding data type
+        Sequence: Sequence data type
+        Image: Image data type
+        BoundingBox: Bounding box data type
+        BinaryMask: Binary mask data type
+        SegmentMask: Segmentation mask data type
+        Polygon: Polygon data type
+        ClassLabel: Class label data type
+        Link: Link data type
     """
 
     BinaryMask: typing.ClassVar[TypeKind]
@@ -177,6 +256,7 @@ class TypeKind:
     Sequence: typing.ClassVar[TypeKind]
     Text: typing.ClassVar[TypeKind]
     __members__: typing.ClassVar[dict[str, TypeKind]]
+
     def __eq__(self, other: typing.Any) -> bool: ...
     def __getstate__(self) -> int: ...
     def __hash__(self) -> int: ...
@@ -187,99 +267,132 @@ class TypeKind:
     def __repr__(self) -> str: ...
     def __setstate__(self, state: int) -> None: ...
     def __str__(self) -> str: ...
+    
     @property
-    def name(self) -> str: ...
+    def name(self) -> str:
+        """
+        Returns:
+            str: The name of the type kind.
+        """
+        ...
+    
     @property
-    def value(self) -> int: ...
+    def value(self) -> int:
+        """
+        Returns:
+            int: The integer value of the type kind.
+        """
+        ...
 
 @typing.overload
 def Array(dtype: DataType | str, dimensions: int) -> DataType: ...
 @typing.overload
 def Array(dtype: DataType | str, shape: list[int]) -> DataType: ...
+
 def Array(dtype: DataType | str, dimensions: int, shape: list[int]) -> DataType:
     """
-    A generic array of data.
+    Creates a generic array of data.
 
     Parameters:
-        dtype: The datatype of values in the array
-        dimensions: The number of dimensions/axies in the array. Unlike specifying `shape`, there is no constraint on the size of each dimension.
-        shape: Constrain the size of each dimension in the array
+        dtype: DataType | str
+            The datatype of values in the array
+        dimensions: int
+            The number of dimensions/axes in the array. Unlike specifying ``shape``,
+            there is no constraint on the size of each dimension.
+        shape: list[int]
+            Constrain the size of each dimension in the array
+
+    Returns:
+        DataType: A new array data type with the specified parameters.
 
     Examples:
-        ```python
-        # Create a three-dimensional array, where each dimension can have any number of elements
-        ds.add_column("col1", types.Array("int32", dimensions=3))
+        Create a three-dimensional array, where each dimension can have any number of elements::
 
-        # Create a three-dimensional array, where each dimension has a known size
-        ds.add_column("col2", types.Array(types.Float32(), shape=[50, 30, 768]))
-        ```
+            ds.add_column("col1", types.Array("int32", dimensions=3))
+        
+        Create a three-dimensional array, where each dimension has a known size::
+
+            ds.add_column("col2", types.Array(types.Float32(), shape=[50, 30, 768]))
     """
     ...
 
 def Bool() -> DataType:
     """
-    A boolean value
+    Creates a boolean value type.
+
+    Returns:
+        DataType: A new boolean data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Bool)
-        ds.add_column("col2", "bool")
-        ```
+        Create columns with boolean type::
+
+            ds.add_column("col1", types.Bool)
+            ds.add_column("col2", "bool")
     """
     ...
 
 def Text(index_type: str | TextIndexType | None = None) -> Type:
     """
-    Text data of arbitrary length.
-
-    Options for index_type are:
-
-    - [deeplake.types.Inverted][]
-    - [deeplake.types.BM25][]
+    Creates a text data type of arbitrary length.
 
     Parameters:
-        index_type: How to index the data in the column for faster searching. Default is `None` meaning "do not index"
+        index_type: str | TextIndexType | None
+            How to index the data in the column for faster searching.
+            Options are:
+            
+            - :data:`deeplake.types.Inverted`
+            - :data:`deeplake.types.BM25`
+            
+            Default is ``None`` meaning "do not index"
+
+    Returns:
+        Type: A new text data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Text)
-        ds.add_column("col2", "text")
-        ds.add_column("col3", str)
-        ds.add_column("col4", types.Text(index_type=types.Inverted))
-        ds.add_column("col4", types.Text(index_type=types.BM25))
-        ```
+        Create text columns with different configurations::
+
+            ds.add_column("col1", types.Text)
+            ds.add_column("col2", "text")
+            ds.add_column("col3", str)
+            ds.add_column("col4", types.Text(index_type=types.Inverted))
+            ds.add_column("col5", types.Text(index_type=types.BM25))
     """
     ...
 
 BM25: TextIndexType
 """
-A [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) based index of text data.
+A BM25-based index of text data.
 
-This index can be used with `BM25_SIMILARITY(column, 'search text')` in a TQL `ORDER BY` clause.
+This index can be used with ``BM25_SIMILARITY(column, 'search text')`` in a TQL ``ORDER BY`` clause.
+
+See Also:
+    `BM25 Algorithm <https://en.wikipedia.org/wiki/Okapi_BM25>`_
 """
 
 Inverted: TextIndexType
 """
 A text index that supports keyword lookup.
 
-This index can be used with `CONTAINS(column, 'wanted_value')`.
+This index can be used with ``CONTAINS(column, 'wanted_value')``.
 """
 
 def Dict() -> Type:
     """
-    Supports storing arbitrary key/value pairs in each row.
+    Creates a type that supports storing arbitrary key/value pairs in each row.
+
+    Returns:
+        Type: A new dictionary data type.
 
-    See [deeplake.types.Struct][] for a type that supports defining allowed keys.
+    See Also:
+        :func:`deeplake.types.Struct` for a type that supports defining allowed keys.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Dict)
+        Create and use a dictionary column::
 
-        ds.append([{"col1", {"a": 1, "b": 2}}])
-        ds.append([{"col1", {"b": 3, "c": 4}}])
-        ```
+            ds.add_column("col1", types.Dict)
+            ds.append([{"col1": {"a": 1, "b": 2}}])
+            ds.append([{"col1": {"b": 3, "c": 4}}])
     """
-
     ...
 
 def Embedding(
@@ -288,102 +401,135 @@ def Embedding(
     quantization: QuantizationType | None = None,
 ) -> Type:
     """
-    A single-dimensional embedding of a given length. See [deeplake.types.Array][] for a multidimensional array.
+    Creates a single-dimensional embedding of a given length.
 
     Parameters:
-        size: The size of the embedding
-        dtype: The datatype of the embedding. Defaults to float32
-        quantization: How to compress the embeddings in the index. Default uses no compression, but can be set to [deeplake.types.QuantizationType.Binary][]
+        size: int | None
+            The size of the embedding
+        dtype: DataType | str
+            The datatype of the embedding. Defaults to float32
+        quantization: QuantizationType | None
+            How to compress the embeddings in the index. Default uses no compression,
+            but can be set to :attr:`deeplake.types.QuantizationType.Binary`
+
+    Returns:
+        Type: A new embedding data type.
+
+    See Also:
+        :func:`deeplake.types.Array` for a multidimensional array.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Embedding(768))
-        ds.add_column("col2", types.Embedding(768, quantization=types.QuantizationType.Binary))
-        ```
+        Create embedding columns::
+
+            ds.add_column("col1", types.Embedding(768))
+            ds.add_column("col2", types.Embedding(768, quantization=types.QuantizationType.Binary))
     """
     ...
 
 def Float32() -> DataType:
     """
-    A 32-bit float value
+    Creates a 32-bit float value type.
+
+    Returns:
+        DataType: A new 32-bit float data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Float)
-        ```
+        Create a column with 32-bit float type::
+
+            ds.add_column("col1", types.Float32)
     """
     ...
 
 def Float64() -> DataType:
     """
-    A 64-bit float value
+    Creates a 64-bit float value type.
+
+    Returns:
+        DataType: A new 64-bit float data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Float64)
-        ```
+        Create a column with 64-bit float type::
+
+            ds.add_column("col1", types.Float64)
     """
     ...
 
 def Int16() -> DataType:
     """
-    A 16-bit integer value
+    Creates a 16-bit integer value type.
+
+    Returns:
+        DataType: A new 16-bit integer data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Int16)
-        ```
+        Create a column with 16-bit integer type::
+
+            ds.add_column("col1", types.Int16)
     """
     ...
 
 def Int32() -> DataType:
     """
-    A 32-bit integer value
+    Creates a 32-bit integer value type.
+
+    Returns:
+        DataType: A new 32-bit integer data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Int32)
-        ```
+        Create a column with 32-bit integer type::
+
+            ds.add_column("col1", types.Int32)
     """
     ...
 
 def Int64() -> DataType:
     """
-    A 64-bit integer value
+    Creates a 64-bit integer value type.
+
+    Returns:
+        DataType: A new 64-bit integer data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Int64)
-        ```
+        Create a column with 64-bit integer type::
+
+            ds.add_column("col1", types.Int64)
     """
     ...
 
 def Int8() -> DataType:
     """
-    An 8-bit integer value
+    Creates an 8-bit integer value type.
+
+    Returns:
+        DataType: A new 8-bit integer data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Int8)
-        ```
+        Create a column with 8-bit integer type::
+
+            ds.add_column("col1", types.Int8)
     """
     ...
 
 def Sequence(nested_type: DataType | str | Type) -> Type:
     """
-    A sequence is a list of other data types, where there is a order to the values in the list.
+    Creates a sequence type that represents an ordered list of other data types.
 
-    For example, a video can be stored as a sequence of images to better capture the time-based ordering of the images rather than simply storing them as an Array
+    A sequence maintains the order of its values, making it suitable for time-series
+    data like videos (sequences of images).
 
     Parameters:
-        nested_type: The data type of the values in the sequence. Can be any data type, not just primitive types.
+        nested_type: DataType | str | Type
+            The data type of the values in the sequence. Can be any data type,
+            not just primitive types.
+
+    Returns:
+        Type: A new sequence data type.
 
     Examples:
-        ```python
-        ds.add_column("col1", types.Sequence(types.Image(sample_compression="jpeg")))
-        ```
+        Create a sequence of images::
+
+            ds.add_column("col1", types.Sequence(types.Image(sample_compression="jpeg")))
     """
-    ...
 
 def Image(dtype: DataType | str = "uint8", sample_compression: str = "png") -> Type:
     """
@@ -427,8 +573,12 @@ def Link(type: Type) -> Type:
     """
     ...
 
-def Polygon() -> Type: ...
-def ClassLabel(dtype: DataType | str) -> Type: ...
+def Polygon() -> Type:
+    ...
+
+def ClassLabel(dtype: DataType | str) -> Type:
+    ...
+
 def BoundingBox(
     dtype: DataType | str = "float32",
     format: str | None = None,
@@ -507,7 +657,7 @@ def Struct(fields: dict[str, DataType | str]) -> DataType:
            "field1": types.Int16(),
            "field2": types.Text(),
         }))
-
+        
         ds.append([{"col1": {"field1": 3, "field2": "a"}}])
         print(ds[0]["col1"]["field1"]) # Output: 3
         ```