diff --git a/docs/src/python/python.md b/docs/src/python/python.md
index 5531870f..4cc3b5ce 100644
--- a/docs/src/python/python.md
+++ b/docs/src/python/python.md
@@ -22,8 +22,6 @@ pip install lancedb
 
 ::: lancedb.query.LanceQueryBuilder
 
-::: lancedb.query.LanceFtsQueryBuilder
-
 ## Embeddings
 
 ::: lancedb.embeddings.registry.EmbeddingFunctionRegistry
@@ -56,7 +54,7 @@ pip install lancedb
 
 ## Utilities
 
-::: lancedb.vector
+::: lancedb.schema.vector
 
 ## Integrations
 
diff --git a/python/lancedb/context.py b/python/lancedb/context.py
index 73800d02..02051614 100644
--- a/python/lancedb/context.py
+++ b/python/lancedb/context.py
@@ -84,7 +84,9 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
     context windows that don't cross document boundaries. In this case, we can
     pass ``document_id`` as the group by.
 
-    >>> contextualize(data).window(4).stride(2).text_col('token').groupby('document_id').to_pandas()
+    >>> (contextualize(data)
+    ...     .window(4).stride(2).text_col('token').groupby('document_id')
+    ...     .to_pandas())
                        token  document_id
     0    The quick brown fox            1
     2  brown fox jumped over            1
@@ -92,18 +94,24 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
     6           the lazy dog            1
     9      I love sandwiches            2
 
-    ``min_window_size`` determines the minimum size of the  context windows that are generated
-    This can be used to trim the last few context windows which have size less than
-    ``min_window_size``. By default context windows of size 1 are skipped.
+    ``min_window_size`` determines the minimum size of the context windows
+    that are generated. This can be used to trim the last few context windows
+    which have size less than ``min_window_size``.
+    By default context windows of size 1 are skipped.
 
-    >>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_pandas()
+    >>> (contextualize(data)
+    ...     .window(6).stride(3).text_col('token').groupby('document_id')
+    ...     .to_pandas())
                                  token  document_id
     0  The quick brown fox jumped over            1
     3     fox jumped over the lazy dog            1
     6                     the lazy dog            1
     9                I love sandwiches            2
 
-    >>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_pandas()
+    >>> (contextualize(data)
+    ...     .window(6).stride(3).min_window_size(4).text_col('token')
+    ...     .groupby('document_id')
+    ...     .to_pandas())
                                  token  document_id
     0  The quick brown fox jumped over            1
     3     fox jumped over the lazy dog            1
@@ -113,7 +121,9 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
 
 
 class Contextualizer:
-    """Create context windows from a DataFrame. See [lancedb.context.contextualize][]."""
+    """Create context windows from a DataFrame.
+    See [lancedb.context.contextualize][].
+    """
 
     def __init__(self, raw_df):
         self._text_col = None
@@ -183,7 +193,7 @@ class Contextualizer:
         deprecated_in="0.3.1",
         removed_in="0.4.0",
         current_version=__version__,
-        details="Use the bar function instead",
+        details="Use to_pandas() instead",
     )
     def to_df(self) -> "pd.DataFrame":
         return self.to_pandas()
diff --git a/python/lancedb/db.py b/python/lancedb/db.py
index 6cdbce33..f03e9614 100644
--- a/python/lancedb/db.py
+++ b/python/lancedb/db.py
@@ -52,12 +52,24 @@ class DBConnection(ABC):
         ----------
         name: str
             The name of the table.
-        data: list, tuple, dict, pd.DataFrame; optional
-            The data to initialize the table. User must provide at least one of `data` or `schema`.
-        schema: pyarrow.Schema or LanceModel; optional
-            The schema of the table.
+        data: The data to initialize the table, *optional*
+            User must provide at least one of `data` or `schema`.
+            Acceptable types are:
+
+            - dict or list-of-dict
+
+            - pandas.DataFrame
+
+            - pyarrow.Table or pyarrow.RecordBatch
+        schema: The schema of the table, *optional*
+            Acceptable types are:
+
+            - pyarrow.Schema
+
+            - [LanceModel][lancedb.pydantic.LanceModel]
         mode: str; default "create"
-            The mode to use when creating the table. Can be either "create" or "overwrite".
+            The mode to use when creating the table.
+            Can be either "create" or "overwrite".
             By default, if the table already exists, an exception is raised.
             If you want to overwrite the table, use mode="overwrite".
         on_bad_vectors: str, default "error"
@@ -150,7 +162,8 @@ class DBConnection(ABC):
         ...     for i in range(5):
         ...         yield pa.RecordBatch.from_arrays(
         ...             [
-        ...                 pa.array([[3.1, 4.1], [5.9, 26.5]], pa.list_(pa.float32(), 2)),
+        ...                 pa.array([[3.1, 4.1], [5.9, 26.5]],
+        ...                     pa.list_(pa.float32(), 2)),
         ...                 pa.array(["foo", "bar"]),
         ...                 pa.array([10.0, 20.0]),
         ...             ],
diff --git a/python/lancedb/query.py b/python/lancedb/query.py
index f8162efc..7fb31af1 100644
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -30,7 +30,40 @@ pd = safe_import_pandas()
 
 
 class Query(pydantic.BaseModel):
-    """A Query"""
+    """The LanceDB Query
+
+    Attributes
+    ----------
+    vector : List[float]
+        the vector to search for
+    filter : Optional[str]
+        sql filter to refine the query with, optional
+    prefilter : bool
+        if True then apply the filter before vector search
+    k : int
+        top k results to return
+    metric : str
+        the distance metric between a pair of vectors,
+
+        can support L2 (default), Cosine and Dot.
+        [metric definitions][search]
+    columns : Optional[List[str]]
+        which columns to return in the results
+    nprobes : int
+        The number of probes used - optional
+
+        - A higher number makes search more accurate but also slower.
+
+        - See discussion in [Querying an ANN Index][querying-an-ann-index] for
+          tuning advice.
+    refine_factor : Optional[int]
+        Refine the results by reading extra elements and re-ranking them in memory - optional
+
+        - A higher number makes search more accurate but also slower.
+
+        - See discussion in [Querying an ANN Index][querying-an-ann-index] for
+          tuning advice.
+    """
 
     vector_column: str = VECTOR_COLUMN_NAME
 
@@ -61,6 +94,10 @@ class Query(pydantic.BaseModel):
 
 
 class LanceQueryBuilder(ABC):
+    """Build LanceDB query based on specific query type:
+    vector or full text search.
+    """
+
     @classmethod
     def create(
         cls,
@@ -133,11 +170,11 @@ class LanceQueryBuilder(ABC):
         deprecated_in="0.3.1",
         removed_in="0.4.0",
         current_version=__version__,
-        details="Use the bar function instead",
+        details="Use to_pandas() instead",
     )
     def to_df(self) -> "pd.DataFrame":
         """
-        Deprecated alias for `to_pandas()`. Please use `to_pandas()` instead.
+        *Deprecated alias for `to_pandas()`. Please use `to_pandas()` instead.*
 
         Execute the query and return the results as a pandas DataFrame.
         In addition to the selected columns, LanceDB also returns a vector
@@ -253,8 +290,6 @@ class LanceQueryBuilder(ABC):
 
 class LanceVectorQueryBuilder(LanceQueryBuilder):
     """
-    A builder for nearest neighbor queries for LanceDB.
-
     Examples
     --------
     >>> import lancedb
@@ -310,7 +345,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
         Higher values will yield better recall (more likely to find vectors if
         they exist) at the expense of latency.
 
-        See discussion in [Querying an ANN Index][../querying-an-ann-index] for
+        See discussion in [Querying an ANN Index][querying-an-ann-index] for
         tuning advice.
 
         Parameters
@@ -397,6 +432,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
 
 
 class LanceFtsQueryBuilder(LanceQueryBuilder):
+    """A builder for full text search for LanceDB."""
+
     def __init__(self, table: "lancedb.table.Table", query: str):
         super().__init__(table)
         self._query = query
diff --git a/python/lancedb/remote/db.py b/python/lancedb/remote/db.py
index f087ddc5..6b018062 100644
--- a/python/lancedb/remote/db.py
+++ b/python/lancedb/remote/db.py
@@ -104,7 +104,11 @@ class RemoteDBConnection(DBConnection):
             raise ValueError("Either data or schema must be provided.")
         if data is not None:
             data = _sanitize_data(
-                data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
+                data,
+                schema,
+                metadata=None,
+                on_bad_vectors=on_bad_vectors,
+                fill_value=fill_value,
             )
         else:
             if schema is None:
diff --git a/python/lancedb/table.py b/python/lancedb/table.py
index a28b12f0..7d9717d9 100644
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -149,13 +149,13 @@ class Table(ABC):
     @property
     @abstractmethod
     def schema(self) -> pa.Schema:
-        """The [Arrow Schema](https://arrow.apache.org/docs/python/api/datatypes.html#) of
-        this Table
+        """The [Arrow Schema](https://arrow.apache.org/docs/python/api/datatypes.html#)
+        of this Table
 
         """
         raise NotImplementedError
 
-    def to_pandas(self):
+    def to_pandas(self) -> "pd.DataFrame":
         """Return the table as a pandas DataFrame.
 
         Returns
@@ -191,17 +191,18 @@ class Table(ABC):
             The distance metric to use when creating the index.
             Valid values are "L2", "cosine", or "dot".
             L2 is euclidean distance.
-        num_partitions: int
+        num_partitions: int, default 256
             The number of IVF partitions to use when creating the index.
             Default is 256.
-        num_sub_vectors: int
+        num_sub_vectors: int, default 96
             The number of PQ sub-vectors to use when creating the index.
             Default is 96.
         vector_column_name: str, default "vector"
             The vector column name to create the index.
         replace: bool, default True
-            If True, replace the existing index if it exists.
-            If False, raise an error if duplicate index exists.
+            - If True, replace the existing index if it exists.
+
+            - If False, raise an error if duplicate index exists.
         accelerator: str, default None
             If set, use the given accelerator to create the index.
             Only support "cuda" for now.
@@ -220,8 +221,14 @@ class Table(ABC):
 
         Parameters
         ----------
-        data: list-of-dict, dict, pd.DataFrame
-            The data to insert into the table.
+        data: DATA
+            The data to insert into the table. Acceptable types are:
+
+            - dict or list-of-dict
+
+            - pandas.DataFrame
+
+            - pyarrow.Table or pyarrow.RecordBatch
         mode: str
             The mode to use when writing the data. Valid values are
             "append" and "overwrite".
@@ -242,31 +249,70 @@ class Table(ABC):
         query_type: str = "auto",
     ) -> LanceQueryBuilder:
         """Create a search query to find the nearest neighbors
-        of the given query vector.
+        of the given query vector. We currently support [vector search][search]
+        and [full-text search][experimental-full-text-search].
+
+        All query options are defined in [Query][lancedb.query.Query].
+
+        Examples
+        --------
+        >>> import lancedb
+        >>> db = lancedb.connect("./.lancedb")
+        >>> data = [
+        ...    {"original_width": 100, "caption": "bar", "vector": [0.1, 2.3, 4.5]},
+        ...    {"original_width": 2000, "caption": "foo",  "vector": [0.5, 3.4, 1.3]},
+        ...    {"original_width": 3000, "caption": "test", "vector": [0.3, 6.2, 2.6]}
+        ... ]
+        >>> table = db.create_table("my_table", data)
+        >>> query = [0.4, 1.4, 2.4]
+        >>> (table.search(query, vector_column_name="vector")
+        ...     .where("original_width > 1000", prefilter=True)
+        ...     .select(["caption", "original_width"])
+        ...     .limit(2)
+        ...     .to_pandas())
+          caption  original_width           vector  _distance
+        0     foo            2000  [0.5, 3.4, 1.3]   5.220000
+        1    test            3000  [0.3, 6.2, 2.6]  23.089996
 
         Parameters
         ----------
-        query: str, list, np.ndarray, PIL.Image.Image, default None
-            The query to search for. If None then
-            the select/where/limit clauses are applied to filter
+        query: list/np.ndarray/str/PIL.Image.Image, default None
+            The targeted vector to search for.
+
+            - *default None*.
+            Acceptable types are: list, np.ndarray, str, PIL.Image.Image
+
+            - If None then the select/where/limit clauses are applied to filter
             the table
-        vector_column_name: str, default "vector"
+        vector_column_name: str
             The name of the vector column to search.
-        query_type: str, default "auto"
-            "vector", "fts", or "auto"
-            If "auto" then the query type is inferred from the query;
-            If `query` is a list/np.ndarray then the query type is "vector";
-            If `query` is a PIL.Image.Image then either do vector search
-            or raise an error if no corresponding embedding function is found.
-            If `query` is a string, then the query type is "vector" if the
+            *default "vector"*
+        query_type: str
+            *default "auto"*.
+            Acceptable types are: "vector", "fts", or "auto"
+
+            - If "auto" then the query type is inferred from the query;
+
+                - If `query` is a list/np.ndarray then the query type is
+                "vector";
+
+                - If `query` is a PIL.Image.Image then either do vector search,
+                or raise an error if no corresponding embedding function is found.
+
+                - If `query` is a string, then the query type is "vector" if the
             table has embedding functions else the query type is "fts"
 
         Returns
         -------
         LanceQueryBuilder
             A query builder object representing the query.
-            Once executed, the query returns selected columns, the vector,
-            and also the "_distance" column which is the distance between the query
+            Once executed, the query returns
+
+            - selected columns
+
+            - the vector
+
+            - and also the "_distance" column which is the distance between the query
             vector and the returned vector.
         """
         raise NotImplementedError
@@ -285,14 +331,19 @@ class Table(ABC):
         Parameters
         ----------
         where: str
-            The SQL where clause to use when deleting rows. For example, 'x = 2'
-            or 'x IN (1, 2, 3)'. The filter must not be empty, or it will error.
+            The SQL where clause to use when deleting rows.
+
+            - For example, 'x = 2' or 'x IN (1, 2, 3)'.
+
+            The filter must not be empty, or it will error.
 
         Examples
         --------
         >>> import lancedb
         >>> data = [
-        ...   {"x": 1, "vector": [1, 2]}, {"x": 2, "vector": [3, 4]}, {"x": 3, "vector": [5, 6]}
+        ...    {"x": 1, "vector": [1, 2]},
+        ...    {"x": 2, "vector": [3, 4]},
+        ...    {"x": 3, "vector": [5, 6]}
         ... ]
         >>> db = lancedb.connect("./.lancedb")
         >>> table = db.create_table("my_table", data)
@@ -377,7 +428,8 @@ class LanceTable(Table):
         --------
         >>> import lancedb
         >>> db = lancedb.connect("./.lancedb")
-        >>> table = db.create_table("my_table", [{"vector": [1.1, 0.9], "type": "vector"}])
+        >>> table = db.create_table("my_table",
+        ...    [{"vector": [1.1, 0.9], "type": "vector"}])
         >>> table.version
         2
         >>> table.to_pandas()
@@ -424,7 +476,8 @@ class LanceTable(Table):
         --------
         >>> import lancedb
         >>> db = lancedb.connect("./.lancedb")
-        >>> table = db.create_table("my_table", [{"vector": [1.1, 0.9], "type": "vector"}])
+        >>> table = db.create_table("my_table", [
+        ...     {"vector": [1.1, 0.9], "type": "vector"}])
         >>> table.version
         2
         >>> table.to_pandas()
@@ -669,14 +722,39 @@ class LanceTable(Table):
         query_type: str = "auto",
     ) -> LanceQueryBuilder:
         """Create a search query to find the nearest neighbors
-        of the given query vector.
+        of the given query vector. We currently support [vector search][search]
+        and [full-text search][experimental-full-text-search].
+
+        Examples
+        --------
+        >>> import lancedb
+        >>> db = lancedb.connect("./.lancedb")
+        >>> data = [
+        ...    {"original_width": 100, "caption": "bar", "vector": [0.1, 2.3, 4.5]},
+        ...    {"original_width": 2000, "caption": "foo",  "vector": [0.5, 3.4, 1.3]},
+        ...    {"original_width": 3000, "caption": "test", "vector": [0.3, 6.2, 2.6]}
+        ... ]
+        >>> table = db.create_table("my_table", data)
+        >>> query = [0.4, 1.4, 2.4]
+        >>> (table.search(query, vector_column_name="vector")
+        ...     .where("original_width > 1000", prefilter=True)
+        ...     .select(["caption", "original_width"])
+        ...     .limit(2)
+        ...     .to_pandas())
+          caption  original_width           vector  _distance
+        0     foo            2000  [0.5, 3.4, 1.3]   5.220000
+        1    test            3000  [0.3, 6.2, 2.6]  23.089996
 
         Parameters
         ----------
-        query: str, list, np.ndarray, a PIL Image or None
-            The query to search for. If None then
-            the select/where/limit clauses are applied to filter
-            the table
+        query: list/np.ndarray/str/PIL.Image.Image, default None
+            The targeted vector to search for.
+
+            - *default None*.
+            Acceptable types are: list, np.ndarray, str, PIL.Image.Image
+
+            - If None then the select/[where][sql]/limit clauses are applied
+            to filter the table
         vector_column_name: str, default "vector"
             The name of the vector column to search.
         query_type: str, default "auto"
@@ -685,7 +763,7 @@ class LanceTable(Table):
             If `query` is a list/np.ndarray then the query type is "vector";
             If `query` is a PIL.Image.Image then either do vector search
             or raise an error if no corresponding embedding function is found.
-            If the query is a string, then the query type is "vector" if the
+            If the `query` is a string, then the query type is "vector" if the
             table has embedding functions, else the query type is "fts"
 
         Returns
@@ -720,7 +798,9 @@ class LanceTable(Table):
         --------
         >>> import lancedb
         >>> data = [
-        ...   {"x": 1, "vector": [1, 2]}, {"x": 2, "vector": [3, 4]}, {"x": 3, "vector": [5, 6]}
+        ...    {"x": 1, "vector": [1, 2]},
+        ...    {"x": 2, "vector": [3, 4]},
+        ...    {"x": 3, "vector": [5, 6]}
         ... ]
         >>> db = lancedb.connect("./.lancedb")
         >>> table = db.create_table("my_table", data)
@@ -740,7 +820,8 @@ class LanceTable(Table):
             The data to insert into the table.
             At least one of `data` or `schema` must be provided.
         schema: pa.Schema or LanceModel, optional
-            The schema of the table. If not provided, the schema is inferred from the data.
+            The schema of the table. If not provided,
+            the schema is inferred from the data.
             At least one of `data` or `schema` must be provided.
         mode: str, default "create"
             The mode to use when writing the data. Valid values are
@@ -811,7 +892,8 @@ class LanceTable(Table):
         file_info = fs.get_file_info(path)
         if file_info.type != pa.fs.FileType.Directory:
             raise FileNotFoundError(
-                f"Table {name} does not exist. Please first call db.create_table({name}, data)"
+                f"Table {name} does not exist. "
+                f"Please first call db.create_table({name}, data)"
             )
         return tbl
 
@@ -838,7 +920,9 @@ class LanceTable(Table):
         --------
         >>> import lancedb
         >>> data = [
-        ...   {"x": 1, "vector": [1, 2]}, {"x": 2, "vector": [3, 4]}, {"x": 3, "vector": [5, 6]}
+        ...    {"x": 1, "vector": [1, 2]},
+        ...    {"x": 2, "vector": [3, 4]},
+        ...    {"x": 3, "vector": [5, 6]}
         ... ]
         >>> db = lancedb.connect("./.lancedb")
         >>> table = db.create_table("my_table", data)
@@ -1013,7 +1097,8 @@ def _sanitize_vector_column(
     # ChunkedArray is annoying to work with, so we combine chunks here
     vec_arr = data[vector_column_name].combine_chunks()
     if pa.types.is_list(data[vector_column_name].type):
-        # if it's a variable size list array we make sure the dimensions are all the same
+        # if it's a variable size list array,
+        # we make sure the dimensions are all the same
         has_jagged_ndims = len(vec_arr.values) % len(data) != 0
         if has_jagged_ndims:
             data = _sanitize_jagged(