add black to python CI (#178)

Closes #48
2026-01-11 06:12:58 +00:00 · 2023-06-12 11:22:34 -07:00
parent 7bad676f30
commit 9b83ce3d2a
13 changed files with 86 additions and 51 deletions
--- a/python/lancedb/conftest.py
+++ b/python/lancedb/conftest.py
@@ -6,6 +6,7 @@ import pytest
 # import lancedb so we don't have to in every example
 import lancedb

+
@pytest.fixture(autouse=True)
 def doctest_setup(monkeypatch, tmpdir):
    # disable color for doctests so we don't have to include
@@ -15,6 +16,3 @@ def doctest_setup(monkeypatch, tmpdir):
    monkeypatch.setitem(os.environ, "COLUMNS", "80")
    # Work in a temporary directory
    monkeypatch.chdir(tmpdir)
-
-    
-    
--- a/python/lancedb/context.py
+++ b/python/lancedb/context.py
@@ -15,6 +15,7 @@ from __future__ import annotations
 import pandas as pd
 from .exceptions import MissingValueError, MissingColumnError

+
 def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
    """Create a Contextualizer object for the given DataFrame.

@@ -85,6 +86,7 @@ def contextualize(raw_df: pd.DataFrame) -> Contextualizer:

 class Contextualizer:
    """Create context windows from a DataFrame. See [lancedb.context.contextualize][]."""
+
    def __init__(self, raw_df):
        self._text_col = None
        self._groupby = None
@@ -144,12 +146,16 @@ class Contextualizer:
            raise MissingColumnError(self._text_col)

        if self._window is None or self._window < 1:
-            raise MissingValueError("The value of window is None or less than 1. Specify the "
-                "window size (number of rows to include in each window)")
+            raise MissingValueError(
+                "The value of window is None or less than 1. Specify the "
+                "window size (number of rows to include in each window)"
+            )

        if self._stride is None or self._stride < 1:
-            raise MissingValueError("The value of stride is None or less than 1. Specify the "
-                "stride (number of rows to skip between each window)")
+            raise MissingValueError(
+                "The value of stride is None or less than 1. Specify the "
+                "stride (number of rows to skip between each window)"
+            )

        def process_group(grp):
            # For each group, create the text rolling window
--- a/python/lancedb/db.py
+++ b/python/lancedb/db.py
@@ -33,7 +33,7 @@ class LanceDBConnection:
    ----------
    uri: str or Path
        The root uri of the database.
-    
+
    Examples
    --------
    >>> import lancedb
@@ -79,16 +79,20 @@ class LanceDBConnection:
        try:
            filesystem, path = fs.FileSystem.from_uri(self.uri)
        except pa.ArrowInvalid:
-            raise NotImplementedError(
-                "Unsupported scheme: " + self.uri
-            )
+            raise NotImplementedError("Unsupported scheme: " + self.uri)

        try:
-            paths = filesystem.get_file_info(fs.FileSelector(get_uri_location(self.uri)))
+            paths = filesystem.get_file_info(
+                fs.FileSelector(get_uri_location(self.uri))
+            )
        except FileNotFoundError:
            # It is ok if the file does not exist since it will be created
            paths = []
-        tables = [os.path.splitext(file_info.base_name)[0] for file_info in paths if file_info.extension == 'lance']
+        tables = [
+            os.path.splitext(file_info.base_name)[0]
+            for file_info in paths
+            if file_info.extension == "lance"
+        ]
        return tables

    def __len__(self) -> int:
@@ -153,7 +157,7 @@ class LanceDBConnection:
        vector: [[[1.1,1.2],[0.2,1.8]]]
        lat: [[45.5,40.1]]
        long: [[-122.7,-74.1]]
-        
+
        You can also pass a pandas DataFrame:

        >>> import pandas as pd
@@ -175,7 +179,7 @@ class LanceDBConnection:
        lat: [[45.5,40.1]]
        long: [[-122.7,-74.1]]

-        Data is converted to Arrow before being written to disk. For maximum 
+        Data is converted to Arrow before being written to disk. For maximum
        control over how data is saved, either provide the PyArrow schema to
        convert to or else provide a PyArrow table directly.

--- a/python/lancedb/embeddings.py
+++ b/python/lancedb/embeddings.py
@@ -33,7 +33,7 @@ def with_embeddings(
    """Add a vector column to a table using the given embedding function.

    The new columns will be called "vector".
-    
+
    Parameters
    ----------
    func : Callable
@@ -48,7 +48,7 @@ def with_embeddings(
        Whether to show a progress bar.
    batch_size : int, default 1000
        The number of row values to pass to each call of the embedding function.
-    
+
    Returns
    -------
    pa.Table
--- a/python/lancedb/exceptions.py
+++ b/python/lancedb/exceptions.py
@@ -1,16 +1,22 @@
 """Custom exception handling"""

+
 class MissingValueError(ValueError):
    """Exception raised when a required value is missing."""
+
    pass

+
 class MissingColumnError(KeyError):
    """
-    Exception raised when a column name specified is not in 
+    Exception raised when a column name specified is not in
    the  DataFrame object
    """
+
    def __init__(self, column_name):
        self.column_name = column_name

    def __str__(self):
-        return f"Error: Column '{self.column_name}' does not exist in the DataFrame object"
+        return (
+            f"Error: Column '{self.column_name}' does not exist in the DataFrame object"
+        )
--- a/python/lancedb/fts.py
+++ b/python/lancedb/fts.py
@@ -68,7 +68,7 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) -
        The table to index
    fields : List[str]
        List of fields to index
-    
+
    Returns
    -------
    int
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -120,7 +120,7 @@ class LanceQueryBuilder:
    def nprobes(self, nprobes: int) -> LanceQueryBuilder:
        """Set the number of probes to use.

-        Higher values will yield better recall (more likely to find vectors if 
+        Higher values will yield better recall (more likely to find vectors if
        they exist) at the expense of latency.

        See discussion in [Querying an ANN Index][../querying-an-ann-index] for
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -99,7 +99,7 @@ class LanceTable:
    @property
    def schema(self) -> pa.Schema:
        """Return the schema of the table.
-        
+
        Returns
        -------
        pa.Schema
@@ -117,14 +117,14 @@ class LanceTable:

    def checkout(self, version: int):
        """Checkout a version of the table. This is an in-place operation.
-        
+
        This allows viewing previous versions of the table.
-        
+
        Parameters
        ----------
        version : int
            The version to checkout.
-        
+
        Examples
        --------
        >>> import lancedb
@@ -165,7 +165,7 @@ class LanceTable:

    def to_pandas(self) -> pd.DataFrame:
        """Return the table as a pandas DataFrame.
-        
+
        Returns
        -------
        pd.DataFrame
@@ -174,7 +174,7 @@ class LanceTable:

    def to_arrow(self) -> pa.Table:
        """Return the table as a pyarrow Table.
-        
+
        Returns
        -------
        pa.Table"""
@@ -342,4 +342,6 @@ def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table
        values = values.cast(pa.float32())
    list_size = len(values) / len(data)
    vec_arr = pa.FixedSizeListArray.from_arrays(values, list_size)
-    return data.set_column(data.column_names.index(vector_column_name), vector_column_name, vec_arr)
+    return data.set_column(
+        data.column_names.index(vector_column_name), vector_column_name, vec_arr
+    )