This commit is contained in:
Chang She
2023-03-22 18:29:07 -07:00
parent 1f42104c77
commit 5ef5141812
6 changed files with 63 additions and 43 deletions

View File

@@ -53,8 +53,9 @@ class LanceDBConnection:
def __getitem__(self, name: str) -> LanceTable:
return self.open_table(name)
def create_table(self, name: str, data: DATA = None,
schema: pa.Schema = None) -> LanceTable:
def create_table(
self, name: str, data: DATA = None, schema: pa.Schema = None
) -> LanceTable:
"""Create a table in the database.
Parameters

View File

@@ -76,17 +76,12 @@ class LanceQueryBuilder:
return self
def to_df(self) -> pd.DataFrame:
"""Execute the query and return the results as a pandas DataFrame.
"""
"""Execute the query and return the results as a pandas DataFrame."""
ds = self._table.to_lance()
# TODO indexed search
tbl = ds.to_table(
columns=self._columns,
filter=self._where,
nearest={
"column": VECTOR_COLUMN_NAME,
"q": self._query,
"k": self._limit
}
nearest={"column": VECTOR_COLUMN_NAME, "q": self._query, "k": self._limit},
)
return tbl.to_pandas()

View File

@@ -131,8 +131,9 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
return data
# cast the columns to the expected types
data = data.combine_chunks()
return pa.Table.from_arrays([data[name] for name in schema.names],
schema=schema)
return pa.Table.from_arrays(
[data[name] for name in schema.names], schema=schema
)
# just check the vector column
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)