diff --git a/docs/src/basic.md b/docs/src/basic.md index f4ee7bd7..328c2dfb 100644 --- a/docs/src/basic.md +++ b/docs/src/basic.md @@ -79,6 +79,18 @@ We'll cover the basics of using LanceDB on your local machine in this section. ??? info "Under the hood, LanceDB is converting the input data into an Apache Arrow table and persisting it to disk in [Lance format](https://www.github.com/lancedb/lance)." +### Creating an empty table + +Sometimes you may not have the data to insert into the table at creation time. +In this case, you can create an empty table and specify the schema. + +=== "Python" + ```python + import pyarrow as pa + schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))]) + tbl = db.create_table("empty_table", schema=schema) + ``` + ## How to open an existing table Once created, you can open a table using the following code: @@ -122,6 +134,22 @@ After a table has been created, you can always add more data to it using {vector: [9.5, 56.2], item: "buzz", price: 200.0}]) ``` +## How to search for (approximate) nearest neighbors + +Once you've embedded the query, you can find its nearest neighbors using the following code: + +=== "Python" + ```python + tbl.search([100, 100]).limit(2).to_df() + ``` + + This returns a pandas DataFrame with the results. + +=== "Javascript" + ```javascript + const query = await tbl.search([100, 100]).limit(2).execute(); + ``` + ## How to delete rows from a table Use the `delete()` method on tables to delete rows from a table. To choose @@ -151,21 +179,19 @@ To see what expressions are supported, see the [SQL filters](sql.md) section. Read more: [vectordb.Table.delete](javascript/interfaces/Table.md#delete) -## How to search for (approximate) nearest neighbors +## How to remove a table -Once you've embedded the query, you can find its nearest neighbors using the following code: +Use the `drop_table()` method on the database to remove a table. 
=== "Python" ```python - tbl.search([100, 100]).limit(2).to_df() + db.drop_table("my_table") ``` - This returns a pandas DataFrame with the results. +This permanently removes the table and is not recoverable, unlike deleting rows. +By default, if the table does not exist an exception is raised. To suppress this, +you can pass in `ignore_missing=True`. -=== "Javascript" - ```javascript - const query = await tbl.search([100, 100]).limit(2).execute(); - ``` ## What's next diff --git a/python/lancedb/db.py b/python/lancedb/db.py index e87ea533..53e992b0 100644 --- a/python/lancedb/db.py +++ b/python/lancedb/db.py @@ -22,6 +22,7 @@ import pyarrow as pa from pyarrow import fs from .common import DATA, URI +from .pydantic import LanceModel from .table import LanceTable, Table from .util import fs_from_uri, get_uri_location, get_uri_scheme @@ -39,7 +40,7 @@ class DBConnection(ABC): self, name: str, data: Optional[DATA] = None, - schema: Optional[pa.Schema] = None, + schema: Optional[Union[pa.Schema, LanceModel]] = None, mode: str = "create", on_bad_vectors: str = "error", fill_value: float = 0.0, @@ -52,7 +53,7 @@ class DBConnection(ABC): The name of the table. data: list, tuple, dict, pd.DataFrame; optional The data to initialize the table. User must provide at least one of `data` or `schema`. - schema: pyarrow.Schema; optional + schema: pyarrow.Schema or LanceModel; optional The schema of the table. mode: str; default "create" The mode to use when creating the table. Can be either "create" or "overwrite". 
@@ -277,7 +278,7 @@ class LanceDBConnection(DBConnection): self, name: str, data: Optional[DATA] = None, - schema: pa.Schema = None, + schema: Optional[Union[pa.Schema, LanceModel]] = None, mode: str = "create", on_bad_vectors: str = "error", fill_value: float = 0.0, diff --git a/python/lancedb/table.py b/python/lancedb/table.py index 66ed83bc..63f60689 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -13,6 +13,7 @@ from __future__ import annotations +import inspect import os from abc import ABC, abstractmethod from functools import cached_property @@ -506,7 +507,7 @@ class LanceTable(Table): data: list-of-dict, dict, pd.DataFrame, default None The data to insert into the table. At least one of `data` or `schema` must be provided. - schema: dict, optional + schema: pa.Schema or LanceModel, optional The schema of the table. If not provided, the schema is inferred from the data. At least one of `data` or `schema` must be provided. mode: str, default "create" @@ -519,6 +520,8 @@ class LanceTable(Table): The value to use when filling vectors. Only used if on_bad_vectors="fill". 
""" tbl = LanceTable(db, name) + if inspect.isclass(schema) and issubclass(schema, LanceModel): + schema = schema.to_arrow_schema() if data is not None: data = _sanitize_data( data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value diff --git a/python/tests/test_db.py b/python/tests/test_db.py index 8e71e43d..3d154173 100644 --- a/python/tests/test_db.py +++ b/python/tests/test_db.py @@ -17,6 +17,7 @@ import pyarrow as pa import pytest import lancedb +from lancedb.pydantic import LanceModel def test_basic(tmp_path): @@ -167,8 +168,14 @@ def test_empty_or_nonexistent_table(tmp_path): with pytest.raises(Exception): db.open_table("does_not_exist") - schema = pa.schema([pa.field("a", pa.int32())]) - db.create_table("test", schema=schema) + schema = pa.schema([pa.field("a", pa.int64(), nullable=False)]) + test = db.create_table("test", schema=schema) + + class TestModel(LanceModel): + a: int + + test2 = db.create_table("test2", schema=TestModel) + assert test.schema == test2.schema def test_replace_index(tmp_path):