Error implementations (#232)

Solves #216 by adding a check on table open for existence of the
`.lance` file. Does not check for it for remote connections.
This commit is contained in:
Leon Yee
2023-06-27 16:48:31 -07:00
committed by GitHub
parent 4bc676e26a
commit eb5bcda337
7 changed files with 113 additions and 8 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
*.egg-info
**/__pycache__
.DS_Store
venv
rust/target
rust/Cargo.lock

View File

@@ -79,10 +79,7 @@ def qanda_langchain(query):
download_docs()
docs = store_docs()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200,)
documents = text_splitter.split_documents(docs)
embeddings = OpenAIEmbeddings()

View File

@@ -16,7 +16,7 @@ npm install vectordb
const lancedb = require('vectordb');
const db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>');
const table = await db.openTable('my_table');
const query = await table.search([0.1, 0.3]).limit(20).execute();
const results = await table.search([0.1, 0.3]).limit(20).execute();
console.log(results);
```

85
python/README.md Normal file
View File

@@ -0,0 +1,85 @@
# LanceDB
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
## Installation
```bash
pip install lancedb
```
## Usage
### Basic Example
```python
import lancedb
db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>')
table = db.open_table('my_table')
results = table.search([0.1, 0.3]).limit(20).to_df()
print(results)
```
## Development
Create a virtual environment and activate it:
```bash
python -m venv venv
. ./venv/bin/activate
```
Install the necessary packages:
```bash
python -m pip install .
```
To run the unit tests:
```bash
pytest
```
To run the linters and automatically fix all errors:
```bash
black .
isort .
```
If any packages are missing, install them with:
```bash
pip install <PACKAGE_NAME>
```
___
For **Windows** users, there may be errors when installing packages, so these commands may be helpful:
Activate the virtual environment:
```bash
. .\venv\Scripts\activate
```
You may need to run the installs separately:
```bash
pip install -e .[tests]
pip install -e .[dev]
```
`tantivy` requires `rust` to be installed, so install it with `conda`, as it doesn't support Windows installation:
```bash
pip install wheel
pip install cargo
conda install rust
pip install tantivy
```
To run the unit tests:
```bash
pytest
```

View File

@@ -252,7 +252,7 @@ class LanceDBConnection:
if data is not None:
tbl = LanceTable.create(self, name, data, schema, mode=mode)
else:
tbl = LanceTable(self, name)
tbl = LanceTable.open(self, name)
return tbl
def open_table(self, name: str) -> LanceTable:
@@ -267,7 +267,7 @@ class LanceDBConnection:
-------
A LanceTable object representing the table.
"""
return LanceTable(self, name)
return LanceTable.open(self, name)
def drop_table(self, name: str):
"""Drop a table from the database.

View File

@@ -308,6 +308,19 @@ class LanceTable:
lance.write_dataset(data, tbl._dataset_uri, mode=mode)
return tbl
@classmethod
def open(cls, db, name):
    """Open an existing table, checking that it exists for local connections.

    Parameters
    ----------
    db
        The connection the table belongs to.
    name
        The name of the table to open.

    Returns
    -------
    The table object constructed via ``cls(db, name)``.

    Raises
    ------
    FileNotFoundError
        If the connection is not a managed remote one and the table's
        dataset URI does not exist on the local filesystem.
    """
    tbl = cls(db, name)
    if tbl._conn.is_managed_remote:
        # Existence of remote tables is not checked yet; return the handle as-is.
        return tbl
    if not os.path.exists(tbl._dataset_uri):
        # The name is rendered with !r inside the suggested call so the hint
        # is valid Python that can be copied verbatim.
        raise FileNotFoundError(
            f"Table {name} does not exist. "
            f"Please first call db.create_table({name!r}, data)"
        )
    return tbl
def delete(self, where: str):
"""Delete rows from the table.

View File

@@ -123,6 +123,15 @@ def test_delete_table(tmp_path):
assert db.table_names() == ["test"]
def test_empty_or_nonexistent_table(tmp_path):
    """Creating a table without data and opening a missing table must both fail."""
    db = lancedb.connect(tmp_path)
    # NOTE(review): the exact exception type for the no-data path is not
    # pinned here; narrow it once the create_table contract is confirmed.
    with pytest.raises(Exception):
        db.create_table("test_with_no_data")
    # Opening a missing local table raises FileNotFoundError specifically;
    # asserting the concrete type keeps unrelated errors from passing the test.
    with pytest.raises(FileNotFoundError):
        db.open_table("does_not_exist")
def test_replace_index(tmp_path):
db = lancedb.connect(uri=tmp_path)
table = db.create_table(