mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-03 02:12:56 +00:00
feat(python): async-sync feature parity on Table (#1914)
### Changes to sync API

* Updated `LanceTable` and `LanceDBConnection` reprs
* Add `storage_options`, `data_storage_version`, and `enable_v2_manifest_paths` to sync create table API.
* Add `storage_options` to `open_table` in sync API.
* Add `list_indices()` and `index_stats()` to sync API
* `create_table()` will now create only 1 version when data is passed. Previously it would always create two versions: 1 to create an empty table and 1 to add data to it.

### Changes to async API

* Add `embedding_functions` to async `create_table()` API.
* Added `head()` to async API

### Refactors

* Refactor index parameters into dataclasses so they are easier to use from Python
* Moved most tests to use an in-memory DB so we don't need to create so many temp directories

Closes #1792
Closes #1932

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
This commit is contained in:
@@ -15,10 +15,12 @@ import random
|
||||
from unittest import mock
|
||||
|
||||
import lancedb as ldb
|
||||
from lancedb.db import DBConnection
|
||||
from lancedb.index import FTS
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from utils import exception_output
|
||||
|
||||
pytest.importorskip("lancedb.fts")
|
||||
tantivy = pytest.importorskip("tantivy")
|
||||
@@ -458,3 +460,44 @@ def test_syntax(table):
|
||||
table.search('the cats OR dogs were not really "pets" at all').phrase_query().limit(
|
||||
10
|
||||
).to_list()
|
||||
|
||||
|
||||
def test_language(mem_db: DBConnection):
    """Check language handling of the non-tantivy FTS index.

    Builds a small table of French sentences, verifies that an unknown
    language name is rejected with a ValueError listing the supported
    languages, then creates a French index with stemming, ASCII folding
    and stop-word removal enabled and checks the hit counts of three
    queries against it.
    """
    french_sentences = (
        "Il n'y a que trois routes qui traversent la ville.",
        "Je veux prendre la route vers l'est.",
        "Je te retrouve au café au bout de la route.",
    )
    rows = [{"text": sentence} for sentence in french_sentences]
    table = mem_db.create_table("test", data=rows)

    # An unsupported language must fail before any index is created.
    with pytest.raises(ValueError) as exc_info:
        table.create_fts_index("text", use_tantivy=False, language="klingon")

    expected_error = (
        "ValueError: LanceDB does not support the requested language: 'klingon'\n"
        "Supported languages: Arabic, Danish, Dutch, English, Finnish, French, "
        "German, Greek, Hungarian, Italian, Norwegian, Portuguese, Romanian, "
        "Russian, Spanish, Swedish, Tamil, Turkish"
    )
    assert exception_output(exc_info) == expected_error

    index_options = {
        "use_tantivy": False,
        "language": "French",
        "stem": True,
        "ascii_folding": True,
        "remove_stop_words": True,
    }
    table.create_fts_index("text", **index_options)

    # Stemming: "routes" and "route" share a root, so every row matches.
    route_hits = table.search("route", query_type="fts").limit(5).to_list()
    assert len(route_hits) == 3

    # ASCII folding: "café" is found without typing the accent.
    cafe_hits = table.search("cafe", query_type="fts").limit(5).to_list()
    assert len(cafe_hits) == 1

    # Stop words are removed, so searching for one yields nothing.
    stop_word_hits = table.search("la", query_type="fts").limit(5).to_list()
    assert len(stop_word_hits) == 0
|
||||
|
||||
Reference in New Issue
Block a user