mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-05 03:12:57 +00:00
Compare commits
2 Commits
docs/mcp
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
27d9e5c596 | ||
|
|
ec8271931f |
34
Cargo.lock
generated
34
Cargo.lock
generated
@@ -2720,7 +2720,7 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "fsst"
|
name = "fsst"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
@@ -3708,7 +3708,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance"
|
name = "lance"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
@@ -3768,7 +3768,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-arrow"
|
name = "lance-arrow"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -3786,7 +3786,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-core"
|
name = "lance-core"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-buffer",
|
"arrow-buffer",
|
||||||
@@ -3823,7 +3823,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-datafusion"
|
name = "lance-datafusion"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3851,7 +3851,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-datagen"
|
name = "lance-datagen"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3867,7 +3867,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-encoding"
|
name = "lance-encoding"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrayref",
|
"arrayref",
|
||||||
"arrow",
|
"arrow",
|
||||||
@@ -3907,7 +3907,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-file"
|
name = "lance-file"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3942,7 +3942,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-index"
|
name = "lance-index"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -3996,7 +3996,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-io"
|
name = "lance-io"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-arith",
|
"arrow-arith",
|
||||||
@@ -4035,7 +4035,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-linalg"
|
name = "lance-linalg"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ord",
|
"arrow-ord",
|
||||||
@@ -4059,7 +4059,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-table"
|
name = "lance-table"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4099,7 +4099,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "lance-testing"
|
name = "lance-testing"
|
||||||
version = "0.25.3"
|
version = "0.25.3"
|
||||||
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
|
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-schema",
|
"arrow-schema",
|
||||||
@@ -4110,7 +4110,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.19.0-beta.4"
|
version = "0.19.0-beta.5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
@@ -4197,7 +4197,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.19.0-beta.4"
|
version = "0.19.0-beta.5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -4222,7 +4222,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
version = "0.19.0-beta.4"
|
version = "0.19.0-beta.5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -4240,7 +4240,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.22.0-beta.4"
|
version = "0.22.0-beta.5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
|
|||||||
16
Cargo.toml
16
Cargo.toml
@@ -23,14 +23,14 @@ rust-version = "1.78.0"
|
|||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.25.3", "features" = [
|
lance = { "version" = "=0.25.3", "features" = [
|
||||||
"dynamodb",
|
"dynamodb",
|
||||||
], tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
], tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-io = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-io = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-index = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-index = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-linalg = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-linalg = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-table = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-table = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-testing = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-testing = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-datafusion = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-datafusion = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
lance-encoding = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
|
lance-encoding = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "54.1", optional = false }
|
arrow = { version = "54.1", optional = false }
|
||||||
arrow-array = "54.1"
|
arrow-array = "54.1"
|
||||||
|
|||||||
@@ -1304,6 +1304,27 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(results[0].text).toBe(data[0].text);
|
expect(results[0].text).toBe(data[0].text);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("full text index on list", async () => {
|
||||||
|
const db = await connect(tmpDir.name);
|
||||||
|
const data = [
|
||||||
|
{ text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
|
||||||
|
{ text: ["lance database"], vector: [0.4, 0.5, 0.6] },
|
||||||
|
{ text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
|
||||||
|
{ text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
|
||||||
|
{ text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
|
||||||
|
];
|
||||||
|
const table = await db.createTable("test", data);
|
||||||
|
await table.createIndex("text", {
|
||||||
|
config: Index.fts(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const results = await table.search("lance").toArray();
|
||||||
|
expect(results.length).toBe(3);
|
||||||
|
|
||||||
|
const results2 = await table.search('"lance database"').toArray();
|
||||||
|
expect(results2.length).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
test("full text search without positions", async () => {
|
test("full text search without positions", async () => {
|
||||||
const db = await connect(tmpDir.name);
|
const db = await connect(tmpDir.name);
|
||||||
const data = [
|
const data = [
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.22.0-beta.5"
|
current_version = "0.22.0-beta.6"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.22.0-beta.5"
|
version = "0.22.0-beta.6"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ from lancedb.db import DBConnection
|
|||||||
from lancedb.index import FTS
|
from lancedb.index import FTS
|
||||||
from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery
|
from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
from utils import exception_output
|
from utils import exception_output
|
||||||
@@ -626,3 +627,32 @@ def test_language(mem_db: DBConnection):
|
|||||||
# Stop words -> no results
|
# Stop words -> no results
|
||||||
results = table.search("la", query_type="fts").limit(5).to_list()
|
results = table.search("la", query_type="fts").limit(5).to_list()
|
||||||
assert len(results) == 0
|
assert len(results) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_fts_on_list(mem_db: DBConnection):
|
||||||
|
data = pa.table(
|
||||||
|
{
|
||||||
|
"text": [
|
||||||
|
["lance database", "the", "search"],
|
||||||
|
["lance database"],
|
||||||
|
["lance", "search"],
|
||||||
|
["database", "search"],
|
||||||
|
["unrelated", "doc"],
|
||||||
|
],
|
||||||
|
"vector": [
|
||||||
|
[1.0, 2.0, 3.0],
|
||||||
|
[4.0, 5.0, 6.0],
|
||||||
|
[7.0, 8.0, 9.0],
|
||||||
|
[10.0, 11.0, 12.0],
|
||||||
|
[13.0, 14.0, 15.0],
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
table = mem_db.create_table("test", data=data)
|
||||||
|
table.create_fts_index("text", use_tantivy=False)
|
||||||
|
|
||||||
|
res = table.search("lance").limit(5).to_list()
|
||||||
|
assert len(res) == 3
|
||||||
|
|
||||||
|
res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
|
||||||
|
assert len(res) == 2
|
||||||
|
|||||||
@@ -158,7 +158,17 @@ pub fn supported_label_list_data_type(dtype: &DataType) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn supported_fts_data_type(dtype: &DataType) -> bool {
|
pub fn supported_fts_data_type(dtype: &DataType) -> bool {
|
||||||
matches!(dtype, DataType::Utf8 | DataType::LargeUtf8)
|
supported_fts_data_type_impl(dtype, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn supported_fts_data_type_impl(dtype: &DataType, in_list: bool) -> bool {
|
||||||
|
match (dtype, in_list) {
|
||||||
|
(DataType::Utf8 | DataType::LargeUtf8, _) => true,
|
||||||
|
(DataType::List(field) | DataType::LargeList(field), false) => {
|
||||||
|
supported_fts_data_type_impl(field.data_type(), true)
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn supported_vector_data_type(dtype: &DataType) -> bool {
|
pub fn supported_vector_data_type(dtype: &DataType) -> bool {
|
||||||
|
|||||||
Reference in New Issue
Block a user