Compare commits

...

12 Commits

Author SHA1 Message Date
Lance Release
f3cf986777 [python] Bump version: 0.3.1 → 0.3.2 2023-10-24 19:06:38 +00:00
Bert
c73fcc8898 update lance to 0.8.7 (#598) 2023-10-24 14:49:36 -04:00
Chang She
cd9debc3b7 fix(python): fix multiple embedding functions bug (#597)
Closes #594

The embedding functions are pydantic models so multiple instances with
the same parameters are considered ==, which means that if you have
multiple embedding columns it's possible for the embeddings to get
overwritten. We use `is` instead of `==` to avoid this problem.

testing: modified unit test to include this case
2023-10-24 13:05:05 -04:00
Rob Meng
26a97ba997 feat: add checkout method to table to reuse existing store and connections (#593)
Prior to this PR, to get a new version of a table, we need to re-open
the table. This has a few downsides w.r.t. performance:
* Object store is recreated, which takes time and throws away existing
warm connections
* Commit handler is thrown away as well, which also may contain warm
connections
2023-10-23 12:06:13 -04:00
Rob Meng
ce19fedb08 feat: include manifest files in mirror store (#589) 2023-10-21 12:21:41 -04:00
Will Jones
14e8e48de2 Revert "[python] Bump version: 0.3.2 → 0.3.3"
This reverts commit c30faf6083.
2023-10-20 17:52:49 -07:00
Will Jones
c30faf6083 [python] Bump version: 0.3.2 → 0.3.3 2023-10-20 17:30:00 -07:00
Ayush Chaurasia
64a4f025bb [Docs]: Minor Fixes (#587)
* Filename typo
* Remove rick_morty csv as users won't really be able to use it. We can
create an executable colab and download it from a bucket or something.
2023-10-20 16:14:35 +02:00
Ayush Chaurasia
6dc968e7d3 [Docs] Embeddings API: Add multi-lingual semantic search example (#582) 2023-10-20 18:40:49 +05:30
Ayush Chaurasia
06b5b69f1e [Docs]Versioning docs (#586)
closes #564

---------

Co-authored-by: Chang She <chang@lancedb.com>
2023-10-20 18:40:16 +05:30
Lance Release
6bd3a838fc Updating package-lock.json 2023-10-19 20:45:39 +00:00
Lance Release
f36fea8f20 Updating package-lock.json 2023-10-19 20:06:10 +00:00
13 changed files with 1915 additions and 58 deletions

View File

@@ -5,9 +5,9 @@ exclude = ["python"]
resolver = "2"
[workspace.dependencies]
lance = { "version" = "=0.8.6", "features" = ["dynamodb"] }
lance-linalg = { "version" = "=0.8.6" }
lance-testing = { "version" = "=0.8.6" }
lance = { "version" = "=0.8.7", "features" = ["dynamodb"] }
lance-linalg = { "version" = "=0.8.7" }
lance-testing = { "version" = "=0.8.7" }
# Note that this one does not include pyarrow
arrow = { version = "47.0.0", optional = false }
arrow-array = "47.0"
@@ -18,7 +18,7 @@ arrow-schema = "47.0"
arrow-arith = "47.0"
arrow-cast = "47.0"
chrono = "0.4.23"
half = { "version" = "=2.2.1", default-features = false, features = [
half = { "version" = "=2.3.1", default-features = false, features = [
"num-traits"
] }
log = "0.4"

View File

@@ -73,12 +73,14 @@ nav:
- Vector Search: search.md
- SQL filters: sql.md
- Indexing: ann_indexes.md
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
- 🧬 Embeddings:
- embeddings/index.md
- Ingest Embedding Functions: embeddings/embedding_functions.md
- Available Functions: embeddings/default_embedding_functions.md
- Create Custom Embedding Functions: embeddings/api.md
- Example- MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
- Example - Multi-lingual semantic search: notebooks/multi_lingual_example.ipynb
- Example - MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
- 🔍 Python full-text search: fts.md
- 🔌 Integrations:
- integrations/index.md
@@ -110,12 +112,14 @@ nav:
- Vector Search: search.md
- SQL filters: sql.md
- Indexing: ann_indexes.md
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
- Embeddings:
- embeddings/index.md
- Ingest Embedding Functions: embeddings/embedding_functions.md
- Available Functions: embeddings/default_embedding_functions.md
- Create Custom Embedding Functions: embeddings/api.md
- Example- MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
- Example - Multi-lingual semantic search: notebooks/multi_lingual_example.ipynb
- Example - MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
- Python full-text search: fts.md
- Integrations:
- integrations/index.md

View File

@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "88c1af18",
"metadata": {},
"source": [
"# Example - MultiModal CLIP Embeddings"
]
},
{
"cell_type": "markdown",
"id": "c6b5d346-2c2a-4341-a132-00e53543f8d1",

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

74
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.3.2",
"version": "0.3.3",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.3.2",
"version": "0.3.3",
"cpu": [
"x64",
"arm64"
@@ -53,11 +53,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.3.2",
"@lancedb/vectordb-darwin-x64": "0.3.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.3.2",
"@lancedb/vectordb-linux-x64-gnu": "0.3.2",
"@lancedb/vectordb-win32-x64-msvc": "0.3.2"
"@lancedb/vectordb-darwin-arm64": "0.3.3",
"@lancedb/vectordb-darwin-x64": "0.3.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.3.3",
"@lancedb/vectordb-linux-x64-gnu": "0.3.3",
"@lancedb/vectordb-win32-x64-msvc": "0.3.3"
}
},
"node_modules/@apache-arrow/ts": {
@@ -317,9 +317,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.2.tgz",
"integrity": "sha512-CDh+sU2k4xVfWauwDZnybma8AJ+Q2i0SzHg05BwgDcani7I0k60NjJ5GobpgQ38xOiEmwHllES1xs4NRh+1YkA==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.3.tgz",
"integrity": "sha512-nvyj7xNX2/wb/PH5TjyhLR/NQ1jVuoBw2B5UaSg7qf8Tnm5SSXWQ7F25RVKcKwh72fz1qB+CWW24ftZnRzbT/Q==",
"cpu": [
"arm64"
],
@@ -329,9 +329,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.2.tgz",
"integrity": "sha512-xevyA+M/UE8ttaNkx68AyIUKlyWMhIzOECx0hbyN1zfShJe2UcunQcmbM1NxUi7EywodByyiP7bfMI1ZR1Y4Mw==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.3.tgz",
"integrity": "sha512-7CW+nILyPHp6cua0Rl0xaTDWw/vajEn/jCsEjFYgDmE+rtf5Z5Fum41FxR9C2TtIAvUK+nWb5mkYeOLqU6vRvg==",
"cpu": [
"x64"
],
@@ -341,9 +341,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.2.tgz",
"integrity": "sha512-mSKkQ/p6UTSLwWzfZMBS7wA6Gf335KljXLaOhdT4TUI/jC6e9/cvZKkXRgpdE9/gvfl4/WVzKY7sg3+azDYQ+A==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.3.tgz",
"integrity": "sha512-MmhwbacKxZPkLwwOqysVY8mUb8lFoyFIPlYhSLV4xS1C8X4HWALljIul1qMl1RYudp9Uc3PsOzRexl+OvCGfUw==",
"cpu": [
"arm64"
],
@@ -353,9 +353,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.2.tgz",
"integrity": "sha512-S1D0VwdidwyfIKE58t94rD+EEb5B64ORMVkTw5FBZJirShkk82+0G9H3jNgWrRMt1PB3Qn1286/wqDLQ9+fTsA==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.3.tgz",
"integrity": "sha512-OrNlsKi/QPw59Po040oRKn8IuqFEk4upc/4FaFKqVkcmQjjZrMg5Kgy9ZfWIhHdAnWXXggZZIPArpt0X1B0ceA==",
"cpu": [
"x64"
],
@@ -365,9 +365,9 @@
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.2.tgz",
"integrity": "sha512-tnct1hf9GAlMchhYU6Lqmbm2nUKPO8apS7tuTIiucQh6gx+vbHmFZHFNHhw1AUJTpsj/eH2Z9iNayuC5Scdvhw==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.3.tgz",
"integrity": "sha512-lIT0A7a6eqX51IfGyhECtpXXgsr//kgbd+HZbcCdPy2GMmNezSch/7V22zExDSpF32hX8WfgcTLYCVWVilggDQ==",
"cpu": [
"x64"
],
@@ -4869,33 +4869,33 @@
}
},
"@lancedb/vectordb-darwin-arm64": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.2.tgz",
"integrity": "sha512-CDh+sU2k4xVfWauwDZnybma8AJ+Q2i0SzHg05BwgDcani7I0k60NjJ5GobpgQ38xOiEmwHllES1xs4NRh+1YkA==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.3.tgz",
"integrity": "sha512-nvyj7xNX2/wb/PH5TjyhLR/NQ1jVuoBw2B5UaSg7qf8Tnm5SSXWQ7F25RVKcKwh72fz1qB+CWW24ftZnRzbT/Q==",
"optional": true
},
"@lancedb/vectordb-darwin-x64": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.2.tgz",
"integrity": "sha512-xevyA+M/UE8ttaNkx68AyIUKlyWMhIzOECx0hbyN1zfShJe2UcunQcmbM1NxUi7EywodByyiP7bfMI1ZR1Y4Mw==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.3.tgz",
"integrity": "sha512-7CW+nILyPHp6cua0Rl0xaTDWw/vajEn/jCsEjFYgDmE+rtf5Z5Fum41FxR9C2TtIAvUK+nWb5mkYeOLqU6vRvg==",
"optional": true
},
"@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.2.tgz",
"integrity": "sha512-mSKkQ/p6UTSLwWzfZMBS7wA6Gf335KljXLaOhdT4TUI/jC6e9/cvZKkXRgpdE9/gvfl4/WVzKY7sg3+azDYQ+A==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.3.tgz",
"integrity": "sha512-MmhwbacKxZPkLwwOqysVY8mUb8lFoyFIPlYhSLV4xS1C8X4HWALljIul1qMl1RYudp9Uc3PsOzRexl+OvCGfUw==",
"optional": true
},
"@lancedb/vectordb-linux-x64-gnu": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.2.tgz",
"integrity": "sha512-S1D0VwdidwyfIKE58t94rD+EEb5B64ORMVkTw5FBZJirShkk82+0G9H3jNgWrRMt1PB3Qn1286/wqDLQ9+fTsA==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.3.tgz",
"integrity": "sha512-OrNlsKi/QPw59Po040oRKn8IuqFEk4upc/4FaFKqVkcmQjjZrMg5Kgy9ZfWIhHdAnWXXggZZIPArpt0X1B0ceA==",
"optional": true
},
"@lancedb/vectordb-win32-x64-msvc": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.2.tgz",
"integrity": "sha512-tnct1hf9GAlMchhYU6Lqmbm2nUKPO8apS7tuTIiucQh6gx+vbHmFZHFNHhw1AUJTpsj/eH2Z9iNayuC5Scdvhw==",
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.3.tgz",
"integrity": "sha512-lIT0A7a6eqX51IfGyhECtpXXgsr//kgbd+HZbcCdPy2GMmNezSch/7V22zExDSpF32hX8WfgcTLYCVWVilggDQ==",
"optional": true
},
"@neon-rs/cli": {

View File

@@ -65,8 +65,8 @@ describe('LanceDB Mirrored Store Integration test', function () {
const mirroredPath = path.join(dir, `${tableName}.lance`)
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be two dirs
assert.equal(files.length, 2)
// there should be three dirs
assert.equal(files.length, 3)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
@@ -76,6 +76,12 @@ describe('LanceDB Mirrored Store Integration test', function () {
assert.isTrue(files[0].name.endsWith('.txn'))
})
fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.manifest'))
})
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
@@ -88,8 +94,8 @@ describe('LanceDB Mirrored Store Integration test', function () {
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be two dirs
assert.equal(files.length, 3)
// there should be four dirs
assert.equal(files.length, 4)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
@@ -128,12 +134,13 @@ describe('LanceDB Mirrored Store Integration test', function () {
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be two dirs
assert.equal(files.length, 4)
// there should be five dirs
assert.equal(files.length, 5)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
assert.isTrue(files[3].isDirectory())
assert.isTrue(files[4].isDirectory())
// Three TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.1
current_version = 0.3.2
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -327,7 +327,12 @@ class LanceModel(pydantic.BaseModel):
for vec, func in vec_and_function:
for source, field_info in cls.safe_get_fields().items():
src_func = get_extras(field_info, "source_column_for")
if src_func == func:
if src_func is func:
# note we can't use == here since the function is a pydantic
# model so two instances of the same function are ==, so if you
# have multiple vector columns from multiple sources, both will
# be mapped to the same source column
# GH594
configs.append(
EmbeddingFunctionConfig(
source_column=source, vector_column=vec, function=func

View File

@@ -1,9 +1,9 @@
[project]
name = "lancedb"
version = "0.3.1"
version = "0.3.2"
dependencies = [
"deprecation",
"pylance==0.8.6",
"pylance==0.8.7",
"ratelimiter~=1.0",
"retry>=0.9.2",
"tqdm>=4.1.0",

View File

@@ -33,10 +33,13 @@ def test_sentence_transformer(alias, tmp_path):
db = lancedb.connect(tmp_path)
registry = get_registry()
func = registry.get(alias).create()
func2 = registry.get(alias).create()
class Words(LanceModel):
text: str = func.SourceField()
text2: str = func2.SourceField()
vector: Vector(func.ndims()) = func.VectorField()
vector2: Vector(func2.ndims()) = func2.VectorField()
table = db.create_table("words", schema=Words)
table.add(
@@ -50,7 +53,16 @@ def test_sentence_transformer(alias, tmp_path):
"foo",
"bar",
"baz",
]
],
"text2": [
"to be or not to be",
"that is the question",
"for whether tis nobler",
"in the mind to suffer",
"the slings and arrows",
"of outrageous fortune",
"or to take arms",
],
}
)
)
@@ -62,6 +74,13 @@ def test_sentence_transformer(alias, tmp_path):
expected = table.search(vec).limit(1).to_pydantic(Words)[0]
assert actual.text == expected.text
assert actual.text == "hello world"
assert not np.allclose(actual.vector, actual.vector2)
actual = (
table.search(query, vector_column_name="vector2").limit(1).to_pydantic(Words)[0]
)
assert actual.text != "hello world"
assert not np.allclose(actual.vector, actual.vector2)
@pytest.mark.slow

View File

@@ -57,7 +57,7 @@ trait PrimaryOnly {
impl PrimaryOnly for Path {
fn primary_only(&self) -> bool {
self.to_string().contains("manifest")
self.filename().unwrap_or("") == "_latest.manifest"
}
}
@@ -118,8 +118,10 @@ impl ObjectStore for MirroringObjectStore {
self.primary.head(location).await
}
// garbage collection on secondary will happen async from other means
/// Deletes `location` from the mirrored stores.
///
/// Unless `location.primary_only()` is true, the object is first deleted
/// from the secondary store; an error there is returned immediately and the
/// primary delete is not attempted. The result of the primary delete is
/// returned otherwise.
async fn delete(&self, location: &Path) -> Result<()> {
if !location.primary_only() {
self.secondary.delete(location).await?;
}
self.primary.delete(location).await
}
@@ -132,7 +134,7 @@ impl ObjectStore for MirroringObjectStore {
}
async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
if from.primary_only() {
if to.primary_only() {
self.primary.copy(from, to).await
} else {
self.secondary.copy(from, to).await?;
@@ -142,6 +144,9 @@ impl ObjectStore for MirroringObjectStore {
}
/// Copies `from` to `to` on the primary store using `copy_if_not_exists`.
///
/// Unless `to.primary_only()` is true, the object is first copied on the
/// secondary store; an error there is returned before the primary store is
/// touched.
///
/// NOTE(review): the secondary store is called with plain `copy`, not
/// `copy_if_not_exists`, and runs before the primary call — confirm this
/// ordering and the difference in call are intended.
async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
if !to.primary_only() {
self.secondary.copy(from, to).await?;
}
self.primary.copy_if_not_exists(from, to).await
}
}
@@ -379,7 +384,7 @@ mod test {
let primary_f = primary_elem.unwrap().unwrap();
// hit manifest, skip, _versions contains all the manifest and should not exist on secondary
let primary_raw_path = primary_f.file_name().to_str().unwrap();
if primary_raw_path.contains("manifest") || primary_raw_path.contains("_versions") {
if primary_raw_path.contains("_latest.manifest") {
primary_elem = primary_iter.next();
continue;
}

View File

@@ -153,6 +153,22 @@ impl Table {
})
}
/// Returns a new `Table` that refers to the latest version of the dataset.
///
/// The latest version id is looked up through `self.dataset`. When it equals
/// the version this table already holds, the existing dataset handle is
/// cloned; otherwise the latest version is checked out from the current
/// dataset and wrapped in a new `Arc`. The name, URI and store wrapper are
/// cloned from `self`.
///
/// # Errors
///
/// Propagates any error returned by `latest_version_id` or
/// `checkout_version`.
pub async fn checkout_latest(&self) -> Result<Self> {
let latest_version_id = self.dataset.latest_version_id().await?;
// Skip the checkout when this table is already at the latest version.
let dataset = if latest_version_id == self.dataset.version().version {
self.dataset.clone()
} else {
Arc::new(self.dataset.checkout_version(latest_version_id).await?)
};
Ok(Table {
name: self.name.clone(),
uri: self.uri.clone(),
dataset,
store_wrapper: self.store_wrapper.clone(),
})
}
fn get_table_name(uri: &str) -> Result<String> {
let path = Path::new(uri);
let name = path