Compare commits

..

3 Commits

Author SHA1 Message Date
Lance Release
1f41101897 Bump version: 0.14.0 → 0.14.1-beta.0 2024-10-17 18:58:45 +00:00
Will Jones
99e4db0d6a feat(rust): allow add_embedding on create_empty_table (#1754)
Fixes https://github.com/lancedb/lancedb/issues/1750
2024-10-17 11:58:15 -07:00
Will Jones
46486d4d22 fix: list_indices can handle fts indexes (#1753)
Fixes #1752
2024-10-16 10:39:40 -07:00
6 changed files with 76 additions and 52 deletions

View File

@@ -22,13 +22,13 @@ categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.18.3", "features" = [
"dynamodb",
]}
lance-index = { "version" = "=0.18.3"}
lance-linalg = { "version" = "=0.18.3"}
lance-table = { "version" = "=0.18.3"}
lance-testing = { "version" = "=0.18.3"}
lance-datafusion = { "version" = "=0.18.3"}
lance-encoding = { "version" = "=0.18.3"}
], git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-index = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-linalg = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-table = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-testing = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-datafusion = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-encoding = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
# Note that this one does not include pyarrow
arrow = { version = "52.2", optional = false }
arrow-array = "52.2"

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.14.0"
current_version = "0.14.1-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.14.0"
version = "0.14.1-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -196,22 +196,6 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
};
Ok((data, builder))
}
/// Attach an embedding definition to this table builder.
///
/// Looks up `definition.embedding_name` in the registry returned by
/// `self.parent.embedding_registry()` and, when a function is found, appends
/// the `(definition, function)` pair to `self.embeddings`. The builder is
/// taken by value and handed back so calls can be chained.
///
/// # Errors
///
/// Returns [`Error::EmbeddingFunctionNotFound`] when the registry has no
/// entry for `definition.embedding_name`.
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
// Keep the resolved function next to its definition.
self.embeddings.push((definition, embedding_func));
Ok(self)
}
}
// Builder methods that only apply when we do not have initial data
@@ -329,6 +313,26 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
};
self
}
/// Add an embedding definition to the table.
///
/// The `embedding_name` must match the name of an embedding function that
/// was previously registered with the connection's [`EmbeddingRegistry`].
///
/// The name is resolved immediately against the registry returned by
/// `self.parent.embedding_registry()`; on success the `(definition, function)`
/// pair is appended to `self.embeddings` and the builder is returned so calls
/// can be chained.
///
/// # Errors
///
/// Returns [`Error::EmbeddingFunctionNotFound`] when the registry has no
/// entry for `definition.embedding_name`.
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
// Keep the resolved function next to its definition.
self.embeddings.push((definition, embedding_func));
Ok(self)
}
}
#[derive(Clone, Debug)]

View File

@@ -144,7 +144,7 @@ impl std::str::FromStr for IndexType {
"BTREE" => Ok(Self::BTree),
"BITMAP" => Ok(Self::Bitmap),
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
"FTS" => Ok(Self::FTS),
"FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_PQ" => Ok(Self::IvfPq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),

View File

@@ -2110,7 +2110,6 @@ mod tests {
use arrow_schema::{DataType, Field, Schema, TimeUnit};
use futures::TryStreamExt;
use lance::dataset::{Dataset, WriteMode};
use lance::index::DatasetIndexInternalExt;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use rand::Rng;
use tempfile::tempdir;
@@ -3002,22 +3001,8 @@ mod tests {
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
// assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["category".to_string()]);
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
.await
.unwrap();
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("category", &index_meta.uuid.to_string())
.await
.unwrap();
assert_eq!(idx.index_type(), IndexType::Bitmap);
}
#[tokio::test]
@@ -3086,22 +3071,57 @@ mod tests {
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
// assert_eq!(index.index_type, crate::index::IndexType::LabelList);
assert_eq!(index.index_type, crate::index::IndexType::LabelList);
assert_eq!(index.columns, vec!["tags".to_string()]);
}
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
// Builds a two-column table (`id`, `text`), creates an FTS index on `text`,
// and checks that `list_indices` reports exactly one index with type FTS,
// column `text`, and name `text_idx`.
#[tokio::test]
async fn test_create_inverted_index() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
// Fill the text column by cycling through three fixed words.
const WORDS: [&str; 3] = ["cat", "dog", "fish"];
let mut text_builder = StringBuilder::new();
let num_rows = 120;
for i in 0..num_rows {
text_builder.append_value(WORDS[i % 3]);
}
let text = Arc::new(text_builder.finish());
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("text", DataType::Utf8, true),
]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(0..num_rows as i32)),
text,
],
)
.unwrap();
// NOTE(review): the table name "test_bitmap" looks copy-pasted from the bitmap
// index test; it is harmless here but misleading. Confirm and rename if so.
let table = conn
.create_table(
"test_bitmap",
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
)
.execute()
.await
.unwrap();
// NOTE(review): the next three lines reference `indices` and `lance_dataset`,
// which are not defined in this function; they appear to be leftovers from the
// removed label-list verification (diff residue). Confirm before relying on them.
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("tags", &index_meta.uuid.to_string())
table
.create_index(&["text"], Index::FTS(Default::default()))
.execute()
.await
.unwrap();
// NOTE(review): this assertion checks for `LabelList` via `idx`, which also
// looks like residue from the removed label-list check. Confirm.
assert_eq!(idx.index_type(), IndexType::LabelList);
// The table should now expose a single FTS index over `text`.
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::FTS);
assert_eq!(index.columns, vec!["text".to_string()]);
assert_eq!(index.name, "text_idx");
}
#[tokio::test]