mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 14:49:57 +00:00
Compare commits
3 Commits
weston/0.1
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1f41101897 | ||
|
|
99e4db0d6a | ||
|
|
46486d4d22 |
14
Cargo.toml
14
Cargo.toml
@@ -22,13 +22,13 @@ categories = ["database-implementations"]
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.18.3", "features" = [
|
||||
"dynamodb",
|
||||
]}
|
||||
lance-index = { "version" = "=0.18.3"}
|
||||
lance-linalg = { "version" = "=0.18.3"}
|
||||
lance-table = { "version" = "=0.18.3"}
|
||||
lance-testing = { "version" = "=0.18.3"}
|
||||
lance-datafusion = { "version" = "=0.18.3"}
|
||||
lance-encoding = { "version" = "=0.18.3"}
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-index = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-linalg = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-table = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-testing = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-datafusion = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
lance-encoding = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "52.2", optional = false }
|
||||
arrow-array = "52.2"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.14.0"
|
||||
current_version = "0.14.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.14.0"
|
||||
version = "0.14.1-beta.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -196,22 +196,6 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
|
||||
};
|
||||
Ok((data, builder))
|
||||
}
|
||||
|
||||
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
|
||||
// Early verification of the embedding name
|
||||
let embedding_func = self
|
||||
.parent
|
||||
.embedding_registry()
|
||||
.get(&definition.embedding_name)
|
||||
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
|
||||
name: definition.embedding_name.clone(),
|
||||
reason: "No embedding function found in the connection's embedding_registry"
|
||||
.to_string(),
|
||||
})?;
|
||||
|
||||
self.embeddings.push((definition, embedding_func));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Builder methods that only apply when we do not have initial data
|
||||
@@ -329,6 +313,26 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
|
||||
};
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an embedding definition to the table.
|
||||
///
|
||||
/// The `embedding_name` must match the name of an embedding function that
|
||||
/// was previously registered with the connection's [`EmbeddingRegistry`].
|
||||
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
|
||||
// Early verification of the embedding name
|
||||
let embedding_func = self
|
||||
.parent
|
||||
.embedding_registry()
|
||||
.get(&definition.embedding_name)
|
||||
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
|
||||
name: definition.embedding_name.clone(),
|
||||
reason: "No embedding function found in the connection's embedding_registry"
|
||||
.to_string(),
|
||||
})?;
|
||||
|
||||
self.embeddings.push((definition, embedding_func));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -144,7 +144,7 @@ impl std::str::FromStr for IndexType {
|
||||
"BTREE" => Ok(Self::BTree),
|
||||
"BITMAP" => Ok(Self::Bitmap),
|
||||
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
|
||||
"FTS" => Ok(Self::FTS),
|
||||
"FTS" | "INVERTED" => Ok(Self::FTS),
|
||||
"IVF_PQ" => Ok(Self::IvfPq),
|
||||
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
|
||||
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
|
||||
|
||||
@@ -2110,7 +2110,6 @@ mod tests {
|
||||
use arrow_schema::{DataType, Field, Schema, TimeUnit};
|
||||
use futures::TryStreamExt;
|
||||
use lance::dataset::{Dataset, WriteMode};
|
||||
use lance::index::DatasetIndexInternalExt;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use rand::Rng;
|
||||
use tempfile::tempdir;
|
||||
@@ -3002,22 +3001,8 @@ mod tests {
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
|
||||
// assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
|
||||
assert_eq!(index.columns, vec!["category".to_string()]);
|
||||
|
||||
// For now, just open the index to verify its type
|
||||
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
|
||||
let indices = lance_dataset
|
||||
.load_indices_by_name(&index.name)
|
||||
.await
|
||||
.unwrap();
|
||||
let index_meta = &indices[0];
|
||||
let idx = lance_dataset
|
||||
.open_scalar_index("category", &index_meta.uuid.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(idx.index_type(), IndexType::Bitmap);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -3086,22 +3071,57 @@ mod tests {
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
|
||||
// assert_eq!(index.index_type, crate::index::IndexType::LabelList);
|
||||
assert_eq!(index.index_type, crate::index::IndexType::LabelList);
|
||||
assert_eq!(index.columns, vec!["tags".to_string()]);
|
||||
}
|
||||
|
||||
// For now, just open the index to verify its type
|
||||
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
|
||||
let indices = lance_dataset
|
||||
.load_indices_by_name(&index.name)
|
||||
#[tokio::test]
|
||||
async fn test_create_inverted_index() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
|
||||
const WORDS: [&str; 3] = ["cat", "dog", "fish"];
|
||||
let mut text_builder = StringBuilder::new();
|
||||
let num_rows = 120;
|
||||
for i in 0..num_rows {
|
||||
text_builder.append_value(WORDS[i % 3]);
|
||||
}
|
||||
let text = Arc::new(text_builder.finish());
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("text", DataType::Utf8, true),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..num_rows as i32)),
|
||||
text,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = conn
|
||||
.create_table(
|
||||
"test_bitmap",
|
||||
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
|
||||
)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let index_meta = &indices[0];
|
||||
let idx = lance_dataset
|
||||
.open_scalar_index("tags", &index_meta.uuid.to_string())
|
||||
|
||||
table
|
||||
.create_index(&["text"], Index::FTS(Default::default()))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(idx.index_type(), IndexType::LabelList);
|
||||
let index_configs = table.list_indices().await.unwrap();
|
||||
assert_eq!(index_configs.len(), 1);
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::FTS);
|
||||
assert_eq!(index.columns, vec!["text".to_string()]);
|
||||
assert_eq!(index.name, "text_idx");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user