mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-22 21:09:58 +00:00
feat: a utility for creating "permutation views" (#2552)
I'm working on a LanceDB version of PyTorch data loading (and hopefully addressing https://github.com/lancedb/lance/issues/3727). However, rather than rely on PyTorch for everything, I'm moving some of the things that PyTorch does into Rust. This gives us more control over data loading (e.g. using shards or a hash-based split) and it allows permutations to be persistent. In particular, I hope to be able to:

* Create a persistent permutation
* This permutation can handle splits, filtering, shuffling, and sharding
* Create a Rust data loader that can read a permutation (one or more splits), or a subset of a permutation (for DDP)
* Create a Python data loader that delegates to the Rust data loader

Eventually, create integrations for other data loading libraries, including Rust & Node.
This commit is contained in:
27
Cargo.lock
generated
27
Cargo.lock
generated
@@ -4702,6 +4702,7 @@ dependencies = [
|
||||
name = "lancedb"
|
||||
version = "0.22.2"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4737,8 +4738,11 @@ dependencies = [
|
||||
"http 1.3.1",
|
||||
"http-body 1.0.1",
|
||||
"lance",
|
||||
"lance-core",
|
||||
"lance-datafusion",
|
||||
"lance-datagen",
|
||||
"lance-encoding",
|
||||
"lance-file",
|
||||
"lance-index",
|
||||
"lance-io",
|
||||
"lance-linalg",
|
||||
@@ -4764,6 +4768,7 @@ dependencies = [
|
||||
"serde_with",
|
||||
"snafu",
|
||||
"tempfile",
|
||||
"test-log",
|
||||
"tokenizers",
|
||||
"tokio",
|
||||
"url",
|
||||
@@ -8237,6 +8242,28 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-log"
|
||||
version = "0.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e33b98a582ea0be1168eba097538ee8dd4bbe0f2b01b22ac92ea30054e5be7b"
|
||||
dependencies = [
|
||||
"env_logger",
|
||||
"test-log-macros",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-log-macros"
|
||||
version = "0.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "451b374529930d7601b1eef8d32bc79ae870b6079b069401709c2a8bf9e75f36"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
|
||||
Reference in New Issue
Block a user