mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-25 06:19:57 +00:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
59014a01e0 | ||
|
|
47ae17ea05 | ||
|
|
b6739f3f66 | ||
|
|
3a2df0ce45 | ||
|
|
c0bc65cdfa | ||
|
|
298b81f0b0 | ||
|
|
fe7a3ccd60 | ||
|
|
baf8d7c1a1 | ||
|
|
2021e1bf6d | ||
|
|
2dbe71cf88 | ||
|
|
afe19ade7f | ||
|
|
118efdce73 | ||
|
|
b0426387e7 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -15,3 +15,5 @@ python/build
|
||||
python/dist
|
||||
|
||||
notebooks/.ipynb_checkpoints
|
||||
|
||||
**/.hypothesis
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
<img width="275" alt="LanceDB Logo" src="https://user-images.githubusercontent.com/917119/226205734-6063d87a-1ecc-45fe-85be-1dea6383a3d8.png">
|
||||
|
||||
**Serverless, low-latency vector database for AI applications**
|
||||
**Developer-friendly, serverless vector database for AI applications**
|
||||
|
||||
<a href="https://lancedb.github.io/lancedb/">Documentation</a> •
|
||||
<a href="https://blog.eto.ai/">Blog</a> •
|
||||
@@ -21,6 +21,10 @@ The key features of LanceDB include:
|
||||
|
||||
* Production-scale vector search with no servers to manage.
|
||||
|
||||
* Optimized for multi-modal data (text, images, videos, point clouds and more).
|
||||
|
||||
* Native Python and JavaScript/TypeScript support (coming soon).
|
||||
|
||||
* Combine attribute-based information with vectors and store them as a single source-of-truth.
|
||||
|
||||
* Zero-copy, automatic versioning: manage versions of your data without needing extra infrastructure.
|
||||
|
||||
@@ -6,6 +6,10 @@ The key features of LanceDB include:
|
||||
|
||||
* Production-scale vector search with no servers to manage.
|
||||
|
||||
* Optimized for multi-modal data (text, images, videos, point clouds and more).
|
||||
|
||||
* Native Python and JavaScript/TypeScript support (coming soon).
|
||||
|
||||
* Combine attribute-based information with vectors and store them as a single source-of-truth.
|
||||
|
||||
* Zero-copy, automatic versioning: manage versions of your data without needing extra infrastructure.
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from .db import LanceDBConnection, URI
|
||||
from .db import URI, LanceDBConnection
|
||||
|
||||
|
||||
def connect(uri: URI) -> LanceDBConnection:
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from pathlib import Path
|
||||
from typing import Union, List
|
||||
from typing import List, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
@@ -14,10 +14,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from .common import URI, DATA
|
||||
from .common import DATA, URI
|
||||
from .table import LanceTable
|
||||
from .util import get_uri_scheme
|
||||
|
||||
|
||||
class LanceDBConnection:
|
||||
@@ -26,10 +28,12 @@ class LanceDBConnection:
|
||||
"""
|
||||
|
||||
def __init__(self, uri: URI):
|
||||
if isinstance(uri, str):
|
||||
uri = Path(uri)
|
||||
uri = uri.expanduser().absolute()
|
||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||
is_local = isinstance(uri, Path) or get_uri_scheme(uri) == "file"
|
||||
if is_local:
|
||||
if isinstance(uri, str):
|
||||
uri = Path(uri)
|
||||
uri = uri.expanduser().absolute()
|
||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||
self._uri = str(uri)
|
||||
|
||||
@property
|
||||
@@ -43,7 +47,11 @@ class LanceDBConnection:
|
||||
-------
|
||||
A list of table names.
|
||||
"""
|
||||
return [p.stem for p in Path(self.uri).glob("*.lance")]
|
||||
if get_uri_scheme(self.uri) == "file":
|
||||
return [p.stem for p in Path(self.uri).glob("*.lance")]
|
||||
raise NotImplementedError(
|
||||
"List table_names is only supported for local filesystem for now"
|
||||
)
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.table_names())
|
||||
|
||||
@@ -13,14 +13,13 @@
|
||||
|
||||
import math
|
||||
import sys
|
||||
|
||||
from retry import retry
|
||||
from typing import Callable, Union
|
||||
|
||||
from lance.vector import vec_to_table
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from lance.vector import vec_to_table
|
||||
from retry import retry
|
||||
|
||||
|
||||
def with_embeddings(
|
||||
@@ -68,7 +67,9 @@ class EmbeddingFunction:
|
||||
if len(self.rate_limiter_kwargs) > 0:
|
||||
v = int(sys.version_info.minor)
|
||||
if v >= 11:
|
||||
print("WARNING: rate limit only support up to 3.10, proceeding without rate limiter")
|
||||
print(
|
||||
"WARNING: rate limit only support up to 3.10, proceeding without rate limiter"
|
||||
)
|
||||
else:
|
||||
import ratelimiter
|
||||
|
||||
|
||||
@@ -19,12 +19,12 @@ from functools import cached_property
|
||||
import lance
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from lance import LanceDataset
|
||||
import pyarrow as pa
|
||||
from lance import LanceDataset
|
||||
from lance.vector import vec_to_table
|
||||
|
||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from .query import LanceQueryBuilder
|
||||
from .common import DATA, VECTOR_COLUMN_NAME, VEC
|
||||
|
||||
|
||||
def _sanitize_data(data, schema):
|
||||
|
||||
43
python/lancedb/util.py
Normal file
43
python/lancedb/util.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from urllib.parse import ParseResult, urlparse
|
||||
|
||||
from pyarrow import fs
|
||||
|
||||
|
||||
def get_uri_scheme(uri: str) -> str:
    """
    Return the normalized scheme of a URI, treating scheme-less URIs as local files.

    Parameters
    ----------
    uri : str
        The URI to inspect.

    Returns
    -------
    str: ``"file"`` when the URI has no scheme or a single-character scheme,
        ``"s3"`` for the ``s3a`` / ``s3n`` aliases, otherwise the scheme
        reported by ``urlparse``.
    """
    scheme = urlparse(uri).scheme

    # An empty scheme means a plain path. A one-character scheme is a Windows
    # drive letter that urlparse mistook for a scheme (e.g. "c:\path" parses
    # with scheme="c"), so both cases are local files.
    if len(scheme) <= 1:
        return "file"

    # Fold the alternative S3 scheme spellings into the canonical one.
    if scheme in ("s3a", "s3n"):
        return "s3"

    return scheme
|
||||
@@ -1,10 +1,10 @@
|
||||
[project]
|
||||
name = "lancedb"
|
||||
version = "0.1.1"
|
||||
dependencies = ["pylance>=0.4.4", "ratelimiter", "retry", "tqdm"]
|
||||
version = "0.1.2"
|
||||
dependencies = ["pylance>=0.4.6", "ratelimiter", "retry", "tqdm"]
|
||||
description = "lancedb"
|
||||
authors = [
|
||||
{ name = "Lance Devs", email = "dev@eto.ai" },
|
||||
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
|
||||
]
|
||||
license = { file = "LICENSE" }
|
||||
readme = "README.md"
|
||||
|
||||
@@ -11,10 +11,11 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import lancedb
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
import lancedb
|
||||
|
||||
|
||||
def test_basic(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
@@ -12,15 +12,14 @@
|
||||
# limitations under the License.
|
||||
|
||||
import lance
|
||||
from lancedb.query import LanceQueryBuilder
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.testing as tm
|
||||
import pyarrow as pa
|
||||
|
||||
import pytest
|
||||
|
||||
from lancedb.query import LanceQueryBuilder
|
||||
|
||||
|
||||
class MockTable:
|
||||
def __init__(self, tmp_path):
|
||||
|
||||
30
python/tests/test_util.py
Normal file
30
python/tests/test_util.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from lancedb.util import get_uri_scheme
|
||||
|
||||
|
||||
def test_normalize_uri():
    """Check that ``get_uri_scheme`` maps each sample URI to its expected scheme."""
    # Each URI is stored next to its expected scheme so the two cannot drift
    # out of step: zip() over two parallel lists silently drops unmatched
    # entries, which would hide a missing expectation.
    cases = [
        ("relative/path", "file"),
        ("/absolute/path", "file"),
        ("file:///absolute/path", "file"),
        ("s3://bucket/path", "s3"),
        # The s3a / s3n aliases are normalized to "s3".
        ("s3a://bucket/path", "s3"),
        ("s3n://bucket/path", "s3"),
        ("gs://bucket/path", "gs"),
        # A Windows drive letter is parsed as a one-character scheme.
        ("c:\\windows\\path", "file"),
    ]

    for uri, expected_scheme in cases:
        parsed_scheme = get_uri_scheme(uri)
        # Include the URI in the failure message to identify the failing case.
        assert parsed_scheme == expected_scheme, uri
|
||||
12
rust/Cargo.toml
Normal file
12
rust/Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "vectordb"
|
||||
version = "0.0.1"
|
||||
edition = "2021"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
lance = "0.4.3"
|
||||
14
rust/src/lib.rs
Normal file
14
rust/src/lib.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
/// Returns the sum of `left` and `right`.
pub fn add(left: usize, right: usize) -> usize {
    let sum = left + right;
    sum
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::add;

    /// Sanity check: `add(2, 2)` yields 4.
    #[test]
    fn it_works() {
        assert_eq!(add(2, 2), 4);
    }
}
|
||||
Reference in New Issue
Block a user