Compare commits

...

13 Commits

Author SHA1 Message Date
Chang She
59014a01e0 bump version for v0.1.2 2023-05-05 11:27:09 -07:00
Chang She
47ae17ea05 Merge pull request #58 from lancedb/changhiskhan/parse-schema
Add method to get the URI scheme to support cloud storage
2023-05-04 14:36:45 -07:00
Chang She
b6739f3f66 windows paths 2023-05-04 11:41:05 -07:00
Chang She
3a2df0ce45 Add method to get the URI scheme to support cloud storage 2023-05-04 09:47:03 -07:00
Chang She
c0bc65cdfa Merge pull request #55 from lancedb/jaichopra/update-tagline
update tagline
2023-05-03 21:06:41 -07:00
Jai Chopra
298b81f0b0 update tagline 2023-05-03 19:55:10 -07:00
Jai
fe7a3ccd60 Merge pull request #53 from lancedb/jaichopra/update-major-features-readme
also update docs index
2023-05-03 07:51:54 -07:00
Jai Chopra
baf8d7c1a1 also update docs index 2023-05-03 07:50:44 -07:00
Chang She
2021e1bf6d Merge pull request #52 from lancedb/jaichopra/update-major-features-readme 2023-05-03 07:36:09 -07:00
Jai Chopra
2dbe71cf88 add new feature to readme.md 2023-05-03 07:30:46 -07:00
Lei Xu
afe19ade7f Merge pull request #49 from lancedb/lei/rust_core
Rust core directory
2023-04-27 10:40:21 -07:00
Lei Xu
118efdce73 add cargo metadata 2023-04-27 10:36:01 -07:00
Lei Xu
b0426387e7 initialize the rust core 2023-04-27 10:31:50 -07:00
15 changed files with 140 additions and 22 deletions

2
.gitignore vendored
View File

@@ -15,3 +15,5 @@ python/build
python/dist
notebooks/.ipynb_checkpoints
**/.hypothesis

View File

@@ -3,7 +3,7 @@
<img width="275" alt="LanceDB Logo" src="https://user-images.githubusercontent.com/917119/226205734-6063d87a-1ecc-45fe-85be-1dea6383a3d8.png">
**Serverless, low-latency vector database for AI applications**
**Developer-friendly, serverless vector database for AI applications**
<a href="https://lancedb.github.io/lancedb/">Documentation</a>
<a href="https://blog.eto.ai/">Blog</a>
@@ -21,6 +21,10 @@ The key features of LanceDB include:
* Production-scale vector search with no servers to manage.
* Optimized for multi-modal data (text, images, videos, point clouds and more).
* Native Python and Javascript/Typescript support (coming soon).
* Combine attribute-based information with vectors and store them as a single source-of-truth.
* Zero-copy, automatic versioning: manage versions of your data without needing extra infrastructure.

View File

@@ -6,6 +6,10 @@ The key features of LanceDB include:
* Production-scale vector search with no servers to manage.
* Optimized for multi-modal data (text, images, videos, point clouds and more).
* Native Python and Javascript/Typescript support (coming soon).
* Combine attribute-based information with vectors and store them as a single source-of-truth.
* Zero-copy, automatic versioning: manage versions of your data without needing extra infrastructure.

View File

@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .db import LanceDBConnection, URI
from .db import URI, LanceDBConnection
def connect(uri: URI) -> LanceDBConnection:

View File

@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from typing import Union, List
from typing import List, Union
import numpy as np
import pandas as pd

View File

@@ -14,10 +14,12 @@
from __future__ import annotations
from pathlib import Path
import pyarrow as pa
from .common import URI, DATA
from .common import DATA, URI
from .table import LanceTable
from .util import get_uri_scheme
class LanceDBConnection:
@@ -26,10 +28,12 @@ class LanceDBConnection:
"""
def __init__(self, uri: URI):
if isinstance(uri, str):
uri = Path(uri)
uri = uri.expanduser().absolute()
Path(uri).mkdir(parents=True, exist_ok=True)
is_local = isinstance(uri, Path) or get_uri_scheme(uri) == "file"
if is_local:
if isinstance(uri, str):
uri = Path(uri)
uri = uri.expanduser().absolute()
Path(uri).mkdir(parents=True, exist_ok=True)
self._uri = str(uri)
@property
@@ -43,7 +47,11 @@ class LanceDBConnection:
-------
A list of table names.
"""
return [p.stem for p in Path(self.uri).glob("*.lance")]
if get_uri_scheme(self.uri) == "file":
return [p.stem for p in Path(self.uri).glob("*.lance")]
raise NotImplementedError(
"List table_names is only supported for local filesystem for now"
)
def __len__(self) -> int:
return len(self.table_names())

View File

@@ -13,14 +13,13 @@
import math
import sys
from retry import retry
from typing import Callable, Union
from lance.vector import vec_to_table
import numpy as np
import pandas as pd
import pyarrow as pa
from lance.vector import vec_to_table
from retry import retry
def with_embeddings(
@@ -68,7 +67,9 @@ class EmbeddingFunction:
if len(self.rate_limiter_kwargs) > 0:
v = int(sys.version_info.minor)
if v >= 11:
print("WARNING: rate limit only support up to 3.10, proceeding without rate limiter")
print(
"WARNING: rate limit only support up to 3.10, proceeding without rate limiter"
)
else:
import ratelimiter

View File

@@ -19,12 +19,12 @@ from functools import cached_property
import lance
import numpy as np
import pandas as pd
from lance import LanceDataset
import pyarrow as pa
from lance import LanceDataset
from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .query import LanceQueryBuilder
from .common import DATA, VECTOR_COLUMN_NAME, VEC
def _sanitize_data(data, schema):

43
python/lancedb/util.py Normal file
View File

@@ -0,0 +1,43 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from urllib.parse import ParseResult, urlparse
from pyarrow import fs
def get_uri_scheme(uri: str) -> str:
    """
    Return the normalized scheme of a URI.

    A URI with no scheme is treated as a local file URI, and the
    ``s3a`` / ``s3n`` aliases are collapsed to plain ``s3``.

    Parameters
    ----------
    uri : str
        The URI to inspect.

    Returns
    -------
    str: The normalized scheme of the URI.
    """
    raw_scheme = urlparse(uri).scheme
    # An empty scheme means a plain path. A one-character scheme is a
    # Windows drive letter (urlparse reports "c" for "c:\path"), so both
    # cases refer to the local filesystem.
    if len(raw_scheme) <= 1:
        return "file"
    if raw_scheme in ("s3a", "s3n"):
        return "s3"
    return raw_scheme

View File

@@ -1,10 +1,10 @@
[project]
name = "lancedb"
version = "0.1.1"
dependencies = ["pylance>=0.4.4", "ratelimiter", "retry", "tqdm"]
version = "0.1.2"
dependencies = ["pylance>=0.4.6", "ratelimiter", "retry", "tqdm"]
description = "lancedb"
authors = [
{ name = "Lance Devs", email = "dev@eto.ai" },
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
]
license = { file = "LICENSE" }
readme = "README.md"

View File

@@ -11,10 +11,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import lancedb
import pandas as pd
import pytest
import lancedb
def test_basic(tmp_path):
db = lancedb.connect(tmp_path)

View File

@@ -12,15 +12,14 @@
# limitations under the License.
import lance
from lancedb.query import LanceQueryBuilder
import numpy as np
import pandas as pd
import pandas.testing as tm
import pyarrow as pa
import pytest
from lancedb.query import LanceQueryBuilder
class MockTable:
def __init__(self, tmp_path):

30
python/tests/test_util.py Normal file
View File

@@ -0,0 +1,30 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from lancedb.util import get_uri_scheme
def test_normalize_uri():
    """Check that get_uri_scheme returns the expected scheme for each sample URI."""
    # Maps each sample URI to the scheme get_uri_scheme should report for it.
    expected_scheme_by_uri = {
        "relative/path": "file",
        "/absolute/path": "file",
        "file:///absolute/path": "file",
        "s3://bucket/path": "s3",
        "gs://bucket/path": "gs",
        "c:\\windows\\path": "file",
    }
    for uri, expected_scheme in expected_scheme_by_uri.items():
        assert get_uri_scheme(uri) == expected_scheme

12
rust/Cargo.toml Normal file
View File

@@ -0,0 +1,12 @@
[package]
name = "vectordb"
version = "0.0.1"
edition = "2021"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
repository = "https://github.com/lancedb/lancedb"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
lance = "0.4.3"

14
rust/src/lib.rs Normal file
View File

@@ -0,0 +1,14 @@
/// Returns the sum of `left` and `right`.
pub fn add(left: usize, right: usize) -> usize {
    let sum = left + right;
    sum
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: `add` returns the sum of its two arguments.
    #[test]
    fn it_works() {
        assert_eq!(add(2, 2), 4);
    }
}