diff --git a/.github/workflows/build_linux_wheel/action.yml b/.github/workflows/build_linux_wheel/action.yml new file mode 100644 index 00000000..8c3e02f3 --- /dev/null +++ b/.github/workflows/build_linux_wheel/action.yml @@ -0,0 +1,58 @@ +# We create a composite action to be re-used both for testing and for releasing +name: build-linux-wheel +description: "Build a manylinux wheel for lance" +inputs: + python-minor-version: + description: "8, 9, 10, 11, 12" + required: true + args: + description: "--release" + required: false + default: "" + arm-build: + description: "Build for arm64 instead of x86_64" + # Note: this does *not* mean the host is arm64, since we might be cross-compiling. + required: false + default: "false" +runs: + using: "composite" + steps: + - name: CONFIRM ARM BUILD + shell: bash + run: | + echo "ARM BUILD: ${{ inputs.arm-build }}" + - name: Build x86_64 Manylinux wheel + if: ${{ inputs.arm-build == 'false' }} + uses: PyO3/maturin-action@v1 + with: + command: build + working-directory: python + target: x86_64-unknown-linux-gnu + manylinux: "2_17" + args: ${{ inputs.args }} + before-script-linux: | + set -e + yum install -y openssl-devel \ + && curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$(uname -m).zip > /tmp/protoc.zip \ + && unzip /tmp/protoc.zip -d /usr/local \ + && rm /tmp/protoc.zip + - name: Build Arm Manylinux Wheel + if: ${{ inputs.arm-build == 'true' }} + uses: PyO3/maturin-action@v1 + with: + command: build + working-directory: python + target: aarch64-unknown-linux-gnu + manylinux: "2_24" + args: ${{ inputs.args }} + before-script-linux: | + set -e + apt install -y unzip + if [ $(uname -m) = "x86_64" ]; then + PROTOC_ARCH="x86_64" + else + PROTOC_ARCH="aarch_64" + fi + curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$PROTOC_ARCH.zip > /tmp/protoc.zip \ + && unzip /tmp/protoc.zip -d /usr/local \ + && rm /tmp/protoc.zip diff --git 
a/.github/workflows/build_mac_wheel/action.yml b/.github/workflows/build_mac_wheel/action.yml new file mode 100644 index 00000000..1932262d --- /dev/null +++ b/.github/workflows/build_mac_wheel/action.yml @@ -0,0 +1,25 @@ +# We create a composite action to be re-used both for testing and for releasing +name: build_wheel +description: "Build a lance wheel" +inputs: + python-minor-version: + description: "8, 9, 10, 11" + required: true + args: + description: "--release" + required: false + default: "" +runs: + using: "composite" + steps: + - name: Install macos dependency + shell: bash + run: | + brew install protobuf + - name: Build wheel + uses: PyO3/maturin-action@v1 + with: + command: build + args: ${{ inputs.args }} + working-directory: python + interpreter: 3.${{ inputs.python-minor-version }} diff --git a/.github/workflows/build_windows_wheel/action.yml b/.github/workflows/build_windows_wheel/action.yml new file mode 100644 index 00000000..756334f8 --- /dev/null +++ b/.github/workflows/build_windows_wheel/action.yml @@ -0,0 +1,33 @@ +# We create a composite action to be re-used both for testing and for releasing +name: build_wheel +description: "Build a lance wheel" +inputs: + python-minor-version: + description: "8, 9, 10, 11" + required: true + args: + description: "--release" + required: false + default: "" +runs: + using: "composite" + steps: + - name: Install Protoc v21.12 + working-directory: C:\ + run: | + New-Item -Path 'C:\protoc' -ItemType Directory + Set-Location C:\protoc + Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip + 7z x protoc.zip + Add-Content $env:GITHUB_PATH "C:\protoc\bin" + shell: powershell + - name: Build wheel + uses: PyO3/maturin-action@v1 + with: + command: build + args: ${{ inputs.args }} + working-directory: python + - uses: actions/upload-artifact@v3 + with: + name: windows-wheels + path: python\target\wheels diff --git 
a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 27833506..66605cc9 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -2,30 +2,91 @@ name: PyPI Publish on: release: - types: [ published ] + types: [published] jobs: - publish: - runs-on: ubuntu-latest - # Only runs on tags that matches the python-make-release action - if: startsWith(github.ref, 'refs/tags/python-v') - defaults: - run: - shell: bash - working-directory: python + linux: + timeout-minutes: 60 + strategy: + matrix: + python-minor-version: ["8"] + platform: + - x86_64 + - aarch64 + runs-on: "ubuntu-22.04" steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v4 with: - python-version: "3.8" - - name: Build distribution - run: | - ls -la - pip install wheel setuptools --upgrade - python setup.py sdist bdist_wheel - - name: Publish - uses: pypa/gh-action-pypi-publish@v1.8.5 + python-version: 3.${{ matrix.python-minor-version }} + - uses: ./.github/workflows/build_linux_wheel with: - password: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} - packages-dir: python/dist + python-minor-version: ${{ matrix.python-minor-version }} + args: "--release --strip" + arm-build: ${{ matrix.platform == 'aarch64' }} + - uses: ./.github/workflows/upload_wheel + with: + token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} + repo: "pypi" + mac: + timeout-minutes: 60 + runs-on: ${{ matrix.config.runner }} + strategy: + matrix: + python-minor-version: ["8"] + config: + - target: x86_64-apple-darwin + runner: macos-13 + - target: aarch64-apple-darwin + runner: macos-14 + env: + MACOSX_DEPLOYMENT_TARGET: 10.15 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref }} + fetch-depth: 0 + lfs: true + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.12 + - uses: ./.github/workflows/build_mac_wheel + with: + python-minor-version: ${{ 
matrix.python-minor-version }} + args: "--release --strip --target ${{ matrix.config.target }}" + - uses: ./.github/workflows/upload_wheel + with: + python-minor-version: ${{ matrix.python-minor-version }} + token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} + repo: "pypi" + windows: + timeout-minutes: 60 + runs-on: windows-latest + strategy: + matrix: + python-minor-version: ["8"] + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref }} + fetch-depth: 0 + lfs: true + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.${{ matrix.python-minor-version }} + - uses: ./.github/workflows/build_windows_wheel + with: + python-minor-version: ${{ matrix.python-minor-version }} + args: "--release --strip" + vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }} + - uses: ./.github/workflows/upload_wheel + with: + python-minor-version: ${{ matrix.python-minor-version }} + token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} + repo: "pypi" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 846b8497..d80cb948 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -14,49 +14,133 @@ concurrency: cancel-in-progress: true jobs: - linux: + lint: + name: "Lint" timeout-minutes: 30 - strategy: - matrix: - python-minor-version: [ "8", "11" ] runs-on: "ubuntu-22.04" defaults: run: shell: bash working-directory: python steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - lfs: true - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.${{ matrix.python-minor-version }} - - name: Install lancedb - run: | - pip install -e .[tests] - pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 - pip install pytest pytest-mock ruff - - name: Format check - run: ruff format --check . - - name: Lint - run: ruff . 
- - name: Run tests - run: pytest -m "not slow" -x -v --durations=30 tests - - name: doctest - run: pytest --doctest-modules lancedb + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install ruff + run: | + pip install ruff + - name: Format check + run: ruff format --check . + - name: Lint + run: ruff . + doctest: + name: "Doctest" + timeout-minutes: 30 + runs-on: "ubuntu-22.04" + defaults: + run: + shell: bash + working-directory: python + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + - name: Install protobuf + run: | + sudo apt update + sudo apt install -y protobuf-compiler + - uses: Swatinem/rust-cache@v2 + with: + workspaces: python + - name: Install + run: | + pip install -e .[tests,dev,embeddings] + pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 + pip install mlx + - name: Doctest + run: pytest --doctest-modules python/lancedb + linux: + name: "Linux: python-3.${{ matrix.python-minor-version }}" + timeout-minutes: 30 + strategy: + matrix: + python-minor-version: ["8", "11"] + runs-on: "ubuntu-22.04" + defaults: + run: + shell: bash + working-directory: python + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + - name: Install protobuf + run: | + sudo apt update + sudo apt install -y protobuf-compiler + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.${{ matrix.python-minor-version }} + - uses: Swatinem/rust-cache@v2 + with: + workspaces: python + - uses: ./.github/workflows/build_linux_wheel + - uses: ./.github/workflows/run_tests + # Make sure wheels are not included in the Rust cache + - name: Delete wheels + run: rm -rf target/wheels platform: - name: "Platform: ${{ matrix.config.name }}" + 
name: "Mac: ${{ matrix.config.name }}" timeout-minutes: 30 strategy: matrix: config: - - name: x86 Mac + - name: x86 runner: macos-13 - - name: Arm Mac + - name: Arm runner: macos-14 - - name: x86 Windows + runs-on: "${{ matrix.config.runner }}" + defaults: + run: + shell: bash + working-directory: python + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - uses: Swatinem/rust-cache@v2 + with: + workspaces: python + - uses: ./.github/workflows/build_mac_wheel + - uses: ./.github/workflows/run_tests + # Make sure wheels are not included in the Rust cache + - name: Delete wheels + run: rm -rf target/wheels + windows: + name: "Windows: ${{ matrix.config.name }}" + timeout-minutes: 30 + strategy: + matrix: + config: + - name: x86 runner: windows-latest runs-on: "${{ matrix.config.runner }}" defaults: @@ -64,21 +148,22 @@ jobs: shell: bash working-directory: python steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - lfs: true - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install lancedb - run: | - pip install -e .[tests] - pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 - pip install pytest pytest-mock - - name: Run tests - run: pytest -m "not slow" -x -v --durations=30 tests + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - uses: Swatinem/rust-cache@v2 + with: + workspaces: python + - uses: ./.github/workflows/build_windows_wheel + - uses: ./.github/workflows/run_tests + # Make sure wheels are not included in the Rust cache + - name: Delete wheels + run: rm -rf target/wheels pydantic1x: timeout-minutes: 30 runs-on: "ubuntu-22.04" @@ -87,21 +172,22 @@ jobs: shell: bash working-directory: python steps: - - uses: 
actions/checkout@v4 - with: - fetch-depth: 0 - lfs: true - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.9 - - name: Install lancedb - run: | - pip install "pydantic<2" - pip install -e .[tests] - pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 - pip install pytest pytest-mock - - name: Run tests - run: pytest -m "not slow" -x -v --durations=30 tests - - name: doctest - run: pytest --doctest-modules lancedb + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y protobuf-compiler + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.9 + - name: Install lancedb + run: | + pip install "pydantic<2" + pip install -e .[tests] + pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 + - name: Run tests + run: pytest -m "not slow" -x -v --durations=30 python/tests diff --git a/.github/workflows/run_tests/action.yml b/.github/workflows/run_tests/action.yml new file mode 100644 index 00000000..9fd65c70 --- /dev/null +++ b/.github/workflows/run_tests/action.yml @@ -0,0 +1,17 @@ +name: run-tests + +description: "Install lance wheel and run unit tests" +inputs: + python-minor-version: + required: true + description: "8 9 10 11 12" +runs: + using: "composite" + steps: + - name: Install lancedb + shell: bash + run: | + pip3 install $(ls target/wheels/lancedb-*.whl)[tests,dev,embeddings] + - name: pytest + shell: bash + run: pytest -m "not slow" -x -v --durations=30 python/python/tests diff --git a/.github/workflows/upload_wheel/action.yml b/.github/workflows/upload_wheel/action.yml new file mode 100644 index 00000000..8494e01e --- /dev/null +++ b/.github/workflows/upload_wheel/action.yml @@ -0,0 +1,29 @@ +name: upload-wheel + +description: "Upload wheels to Pypi" +inputs: + os: + required: true + description: 
"ubuntu-22.04 or macos-13" + repo: + required: false + description: "pypi or testpypi" + default: "pypi" + token: + required: true + description: "release token for the repo" + +runs: + using: "composite" + steps: + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + pip install twine + - name: Publish wheel + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ inputs.token }} + shell: bash + run: twine upload --repository ${{ inputs.repo }} target/wheels/lancedb-*.whl diff --git a/.gitignore b/.gitignore index c065383d..607466e6 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,11 @@ python/dist **/.hypothesis +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + ## Javascript *.node **/node_modules diff --git a/Cargo.toml b/Cargo.toml index d7a8d686..8671d22d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["rust/ffi/node", "rust/lancedb", "nodejs"] +members = ["rust/ffi/node", "rust/lancedb", "nodejs", "python"] # Python package needs to be built by maturin. 
exclude = ["python"] resolver = "2" diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 00000000..e6b83206 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "lancedb-python" +version = "0.4.10" +edition.workspace = true +description = "Python bindings for LanceDB" +license.workspace = true +repository.workspace = true +keywords.workspace = true +categories.workspace = true + + +[lib] +name = "_lancedb" +crate-type = ["cdylib"] + +[dependencies] +lancedb = { path = "../rust/lancedb" } +env_logger = "0.10" +pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] } +pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] } + +# Prevent dynamic linking of lzma, which comes from datafusion +lzma-sys = { version = "*", features = ["static"] } + +[build-dependencies] +pyo3-build-config = { version = "0.20.3", features = ["extension-module", "abi3-py38"] } diff --git a/python/README.md b/python/README.md index 94e27d6a..cbcec70a 100644 --- a/python/README.md +++ b/python/README.md @@ -20,10 +20,10 @@ results = table.search([0.1, 0.3]).limit(20).to_list() print(results) ``` - ## Development -Create a virtual environment and activate it: +LanceDB is based on the Rust crate `lancedb` and is built with maturin. In order to build with maturin +you will either need a conda environment or a virtual environment (venv). ```bash python -m venv venv @@ -33,7 +33,15 @@ python -m venv venv Install the necessary packages: ```bash -python -m pip install . 
+python -m pip install .[tests,dev] +``` + +To build the python package you can use maturin: + +```bash +# This will build the rust bindings and place them in the appropriate place +# in your venv or conda environment +maturin develop ``` To run the unit tests: @@ -45,7 +53,7 @@ pytest To run the doc tests: ```bash -pytest --doctest-modules lancedb +pytest --doctest-modules python/lancedb ``` To run linter and automatically fix all errors: @@ -61,31 +69,27 @@ If any packages are missing, install them with: pip install ``` - ___ For **Windows** users, there may be errors when installing packages, so these commands may be helpful: Activate the virtual environment: + ```bash . .\venv\Scripts\activate ``` You may need to run the installs separately: + ```bash pip install -e .[tests] pip install -e .[dev] ``` - `tantivy` requires `rust` to be installed, so install it with `conda`, as it doesn't support windows installation: + ```bash pip install wheel pip install cargo conda install rust pip install tantivy ``` - -To run the unit tests: -```bash -pytest -``` diff --git a/python/build.rs b/python/build.rs new file mode 100644 index 00000000..dace4a9b --- /dev/null +++ b/python/build.rs @@ -0,0 +1,3 @@ +fn main() { + pyo3_build_config::add_extension_module_link_args(); +} diff --git a/python/pyproject.toml b/python/pyproject.toml index d9433dcd..3cbe0a69 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ "attrs>=21.3.0", "semver>=3.0", "cachetools", - "overrides>=0.7" + "overrides>=0.7", ] description = "lancedb" authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }] @@ -23,7 +23,7 @@ keywords = [ "data-science", "machine-learning", "arrow", - "data-analytics" + "data-analytics", ] classifiers = [ "Development Status :: 3 - Alpha", @@ -45,16 +45,48 @@ classifiers = [ repository = "https://github.com/lancedb/lancedb" [project.optional-dependencies] -tests = ["aiohttp", "pandas>=1.4", "pytest", "pytest-mock", 
"pytest-asyncio", "duckdb", "pytz", "polars>=0.19"] +tests = [ + "aiohttp", + "pandas>=1.4", + "pytest", + "pytest-mock", + "pytest-asyncio", + "duckdb", + "pytz", + "polars>=0.19", +] dev = ["ruff", "pre-commit"] -docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "mkdocs-ultralytics-plugin==0.0.44"] +docs = [ + "mkdocs", + "mkdocs-jupyter", + "mkdocs-material", + "mkdocstrings[python]", + "mkdocs-ultralytics-plugin==0.0.44", +] clip = ["torch", "pillow", "open-clip"] -embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "huggingface_hub", - "InstructorEmbedding", "google.generativeai", "boto3>=1.28.57", "awscli>=1.29.57", "botocore>=1.31.57"] +embeddings = [ + "openai>=1.6.1", + "sentence-transformers", + "torch", + "pillow", + "open-clip-torch", + "cohere", + "huggingface_hub", + "InstructorEmbedding", + "google.generativeai", + "boto3>=1.28.57", + "awscli>=1.29.57", + "botocore>=1.31.57", +] + +[tool.maturin] +python-source = "python" +module-name = "lancedb._lancedb" [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" +requires = ["maturin>=1.4"] +build-backend = "maturin" + [tool.ruff.lint] select = ["F", "E", "W", "I", "G", "TCH", "PERF"] @@ -64,5 +96,5 @@ addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py" markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", - "asyncio" + "asyncio", ] diff --git a/python/lancedb/__init__.py b/python/python/lancedb/__init__.py similarity index 58% rename from python/lancedb/__init__.py rename to python/python/lancedb/__init__.py index 91ca8f62..5464f654 100644 --- a/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ -19,8 +19,9 @@ from typing import Optional, Union __version__ = importlib.metadata.version("lancedb") -from .common import URI -from .db import DBConnection, LanceDBConnection +from ._lancedb import connect as lancedb_connect +from .common 
import URI, sanitize_uri +from .db import AsyncConnection, AsyncLanceDBConnection, DBConnection, LanceDBConnection from .remote.db import RemoteDBConnection from .schema import vector # noqa: F401 @@ -100,3 +101,74 @@ def connect( uri, api_key, region, host_override, request_thread_pool=request_thread_pool ) return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval) + + +async def connect_async( + uri: URI, + *, + api_key: Optional[str] = None, + region: str = "us-east-1", + host_override: Optional[str] = None, + read_consistency_interval: Optional[timedelta] = None, + request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None, +) -> AsyncConnection: + """Connect to a LanceDB database. + + Parameters + ---------- + uri: str or Path + The uri of the database. + api_key: str, optional + If present, connect to LanceDB cloud. + Otherwise, connect to a database on file system or cloud storage. + Can be set via environment variable `LANCEDB_API_KEY`. + region: str, default "us-east-1" + The region to use for LanceDB Cloud. + host_override: str, optional + The override url for LanceDB Cloud. + read_consistency_interval: timedelta, default None + (For LanceDB OSS only) + The interval at which to check for updates to the table from other + processes. If None, then consistency is not checked. For performance + reasons, this is the default. For strong consistency, set this to + zero seconds. Then every read will check for updates from other + processes. As a compromise, you can set this to a non-zero timedelta + for eventual consistency. If more than that interval has passed since + the last check, then the table will be checked for updates. Note: this + consistency only applies to read operations. Write operations are + always consistent. + request_thread_pool: int or ThreadPoolExecutor, optional + The thread pool to use for making batch requests to the LanceDB Cloud API. 
+ If an integer, then a ThreadPoolExecutor will be created with that + number of threads. If None, then a ThreadPoolExecutor will be created + with the default number of threads. If a ThreadPoolExecutor, then that + executor will be used for making requests. This is for LanceDB Cloud + only and is only used when making batch requests (i.e., passing in + multiple queries to the search method at once). + + Examples + -------- + + For a local directory, provide a path for the database: + + >>> import lancedb + >>> db = lancedb.connect("~/.lancedb") + + For object storage, use a URI prefix: + + >>> db = lancedb.connect("s3://my-bucket/lancedb") + + Connect to LanceDB Cloud: + + >>> db = lancedb.connect("db://my_database", api_key="ldb_...") + + Returns + ------- + conn : AsyncConnection + A connection to a LanceDB database. + """ + return AsyncLanceDBConnection( + await lancedb_connect( + sanitize_uri(uri), api_key, region, host_override, read_consistency_interval + ) + ) diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi new file mode 100644 index 00000000..308220d5 --- /dev/null +++ b/python/python/lancedb/_lancedb.pyi @@ -0,0 +1,12 @@ +from typing import Optional + +class Connection(object): + async def table_names(self) -> list[str]: ... + +async def connect( + uri: str, + api_key: Optional[str], + region: Optional[str], + host_override: Optional[str], + read_consistency_interval: Optional[float], +) -> Connection: ... 
diff --git a/python/lancedb/common.py b/python/python/lancedb/common.py similarity index 95% rename from python/lancedb/common.py rename to python/python/lancedb/common.py index 54c7c9e0..ff3b3636 100644 --- a/python/lancedb/common.py +++ b/python/python/lancedb/common.py @@ -34,3 +34,7 @@ class Credential(str): def __str__(self) -> str: return "********" + + +def sanitize_uri(uri: URI) -> str: + return str(uri) diff --git a/python/lancedb/conftest.py b/python/python/lancedb/conftest.py similarity index 100% rename from python/lancedb/conftest.py rename to python/python/lancedb/conftest.py diff --git a/python/lancedb/context.py b/python/python/lancedb/context.py similarity index 100% rename from python/lancedb/context.py rename to python/python/lancedb/context.py diff --git a/python/lancedb/db.py b/python/python/lancedb/db.py similarity index 61% rename from python/lancedb/db.py rename to python/python/lancedb/db.py index 41b56494..a7dd4f4d 100644 --- a/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -28,6 +28,7 @@ from .util import fs_from_uri, get_uri_location, get_uri_scheme, join_uri if TYPE_CHECKING: from datetime import timedelta + from ._lancedb import Connection as LanceDbConnection from .common import DATA, URI from .embeddings import EmbeddingFunctionConfig from .pydantic import LanceModel @@ -40,14 +41,21 @@ class DBConnection(EnforceOverrides): def table_names( self, page_token: Optional[str] = None, limit: int = 10 ) -> Iterable[str]: - """List all table in this database + """List all tables in this database, in sorted order Parameters ---------- page_token: str, optional The token to use for pagination. If not present, start from the beginning. + Typically, this token is the last table name from the previous page. + Only supported by LanceDB Cloud. limit: int, default 10 The size of the page to return. + Only supported by LanceDB Cloud. 
+ + Returns + ------- + Iterable of str """ pass @@ -412,3 +420,254 @@ class LanceDBConnection(DBConnection): def drop_database(self): filesystem, path = fs_from_uri(self.uri) filesystem.delete_dir(path) + + +class AsyncConnection(EnforceOverrides): + """An active LanceDB connection interface.""" + + @abstractmethod + async def table_names( + self, *, page_token: Optional[str] = None, limit: int = 10 + ) -> Iterable[str]: + """List all tables in this database, in sorted order + + Parameters + ---------- + page_token: str, optional + The token to use for pagination. If not present, start from the beginning. + Typically, this token is the last table name from the previous page. + Only supported by LanceDB Cloud. + limit: int, default 10 + The size of the page to return. + Only supported by LanceDB Cloud. + + Returns + ------- + Iterable of str + """ + pass + + @abstractmethod + async def create_table( + self, + name: str, + data: Optional[DATA] = None, + schema: Optional[Union[pa.Schema, LanceModel]] = None, + mode: str = "create", + exist_ok: bool = False, + on_bad_vectors: str = "error", + fill_value: float = 0.0, + embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None, + ) -> Table: + """Create a [Table][lancedb.table.Table] in the database. + + Parameters + ---------- + name: str + The name of the table. + data: The data to initialize the table, *optional* + User must provide at least one of `data` or `schema`. + Acceptable types are: + + - dict or list-of-dict + + - pandas.DataFrame + + - pyarrow.Table or pyarrow.RecordBatch + schema: The schema of the table, *optional* + Acceptable types are: + + - pyarrow.Schema + + - [LanceModel][lancedb.pydantic.LanceModel] + mode: str; default "create" + The mode to use when creating the table. + Can be either "create" or "overwrite". + By default, if the table already exists, an exception is raised. + If you want to overwrite the table, use mode="overwrite". 
+ exist_ok: bool, default False + If a table by the same name already exists, then raise an exception + if exist_ok=False. If exist_ok=True, then open the existing table; + it will not add the provided data but will validate against any + schema that's specified. + on_bad_vectors: str, default "error" + What to do if any of the vectors are not the same size or contains NaNs. + One of "error", "drop", "fill". + fill_value: float + The value to use when filling vectors. Only used if on_bad_vectors="fill". + + Returns + ------- + LanceTable + A reference to the newly created table. + + !!! note + + The vector index won't be created by default. + To create the index, call the `create_index` method on the table. + + Examples + -------- + + Can create with list of tuples or dictionaries: + + >>> import lancedb + >>> db = lancedb.connect("./.lancedb") + >>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7}, + ... {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}] + >>> db.create_table("my_table", data) + LanceTable(connection=..., name="my_table") + >>> db["my_table"].head() + pyarrow.Table + vector: fixed_size_list[2] + child 0, item: float + lat: double + long: double + ---- + vector: [[[1.1,1.2],[0.2,1.8]]] + lat: [[45.5,40.1]] + long: [[-122.7,-74.1]] + + You can also pass a pandas DataFrame: + + >>> import pandas as pd + >>> data = pd.DataFrame({ + ... "vector": [[1.1, 1.2], [0.2, 1.8]], + ... "lat": [45.5, 40.1], + ... "long": [-122.7, -74.1] + ... }) + >>> db.create_table("table2", data) + LanceTable(connection=..., name="table2") + >>> db["table2"].head() + pyarrow.Table + vector: fixed_size_list[2] + child 0, item: float + lat: double + long: double + ---- + vector: [[[1.1,1.2],[0.2,1.8]]] + lat: [[45.5,40.1]] + long: [[-122.7,-74.1]] + + Data is converted to Arrow before being written to disk. For maximum + control over how data is saved, either provide the PyArrow schema to + convert to or else provide a [PyArrow Table](pyarrow.Table) directly. 
+ + >>> custom_schema = pa.schema([ + ... pa.field("vector", pa.list_(pa.float32(), 2)), + ... pa.field("lat", pa.float32()), + ... pa.field("long", pa.float32()) + ... ]) + >>> db.create_table("table3", data, schema = custom_schema) + LanceTable(connection=..., name="table3") + >>> db["table3"].head() + pyarrow.Table + vector: fixed_size_list[2] + child 0, item: float + lat: float + long: float + ---- + vector: [[[1.1,1.2],[0.2,1.8]]] + lat: [[45.5,40.1]] + long: [[-122.7,-74.1]] + + + It is also possible to create an table from `[Iterable[pa.RecordBatch]]`: + + + >>> import pyarrow as pa + >>> def make_batches(): + ... for i in range(5): + ... yield pa.RecordBatch.from_arrays( + ... [ + ... pa.array([[3.1, 4.1], [5.9, 26.5]], + ... pa.list_(pa.float32(), 2)), + ... pa.array(["foo", "bar"]), + ... pa.array([10.0, 20.0]), + ... ], + ... ["vector", "item", "price"], + ... ) + >>> schema=pa.schema([ + ... pa.field("vector", pa.list_(pa.float32(), 2)), + ... pa.field("item", pa.utf8()), + ... pa.field("price", pa.float32()), + ... ]) + >>> db.create_table("table4", make_batches(), schema=schema) + LanceTable(connection=..., name="table4") + + """ + raise NotImplementedError + + async def open_table(self, name: str) -> Table: + """Open a Lance Table in the database. + + Parameters + ---------- + name: str + The name of the table. + + Returns + ------- + A LanceTable object representing the table. + """ + raise NotImplementedError + + async def drop_table(self, name: str): + """Drop a table from the database. + + Parameters + ---------- + name: str + The name of the table. 
+ """ + raise NotImplementedError + + async def drop_database(self): + """ + Drop database + This is the same thing as dropping all the tables + """ + raise NotImplementedError + + +class AsyncLanceDBConnection(AsyncConnection): + def __init__(self, connection: LanceDbConnection): + self._inner = connection + + async def __repr__(self) -> str: + pass + + @override + async def table_names( + self, + *, + page_token=None, + limit=None, + ) -> Iterable[str]: + return await self._inner.table_names() + + @override + async def create_table( + self, + name: str, + data: Optional[DATA] = None, + schema: Optional[Union[pa.Schema, LanceModel]] = None, + mode: str = "create", + exist_ok: bool = False, + on_bad_vectors: str = "error", + fill_value: float = 0.0, + embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None, + ) -> LanceTable: + raise NotImplementedError + + @override + async def open_table(self, name: str) -> LanceTable: + raise NotImplementedError + + @override + async def drop_table(self, name: str, ignore_missing: bool = False): + raise NotImplementedError + + @override + async def drop_database(self): + raise NotImplementedError diff --git a/python/lancedb/embeddings/__init__.py b/python/python/lancedb/embeddings/__init__.py similarity index 100% rename from python/lancedb/embeddings/__init__.py rename to python/python/lancedb/embeddings/__init__.py diff --git a/python/lancedb/embeddings/base.py b/python/python/lancedb/embeddings/base.py similarity index 100% rename from python/lancedb/embeddings/base.py rename to python/python/lancedb/embeddings/base.py diff --git a/python/lancedb/embeddings/bedrock.py b/python/python/lancedb/embeddings/bedrock.py similarity index 100% rename from python/lancedb/embeddings/bedrock.py rename to python/python/lancedb/embeddings/bedrock.py diff --git a/python/lancedb/embeddings/cohere.py b/python/python/lancedb/embeddings/cohere.py similarity index 100% rename from python/lancedb/embeddings/cohere.py rename to 
python/python/lancedb/embeddings/cohere.py diff --git a/python/lancedb/embeddings/gemini_text.py b/python/python/lancedb/embeddings/gemini_text.py similarity index 100% rename from python/lancedb/embeddings/gemini_text.py rename to python/python/lancedb/embeddings/gemini_text.py diff --git a/python/lancedb/embeddings/gte.py b/python/python/lancedb/embeddings/gte.py similarity index 100% rename from python/lancedb/embeddings/gte.py rename to python/python/lancedb/embeddings/gte.py diff --git a/python/lancedb/embeddings/gte_mlx_model.py b/python/python/lancedb/embeddings/gte_mlx_model.py similarity index 100% rename from python/lancedb/embeddings/gte_mlx_model.py rename to python/python/lancedb/embeddings/gte_mlx_model.py diff --git a/python/lancedb/embeddings/imagebind.py b/python/python/lancedb/embeddings/imagebind.py similarity index 100% rename from python/lancedb/embeddings/imagebind.py rename to python/python/lancedb/embeddings/imagebind.py diff --git a/python/lancedb/embeddings/instructor.py b/python/python/lancedb/embeddings/instructor.py similarity index 100% rename from python/lancedb/embeddings/instructor.py rename to python/python/lancedb/embeddings/instructor.py diff --git a/python/lancedb/embeddings/open_clip.py b/python/python/lancedb/embeddings/open_clip.py similarity index 100% rename from python/lancedb/embeddings/open_clip.py rename to python/python/lancedb/embeddings/open_clip.py diff --git a/python/lancedb/embeddings/openai.py b/python/python/lancedb/embeddings/openai.py similarity index 100% rename from python/lancedb/embeddings/openai.py rename to python/python/lancedb/embeddings/openai.py diff --git a/python/lancedb/embeddings/registry.py b/python/python/lancedb/embeddings/registry.py similarity index 100% rename from python/lancedb/embeddings/registry.py rename to python/python/lancedb/embeddings/registry.py diff --git a/python/lancedb/embeddings/sentence_transformers.py b/python/python/lancedb/embeddings/sentence_transformers.py similarity 
index 100% rename from python/lancedb/embeddings/sentence_transformers.py rename to python/python/lancedb/embeddings/sentence_transformers.py diff --git a/python/lancedb/embeddings/utils.py b/python/python/lancedb/embeddings/utils.py similarity index 100% rename from python/lancedb/embeddings/utils.py rename to python/python/lancedb/embeddings/utils.py diff --git a/python/lancedb/exceptions.py b/python/python/lancedb/exceptions.py similarity index 100% rename from python/lancedb/exceptions.py rename to python/python/lancedb/exceptions.py diff --git a/python/lancedb/fts.py b/python/python/lancedb/fts.py similarity index 100% rename from python/lancedb/fts.py rename to python/python/lancedb/fts.py diff --git a/python/lancedb/merge.py b/python/python/lancedb/merge.py similarity index 100% rename from python/lancedb/merge.py rename to python/python/lancedb/merge.py diff --git a/python/lancedb/pydantic.py b/python/python/lancedb/pydantic.py similarity index 100% rename from python/lancedb/pydantic.py rename to python/python/lancedb/pydantic.py diff --git a/python/lancedb/query.py b/python/python/lancedb/query.py similarity index 100% rename from python/lancedb/query.py rename to python/python/lancedb/query.py diff --git a/python/lancedb/remote/__init__.py b/python/python/lancedb/remote/__init__.py similarity index 100% rename from python/lancedb/remote/__init__.py rename to python/python/lancedb/remote/__init__.py diff --git a/python/lancedb/remote/arrow.py b/python/python/lancedb/remote/arrow.py similarity index 100% rename from python/lancedb/remote/arrow.py rename to python/python/lancedb/remote/arrow.py diff --git a/python/lancedb/remote/client.py b/python/python/lancedb/remote/client.py similarity index 100% rename from python/lancedb/remote/client.py rename to python/python/lancedb/remote/client.py diff --git a/python/lancedb/remote/connection_timeout.py b/python/python/lancedb/remote/connection_timeout.py similarity index 100% rename from 
python/lancedb/remote/connection_timeout.py rename to python/python/lancedb/remote/connection_timeout.py diff --git a/python/lancedb/remote/db.py b/python/python/lancedb/remote/db.py similarity index 100% rename from python/lancedb/remote/db.py rename to python/python/lancedb/remote/db.py diff --git a/python/lancedb/remote/errors.py b/python/python/lancedb/remote/errors.py similarity index 100% rename from python/lancedb/remote/errors.py rename to python/python/lancedb/remote/errors.py diff --git a/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py similarity index 100% rename from python/lancedb/remote/table.py rename to python/python/lancedb/remote/table.py diff --git a/python/lancedb/rerankers/__init__.py b/python/python/lancedb/rerankers/__init__.py similarity index 100% rename from python/lancedb/rerankers/__init__.py rename to python/python/lancedb/rerankers/__init__.py diff --git a/python/lancedb/rerankers/base.py b/python/python/lancedb/rerankers/base.py similarity index 100% rename from python/lancedb/rerankers/base.py rename to python/python/lancedb/rerankers/base.py diff --git a/python/lancedb/rerankers/cohere.py b/python/python/lancedb/rerankers/cohere.py similarity index 100% rename from python/lancedb/rerankers/cohere.py rename to python/python/lancedb/rerankers/cohere.py diff --git a/python/lancedb/rerankers/colbert.py b/python/python/lancedb/rerankers/colbert.py similarity index 100% rename from python/lancedb/rerankers/colbert.py rename to python/python/lancedb/rerankers/colbert.py diff --git a/python/lancedb/rerankers/cross_encoder.py b/python/python/lancedb/rerankers/cross_encoder.py similarity index 100% rename from python/lancedb/rerankers/cross_encoder.py rename to python/python/lancedb/rerankers/cross_encoder.py diff --git a/python/lancedb/rerankers/linear_combination.py b/python/python/lancedb/rerankers/linear_combination.py similarity index 100% rename from python/lancedb/rerankers/linear_combination.py rename to 
python/python/lancedb/rerankers/linear_combination.py diff --git a/python/lancedb/rerankers/openai.py b/python/python/lancedb/rerankers/openai.py similarity index 100% rename from python/lancedb/rerankers/openai.py rename to python/python/lancedb/rerankers/openai.py diff --git a/python/lancedb/schema.py b/python/python/lancedb/schema.py similarity index 100% rename from python/lancedb/schema.py rename to python/python/lancedb/schema.py diff --git a/python/lancedb/table.py b/python/python/lancedb/table.py similarity index 100% rename from python/lancedb/table.py rename to python/python/lancedb/table.py diff --git a/python/lancedb/util.py b/python/python/lancedb/util.py similarity index 100% rename from python/lancedb/util.py rename to python/python/lancedb/util.py diff --git a/python/tests/test_context.py b/python/python/tests/test_context.py similarity index 99% rename from python/tests/test_context.py rename to python/python/tests/test_context.py index 75adb348..bcd1d63c 100644 --- a/python/tests/test_context.py +++ b/python/python/tests/test_context.py @@ -13,7 +13,6 @@ import pandas as pd import pytest - from lancedb.context import contextualize diff --git a/python/tests/test_db.py b/python/python/tests/test_db.py similarity index 95% rename from python/tests/test_db.py rename to python/python/tests/test_db.py index 7b716f22..6f2fd1b4 100644 --- a/python/tests/test_db.py +++ b/python/python/tests/test_db.py @@ -11,12 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import lancedb import numpy as np import pandas as pd import pyarrow as pa import pytest - -import lancedb from lancedb.pydantic import LanceModel, Vector @@ -166,6 +165,24 @@ def test_table_names(tmp_path): assert db.table_names() == ["test1", "test2", "test3"] +@pytest.mark.asyncio +async def test_table_names_async(tmp_path): + db = lancedb.connect(tmp_path) + data = pd.DataFrame( + { + "vector": [[3.1, 4.1], [5.9, 26.5]], + "item": ["foo", "bar"], + "price": [10.0, 20.0], + } + ) + db.create_table("test2", data=data) + db.create_table("test1", data=data) + db.create_table("test3", data=data) + + db = await lancedb.connect_async(tmp_path) + assert await db.table_names() == ["test1", "test2", "test3"] + + def test_create_mode(tmp_path): db = lancedb.connect(tmp_path) data = pd.DataFrame( diff --git a/python/tests/test_e2e_remote_db.py b/python/python/tests/test_e2e_remote_db.py similarity index 99% rename from python/tests/test_e2e_remote_db.py rename to python/python/tests/test_e2e_remote_db.py index e9e69c48..f058092e 100644 --- a/python/tests/test_e2e_remote_db.py +++ b/python/python/tests/test_e2e_remote_db.py @@ -13,7 +13,6 @@ import numpy as np import pytest - from lancedb import LanceDBConnection # TODO: setup integ test mark and script diff --git a/python/tests/test_embeddings.py b/python/python/tests/test_embeddings.py similarity index 99% rename from python/tests/test_embeddings.py rename to python/python/tests/test_embeddings.py index 32142a57..af442a16 100644 --- a/python/tests/test_embeddings.py +++ b/python/python/tests/test_embeddings.py @@ -13,11 +13,10 @@ import sys import lance +import lancedb import numpy as np import pyarrow as pa import pytest - -import lancedb from lancedb.conftest import MockTextEmbeddingFunction from lancedb.embeddings import ( EmbeddingFunctionConfig, diff --git a/python/tests/test_embeddings_slow.py b/python/python/tests/test_embeddings_slow.py similarity index 99% rename from python/tests/test_embeddings_slow.py rename 
to python/python/tests/test_embeddings_slow.py index 11b024f0..9dc85bfc 100644 --- a/python/tests/test_embeddings_slow.py +++ b/python/python/tests/test_embeddings_slow.py @@ -14,12 +14,11 @@ import importlib import io import os +import lancedb import numpy as np import pandas as pd import pytest import requests - -import lancedb from lancedb.embeddings import get_registry from lancedb.pydantic import LanceModel, Vector @@ -185,10 +184,9 @@ def test_imagebind(tmp_path): import shutil import tempfile + import lancedb.embeddings.imagebind import pandas as pd import requests - - import lancedb.embeddings.imagebind from lancedb.embeddings import get_registry from lancedb.pydantic import LanceModel, Vector diff --git a/python/tests/test_fts.py b/python/python/tests/test_fts.py similarity index 98% rename from python/tests/test_fts.py rename to python/python/tests/test_fts.py index a62b1b2e..e884d605 100644 --- a/python/tests/test_fts.py +++ b/python/python/tests/test_fts.py @@ -14,13 +14,13 @@ import os import random from unittest import mock +import lancedb as ldb import numpy as np import pandas as pd import pytest -import tantivy -import lancedb as ldb -import lancedb.fts +pytest.importorskip("lancedb.fts") +tantivy = pytest.importorskip("tantivy") @pytest.fixture diff --git a/python/tests/test_io.py b/python/python/tests/test_io.py similarity index 99% rename from python/tests/test_io.py rename to python/python/tests/test_io.py index 0629e809..10b749b2 100644 --- a/python/tests/test_io.py +++ b/python/python/tests/test_io.py @@ -13,9 +13,8 @@ import os -import pytest - import lancedb +import pytest # You need to setup AWS credentials an a base path to run this test. 
Example # AWS_PROFILE=default TEST_S3_BASE_URL=s3://my_bucket/dataset pytest tests/test_io.py diff --git a/python/tests/test_pydantic.py b/python/python/tests/test_pydantic.py similarity index 99% rename from python/tests/test_pydantic.py rename to python/python/tests/test_pydantic.py index b37373ee..8f9d335c 100644 --- a/python/tests/test_pydantic.py +++ b/python/python/tests/test_pydantic.py @@ -20,9 +20,8 @@ from typing import List, Optional, Tuple import pyarrow as pa import pydantic import pytest -from pydantic import Field - from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema +from pydantic import Field @pytest.mark.skipif( diff --git a/python/tests/test_query.py b/python/python/tests/test_query.py similarity index 99% rename from python/tests/test_query.py rename to python/python/tests/test_query.py index cefea0c2..422f3a23 100644 --- a/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -18,7 +18,6 @@ import numpy as np import pandas.testing as tm import pyarrow as pa import pytest - from lancedb.db import LanceDBConnection from lancedb.pydantic import LanceModel, Vector from lancedb.query import LanceVectorQueryBuilder, Query diff --git a/python/tests/test_remote_client.py b/python/python/tests/test_remote_client.py similarity index 99% rename from python/tests/test_remote_client.py rename to python/python/tests/test_remote_client.py index 73ebf153..e9a2b19f 100644 --- a/python/tests/test_remote_client.py +++ b/python/python/tests/test_remote_client.py @@ -17,7 +17,6 @@ import pandas as pd import pyarrow as pa import pytest from aiohttp import web - from lancedb.remote.client import RestfulLanceDBClient, VectorQuery diff --git a/python/tests/test_remote_db.py b/python/python/tests/test_remote_db.py similarity index 99% rename from python/tests/test_remote_db.py rename to python/python/tests/test_remote_db.py index f4aff298..a775d5c7 100644 --- a/python/tests/test_remote_db.py +++ 
b/python/python/tests/test_remote_db.py @@ -11,9 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pyarrow as pa - import lancedb +import pyarrow as pa from lancedb.remote.client import VectorQuery, VectorQueryResult diff --git a/python/tests/test_rerankers.py b/python/python/tests/test_rerankers.py similarity index 99% rename from python/tests/test_rerankers.py rename to python/python/tests/test_rerankers.py index 5d28e412..a912c64b 100644 --- a/python/tests/test_rerankers.py +++ b/python/python/tests/test_rerankers.py @@ -1,9 +1,8 @@ import os +import lancedb import numpy as np import pytest - -import lancedb from lancedb.conftest import MockTextEmbeddingFunction # noqa from lancedb.embeddings import EmbeddingFunctionRegistry from lancedb.pydantic import LanceModel, Vector @@ -15,6 +14,9 @@ from lancedb.rerankers import ( ) from lancedb.table import LanceTable +# Tests rely on FTS index +pytest.importorskip("lancedb.fts") + def get_test_table(tmp_path): db = lancedb.connect(tmp_path) diff --git a/python/tests/test_table.py b/python/python/tests/test_table.py similarity index 99% rename from python/tests/test_table.py rename to python/python/tests/test_table.py index 6b2eac92..8b3029e0 100644 --- a/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -20,19 +20,18 @@ from typing import List from unittest.mock import PropertyMock, patch import lance +import lancedb import numpy as np import pandas as pd import polars as pl import pyarrow as pa import pytest -from pydantic import BaseModel - -import lancedb from lancedb.conftest import MockTextEmbeddingFunction from lancedb.db import LanceDBConnection from lancedb.embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry from lancedb.pydantic import LanceModel, Vector from lancedb.table import LanceTable +from pydantic import BaseModel class MockDB: @@ -804,6 +803,9 @@ def test_count_rows(db): def test_hybrid_search(db, 
tmp_path): + # This test uses an FTS index + pytest.importorskip("lancedb.fts") + db = MockDB(str(tmp_path)) # Create a LanceDB table schema with a vector and a text column emb = EmbeddingFunctionRegistry.get_instance().get("test")() diff --git a/python/tests/test_util.py b/python/python/tests/test_util.py similarity index 99% rename from python/tests/test_util.py rename to python/python/tests/test_util.py index 1bf3e693..fa7e75f0 100644 --- a/python/tests/test_util.py +++ b/python/python/tests/test_util.py @@ -15,7 +15,6 @@ import os import pathlib import pytest - from lancedb.util import get_uri_scheme, join_uri diff --git a/python/setup.py b/python/setup.py deleted file mode 100644 index bcabfa2f..00000000 --- a/python/setup.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2023 LanceDB Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import setuptools - -if __name__ == "__main__": - setuptools.setup() diff --git a/python/src/connection.rs b/python/src/connection.rs new file mode 100644 index 00000000..96d3c7cb --- /dev/null +++ b/python/src/connection.rs @@ -0,0 +1,66 @@ +// Copyright 2024 Lance Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use lancedb::connection::Connection as LanceConnection; +use pyo3::{pyclass, pyfunction, pymethods, PyAny, PyRef, PyResult, Python}; +use pyo3_asyncio::tokio::future_into_py; + +use crate::error::PythonErrorExt; + +#[pyclass] +pub struct Connection { + inner: LanceConnection, +} + +#[pymethods] +impl Connection { + pub fn table_names(self_: PyRef<'_, Self>) -> PyResult<&PyAny> { + let inner = self_.inner.clone(); + future_into_py(self_.py(), async move { + inner.table_names().await.infer_error() + }) + } +} + +#[pyfunction] +pub fn connect( + py: Python, + uri: String, + api_key: Option, + region: Option, + host_override: Option, + read_consistency_interval: Option, +) -> PyResult<&PyAny> { + future_into_py(py, async move { + let mut builder = lancedb::connect(&uri); + if let Some(api_key) = api_key { + builder = builder.api_key(&api_key); + } + if let Some(region) = region { + builder = builder.region(®ion); + } + if let Some(host_override) = host_override { + builder = builder.host_override(&host_override); + } + if let Some(read_consistency_interval) = read_consistency_interval { + let read_consistency_interval = Duration::from_secs_f64(read_consistency_interval); + builder = builder.read_consistency_interval(read_consistency_interval); + } + Ok(Connection { + inner: builder.execute().await.infer_error()?, + }) + }) +} diff --git a/python/src/error.rs b/python/src/error.rs new file mode 100644 index 00000000..c65192ca --- /dev/null +++ b/python/src/error.rs @@ -0,0 +1,61 @@ +// Copyright 2024 Lance Developers. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use pyo3::{ + exceptions::{PyOSError, PyRuntimeError, PyValueError}, + PyResult, +}; + +use lancedb::error::Error as LanceError; + +pub trait PythonErrorExt { + /// Convert to a python error based on the Lance error type + fn infer_error(self) -> PyResult; + /// Convert to OSError + fn os_error(self) -> PyResult; + /// Convert to RuntimeError + fn runtime_error(self) -> PyResult; + /// Convert to ValueError + fn value_error(self) -> PyResult; +} + +impl PythonErrorExt for std::result::Result { + fn infer_error(self) -> PyResult { + match &self { + Ok(_) => Ok(self.unwrap()), + Err(err) => match err { + LanceError::InvalidTableName { .. } => self.value_error(), + LanceError::TableNotFound { .. } => self.value_error(), + LanceError::TableAlreadyExists { .. } => self.runtime_error(), + LanceError::CreateDir { .. } => self.os_error(), + LanceError::Store { .. } => self.runtime_error(), + LanceError::Lance { .. } => self.runtime_error(), + LanceError::Schema { .. } => self.value_error(), + LanceError::Runtime { .. 
} => self.runtime_error(), + }, + } + } + + fn os_error(self) -> PyResult { + self.map_err(|err| PyOSError::new_err(err.to_string())) + } + + fn runtime_error(self) -> PyResult { + self.map_err(|err| PyRuntimeError::new_err(err.to_string())) + } + + fn value_error(self) -> PyResult { + self.map_err(|err| PyValueError::new_err(err.to_string())) + } +} diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 00000000..2a66810d --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,32 @@ +// Copyright 2024 Lance Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use connection::{connect, Connection}; +use env_logger::Env; +use pyo3::{pymodule, types::PyModule, wrap_pyfunction, PyResult, Python}; + +pub mod connection; +pub(crate) mod error; + +#[pymodule] +pub fn _lancedb(_py: Python, m: &PyModule) -> PyResult<()> { + let env = Env::new() + .filter_or("LANCEDB_LOG", "warn") + .write_style("LANCEDB_LOG_STYLE"); + env_logger::init_from_env(env); + m.add_class::()?; + m.add_function(wrap_pyfunction!(connect, m)?)?; + m.add("__version__", env!("CARGO_PKG_VERSION"))?; + Ok(()) +}