mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-15 00:02:59 +00:00
Merge pull request #8 from lancedb/changhiskhan/mkdocs
hello world mkdocs
This commit is contained in:
54
.github/workflows/docs.yml
vendored
Normal file
54
.github/workflows/docs.yml
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
name: Deploy docs to Pages
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
|
||||
# Allows you to run this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
|
||||
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
# Allow one concurrent deployment
|
||||
concurrency:
|
||||
group: "pages"
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# Single deploy job since we're just deploying
|
||||
build:
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: "pip"
|
||||
cache-dependency-path: "docs/requirements.txt"
|
||||
- name: Build Python
|
||||
working-directory: python
|
||||
run: |
|
||||
python -m pip install -e .
|
||||
python -m pip install -r ../docs/requirements.txt
|
||||
- name: Build docs
|
||||
working-directory: docs
|
||||
run: |
|
||||
mkdocs build
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v2
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v1
|
||||
with:
|
||||
path: "site"
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v1
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -4,4 +4,8 @@
|
||||
**/__pycache__
|
||||
|
||||
rust/target
|
||||
rust/Cargo.lock
|
||||
rust/Cargo.lock
|
||||
|
||||
site
|
||||
|
||||
.pytest_cache
|
||||
11
.pre-commit-config.yaml
Normal file
11
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v3.2.0
|
||||
hooks:
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.12.0
|
||||
hooks:
|
||||
- id: black
|
||||
@@ -17,3 +17,4 @@ LanceDB's core is written in Rust 🦀 and is built using Lance, an open-source
|
||||
## Documentation Quick Links
|
||||
|
||||
* `Quick start` - search and filter a hello world vector dataset with LanceDB using the Python SDK.
|
||||
* [`API Reference`](python.md) - detailed documentation for the LanceDB Python SDK.
|
||||
@@ -1,3 +0,0 @@
|
||||
site_name: LanceDB Documentation
|
||||
theme:
|
||||
name: material
|
||||
12
docs/python.md
Normal file
12
docs/python.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# LanceDB Python API Reference
|
||||
|
||||
## Installation
|
||||
|
||||
```shell
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
::: lancedb
|
||||
::: lancedb.db
|
||||
::: lancedb.table
|
||||
::: lancedb.query
|
||||
3
docs/requirements.txt
Normal file
3
docs/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
mkdocs==1.4.2
|
||||
mkdocs-material==9.1.3
|
||||
mkdocstrings[python]==0.20.0
|
||||
12
mkdocs.yml
Normal file
12
mkdocs.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
site_name: LanceDB Documentation
|
||||
|
||||
theme:
|
||||
name: "material"
|
||||
|
||||
plugins:
|
||||
- search
|
||||
- mkdocstrings
|
||||
|
||||
nav:
|
||||
- Home: index.md
|
||||
- Python API: python.md
|
||||
@@ -53,8 +53,9 @@ class LanceDBConnection:
|
||||
def __getitem__(self, name: str) -> LanceTable:
|
||||
return self.open_table(name)
|
||||
|
||||
def create_table(self, name: str, data: DATA = None,
|
||||
schema: pa.Schema = None) -> LanceTable:
|
||||
def create_table(
|
||||
self, name: str, data: DATA = None, schema: pa.Schema = None
|
||||
) -> LanceTable:
|
||||
"""Create a table in the database.
|
||||
|
||||
Parameters
|
||||
|
||||
@@ -76,17 +76,12 @@ class LanceQueryBuilder:
|
||||
return self
|
||||
|
||||
def to_df(self) -> pd.DataFrame:
|
||||
"""Execute the query and return the results as a pandas DataFrame.
|
||||
"""
|
||||
"""Execute the query and return the results as a pandas DataFrame."""
|
||||
ds = self._table.to_lance()
|
||||
# TODO indexed search
|
||||
tbl = ds.to_table(
|
||||
columns=self._columns,
|
||||
filter=self._where,
|
||||
nearest={
|
||||
"column": VECTOR_COLUMN_NAME,
|
||||
"q": self._query,
|
||||
"k": self._limit
|
||||
}
|
||||
nearest={"column": VECTOR_COLUMN_NAME, "q": self._query, "k": self._limit},
|
||||
)
|
||||
return tbl.to_pandas()
|
||||
|
||||
@@ -131,8 +131,9 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
|
||||
return data
|
||||
# cast the columns to the expected types
|
||||
data = data.combine_chunks()
|
||||
return pa.Table.from_arrays([data[name] for name in schema.names],
|
||||
schema=schema)
|
||||
return pa.Table.from_arrays(
|
||||
[data[name] for name in schema.names], schema=schema
|
||||
)
|
||||
# just check the vector column
|
||||
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)
|
||||
|
||||
|
||||
@@ -37,7 +37,13 @@ repository = "https://github.com/eto-ai/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tests = [
|
||||
"pytest",
|
||||
"pytest"
|
||||
]
|
||||
dev = [
|
||||
"ruff", "pre-commit", "black"
|
||||
]
|
||||
docs = [
|
||||
"mkdocs", "mkdocs-material", "mkdocstrings[python]"
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
||||
@@ -20,9 +20,13 @@ def test_basic(tmp_path):
|
||||
assert db.uri == str(tmp_path)
|
||||
assert db.table_names() == []
|
||||
|
||||
table = db.create_table("test",
|
||||
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
|
||||
table = db.create_table(
|
||||
"test",
|
||||
data=[
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||
],
|
||||
)
|
||||
rs = table.search([100, 100]).limit(1).to_df()
|
||||
assert len(rs) == 1
|
||||
assert rs["item"].iloc[0] == "bar"
|
||||
|
||||
@@ -21,7 +21,6 @@ import pytest
|
||||
|
||||
|
||||
class MockTable:
|
||||
|
||||
def __init__(self, tmp_path):
|
||||
self.uri = tmp_path
|
||||
|
||||
@@ -31,16 +30,22 @@ class MockTable:
|
||||
|
||||
@pytest.fixture
|
||||
def table(tmp_path) -> MockTable:
|
||||
df = pd.DataFrame({
|
||||
"vector": [[1, 2], [3, 4]],
|
||||
"id": [1, 2],
|
||||
"str_field": ["a", "b"],
|
||||
"float_field": [1.0, 2.0]
|
||||
})
|
||||
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("id", pa.int32()),
|
||||
pa.field("str_field", pa.string()),
|
||||
pa.field("float_field", pa.float64())])
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"vector": [[1, 2], [3, 4]],
|
||||
"id": [1, 2],
|
||||
"str_field": ["a", "b"],
|
||||
"float_field": [1.0, 2.0],
|
||||
}
|
||||
)
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("id", pa.int32()),
|
||||
pa.field("str_field", pa.string()),
|
||||
pa.field("float_field", pa.float64()),
|
||||
]
|
||||
)
|
||||
lance.write_dataset(df, tmp_path, schema)
|
||||
return MockTable(tmp_path)
|
||||
|
||||
@@ -55,5 +60,3 @@ def test_query_builder_with_filter(table):
|
||||
df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
|
||||
assert df["id"].values[0] == 2
|
||||
assert all(df["vector"].values[0] == [3, 4])
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ from lancedb.table import LanceTable
|
||||
|
||||
|
||||
class MockDB:
|
||||
|
||||
def __init__(self, uri: Path):
|
||||
self.uri = uri
|
||||
|
||||
@@ -33,9 +32,12 @@ def db(tmp_path) -> MockDB:
|
||||
|
||||
def test_basic(db):
|
||||
ds = LanceTable.create(
|
||||
db, "test",
|
||||
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
|
||||
db,
|
||||
"test",
|
||||
data=[
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||
],
|
||||
).to_lance()
|
||||
|
||||
table = LanceTable(db, "test")
|
||||
@@ -45,21 +47,35 @@ def test_basic(db):
|
||||
|
||||
|
||||
def test_add(db):
|
||||
schema = pa.schema([pa.field("vector", pa.list_(pa.float32())),
|
||||
pa.field("item", pa.string()),
|
||||
pa.field("price", pa.float32())])
|
||||
expected = pa.Table.from_arrays([
|
||||
pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||
pa.array(["foo", "bar"]),
|
||||
pa.array([10.0, 20.0])
|
||||
], schema=schema)
|
||||
data = [[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]]
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32())),
|
||||
pa.field("item", pa.string()),
|
||||
pa.field("price", pa.float32()),
|
||||
]
|
||||
)
|
||||
expected = pa.Table.from_arrays(
|
||||
[
|
||||
pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||
pa.array(["foo", "bar"]),
|
||||
pa.array([10.0, 20.0]),
|
||||
],
|
||||
schema=schema,
|
||||
)
|
||||
data = [
|
||||
[
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||
]
|
||||
]
|
||||
df = pd.DataFrame(data[0])
|
||||
data.append(df)
|
||||
data.append(pa.Table.from_pandas(df, schema=schema))
|
||||
|
||||
for i, d in enumerate(data):
|
||||
tbl = (LanceTable.create(db, f"test_{i}", data=d, schema=schema)
|
||||
.to_lance().to_table())
|
||||
tbl = (
|
||||
LanceTable.create(db, f"test_{i}", data=d, schema=schema)
|
||||
.to_lance()
|
||||
.to_table()
|
||||
)
|
||||
assert expected == tbl
|
||||
|
||||
Reference in New Issue
Block a user