mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 06:39:57 +00:00
This is v1 of integrating full text search index into LanceDB.
# API
The query API is roughly the same as before, except that if the input is text
instead of a vector, we assume that it's a full-text search (FTS).
## Example
If `table` is a LanceDB LanceTable, then:
Build index: `table.create_fts_index("text")`
Query: `df = table.search("puppy").limit(10).select(["text"]).to_df()`
# Implementation
Here we use the tantivy-py package to build the index. We then use the
row IDs as the full-text-search index's doc IDs, and then simply do a Take
operation to fetch the rows.
# Limitations
1. Incremental row appends are not supported yet; new data won't show up in
search results.
2. local filesystem only
3. requires building tantivy explicitly
---------
Co-authored-by: Chang She <chang@lancedb.com>
59 lines
1.4 KiB
TOML
59 lines
1.4 KiB
TOML
[project]
# Core distribution metadata.
name = "lancedb"
version = "0.1.2"
description = "lancedb"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.8"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

# Runtime requirements installed with the base package.
dependencies = [
    "pylance>=0.4.6",
    "ratelimiter",
    "retry",
    "tqdm",
]

keywords = [
    "data-format",
    "data-science",
    "machine-learning",
    "arrow",
    "data-analytics",
]

# Trove classifiers; the Python version entries mirror `requires-python`.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Environment :: Console",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering",
]

[project.urls]
# Link to the source repository.
repository = "https://github.com/eto-ai/lancedb"

[project.optional-dependencies]
# Extras, installed with e.g. `pip install "lancedb[tests]"`.
tests = ["pytest"]
dev = ["ruff", "pre-commit", "black"]
docs = [
    "mkdocs",
    "mkdocs-jupyter",
    "mkdocs-material",
    "mkdocstrings[python]",
]
fts = [
    # tantivy 0.19.2, built from source at a fixed tantivy-py commit.
    # The revision must follow `@`: pip does not treat a `#<hash>` URL
    # fragment as a revision, so it would install the default branch HEAD.
    "tantivy @ git+https://github.com/quickwit-oss/tantivy-py@164adc87e1a033117001cf70e38c82a53014d985",
]

[build-system]
# PEP 517 build configuration: build with the setuptools backend.
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"