Files
lancedb/python/pyproject.toml
msu-reevo cc81f3e1a5 fix(python): typing (#2167)
@wjones127 is there a standard way you guys set up your virtualenv? I can
either relist all the dependencies in the pyright pre-commit section, or
specify a venv, or the user has to be in the virtual environment when
they run git commit. If the venv location were standardized, or a Python
manager like `uv` were used, it would be easier to avoid duplicating the
pyright dependency list.

Per your suggestion, in `pyproject.toml` I added all the passing
files to the `include` list under `[tool.pyright]`.

For ruff I upgraded the version and removed "TCH" which doesn't exist as
an option.

I added a `pyright_report.csv` which contains a list of all files sorted
by pyright errors ascending as a todo list to work on.

I fixed about 30 issues in `table.py` stemming from plain `str` values
being passed into methods that require one of a fixed set of string
`Literal`s, by extracting those literal types into `types.py`.

Can you verify in the rust bridge that the schema should be a property
and not a method here? If it's a method, then there's another place in
the code where `inner.schema` should be `inner.schema()`
``` python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
```

Also, unless the `_lancedb.pyi` file is wrong, there is no
`__anext__` here on `_inner` when it's not an `AsyncGenerator`;
only `next` is defined:
``` python
    async def __anext__(self) -> pa.RecordBatch:
        return await self._inner.__anext__()
        if isinstance(self._inner, AsyncGenerator):
            batch = await self._inner.__anext__()
        else:
            batch = await self._inner.next()
        if batch is None:
            raise StopAsyncIteration
        return batch
```
in the else statement, `_inner` is a `RecordBatchStream`
```python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
    async def next(self) -> Optional[pa.RecordBatch]: ...
```

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2025-03-10 09:01:23 -07:00

131 lines
3.3 KiB
TOML

[project]
# Core package metadata (PEP 621).
name = "lancedb"
description = "lancedb"
readme = "README.md"
license = { file = "LICENSE" }
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
requires-python = ">=3.9"
# The version is not written in this file: it is declared dynamic and,
# per the original note, lives in Cargo.toml.
dynamic = ["version"]
keywords = [
    "data-format",
    "data-science",
    "machine-learning",
    "arrow",
    "data-analytics",
]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Environment :: Console",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering",
]
# Requirements installed with the base package (no extras selected).
dependencies = [
    "deprecation",
    "tqdm>=4.27.0",
    "pyarrow>=14",
    "pydantic>=1.10",
    "packaging",
    "overrides>=0.7",
]
[project.urls]
# Link to the source repository on GitHub.
repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
# Optional extras, selected at install time, e.g. `pip install "lancedb[tests]"`.

# Packages needed to run the test suite.
tests = [
    "aiohttp",
    "boto3",
    "pandas>=1.4",
    "pytest",
    "pytest-mock",
    "pytest-asyncio",
    "duckdb",
    "pytz",
    "polars>=0.19, <=1.3.0",
    "tantivy",
    "pyarrow-stubs",
    "pylance~=0.23.2",
]
# Developer tooling: linter, git hooks and type checker.
dev = [
    "ruff",
    "pre-commit",
    "pyright",
    # Only requested on interpreters older than 3.11 (see the environment marker).
    'typing-extensions>=4.0.0; python_version < "3.11"',
]
# Documentation site tooling.
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
# Uses the same `open-clip-torch` distribution as the `embeddings` extra;
# the previous spelling here, `open-clip`, named a different project.
clip = ["torch", "pillow", "open-clip-torch"]
# Third-party libraries for the embeddings integrations.
embeddings = [
    "requests>=2.31.0",
    "openai>=1.6.1",
    "sentence-transformers",
    "torch",
    "pillow",
    "open-clip-torch",
    "cohere",
    "huggingface_hub",
    "InstructorEmbedding",
    # NOTE(review): package-name normalization treats `.`, `-` and `_` alike,
    # so this presumably resolves to `google-generativeai` — confirm.
    "google.generativeai",
    "boto3>=1.28.57",
    "awscli>=1.29.57",
    "botocore>=1.31.57",
    "ollama",
    "ibm-watsonx-ai>=1.1.2",
]
azure = ["adlfs>=2024.2.0"]
[tool.maturin]
# Dotted import path of the module that maturin builds.
module-name = "lancedb._lancedb"
# Directory, relative to this file, that holds the Python sources.
python-source = "python"
[build-system]
# Build backend used to produce the package, and the minimum version required.
build-backend = "maturin"
requires = ["maturin>=1.4"]
[tool.ruff.lint]
# Rule families enabled for linting.
select = [
    "F",    # Pyflakes
    "E",    # pycodestyle errors
    "W",    # pycodestyle warnings
    "G",    # flake8-logging-format
    "PERF", # Perflint
]
[tool.pytest.ini_options]
# Markers registered for the test suite. Because `--strict-markers` is passed
# in `addopts`, pytest rejects markers that are not registered.
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "asyncio",
    "s3_test",
]
# Options added to every pytest run: strict marker checking, and ignore paths
# matching the glob lancedb/embeddings/*.py.
addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py"
[tool.pyright]
# Python version pyright assumes while type checking.
# NOTE(review): the project declares requires-python >=3.9 — confirm that
# checking against 3.12 only is intended.
pythonVersion = "3.12"
# Paths left out of type checking.
exclude = ["python/tests/"]
# Explicit list of files pyright checks. Presumably these are the files that
# currently pass; add more here as they are fixed.
include = [
    "python/lancedb/index.py",
    "python/lancedb/rerankers/util.py",
    "python/lancedb/rerankers/__init__.py",
    "python/lancedb/rerankers/voyageai.py",
    "python/lancedb/rerankers/jinaai.py",
    "python/lancedb/rerankers/openai.py",
    "python/lancedb/rerankers/cross_encoder.py",
    "python/lancedb/rerankers/colbert.py",
    "python/lancedb/rerankers/answerdotai.py",
    "python/lancedb/rerankers/cohere.py",
    "python/lancedb/arrow.py",
    "python/lancedb/__init__.py",
    "python/lancedb/types.py",
    "python/lancedb/integrations/__init__.py",
    "python/lancedb/exceptions.py",
    "python/lancedb/background_loop.py",
    "python/lancedb/schema.py",
    "python/lancedb/remote/__init__.py",
    "python/lancedb/remote/errors.py",
    "python/lancedb/embeddings/__init__.py",
    "python/lancedb/_lancedb.pyi",
]