mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-22 21:09:58 +00:00
Exposes `storage_options` in LanceDB. This is provided for Python async, Node `lancedb`, and Node `vectordb` (and Rust of course). Python synchronous is omitted because it's not compatible with the PyArrow filesystems we use there currently. In the future, we will move the sync API to wrap the async one, and then it will get support for `storage_options`. 1. Fixes #1168 2. Closes #1165 3. Closes #1082 4. Closes #439 5. Closes #897 6. Closes #642 7. Closes #281 8. Closes #114 9. Closes #990 10. Deprecating `awsCredentials` and `awsRegion`. Users are encouraged to use `storageOptions` instead.
81 lines
2.3 KiB
Python
81 lines
2.3 KiB
Python
import glob
|
|
from typing import Iterator, List
|
|
from pathlib import Path
|
|
|
|
# Pieces used to recognise Python code fences and to name the generated files:
# the language tag that follows the fence marker, the extension of each
# generated file, and the folder the generated files are written under.
python_prefix = "py"
python_file = ".py"
python_folder = "python"

# Recursive pattern selecting every markdown page under ``../src``.
glob_string = "../src/**/*.md"

# Patterns for pages whose snippets are not extracted.
excluded_globs = [
    "../src/fts.md",
    "../src/embedding.md",
    "../src/examples/*.md",
    "../src/integrations/voxel51.md",
    "../src/guides/tables.md",
    "../src/python/duckdb.md",
    "../src/embeddings/*.md",
    "../src/concepts/*.md",
    "../src/ann_indexes.md",
    "../src/basic.md",
    "../src/hybrid_search/hybrid_search.md",
]

# Expand the patterns into concrete paths: every candidate page, and the
# flattened list of every path matched by any exclusion pattern.
files = glob.glob(glob_string, recursive=True)
excluded_files = [
    matched_path
    for pattern in excluded_globs
    for matched_path in glob.glob(pattern, recursive=True)
]
|
|
|
|
|
|
def yield_lines(lines: Iterator[str], prefix: str, suffix: str):
    """Yield the contents of the Python code fences found in ``lines``.

    A fence opens on a line whose stripped text starts with
    ``prefix + python_prefix`` and closes on the next line whose stripped text
    starts with ``suffix``. Each line inside the fence is yielded with the
    opening fence's leading indentation sliced off, and a single ``"\\n"`` is
    yielded when the fence closes.

    Any line containing ``skip-test`` switches skipping on: nothing is yielded
    until the end of the next Python fence, where skipping is switched off.

    Args:
        lines: The lines of a markdown document, newline terminators included.
        prefix: Marker that (followed by ``python_prefix``) opens a fence.
        suffix: Marker that closes a fence.

    Yields:
        The de-indented code lines, plus a newline separator after each fence.
    """
    opening_fence = prefix + python_prefix
    inside_snippet = False
    skipping = False
    # Python code has strict indentation, so remember how far the fence itself
    # was indented and remove exactly that many characters from each code line.
    indent_width = 0

    for raw in lines:
        if "skip-test" in raw:
            skipping = True

        stripped = raw.strip()

        if stripped.startswith(opening_fence):
            inside_snippet = True
            indent_width = len(raw) - len(raw.lstrip())
            continue

        if not inside_snippet:
            continue

        if stripped.startswith(suffix):
            inside_snippet = False
            if not skipping:
                yield "\n"
            # The skip marker only applies to a single fence.
            skipping = False
            continue

        if not skipping:
            yield raw[indent_width:]
|
|
|
|
|
|
def wrap_async(lines: List[str]) -> List[str]:
    """Wrap ``lines`` in an ``async def main()`` that is run with asyncio.

    Args:
        lines: Source lines, each expected to carry its own newline.

    Returns:
        A new list: the ``async def main():`` header, every input line
        indented beneath it, then a blank line followed by the
        ``import asyncio`` / ``asyncio.run(main())`` trailer.
    """
    header = ["async def main():\n"]
    # Indent every original line so it becomes part of the coroutine body.
    body = [" " + source_line for source_line in lines]
    footer = ["\n", "import asyncio\n", "asyncio.run(main())\n"]
    return header + body + footer
|
|
|
|
|
|
# Extract the Python snippets of every non-excluded markdown page and write
# them to ``<python_folder>/<page>/<page><python_file>``.
for file in files:
    if file in excluded_files:
        continue

    # Explicit encoding so reading does not depend on the platform's locale
    # default.
    with open(file, "r", encoding="utf-8") as f:
        lines = list(yield_lines(iter(f), "```", "```"))

    # Pages without any extracted Python code produce no output file.
    if not lines:
        continue

    # Snippets that use ``await`` at top level must run inside a coroutine.
    if any("await" in line for line in lines):
        lines = wrap_async(lines)

    print(lines)

    # ``Path.stem`` removes only the final suffix. The previous
    # ``name.strip(".md")`` stripped any leading/trailing '.', 'm' or 'd'
    # characters, mangling names such as "duckdb.md" (-> "uckdb").
    page_name = Path(file).stem
    out_path = Path(python_folder) / page_name / (page_name + python_file)
    print(out_path)

    out_path.parent.mkdir(exist_ok=True, parents=True)
    with open(out_path, "w", encoding="utf-8") as out:
        out.writelines(lines)
|