Files
lancedb/docs/test/md_testing.py
2024-04-11 15:20:00 +05:30

82 lines
2.3 KiB
Python

import glob
from pathlib import Path
from typing import Iterator, List, Optional
# Fence language tag, generated-file extension, and output directory name
# used by the extraction code further down in this script.
python_prefix = "py"
python_file = ".py"
python_folder = "python"

# Recursive pattern selecting every markdown page under the docs sources.
glob_string = "../src/**/*.md"

# Patterns for pages whose snippets are left out of the extraction.
excluded_globs = [
    "../src/fts.md",
    "../src/embedding.md",
    "../src/examples/*.md",
    "../src/integrations/voxel51.md",
    "../src/guides/tables.md",
    "../src/python/duckdb.md",
    "../src/embeddings/*.md",
    "../src/concepts/*.md",
    "../src/ann_indexes.md",
    "../src/basic.md",
    "../src/hybrid_search/hybrid_search.md",
    "../src/reranking/*.md",
]

# All candidate pages, resolved relative to the current working directory.
files = glob.glob(glob_string, recursive=True)

# Expand each exclusion pattern into the concrete paths it matches, kept in
# pattern order as one flat list.
excluded_files = []
for _pattern in excluded_globs:
    excluded_files.extend(glob.glob(_pattern, recursive=True))
def yield_lines(
    lines: Iterator[str],
    prefix: str,
    suffix: str,
    language: Optional[str] = None,
):
    """Yield the de-indented code lines of the matching fenced blocks in *lines*.

    A block opens on a line whose stripped text starts with
    ``prefix + language`` and closes on the next line whose stripped text
    starts with ``suffix``. Each closed block is followed by one ``"\\n"``
    so consecutive blocks stay separated in the output.

    Any line containing ``skip-test`` (inside or outside a block) suppresses
    all output until the next matching block has been closed.

    Args:
        lines: Markdown lines, each normally ending with its newline.
        prefix: Text that opens a fence (for example three backticks).
        suffix: Text that closes a fence.
        language: Language tag expected right after ``prefix``. When omitted,
            the module-level ``python_prefix`` is used.

    Yields:
        Code lines with the opening fence's indentation removed.
    """
    if language is None:
        language = python_prefix
    opening = prefix + language

    in_code_block = False
    # Python code has strict indentation, so the fence's own indentation is
    # remembered and removed from every line of the block.
    strip_length = 0
    skip_test = False
    for line in lines:
        stripped = line.strip()
        if "skip-test" in line:
            skip_test = True
        if stripped.startswith(opening):
            in_code_block = True
            strip_length = len(line) - len(line.lstrip())
        elif in_code_block and stripped.startswith(suffix):
            in_code_block = False
            if not skip_test:
                yield "\n"
            skip_test = False
        elif in_code_block and not skip_test:
            # Never cut more than the line's own leading spaces/tabs: a blank
            # line keeps its newline and an under-indented line keeps all of
            # its code instead of being truncated by a blind slice.
            own_indent = len(line) - len(line.lstrip(" \t"))
            yield line[min(strip_length, own_indent):]
def wrap_async(lines: List[str]) -> List[str]:
    """Wrap *lines* in an ``async def main()`` that is run with asyncio.

    Returns a new list: the ``async def main():`` header, every input line
    prefixed with one space so it sits inside the function body, then a
    blank line followed by ``import asyncio`` and ``asyncio.run(main())``.
    """
    header = ["async def main():\n"]
    # One leading space per line is enough to nest it under the header.
    body = [" " + text for text in lines]
    runner = ["\n", "import asyncio\n", "asyncio.run(main())\n"]
    return header + body + runner
# Extract the Python snippets of every non-excluded markdown page and write
# them to <python_folder>/<page stem>/<page stem><python_file>.
# NOTE(review): the output path uses only the page's file name, so two pages
# with the same name in different folders write to the same file - confirm
# whether that is intended.

# Build the exclusion lookup once so each membership test is O(1).
_excluded = set(excluded_files)

for file in files:
    if file in _excluded:
        continue

    # Read as UTF-8 explicitly rather than with the platform default encoding.
    with open(file, "r", encoding="utf-8") as f:
        lines = list(yield_lines(iter(f), "```", "```"))

    # Pages without any extracted snippet produce no output file.
    if not lines:
        continue

    # Snippets that use `await` only run inside a coroutine.
    if any("await" in line for line in lines):
        lines = wrap_async(lines)

    print(lines)
    # Path.stem drops just the final ".md". str.strip(".md") would instead
    # remove every leading/trailing '.', 'm' and 'd' character, turning
    # "migration.md" into "igration".
    stem = Path(file).stem
    out_path = Path(python_folder) / stem / (stem + python_file)
    print(out_path)
    out_path.parent.mkdir(exist_ok=True, parents=True)
    with open(out_path, "w", encoding="utf-8") as out:
        out.writelines(lines)