mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-25 06:19:57 +00:00
- Creates testing files `md_testing.py` and `md_testing.js` for testing Python and Node.js code in markdown files in the documentation. This listens for HTML tags as well: `<!--[language] code code code...-->` will create a set-up file to create some mock tables or to fulfill some assumptions in the documentation. - Creates a GitHub Actions workflow that triggers on every push/PR to `docs/**`. - Modifies documentation so tests run (mostly indentation, some small syntax errors and some missing imports). A list of excluded files that we need to take a closer look at later on: ```javascript const excludedFiles = [ "../src/fts.md", "../src/embedding.md", "../src/examples/serverless_lancedb_with_s3_and_lambda.md", "../src/examples/serverless_qa_bot_with_modal_and_langchain.md", "../src/examples/youtube_transcript_bot_with_nodejs.md", ]; ``` Many of them can't be done because we need the OpenAI API key :(. `fts.md` has some issues with the library; I believe this is still experimental? Closes #170 --------- Co-authored-by: Will Jones <willjones127@gmail.com>
42 lines
1.4 KiB
Python
42 lines
1.4 KiB
Python
import glob
|
|
from typing import Iterator
|
|
from pathlib import Path
|
|
|
|
# Markdown documents that are skipped when extracting Python snippets.
excluded_files = [
    "../src/fts.md",
    "../src/embedding.md",
    "../src/examples/serverless_lancedb_with_s3_and_lambda.md",
    "../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
    "../src/examples/youtube_transcript_bot_with_nodejs.md",
]

# Language tag prefix that must follow the opening fence of a code block.
python_prefix = "py"
# Extension given to each generated file.
python_file = ".py"
# Directory under which the generated files are written.
python_folder = "python"
# Recursive glob pattern selecting the markdown documents to scan.
glob_string = "../src/**/*.md"
|
|
|
|
def yield_lines(lines: Iterator[str], prefix: str, suffix: str) -> Iterator[str]:
    """Yield the source lines found inside Python fenced code blocks.

    A block opens on a line whose stripped text starts with
    ``prefix + python_prefix`` and closes on the next line whose stripped
    text starts with ``suffix``. Lines between the two are yielded with the
    opening fence's indentation removed; a single ``"\\n"`` is yielded when
    a block closes so consecutive blocks stay separated.

    Args:
        lines: The lines of a document, each normally ending in a newline.
        prefix: Marker that opens a code block (the language tag follows it).
        suffix: Marker that closes a code block.

    Yields:
        The dedented code lines, plus one blank line after each block.
    """
    in_code_block = False
    # Python code has strict indentation, so remember how far the opening
    # fence was indented and remove that much from every line of the block.
    strip_length = 0
    for line in lines:
        stripped = line.strip()
        if stripped.startswith(prefix + python_prefix):
            in_code_block = True
            strip_length = len(line) - len(line.lstrip())
        elif in_code_block and stripped.startswith(suffix):
            in_code_block = False
            yield "\n"
        elif in_code_block:
            # Remove at most `strip_length` characters of *leading whitespace*.
            # Slicing blindly would drop the newline of blank lines and eat
            # real characters from lines indented less than the fence.
            leading = len(line) - len(line.lstrip(" \t"))
            yield line[min(strip_length, leading):]
|
|
|
|
# Extract the Python snippets from every non-excluded markdown document and
# write them to <python_folder>/<doc name>/<doc name>.py.
for file in glob.glob(glob_string, recursive=True):
    if file in excluded_files:
        continue

    # Explicit encoding so the result does not depend on the platform default.
    with open(file, "r", encoding="utf-8") as f:
        lines = list(yield_lines(iter(f), "```", "```"))

    # Documents without any Python code block produce no output file.
    if not lines:
        continue

    # Path.stem drops only the final ".md" suffix. str.strip(".md") would
    # instead remove every leading/trailing '.', 'm' or 'd' character and so
    # mangle names such as "modal.md".
    doc_name = Path(file).stem
    out_path = Path(python_folder) / doc_name / (doc_name + python_file)
    print(out_path)
    out_path.parent.mkdir(exist_ok=True, parents=True)
    with open(out_path, "w", encoding="utf-8") as out:
        out.writelines(lines)
|