deduplicate files in S3

This commit is contained in:
Alek Westover
2023-07-26 16:17:27 -04:00
parent b9a7a661d0
commit cc2e1c0119
2 changed files with 22 additions and 1 deletion
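In outline: the Dockerfile now records a per-extension build tag in build_tags.txt as the extensions are built, and the upload script keys each extension's S3 archive path by that per-extension tag instead of the global BUILD_TAG. An extension whose tag is unchanged in this build already has its archive at the same S3 key, so the script deletes the local copy rather than re-uploading it; that is the deduplication.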

View File: Dockerfile

@@ -776,6 +776,13 @@ ARG PG_VERSION
 ARG BUILD_TAG
 RUN apt update && apt install -y zstd
+# Define extension build numbers
+# NOTE: an extension's build_tag does *not* need to match BUILD_TAG. In particular,
+# bump an extension's build_tag only when you actually rebuild that extension
+RUN echo "kq_imcx 5670669815" >> build_tags.txt
+RUN echo "anon 5670669815" >> build_tags.txt
+RUN echo "postgis 5670669815" >> build_tags.txt
 # copy the control files here
 COPY --from=kq-imcx-pg-build /extensions/ /extensions/
 COPY --from=pg-anon-pg-build /extensions/ /extensions/
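To make the mechanism concrete, here is a minimal sketch of how the build_tags.txt entries written above turn into S3 keys. This is not the repository's actual upload script: load_build_tags and archive_key are hypothetical helpers, and the key layout and the pg_version placeholder are copied from the Python diff below.

    from pathlib import Path

    def load_build_tags(path: str = "build_tags.txt") -> dict[str, str]:
        """Parse the 'name tag' pairs written by the RUN echo steps above."""
        tags = {}
        for line in Path(path).read_text().splitlines():
            ext, tag = line.split()  # e.g. "postgis 5670669815"
            tags[ext] = tag
        return tags

    def archive_key(ext: str, tags: dict[str, str], pg_version: str) -> str:
        # The key embeds the extension's own build tag, so an extension that
        # was not rebuilt resolves to the same S3 key as in the previous
        # build and never needs to be uploaded again.
        return f"{tags[ext]}/{pg_version}/extensions/{ext}.tar.zst"

With the tags defined above, archive_key("postgis", tags, "v15") yields "5670669815/v15/extensions/postgis.tar.zst" (the "v15" is illustrative); bumping only postgis's tag in the Dockerfile changes only that one key and leaves every other extension's archive untouched.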

View File: Python upload script

@@ -45,11 +45,18 @@ if __name__ == "__main__":
     BUILD_TAG = args.BUILD_TAG
     public_ext_list = args.public_extensions.split(",")
+    build_tags = {}
+    with open("build_tags.txt", "r") as f:
+        for line in f.readlines():
+            ext, build_tag = line.strip().split(" ")
+            build_tags[ext] = build_tag
     ext_index = {}
     library_index = {}
     EXT_PATH = Path("extensions")
     for extension in EXT_PATH.iterdir():
         if extension.is_dir():
+            build_tag = build_tags[extension.name]
             control_data = {}
             for control_file in extension.glob("*.control"):
                 if control_file.suffix != ".control":
@@ -58,8 +65,15 @@ if __name__ == "__main__":
                     control_data[control_file.name] = f.read()
             ext_index[extension.name] = {
                 "control_data": control_data,
-                "archive_path": f"{BUILD_TAG}/{pg_version}/extensions/{extension.name}.tar.zst",
+                "archive_path": f"{build_tag}/{pg_version}/extensions/{extension.name}.tar.zst",
             }
+            # if we didn't build the extension for this build tag,
+            # then we don't need to re-upload it, so we delete it
+            if build_tag != BUILD_TAG:
+                import shutil
+                shutil.rmtree(extension)
         elif extension.suffix == ".zst":
             file_list = (
                 str(subprocess.check_output(["tar", "tf", str(extension)]), "utf-8")
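The pruning added in the second hunk is what actually prevents re-uploads: any extension that was not rebuilt under the current BUILD_TAG is deleted locally, so whatever later uploads the extensions/ directory never sees it. Below is a condensed model of that loop under the same assumptions as the sketch above, with shutil imported at the top of the module, where it idiomatically belongs, instead of inline as in the diff.

    import shutil
    from pathlib import Path

    BUILD_TAG = "5670669815"  # hypothetical: the tag of the current build

    # Same two-column format the Dockerfile writes with its RUN echo steps.
    build_tags = dict(
        line.split() for line in Path("build_tags.txt").read_text().splitlines()
    )

    for extension in Path("extensions").iterdir():
        if not extension.is_dir():
            continue
        # An extension whose recorded tag differs from the current BUILD_TAG
        # was already uploaded under its own tag, so S3 ends up holding one
        # copy per tag rather than one copy per build.
        if build_tags[extension.name] != BUILD_TAG:
            shutil.rmtree(extension)

The trade-off, as the Dockerfile NOTE hints, is manual bookkeeping: deduplication is only correct if an extension's tag is bumped whenever the extension itself changes; a stale tag would silently leave the old archive in place.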