mirror of https://github.com/coreos/coreos-assembler.git

create cosa buildfetch; deprecate cosa buildprep

buildprep will continue to work but will print a warning and wait
60 seconds before continuing.
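
The warning-and-wait shim (shown in full in the src/cmd-buildprep hunk at the
bottom of this commit) amounts to the following; the message text and delay
are taken verbatim from the diff:

    message = "WARNING: `cosa buildprep` is deprecated, please use `cosa buildfetch`"
    print(f"\033[93m{message}\033[00m", file=sys.stderr)  # yellow, on stderr
    print("sleeping 60 seconds", file=sys.stderr)
    time.sleep(60)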
Dusty Mabe
2021-10-20 10:13:32 -04:00
parent d45653a093
commit 0d90ad4208
2 changed files with 235 additions and 0 deletions

228
src/cmd-buildfetch Executable file

@@ -0,0 +1,228 @@
#!/usr/bin/env python3
# NOTE: PYTHONUNBUFFERED is set in the entrypoint for unbuffered output
#
# Fetches the bare minimum from external servers to create the next build. May
# require configured AWS credentials if bucket and objects are not public.
import argparse
import os
import subprocess
import sys
import requests
import shutil
from tenacity import retry, retry_if_exception_type
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from cosalib.builds import Builds, BUILDFILES
from cosalib.cmdlib import (
    get_basearch,
    load_json,
    retry_callback,
    retry_stop,
    rm_allow_noent)
from cosalib.s3 import download_file, head_bucket, head_object

retry_requests_exception = (retry_if_exception_type(requests.Timeout) |
                            retry_if_exception_type(requests.ReadTimeout) |
                            retry_if_exception_type(requests.ConnectTimeout) |
                            retry_if_exception_type(requests.ConnectionError))

def main():
    args = parse_args()

    if args.url.startswith("s3://"):
        fetcher = S3Fetcher(args.url)
    elif args.url.startswith("http://") or args.url.startswith("https://"):
        fetcher = HTTPFetcher(args.url)
    elif args.url.startswith("file://") or args.url.startswith("/"):
        fetcher = LocalFetcher(args.url)
    else:
        raise Exception("Invalid scheme: only file://, s3://, and http(s):// supported")

    builds = None
    if fetcher.exists('builds.json'):
        if not args.refresh:
            if os.path.isfile(BUILDFILES['sourcedata']):
                # If we have local builds, don't overwrite that by default.
                if subprocess.call(['cmp', BUILDFILES['sourcedata'], BUILDFILES['list']],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL) != 0:
                    raise SystemExit(f"{BUILDFILES['list']} modified locally, run with --refresh")
            fetcher.fetch_json('builds.json')
        else:
            fetcher.fetch('builds.json', dest=BUILDFILES['sourcedata'])
            print(f"Updated {BUILDFILES['sourcedata']}")
            return

        # Record the origin and original state
        with open(BUILDFILES['sourceurl'], 'w') as f:
            f.write(args.url + '\n')
        subprocess.check_call(['cp-reflink', BUILDFILES['list'], BUILDFILES['sourcedata']])
        builds = Builds()
    else:
        print("No builds.json found")
        return

    if builds.is_empty():
        print("Remote has empty build list!")
        return

    buildid = args.build or builds.get_latest()

    # Let's handle args.arch. If the user didn't specify an arch
    # then operate on the current arch of the system. If the user
    # gave us the special value of --arch=all then download the
    # build metadata for all architectures related to this build.
    if len(args.arch) == 0:
        arches = [get_basearch()]
    elif args.arch == ['all']:
        arches = builds.get_build_arches(buildid)
    else:
        arches = args.arch
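    # e.g. (hypothetical inputs): no --arch flag -> arches == ["x86_64"] on an
    # x86_64 host; --arch=all -> every arch recorded for the build;
    # --arch=aarch64 --arch=s390x -> ["aarch64", "s390x"]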
    for arch in arches:
        # If the architecture doesn't exist, assume there are no builds
        # for it yet, which can only happen if the user passed in an
        # explicit architecture value; return early.
        if arch not in builds.get_build_arches(buildid):
            print(f"No {arch} artifacts for build {buildid}")
            return

        builddir = builds.get_build_dir(buildid, arch)
        os.makedirs(builddir, exist_ok=True)

        # trim out the leading builds/
        assert builddir.startswith("builds/")
        builddir = builddir[len("builds/"):]

        objects = ['meta.json', 'ostree-commit-object']
        for f in objects:
            fetcher.fetch(f'{builddir}/{f}')

        buildmeta = load_json(f'builds/{builddir}/meta.json')
        if args.ostree:
            f = buildmeta['images']['ostree']['path']
            fetcher.fetch(f'{builddir}/{f}')

    # and finally the symlink
    if args.build is None:
        rm_allow_noent('builds/latest')
        os.symlink(buildid, 'builds/latest')

    # also nuke any matching local OSTree ref, since we want to build on
    # top of this new one
    if 'ref' in buildmeta and os.path.isdir('tmp/repo'):
        subprocess.check_call(['ostree', 'refs', '--repo', 'tmp/repo',
                               '--delete', buildmeta['ref']],
                              stdout=subprocess.DEVNULL)

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("url", metavar='URL',
                        help="URL from which to fetch metadata")
    parser.add_argument("-b", "--build", action='store',
                        help="Fetch specified build instead of latest")
    parser.add_argument("--ostree", action='store_true',
                        help="Also download full OSTree commit")
    parser.add_argument("--refresh", action='store_true',
                        help=f"Assuming local changes, only update {BUILDFILES['sourcedata']}")
    parser.add_argument("--arch", default=[], action='append',
                        help="the target architecture(s)")
    return parser.parse_args()

class Fetcher(object):
    def __init__(self, url_base):
        self.url_base = url_base

    def fetch(self, path, dest=None):
        # if no specific dest given, assume it's a path under builds/
        if dest is None:
            dest = f'builds/{path}'
        # NB: `urllib.parse.urljoin()` does not do what one thinks it does.
        # Using `os.path.join()` is a hack, but eh... we're not planning to run
        # on Windows anytime soon.
        url = os.path.join(self.url_base, path)
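        # e.g. (hypothetical values): with url_base="https://example.com/prod"
        # and path="builds.json", os.path.join() yields
        # "https://example.com/prod/builds.json", whereas urljoin() would drop
        # the trailing "prod" component of the base URL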
print(f"Fetching: {url}")
# ensure the dir for dest file exists
# otherwise s3 download_file won't be able to write temp file
os.makedirs(os.path.dirname(dest), exist_ok=True)
self.fetch_impl(url, dest)
return dest
def fetch_impl(self, url, dest):
raise NotImplementedError
def exists_impl(self, url):
raise NotImplementedError
def fetch_json(self, path):
return load_json(self.fetch(path))
def exists(self, path):
url = os.path.join(self.url_base, path)
return self.exists_impl(url)
class HTTPFetcher(Fetcher):
    def __init__(self, url_base):
        super().__init__(url_base)

    @retry(stop=retry_stop, retry=retry_requests_exception, before_sleep=retry_callback)
    def fetch_impl(self, url, dest):
        # notice we don't use `stream=True` here; the stuff we're fetching for
        # now is super small
        with requests.get(url) as r:
            r.raise_for_status()
            with open(dest, mode='wb') as f:
                f.write(r.content)

    @retry(stop=retry_stop, retry=retry_requests_exception, before_sleep=retry_callback)
    def exists_impl(self, url):
        with requests.head(url) as r:
            if r.status_code == 200:
                return True
            # just treat 403 as ENOENT too; this is common for APIs to do (like
            # AWS) and we don't support HTTP basic auth here anyway
            if r.status_code in [404, 403]:
                return False
            raise Exception(f"Received rc {r.status_code} for {url}")

class S3Fetcher(Fetcher):
    def fetch_impl(self, url, dest):
        assert url.startswith("s3://")
        bucket, key = url[len("s3://"):].split('/', 1)
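        # e.g. (hypothetical): "s3://example-bucket/prod/builds.json"
        #   -> bucket="example-bucket", key="prod/builds.json"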
        # this call does not need the retry decorator, as download_file
        # already retries automatically based on the s3config settings
        download_file(bucket, key, dest)

    def exists_impl(self, url):
        assert url.startswith("s3://")
        bucket, key = url[len("s3://"):].split('/', 1)
        # sanity check that the bucket exists and we have access to it
        head_bucket(bucket=bucket)
        return head_object(bucket=bucket, key=key)

class LocalFetcher(Fetcher):
    def __init__(self, url_base):
        if url_base.startswith("file://"):
            url_base = url_base[len("file://"):]
        super().__init__(url_base)

    def fetch_impl(self, url, dest):
        shutil.copyfile(url, dest)

    def exists_impl(self, url):
        return os.path.exists(url)


if __name__ == '__main__':
    sys.exit(main())

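For illustration, a minimal sketch of how the fetcher classes above compose
(the mirror path is hypothetical, and it assumes the newest-first builds.json
schema that coreos-assembler writes):

    fetcher = LocalFetcher("/srv/mirror/fcos")    # or S3Fetcher / HTTPFetcher
    if fetcher.exists("builds.json"):
        data = fetcher.fetch_json("builds.json")  # written to builds/builds.json
        print("newest build:", data["builds"][0]["id"])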
7
src/cmd-buildprep

@@ -10,6 +10,7 @@ import subprocess
import sys
import requests
import shutil
import time
from tenacity import retry, retry_if_exception_type
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -32,6 +33,12 @@ retry_requests_exception = (retry_if_exception_type(requests.Timeout) |
def main():
    args = parse_args()

    # print deprecation warning and sleep
    message = "WARNING: `cosa buildprep` is deprecated, please use `cosa buildfetch`"
    print(f"\033[93m{message}\033[00m", file=sys.stderr)
    print("sleeping 60 seconds", file=sys.stderr)
    time.sleep(60)

    if args.url.startswith("s3://"):
        fetcher = S3Fetcher(args.url)
    elif args.url.startswith("http://") or args.url.startswith("https://"):