Source code for s3_tools.objects.list

"""List S3 bucket objects."""
from pathlib import Path
from typing import (
    Dict,
    Optional,
    List,
    Union,
)

import fnmatch

import boto3


[docs]def list_objects( bucket: str, prefix: Union[str, Path] = "", search_str: Optional[str] = None, max_keys: int = 1000, aws_auth: Dict[str, str] = {}, as_paths: bool = False, ) -> List[Union[str, Path]]: """Retrieve the list of objects from AWS S3 bucket under a given prefix and search string. Parameters ---------- bucket: str AWS S3 bucket where the objects are stored. prefix: Union[str, Path] Prefix where the objects are under. search_str: str Basic search string to filter out keys on result (uses Unix shell-style wildcards), by default is None. For more about the search check "fnmatch" package. max_keys: int Max number of keys to have pagination. aws_auth: Dict[str, str] Contains AWS credentials, by default is empty. as_paths: bool If True, the keys are returned as Path objects, otherwise as strings, by default is False. Returns ------- List[Union[str, Path]] List of keys inside the bucket, under the path, and filtered. Examples -------- >>> list_objects(bucket="myBucket", prefix="myData") [ "myData/myFile.data", "myData/myMusic/awesome.mp3", "myData/myDocs/paper.doc" ] >>> list_objects(bucket="myBucket", prefix="myData", search_str="*paper*", as_paths=True) [ Path("myData/myDocs/paper.doc") ] """ continuation_token: Optional[str] = None keys = [] session = boto3.session.Session(**aws_auth) s3 = session.client("s3") while True: list_kwargs = { "Bucket": bucket, "Prefix": Path(prefix).as_posix(), "MaxKeys": max_keys } if continuation_token: list_kwargs["ContinuationToken"] = continuation_token response = s3.list_objects_v2(**list_kwargs) if "Contents" in response: keys.extend([obj["Key"] for obj in response["Contents"]]) if not response.get("NextContinuationToken"): break continuation_token = response.get("NextContinuationToken") if isinstance(search_str, str): keys = fnmatch.filter(keys, search_str) return keys if not as_paths else [Path(key) for key in keys]