Skip to content

prefect_gcp.deployments.steps

Prefect deployment steps for code storage in and retrieval from Google Cloud Storage.

Classes

PullFromGcsOutput

Bases: TypedDict

The output of the pull_from_gcs step.

Source code in prefect_gcp/deployments/steps.py
29
30
31
32
33
34
35
36
class PullFromGcsOutput(TypedDict):
    """
    The output of the `pull_from_gcs` step.

    Attributes:
        bucket: The name of the GCS bucket the files were pulled from.
        folder: The folder (blob name prefix) inside the bucket that was pulled.
        directory: The local directory the files were downloaded into.
    """

    # Echo of the `bucket` argument given to the step.
    bucket: str
    # Echo of the `folder` argument given to the step.
    folder: str
    # Stringified local download directory.
    directory: str

PullProjectFromGcsOutput

Bases: PullFromGcsOutput

Deprecated. Use PullFromGcsOutput instead.

Source code in prefect_gcp/deployments/steps.py
39
40
41
@deprecated_callable(
    start_date="Jun 2023",
    help="Use `PullFromGcsOutput` instead.",
)
class PullProjectFromGcsOutput(PullFromGcsOutput):
    """
    Deprecated. Use `PullFromGcsOutput` instead.

    Declares no keys of its own; everything is inherited from
    `PullFromGcsOutput`.
    """

PushProjectToGcsOutput

Bases: PushToGcsOutput

Deprecated. Use PushToGcsOutput instead.

Source code in prefect_gcp/deployments/steps.py
24
25
26
@deprecated_callable(
    start_date="Jun 2023",
    help="Use `PushToGcsOutput` instead.",
)
class PushProjectToGcsOutput(PushToGcsOutput):
    """
    Deprecated. Use `PushToGcsOutput` instead.

    Declares no keys of its own; everything is inherited from
    `PushToGcsOutput`.
    """

PushToGcsOutput

Bases: TypedDict

The output of the push_to_gcs step.

Source code in prefect_gcp/deployments/steps.py
15
16
17
18
19
20
21
class PushToGcsOutput(TypedDict):
    """
    The output of the `push_to_gcs` step.

    Attributes:
        bucket: The name of the GCS bucket the files were uploaded to.
        folder: The folder (blob name prefix) inside the bucket that
            received the files.
    """

    # Echo of the `bucket` argument given to the step.
    bucket: str
    # Echo of the `folder` argument given to the step.
    folder: str

Functions

pull_from_gcs

Pulls the contents of a project from a GCS bucket to the current working directory.

Parameters:

Name Type Description Default
bucket str

The name of the GCS bucket where files are stored.

required
folder str

The folder in the GCS bucket where files are stored.

required
project Optional[str]

The GCP project the bucket belongs to. If not provided, the project will be inferred from the credentials or the local environment.

None
credentials Optional[Dict]

A dictionary containing the service account information and project used for authentication. If not provided, the application default credentials will be used.

None

Returns:

Type Description
PullProjectFromGcsOutput

A dictionary containing the bucket, folder, and local directory where files were downloaded.

Examples:

Pull from GCS using the default environment credentials:

build:
    - prefect_gcp.deployments.steps.pull_from_gcs:
        requires: prefect-gcp
        bucket: my-bucket
        folder: my-folder

Pull from GCS using credentials stored in a block:

build:
    - prefect_gcp.deployments.steps.pull_from_gcs:
        requires: prefect-gcp
        bucket: my-bucket
        folder: my-folder
        credentials: "{{ prefect.blocks.gcp-credentials.dev-credentials }}"

Pull from a GCS bucket using credentials stored in a service account file:

build:
    - prefect_gcp.deployments.steps.pull_from_gcs:
        requires: prefect-gcp
        bucket: my-bucket
        folder: my-folder
        credentials:
            project: my-project
            service_account_file: /path/to/service_account.json

Source code in prefect_gcp/deployments/steps.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def pull_from_gcs(
    bucket: str,
    folder: str,
    project: Optional[str] = None,
    credentials: Optional[Dict] = None,
) -> PullProjectFromGcsOutput:
    """
    Pulls the contents of a project from a GCS bucket to the current working directory.

    Every blob whose name starts with `folder` is downloaded to the path it has
    relative to `folder`, rooted at the current working directory. Blob names
    ending in `/` (folder placeholders) are skipped.

    Args:
        bucket: The name of the GCS bucket where files are stored.
        folder: The folder in the GCS bucket where files are stored.
        project: The GCP project the bucket belongs to. If not provided, the project will be
            inferred from the credentials or the local environment.
        credentials: A dictionary containing the service account information and project
            used for authentication. The keys read are `project`,
            `service_account_info` and `service_account_file`; when both service
            account keys are present, `service_account_info` takes precedence.
            If not provided, or if neither service account key is set, the
            application default credentials will be used.

    Returns:
        A dictionary containing the bucket, folder, and local directory where files were downloaded.

    Examples:
        Pull from GCS using the default environment credentials:
        ```yaml
        build:
            - prefect_gcp.deployments.steps.pull_from_gcs:
                requires: prefect-gcp
                bucket: my-bucket
                folder: my-folder
        ```

        Pull from GCS using credentials stored in a block:
        ```yaml
        build:
            - prefect_gcp.deployments.steps.pull_from_gcs:
                requires: prefect-gcp
                bucket: my-bucket
                folder: my-folder
                credentials: "{{ prefect.blocks.gcp-credentials.dev-credentials }}"
        ```

        Pull from a GCS bucket using credentials stored in a service account file:
        ```yaml
        build:
            - prefect_gcp.deployments.steps.pull_from_gcs:
                requires: prefect-gcp
                bucket: my-bucket
                folder: my-folder
                credentials:
                    project: my-project
                    service_account_file: /path/to/service_account.json
        ```

    """  # noqa
    local_path = Path.cwd()

    # An explicitly passed `project` wins; only fall back to the project
    # stored in the credentials when the caller did not supply one.
    if project is None and credentials:
        project = credentials.get("project")

    gcp_creds = None
    if credentials is not None:
        service_account_info = credentials.get("service_account_info")
        service_account_file = credentials.get("service_account_file")
        if service_account_info is not None:
            gcp_creds = Credentials.from_service_account_info(
                service_account_info,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )
        elif service_account_file is not None:
            gcp_creds = Credentials.from_service_account_file(
                service_account_file,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )

    # No usable service account details: use application default credentials.
    gcp_creds = gcp_creds or google.auth.default()[0]

    storage_client = StorageClient(credentials=gcp_creds, project=project)

    blobs = storage_client.list_blobs(bucket, prefix=folder)

    for blob in blobs:
        if blob.name.endswith("/"):
            # object is a folder and will be created if it contains any objects
            continue
        # Strip the remote `folder` prefix so files land directly under the
        # working directory instead of under a copy of the bucket layout.
        local_blob_download_path = PurePosixPath(
            local_path
            / relative_path_to_current_platform(blob.name).relative_to(folder)
        )
        Path(local_blob_download_path.parent).mkdir(parents=True, exist_ok=True)

        blob.download_to_filename(local_blob_download_path)

    return {
        "bucket": bucket,
        "folder": folder,
        "directory": str(local_path),
    }

pull_project_from_gcs

Deprecated. Use pull_from_gcs instead.

Source code in prefect_gcp/deployments/steps.py
252
253
254
255
256
257
@deprecated_callable(
    start_date="Jun 2023",
    help="Use `pull_from_gcs` instead.",
)
def pull_project_from_gcs(*args, **kwargs) -> PullProjectFromGcsOutput:
    """
    Deprecated. Use `pull_from_gcs` instead.

    All positional and keyword arguments are forwarded unchanged to
    `pull_from_gcs`, and its result is returned as-is.
    """
    pulled = pull_from_gcs(*args, **kwargs)
    return pulled

push_project_to_gcs

Deprecated. Use push_to_gcs instead.

Source code in prefect_gcp/deployments/steps.py
149
150
151
152
153
154
@deprecated_callable(
    start_date="Jun 2023",
    help="Use `push_to_gcs` instead.",
)
def push_project_to_gcs(*args, **kwargs) -> PushToGcsOutput:
    """
    Deprecated. Use `push_to_gcs` instead.

    All positional and keyword arguments are forwarded unchanged to
    `push_to_gcs`, and its result is returned as-is.
    """
    pushed = push_to_gcs(*args, **kwargs)
    return pushed

push_to_gcs

Pushes the contents of the current working directory to a GCS bucket, excluding files and folders specified in the ignore_file.

Parameters:

Name Type Description Default
bucket str

The name of the GCS bucket where files will be uploaded.

required
folder str

The folder in the GCS bucket where files will be uploaded.

required
project Optional[str]

The GCP project the bucket belongs to. If not provided, the project will be inferred from the credentials or the local environment.

None
credentials Optional[Dict]

A dictionary containing the service account information and project used for authentication. If not provided, the application default credentials will be used.

None
ignore_file

The name of the file containing ignore patterns.

'.prefectignore'

Returns:

Type Description
PushToGcsOutput

A dictionary containing the bucket and folder where files were uploaded.

Examples:

Push to a GCS bucket:

build:
    - prefect_gcp.deployments.steps.push_to_gcs:
        requires: prefect-gcp
        bucket: my-bucket
        folder: my-project

Push to a GCS bucket using credentials stored in a block:

build:
    - prefect_gcp.deployments.steps.push_to_gcs:
        requires: prefect-gcp
        bucket: my-bucket
        folder: my-folder
        credentials: "{{ prefect.blocks.gcp-credentials.dev-credentials }}"

Push to a GCS bucket using credentials stored in a service account file:

build:
    - prefect_gcp.deployments.steps.push_to_gcs:
        requires: prefect-gcp
        bucket: my-bucket
        folder: my-folder
        credentials:
            project: my-project
            service_account_file: /path/to/service_account.json

Source code in prefect_gcp/deployments/steps.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def push_to_gcs(
    bucket: str,
    folder: str,
    project: Optional[str] = None,
    credentials: Optional[Dict] = None,
    ignore_file: Optional[str] = ".prefectignore",
) -> PushToGcsOutput:
    """
    Pushes the contents of the current working directory to a GCS bucket,
    excluding files and folders specified in the ignore_file.

    The working directory is walked recursively; every non-directory entry is
    uploaded to `<folder>/<path relative to the working directory>`.

    Args:
        bucket: The name of the GCS bucket where files will be uploaded.
        folder: The folder in the GCS bucket where files will be uploaded.
        project: The GCP project the bucket belongs to. If not provided, the project
            will be inferred from the credentials or the local environment.
        credentials: A dictionary containing the service account information and project
            used for authentication. The keys read are `project`,
            `service_account_info` and `service_account_file`; when both
            service account keys are present, `service_account_info` takes
            precedence. If not provided, or if neither service account key is
            set, the application default credentials will be used.
        ignore_file: The name of the file containing ignore patterns. If it
            is falsy or the file does not exist, nothing is excluded.

    Returns:
        A dictionary containing the bucket and folder where files were uploaded.

    Examples:
        Push to a GCS bucket:
        ```yaml
        build:
            - prefect_gcp.deployments.steps.push_to_gcs:
                requires: prefect-gcp
                bucket: my-bucket
                folder: my-project
        ```

        Push to a GCS bucket using credentials stored in a block:
        ```yaml
        build:
            - prefect_gcp.deployments.steps.push_to_gcs:
                requires: prefect-gcp
                bucket: my-bucket
                folder: my-folder
                credentials: "{{ prefect.blocks.gcp-credentials.dev-credentials }}"
        ```

        Push to a GCS bucket using credentials stored in a service account
        file:
        ```yaml
        build:
            - prefect_gcp.deployments.steps.push_to_gcs:
                requires: prefect-gcp
                bucket: my-bucket
                folder: my-folder
                credentials:
                    project: my-project
                    service_account_file: /path/to/service_account.json
        ```

    """
    # An explicitly passed `project` wins; only fall back to the project
    # stored in the credentials when the caller did not supply one.
    if project is None and credentials:
        project = credentials.get("project")

    gcp_creds = None
    if credentials is not None:
        service_account_info = credentials.get("service_account_info")
        service_account_file = credentials.get("service_account_file")
        if service_account_info is not None:
            gcp_creds = Credentials.from_service_account_info(
                service_account_info,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )
        elif service_account_file is not None:
            gcp_creds = Credentials.from_service_account_file(
                service_account_file,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )

    # No usable service account details: use application default credentials.
    gcp_creds = gcp_creds or google.auth.default()[0]

    storage_client = StorageClient(credentials=gcp_creds, project=project)
    bucket_resource = storage_client.bucket(bucket)

    local_path = Path.cwd()

    # `None` means "no ignore file in effect": every file is uploaded.
    included_files = None
    if ignore_file and Path(ignore_file).exists():
        with open(ignore_file, "r") as f:
            ignore_patterns = f.readlines()
        included_files = filter_files(str(local_path), ignore_patterns)

    for local_file_path in local_path.expanduser().rglob("*"):
        relative_local_file_path = local_file_path.relative_to(local_path)
        if (
            included_files is not None
            and str(relative_local_file_path) not in included_files
        ):
            continue
        if local_file_path.is_dir():
            # Only files are uploaded; directories need no blob of their own.
            continue

        # `folder` is a str; dividing it by a Path yields a Path (reflected
        # `/`), which is then rendered with forward slashes for the blob name.
        remote_file_path = (folder / relative_local_file_path).as_posix()

        blob_resource = bucket_resource.blob(remote_file_path)
        blob_resource.upload_from_filename(local_file_path)

    return {
        "bucket": bucket,
        "folder": folder,
    }