From 0aae21298dc63f5e5b4cee98b3ac08163819c230 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 18 Sep 2024 14:17:29 +0200 Subject: [PATCH 1/3] Source Sharepoint: handle wrong folder name [skip ci] Signed-off-by: Artem Inzhyyants --- .../source-microsoft-sharepoint/metadata.yaml | 2 +- .../source-microsoft-sharepoint/pyproject.toml | 2 +- .../source_microsoft_sharepoint/stream_reader.py | 7 +++++-- .../source_microsoft_sharepoint/utils.py | 13 +++++++++++-- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml b/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml index 607388d9efc0..e79fb5110232 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml @@ -20,7 +20,7 @@ data: connectorSubtype: file connectorType: source definitionId: 59353119-f0f2-4e5a-a8ba-15d887bc34f6 - dockerImageTag: 0.5.1 + dockerImageTag: 0.5.2 dockerRepository: airbyte/source-microsoft-sharepoint githubIssueLabel: source-microsoft-sharepoint icon: microsoft-sharepoint.svg diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml index eddf09c21bf7..a06512f872ad 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "0.5.1" +version = "0.5.2" name = "source-microsoft-sharepoint" description = "Source implementation for Microsoft SharePoint." 
authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py index dfb07f06a04f..8db55c5c6609 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py @@ -18,7 +18,7 @@ from office365.graph_client import GraphClient from source_microsoft_sharepoint.spec import SourceMicrosoftSharePointSpec -from .utils import MicrosoftSharePointRemoteFile, execute_query_with_retry, filter_http_urls +from .utils import FolderNotFoundException, MicrosoftSharePointRemoteFile, execute_query_with_retry, filter_http_urls class SourceMicrosoftSharePointClient: @@ -187,7 +187,10 @@ def _get_files_by_drive_name(self, drives, folder_path): folder = drive.root folder_path_url = drive.web_url else: - folder = execute_query_with_retry(drive.root.get_by_path(folder_path).get()) + try: + folder = execute_query_with_retry(drive.root.get_by_path(folder_path).get()) + except FolderNotFoundException: + continue folder_path_url = drive.web_url + "/" + folder_path yield from self._list_directories_and_files(folder, folder_path_url) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py index 985b45146d8e..7a658c1b2431 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/utils.py @@ -1,6 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- - +import logging import time from datetime import datetime from enum import Enum @@ -9,6 +8,8 @@ from airbyte_cdk import AirbyteTracedException, FailureType from airbyte_cdk.sources.file_based.remote_file import RemoteFile +LOGGER = logging.getLogger("airbyte") + class SearchScope(Enum): OWN_DRIVES = "OWN_DRIVES" @@ -16,6 +17,10 @@ class SearchScope(Enum): BOTH = "BOTH" +class FolderNotFoundException(Exception): + pass + + class MicrosoftSharePointRemoteFile(RemoteFile): download_url: str @@ -77,6 +82,10 @@ def execute_query_with_retry(obj, max_retries=5, initial_retry_after=5, max_retr time.sleep(retry_after) retries += 1 retry_after = min(retry_after * 2, max_retry_after) # Double the wait time for next retry, up to a max limit + elif hasattr(ex, "response") and ex.response.status_code == HTTPStatus.NOT_FOUND: + error_message = f"Requested item/folder could not be found: url: {ex.response.url}" + LOGGER.warning(error_message) + raise FolderNotFoundException(error_message) else: # Re-raise exceptions that are not related to rate limits or service availability raise AirbyteTracedException.from_exception(ex, message="Caught unexpected exception") From 230b934b9650cd009fed4ec96269c1895ec87db1 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 18 Sep 2024 14:19:52 +0200 Subject: [PATCH 2/3] Source Sharepoint: update docs Signed-off-by: Artem Inzhyyants --- .../sources/microsoft-sharepoint.md | 51 ++++++++++--------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/docs/integrations/sources/microsoft-sharepoint.md b/docs/integrations/sources/microsoft-sharepoint.md index 8d59aa2dd0f4..5ab1607a36b9 100644 --- a/docs/integrations/sources/microsoft-sharepoint.md +++ b/docs/integrations/sources/microsoft-sharepoint.md @@ -139,31 +139,32 @@ The connector is restricted by normal Microsoft Graph [requests limitation](http
Expand to review -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------| -| 0.5.1 | 2024-08-24 | [44660](https://github.com/airbytehq/airbyte/pull/44660) | Update dependencies | -| 0.5.0 | 2024-08-19 | [42983](https://github.com/airbytehq/airbyte/pull/42983) | Migrate to CDK v4.5.1 | -| 0.4.5 | 2024-08-19 | [44382](https://github.com/airbytehq/airbyte/pull/44382) | Update dependencies | -| 0.4.4 | 2024-08-12 | [43743](https://github.com/airbytehq/airbyte/pull/43743) | Update dependencies | -| 0.4.3 | 2024-08-10 | [43565](https://github.com/airbytehq/airbyte/pull/43565) | Update dependencies | -| 0.4.2 | 2024-08-03 | [43235](https://github.com/airbytehq/airbyte/pull/43235) | Update dependencies | -| 0.4.1 | 2024-07-27 | [42704](https://github.com/airbytehq/airbyte/pull/42704) | Update dependencies | -| 0.4.0 | 2024-07-25 | [42008](https://github.com/airbytehq/airbyte/pull/42008) | Migrate to CDK v3.5.3 | -| 0.3.1 | 2024-07-20 | [42143](https://github.com/airbytehq/airbyte/pull/42143) | Update dependencies | -| 0.3.0 | 2024-07-16 | [42007](https://github.com/airbytehq/airbyte/pull/42007) | Migrate to CDK v2.4.0 | -| 0.2.11 | 2024-07-13 | [41688](https://github.com/airbytehq/airbyte/pull/41688) | Update dependencies | -| 0.2.10 | 2024-07-10 | [41589](https://github.com/airbytehq/airbyte/pull/41589) | Update dependencies | -| 0.2.9 | 2024-07-06 | [40917](https://github.com/airbytehq/airbyte/pull/40917) | Update dependencies | -| 0.2.8 | 2024-06-26 | [40539](https://github.com/airbytehq/airbyte/pull/40539) | Update dependencies | -| 0.2.7 | 2024-06-25 | [40357](https://github.com/airbytehq/airbyte/pull/40357) | Update dependencies | -| 0.2.6 | 2024-06-24 | [40233](https://github.com/airbytehq/airbyte/pull/40233) | Update dependencies | -| 0.2.5 | 2024-06-22 | [39987](https://github.com/airbytehq/airbyte/pull/39987) | Update dependencies | -| 
0.2.4 | 2024-05-29 | [38675](https://github.com/airbytehq/airbyte/pull/38675) | Avoid error on empty stream when running discover | -| 0.2.3 | 2024-04-17 | [37372](https://github.com/airbytehq/airbyte/pull/37372) | Make refresh token optional | -| 0.2.2 | 2024-03-28 | [36573](https://github.com/airbytehq/airbyte/pull/36573) | Update QL to 400 | -| 0.2.1 | 2024-03-22 | [36381](https://github.com/airbytehq/airbyte/pull/36381) | Unpin CDK | -| 0.2.0 | 2024-03-06 | [35830](https://github.com/airbytehq/airbyte/pull/35830) | Add fetching shared items | -| 0.1.0 | 2024-01-25 | [33537](https://github.com/airbytehq/airbyte/pull/33537) | New source | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------| +| 0.5.2 | 2024-09-18 | [45646](https://github.com/airbytehq/airbyte/pull/45646) | Fix: handle wrong folder name | +| 0.5.1 | 2024-08-24 | [44660](https://github.com/airbytehq/airbyte/pull/44660) | Update dependencies | +| 0.5.0 | 2024-08-19 | [42983](https://github.com/airbytehq/airbyte/pull/42983) | Migrate to CDK v4.5.1 | +| 0.4.5 | 2024-08-19 | [44382](https://github.com/airbytehq/airbyte/pull/44382) | Update dependencies | +| 0.4.4 | 2024-08-12 | [43743](https://github.com/airbytehq/airbyte/pull/43743) | Update dependencies | +| 0.4.3 | 2024-08-10 | [43565](https://github.com/airbytehq/airbyte/pull/43565) | Update dependencies | +| 0.4.2 | 2024-08-03 | [43235](https://github.com/airbytehq/airbyte/pull/43235) | Update dependencies | +| 0.4.1 | 2024-07-27 | [42704](https://github.com/airbytehq/airbyte/pull/42704) | Update dependencies | +| 0.4.0 | 2024-07-25 | [42008](https://github.com/airbytehq/airbyte/pull/42008) | Migrate to CDK v3.5.3 | +| 0.3.1 | 2024-07-20 | [42143](https://github.com/airbytehq/airbyte/pull/42143) | Update dependencies | +| 0.3.0 | 2024-07-16 | [42007](https://github.com/airbytehq/airbyte/pull/42007) 
| Migrate to CDK v2.4.0 | +| 0.2.11 | 2024-07-13 | [41688](https://github.com/airbytehq/airbyte/pull/41688) | Update dependencies | +| 0.2.10 | 2024-07-10 | [41589](https://github.com/airbytehq/airbyte/pull/41589) | Update dependencies | +| 0.2.9 | 2024-07-06 | [40917](https://github.com/airbytehq/airbyte/pull/40917) | Update dependencies | +| 0.2.8 | 2024-06-26 | [40539](https://github.com/airbytehq/airbyte/pull/40539) | Update dependencies | +| 0.2.7 | 2024-06-25 | [40357](https://github.com/airbytehq/airbyte/pull/40357) | Update dependencies | +| 0.2.6 | 2024-06-24 | [40233](https://github.com/airbytehq/airbyte/pull/40233) | Update dependencies | +| 0.2.5 | 2024-06-22 | [39987](https://github.com/airbytehq/airbyte/pull/39987) | Update dependencies | +| 0.2.4 | 2024-05-29 | [38675](https://github.com/airbytehq/airbyte/pull/38675) | Avoid error on empty stream when running discover | +| 0.2.3 | 2024-04-17 | [37372](https://github.com/airbytehq/airbyte/pull/37372) | Make refresh token optional | +| 0.2.2 | 2024-03-28 | [36573](https://github.com/airbytehq/airbyte/pull/36573) | Update QL to 400 | +| 0.2.1 | 2024-03-22 | [36381](https://github.com/airbytehq/airbyte/pull/36381) | Unpin CDK | +| 0.2.0 | 2024-03-06 | [35830](https://github.com/airbytehq/airbyte/pull/35830) | Add fetching shared items | +| 0.1.0 | 2024-01-25 | [33537](https://github.com/airbytehq/airbyte/pull/33537) | New source |
From 0180e962999805b58763484326d97978d59a7d82 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Thu, 19 Sep 2024 12:06:38 +0200 Subject: [PATCH 3/3] Source Sharepoint: update timeout Signed-off-by: Artem Inzhyyants --- .../source-microsoft-sharepoint/acceptance-test-config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-integrations/connectors/source-microsoft-sharepoint/acceptance-test-config.yml b/airbyte-integrations/connectors/source-microsoft-sharepoint/acceptance-test-config.yml index d201b7101373..0dea4a808cf3 100644 --- a/airbyte-integrations/connectors/source-microsoft-sharepoint/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-microsoft-sharepoint/acceptance-test-config.yml @@ -27,9 +27,11 @@ acceptance_tests: tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 3600 future_state: future_state_path: "integration_tests/abnormal_state.json" full_refresh: tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 3600