feat(LAB-3534): add stepIdIn and stepStatusIn filters (#1885)

BlueGrizzliBear · web-flow · commit a5cc12a78f8a · 2025-04-24T10:00:29.000+02:00
diff --git a/src/kili/adapters/kili_api_gateway/asset/mappers.py b/src/kili/adapters/kili_api_gateway/asset/mappers.py
@@ -64,4 +64,6 @@ def asset_where_mapper(filters: AssetFilters):
             "type": filters.issue_type,
             "status": filters.issue_status,
         },
+        "stepIdIn": filters.step_id_in,
+        "stepStatusIn": filters.step_status_in,
     }
diff --git a/src/kili/domain/asset/asset.py b/src/kili/domain/asset/asset.py
@@ -12,10 +12,12 @@
 
 AssetId = NewType("AssetId", str)
 AssetExternalId = NewType("AssetExternalId", str)
-
+AssetStatusInStep = NewType("AssetStatusInStep", str)
 
 AssetStatus = Literal["TODO", "ONGOING", "LABELED", "REVIEWED", "TO_REVIEW"]
 
+StatusInStep = Literal["TO_DO", "DOING", "PARTIALLY_DONE", "REDO", "DONE", "SKIPPED"]
+
 
 @dataclass
 class AssetFilters:
@@ -48,8 +50,6 @@ class AssetFilters:
     assignee_in: Optional[ListOrTuple[str]] = None
     assignee_not_in: Optional[ListOrTuple[str]] = None
     metadata_where: Optional[dict] = None
-    skipped: Optional[bool] = None
-    status_in: Optional[ListOrTuple[AssetStatus]] = None
     updated_at_gte: Optional[str] = None
     updated_at_lte: Optional[str] = None
     label_category_search: Optional[str] = None
@@ -59,3 +59,7 @@ class AssetFilters:
     inference_mark_lte: Optional[float] = None
     issue_type: Optional["IssueType"] = None
     issue_status: Optional["IssueStatus"] = None
+    skipped: Optional[bool] = None
+    status_in: Optional[ListOrTuple[AssetStatus]] = None
+    step_id_in: Optional[ListOrTuple[str]] = None
+    step_status_in: Optional[ListOrTuple[StatusInStep]] = None
diff --git a/src/kili/domain/project.py b/src/kili/domain/project.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Literal, NewType, Optional
+from typing import TYPE_CHECKING, Literal, NewType, Optional, TypedDict
 
 from .types import ListOrTuple
 
@@ -15,6 +15,14 @@
 ]
 
 
+class ProjectStep(TypedDict, total=True):
+    """Project step type: a dictionary holding the ``id`` and ``name`` of a step."""
+
+    # Deliberately not a dataclass: calling a TypedDict class builds a plain ``dict``,
+    # so ``@dataclass(frozen=True)`` would neither add fields nor make it immutable.
+    id: str
+    name: str
+
+
 class InputTypeEnum(str, Enum):
     """Input type enum."""
 
diff --git a/src/kili/presentation/client/asset.py b/src/kili/presentation/client/asset.py
@@ -17,15 +17,25 @@
 from typeguard import typechecked
 
 from kili.adapters.kili_api_gateway.helpers.queries import QueryOptions
-from kili.domain.asset import AssetExternalId, AssetFilters, AssetId, AssetStatus
+from kili.domain.asset.asset import (
+    AssetExternalId,
+    AssetFilters,
+    AssetId,
+    AssetStatus,
+    StatusInStep,
+)
 from kili.domain.issue import IssueStatus, IssueType
 from kili.domain.label import LabelType
 from kili.domain.project import ProjectId
 from kili.domain.types import ListOrTuple
 from kili.presentation.client.helpers.common_validators import (
     disable_tqdm_if_as_generator,
 )
+from kili.presentation.client.helpers.filter_conversion import (
+    convert_step_in_to_step_id_in_filter,
+)
 from kili.use_cases.asset import AssetUseCases
+from kili.use_cases.project.project import ProjectUseCases
 from kili.utils.logcontext import for_all_methods, log_call
 
 from .base import BaseClientMethods
@@ -80,8 +90,6 @@ def assets(
         label_honeypot_mark_lt: Optional[float] = None,
         label_type_in: Optional[List[LabelType]] = None,
         metadata_where: Optional[dict] = None,
-        skipped: Optional[bool] = None,
-        status_in: Optional[List[AssetStatus]] = None,
         updated_at_gte: Optional[str] = None,
         updated_at_lte: Optional[str] = None,
         label_category_search: Optional[str] = None,
@@ -112,6 +120,10 @@ def assets(
         external_id_strictly_in: Optional[List[str]] = None,
         external_id_in: Optional[List[str]] = None,
         label_output_format: Literal["dict", "parsed_label"] = "dict",
+        skipped: Optional[bool] = None,
+        status_in: Optional[List[AssetStatus]] = None,
+        step_name_in: Optional[List[str]] = None,
+        step_status_in: Optional[List[StatusInStep]] = None,
         *,
         as_generator: Literal[True],
     ) -> Generator[Dict, None, None]:
@@ -158,8 +170,6 @@ def assets(
         label_honeypot_mark_lt: Optional[float] = None,
         label_type_in: Optional[List[LabelType]] = None,
         metadata_where: Optional[dict] = None,
-        skipped: Optional[bool] = None,
-        status_in: Optional[List[AssetStatus]] = None,
         updated_at_gte: Optional[str] = None,
         updated_at_lte: Optional[str] = None,
         label_category_search: Optional[str] = None,
@@ -190,6 +200,10 @@ def assets(
         external_id_strictly_in: Optional[List[str]] = None,
         external_id_in: Optional[List[str]] = None,
         label_output_format: Literal["dict", "parsed_label"] = "dict",
+        skipped: Optional[bool] = None,
+        status_in: Optional[List[AssetStatus]] = None,
+        step_name_in: Optional[List[str]] = None,
+        step_status_in: Optional[List[StatusInStep]] = None,
         *,
         as_generator: Literal[False] = False,
     ) -> List[Dict]:
@@ -236,8 +250,6 @@ def assets(
         label_honeypot_mark_lt: Optional[float] = None,
         label_type_in: Optional[List[LabelType]] = None,
         metadata_where: Optional[dict] = None,
-        skipped: Optional[bool] = None,
-        status_in: Optional[List[AssetStatus]] = None,
         updated_at_gte: Optional[str] = None,
         updated_at_lte: Optional[str] = None,
         label_category_search: Optional[str] = None,
@@ -268,6 +280,10 @@ def assets(
         external_id_strictly_in: Optional[List[str]] = None,
         external_id_in: Optional[List[str]] = None,
         label_output_format: Literal["dict", "parsed_label"] = "dict",
+        skipped: Optional[bool] = None,
+        status_in: Optional[List[AssetStatus]] = None,
+        step_name_in: Optional[List[str]] = None,
+        step_status_in: Optional[List[StatusInStep]] = None,
         *,
         as_generator: bool = False,
     ) -> Union[Iterable[Dict], "pd.DataFrame"]:
@@ -289,8 +305,6 @@ def assets(
             metadata_where: Filters by the values of the metadata of the asset.
             honeypot_mark_gt: Deprecated. Use `honeypot_mark_gte` instead.
             honeypot_mark_lt: Deprecated. Use `honeypot_mark_lte` instead.
-            status_in: Returned assets should have a status that belongs to that list, if given.
-                Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`.
             label_type_in: Returned assets should have a label whose type belongs to that list, if given.
             label_author_in: Returned assets should have a label whose author belongs to that list, if given. An author can be designated by the first name, the last name, or the first name + last name.
             label_consensus_mark_gt: Deprecated. Use `label_consensus_mark_gte` instead.
@@ -300,7 +314,6 @@ def assets(
             label_created_at_lt: Deprecated. Use `label_created_at_lte` instead.
             label_honeypot_mark_gt: Deprecated. Use `label_honeypot_mark_gte` instead.
             label_honeypot_mark_lt: Deprecated. Use `label_honeypot_mark_lte` instead.
-            skipped: Returned assets should be skipped
             updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
             updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
             format: If equal to 'pandas', returns a pandas DataFrame
@@ -335,6 +348,15 @@ def assets(
             external_id_in: Returned assets should have external ids that partially match the ones in the list.
                 For example, with `external_id_in=['abc']`, any asset with an external id containing `'abc'` will be returned.
             label_output_format: If `parsed_label`, the labels in the assets will be parsed. More information on parsed labels in the [documentation](https://python-sdk-docs.kili-technology.com/latest/sdk/tutorials/label_parsing/).
+            skipped: Returned assets should be skipped
+                Only applicable if the project is in WorkflowV1 (legacy).
+            status_in: Returned assets should have a status that belongs to that list, if given.
+                Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`.
+                Only applicable if the project is in the WorkflowV1 (legacy).
+            step_name_in: Returned assets are in a step whose name belongs to that list, if given.
+                Only applicable if the project is in WorkflowV2.
+            step_status_in: Returned assets have the status in their step that belongs to that list, if given.
+                Only applicable if the project is in WorkflowV2.
 
         !!! info "Dates format"
             Date strings should have format: "YYYY-MM-DD"
@@ -431,6 +453,33 @@ def assets(
 
         disable_tqdm = disable_tqdm_if_as_generator(as_generator, disable_tqdm)
 
+        step_id_in = None
+        if (
+            step_name_in is not None
+            or step_status_in is not None
+            or status_in is not None
+            or skipped is not None
+        ):
+            project_use_cases = ProjectUseCases(self.kili_api_gateway)
+            project_steps = project_use_cases.get_project_steps(project_id)
+
+            if step_name_in is not None or step_status_in is not None or status_in is not None:
+                step_id_in = convert_step_in_to_step_id_in_filter(
+                    project_steps=project_steps,
+                    fields=fields,
+                    asset_filter_kwargs={
+                        "step_name_in": step_name_in,
+                        "step_status_in": step_status_in,
+                        "status_in": status_in,
+                        "skipped": skipped,
+                    },
+                )
+            elif skipped is not None and len(project_steps) != 0:
+                warnings.warn(
+                    "Filter skipped given : only use filter step_status_in with the SKIPPED step status instead for this project",
+                    stacklevel=1,
+                )
+
         asset_use_cases = AssetUseCases(self.kili_api_gateway)
         filters = AssetFilters(
             project_id=ProjectId(project_id),
@@ -474,6 +523,8 @@ def assets(
             assignee_not_in=assignee_not_in,
             issue_status=issue_status,
             issue_type=issue_type,
+            step_id_in=step_id_in,
+            step_status_in=step_status_in,
         )
         assets_gen = asset_use_cases.list_assets(
             filters,
diff --git a/src/kili/presentation/client/helpers/filter_conversion.py b/src/kili/presentation/client/helpers/filter_conversion.py
@@ -0,0 +1,77 @@
+"""Helpers converting step name filters into step id filters for client methods."""
+
+import warnings
+from typing import Dict, List, Optional
+
+from kili.domain.project import ProjectStep
+from kili.domain.types import ListOrTuple
+
+
+def extract_step_ids_from_project_steps(
+    project_steps: List[ProjectStep],
+    step_name_in: List[str],
+) -> List[str]:
+    """Extract the ids of the project steps whose name is listed in `step_name_in`.
+
+    Args:
+        project_steps: Steps of the project, each one holding an `id` and a `name`.
+        step_name_in: Names of the steps to look for.
+
+    Returns:
+        The ids of the matching steps, in the order of `project_steps`.
+
+    Raises:
+        ValueError: If a name in `step_name_in` does not match any project step.
+    """
+    # Build the lookup sets once instead of rebuilding a list for every requested name.
+    known_names = {step["name"] for step in project_steps}
+    unmatched_names = [name for name in step_name_in if name not in known_names]
+    if unmatched_names:
+        raise ValueError(f"The following step names do not match any steps: {unmatched_names}")
+
+    requested_names = set(step_name_in)
+    return [step["id"] for step in project_steps if step["name"] in requested_names]
+
+
+def convert_step_in_to_step_id_in_filter(
+    asset_filter_kwargs: Dict[str, object],
+    project_steps: List[ProjectStep],
+    fields: Optional[ListOrTuple[str]] = None,
+) -> Optional[List[str]]:
+    """Check the workflow filters and convert `step_name_in` into a list of step ids.
+
+    Args:
+        asset_filter_kwargs: Asset filters; the keys `step_name_in`, `step_status_in`,
+            `status_in` and `skipped` are read, the dictionary is not modified.
+        project_steps: Steps of the project; an empty list means the project has no steps.
+        fields: Requested asset fields, only used to warn when `status` is requested.
+
+    Returns:
+        The ids of the steps named in `step_name_in`, or None if that filter is not given.
+
+    Raises:
+        ValueError: If the given filters do not fit the project, if a step name is unknown,
+            or if `step_name_in` is not a list or tuple of strings.
+    """
+    step_name_in = asset_filter_kwargs.get("step_name_in")
+    step_status_in = asset_filter_kwargs.get("step_status_in")
+    status_in = asset_filter_kwargs.get("status_in")
+    skipped = asset_filter_kwargs.get("skipped")
+
+    if not project_steps:
+        # Project without steps: the step filters cannot apply.
+        if step_name_in is not None or step_status_in is not None:
+            raise ValueError(
+                "Filters step_name_in and/or step_status_in given : use filter status_in for this project."
+            )
+        return None
+
+    if step_status_in is not None and status_in is not None:
+        raise ValueError(
+            "Filters step_status_in and status_in both given : only use filter step_status_in for this project."
+        )
+    if step_name_in is not None and status_in is not None:
+        raise ValueError(
+            "Filters step_name_in and status_in both given : use filter step_status_in instead of status_in for this project."  # pylint: disable=line-too-long
+        )
+    if status_in is not None:
+        warnings.warn(
+            "Filter status_in given : use filters step_status_in and step_name_in instead for this project.",
+            stacklevel=1,
+        )
+    if skipped is not None:
+        warnings.warn(
+            "Filter skipped given : only use filter step_status_in with the SKIPPED step status instead for this project",  # pylint: disable=line-too-long
+            stacklevel=1,
+        )
+    if fields and "status" in fields:
+        warnings.warn(
+            "Field status requested : request fields step and stepStatus instead for this project",
+            stacklevel=1,
+        )
+
+    if step_name_in is None:
+        return None
+
+    # A tuple is a valid ListOrTuple value; anything else must fail loudly rather than
+    # being dropped, otherwise the query would silently run without the step filter.
+    if not isinstance(step_name_in, (list, tuple)) or not all(
+        isinstance(item, str) for item in step_name_in
+    ):
+        raise ValueError(
+            f"Filter step_name_in must be a list of step names, got: {step_name_in!r}"
+        )
+    return extract_step_ids_from_project_steps(
+        project_steps=project_steps, step_name_in=list(step_name_in)
+    )
diff --git a/src/kili/presentation/client/label.py b/src/kili/presentation/client/label.py
@@ -34,12 +34,16 @@
     assert_all_arrays_have_same_size,
     disable_tqdm_if_as_generator,
 )
+from kili.presentation.client.helpers.filter_conversion import (
+    convert_step_in_to_step_id_in_filter,
+)
 from kili.services.export import export_labels
 from kili.services.export.exceptions import NoCompatibleJobError
 from kili.services.export.types import CocoAnnotationModifier, LabelFormat, SplitOption
 from kili.use_cases.asset.utils import AssetUseCasesUtils
 from kili.use_cases.label import LabelUseCases
 from kili.use_cases.label.types import LabelToCreateUseCaseInput
+from kili.use_cases.project.project import ProjectUseCases
 from kili.utils.labels.parsing import ParsedLabel
 from kili.utils.logcontext import for_all_methods, log_call
 
@@ -1146,8 +1150,8 @@ def export_labels(
                 - `label_reviewer_not_in`
                 - `assignee_in`
                 - `assignee_not_in`
-                - `skipped`
-                - `status_in`
+                - `skipped`: only applicable if the project is in the WorkflowV1 (legacy).
+                - `status_in`: only applicable if the project is in the WorkflowV1 (legacy).
                 - `label_category_search`
                 - `created_at_gte`
                 - `created_at_lte`
@@ -1156,6 +1160,8 @@ def export_labels(
                 - `inference_mark_gte`
                 - `inference_mark_lte`
                 - `metadata_where`
+                - `step_name_in`: only applicable if the project is in the WorkflowV2.
+                - `step_status_in`: only applicable if the project is in the WorkflowV2.
 
                 See the documentation of [`kili.assets()`](https://python-sdk-docs.kili-technology.com/latest/sdk/asset/#kili.queries.asset.__init__.QueriesAsset.assets) for more information.
             normalized_coordinates: This parameter is only effective on the Kili (a.k.a raw) format.
@@ -1204,6 +1210,31 @@ def is_rectangle(coco_annotation, coco_image, kili_annotation):
         else:
             resolved_asset_ids = cast(List[AssetId], asset_ids)
 
+        if asset_filter_kwargs and (
+            asset_filter_kwargs.get("step_name_in") is not None
+            or asset_filter_kwargs.get("step_status_in") is not None
+            or asset_filter_kwargs.get("status_in") is not None
+            or asset_filter_kwargs.get("skipped") is not None
+        ):
+            project_use_cases = ProjectUseCases(self.kili_api_gateway)
+            project_steps = project_use_cases.get_project_steps(project_id)
+
+            step_name_in = asset_filter_kwargs.get("step_name_in")
+            step_status_in = asset_filter_kwargs.get("step_status_in")
+            status_in = asset_filter_kwargs.get("status_in")
+            skipped = asset_filter_kwargs.get("skipped")
+            if step_name_in is not None or step_status_in is not None or status_in is not None:
+                step_id_in = convert_step_in_to_step_id_in_filter(
+                    project_steps=project_steps, asset_filter_kwargs=asset_filter_kwargs
+                )
+                asset_filter_kwargs.pop("step_name_in", None)
+                asset_filter_kwargs["step_id_in"] = step_id_in
+            elif skipped is not None and len(project_steps) != 0:
+                warnings.warn(
+                    "Filter skipped given : only use filter step_status_in with the SKIPPED step status instead for this project",
+                    stacklevel=1,
+                )
+
         try:
             return export_labels(
                 self,  # pyright: ignore[reportGeneralTypeIssues]
diff --git a/src/kili/services/export/tools.py b/src/kili/services/export/tools.py
@@ -140,6 +140,8 @@ def fetch_assets(
         "inference_mark_gte": asset_filter_kwargs.pop("inference_mark_gte", None),
         "inference_mark_lte": asset_filter_kwargs.pop("inference_mark_lte", None),
         "metadata_where": asset_filter_kwargs.pop("metadata_where", None),
+        "step_id_in": asset_filter_kwargs.pop("step_id_in", None),
+        "step_status_in": asset_filter_kwargs.pop("step_status_in", None),
     }
 
     if asset_filter_kwargs:
diff --git a/src/kili/use_cases/project/project.py b/src/kili/use_cases/project/project.py

Original file line number	Diff line number	Diff line change
`@@ -64,4 +64,6 @@ def asset_where_mapper(filters: AssetFilters):`
`64`	`64`	`"type": filters.issue_type,`
`65`	`65`	`"status": filters.issue_status,`
`66`	`66`	`},`
	`67`	`+ "stepIdIn": filters.step_id_in,`
	`68`	`+ "stepStatusIn": filters.step_status_in,`
`67`	`69`	`}`
Original file line number	Diff line number	Diff line change
`@@ -140,6 +140,8 @@ def fetch_assets(`
`140`	`140`	`"inference_mark_gte": asset_filter_kwargs.pop("inference_mark_gte", None),`
`141`	`141`	`"inference_mark_lte": asset_filter_kwargs.pop("inference_mark_lte", None),`
`142`	`142`	`"metadata_where": asset_filter_kwargs.pop("metadata_where", None),`
	`143`	`+ "step_id_in": asset_filter_kwargs.pop("step_id_in", None),`
	`144`	`+ "step_status_in": asset_filter_kwargs.pop("step_status_in", None),`
`143`	`145`	`}`
`144`	`146`
`145`	`147`	`if asset_filter_kwargs:`