feat: add last_activity_at to feedback datasets (argilla-io#3992)
# Description

This PR adds a new `DateTime` column named `last_activity_at` to the `datasets` table.

This new column will be updated in the following cases:
* When a dataset is created or updated.
* When a response for a record belonging to the dataset is created, updated, or deleted.

The idea is to use `last_activity_at` to record the last time any activity associated with a dataset happened, and to show it in the UI instead of `updated_at` as the recent-activity timestamp.
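
As a rough illustration, here is a minimal, self-contained sketch of that pattern (simplified from the `_touch_dataset_last_activity_at` helper added below in `src/argilla/server/contexts/datasets.py`; the toy model and the `sqlite+aiosqlite` URL are illustration-only assumptions, not project code):

```python
# Sketch of the "touch" pattern: bump last_activity_at with a direct SQL UPDATE,
# so the ORM-level onupdate hook that maintains updated_at is not triggered.
# Requires SQLAlchemy 2.x and aiosqlite (assumed here only for the demo).
import asyncio
from datetime import datetime

import sqlalchemy
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Dataset(Base):  # toy stand-in for the real Dataset model
    __tablename__ = "datasets"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column()
    updated_at: Mapped[datetime] = mapped_column(default=datetime.utcnow, onupdate=datetime.utcnow)
    last_activity_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)


async def touch_dataset_last_activity_at(db: AsyncSession, dataset: Dataset) -> None:
    # Only last_activity_at appears in the SET clause, so updated_at is untouched.
    await db.execute(
        sqlalchemy.update(Dataset).where(Dataset.id == dataset.id).values(last_activity_at=datetime.utcnow())
    )


async def main() -> None:
    engine = create_async_engine("sqlite+aiosqlite://")
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    async with AsyncSession(engine) as session:
        dataset = Dataset(name="my-dataset")
        session.add(dataset)
        await session.commit()

        # Simulate "a response was created for a record of this dataset".
        await touch_dataset_last_activity_at(session, dataset)
        await session.commit()

        await session.refresh(dataset)
        # last_activity_at moved forward, updated_at kept its creation value.
        print(dataset.updated_at, dataset.last_activity_at)


asyncio.run(main())
```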

Closes argilla-io#3981

**Type of change**

- [x] New feature (non-breaking change which adds functionality)

**How Has This Been Tested**

- [x] Added new tests and ran them locally.
- [x] Ran the tests using PostgreSQL locally.

**Checklist**

- [ ] I added relevant documentation
- [x] My code follows the style guidelines of this project
- [x] I did a self-review of my code
- [ ] I made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [ ] I filled out [the contributor form](https://tally.so/r/n9XrxK)
(see text above)
- [ ] I have added relevant notes to the CHANGELOG.md file (See
https://keepachangelog.com/)

---------

Co-authored-by: Damián Pumar <[email protected]>
jfcalvo and damianpumar authored Oct 20, 2023
1 parent 239b593 commit f758a94
Showing 14 changed files with 116 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -40,6 +40,7 @@ These are the section headers that we use:
- Added automatic model card generation through `ArgillaTrainer.save` ([#3857](https://github.com/argilla-io/argilla/pull/3857))
- Added `FeedbackDataset` `TaskTemplateMixin` for pre-defined task templates. ([#3969](https://github.com/argilla-io/argilla/pull/3969))
- A maximum limit of 50 on the number of options a ranking question can accept. ([#3975](https://github.com/argilla-io/argilla/pull/3975))
- New `last_activity_at` field to `FeedbackDataset` exposing when the last activity for the associated dataset occurs. ([#3992](https://github.com/argilla-io/argilla/pull/3992))

### Changed

@@ -81,7 +81,7 @@ export default {
},
{
name: "Updated at",
field: "updatedAt",
field: "lastActivityAt",
class: "date",
type: "date",
sortable: "true",
3 changes: 3 additions & 0 deletions frontend/e2e/common/dataset-api-mock.ts
@@ -65,6 +65,7 @@ export const newDatasetsMocked = [
workspace_id: "4e70e21a-7533-41e9-8a25-11d6ee3091be",
inserted_at: fakeDateMonthAgo(4),
updated_at: fakeDateMonthAgo(4),
last_activity_at: fakeDateMonthAgo(4),
},
{
id: "25073901-a24d-4416-b331-71a497b38063",
@@ -75,6 +76,7 @@ export const newDatasetsMocked = [
workspace_id: "4e70e21a-7533-41e9-8a25-11d6ee3091be",
inserted_at: fakeDateMonthAgo(4),
updated_at: fakeDateMonthAgo(4),
last_activity_at: fakeDateMonthAgo(4),
},
{
id: "25073901-a24d-4416-b331-71a497b38065",
@@ -85,6 +87,7 @@ export const newDatasetsMocked = [
workspace_id: "4e70e21a-7533-41e9-8a25-11d6ee3091bd",
inserted_at: fakeDateMonthAgo(4),
updated_at: fakeDateMonthAgo(4),
last_activity_at: fakeDateMonthAgo(4),
},
];

3 changes: 2 additions & 1 deletion frontend/v1/domain/entities/Dataset.ts
@@ -10,7 +10,8 @@ export class Dataset {
public readonly workspaceName: string,
public readonly tags: unknown,
public readonly createdAt: string,
public updatedAt: string
public updatedAt: string,
public readonly lastActivityAt: string
) {
this.originalGuidelines = guidelines;
}
8 changes: 6 additions & 2 deletions frontend/v1/infrastructure/repositories/DatasetRepository.ts
@@ -19,6 +19,7 @@ interface BackendDatasetFeedbackTaskResponse {
name: string;
status: string;
updated_at: string;
last_activity_at: string;
workspace_id: string;
}

@@ -42,7 +43,8 @@ export class DatasetRepository implements IDatasetRepository {
workspace,
{},
dataset.inserted_at,
dataset.updated_at
dataset.updated_at,
dataset.last_activity_at
);
}

@@ -60,6 +62,7 @@
dataset.workspace,
dataset.tags,
dataset.created_at,
dataset.last_updated,
dataset.last_updated
);
});
@@ -76,7 +79,8 @@
datasetFromBackend.workspace_name,
{},
datasetFromBackend.inserted_at,
datasetFromBackend.updated_at
datasetFromBackend.updated_at,
datasetFromBackend.last_activity_at
);
}
);
1 change: 1 addition & 0 deletions src/argilla/client/sdk/v1/datasets/models.py
@@ -29,6 +29,7 @@ class FeedbackDatasetModel(BaseModel):
allow_extra_metadata: Optional[bool] = True
status: Optional[str] = None
workspace_id: Optional[UUID] = None
last_activity_at: Optional[datetime] = None
inserted_at: datetime
updated_at: datetime

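
On the Python client side, the new field simply appears on `FeedbackDatasetModel`. A hedged sketch of what parsing a dataset payload would then look like (the payload values are invented, and it is assumed that `id` and `name` are the only other required fields of the model, which are not visible in the hunk above):

```python
# Illustration only: the payload values below are made up, and the model is
# assumed to require only id/name besides the timestamp fields shown above.
from argilla.client.sdk.v1.datasets.models import FeedbackDatasetModel

payload = {
    "id": "00000000-0000-0000-0000-000000000000",
    "name": "my-dataset",
    "status": "ready",
    "workspace_id": "00000000-0000-0000-0000-000000000001",
    "last_activity_at": "2023-10-19T16:06:16",
    "inserted_at": "2023-10-19T16:06:16",
    "updated_at": "2023-10-19T16:06:16",
}

dataset = FeedbackDatasetModel(**payload)
print(dataset.last_activity_at)  # datetime.datetime(2023, 10, 19, 16, 6, 16)
```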
@@ -0,0 +1,44 @@
# Copyright 2021-present, the Recognai S.L. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""add last_activity_at to datasets table
Revision ID: 84f6b9ff6076
Revises: b8458008b60e
Create Date: 2023-10-19 16:06:16.097130
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "84f6b9ff6076"
down_revision = "b8458008b60e"
branch_labels = None
depends_on = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("datasets", sa.Column("last_activity_at", sa.DateTime(), nullable=True))
op.execute("UPDATE datasets SET last_activity_at = updated_at")
with op.batch_alter_table("datasets") as batch_op:
batch_op.alter_column("last_activity_at", nullable=False)
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("datasets", "last_activity_at")
# ### end Alembic commands ###
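
The upgrade follows the common add-nullable, backfill, then enforce NOT NULL sequence, using `batch_alter_table` so the constraint change also works on SQLite. As a rough sketch, the revision could be applied and reverted locally through Alembic's programmatic API; the location of the Alembic configuration file below is an assumption:

```python
# Sketch only: drive Alembic from Python. The alembic.ini path is an assumption
# and should be adjusted to wherever the repository keeps its Alembic config.
from alembic import command
from alembic.config import Config

alembic_cfg = Config("src/argilla/server/alembic.ini")

command.upgrade(alembic_cfg, "84f6b9ff6076")    # add and backfill last_activity_at
command.downgrade(alembic_cfg, "b8458008b60e")  # drop the column again
```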
11 changes: 11 additions & 0 deletions src/argilla/server/contexts/datasets.py
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
from uuid import UUID

import sqlalchemy
from fastapi.encoders import jsonable_encoder
from sqlalchemy import and_, func, or_, select
from sqlalchemy.orm import contains_eager, joinedload, selectinload
@@ -62,6 +64,12 @@
NOT_VISIBLE_FOR_ANNOTATORS_ALLOWED_ROLES = [UserRole.admin]


async def _touch_dataset_last_activity_at(db: "AsyncSession", dataset: Dataset) -> Dataset:
return await db.execute(
sqlalchemy.update(Dataset).where(Dataset.id == dataset.id).values(last_activity_at=datetime.utcnow())
)


async def get_dataset_by_id(
db: "AsyncSession",
dataset_id: UUID,
@@ -711,6 +719,7 @@ async def create_response(
autocommit=False,
)
await db.flush([response])
await _touch_dataset_last_activity_at(db, record.dataset)
await search_engine.update_record_response(response)

await db.commit()
@@ -731,6 +740,7 @@ async def update_response(
replace_dict=True,
autocommit=False,
)
await _touch_dataset_last_activity_at(db, response.record.dataset)
await search_engine.update_record_response(response)

await db.commit()
@@ -741,6 +751,7 @@ async def update_response(
async def delete_response(db: "AsyncSession", search_engine: SearchEngine, response: Response) -> Response:
async with db.begin_nested():
response = await response.delete(db, autocommit=False)
await _touch_dataset_last_activity_at(db, response.record.dataset)
await search_engine.delete_record_response(response)

await db.commit()
13 changes: 13 additions & 0 deletions src/argilla/server/models/database.py
@@ -13,18 +13,21 @@
# limitations under the License.

import secrets
from datetime import datetime
from typing import Any, List, Optional
from uuid import UUID

from pydantic import parse_obj_as
from sqlalchemy import JSON, ForeignKey, String, Text, UniqueConstraint, and_, sql
from sqlalchemy import Enum as SAEnum
from sqlalchemy.engine.default import DefaultExecutionContext
from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.orm import Mapped, mapped_column, relationship

from argilla.server.enums import DatasetStatus, MetadataPropertyType, ResponseStatus, SuggestionType, UserRole
from argilla.server.models.base import DatabaseModel
from argilla.server.models.metadata_properties import MetadataPropertySettings
from argilla.server.models.mixins import inserted_at_current_value
from argilla.server.models.questions import QuestionSettings

# Include here the data model ref to be accessible for automatic alembic migration scripts
@@ -217,6 +220,10 @@ def __repr__(self):
DatasetStatusEnum = SAEnum(DatasetStatus, name="dataset_status_enum")


def _updated_at_current_value(context: DefaultExecutionContext) -> datetime:
return context.get_current_parameters(isolate_multiinsert_groups=False)["updated_at"]


class Dataset(DatabaseModel):
__tablename__ = "datasets"

@@ -225,6 +232,11 @@ class Dataset(DatabaseModel):
allow_extra_metadata: Mapped[bool] = mapped_column(default=True, server_default=sql.true())
status: Mapped[DatasetStatus] = mapped_column(DatasetStatusEnum, default=DatasetStatus.draft, index=True)
workspace_id: Mapped[UUID] = mapped_column(ForeignKey("workspaces.id", ondelete="CASCADE"), index=True)
inserted_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(default=inserted_at_current_value, onupdate=datetime.utcnow)
last_activity_at: Mapped[datetime] = mapped_column(
default=inserted_at_current_value, onupdate=_updated_at_current_value
)

workspace: Mapped["Workspace"] = relationship(back_populates="datasets")
fields: Mapped[List["Field"]] = relationship(
@@ -266,6 +278,7 @@ def __repr__(self):
return (
f"Dataset(id={str(self.id)!r}, name={self.name!r}, guidelines={self.guidelines!r}, "
f"status={self.status.value!r}, workspace_id={str(self.workspace_id)!r}, "
f"last_activity_at={str(self.last_activity_at)!r}, "
f"inserted_at={str(self.inserted_at)!r}, updated_at={str(self.updated_at)!r})"
)

6 changes: 3 additions & 3 deletions src/argilla/server/models/mixins.py
@@ -16,7 +16,7 @@
from typing import TYPE_CHECKING, Any, Dict, List, Set, TypeVar, Union

from pydantic import BaseModel
from sqlalchemy import func, sql
from sqlalchemy import sql
from sqlalchemy.dialects.mysql import insert as mysql_insert
from sqlalchemy.dialects.postgresql import insert as postgres_insert
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
@@ -174,10 +174,10 @@ async def save(self, db: "AsyncSession", autocommit: bool = True) -> Self:
return self


def _default_inserted_at(context: DefaultExecutionContext) -> datetime:
def inserted_at_current_value(context: DefaultExecutionContext) -> datetime:
return context.get_current_parameters(isolate_multiinsert_groups=False)["inserted_at"]


class TimestampMixin:
inserted_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(default=_default_inserted_at, onupdate=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(default=inserted_at_current_value, onupdate=datetime.utcnow)
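
The timestamp defaults above rely on SQLAlchemy's context-sensitive default functions: the callable receives the statement's execution context and can read the value already computed for another column of the same INSERT or UPDATE via `get_current_parameters()`. A minimal sketch of the resulting behaviour for `last_activity_at` (toy model, not the project code):

```python
# Toy model (not the project code) showing how last_activity_at piggybacks on
# the values computed for inserted_at (on INSERT) and updated_at (on UPDATE).
from datetime import datetime

from sqlalchemy import create_engine
from sqlalchemy.engine.default import DefaultExecutionContext
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


def inserted_at_current_value(context: DefaultExecutionContext) -> datetime:
    return context.get_current_parameters(isolate_multiinsert_groups=False)["inserted_at"]


def updated_at_current_value(context: DefaultExecutionContext) -> datetime:
    return context.get_current_parameters(isolate_multiinsert_groups=False)["updated_at"]


class Base(DeclarativeBase):
    pass


class Dataset(Base):
    __tablename__ = "datasets"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column()
    inserted_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(default=inserted_at_current_value, onupdate=datetime.utcnow)
    # Copies inserted_at on INSERT and the fresh updated_at on a regular ORM
    # UPDATE, so it only diverges when bumped explicitly (as the response
    # endpoints do via a direct SQL UPDATE).
    last_activity_at: Mapped[datetime] = mapped_column(
        default=inserted_at_current_value, onupdate=updated_at_current_value
    )


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    dataset = Dataset(name="my-dataset")
    session.add(dataset)
    session.commit()
    # Right after creation all three timestamps hold the same value.
    assert dataset.inserted_at == dataset.updated_at == dataset.last_activity_at

    dataset.name = "renamed-dataset"
    session.commit()
    # A normal ORM update refreshes updated_at, and last_activity_at follows it.
    assert dataset.last_activity_at == dataset.updated_at >= dataset.inserted_at
```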
1 change: 1 addition & 0 deletions src/argilla/server/schemas/v1/datasets.py
@@ -98,6 +98,7 @@ class Dataset(BaseModel):
allow_extra_metadata: bool
status: DatasetStatus
workspace_id: UUID
last_activity_at: datetime
inserted_at: datetime
updated_at: datetime

11 changes: 10 additions & 1 deletion tests/unit/server/api/v1/test_datasets.py
@@ -116,6 +116,7 @@ async def test_list_current_user_datasets(self, async_client: "AsyncClient", own
"allow_extra_metadata": True,
"status": "draft",
"workspace_id": str(dataset_a.workspace_id),
"last_activity_at": dataset_a.last_activity_at.isoformat(),
"inserted_at": dataset_a.inserted_at.isoformat(),
"updated_at": dataset_a.updated_at.isoformat(),
},
@@ -126,6 +127,7 @@
"allow_extra_metadata": True,
"status": "draft",
"workspace_id": str(dataset_b.workspace_id),
"last_activity_at": dataset_b.last_activity_at.isoformat(),
"inserted_at": dataset_b.inserted_at.isoformat(),
"updated_at": dataset_b.updated_at.isoformat(),
},
@@ -136,6 +138,7 @@
"allow_extra_metadata": True,
"status": "ready",
"workspace_id": str(dataset_c.workspace_id),
"last_activity_at": dataset_c.last_activity_at.isoformat(),
"inserted_at": dataset_c.inserted_at.isoformat(),
"updated_at": dataset_c.updated_at.isoformat(),
},
@@ -1735,6 +1738,7 @@ async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header:
"allow_extra_metadata": True,
"status": "draft",
"workspace_id": str(dataset.workspace_id),
"last_activity_at": dataset.last_activity_at.isoformat(),
"inserted_at": dataset.inserted_at.isoformat(),
"updated_at": dataset.updated_at.isoformat(),
}
@@ -1901,9 +1905,11 @@ async def test_create_dataset(self, async_client: "AsyncClient", db: "AsyncSessi
"allow_extra_metadata": False,
"status": "draft",
"workspace_id": str(workspace.id),
"last_activity_at": datetime.fromisoformat(response_body["last_activity_at"]).isoformat(),
"inserted_at": datetime.fromisoformat(response_body["inserted_at"]).isoformat(),
"updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(),
}
assert response_body["last_activity_at"] == response_body["inserted_at"] == response_body["updated_at"]

async def test_create_dataset_with_invalid_length_guidelines(
self, async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict
@@ -5201,16 +5207,19 @@ async def test_update_dataset(self, async_client: "AsyncClient", db: "AsyncSessi
guidelines = dataset.guidelines

assert response.status_code == 200
assert response.json() == {
response_body = response.json()
assert response_body == {
"id": str(dataset.id),
"name": name,
"guidelines": guidelines,
"allow_extra_metadata": True,
"status": "ready",
"workspace_id": str(dataset.workspace_id),
"last_activity_at": dataset.last_activity_at.isoformat(),
"inserted_at": dataset.inserted_at.isoformat(),
"updated_at": dataset.updated_at.isoformat(),
}
assert response_body["last_activity_at"] == response_body["updated_at"]

dataset = await db.get(Dataset, dataset.id)
assert dataset.name == name
8 changes: 7 additions & 1 deletion tests/unit/server/api/v1/test_records.py
@@ -19,7 +19,7 @@
import pytest
from argilla._constants import API_KEY_HEADER_NAME
from argilla.server.enums import ResponseStatus
from argilla.server.models import Record, Response, Suggestion, User, UserRole
from argilla.server.models import Dataset, Record, Response, Suggestion, User, UserRole
from argilla.server.search_engine import SearchEngine
from sqlalchemy import func, select
from sqlalchemy.orm import Session
@@ -830,13 +830,19 @@ async def test_create_record_response(
"status": status,
}

dataset_previous_last_activity_at = dataset.last_activity_at
dataset_previous_updated_at = dataset.updated_at

response = await async_client.post(
f"/api/v1/records/{record.id}/responses", headers=owner_auth_header, json=response_json
)

assert response.status_code == 201
assert (await db.execute(select(func.count(Response.id)))).scalar() == 1

assert dataset.last_activity_at > dataset_previous_last_activity_at
assert dataset.updated_at == dataset_previous_updated_at

response_body = response.json()
assert await db.get(Response, UUID(response_body["id"]))
assert response_body == {