From 8098ca9d2e391b17e5e3da5cfa5af042221bfe36 Mon Sep 17 00:00:00 2001 From: Gusted Date: Sun, 17 Dec 2023 16:39:58 +0100 Subject: [PATCH] [GITEA] Avoid `WHERE IN` for comment migration query - Rewrite `UpdateCommentsMigrationsByType` to not use `WHERE IN` as that's a performance diaster for MariaDB, it now use batching to query the the relevant comment IDs via JOINs (which is not possible in a UPDATE query for SQLite) and then update them in a seperate query. - Add unit test. - Resolves https://codeberg.org/forgejo/forgejo/issues/1856 --- models/issues/comment.go | 46 +++++++++++++++++++++++------------ models/issues/comment_test.go | 27 ++++++++++++++++++++ 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/models/issues/comment.go b/models/issues/comment.go index c38b35d8fa..4f804eca03 100644 --- a/models/issues/comment.go +++ b/models/issues/comment.go @@ -1177,22 +1177,36 @@ func DeleteComment(ctx context.Context, comment *Comment) error { // UpdateCommentsMigrationsByType updates comments' migrations information via given git service type and original id and poster id func UpdateCommentsMigrationsByType(ctx context.Context, tp structs.GitServiceType, originalAuthorID string, posterID int64) error { - _, err := db.GetEngine(ctx).Table("comment"). - Where(builder.In("issue_id", - builder.Select("issue.id"). - From("issue"). - InnerJoin("repository", "issue.repo_id = repository.id"). - Where(builder.Eq{ - "repository.original_service_type": tp, - }), - )). - And("comment.original_author_id = ?", originalAuthorID). - Update(map[string]any{ - "poster_id": posterID, - "original_author": "", - "original_author_id": 0, - }) - return err + const batchSize = 50 + for { + commentIDs := make([]int64, 0, batchSize) + if err := db.GetEngine(ctx). + Table("comment"). + Select("`comment`.id"). + Join("INNER", "issue", "`comment`.issue_id = `issue`.id"). + Join("INNER", "repository", "`repository`.id = `issue`.repo_id"). + Where("`repository`.original_service_type = ? AND `comment`.original_author_id = ?", tp, originalAuthorID). + Limit(batchSize, 0). + Find(&commentIDs); err != nil { + return err + } + + for _, commentID := range commentIDs { + if _, err := db.GetEngine(ctx).Table(&Comment{}).ID(commentID).Update(map[string]any{ + "poster_id": posterID, + "original_author": "", + "original_author_id": 0, + }); err != nil { + return err + } + } + + if len(commentIDs) < batchSize { + break + } + } + + return nil } // CreateAutoMergeComment is a internal function, only use it for CommentTypePRScheduledToAutoMerge and CommentTypePRUnScheduledToAutoMerge CommentTypes diff --git a/models/issues/comment_test.go b/models/issues/comment_test.go index c5bbfdedc2..e08bd7fbf5 100644 --- a/models/issues/comment_test.go +++ b/models/issues/comment_test.go @@ -12,6 +12,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/unittest" user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/structs" "github.com/stretchr/testify/assert" ) @@ -97,3 +98,29 @@ func TestMigrate_InsertIssueComments(t *testing.T) { unittest.CheckConsistencyFor(t, &issues_model.Issue{}) } + +func TestUpdateCommentsMigrationsByType(t *testing.T) { + assert.NoError(t, unittest.PrepareTestDatabase()) + + issue := unittest.AssertExistsAndLoadBean(t, &issues_model.Issue{ID: 1}) + repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: issue.RepoID}) + comment := unittest.AssertExistsAndLoadBean(t, &issues_model.Comment{ID: 1, IssueID: issue.ID}) + + // Set repository to migrated from Gitea. + repo.OriginalServiceType = structs.GiteaService + repo_model.UpdateRepositoryCols(db.DefaultContext, repo, "original_service_type") + + // Set comment to have an original author. + comment.OriginalAuthor = "Example User" + comment.OriginalAuthorID = 1 + comment.PosterID = 0 + _, err := db.GetEngine(db.DefaultContext).ID(comment.ID).Cols("original_author", "original_author_id", "poster_id").Update(comment) + assert.NoError(t, err) + + assert.NoError(t, issues_model.UpdateCommentsMigrationsByType(db.DefaultContext, structs.GiteaService, "1", 513)) + + comment = unittest.AssertExistsAndLoadBean(t, &issues_model.Comment{ID: 1, IssueID: issue.ID}) + assert.Empty(t, comment.OriginalAuthor) + assert.Empty(t, comment.OriginalAuthorID) + assert.EqualValues(t, 513, comment.PosterID) +}