Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run Flaky Tests Nightly #6654

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/run-flaky-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: All Tests Including Flaky

on:
push: # TODO: remove this trigger later (temporary, so the workflow runs on pushes to the branch listed below)
branches:
- "cdf/run-flaky-tests-nightly"

schedule:
# Run once a day at 00:00 UTC (GitHub Actions evaluates cron schedules in UTC)
- cron: '0 0 * * *'
Comment on lines +9 to +10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

midnight in what time zone?


workflow_dispatch:

env:
COMMIT: ${{ github.sha }}
PR_BASE_COMMIT: ${{ github.event.push.base.sha }}
DOCKER_COMPOSE_FILE: ./develop/github/docker-compose.yml
TEMPORAL_VERSION_CHECK_DISABLED: 1
BUILDKITE_ANALYTICS_TOKEN: ${{ secrets.BUILDKITE_ANALYTICS_TOKEN }}

jobs:
call-run-tests-workflow-with-flaky:
uses: temporalio/temporal/.github/workflows/run-tests.yml@cdf/run-flaky-tests-nightly
with:
include_flaky_tests: true
secrets: inherit
14 changes: 14 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ on:
- release/**
- cloud/**

workflow_call:
inputs:
include_flaky_tests:
description: 'Whether to include flaky tests'
type: boolean

workflow_dispatch:
inputs:
commit:
Expand Down Expand Up @@ -47,6 +53,9 @@ on:
description: 'List of DBs to test on (i.e. ["sqlite", "cassandra", "mysql8", "postgres12"])'
type: string
default: '["sqlite"]'
include_flaky_tests:
description: 'Whether to include flaky tests'
type: boolean

concurrency: # Auto-cancel existing runs in the PR when a new commit is pushed
group: run-tests-${{ github.head_ref || github.run_id }}
Expand Down Expand Up @@ -152,6 +161,7 @@ jobs:
UNIT_TEST_DIR: ${{ inputs.unit_test_directory }}
TEST_TIMEOUT: ${{ needs.set-up-single-test.outputs.test_timeout }}
RUN_SINGLE_UNIT_TEST: ${{ inputs.run_single_unit_test }}
INCLUDE_FLAKY_TESTS: ${{ inputs.include_flaky_tests }}
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -188,6 +198,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
env:
BUILDKITE_MESSAGE: "{\"job\": \"integration-test\"}"
INCLUDE_FLAKY_TESTS: ${{ inputs.include_flaky_tests }}
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -274,6 +285,7 @@ jobs:
SINGLE_TEST_ARGS: ${{ needs.set-up-single-test.outputs.single_test_args }}
TEST_TIMEOUT: ${{ needs.set-up-single-test.outputs.test_timeout }}
BUILDKITE_MESSAGE: "{\"job\": \"functional-test\", \"db\": \"${{ matrix.persistence_driver }}\"}"
INCLUDE_FLAKY_TESTS: ${{ inputs.include_flaky_tests }}
steps:
- name: Print functional test
run: echo "${{ needs.set-up-single-test.outputs.dbs }}" && echo "$SINGLE_TEST_ARGS"
Expand Down Expand Up @@ -346,6 +358,7 @@ jobs:
PERSISTENCE_TYPE: ${{ matrix.persistence_type }}
PERSISTENCE_DRIVER: ${{ matrix.persistence_driver }}
BUILDKITE_MESSAGE: "{\"job\": \"functional-test-xdc\", \"db\": \"${{ matrix.persistence_driver }}\"}"
INCLUDE_FLAKY_TESTS: ${{ inputs.include_flaky_tests }}
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -415,6 +428,7 @@ jobs:
PERSISTENCE_TYPE: ${{ matrix.persistence_type }}
PERSISTENCE_DRIVER: ${{ matrix.persistence_driver }}
ES_VERSION: ${{ matrix.es_version }}
INCLUDE_FLAKY_TESTS: ${{ inputs.include_flaky_tests }}
steps:
- uses: actions/checkout@v4
with:
Expand Down
4 changes: 3 additions & 1 deletion common/rpc/test/rpc_localstore_tls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ type localStoreRPCSuite struct {
}

func TestLocalStoreTLSSuite(t *testing.T) {
t.Skip("Skipping flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
t.Skip("flaky test")
}
suite.Run(t, &localStoreRPCSuite{
Suite: &suite.Suite{},
})
Expand Down
4 changes: 3 additions & 1 deletion tests/acquire_shard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,9 @@
// TestEventuallySucceeds verifies that we eventually succeed in acquiring the shard when we get a deadline exceeded
// error followed by a successful acquire shard call.
func (s *EventualSuccessSuite) TestEventuallySucceeds() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 230 in tests/acquire_shard_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
defer cancel()
Expand Down
5 changes: 4 additions & 1 deletion tests/describe_task_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package tests
import (
"context"
"flag"
"os"
"testing"
"time"

Expand Down Expand Up @@ -80,7 +81,9 @@ func (s *DescribeTaskQueueSuite) TestAddNoTasks_ValidateStats() {
}

func (s *DescribeTaskQueueSuite) TestAddSingleTask_ValidateStats() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}
Comment on lines +84 to +86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we put this logic in one place so these calls are just:

Suggested change
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}
s.skipAsFlaky()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. As discussed offline, I'll take over this PR from @carlydf and implement the changes.


s.OverrideDynamicConfig(dynamicconfig.MatchingUpdateAckInterval, 5*time.Second)
s.RunTestWithMatchingBehavior(func() { s.publishConsumeWorkflowTasksValidateStats(1, true) })
Expand Down
4 changes: 3 additions & 1 deletion tests/http_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,9 @@
}

func (s *HttpApiTestSuite) TestHTTPAPIHeaders() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 311 in tests/http_api_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

if s.HttpAPIAddress() == "" {
s.T().Skip("HTTP API server not enabled")
Expand Down
5 changes: 4 additions & 1 deletion tests/max_buffered_event_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ package tests

import (
"context"
"os"
"sync"
"testing"
"time"
Expand Down Expand Up @@ -138,7 +139,9 @@ func (s *MaxBufferedEventSuite) TestMaxBufferedEventsLimit() {
}

func (s *MaxBufferedEventSuite) TestBufferedEventsMutableStateSizeLimit() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

/*
This test starts a workflow, and block its workflow task, then sending
Expand Down
52 changes: 39 additions & 13 deletions tests/versioning_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,9 @@
}

func (s *VersioningIntegSuite) TestIndependentActivityTaskAssignment_Spooled_VersionedWorkflow() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1050 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.independentActivityTaskAssignmentSpooled(true) })
}
Expand Down Expand Up @@ -1239,7 +1241,9 @@
}

func (s *VersioningIntegSuite) TestIndependentActivityTaskAssignment_SyncMatch_VersionedWorkflow() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1244 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.independentActivityTaskAssignmentSyncMatch(true) })
}
Expand Down Expand Up @@ -1421,7 +1425,9 @@
}

func (s *VersioningIntegSuite) TestWorkflowTaskRedirectInRetryFirstTask() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1428 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.testWorkflowTaskRedirectInRetry(true) })
}
Expand Down Expand Up @@ -1732,7 +1738,9 @@
}

func (s *VersioningIntegSuite) TestDispatchUpgradeWait() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1741 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchUpgrade(true, false) })
}
Expand Down Expand Up @@ -1835,19 +1843,25 @@
)

func (s *VersioningIntegSuite) TestDispatchActivityOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1846 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchActivity(dontFailActivity, false, false) })
}

func (s *VersioningIntegSuite) TestDispatchActivityFailOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1854 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchActivity(failActivity, false, false) })
}

func (s *VersioningIntegSuite) TestDispatchActivityTimeoutOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 1862 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchActivity(timeoutActivity, false, false) })
}
Expand Down Expand Up @@ -2602,13 +2616,17 @@
}

func (s *VersioningIntegSuite) TestDispatchChildWorkflowOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {

Check failure on line 2619 in tests/versioning_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: os
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchChildWorkflow(false, false) })
}

func (s *VersioningIntegSuite) TestDispatchChildWorkflow() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchChildWorkflow(true, false) })
}
Expand Down Expand Up @@ -3084,7 +3102,9 @@
}

func (s *VersioningIntegSuite) TestDispatchContinueAsNewOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchContinueAsNew(false, false) })
}
Expand Down Expand Up @@ -3248,7 +3268,9 @@
}

func (s *VersioningIntegSuite) TestDispatchContinueAsNewUpgradeOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchContinueAsNewUpgrade(false) })
}
Expand Down Expand Up @@ -3600,13 +3622,17 @@
}

func (s *VersioningIntegSuite) TestDispatchCronOld() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchCron(false) })
}

func (s *VersioningIntegSuite) TestDispatchCron() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

s.RunTestWithMatchingBehavior(func() { s.dispatchCron(true) })
}
Expand Down
4 changes: 3 additions & 1 deletion tests/workflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,9 @@ func (s *WorkflowTestSuite) TestCompleteWorkflowTaskAndCreateNewOne() {
}

func (s *WorkflowTestSuite) TestWorkflowTaskAndActivityTaskTimeoutsWorkflow() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

id := "functional-timeouts-workflow-test"
wt := "functional-timeouts-workflow-test-type"
Expand Down
8 changes: 6 additions & 2 deletions tests/xdc/failover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1857,7 +1857,9 @@ func (s *FunctionalClustersTestSuite) TestTransientWorkflowTaskFailover() {
}

func (s *FunctionalClustersTestSuite) TestCronWorkflowStartAndFailover() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

namespace := "test-cron-workflow-start-and-failover-" + common.GenerateRandomString(5)
client1 := s.cluster1.FrontendClient() // active
Expand Down Expand Up @@ -1953,7 +1955,9 @@ func (s *FunctionalClustersTestSuite) TestCronWorkflowStartAndFailover() {
}

func (s *FunctionalClustersTestSuite) TestCronWorkflowCompleteAndFailover() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

namespace := "test-cron-workflow-complete-and-failover-" + common.GenerateRandomString(5)
client1 := s.cluster1.FrontendClient() // active
Expand Down
8 changes: 6 additions & 2 deletions tests/xdc/history_replication_signals_and_updates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ func (t *hrsuTest) newHrsuTestCluster(ns string, name string, cluster *testcore.
// TestAcceptedUpdateCanBeCompletedAfterFailoverAndFailback tests that an update can be accepted in one cluster, and completed in a
// different cluster, after a failover.
func (s *hrsuTestSuite) TestAcceptedUpdateCanBeCompletedAfterFailoverAndFailback() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

t, ctx, cancel := s.startHrsuTest()
defer cancel()
Expand Down Expand Up @@ -360,7 +362,9 @@ func (s *hrsuTestSuite) TestConflictResolutionReappliesUpdates() {
// updates have the same update ID. The test confirms that when the conflict is resolved, we do not reapply the
// UpdateAccepted event, since it has a conflicting ID.
func (s *hrsuTestSuite) TestConflictResolutionDoesNotReapplyAcceptedUpdateWithConflictingId() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

t, ctx, cancel := s.startHrsuTest()
defer cancel()
Expand Down
5 changes: 4 additions & 1 deletion tests/xdc/nexus_state_replication_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"io"
"net/http"
"net/http/httptest"
"os"
"slices"
"testing"
"time"
Expand Down Expand Up @@ -100,7 +101,9 @@ func (s *NexusStateReplicationSuite) TearDownSuite() {
// 9. Check that the operation completion triggers a workflow task when we poll on cluster1.
// 10. Complete the workflow.
func (s *NexusStateReplicationSuite) TestNexusOperationEventsReplicated() {
s.T().Skip("flaky test")
if os.Getenv("INCLUDE_FLAKY_TESTS") != "true" {
s.T().Skip("flaky test")
}

var callbackToken string
var publicCallbackUrl string
Expand Down
Loading