From ef6414be3e00e39e130bb53b932c5a2ddbcddbc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=A1vio=20Ramalho?= Date: Wed, 28 Oct 2020 16:45:35 +0100 Subject: [PATCH] Fix deadlock on harbor-core initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During harbor-core initialization, if the database takes longer to become ready, there is a risk of deadlock when checking for the TCP connection with the database. The `TestTCPConn` function uses unbuffered channels to check when the connection succeeds or times out. The timeout check is executed in parallel with the connection check (which runs in a goroutine). The deadlock happens when the goroutine execution takes longer than the function timeout (hence sending `cancel <- 1`) and the DialTimeout call succeeds (hence sending `success <- 1`). At this point both sides are blocked waiting for their channel values to be read. This is reproducible mostly on slow systems where initializing the database takes longer and finishes during the 5th `DialTimeout` call, which eventually exceeds the TestTCPConn timeout. This fix sets the `success` and `cancel` channels as buffered (non-blocking). Signed-off-by: Flávio Ramalho --- src/common/utils/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/utils/utils.go b/src/common/utils/utils.go index 2d7e11d93..c7003651f 100644 --- a/src/common/utils/utils.go +++ b/src/common/utils/utils.go @@ -89,8 +89,8 @@ func GenerateRandomString() string { // with the connection, in second // interval: the interval time for retring after failure, in second func TestTCPConn(addr string, timeout, interval int) error { - success := make(chan int) - cancel := make(chan int) + success := make(chan int, 1) + cancel := make(chan int, 1) go func() { n := 1