Skip to content

Commit

Permalink
[BACKPORT pg15-cherrypicks] all: Bulk port from master - 111
Browse files Browse the repository at this point in the history
Summary:
 12b2c40 [#23999] DocDB: Big shared memory segments
 b1e6329 [PLAT-15279] Add gzip compression to core dumps from DB.
 06472d5 [#24050] docdb: Fix re-packing rows after alter table add column with default value
 9009d11 [#23837] YSQL: Temporarily disable some tests with Connection Manager enabled
 11acca7 [#23325][#23326] yugabyted: Support for adding new databases for xCluster replication (Phase 2)
 96703da [PLAT-15465][PLAT-15466] Minor fixes in YNP
 c5aca3b [PLAT-14924][PLAT-12829][PLAT-15446] - ui bugs and improvements
 6e82692 [#23770] [#23797] YSQL: Stabilise some test failures with Connection Manager enabled
 b50bd1b [PLAT-15279] Adjusting the core pattern to create the cores with the core_ prefix for collect cores to catch it
 f692a60 [PLAT-14045] UBI-8 images don't have hostname
 d6a19da [PLAT-15377] Adding a global uncaught exception handler to yugaware
 acbb1ba [PLAT-15225] Verify there is no running master on nodes selected for master replacement
 Excluded: 3e93354 [#23686] YSQL: Build relcache foreign key list from YB catcache

Test Plan: Jenkins: rebase: pg15-cherrypicks

Reviewers: tfoucher, fizaa, telgersma

Differential Revision: https://phorge.dev.yugabyte.com/D38503
  • Loading branch information
yugabyte-ci authored and fizaaluthra committed Sep 27, 2024
1 parent 5ad81e0 commit fbbcd7f
Show file tree
Hide file tree
Showing 54 changed files with 1,967 additions and 403 deletions.
942 changes: 754 additions & 188 deletions bin/yugabyted

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions docker/images/yugabyte/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ EOR
fi
EOF

# Install hostname for ubi8 images.
# -y (--assumeyes) is required here: an image build has no terminal to answer dnf's
# confirmation prompt, so without it the transaction is aborted instead of installed.
RUN if [[ "$BASE_IMAGE" == "registry.access.redhat.com/ubi8/ubi"* ]]; then \
      dnf install -y hostname; \
    fi

# Ensure our base system is fully up to date
RUN set -x; \
$yum_upgrade -y \
Expand Down
9 changes: 9 additions & 0 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/BasePgSQLTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,15 @@ public class BasePgSQLTest extends BaseMiniClusterTest {
"ability to configure debug logs for connection manager to be at the same levels as " +
"tserver log levels.";

// Skip reason for tests that use long passwords; passed as the assumeFalse() message
// when the test is running with Connection Manager enabled (tracked as DB-10387).
protected static final String LONG_PASSWORD_SUPPORT_NEEDED =
"(DB-10387) This test leads to certain I/O errors due to the usage of long passwords when " +
"Connection Manager is enabled. Skipping this test with Connection Manager enabled.";

// Skip reason for tests that drop and recreate roles; passed as the assumeFalse() message
// when the test is running with Connection Manager enabled (tracked as DB-10760).
protected static final String RECREATE_USER_SUPPORT_NEEDED =
"(DB-10760) This test needs stricter statistic updates for when roles are recreated when " +
"Connection Manager is enabled. Skipping this test with Connection Manager enabled " +
"until the relevant code is pushed to master.";

// Warmup modes for Connection Manager during test runs.
protected static enum ConnectionManagerWarmupMode {
NONE,
Expand Down
13 changes: 13 additions & 0 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgAuthorization.java
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,11 @@ public void testRoleChanging() throws Exception {

@Test
public void testAttributes() throws Exception {
// (DB-10760) Role OID-based pool design is needed in addition to waiting
// for connection count-related statistics for this test to pass when
// Connection Manager is enabled. Skipping this test temporarily.
assumeFalse(BasePgSQLTest.RECREATE_USER_SUPPORT_NEEDED, isTestRunningWithConnectionManager());

// NOTE: The INHERIT attribute is tested separately in testMembershipInheritance.
try (Statement statement = connection.createStatement()) {
statement.execute("CREATE ROLE unprivileged");
Expand Down Expand Up @@ -3225,6 +3230,14 @@ public void testMultiNodeOwnershipChanges() throws Exception {

@Test
public void testLongPasswords() throws Exception {
// (DB-10387) (DB-10760) Using long passwords with Connection Manager
// causes I/O errors during test execution. Skip this test temporarily
// until support for the same can be provided with Connection Manager.
// This test will further need the support of role OID-based pooling
// to help support recreate role operations (DROP ROLE followed by
// CREATE ROLE).
assumeFalse(BasePgSQLTest.LONG_PASSWORD_SUPPORT_NEEDED, isTestRunningWithConnectionManager());

try (Statement statement = connection.createStatement()) {
statement.execute("CREATE ROLE unprivileged");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ protected Map<String, String> getTServerFlags() {

/**
 * Runs the {@code yb_triggers_schedule} pg_regress schedule.
 *
 * @throws Exception if restarting the cluster or running the schedule fails
 */
@Test
public void testPgRegressTrigger() throws Exception {
// (DB-12699) With Connection Manager enabled, test triggers are not realised
// on randomly allocated backend processes. Restart the cluster with warmup
// mode NONE so the test runs without a warmed-up pool of connections.
setConnMgrWarmupModeAndRestartCluster(ConnectionManagerWarmupMode.NONE);
runPgRegressTest("yb_triggers_schedule");
}
}
33 changes: 25 additions & 8 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/TestYbAsh.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.HashSet;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -69,6 +71,24 @@ private void executePgSleep(Statement statement, long seconds) throws Exception
statement.execute("SELECT pg_sleep(" + seconds + ")");
}

/**
 * Counts the rows in the ASH view that belong to the backend(s) reached through
 * {@code stmt}.
 *
 * <p>Without Connection Manager, the count covers the single PID reported by
 * {@code pg_backend_pid()}. With Connection Manager enabled, {@code pg_backend_pid()} is
 * queried {@code CONN_MGR_WARMUP_BACKEND_COUNT} times and the count covers every distinct
 * PID that was reported.
 *
 * @param stmt statement used to run the PID lookups and the ASH query
 * @return number of rows in {@code ASH_VIEW} whose pid matches
 * @throws Exception if any of the queries fails
 */
private int getAshBackendCount(Statement stmt) throws Exception {
  if (isTestRunningWithConnectionManager()) {
    // Parameterized set: keeps the stream below type-checked and de-duplicates PIDs
    // when the same backend answers more than one lookup.
    HashSet<Integer> pids = new HashSet<>();
    for (int i = 0; i < CONN_MGR_WARMUP_BACKEND_COUNT; i++) {
      pids.add(getSingleRow(stmt, "SELECT pg_backend_pid()").getInt(0));
    }
    String pidList = pids.stream().map(String::valueOf).collect(Collectors.joining(","));
    return getSingleRow(stmt, "SELECT COUNT(*) FROM " + ASH_VIEW +
        " WHERE pid IN (" +
        pidList +
        ")").getLong(0).intValue();
  } else {
    int pid = getSingleRow(stmt, "SELECT pg_backend_pid()").getInt(0);
    return getSingleRow(stmt, "SELECT COUNT(*) FROM " + ASH_VIEW +
        " WHERE pid = " + pid).getLong(0).intValue();
  }
}

/**
* We should get an error if we try to query the ASH view without
* enabling ASH
Expand Down Expand Up @@ -322,27 +342,24 @@ public void testSampleSize() throws Exception {
*/
@Test
public void testYsqlPids() throws Exception {
// (DB-12674) Choosing backend PID is not deterministic with random
// backend allocation, use round-robin allocation instead.
setConnMgrWarmupModeAndRestartCluster(ConnectionManagerWarmupMode.ROUND_ROBIN);
setAshConfigAndRestartCluster(100, ASH_SAMPLE_SIZE);

try (Statement statement = connection.createStatement()) {
statement.execute("CREATE TABLE test_table(k INT, v TEXT)");
for (int i = 0; i < 100; ++i) {
statement.execute(String.format("INSERT INTO test_table VALUES(%d, 'v-%d')", i, i));
}
int pid = getSingleRow(statement, "SELECT pg_backend_pid()").getInt(0);
int res = getSingleRow(statement, "SELECT COUNT(*) FROM " + ASH_VIEW +
" WHERE pid = " + pid).getLong(0).intValue();
assertGreaterThan(res, 0);
assertGreaterThan(getAshBackendCount(statement), 0);
}

try (Statement statement = connection.createStatement()) {
for (int i = 0; i < 100; ++i) {
statement.execute(String.format("SELECT * FROM test_table WHERE k = %d", i));
}
int pid = getSingleRow(statement, "SELECT pg_backend_pid()").getInt(0);
int res = getSingleRow(statement, "SELECT COUNT(*) FROM " + ASH_VIEW +
" WHERE pid = " + pid).getLong(0).intValue();
assertGreaterThan(res, 0);
assertGreaterThan(getAshBackendCount(statement), 0);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ pg_max_mem_mb: 0
install_retry_count: 5
vm_max_map_count: 262144
yb_metrics_dir: "{{ yb_home_dir }}/metrics"
bin_path: "/usr/local/bin"
42 changes: 41 additions & 1 deletion managed/devops/roles/provision-cluster-server/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,50 @@
state: present
tags: yb-prebuilt-ami

- name: Create directory for core dumps
file:
path: "{{ yb_home_dir }}/cores/"
state: directory
mode: '0755'
owner: "{{ user_name }}"
group: "{{ user_name }}"
tags: yb-prebuilt-ami

- name: Ensure gzip is installed
command: which gzip
register: gzip_installed
ignore_errors: yes
tags: yb-prebuilt-ami

- name: Fail if gzip is not installed
fail:
msg: "gzip is not installed, please install it manually."
when: gzip_installed.rc != 0
tags: yb-prebuilt-ami

# Installs the helper that the kernel.core_pattern setting pipes core dumps into.
# The script receives the intended core file path as $1, reads the dump from stdin and
# stores it gzip-compressed under the cores directory.
- name: Create core dump compression script
  become: yes
  become_method: sudo
  copy:
    dest: "{{ bin_path }}/compress_core.sh"
    mode: '0755'
    content: |
      #!/bin/bash
      # Same directory that the "Create directory for core dumps" task creates.
      DUMP_DIR="{{ yb_home_dir }}/cores"
      # $$ (this script's PID) keeps names distinct across invocations.
      CORE_FILE="$DUMP_DIR/$(basename "$1").$$.gz"
      # Look gzip up on a fixed PATH instead of relying on the caller's environment.
      GZIP_PATH=$(PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" which gzip)
      if [[ -z "$GZIP_PATH" ]]; then
        echo "gzip not found, aborting." >> "$DUMP_DIR/custom_core_dump.log"
        exit 1
      fi
      # Run the gzip that was actually located above; it compresses stdin to stdout.
      "$GZIP_PATH" >> "$CORE_FILE"
      chmod 644 "$CORE_FILE"
  tags: yb-prebuilt-ami

- name: Provision | Create core dump kernel pattern
sysctl:
name: kernel.core_pattern
value: "{{ yb_home_dir }}/cores/core_%p_%t_%E"
value: "|{{ bin_path }}/compress_core.sh {{ yb_home_dir }}/cores/core_yb.%e.%p"
state: present
tags: yb-prebuilt-ami

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def print_results_helper(self, file):
file.write("""
print_results() {
any_fail=0
if [[ $json_results == *'"result":"FAIL"'* ]]; then
if [[ $json_results == *'"result": "FAIL"'* ]]; then
any_fail=1
fi
json_results+='\n]}'
Expand Down
11 changes: 11 additions & 0 deletions managed/node-agent/resources/ynp/configs/setup_logger.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import logging.config
import os
import pwd


def setup_logger(config):
Expand Down Expand Up @@ -53,3 +54,13 @@ def setup_logger(config):
os.chmod(os.path.join(log_dir, log_file), 0o644)
logger = logging.getLogger()
logger.info("Logging Setup Done")

if 'SUDO_USER' in os.environ:
original_user = os.environ['SUDO_USER']
else:
original_user = os.getlogin()
user_info = pwd.getpwnam(original_user)
uid = user_info.pw_uid
gid = user_info.pw_gid
os.chown(log_dir, uid, gid)
os.chown(os.path.join(log_dir, log_file), uid, gid)
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,13 @@ for mount_point in "${mount_points_array[@]}"; do
add_result "home directory check" "FAIL" "$message"
fi
if [ -d "$mount_point" ]; then
if [ -w "$mount_point" ] && [ $(( $(stat -c %a "$mount_point") % 10 & 2 )) -ne 0 ]; then
if su - {{ yb_user }} -c "test -w \"$mount_point\""; then
result="PASS"
message="Directory $mount_point exists and is world-writable."
message="Directory $mount_point exists and is writable by {{ yb_user }}."
echo "[PASS] $message"
else
result="FAIL"
message="Directory $mount_point exists but is not world-writable."
message="Directory $mount_point exists but is not {{ yb_user }} writable."
echo "[FAIL] $message"
any_fail=1
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ if [ "$http_status" -ge 200 ] && [ "$http_status" -lt 300 ]; then
echo "HTTP GET request successful. Processing response..."

# Extract IPs using sed and grep
ips=$(echo "$response_body" | sed -n 's/.*"ip":"\([a-zA-Z0-9\.-]*\)".*/\1/p')
ips=$(echo "$response_body" | grep -o '"ip":"[a-zA-Z0-9.:_-]*"' | cut -d '"' -f4)
for ip in $ips; do
if [[ "$ip" == "{{ node_ip }}" ]]; then
matched=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ airgap_flag="--airgap"
airgap_flag=""
{% endif %}

installer_dir="{{ ynp_dir }}/../../bin"
installer_dir="{{ yb_home_dir }}/.install"
su - {{ yb_user }} -c "\"$installer_dir/node-agent-installer.sh\" -c install \
-u {{ url }} -t {{ api_key }} --provider_id $provider_id \
--instance_type {{ instance_type_name }} --zone_name {{ provider_region_zone_name }} \
Expand Down Expand Up @@ -299,6 +299,8 @@ function cleanup() {
if test -f "{{ tmp_directory }}/add_node_to_provider.json"; then
rm "{{ tmp_directory }}/add_node_to_provider.json"
fi

rm -rf "{{ yb_home_dir }}/.install"
}

trap cleanup EXIT
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,12 @@ else
echo "el8 not detected, skipping changing selinux context"
fi

installer_dir="{{ ynp_dir }}/../../bin"
# Copy the node-agent installer script into the yugabyte home directory and hand ownership
# to the yugabyte user, so that user can read it in a restricted environment.
# Paths are quoted so a templated value containing spaces cannot split into several arguments.
mkdir -p "{{ yb_home_dir }}/.install"
cp "$installer_dir/node-agent-installer.sh" "{{ yb_home_dir }}/.install"
chown -R "{{ yb_user }}:{{ yb_user }}" "{{ yb_home_dir }}/.install"

# Ensure the permissions for yb_home_dir are 750
chmod 750 "{{ yb_home_dir }}"
20 changes: 20 additions & 0 deletions managed/src/main/java/GlobalExceptionHandler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (c) Yugabyte, Inc.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link Thread.UncaughtExceptionHandler} that logs any exception escaping a thread at
 * error level, together with the name of the thread it escaped from.
 *
 * <p>Meant to be registered through
 * {@link Thread#setDefaultUncaughtExceptionHandler(Thread.UncaughtExceptionHandler)}.
 */
public class GlobalExceptionHandler implements Thread.UncaughtExceptionHandler {

  // volatile: setLogger() may run on a different thread from the one on which
  // uncaughtException() is later invoked, so the replacement must be visible across threads.
  private static volatile Logger logger = LoggerFactory.getLogger(GlobalExceptionHandler.class);

  /**
   * Replaces the logger used by every handler instance. Added for test.
   *
   * @param l logger that subsequent uncaught exceptions are reported to
   */
  public static void setLogger(Logger l) {
    logger = l;
  }

  /**
   * Logs the uncaught exception; the throwable is passed as the last argument so the
   * stack trace is included in the log entry.
   *
   * @param t thread that terminated because of {@code e}
   * @param e throwable that was not caught
   */
  @Override
  public void uncaughtException(Thread t, Throwable e) {
    // Log the exception
    logger.error("Yugaware uncaught exception in thread '{}'", t.getName(), e);
  }
}
2 changes: 2 additions & 0 deletions managed/src/main/java/MainModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ public MainModule(Environment environment, Config config) {

@Override
public void configure() {
// Bind the uncaught exception handler at the application startup
Thread.setDefaultUncaughtExceptionHandler(new GlobalExceptionHandler());
bind(StaticInjectorHolder.class).asEagerSingleton();
bind(Long.class)
.annotatedWith(Names.named("AppStartupTimeMs"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.yugabyte.yw.common.ImageBundleUtil;
import com.yugabyte.yw.common.NodeManager;
import com.yugabyte.yw.common.NodeUIApiHelper;
import com.yugabyte.yw.common.NodeUniverseManager;
import com.yugabyte.yw.common.PlatformExecutorFactory;
import com.yugabyte.yw.common.ReleaseManager;
import com.yugabyte.yw.common.RestoreManagerYb;
Expand Down Expand Up @@ -93,6 +94,7 @@ public abstract class AbstractTaskBase implements ITask {
protected final ReleaseManager releaseManager;
protected final YsqlQueryExecutor ysqlQueryExecutor;
protected final GFlagsValidation gFlagsValidation;
protected final NodeUniverseManager nodeUniverseManager;

@Inject
protected AbstractTaskBase(BaseTaskDependencies baseTaskDependencies) {
Expand Down Expand Up @@ -120,6 +122,7 @@ protected AbstractTaskBase(BaseTaskDependencies baseTaskDependencies) {
this.releaseManager = baseTaskDependencies.getReleaseManager();
this.ysqlQueryExecutor = baseTaskDependencies.getYsqlQueryExecutor();
this.gFlagsValidation = baseTaskDependencies.getGFlagsValidation();
this.nodeUniverseManager = baseTaskDependencies.getNodeUniverseManager();
}

protected ITaskParams taskParams() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,8 @@ Map<String, Long> getFollowerLagMs(String ip, int port) {
private CustomerTask submitMasterFailoverTask(
Customer customer, Universe universe, Action action) {
NodeDetails node = universe.getNode(action.getNodeName());
NodeDetails possibleReplacementCandidate = findReplacementMaster(universe, node);
String possibleReplacementCandidate =
findReplacementMaster(universe, node, true /* pickNewNode */);
if (possibleReplacementCandidate == null) {
log.error(
"No replacement master found for node {} in universe {}",
Expand All @@ -483,7 +484,7 @@ private CustomerTask submitMasterFailoverTask(
}
log.debug(
"Found a possible replacement master candidate {} for universe {}",
possibleReplacementCandidate.getNodeName(),
possibleReplacementCandidate,
universe.getUniverseUUID());
Set<String> leaderlessTablets = getLeaderlessTablets(universe.getUniverseUUID());
if (CollectionUtils.isNotEmpty(leaderlessTablets)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,8 @@ private void detectMasterFailure(
}
// Verify that the replacement node exists before creating the failover schedule.
NodeDetails node = universe.getNode(action.getNodeName());
NodeDetails possibleReplacementCandidate =
autoMasterFailover.findReplacementMaster(universe, node);
String possibleReplacementCandidate =
autoMasterFailover.findReplacementMaster(universe, node, true /* pickNewNode */);
if (possibleReplacementCandidate == null) {
disableSchedule(customer, failoverScheduleName, true);
log.info(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.yugabyte.yw.common.ImageBundleUtil;
import com.yugabyte.yw.common.NodeManager;
import com.yugabyte.yw.common.NodeUIApiHelper;
import com.yugabyte.yw.common.NodeUniverseManager;
import com.yugabyte.yw.common.PlatformExecutorFactory;
import com.yugabyte.yw.common.ReleaseManager;
import com.yugabyte.yw.common.RestoreManagerYb;
Expand Down Expand Up @@ -62,4 +63,5 @@ public class BaseTaskDependencies {
private final ReleaseManager releaseManager;
private final YsqlQueryExecutor ysqlQueryExecutor;
private final GFlagsValidation gFlagsValidation;
private final NodeUniverseManager nodeUniverseManager;
}
Original file line number Diff line number Diff line change
Expand Up @@ -723,10 +723,10 @@ private void waitForSubTasks(boolean abortOnFailure) {
anyEx = (anyEx != null) ? anyEx : thisEx;
runnableSubTask.updateTaskDetailsOnError(TaskInfo.State.Aborted, thisEx);
removeCompletedSubTask(iter, runnableSubTask, anyEx);
} catch (Exception e) {
anyEx = e;
runnableSubTask.updateTaskDetailsOnError(TaskInfo.State.Failure, e);
removeCompletedSubTask(iter, runnableSubTask, e);
} catch (Throwable th) {
anyEx = th;
runnableSubTask.updateTaskDetailsOnError(TaskInfo.State.Failure, th);
removeCompletedSubTask(iter, runnableSubTask, th);
}
}
}
Expand Down
Loading

0 comments on commit fbbcd7f

Please sign in to comment.