Improve Watchdog Early Warning Feature - Closes #1319
1) Don't kick in until server has started (the full crash will still kick in before full start) 2) Delay reporting until 10 seconds, then print every 5 3) Make the intervals configurable 4) Make it able to be disabled by setting every interval to <= 0
This commit is contained in:
parent
edb9d18aed
commit
dc428860cf
2 changed files with 166 additions and 87 deletions
|
@ -1,87 +0,0 @@
|
||||||
From a30613e82fe3e0e0759e03c7d6b4b69d60e005f0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: miclebrick <miclebrick@outlook.com>
|
|
||||||
Date: Wed, 8 Aug 2018 15:30:52 -0400
|
|
||||||
Subject: [PATCH] Add 5 second short dumps to watchdog
|
|
||||||
|
|
||||||
|
|
||||||
diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java
|
|
||||||
index 57a4748a3..815c3e664 100644
|
|
||||||
--- a/src/main/java/org/spigotmc/WatchdogThread.java
|
|
||||||
+++ b/src/main/java/org/spigotmc/WatchdogThread.java
|
|
||||||
@@ -13,8 +13,10 @@ public class WatchdogThread extends Thread
|
|
||||||
|
|
||||||
private static WatchdogThread instance;
|
|
||||||
private final long timeoutTime;
|
|
||||||
+ private final long shortTimeout; // Paper - Timeout time for just printing a dump but not restarting
|
|
||||||
private final boolean restart;
|
|
||||||
private volatile long lastTick;
|
|
||||||
+ private long lastShortDump; // Paper - Keep track of short dump times to avoid spamming console with short dumps
|
|
||||||
private volatile boolean stopping;
|
|
||||||
|
|
||||||
private WatchdogThread(long timeoutTime, boolean restart)
|
|
||||||
@@ -22,6 +24,7 @@ public class WatchdogThread extends Thread
|
|
||||||
super( "Paper Watchdog Thread" );
|
|
||||||
this.timeoutTime = timeoutTime;
|
|
||||||
this.restart = restart;
|
|
||||||
+ shortTimeout = Math.min(5000, timeoutTime); // Paper - Make short timeout the lower of 5 seconds, and timeout time
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void doStart(int timeoutTime, boolean restart)
|
|
||||||
@@ -52,9 +55,19 @@ public class WatchdogThread extends Thread
|
|
||||||
while ( !stopping )
|
|
||||||
{
|
|
||||||
//
|
|
||||||
- if ( lastTick != 0 && System.currentTimeMillis() > lastTick + timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
|
|
||||||
+ long currentTime = System.currentTimeMillis(); // Paper - do we REALLY need to call this method multiple times?
|
|
||||||
+ if ( lastTick != 0 && currentTime > lastTick + shortTimeout && !Boolean.getBoolean("disable.watchdog") ) // Paper - Add property to disable and short timeout
|
|
||||||
{
|
|
||||||
+ // Paper start
|
|
||||||
+ boolean isLongTimeout = currentTime > lastTick + timeoutTime;
|
|
||||||
+ // Don't spam short dumps
|
|
||||||
+ if ( !isLongTimeout && currentTime < lastShortDump + shortTimeout ) continue;
|
|
||||||
+ lastShortDump = currentTime;
|
|
||||||
+ // Paper end
|
|
||||||
Logger log = Bukkit.getServer().getLogger();
|
|
||||||
+ // Paper start - Different message when it's a short timeout
|
|
||||||
+ if ( isLongTimeout )
|
|
||||||
+ {
|
|
||||||
log.log( Level.SEVERE, "The server has stopped responding!" );
|
|
||||||
log.log( Level.SEVERE, "Please report this to https://github.com/PaperMC/Paper/issues" );
|
|
||||||
log.log( Level.SEVERE, "Be sure to include ALL relevant console errors and Minecraft crash reports" );
|
|
||||||
@@ -79,11 +92,19 @@ public class WatchdogThread extends Thread
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Paper end
|
|
||||||
+ } else
|
|
||||||
+ {
|
|
||||||
+ log.log( Level.SEVERE, "The server has not responded for " + shortTimeout / 1000 + " seconds! Creating thread dump");
|
|
||||||
+ }
|
|
||||||
+ // Paper end - Different message for short timeout
|
|
||||||
log.log( Level.SEVERE, "------------------------------" );
|
|
||||||
log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" );
|
|
||||||
dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().primaryThread.getId(), Integer.MAX_VALUE ), log );
|
|
||||||
log.log( Level.SEVERE, "------------------------------" );
|
|
||||||
//
|
|
||||||
+ // Paper start - Only print full dump on long timeouts
|
|
||||||
+ if ( isLongTimeout )
|
|
||||||
+ {
|
|
||||||
log.log( Level.SEVERE, "Entire Thread Dump:" );
|
|
||||||
ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true );
|
|
||||||
for ( ThreadInfo thread : threads )
|
|
||||||
@@ -97,11 +118,12 @@ public class WatchdogThread extends Thread
|
|
||||||
RestartCommand.restart();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
+ } // Paper end
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
- sleep( 10000 );
|
|
||||||
+ sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout
|
|
||||||
} catch ( InterruptedException ex )
|
|
||||||
{
|
|
||||||
interrupt();
|
|
||||||
--
|
|
||||||
2.18.0
|
|
||||||
|
|
|
@ -0,0 +1,166 @@
|
||||||
|
From 572418116aca425b6283f25eb1aaefafac1381fd Mon Sep 17 00:00:00 2001
|
||||||
|
From: miclebrick <miclebrick@outlook.com>
|
||||||
|
Date: Wed, 8 Aug 2018 15:30:52 -0400
|
||||||
|
Subject: [PATCH] Add Early Warning Feature to WatchDog
|
||||||
|
|
||||||
|
Detect when the server has been hung for a long duration, and start printing
|
||||||
|
thread dumps at an interval until the point of crash.
|
||||||
|
|
||||||
|
This will help diagnose what was going on in that time before the crash.
|
||||||
|
|
||||||
|
diff --git a/src/main/java/com/destroystokyo/paper/PaperConfig.java b/src/main/java/com/destroystokyo/paper/PaperConfig.java
|
||||||
|
index a5ff014e33..332e90f86b 100644
|
||||||
|
--- a/src/main/java/com/destroystokyo/paper/PaperConfig.java
|
||||||
|
+++ b/src/main/java/com/destroystokyo/paper/PaperConfig.java
|
||||||
|
@@ -23,6 +23,8 @@ import org.bukkit.configuration.InvalidConfigurationException;
|
||||||
|
import org.bukkit.configuration.file.YamlConfiguration;
|
||||||
|
import co.aikar.timings.Timings;
|
||||||
|
import co.aikar.timings.TimingsManager;
|
||||||
|
+import org.spigotmc.SpigotConfig;
|
||||||
|
+import org.spigotmc.WatchdogThread;
|
||||||
|
|
||||||
|
public class PaperConfig {
|
||||||
|
|
||||||
|
@@ -304,6 +306,14 @@ public class PaperConfig {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ public static int watchdogPrintEarlyWarningEvery = 5000;
|
||||||
|
+ public static int watchdogPrintEarlyWarningDelay = 10000;
|
||||||
|
+ private static void watchdogEarlyWarning() {
|
||||||
|
+ watchdogPrintEarlyWarningEvery = getInt("settings.watchdog.early-warning-every", 5000);
|
||||||
|
+ watchdogPrintEarlyWarningDelay = getInt("settings.watchdog.early-warning-delay", 10000);
|
||||||
|
+ WatchdogThread.doStart(SpigotConfig.timeoutTime, SpigotConfig.restartOnCrash );
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
public static int tabSpamIncrement = 10;
|
||||||
|
public static int tabSpamLimit = 500;
|
||||||
|
private static void tabSpamLimiters() {
|
||||||
|
diff --git a/src/main/java/net/minecraft/server/MinecraftServer.java b/src/main/java/net/minecraft/server/MinecraftServer.java
|
||||||
|
index 0399a48e19..e0546e3dd2 100644
|
||||||
|
--- a/src/main/java/net/minecraft/server/MinecraftServer.java
|
||||||
|
+++ b/src/main/java/net/minecraft/server/MinecraftServer.java
|
||||||
|
@@ -622,6 +622,7 @@ public abstract class MinecraftServer implements ICommandListener, Runnable, IAs
|
||||||
|
this.a(this.q);
|
||||||
|
|
||||||
|
// Spigot start
|
||||||
|
+ org.spigotmc.WatchdogThread.hasStarted = true;
|
||||||
|
Arrays.fill( recentTps, 20 );
|
||||||
|
long start = System.nanoTime(), lastTick = start - TICK_TIME, catchupTime = 0, curTime, wait, tickSection = start; // Paper - Further improve server tick loop
|
||||||
|
while (this.isRunning) {
|
||||||
|
diff --git a/src/main/java/org/spigotmc/SpigotConfig.java b/src/main/java/org/spigotmc/SpigotConfig.java
|
||||||
|
index 4f9fd4bc60..2cdd9aaf81 100644
|
||||||
|
--- a/src/main/java/org/spigotmc/SpigotConfig.java
|
||||||
|
+++ b/src/main/java/org/spigotmc/SpigotConfig.java
|
||||||
|
@@ -223,7 +223,7 @@ public class SpigotConfig
|
||||||
|
restartScript = getString( "settings.restart-script", restartScript );
|
||||||
|
restartMessage = transform( getString( "messages.restart", "Server is restarting" ) );
|
||||||
|
commands.put( "restart", new RestartCommand( "restart" ) );
|
||||||
|
- WatchdogThread.doStart( timeoutTime, restartOnCrash );
|
||||||
|
+ //WatchdogThread.doStart( timeoutTime, restartOnCrash ); // Paper - moved to PaperConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean bungee;
|
||||||
|
diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java
|
||||||
|
index 57a4748a30..28a9e2b96f 100644
|
||||||
|
--- a/src/main/java/org/spigotmc/WatchdogThread.java
|
||||||
|
+++ b/src/main/java/org/spigotmc/WatchdogThread.java
|
||||||
|
@@ -5,6 +5,7 @@ import java.lang.management.MonitorInfo;
|
||||||
|
import java.lang.management.ThreadInfo;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
+import com.destroystokyo.paper.PaperConfig;
|
||||||
|
import net.minecraft.server.MinecraftServer;
|
||||||
|
import org.bukkit.Bukkit;
|
||||||
|
|
||||||
|
@@ -13,6 +14,10 @@ public class WatchdogThread extends Thread
|
||||||
|
|
||||||
|
private static WatchdogThread instance;
|
||||||
|
private final long timeoutTime;
|
||||||
|
+ private final long earlyWarningEvery; // Paper - Timeout time for just printing a dump but not restarting
|
||||||
|
+ private final long earlyWarningDelay; // Paper
|
||||||
|
+ public static volatile boolean hasStarted; // Paper
|
||||||
|
+ private long lastEarlyWarning; // Paper - Keep track of short dump times to avoid spamming console with short dumps
|
||||||
|
private final boolean restart;
|
||||||
|
private volatile long lastTick;
|
||||||
|
private volatile boolean stopping;
|
||||||
|
@@ -22,6 +27,8 @@ public class WatchdogThread extends Thread
|
||||||
|
super( "Paper Watchdog Thread" );
|
||||||
|
this.timeoutTime = timeoutTime;
|
||||||
|
this.restart = restart;
|
||||||
|
+ earlyWarningEvery = Math.min(PaperConfig.watchdogPrintEarlyWarningEvery, timeoutTime); // Paper
|
||||||
|
+ earlyWarningDelay = Math.min(PaperConfig.watchdogPrintEarlyWarningDelay, timeoutTime); // Paper
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void doStart(int timeoutTime, boolean restart)
|
||||||
|
@@ -52,9 +59,19 @@ public class WatchdogThread extends Thread
|
||||||
|
while ( !stopping )
|
||||||
|
{
|
||||||
|
//
|
||||||
|
- if ( lastTick != 0 && System.currentTimeMillis() > lastTick + timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
|
||||||
|
+ long currentTime = System.currentTimeMillis(); // Paper - do we REALLY need to call this method multiple times?
|
||||||
|
+ if ( lastTick != 0 && currentTime > lastTick + earlyWarningEvery && !Boolean.getBoolean("disable.watchdog") ) // Paper - Add property to disable and short timeout
|
||||||
|
{
|
||||||
|
+ // Paper start
|
||||||
|
+ boolean isLongTimeout = currentTime > lastTick + timeoutTime;
|
||||||
|
+ // Don't spam early warning dumps
|
||||||
|
+ if ( !isLongTimeout && (earlyWarningEvery <= 0 || !hasStarted || currentTime < lastEarlyWarning + earlyWarningEvery || currentTime < lastTick + earlyWarningDelay)) continue;
|
||||||
|
+ lastEarlyWarning = currentTime;
|
||||||
|
+ // Paper end
|
||||||
|
Logger log = Bukkit.getServer().getLogger();
|
||||||
|
+ // Paper start - Different message when it's a short timeout
|
||||||
|
+ if ( isLongTimeout )
|
||||||
|
+ {
|
||||||
|
log.log( Level.SEVERE, "The server has stopped responding!" );
|
||||||
|
log.log( Level.SEVERE, "Please report this to https://github.com/PaperMC/Paper/issues" );
|
||||||
|
log.log( Level.SEVERE, "Be sure to include ALL relevant console errors and Minecraft crash reports" );
|
||||||
|
@@ -79,29 +96,45 @@ public class WatchdogThread extends Thread
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Paper end
|
||||||
|
+ } else
|
||||||
|
+ {
|
||||||
|
+ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---");
|
||||||
|
+ log.log(Level.SEVERE, "The server has not responded for " + (currentTime - lastTick) / 1000 + " seconds! Creating thread dump");
|
||||||
|
+ }
|
||||||
|
+ // Paper end - Different message for short timeout
|
||||||
|
log.log( Level.SEVERE, "------------------------------" );
|
||||||
|
log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" );
|
||||||
|
dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().primaryThread.getId(), Integer.MAX_VALUE ), log );
|
||||||
|
log.log( Level.SEVERE, "------------------------------" );
|
||||||
|
//
|
||||||
|
+ // Paper start - Only print full dump on long timeouts
|
||||||
|
+ if ( isLongTimeout )
|
||||||
|
+ {
|
||||||
|
log.log( Level.SEVERE, "Entire Thread Dump:" );
|
||||||
|
ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true );
|
||||||
|
for ( ThreadInfo thread : threads )
|
||||||
|
{
|
||||||
|
dumpThread( thread, log );
|
||||||
|
}
|
||||||
|
+ } else {
|
||||||
|
+ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+
|
||||||
|
log.log( Level.SEVERE, "------------------------------" );
|
||||||
|
|
||||||
|
if ( restart )
|
||||||
|
{
|
||||||
|
RestartCommand.restart();
|
||||||
|
}
|
||||||
|
+ if (isLongTimeout) {
|
||||||
|
break;
|
||||||
|
+ } // Paper end
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
- sleep( 10000 );
|
||||||
|
+ sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout
|
||||||
|
} catch ( InterruptedException ex )
|
||||||
|
{
|
||||||
|
interrupt();
|
||||||
|
--
|
||||||
|
2.18.0
|
||||||
|
|
Loading…
Reference in a new issue