Drain jobs cleanly on shutdown

This commit is contained in:
Alvaro 2023-02-24 12:03:17 -07:00 committed by GitHub
parent a83a85d557
commit b5849f872a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 301 additions and 30 deletions

View file

@ -65,7 +65,10 @@ import { getContact, isIncoming } from './messages/helpers';
import { migrateMessageData } from './messages/migrateMessageData';
import { createBatcher } from './util/batcher';
import { updateConversationsWithUuidLookup } from './updateConversationsWithUuidLookup';
import { initializeAllJobQueues } from './jobs/initializeAllJobQueues';
import {
initializeAllJobQueues,
shutdownAllJobQueues,
} from './jobs/initializeAllJobQueues';
import { removeStorageKeyJobQueue } from './jobs/removeStorageKeyJobQueue';
import { ourProfileKeyService } from './services/ourProfileKey';
import { notificationService } from './services/notifications';
@ -168,6 +171,8 @@ import { flushAttachmentDownloadQueue } from './util/attachmentDownloadQueue';
import { StartupQueue } from './util/StartupQueue';
import { showConfirmationDialog } from './util/showConfirmationDialog';
import { onCallEventSync } from './util/onCallEventSync';
import { sleeper } from './util/sleeper';
import { MINUTE } from './util/durations';
export function isOverHourIntoPast(timestamp: number): boolean {
const HOUR = 1000 * 60 * 60;
@ -730,6 +735,46 @@ export async function startApp(): Promise<void> {
)
);
sleeper.shutdown();
const shutdownQueues = async () => {
await Promise.allSettled([
StartupQueue.shutdown(),
shutdownAllJobQueues(),
]);
await Promise.allSettled(
window.ConversationController.getAll().map(async convo => {
try {
await convo.shutdownJobQueue();
} catch (err) {
log.error(
`background/shutdown: error waiting for conversation ${convo.idForLogging} job queue shutdown`,
Errors.toLogFormat(err)
);
}
})
);
};
// wait for at most 2 minutes for startup queue and job queues to drain
let timeout: NodeJS.Timeout | undefined;
await Promise.race([
shutdownQueues(),
new Promise<void>((resolve, _) => {
timeout = setTimeout(() => {
log.warn(
'background/shutdown - timed out waiting for StartupQueue/JobQueues, continuing with shutdown'
);
timeout = undefined;
resolve();
}, 2 * MINUTE);
}),
]);
if (timeout) {
clearTimeout(timeout);
}
log.info('background/shutdown: waiting for all batchers');
// A number of still-to-queue database queries might be waiting inside batchers.

View file

@ -54,6 +54,8 @@ export abstract class JobQueue<T> {
private readonly logPrefix: string;
private shuttingDown = false;
private readonly onCompleteCallbacks = new Map<
string,
{
@ -66,6 +68,10 @@ export abstract class JobQueue<T> {
private started = false;
get isShuttingDown(): boolean {
return this.shuttingDown;
}
constructor(options: Readonly<JobQueueOptions>) {
assertDev(
Number.isInteger(options.maxAttempts) && options.maxAttempts >= 1,
@ -115,6 +121,10 @@ export abstract class JobQueue<T> {
extra?: Readonly<{ attempt?: number; log?: LoggerType }>
): Promise<void>;
protected getQueues(): ReadonlySet<PQueue> {
return new Set([this.defaultInMemoryQueue]);
}
/**
* Start streaming jobs from the store.
*/
@ -130,6 +140,10 @@ export abstract class JobQueue<T> {
const stream = this.store.stream(this.queueType);
for await (const storedJob of stream) {
if (this.shuttingDown) {
log.info(`${this.logPrefix} is shutting down. Can't accept more work.`);
break;
}
void this.enqueueStoredJob(storedJob);
}
}
@ -275,4 +289,14 @@ export abstract class JobQueue<T> {
reject(result.err);
}
}
async shutdown(): Promise<void> {
const queues = this.getQueues();
log.info(
`${this.logPrefix} shutdown: stop accepting new work and drain ${queues.size} promise queues`
);
this.shuttingDown = true;
await Promise.all([...queues].map(q => q.onIdle()));
log.info(`${this.logPrefix} shutdown: complete`);
}
}

View file

@ -7,7 +7,6 @@ import * as globalLogger from '../logging/log';
import * as durations from '../util/durations';
import { exponentialBackoffMaxAttempts } from '../util/exponentialBackoff';
import { commonShouldJobContinue } from './helpers/commonShouldJobContinue';
import { InMemoryQueues } from './helpers/InMemoryQueues';
import { jobQueueDatabaseStore } from './JobQueueDatabaseStore';
import { JobQueue } from './JobQueue';
@ -24,7 +23,6 @@ import { sendReceipts } from './helpers/sendReceipts';
import type { LoggerType } from '../types/Logging';
import { ConversationVerificationState } from '../state/ducks/conversationsEnums';
import { sleep } from '../util/sleep';
import { MINUTE } from '../util/durations';
import {
OutgoingIdentityKeyError,
@ -38,6 +36,8 @@ import type { Job } from './Job';
import type { ParsedJob } from './types';
import type SendMessage from '../textsecure/SendMessage';
import type { UUIDStringType } from '../types/UUID';
import { commonShouldJobContinue } from './helpers/commonShouldJobContinue';
import { sleeper } from '../util/sleeper';
import { receiptSchema, ReceiptType } from '../types/Receipt';
// Note: generally, we only want to add to this list. If you do need to change one of
@ -188,6 +188,10 @@ export class ConversationJobQueue extends JobQueue<ConversationQueueJobData> {
}
>();
override getQueues(): ReadonlySet<PQueue> {
return this.inMemoryQueues.allQueues;
}
public override async add(
data: Readonly<ConversationQueueJobData>,
insert?: (job: ParsedJob<ConversationQueueJobData>) => Promise<void>
@ -290,13 +294,21 @@ export class ConversationJobQueue extends JobQueue<ConversationQueueJobData> {
}
if (window.Signal.challengeHandler?.isRegistered(conversationId)) {
if (this.isShuttingDown) {
throw new Error("Shutting down, can't wait for captcha challenge.");
}
log.info(
'captcha challenge is pending for this conversation; waiting at most 5m...'
);
// eslint-disable-next-line no-await-in-loop
await Promise.race([
this.startVerificationWaiter(conversation.id),
sleep(5 * MINUTE),
// don't resolve on shutdown, otherwise we end up in an infinite loop
sleeper.sleep(
5 * MINUTE,
`conversationJobQueue: waiting for captcha: ${conversation.idForLogging()}`,
{ resolveOnShutdown: false }
),
]);
continue;
}
@ -320,13 +332,22 @@ export class ConversationJobQueue extends JobQueue<ConversationQueueJobData> {
return;
}
if (this.isShuttingDown) {
throw new Error("Shutting down, can't wait for verification.");
}
log.info(
'verification is pending for this conversation; waiting at most 5m...'
);
// eslint-disable-next-line no-await-in-loop
await Promise.race([
this.startVerificationWaiter(conversation.id),
sleep(5 * MINUTE),
// don't resolve on shutdown, otherwise we end up in an infinite loop
sleeper.sleep(
5 * MINUTE,
`conversationJobQueue: verification pending: ${conversation.idForLogging()}`,
{ resolveOnShutdown: false }
),
]);
continue;
}

View file

@ -20,4 +20,8 @@ export class InMemoryQueues {
this.queues.set(key, newQueue);
return newQueue;
}
get allQueues(): ReadonlySet<PQueue> {
return new Set(this.queues.values());
}
}

View file

@ -3,9 +3,9 @@
import type { LoggerType } from '../../types/Logging';
import { waitForOnline } from '../../util/waitForOnline';
import { sleep } from '../../util/sleep';
import { exponentialBackoffSleepTime } from '../../util/exponentialBackoff';
import { isDone as isDeviceLinked } from '../../util/registration';
import { sleeper } from '../../util/sleeper';
export async function commonShouldJobContinue({
attempt,
@ -45,7 +45,10 @@ export async function commonShouldJobContinue({
const sleepTime = exponentialBackoffSleepTime(attempt);
log.info(`sleeping for ${sleepTime}`);
await sleep(sleepTime);
await sleeper.sleep(
sleepTime,
`commonShouldJobContinue: attempt ${attempt}, skipWait ${skipWait}`
);
return true;
}

View file

@ -2,7 +2,7 @@
// SPDX-License-Identifier: AGPL-3.0-only
import type { LoggerType } from '../../types/Logging';
import { sleep } from '../../util/sleep';
import { sleeper } from '../../util/sleeper';
import { findRetryAfterTimeFromError } from './findRetryAfterTimeFromError';
export async function sleepForRateLimitRetryAfterTime({
@ -24,5 +24,9 @@ export async function sleepForRateLimitRetryAfterTime({
`Got a 413 or 429 response code. Sleeping for ${retryAfter} millisecond(s)`
);
await sleep(retryAfter);
await sleeper.sleep(
retryAfter,
'sleepForRateLimitRetryAfterTime: Got a 413 or 429 response code',
{ resolveOnShutdown: false }
);
}

View file

@ -37,3 +37,15 @@ export function initializeAllJobQueues({
drop(removeStorageKeyJobQueue.streamJobs());
drop(reportSpamJobQueue.streamJobs());
}
export async function shutdownAllJobQueues(): Promise<void> {
await Promise.allSettled([
conversationJobQueue.shutdown(),
singleProtoJobQueue.shutdown(),
readSyncJobQueue.shutdown(),
viewSyncJobQueue.shutdown(),
viewOnceOpenJobQueue.shutdown(),
removeStorageKeyJobQueue.shutdown(),
reportSpamJobQueue.shutdown(),
]);
}

View file

@ -8,13 +8,13 @@ import { waitForOnline } from '../util/waitForOnline';
import { isDone as isDeviceLinked } from '../util/registration';
import type { LoggerType } from '../types/Logging';
import { map } from '../util/iterables';
import { sleep } from '../util/sleep';
import { JobQueue } from './JobQueue';
import { jobQueueDatabaseStore } from './JobQueueDatabaseStore';
import { parseIntWithFallback } from '../util/parseIntWithFallback';
import type { WebAPIType } from '../textsecure/WebAPI';
import { HTTPError } from '../textsecure/Errors';
import { sleeper } from '../util/sleeper';
const RETRY_WAIT_TIME = durations.MINUTE;
const RETRYABLE_4XX_FAILURE_STATUSES = new Set([
@ -94,7 +94,10 @@ export class ReportSpamJobQueue extends JobQueue<ReportSpamJobData> {
log.info(
`reportSpamJobQueue: server responded with ${code} status code. Sleeping before our next attempt`
);
await sleep(RETRY_WAIT_TIME);
await sleeper.sleep(
RETRY_WAIT_TIME,
`reportSpamJobQueue: server responded with ${code} status code`
);
throw err;
}

View file

@ -31,6 +31,10 @@ const MAX_ATTEMPTS = exponentialBackoffMaxAttempts(MAX_RETRY_TIME);
export class SingleProtoJobQueue extends JobQueue<SingleProtoJobData> {
private parallelQueue = new PQueue({ concurrency: MAX_PARALLEL_JOBS });
protected override getQueues(): ReadonlySet<PQueue> {
return new Set([this.parallelQueue]);
}
protected override getInMemoryQueue(
_parsedJob: ParsedJob<SingleProtoJobData>
): PQueue {

View file

@ -113,14 +113,15 @@ export class ReadSyncs extends Collection {
// TODO DESKTOP-1509: use MessageUpdater.markRead once this is TS
message.markRead(readAt, { skipSave: true });
const updateConversation = () => {
const updateConversation = async () => {
// onReadMessage may result in messages older than this one being
// marked read. We want those messages to have the same expire timer
// start time as this one, so we pass the readAt value through.
void message.getConversation()?.onReadMessage(message, readAt);
};
if (StartupQueue.isReady()) {
// only available during initialization
if (StartupQueue.isAvailable()) {
const conversation = message.getConversation();
if (conversation) {
StartupQueue.add(
@ -130,7 +131,9 @@ export class ReadSyncs extends Collection {
);
}
} else {
updateConversation();
// not awaiting since we don't want to block work happening in the
// eventHandlerQueue
void updateConversation();
}
} else {
const now = Date.now();

View file

@ -272,6 +272,8 @@ export class ConversationModel extends window.Backbone
private privVerifiedEnum?: typeof window.textsecure.storage.protocol.VerifiedStatus;
private isShuttingDown = false;
override defaults(): Partial<ConversationAttributesType> {
return {
unreadCount: 0,
@ -3580,14 +3582,19 @@ export class ConversationModel extends window.Backbone
return validateConversation(attributes);
}
queueJob<T>(
async queueJob<T>(
name: string,
callback: (abortSignal: AbortSignal) => Promise<T>
): Promise<T> {
this.jobQueue = this.jobQueue || new PQueue({ concurrency: 1 });
const logId = `conversation.queueJob(${this.idForLogging()}, ${name})`;
if (this.isShuttingDown) {
log.warn(`${logId}: shutting down, can't accept more work`);
throw new Error(`${logId}: shutting down, can't accept more work`);
}
this.jobQueue = this.jobQueue || new PQueue({ concurrency: 1 });
const taskWithTimeout = createTaskWithTimeout(callback, logId);
const abortController = new AbortController();
@ -5683,6 +5690,30 @@ export class ConversationModel extends window.Backbone
return this.get('storySendMode') ?? StorySendMode.IfActive;
}
async shutdownJobQueue(): Promise<void> {
log.info(`conversation ${this.idForLogging()} jobQueue shutdown start`);
if (!this.jobQueue) {
log.info(`conversation ${this.idForLogging()} no jobQueue to shutdown`);
return;
}
// If the queue takes more than 10 seconds to get to idle, we force it by setting
// isShuttingDown = true which will reject incoming requests.
const to = setTimeout(() => {
log.warn(
`conversation ${this.idForLogging()} jobQueue stop accepting new work`
);
this.isShuttingDown = true;
}, 10 * SECOND);
await this.jobQueue.onIdle();
this.isShuttingDown = true;
clearTimeout(to);
log.info(`conversation ${this.idForLogging()} jobQueue shutdown complete`);
}
}
window.Whisper.Conversation = ConversationModel;

View file

@ -152,7 +152,6 @@ describe('pnp/send gv2 invite', function needsName() {
let group: Group;
{
state = await phone.waitForStorageState({ after: state });
const groups = await phone.getAllGroups(state);
assert.strictEqual(groups.length, 1);

View file

@ -1,20 +1,26 @@
// Copyright 2021 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import PQueue from 'p-queue';
import * as Errors from '../types/errors';
import * as log from '../logging/log';
type EntryType = Readonly<{
value: number;
callback(): void;
callback(): Promise<void>;
}>;
let startupProcessingQueue: StartupQueue | undefined;
export class StartupQueue {
private readonly map = new Map<string, EntryType>();
private readonly running: PQueue = new PQueue({
// mostly io-bound work that is not very parallelizable
// small number should be sufficient
concurrency: 5,
});
public add(id: string, value: number, f: () => void): void {
public add(id: string, value: number, f: () => Promise<void>): void {
const existing = this.map.get(id);
if (existing && existing.value >= value) {
return;
@ -30,26 +36,36 @@ export class StartupQueue {
this.map.clear();
for (const { callback } of values) {
void this.running.add(async () => {
try {
callback();
return callback();
} catch (error) {
log.error(
'StartupQueue: Failed to process item due to error',
Errors.toLogFormat(error)
);
throw error;
}
});
}
}
private shutdown(): Promise<void> {
log.info(
`StartupQueue: Waiting for ${this.running.pending} tasks to drain`
);
return this.running.onIdle();
}
static initialize(): void {
startupProcessingQueue = new StartupQueue();
}
static isReady(): boolean {
static isAvailable(): boolean {
return Boolean(startupProcessingQueue);
}
static add(id: string, value: number, f: () => void): void {
static add(id: string, value: number, f: () => Promise<void>): void {
startupProcessingQueue?.add(id, value, f);
}
@ -57,4 +73,8 @@ export class StartupQueue {
startupProcessingQueue?.flush();
startupProcessingQueue = undefined;
}
static async shutdown(): Promise<void> {
await startupProcessingQueue?.shutdown();
}
}

98
ts/util/sleeper.ts Normal file
View file

@ -0,0 +1,98 @@
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import * as log from '../logging/log';
import * as Errors from '../types/errors';
/**
* Provides a way to delay tasks
* but also a way to force sleeping tasks to immediately resolve/reject on shutdown
*/
export class Sleeper {
private shuttingDown = false;
private shutdownCallbacks: Set<() => void> = new Set();
/**
* delay by ms, careful when using on a loop if resolving on shutdown (default)
*/
sleep(
ms: number,
reason: string,
options?: { resolveOnShutdown?: boolean }
): Promise<void> {
log.info(`Sleeper: sleeping for ${ms}ms. Reason: ${reason}`);
const resolveOnShutdown = options?.resolveOnShutdown ?? true;
return new Promise((resolve, reject) => {
let timeout: NodeJS.Timeout | undefined;
const shutdownCallback = () => {
if (timeout) {
clearTimeout(timeout);
}
log.info(
`Sleeper: resolving sleep task on shutdown. Original reason: ${reason}`
);
if (resolveOnShutdown) {
setTimeout(resolve, 0);
} else {
setTimeout(() => {
reject(
new Error(
`Sleeper: rejecting sleep task during shutdown. Original reason: ${reason}`
)
);
}, 0);
}
};
if (this.shuttingDown) {
log.info(
`Sleeper: sleep called when shutdown is in progress, scheduling immediate ${
resolveOnShutdown ? 'resolution' : 'rejection'
}. Original reason: ${reason}`
);
shutdownCallback();
return;
}
timeout = setTimeout(() => {
resolve();
this.removeShutdownCallback(shutdownCallback);
}, ms);
this.addShutdownCallback(shutdownCallback);
});
}
private addShutdownCallback(callback: () => void) {
this.shutdownCallbacks.add(callback);
}
private removeShutdownCallback(callback: () => void) {
this.shutdownCallbacks.delete(callback);
}
shutdown(): void {
if (this.shuttingDown) {
return;
}
log.info(
`Sleeper: shutting down, settling ${this.shutdownCallbacks.size} in-progress sleep calls`
);
this.shuttingDown = true;
this.shutdownCallbacks.forEach(cb => {
try {
cb();
} catch (error) {
log.error(
'Sleeper: Error executing shutdown callback',
Errors.toLogFormat(error)
);
}
});
log.info('Sleeper: sleep tasks settled');
}
}
export const sleeper = new Sleeper();