diff --git a/packages/backend/src/server/SkRateLimiterService.md b/packages/backend/src/server/SkRateLimiterService.md index c8a2b4e85c..2c7de6715b 100644 --- a/packages/backend/src/server/SkRateLimiterService.md +++ b/packages/backend/src/server/SkRateLimiterService.md @@ -34,8 +34,9 @@ Header meanings and usage have been devised by adapting common patterns to work ## Performance -SkRateLimiterService makes between 1 and 4 redis transactions per rate limit check. +SkRateLimiterService makes between 0 and 4 redis transactions per rate limit check. The first call is read-only, while the others perform at least one write operation. +No calls are made while a client is still blocked, as the block status is stored in a short-term memory cache. Two integer keys are stored per client/subject, and both expire together after the maximum duration of the limit. While performance has not been formally tested, it's expected that SkRateLimiterService has an impact roughly on par with the legacy RateLimiterService. Redis memory usage should be notably lower due to the reduced number of keys and avoidance of set / array constructions. @@ -54,6 +55,12 @@ Any possible conflict would have to occur within a few-milliseconds window, whic This error does not compound, as all further operations are relative (Increment and Add). Thus, it's considered an acceptable tradeoff given the limitations imposed by Redis and ioredis. +In-process memory caches are used sparingly to avoid consistency problems. +Besides the role factor cache, there is one "important" cache which directly impacts limit calculations: the lockout cache. +This cache stores response data for blocked limits, preventing repeated calls to redis if a client ignores the 429 errors and continues making requests. +Consistency is guaranteed by only caching blocked limits (allowances are not cached), and by limiting cached data to the duration of the block. +This ensures that stale limit info is never used. 
+ ## Algorithm Pseudocode The Atomic Leaky Bucket algorithm is described here, in pseudocode: diff --git a/packages/backend/src/server/SkRateLimiterService.ts b/packages/backend/src/server/SkRateLimiterService.ts index 30bf092e4f..ccc26c1b61 100644 --- a/packages/backend/src/server/SkRateLimiterService.ts +++ b/packages/backend/src/server/SkRateLimiterService.ts @@ -17,10 +17,17 @@ import type { RoleService } from '@/core/RoleService.js'; // Required because MemoryKVCache doesn't support null keys. const defaultUserKey = ''; +interface Lockout { + at: number; + info: LimitInfo; +} + @Injectable() export class SkRateLimiterService { // 1-minute cache interval private readonly factorCache = new MemoryKVCache<number>(1000 * 60); + // 10-second cache interval + private readonly lockoutCache = new MemoryKVCache<Lockout>(1000 * 10); private readonly disabled: boolean; constructor( @@ -58,6 +65,15 @@ export class SkRateLimiterService { } const actor = typeof(actorOrUser) === 'object' ? actorOrUser.id : actorOrUser; + + // Documented in SkRateLimiterService.md under "Performance". + // Fast-path to avoid extra redis calls for blocked clients + const lockoutKey = `@${actor}#${limit.key}`; + const lockout = this.getLockout(lockoutKey); + if (lockout) { + return lockout; + } + const userCacheKey = typeof(actorOrUser) === 'object' ? actorOrUser.id : defaultUserKey; const userRoleKey = typeof(actorOrUser) === 'object' ? actorOrUser.id : null; const factor = this.factorCache.get(userCacheKey) ?? 
await this.factorCache.fetch(userCacheKey, async () => { @@ -73,6 +89,47 @@ export class SkRateLimiterService { throw new Error(`Rate limit factor is zero or negative: ${factor}`); } + const info = await this.applyLimit(limit, actor, factor); + + // Store blocked status to avoid hammering redis + if (info.blocked) { + this.lockoutCache.set(lockoutKey, { + at: this.timeService.now, + info, + }); + } + + return info; + } + + private getLockout(lockoutKey: string): LimitInfo | null { + const lockout = this.lockoutCache.get(lockoutKey); + if (!lockout) { + // Not blocked, proceed with redis check + return null; + } + + const now = this.timeService.now; + const elapsedMs = now - lockout.at; + if (elapsedMs >= lockout.info.resetMs) { + // Block expired, clear and proceed with redis check + this.lockoutCache.delete(lockoutKey); + return null; + } + + // Limit is still active, update calculations + lockout.at = now; + lockout.info.resetMs -= elapsedMs; + lockout.info.resetSec = Math.ceil(lockout.info.resetMs / 1000); + lockout.info.fullResetMs -= elapsedMs; + lockout.info.fullResetSec = Math.ceil(lockout.info.fullResetMs / 1000); + + // Re-cache the new object + this.lockoutCache.set(lockoutKey, lockout); + return lockout.info; + } + + private async applyLimit(limit: Keyed<RateLimit>, actor: string, factor: number) { if (isLegacyRateLimit(limit)) { return await this.limitLegacy(limit, actor, factor); } else {