sched: Make __update_entity_runnable_avg() fast
__update_entity_runnable_avg forms the core of maintaining an entity's runnable
load average.  In this function we charge the accumulated run-time since last
update and handle appropriate decay.  In some cases, e.g. a waking task, this
time interval may be much larger than our period unit.
Fortunately we can exploit some properties of our series to perform decay for a
blocked update in constant time and account the contribution for a running
update in essentially-constant* time.
[*]: Any running entity should be performing updates at the tick, which
gives us a soft limit of 1 jiffy between updates, and we can compute up to a
32-jiffy update in a single pass.
C program to generate the magic constants in the arrays:
  #include <math.h>
  #include <stdio.h>
  /* Number of table entries; main() also picks y so that y^N == 0.5. */
  #define N 32
  /* Right shift applied by mult_inv() after multiplying by a table entry. */
  #define WMULT_SHIFT 32
  /*
   * Scale factor used when filling the table: 2^N - 1.
   * NOTE(review): 1UL << N is only defined when unsigned long is wider than
   * N (32) bits, i.e. this assumes a 64-bit long.
   */
  const long WMULT_CONST = ((1UL << N) - 1);
  /* Per-step decay factor; assigned in main() before the calc_* calls. */
  double y;
  /* Filled by calc_mult_inv(): entry i is WMULT_CONST * y^i converted to long. */
  long runnable_avg_yN_inv[N];
  void calc_mult_inv() {
  	int i;
  	double yn = 0;
  	printf("inverses\n");
  	for (i = 0; i < N; i++) {
  		yn = (double)WMULT_CONST * pow(y, i);
  		runnable_avg_yN_inv[i] = yn;
  		printf("%2d: 0x%8lx\n", i, runnable_avg_yN_inv[i]);
  	}
  	printf("\n");
  }
  /*
   * Multiply c by table entry n of runnable_avg_yN_inv[] and shift the
   * product right by WMULT_SHIFT bits, returning the result.
   */
  long mult_inv(long c, int n) {
  	long product = c * runnable_avg_yN_inv[n];

  	return product >> WMULT_SHIFT;
  }
  /*
   * Print a table of the running sums 1024 * (y + y^2 + ... + y^i) for
   * i = 1..n.  Each row shows the sum kept in full double precision
   * ("exact"), the sum floored after every step ("floor"), and the
   * difference floor - exact ("error").
   */
  void calc_yn_sum(int n)
  {
  	int i;
  	double sum = 0, sum_fl = 0;

  	printf("sum y^n\n");
  	printf("   %8s  %8s %8s\n", "exact", "floor", "error");
  	for (i = 1; i <= n; i++) {
  		sum = (y * sum + y * 1024);
  		/*
  		 * We take the floored sum to ensure the sum of partial sums
  		 * is never larger than the actual sum.
  		 */
  		sum_fl = floor(y * sum_fl+ y * 1024);
  		printf("%2d: %8.0f  %8.0f %8.0f\n", i, sum, sum_fl,
  			sum_fl - sum);
  	}
  	printf("\n");
  }
  /*
   * Repeatedly apply n = mult_inv(n, 1) + 1024 until n stops changing, then
   * print "<count>> <n>".  The counter starts at -1 and one more is
   * subtracted when printing, so the count shown is two less than the number
   * of passes through the loop.
   */
  void calc_conv(long n) {
  	long prev;
  	int passes = -1;

  	printf("convergence (LOAD_AVG_MAX, LOAD_AVG_MAX_N)\n");
  	for (;;) {
  		prev = n;
  		n = mult_inv(n, 1) + 1024;
  		passes++;
  		if (n == prev)
  			break;
  	}
  	printf("%d> %ld\n", passes - 1, n);
  	printf("\n");
  }
  /*
   * Entry point: choose y so that y^N == 0.5, then print the inverse
   * multiplier table, the convergence value starting from 1024, and the
   * partial-sum table for N periods.
   *
   * Declared as int main(void), the standard hosted signature, and returns 0
   * on completion.
   */
  int main(void) {
  	y = pow(0.5, 1/(double)N);
  	calc_mult_inv();
  	calc_conv(1024);
  	calc_yn_sum(N);
  	return 0;
  }
[ Compile with -lm ]
Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141507.277808946@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
	
	
This commit is contained in:
		
					parent
					
						
							
								f269ae0469
							
						
					
				
			
			
				commit
				
					
						5b51f2f80b
					
				
			
		
					 1 changed files with 99 additions and 22 deletions
				
			
		| 
						 | 
					@ -883,18 +883,93 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
 | 
				
			||||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
 | 
					#endif /* CONFIG_FAIR_GROUP_SCHED */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_SMP
 | 
					#ifdef CONFIG_SMP
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * We choose a half-life close to 1 scheduling period.
 | 
				
			||||||
 | 
					 * Note: The tables below are dependent on this value.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define LOAD_AVG_PERIOD 32
 | 
				
			||||||
 | 
					#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
 | 
				
			||||||
 | 
					#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Precomputed fixed inverse multiplies for multiplication by y^n */
 | 
				
			||||||
 | 
					static const u32 runnable_avg_yN_inv[] = {
 | 
				
			||||||
 | 
						0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
 | 
				
			||||||
 | 
						0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
 | 
				
			||||||
 | 
						0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
 | 
				
			||||||
 | 
						0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
 | 
				
			||||||
 | 
						0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
 | 
				
			||||||
 | 
						0x85aac367, 0x82cd8698,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Precomputed \Sum y^k { 1<=k<=n }.  These are floor(true_value) to prevent
 | 
				
			||||||
 | 
					 * over-estimates when re-combining.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static const u32 runnable_avg_yN_sum[] = {
 | 
				
			||||||
 | 
						    0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
 | 
				
			||||||
 | 
						 9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
 | 
				
			||||||
 | 
						17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Approximate:
 | 
					 * Approximate:
 | 
				
			||||||
 *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
 | 
					 *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static __always_inline u64 decay_load(u64 val, u64 n)
 | 
					static __always_inline u64 decay_load(u64 val, u64 n)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	for (; n && val; n--) {
 | 
						unsigned int local_n;
 | 
				
			||||||
		val *= 4008;
 | 
					
 | 
				
			||||||
		val >>= 12;
 | 
						if (!n)
 | 
				
			||||||
 | 
							return val;
 | 
				
			||||||
 | 
						else if (unlikely(n > LOAD_AVG_PERIOD * 63))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* after bounds checking we can collapse to 32-bit */
 | 
				
			||||||
 | 
						local_n = n;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * As y^PERIOD = 1/2, we can combine
 | 
				
			||||||
 | 
						 *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
 | 
				
			||||||
 | 
						 * With a look-up table which covers y^n (n<PERIOD)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * To achieve constant time decay_load.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
 | 
				
			||||||
 | 
							val >>= local_n / LOAD_AVG_PERIOD;
 | 
				
			||||||
 | 
							local_n %= LOAD_AVG_PERIOD;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return val;
 | 
						val *= runnable_avg_yN_inv[local_n];
 | 
				
			||||||
 | 
						/* We don't use SRR here since we always want to round down. */
 | 
				
			||||||
 | 
						return val >> 32;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * For updates fully spanning n periods, the contribution to runnable
 | 
				
			||||||
 | 
					 * average will be: \Sum 1024*y^n
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We can compute this reasonably efficiently by combining:
 | 
				
			||||||
 | 
					 *   y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for  n <PERIOD}
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static u32 __compute_runnable_contrib(u64 n)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						u32 contrib = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (likely(n <= LOAD_AVG_PERIOD))
 | 
				
			||||||
 | 
							return runnable_avg_yN_sum[n];
 | 
				
			||||||
 | 
						else if (unlikely(n >= LOAD_AVG_MAX_N))
 | 
				
			||||||
 | 
							return LOAD_AVG_MAX;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Compute \Sum k^n combining precomputed values for k^i, \Sum k^j */
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */
 | 
				
			||||||
 | 
							contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							n -= LOAD_AVG_PERIOD;
 | 
				
			||||||
 | 
						} while (n > LOAD_AVG_PERIOD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						contrib = decay_load(contrib, n);
 | 
				
			||||||
 | 
						return contrib + runnable_avg_yN_sum[n];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -929,7 +1004,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 | 
				
			||||||
							struct sched_avg *sa,
 | 
												struct sched_avg *sa,
 | 
				
			||||||
							int runnable)
 | 
												int runnable)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	u64 delta;
 | 
						u64 delta, periods;
 | 
				
			||||||
 | 
						u32 runnable_contrib;
 | 
				
			||||||
	int delta_w, decayed = 0;
 | 
						int delta_w, decayed = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	delta = now - sa->last_runnable_update;
 | 
						delta = now - sa->last_runnable_update;
 | 
				
			||||||
| 
						 | 
					@ -963,25 +1039,26 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 | 
				
			||||||
		 * period and accrue it.
 | 
							 * period and accrue it.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		delta_w = 1024 - delta_w;
 | 
							delta_w = 1024 - delta_w;
 | 
				
			||||||
		BUG_ON(delta_w > delta);
 | 
							if (runnable)
 | 
				
			||||||
		do {
 | 
								sa->runnable_avg_sum += delta_w;
 | 
				
			||||||
			if (runnable)
 | 
							sa->runnable_avg_period += delta_w;
 | 
				
			||||||
				sa->runnable_avg_sum += delta_w;
 | 
					 | 
				
			||||||
			sa->runnable_avg_period += delta_w;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/*
 | 
							delta -= delta_w;
 | 
				
			||||||
			 * Remainder of delta initiates a new period, roll over
 | 
					 | 
				
			||||||
			 * the previous.
 | 
					 | 
				
			||||||
			 */
 | 
					 | 
				
			||||||
			sa->runnable_avg_sum =
 | 
					 | 
				
			||||||
				decay_load(sa->runnable_avg_sum, 1);
 | 
					 | 
				
			||||||
			sa->runnable_avg_period =
 | 
					 | 
				
			||||||
				decay_load(sa->runnable_avg_period, 1);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
			delta -= delta_w;
 | 
							/* Figure out how many additional periods this update spans */
 | 
				
			||||||
			/* New period is empty */
 | 
							periods = delta / 1024;
 | 
				
			||||||
			delta_w = 1024;
 | 
							delta %= 1024;
 | 
				
			||||||
		} while (delta >= 1024);
 | 
					
 | 
				
			||||||
 | 
							sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
 | 
				
			||||||
 | 
											  periods + 1);
 | 
				
			||||||
 | 
							sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
 | 
				
			||||||
 | 
											     periods + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Efficiently calculate \sum (1..n_period) 1024*y^i */
 | 
				
			||||||
 | 
							runnable_contrib = __compute_runnable_contrib(periods);
 | 
				
			||||||
 | 
							if (runnable)
 | 
				
			||||||
 | 
								sa->runnable_avg_sum += runnable_contrib;
 | 
				
			||||||
 | 
							sa->runnable_avg_period += runnable_contrib;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Remainder of delta accrued against u_0` */
 | 
						/* Remainder of delta accrued against u_0` */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue