 650c5e5654
			
		
	
	
	650c5e5654
	
	
	
		
			
			The unified hierarchy interface for memory cgroups will no longer use "-1" to mean maximum possible resource value. In preparation for this, make the string an argument and let the caller supply it. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: Vladimir Davydov <vdavydov@parallels.com> Cc: Greg Thelen <gthelen@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			193 lines
		
	
	
	
		
			4.8 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
	
		
			4.8 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Lockless hierarchical page accounting & limiting
 | |
|  *
 | |
|  * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 | |
|  */
 | |
| 
 | |
| #include <linux/page_counter.h>
 | |
| #include <linux/atomic.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/string.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/bug.h>
 | |
| #include <asm/page.h>
 | |
| 
 | |
| /**
 | |
|  * page_counter_cancel - take pages out of the local counter
 | |
|  * @counter: counter
 | |
|  * @nr_pages: number of pages to cancel
 | |
|  */
 | |
| void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
 | |
| {
 | |
| 	long new;
 | |
| 
 | |
| 	new = atomic_long_sub_return(nr_pages, &counter->count);
 | |
| 	/* More uncharges than charges? */
 | |
| 	WARN_ON_ONCE(new < 0);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * page_counter_charge - hierarchically charge pages
 | |
|  * @counter: counter
 | |
|  * @nr_pages: number of pages to charge
 | |
|  *
 | |
|  * NOTE: This does not consider any configured counter limits.
 | |
|  */
 | |
| void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
 | |
| {
 | |
| 	struct page_counter *c;
 | |
| 
 | |
| 	for (c = counter; c; c = c->parent) {
 | |
| 		long new;
 | |
| 
 | |
| 		new = atomic_long_add_return(nr_pages, &c->count);
 | |
| 		/*
 | |
| 		 * This is indeed racy, but we can live with some
 | |
| 		 * inaccuracy in the watermark.
 | |
| 		 */
 | |
| 		if (new > c->watermark)
 | |
| 			c->watermark = new;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * page_counter_try_charge - try to hierarchically charge pages
 | |
|  * @counter: counter
 | |
|  * @nr_pages: number of pages to charge
 | |
|  * @fail: points first counter to hit its limit, if any
 | |
|  *
 | |
|  * Returns 0 on success, or -ENOMEM and @fail if the counter or one of
 | |
|  * its ancestors has hit its configured limit.
 | |
|  */
 | |
| int page_counter_try_charge(struct page_counter *counter,
 | |
| 			    unsigned long nr_pages,
 | |
| 			    struct page_counter **fail)
 | |
| {
 | |
| 	struct page_counter *c;
 | |
| 
 | |
| 	for (c = counter; c; c = c->parent) {
 | |
| 		long new;
 | |
| 		/*
 | |
| 		 * Charge speculatively to avoid an expensive CAS.  If
 | |
| 		 * a bigger charge fails, it might falsely lock out a
 | |
| 		 * racing smaller charge and send it into reclaim
 | |
| 		 * early, but the error is limited to the difference
 | |
| 		 * between the two sizes, which is less than 2M/4M in
 | |
| 		 * case of a THP locking out a regular page charge.
 | |
| 		 *
 | |
| 		 * The atomic_long_add_return() implies a full memory
 | |
| 		 * barrier between incrementing the count and reading
 | |
| 		 * the limit.  When racing with page_counter_limit(),
 | |
| 		 * we either see the new limit or the setter sees the
 | |
| 		 * counter has changed and retries.
 | |
| 		 */
 | |
| 		new = atomic_long_add_return(nr_pages, &c->count);
 | |
| 		if (new > c->limit) {
 | |
| 			atomic_long_sub(nr_pages, &c->count);
 | |
| 			/*
 | |
| 			 * This is racy, but we can live with some
 | |
| 			 * inaccuracy in the failcnt.
 | |
| 			 */
 | |
| 			c->failcnt++;
 | |
| 			*fail = c;
 | |
| 			goto failed;
 | |
| 		}
 | |
| 		/*
 | |
| 		 * Just like with failcnt, we can live with some
 | |
| 		 * inaccuracy in the watermark.
 | |
| 		 */
 | |
| 		if (new > c->watermark)
 | |
| 			c->watermark = new;
 | |
| 	}
 | |
| 	return 0;
 | |
| 
 | |
| failed:
 | |
| 	for (c = counter; c != *fail; c = c->parent)
 | |
| 		page_counter_cancel(c, nr_pages);
 | |
| 
 | |
| 	return -ENOMEM;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * page_counter_uncharge - hierarchically uncharge pages
 | |
|  * @counter: counter
 | |
|  * @nr_pages: number of pages to uncharge
 | |
|  */
 | |
| void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
 | |
| {
 | |
| 	struct page_counter *c;
 | |
| 
 | |
| 	for (c = counter; c; c = c->parent)
 | |
| 		page_counter_cancel(c, nr_pages);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * page_counter_limit - limit the number of pages allowed
 | |
|  * @counter: counter
 | |
|  * @limit: limit to set
 | |
|  *
 | |
|  * Returns 0 on success, -EBUSY if the current number of pages on the
 | |
|  * counter already exceeds the specified limit.
 | |
|  *
 | |
|  * The caller must serialize invocations on the same counter.
 | |
|  */
 | |
| int page_counter_limit(struct page_counter *counter, unsigned long limit)
 | |
| {
 | |
| 	for (;;) {
 | |
| 		unsigned long old;
 | |
| 		long count;
 | |
| 
 | |
| 		/*
 | |
| 		 * Update the limit while making sure that it's not
 | |
| 		 * below the concurrently-changing counter value.
 | |
| 		 *
 | |
| 		 * The xchg implies two full memory barriers before
 | |
| 		 * and after, so the read-swap-read is ordered and
 | |
| 		 * ensures coherency with page_counter_try_charge():
 | |
| 		 * that function modifies the count before checking
 | |
| 		 * the limit, so if it sees the old limit, we see the
 | |
| 		 * modified counter and retry.
 | |
| 		 */
 | |
| 		count = atomic_long_read(&counter->count);
 | |
| 
 | |
| 		if (count > limit)
 | |
| 			return -EBUSY;
 | |
| 
 | |
| 		old = xchg(&counter->limit, limit);
 | |
| 
 | |
| 		if (atomic_long_read(&counter->count) <= count)
 | |
| 			return 0;
 | |
| 
 | |
| 		counter->limit = old;
 | |
| 		cond_resched();
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * page_counter_memparse - memparse() for page counter limits
 | |
|  * @buf: string to parse
 | |
|  * @max: string meaning maximum possible value
 | |
|  * @nr_pages: returns the result in number of pages
 | |
|  *
 | |
|  * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 | |
|  * limited to %PAGE_COUNTER_MAX.
 | |
|  */
 | |
| int page_counter_memparse(const char *buf, const char *max,
 | |
| 			  unsigned long *nr_pages)
 | |
| {
 | |
| 	char *end;
 | |
| 	u64 bytes;
 | |
| 
 | |
| 	if (!strcmp(buf, max)) {
 | |
| 		*nr_pages = PAGE_COUNTER_MAX;
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	bytes = memparse(buf, &end);
 | |
| 	if (*end != '\0')
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);
 | |
| 
 | |
| 	return 0;
 | |
| }
 |