 1abbe1394a
			
		
	
	
	1abbe1394a
	
	
	
		
			
			With BQL being deployed, we are more likely to see the following behavior: We dequeue a packet from qdisc in dequeue_skb(), then we realize target tx queue is in XOFF state in sch_direct_xmit(), and we have to hold the skb into gso_skb for later. This shows in stats (tc -s qdisc dev eth0) as requeues. The problem with these requeues is that high priority packets cannot be dequeued as long as this (possibly low prio and big TSO) packet is not removed from gso_skb. At 1Gbps speed, a full size TSO packet is 500 us of extra latency. In some cases, we know that all packets dequeued from a qdisc are for a particular and known txq : - If device is non multi queue - For all MQ/MQPRIO slave qdiscs This patch introduces a new qdisc flag, TCQ_F_ONETXQUEUE to mark this capability, so that dequeue_skb() is allowed to dequeue a packet only if the associated txq is not stopped. This indeed reduces latencies for high prio packets (or improves fairness with sfq/fq_codel), and almost removes qdisc 'requeues'. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
		
			
				
	
	
		
			243 lines
		
	
	
	
		
			5.7 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			243 lines
		
	
	
	
		
			5.7 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * net/sched/sch_mq.c		Classful multiqueue dummy scheduler
 | |
|  *
 | |
|  * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License
 | |
|  * version 2 as published by the Free Software Foundation.
 | |
|  */
 | |
| 
 | |
| #include <linux/types.h>
 | |
| #include <linux/slab.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/export.h>
 | |
| #include <linux/string.h>
 | |
| #include <linux/errno.h>
 | |
| #include <linux/skbuff.h>
 | |
| #include <net/netlink.h>
 | |
| #include <net/pkt_sched.h>
 | |
| 
 | |
| struct mq_sched {
 | |
| 	struct Qdisc		**qdiscs;
 | |
| };
 | |
| 
 | |
| static void mq_destroy(struct Qdisc *sch)
 | |
| {
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	struct mq_sched *priv = qdisc_priv(sch);
 | |
| 	unsigned int ntx;
 | |
| 
 | |
| 	if (!priv->qdiscs)
 | |
| 		return;
 | |
| 	for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
 | |
| 		qdisc_destroy(priv->qdiscs[ntx]);
 | |
| 	kfree(priv->qdiscs);
 | |
| }
 | |
| 
 | |
| static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 | |
| {
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	struct mq_sched *priv = qdisc_priv(sch);
 | |
| 	struct netdev_queue *dev_queue;
 | |
| 	struct Qdisc *qdisc;
 | |
| 	unsigned int ntx;
 | |
| 
 | |
| 	if (sch->parent != TC_H_ROOT)
 | |
| 		return -EOPNOTSUPP;
 | |
| 
 | |
| 	if (!netif_is_multiqueue(dev))
 | |
| 		return -EOPNOTSUPP;
 | |
| 
 | |
| 	/* pre-allocate qdiscs, attachment can't fail */
 | |
| 	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
 | |
| 			       GFP_KERNEL);
 | |
| 	if (priv->qdiscs == NULL)
 | |
| 		return -ENOMEM;
 | |
| 
 | |
| 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
 | |
| 		dev_queue = netdev_get_tx_queue(dev, ntx);
 | |
| 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
 | |
| 					  TC_H_MAKE(TC_H_MAJ(sch->handle),
 | |
| 						    TC_H_MIN(ntx + 1)));
 | |
| 		if (qdisc == NULL)
 | |
| 			goto err;
 | |
| 		priv->qdiscs[ntx] = qdisc;
 | |
| 		qdisc->flags |= TCQ_F_ONETXQUEUE;
 | |
| 	}
 | |
| 
 | |
| 	sch->flags |= TCQ_F_MQROOT;
 | |
| 	return 0;
 | |
| 
 | |
| err:
 | |
| 	mq_destroy(sch);
 | |
| 	return -ENOMEM;
 | |
| }
 | |
| 
 | |
| static void mq_attach(struct Qdisc *sch)
 | |
| {
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	struct mq_sched *priv = qdisc_priv(sch);
 | |
| 	struct Qdisc *qdisc;
 | |
| 	unsigned int ntx;
 | |
| 
 | |
| 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
 | |
| 		qdisc = priv->qdiscs[ntx];
 | |
| 		qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
 | |
| 		if (qdisc)
 | |
| 			qdisc_destroy(qdisc);
 | |
| 	}
 | |
| 	kfree(priv->qdiscs);
 | |
| 	priv->qdiscs = NULL;
 | |
| }
 | |
| 
 | |
| static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 | |
| {
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	struct Qdisc *qdisc;
 | |
| 	unsigned int ntx;
 | |
| 
 | |
| 	sch->q.qlen = 0;
 | |
| 	memset(&sch->bstats, 0, sizeof(sch->bstats));
 | |
| 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 | |
| 
 | |
| 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
 | |
| 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 | |
| 		spin_lock_bh(qdisc_lock(qdisc));
 | |
| 		sch->q.qlen		+= qdisc->q.qlen;
 | |
| 		sch->bstats.bytes	+= qdisc->bstats.bytes;
 | |
| 		sch->bstats.packets	+= qdisc->bstats.packets;
 | |
| 		sch->qstats.qlen	+= qdisc->qstats.qlen;
 | |
| 		sch->qstats.backlog	+= qdisc->qstats.backlog;
 | |
| 		sch->qstats.drops	+= qdisc->qstats.drops;
 | |
| 		sch->qstats.requeues	+= qdisc->qstats.requeues;
 | |
| 		sch->qstats.overlimits	+= qdisc->qstats.overlimits;
 | |
| 		spin_unlock_bh(qdisc_lock(qdisc));
 | |
| 	}
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
 | |
| {
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	unsigned long ntx = cl - 1;
 | |
| 
 | |
| 	if (ntx >= dev->num_tx_queues)
 | |
| 		return NULL;
 | |
| 	return netdev_get_tx_queue(dev, ntx);
 | |
| }
 | |
| 
 | |
| static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
 | |
| 					    struct tcmsg *tcm)
 | |
| {
 | |
| 	unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
 | |
| 	struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
 | |
| 
 | |
| 	if (!dev_queue) {
 | |
| 		struct net_device *dev = qdisc_dev(sch);
 | |
| 
 | |
| 		return netdev_get_tx_queue(dev, 0);
 | |
| 	}
 | |
| 	return dev_queue;
 | |
| }
 | |
| 
 | |
| static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 | |
| 		    struct Qdisc **old)
 | |
| {
 | |
| 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 
 | |
| 	if (dev->flags & IFF_UP)
 | |
| 		dev_deactivate(dev);
 | |
| 
 | |
| 	*old = dev_graft_qdisc(dev_queue, new);
 | |
| 	if (new)
 | |
| 		new->flags |= TCQ_F_ONETXQUEUE;
 | |
| 	if (dev->flags & IFF_UP)
 | |
| 		dev_activate(dev);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
 | |
| {
 | |
| 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 | |
| 
 | |
| 	return dev_queue->qdisc_sleeping;
 | |
| }
 | |
| 
 | |
| static unsigned long mq_get(struct Qdisc *sch, u32 classid)
 | |
| {
 | |
| 	unsigned int ntx = TC_H_MIN(classid);
 | |
| 
 | |
| 	if (!mq_queue_get(sch, ntx))
 | |
| 		return 0;
 | |
| 	return ntx;
 | |
| }
 | |
| 
 | |
| static void mq_put(struct Qdisc *sch, unsigned long cl)
 | |
| {
 | |
| }
 | |
| 
 | |
| static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
 | |
| 			 struct sk_buff *skb, struct tcmsg *tcm)
 | |
| {
 | |
| 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 | |
| 
 | |
| 	tcm->tcm_parent = TC_H_ROOT;
 | |
| 	tcm->tcm_handle |= TC_H_MIN(cl);
 | |
| 	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 | |
| 			       struct gnet_dump *d)
 | |
| {
 | |
| 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 | |
| 
 | |
| 	sch = dev_queue->qdisc_sleeping;
 | |
| 	sch->qstats.qlen = sch->q.qlen;
 | |
| 	if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
 | |
| 	    gnet_stats_copy_queue(d, &sch->qstats) < 0)
 | |
| 		return -1;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 | |
| {
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	unsigned int ntx;
 | |
| 
 | |
| 	if (arg->stop)
 | |
| 		return;
 | |
| 
 | |
| 	arg->count = arg->skip;
 | |
| 	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
 | |
| 		if (arg->fn(sch, ntx + 1, arg) < 0) {
 | |
| 			arg->stop = 1;
 | |
| 			break;
 | |
| 		}
 | |
| 		arg->count++;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static const struct Qdisc_class_ops mq_class_ops = {
 | |
| 	.select_queue	= mq_select_queue,
 | |
| 	.graft		= mq_graft,
 | |
| 	.leaf		= mq_leaf,
 | |
| 	.get		= mq_get,
 | |
| 	.put		= mq_put,
 | |
| 	.walk		= mq_walk,
 | |
| 	.dump		= mq_dump_class,
 | |
| 	.dump_stats	= mq_dump_class_stats,
 | |
| };
 | |
| 
 | |
| struct Qdisc_ops mq_qdisc_ops __read_mostly = {
 | |
| 	.cl_ops		= &mq_class_ops,
 | |
| 	.id		= "mq",
 | |
| 	.priv_size	= sizeof(struct mq_sched),
 | |
| 	.init		= mq_init,
 | |
| 	.destroy	= mq_destroy,
 | |
| 	.attach		= mq_attach,
 | |
| 	.dump		= mq_dump,
 | |
| 	.owner		= THIS_MODULE,
 | |
| };
 |