net_sched: act_bpf: remove spinlock in fast path
Similar to act_gact/act_mirred, act_bpf can be lockless in packet processing, with extra care taken to free bpf programs after an RCU grace period. Replacement of an existing act_bpf (very rare) is done with synchronize_rcu(), and final destruction is done from the tc_action_ops->cleanup() callback that is called from tcf_exts_destroy()->tcf_action_destroy()->__tcf_hash_release() when bind and refcnt reach zero, which is only possible when the classifier is destroyed. The previous two patches fixed the last two classifiers (tcindex and rsvp) to call tcf_exts_destroy() from an RCU callback. Similar to gact/mirred, there is a race between prog->filter and prog->tcf_action, meaning that the program being replaced may use the previous default action if it happens to return TC_ACT_UNSPEC. The act_mirred race between tcf_action and tcfm_dev is similar. In all cases the race is harmless. Long term we may want to improve the situation by replacing the whole tc_action->priv as a single pointer instead of updating inner fields one by one. Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
9e528d8915
commit
cff82457c5
2 changed files with 20 additions and 18 deletions
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
struct tcf_bpf {
|
struct tcf_bpf {
|
||||||
struct tcf_common common;
|
struct tcf_common common;
|
||||||
struct bpf_prog *filter;
|
struct bpf_prog __rcu *filter;
|
||||||
union {
|
union {
|
||||||
u32 bpf_fd;
|
u32 bpf_fd;
|
||||||
u16 bpf_num_ops;
|
u16 bpf_num_ops;
|
||||||
|
|
|
@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
|
||||||
struct tcf_result *res)
|
struct tcf_result *res)
|
||||||
{
|
{
|
||||||
struct tcf_bpf *prog = act->priv;
|
struct tcf_bpf *prog = act->priv;
|
||||||
|
struct bpf_prog *filter;
|
||||||
int action, filter_res;
|
int action, filter_res;
|
||||||
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
|
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
|
||||||
|
|
||||||
if (unlikely(!skb_mac_header_was_set(skb)))
|
if (unlikely(!skb_mac_header_was_set(skb)))
|
||||||
return TC_ACT_UNSPEC;
|
return TC_ACT_UNSPEC;
|
||||||
|
|
||||||
spin_lock(&prog->tcf_lock);
|
tcf_lastuse_update(&prog->tcf_tm);
|
||||||
|
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
|
||||||
|
|
||||||
prog->tcf_tm.lastuse = jiffies;
|
|
||||||
bstats_update(&prog->tcf_bstats, skb);
|
|
||||||
|
|
||||||
/* Needed here for accessing maps. */
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
filter = rcu_dereference(prog->filter);
|
||||||
if (at_ingress) {
|
if (at_ingress) {
|
||||||
__skb_push(skb, skb->mac_len);
|
__skb_push(skb, skb->mac_len);
|
||||||
filter_res = BPF_PROG_RUN(prog->filter, skb);
|
filter_res = BPF_PROG_RUN(filter, skb);
|
||||||
__skb_pull(skb, skb->mac_len);
|
__skb_pull(skb, skb->mac_len);
|
||||||
} else {
|
} else {
|
||||||
filter_res = BPF_PROG_RUN(prog->filter, skb);
|
filter_res = BPF_PROG_RUN(filter, skb);
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
|
||||||
break;
|
break;
|
||||||
case TC_ACT_SHOT:
|
case TC_ACT_SHOT:
|
||||||
action = filter_res;
|
action = filter_res;
|
||||||
prog->tcf_qstats.drops++;
|
qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
|
||||||
break;
|
break;
|
||||||
case TC_ACT_UNSPEC:
|
case TC_ACT_UNSPEC:
|
||||||
action = prog->tcf_action;
|
action = prog->tcf_action;
|
||||||
|
@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock(&prog->tcf_lock);
|
|
||||||
return action;
|
return action;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
|
||||||
struct tcf_bpf_cfg *cfg)
|
struct tcf_bpf_cfg *cfg)
|
||||||
{
|
{
|
||||||
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
|
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
|
||||||
cfg->filter = prog->filter;
|
/* updates to prog->filter are prevented, since it's called either
|
||||||
|
* with rtnl lock or during final cleanup in rcu callback
|
||||||
|
*/
|
||||||
|
cfg->filter = rcu_dereference_protected(prog->filter, 1);
|
||||||
|
|
||||||
cfg->bpf_ops = prog->bpf_ops;
|
cfg->bpf_ops = prog->bpf_ops;
|
||||||
cfg->bpf_name = prog->bpf_name;
|
cfg->bpf_name = prog->bpf_name;
|
||||||
|
@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
|
||||||
|
|
||||||
if (!tcf_hash_check(parm->index, act, bind)) {
|
if (!tcf_hash_check(parm->index, act, bind)) {
|
||||||
ret = tcf_hash_create(parm->index, est, act,
|
ret = tcf_hash_create(parm->index, est, act,
|
||||||
sizeof(*prog), bind, false);
|
sizeof(*prog), bind, true);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
@ -325,7 +326,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
prog = to_bpf(act);
|
prog = to_bpf(act);
|
||||||
spin_lock_bh(&prog->tcf_lock);
|
ASSERT_RTNL();
|
||||||
|
|
||||||
if (res != ACT_P_CREATED)
|
if (res != ACT_P_CREATED)
|
||||||
tcf_bpf_prog_fill_cfg(prog, &old);
|
tcf_bpf_prog_fill_cfg(prog, &old);
|
||||||
|
@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
|
||||||
prog->bpf_fd = cfg.bpf_fd;
|
prog->bpf_fd = cfg.bpf_fd;
|
||||||
|
|
||||||
prog->tcf_action = parm->action;
|
prog->tcf_action = parm->action;
|
||||||
prog->filter = cfg.filter;
|
rcu_assign_pointer(prog->filter, cfg.filter);
|
||||||
|
|
||||||
spin_unlock_bh(&prog->tcf_lock);
|
if (res == ACT_P_CREATED) {
|
||||||
|
|
||||||
if (res == ACT_P_CREATED)
|
|
||||||
tcf_hash_insert(act);
|
tcf_hash_insert(act);
|
||||||
else
|
} else {
|
||||||
|
/* make sure the program being replaced is no longer executing */
|
||||||
|
synchronize_rcu();
|
||||||
tcf_bpf_cfg_cleanup(&old);
|
tcf_bpf_cfg_cleanup(&old);
|
||||||
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
out:
|
out:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue