From af76fc6c158d5e70764c9cc277aefe7a134436fd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 5 Jun 2019 13:46:29 -0400 Subject: [PATCH 001/658] NFSD fill-in netloc4 structure nfs.4 defines nfs42_netaddr structure that represents netloc4. Populate needed fields from the sockaddr structure. This will be used by flexfiles and 4.2 inter copy Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfsd.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 57b93d95fa5c..0ff6ef964a48 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -387,6 +388,37 @@ void nfsd_lockd_shutdown(void); extern const u32 nfsd_suppattrs[3][3]; +static inline u32 nfsd4_set_netaddr(struct sockaddr *addr, + struct nfs42_netaddr *netaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + unsigned int port; + size_t ret_addr, ret_port; + + switch (addr->sa_family) { + case AF_INET: + port = ntohs(sin->sin_port); + sprintf(netaddr->netid, "tcp"); + netaddr->netid_len = 3; + break; + case AF_INET6: + port = ntohs(sin6->sin6_port); + sprintf(netaddr->netid, "tcp6"); + netaddr->netid_len = 4; + break; + default: + return nfserr_inval; + } + ret_addr = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr)); + ret_port = snprintf(netaddr->addr + ret_addr, + RPCBIND_MAXUADDRLEN + 1 - ret_addr, + ".%u.%u", port >> 8, port & 0xff); + WARN_ON(ret_port >= RPCBIND_MAXUADDRLEN + 1 - ret_addr); + netaddr->addr_len = ret_addr + ret_port; + return 0; +} + static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2) { return !((bm1[0] & ~bm2[0]) || From 84e1b21d5ec4cc1b005586f32c67c046ea4ffb8a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 13 Sep 2019 14:00:57 -0400 Subject: [PATCH 002/658] NFSD add ca_source_server<> to COPY Decode the ca_source_server list that's sent but only use the first one. Presence of non-zero list indicates an "inter" copy. 
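For reference, the nl4_server decode added here corresponds roughly to the following stand-alone sketch (user-space; the names, limits and buffer handling are illustrative, and only the NL4_NETADDR arm is shown):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NL4_NETADDR  1
    #define MAX_NETID    4      /* illustrative, cf. RPCBIND_MAXNETIDLEN */
    #define MAX_UADDR    56     /* illustrative, cf. RPCBIND_MAXUADDRLEN */

    struct netaddr { char netid[MAX_NETID + 1]; char uaddr[MAX_UADDR + 1]; };

    /* Decode one XDR-encoded nl4_server of type NL4_NETADDR from p/len.
     * Returns bytes consumed, or -1 on malformed or oversized input.
     */
    static int decode_nl4_netaddr(const unsigned char *p, size_t len,
                                  struct netaddr *na)
    {
        uint32_t type, nlen, ulen;
        size_t off;

        if (len < 8)
            return -1;
        memcpy(&type, p, 4);
        if (ntohl(type) != NL4_NETADDR)
            return -1;
        memcpy(&nlen, p + 4, 4);
        nlen = ntohl(nlen);
        off = 8;
        if (nlen > MAX_NETID || len < off + ((nlen + 3) & ~3u) + 4)
            return -1;
        memcpy(na->netid, p + off, nlen);
        na->netid[nlen] = '\0';
        off += (nlen + 3) & ~3u;        /* XDR opaques are padded to 4 bytes */

        memcpy(&ulen, p + off, 4);
        ulen = ntohl(ulen);
        off += 4;
        if (ulen > MAX_UADDR || len < off + ((ulen + 3) & ~3u))
            return -1;
        memcpy(na->uaddr, p + off, ulen);
        na->uaddr[ulen] = '\0';
        off += (ulen + 3) & ~3u;

        return (int)off;
    }

    int main(void)
    {
        unsigned char buf[64] = { 0 };
        struct netaddr na;
        const char *netid = "tcp", *uaddr = "192.0.2.1.8.1";
        uint32_t v;

        v = htonl(NL4_NETADDR);                 memcpy(buf +  0, &v, 4);
        v = htonl((uint32_t)strlen(netid));     memcpy(buf +  4, &v, 4);
        memcpy(buf + 8, netid, strlen(netid));  /* padded to offset 12 */
        v = htonl((uint32_t)strlen(uaddr));     memcpy(buf + 12, &v, 4);
        memcpy(buf + 16, uaddr, strlen(uaddr)); /* padded to offset 32 */

        if (decode_nl4_netaddr(buf, 32, &na) > 0)
            printf("netid=%s uaddr=%s\n", na.netid, na.uaddr);
        return 0;
    }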
Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4xdr.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++- fs/nfsd/xdr4.h | 12 +++++---- 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d2dc4c0e22e8..a1d839406dc1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "idmap.h" #include "acl.h" @@ -1744,10 +1745,47 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) DECODE_TAIL; } +static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, + struct nl4_server *ns) +{ + DECODE_HEAD; + struct nfs42_netaddr *naddr; + + READ_BUF(4); + ns->nl4_type = be32_to_cpup(p++); + + /* currently support for 1 inter-server source server */ + switch (ns->nl4_type) { + case NL4_NETADDR: + naddr = &ns->u.nl4_addr; + + READ_BUF(4); + naddr->netid_len = be32_to_cpup(p++); + if (naddr->netid_len > RPCBIND_MAXNETIDLEN) + goto xdr_error; + + READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */ + COPYMEM(naddr->netid, naddr->netid_len); + + naddr->addr_len = be32_to_cpup(p++); + if (naddr->addr_len > RPCBIND_MAXUADDRLEN) + goto xdr_error; + + READ_BUF(naddr->addr_len); + COPYMEM(naddr->addr, naddr->addr_len); + break; + default: + goto xdr_error; + } + DECODE_TAIL; +} + static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { DECODE_HEAD; + struct nl4_server *ns_dummy; + int i, count; status = nfsd4_decode_stateid(argp, ©->cp_src_stateid); if (status) @@ -1762,7 +1800,32 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) p = xdr_decode_hyper(p, ©->cp_count); p++; /* ca_consecutive: we always do consecutive copies */ copy->cp_synchronous = be32_to_cpup(p++); - /* tmp = be32_to_cpup(p); Source server list not supported */ + + count = be32_to_cpup(p++); + + copy->cp_intra = false; + if (count == 0) { /* intra-server copy */ + copy->cp_intra = true; + goto intra; + } + + /* decode all the supplied server addresses but use first */ + status = nfsd4_decode_nl4_server(argp, ©->cp_src); + if (status) + return status; + + ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); + if (ns_dummy == NULL) + return nfserrno(-ENOMEM); + for (i = 0; i < count - 1; i++) { + status = nfsd4_decode_nl4_server(argp, ns_dummy); + if (status) { + kfree(ns_dummy); + return status; + } + } + kfree(ns_dummy); +intra: DECODE_TAIL; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f4737d66ee98..e815a9cc3b05 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -518,11 +518,13 @@ struct nfsd42_write_res { struct nfsd4_copy { /* request */ - stateid_t cp_src_stateid; - stateid_t cp_dst_stateid; - u64 cp_src_pos; - u64 cp_dst_pos; - u64 cp_count; + stateid_t cp_src_stateid; + stateid_t cp_dst_stateid; + u64 cp_src_pos; + u64 cp_dst_pos; + u64 cp_count; + struct nl4_server cp_src; + bool cp_intra; /* both */ bool cp_synchronous; From 51911868fc62f6b1bc460ea2d8bddece6c72e467 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 8 Aug 2019 11:14:59 -0400 Subject: [PATCH 003/658] NFSD COPY_NOTIFY xdr Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4proc.c | 28 +++++++++++++++ fs/nfsd/nfs4xdr.c | 90 ++++++++++++++++++++++++++++++++++++++++++++-- fs/nfsd/xdr4.h | 13 +++++++ 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4798667af647..d3f60562ac1d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1339,6 +1339,13 @@ 
nfsd4_offload_cancel(struct svc_rqst *rqstp, return status; } +static __be32 +nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + return nfserr_notsupp; +} + static __be32 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_fallocate *fallocate, int flags) @@ -2292,6 +2299,21 @@ static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } +static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 3 /* cnr_lease_time */ + + 1 /* We support one cnr_source_server */ + + 1 /* cnr_stateid seq */ + + op_encode_stateid_maxsz /* cnr_stateid */ + + 1 /* num cnr_source_server*/ + + 1 /* nl4_type */ + + 1 /* nl4 size */ + + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */) + * sizeof(__be32); +} + #ifdef CONFIG_NFSD_PNFS static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { @@ -2716,6 +2738,12 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_OFFLOAD_CANCEL", .op_rsize_bop = nfsd4_only_status_rsize, }, + [OP_COPY_NOTIFY] = { + .op_func = nfsd4_copy_notify, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_COPY_NOTIFY", + .op_rsize_bop = nfsd4_copy_notify_rsize, + }, }; /** diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a1d839406dc1..1d8a08b84e32 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1837,6 +1837,18 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid(argp, &os->stateid); } +static __be32 +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, + struct nfsd4_copy_notify *cn) +{ + int status; + + status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid); + if (status) + return status; + return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); +} + static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { @@ -1938,7 +1950,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { /* new operations for NFSv4.2 */ [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -4306,6 +4318,46 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, return nfs_ok; } +static __be32 +nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) +{ + struct xdr_stream *xdr = &resp->xdr; + struct nfs42_netaddr *addr; + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = cpu_to_be32(ns->nl4_type); + + switch (ns->nl4_type) { + case NL4_NETADDR: + addr = &ns->u.nl4_addr; + + /* netid_len, netid, uaddr_len, uaddr (port included + * in RPCBIND_MAXUADDRLEN) + */ + p = xdr_reserve_space(xdr, + 4 /* netid len */ + + (XDR_QUADLEN(addr->netid_len) * 4) + + 4 /* uaddr len */ + + (XDR_QUADLEN(addr->addr_len) * 4)); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(addr->netid_len); + p = xdr_encode_opaque_fixed(p, addr->netid, + addr->netid_len); + *p++ = cpu_to_be32(addr->addr_len); + p = xdr_encode_opaque_fixed(p, addr->addr, + addr->addr_len); + break; + default: + WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR); + return nfserr_inval; + } + + return 0; +} + static __be32 nfsd4_encode_copy(struct nfsd4_compoundres 
*resp, __be32 nfserr, struct nfsd4_copy *copy) @@ -4339,6 +4391,40 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; } +static __be32 +nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_copy_notify *cn) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + if (nfserr) + return nfserr; + + /* 8 sec, 4 nsec */ + p = xdr_reserve_space(xdr, 12); + if (!p) + return nfserr_resource; + + /* cnr_lease_time */ + p = xdr_encode_hyper(p, cn->cpn_sec); + *p++ = cpu_to_be32(cn->cpn_nsec); + + /* cnr_stateid */ + nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid); + if (nfserr) + return nfserr; + + /* cnr_src.nl_nsvr */ + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(1); + + return nfsd42_encode_nl4_server(resp, &cn->cpn_src); +} + static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_seek *seek) @@ -4436,7 +4522,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { /* NFSv4.2 operations */ [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index e815a9cc3b05..8231fe04bec0 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -570,6 +570,18 @@ struct nfsd4_offload_status { u32 status; }; +struct nfsd4_copy_notify { + /* request */ + stateid_t cpn_src_stateid; + struct nl4_server cpn_dst; + + /* response */ + stateid_t cpn_cnr_stateid; + u64 cpn_sec; + u32 cpn_nsec; + struct nl4_server cpn_src; +}; + struct nfsd4_op { int opnum; const struct nfsd4_operation * opdesc; @@ -629,6 +641,7 @@ struct nfsd4_op { struct nfsd4_clone clone; struct nfsd4_copy copy; struct nfsd4_offload_status offload_status; + struct nfsd4_copy_notify copy_notify; struct nfsd4_seek seek; } u; struct nfs4_replay * replay; From 624322f1adc58acd0b69f77a6ddc764207e97241 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 4 Oct 2019 16:34:26 -0400 Subject: [PATCH 004/658] NFSD add COPY_NOTIFY operation Introducing the COPY_NOTIFY operation. Create a new unique stateid that will keep track of the copy state and the upcoming READs that will use that stateid. Each associated parent stateid has a list of copy notify stateids. A copy notify structure makes a copy of the parent stateid and a clientid and will use it to look up the parent stateid during the READ request (suggested by Trond Myklebust ). At nfs4_put_stid() time, we walk the list of the associated copy notify stateids and delete them. Laundromat thread will traverse globally stored copy notify stateid in idr and notice if any haven't been referenced in the lease period, if so, it'll remove them. Return single netaddr to advertise to the copy. 
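The single netaddr returned in cnr_source_server is this server's own address in the usual ONC RPC universal-address form built by nfsd4_set_netaddr(): the IP address followed by the port split into two decimal octets. A stand-alone sketch of that formatting (IPv4 only, illustrative names, not nfsd code):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>

    /* Sketch only: the advertised universal address is
     * "<ip>.<port-high-byte>.<port-low-byte>".
     */
    static void format_uaddr(const struct sockaddr_in *sin, char *buf, size_t len)
    {
        char ip[INET_ADDRSTRLEN];
        unsigned int port = ntohs(sin->sin_port);

        inet_ntop(AF_INET, &sin->sin_addr, ip, sizeof(ip));
        snprintf(buf, len, "%s.%u.%u", ip, port >> 8, port & 0xff);
    }

    int main(void)
    {
        struct sockaddr_in sin = { .sin_family = AF_INET };
        char uaddr[64];

        inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
        sin.sin_port = htons(2049);
        format_uaddr(&sin, uaddr, sizeof(uaddr));
        printf("netid=tcp addr=%s\n", uaddr);   /* -> 192.0.2.1.8.1 */
        return 0;
    }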
Suggested-by: Trond Myklebust Signed-off-by: Olga Kornievskaia Signed-off-by: Andy Adamson --- fs/nfsd/nfs4proc.c | 58 +++++++++++++++++++----- fs/nfsd/nfs4state.c | 106 +++++++++++++++++++++++++++++++++++++++----- fs/nfsd/state.h | 31 +++++++++++-- fs/nfsd/xdr4.h | 2 +- 4 files changed, 173 insertions(+), 24 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d3f60562ac1d..dee5aec098fd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "idmap.h" #include "cache.h" @@ -776,7 +777,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, - &read->rd_nf); + &read->rd_nf, NULL); if (status) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; @@ -948,7 +949,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, - WR_STATE, NULL); + WR_STATE, NULL, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -999,7 +1000,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, trace_nfsd_write_start(rqstp, &cstate->current_fh, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, - stateid, WR_STATE, &nf); + stateid, WR_STATE, &nf, NULL); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; @@ -1034,14 +1035,14 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_nofilehandle; status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, - src_stateid, RD_STATE, src); + src_stateid, RD_STATE, src, NULL); if (status) { dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); goto out; } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, - dst_stateid, WR_STATE, dst); + dst_stateid, WR_STATE, dst, NULL); if (status) { dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); goto out_put_src; @@ -1221,7 +1222,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) static void cleanup_async_copy(struct nfsd4_copy *copy) { - nfs4_free_cp_state(copy); + nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); @@ -1275,7 +1276,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out; - if (!nfs4_init_cp_state(nn, copy)) { + if (!nfs4_init_copy_state(nn, copy)) { kfree(async_copy); goto out; } @@ -1343,7 +1344,44 @@ static __be32 nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - return nfserr_notsupp; + struct nfsd4_copy_notify *cn = &u->copy_notify; + __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_stid *stid; + struct nfs4_cpntf_state *cps; + struct nfs4_client *clp = cstate->clp; + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &cn->cpn_src_stateid, RD_STATE, NULL, + &stid); + if (status) + return status; + + cn->cpn_sec = nn->nfsd4_lease; + cn->cpn_nsec = 0; + + status = nfserrno(-ENOMEM); + cps = nfs4_alloc_init_cpntf_state(nn, stid); + if (!cps) + goto 
out; + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); + memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); + memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); + + /* For now, only return one server address in cpn_src, the + * address used by the client to connect to this server. + */ + cn->cpn_src.nl4_type = NL4_NETADDR; + status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, + &cn->cpn_src.u.nl4_addr); + WARN_ON_ONCE(status); + if (status) { + nfs4_put_cpntf_state(nn, cps); + goto out; + } +out: + nfs4_put_stid(stid); + return status; } static __be32 @@ -1355,7 +1393,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, - WR_STATE, &nf); + WR_STATE, &nf, NULL); if (status != nfs_ok) { dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); return status; @@ -1414,7 +1452,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, - RD_STATE, &nf); + RD_STATE, &nf, NULL); if (status) { dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 369e574c5092..ae3dff210108 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -80,6 +80,7 @@ static u64 current_sessionid = 1; static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); +static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); /* Locking: */ @@ -722,6 +723,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla /* Will be incremented before return to client: */ refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); + INIT_LIST_HEAD(&stid->sc_cp_list); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -741,30 +743,76 @@ out_free: /* * Create a unique stateid_t to represent each COPY. 
*/ -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, + unsigned char sc_type) { int new_id; + stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time; + stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->sc_type = sc_type; + idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); - new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT); + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); + stid->stid.si_opaque.so_id = new_id; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; - copy->cp_stateid.si_opaque.so_id = new_id; - copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; - copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; return 1; } -void nfs4_free_cp_state(struct nfsd4_copy *copy) +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +{ + return nfs4_init_cp_state(nn, ©->cp_stateid, NFS4_COPY_STID); +} + +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid) +{ + struct nfs4_cpntf_state *cps; + + cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); + if (!cps) + return NULL; + cps->cpntf_time = get_seconds(); + refcount_set(&cps->cp_stateid.sc_count, 1); + if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) + goto out_free; + spin_lock(&nn->s2s_cp_lock); + list_add(&cps->cp_list, &p_stid->sc_cp_list); + spin_unlock(&nn->s2s_cp_lock); + return cps; +out_free: + kfree(cps); + return NULL; +} + +void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; + WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); - idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id); + idr_remove(&nn->s2s_cp_stateids, + copy->cp_stateid.stid.si_opaque.so_id); + spin_unlock(&nn->s2s_cp_lock); +} + +static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) +{ + struct nfs4_cpntf_state *cps; + struct nfsd_net *nn; + + nn = net_generic(net, nfsd_net_id); + spin_lock(&nn->s2s_cp_lock); + while (!list_empty(&stid->sc_cp_list)) { + cps = list_first_entry(&stid->sc_cp_list, + struct nfs4_cpntf_state, cp_list); + _free_cpntf_state_locked(nn, cps); + } spin_unlock(&nn->s2s_cp_lock); } @@ -915,6 +963,7 @@ nfs4_put_stid(struct nfs4_stid *s) return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); if (fp) @@ -5215,6 +5264,9 @@ nfs4_laundromat(struct nfsd_net *nn) struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; + struct nfs4_cpntf_state *cps; + copy_stateid_t *cps_t; + int i; dprintk("NFSD: laundromat service - starting\n"); @@ -5225,6 +5277,17 @@ nfs4_laundromat(struct nfsd_net *nn) dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); + + spin_lock(&nn->s2s_cp_lock); + idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { + cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); + if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && + !time_after((unsigned long)cps->cpntf_time, + (unsigned long)cutoff)) + _free_cpntf_state_locked(nn, cps); + } + spin_unlock(&nn->s2s_cp_lock); + spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, 
cl_lru); @@ -5600,6 +5663,24 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, out: return status; } +static void +_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) +{ + WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) + return; + list_del(&cps->cp_list); + idr_remove(&nn->s2s_cp_stateids, + cps->cp_stateid.stid.si_opaque.so_id); + kfree(cps); +} + +void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) +{ + spin_lock(&nn->s2s_cp_lock); + _free_cpntf_state_locked(nn, cps); + spin_unlock(&nn->s2s_cp_lock); +} /* * Checks for stateid operations @@ -5607,7 +5688,8 @@ out: __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, - stateid_t *stateid, int flags, struct nfsd_file **nfp) + stateid_t *stateid, int flags, struct nfsd_file **nfp, + struct nfs4_stid **cstid) { struct inode *ino = d_inode(fhp->fh_dentry); struct net *net = SVC_NET(rqstp); @@ -5656,8 +5738,12 @@ done: if (status == nfs_ok && nfp) status = nfs4_check_file(rqstp, fhp, s, nfp, flags); out: - if (s) - nfs4_put_stid(s); + if (s) { + if (!status && cstid) + *cstid = s; + else + nfs4_put_stid(s); + } return status; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d61b83b9654c..35eb7170aefc 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -56,6 +56,14 @@ typedef struct { stateid_opaque_t si_opaque; } stateid_t; +typedef struct { + stateid_t stid; +#define NFS4_COPY_STID 1 +#define NFS4_COPYNOTIFY_STID 2 + unsigned char sc_type; + refcount_t sc_count; +} copy_stateid_t; + #define STATEID_FMT "(%08x/%08x/%08x/%08x)" #define STATEID_VAL(s) \ (s)->si_opaque.so_clid.cl_boot, \ @@ -96,6 +104,7 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 + struct list_head sc_cp_list; unsigned char sc_type; stateid_t sc_stateid; spinlock_t sc_lock; @@ -104,6 +113,17 @@ struct nfs4_stid { void (*sc_free)(struct nfs4_stid *); }; +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the + * parent OPEN/LOCK/DELEG stateid. + */ +struct nfs4_cpntf_state { + copy_stateid_t cp_stateid; + struct list_head cp_list; /* per parent nfs4_stid */ + stateid_t cp_p_stateid; /* copy of parent's stateid */ + clientid_t cp_p_clid; /* copy of parent's clid */ + time_t cpntf_time; /* last time stateid used */ +}; + /* * Represents a delegation stateid. 
The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -618,14 +638,17 @@ struct nfsd4_copy; extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, - stateid_t *stateid, int flags, struct nfsd_file **filp); + stateid_t *stateid, int flags, struct nfsd_file **filp, + struct nfs4_stid **cstid); __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)); -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy); -void nfs4_free_cp_state(struct nfsd4_copy *copy); +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy); +void nfs4_free_copy_state(struct nfsd4_copy *copy); +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); @@ -655,6 +678,8 @@ void put_nfs4_file(struct nfs4_file *fi); extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); +extern void nfs4_put_cpntf_state(struct nfsd_net *nn, + struct nfs4_cpntf_state *cps); static inline void get_nfs4_file(struct nfs4_file *fi) { refcount_inc(&fi->fi_ref); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 8231fe04bec0..2937e06f3554 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -542,7 +542,7 @@ struct nfsd4_copy { struct nfsd_file *nf_src; struct nfsd_file *nf_dst; - stateid_t cp_stateid; + copy_stateid_t cp_stateid; struct list_head copies; struct task_struct *copy_task; From b7342204253aaa1ce8351e0d94b43f98c8706cee Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 6 Sep 2019 15:17:21 -0400 Subject: [PATCH 005/658] NFSD check stateids against copy stateids Incoming stateid (used by a READ) could be a saved copy stateid. Using the provided stateid, look it up in the list of copy_notify stateids. If found, use the parent's stateid and parent's clid to look up the parent's stid to do the appropriate checks. Update the copy notify timestamp (cpntf_time) with current time this making it 'active' so that laundromat thread will not delete copy notify state. 
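The "active" test is the usual lease-based idle check; in outline (an illustrative user-space sketch, not the nfsd code):

    #include <stdbool.h>
    #include <time.h>

    struct cpntf_entry { time_t last_used; };   /* stands in for cpntf_time */

    /* A READ that resolves a copy-notify stateid refreshes the timestamp. */
    static void touch_on_use(struct cpntf_entry *e)
    {
        e->last_used = time(NULL);
    }

    /* The laundromat reaps entries idle for at least one lease period. */
    static bool laundromat_should_reap(const struct cpntf_entry *e, time_t lease)
    {
        return time(NULL) - e->last_used >= lease;
    }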
Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4state.c | 74 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ae3dff210108..31b71aed4230 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4539,7 +4539,8 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 static __be32 lookup_clientid(clientid_t *clid, struct nfsd4_compound_state *cstate, - struct nfsd_net *nn) + struct nfsd_net *nn, + bool sessions) { struct nfs4_client *found; @@ -4560,7 +4561,7 @@ static __be32 lookup_clientid(clientid_t *clid, */ WARN_ON_ONCE(cstate->session); spin_lock(&nn->client_lock); - found = find_confirmed_client(clid, false, nn); + found = find_confirmed_client(clid, sessions, nn); if (!found) { spin_unlock(&nn->client_lock); return nfserr_expired; @@ -4593,7 +4594,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - status = lookup_clientid(clientid, cstate, nn); + status = lookup_clientid(clientid, cstate, nn, false); if (status) return status; clp = cstate->clp; @@ -5182,7 +5183,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) goto out; clp = cstate->clp; @@ -5584,7 +5585,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn, + false); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; @@ -5674,6 +5676,59 @@ _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) cps->cp_stateid.stid.si_opaque.so_id); kfree(cps); } +/* + * A READ from an inter server to server COPY will have a + * copy stateid. Look up the copy notify stateid from the + * idr structure and take a reference on it. 
+ */ +static __be32 _find_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_cpntf_state **cps) +{ + copy_stateid_t *cps_t; + struct nfs4_cpntf_state *state = NULL; + + if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id) + return nfserr_bad_stateid; + spin_lock(&nn->s2s_cp_lock); + cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id); + if (cps_t) { + state = container_of(cps_t, struct nfs4_cpntf_state, + cp_stateid); + if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) + return nfserr_bad_stateid; + refcount_inc(&state->cp_stateid.sc_count); + } + spin_unlock(&nn->s2s_cp_lock); + if (!state) + return nfserr_bad_stateid; + *cps = state; + return 0; +} + +static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_stid **stid) +{ + __be32 status; + struct nfs4_cpntf_state *cps = NULL; + struct nfsd4_compound_state cstate; + + status = _find_cpntf_state(nn, st, &cps); + if (status) + return status; + + cps->cpntf_time = get_seconds(); + memset(&cstate, 0, sizeof(cstate)); + status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); + if (status) + goto out; + status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + stid, nn); + put_client_renew(cstate.clp); +out: + nfs4_put_cpntf_state(nn, cps); + return status; +} void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { @@ -5711,6 +5766,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); + if (status == nfserr_bad_stateid) + status = find_cpntf_state(nn, stateid, &s); if (status) return status; status = nfsd4_stid_check_stateid_generation(stateid, s, @@ -6743,7 +6800,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, cstate, nn); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn, + false); if (status) goto out; } @@ -6927,7 +6985,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return status; @@ -7074,7 +7132,7 @@ nfs4_check_open_reclaim(clientid_t *clid, __be32 status; /* find clientid in conf_id_hashtbl */ - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return nfserr_reclaim_bad; From 51100d2b87cba12b09db79fa6577adccc0c2d14f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 13 Sep 2018 13:58:24 -0400 Subject: [PATCH 006/658] NFSD generalize nfsd4_compound_state flag names Allow for sid_flag field non-stateid use. 
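The renamed helpers are ordinary bit-flag accessors over sid_flags; the pattern in isolation (illustrative sketch):

    #include <stdio.h>

    #define CURRENT_STATE_ID_FLAG   (1 << 0)

    #define SET_CSTATE_FLAG(c, f)   ((c)->sid_flags |= (f))
    #define HAS_CSTATE_FLAG(c, f)   ((c)->sid_flags & (f))
    #define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f))

    struct cstate_sketch { unsigned int sid_flags; };  /* stand-in struct */

    int main(void)
    {
        struct cstate_sketch c = { 0 };

        SET_CSTATE_FLAG(&c, CURRENT_STATE_ID_FLAG);
        printf("set: %d\n", !!HAS_CSTATE_FLAG(&c, CURRENT_STATE_ID_FLAG));
        CLEAR_CSTATE_FLAG(&c, CURRENT_STATE_ID_FLAG);
        printf("set: %d\n", !!HAS_CSTATE_FLAG(&c, CURRENT_STATE_ID_FLAG));
        return 0;
    }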
Signed-off-by: Andy Adamson --- fs/nfsd/nfs4proc.c | 8 ++++---- fs/nfsd/nfs4state.c | 7 ++++--- fs/nfsd/xdr4.h | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dee5aec098fd..8cc9f6e91874 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -531,9 +531,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_restorefh; fh_dup2(&cstate->current_fh, &cstate->save_fh); - if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) { + if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) { memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } return nfs_ok; } @@ -543,9 +543,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { fh_dup2(&cstate->save_fh, &cstate->current_fh); - if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { + if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) { memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG); } return nfs_ok; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 31b71aed4230..ed5e80b3ac1f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7930,7 +7930,8 @@ nfs4_state_shutdown(void) static void get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { - if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) + if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) && + CURRENT_STATEID(stateid)) memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); } @@ -7939,14 +7940,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (cstate->minorversion) { memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } } void clear_current_stateid(struct nfsd4_compound_state *cstate) { - CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } /* diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2937e06f3554..0b4fe0795b72 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -46,9 +46,9 @@ #define CURRENT_STATE_ID_FLAG (1<<0) #define SAVED_STATE_ID_FLAG (1<<1) -#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f)) -#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f)) -#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f)) +#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f)) +#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f)) +#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f)) struct nfsd4_compound_state { struct svc_fh current_fh; From b9e8638e3d9ed8334f1f7071e081860aac37e83e Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 7 Oct 2019 10:56:48 -0400 Subject: [PATCH 007/658] NFSD: allow inter server COPY to have a STALE source server fh The inter server to server COPY source server filehandle is a foreign filehandle as the COPY is sent to the destination server. 
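For example, the destination server typically receives an inter-server copy compound of roughly this shape (illustrative; only the source filehandle is foreign):

    PUTFH    (source filehandle, issued by the source server, foreign here)
    SAVEFH
    PUTFH    (destination filehandle, local)
    COPY     (non-empty ca_source_server, i.e. cp_intra == false)

check_if_stalefh_allowed() below recognizes this pattern and marks the saved PUTFH no_verify, so an nfserr_stale result from fh_verify() is tolerated for the foreign source handle only.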
Signed-off-by: Olga Kornievskaia --- fs/nfsd/Kconfig | 10 ++++++++ fs/nfsd/nfs4proc.c | 57 ++++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/nfsfh.h | 5 +++- fs/nfsd/xdr4.h | 1 + 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f2f81561ebb6..f368f3215f88 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -134,6 +134,16 @@ config NFSD_FLEXFILELAYOUT If unsure, say N. +config NFSD_V4_2_INTER_SSC + bool "NFSv4.2 inter server to server COPY" + depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 + help + This option enables support for NFSv4.2 inter server to + server copy where the destination server calls the NFSv4.2 + client to read the data to copy from the source server. + + If unsure, say N. + config NFSD_V4_SECURITY_LABEL bool "Provide Security Label support for NFSv4 server" depends on NFSD_V4 && SECURITY diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8cc9f6e91874..816777c12bc7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -504,12 +504,20 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_putfh *putfh = &u->putfh; + __be32 ret; fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); + ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + if (ret == nfserr_stale && putfh->no_verify) { + SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN); + ret = 0; + } +#endif + return ret; } static __be32 @@ -1957,6 +1965,45 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, - rqstp->rq_auth_slack; } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +static void +check_if_stalefh_allowed(struct nfsd4_compoundargs *args) +{ + struct nfsd4_op *op, *current_op = NULL, *saved_op = NULL; + struct nfsd4_copy *copy; + struct nfsd4_putfh *putfh; + int i; + + /* traverse all operation and if it's a COPY compound, mark the + * source filehandle to skip verification + */ + for (i = 0; i < args->opcnt; i++) { + op = &args->ops[i]; + if (op->opnum == OP_PUTFH) + current_op = op; + else if (op->opnum == OP_SAVEFH) + saved_op = current_op; + else if (op->opnum == OP_RESTOREFH) + current_op = saved_op; + else if (op->opnum == OP_COPY) { + copy = (struct nfsd4_copy *)&op->u; + if (!saved_op) { + op->status = nfserr_nofilehandle; + return; + } + putfh = (struct nfsd4_putfh *)&saved_op->u; + if (!copy->cp_intra) + putfh->no_verify = true; + } + } +} +#else +static void +check_if_stalefh_allowed(struct nfsd4_compoundargs *args) +{ +} +#endif + /* * COMPOUND call. 
*/ @@ -2005,6 +2052,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) resp->opcnt = 1; goto encode_op; } + check_if_stalefh_allowed(args); trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { @@ -2020,13 +2068,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; } - - if (!current_fh->fh_dentry) { + if (!current_fh->fh_dentry && + !HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) { if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (current_fh->fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export && + current_fh->fh_export->ex_fslocs.migrated && !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 755e256a9103..b9c75680bc31 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -35,7 +35,7 @@ typedef struct svc_fh { bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ - + int fh_flags; /* FH flags */ #ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ @@ -56,6 +56,9 @@ typedef struct svc_fh { #endif /* CONFIG_NFSD_V3 */ } svc_fh; +#define NFSD4_FH_FOREIGN (1<<0) +#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) +#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f)) enum nfsd_fsid { FSID_DEV = 0, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0b4fe0795b72..b16f602af8f1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -221,6 +221,7 @@ struct nfsd4_lookup { struct nfsd4_putfh { u32 pf_fhlen; /* request */ char *pf_fhval; /* request */ + bool no_verify; /* represents foreigh fh */ }; struct nfsd4_open { From ce0887ac96d35c7105090e166bb0807dc0a0e838 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 9 Oct 2019 11:50:48 -0400 Subject: [PATCH 008/658] NFSD add nfs4 inter ssc to nfsd4_copy Given a universal address, mount the source server from the destination server. Use an internal mount. Call the NFS client nfs42_ssc_open to obtain the NFS struct file suitable for nfsd_copy_range. Ability to do "inter" server-to-server depends on the an nfsd kernel parameter "inter_copy_offload_enable". Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4proc.c | 295 ++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/nfs4state.c | 15 ++- fs/nfsd/nfssvc.c | 6 + fs/nfsd/state.h | 3 + fs/nfsd/xdr4.h | 5 + 5 files changed, 296 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 816777c12bc7..fc72f5729732 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1144,6 +1144,208 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) while ((copy = nfsd4_get_copy(clp)) != NULL) nfsd4_stop_copy(copy); } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid); +extern void nfs42_ssc_close(struct file *filep); + +extern void nfs_sb_deactive(struct super_block *sb); + +#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" + +/** + * Support one copy source server for now. 
+ */ +static __be32 +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, + struct vfsmount **mount) +{ + struct file_system_type *type; + struct vfsmount *ss_mnt; + struct nfs42_netaddr *naddr; + struct sockaddr_storage tmp_addr; + size_t tmp_addrlen, match_netid_len = 3; + char *startsep = "", *endsep = "", *match_netid = "tcp"; + char *ipaddr, *dev_name, *raw_data; + int len, raw_len, status = -EINVAL; + + naddr = &nss->u.nl4_addr; + tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, + naddr->addr_len, + (struct sockaddr *)&tmp_addr, + sizeof(tmp_addr)); + if (tmp_addrlen == 0) + goto out_err; + + if (tmp_addr.ss_family == AF_INET6) { + startsep = "["; + endsep = "]"; + match_netid = "tcp6"; + match_netid_len = 4; + } + + if (naddr->netid_len != match_netid_len || + strncmp(naddr->netid, match_netid, naddr->netid_len)) + goto out_err; + + /* Construct the raw data for the vfs_kern_mount call */ + len = RPC_MAX_ADDRBUFLEN + 1; + ipaddr = kzalloc(len, GFP_KERNEL); + if (!ipaddr) + goto out_err; + + rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len); + + /* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/ + + raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr); + raw_data = kzalloc(raw_len, GFP_KERNEL); + if (!raw_data) + goto out_free_ipaddr; + + snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr); + + status = -ENODEV; + type = get_fs_type("nfs"); + if (!type) + goto out_free_rawdata; + + /* Set the server: for the vfs_kern_mount call */ + dev_name = kzalloc(len + 5, GFP_KERNEL); + if (!dev_name) + goto out_free_rawdata; + snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); + + /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ + ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); + module_put(type->owner); + if (IS_ERR(ss_mnt)) + goto out_free_devname; + + status = 0; + *mount = ss_mnt; + +out_free_devname: + kfree(dev_name); +out_free_rawdata: + kfree(raw_data); +out_free_ipaddr: + kfree(ipaddr); +out_err: + return status; +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ + nfs_sb_deactive(ss_mnt->mnt_sb); + mntput(ss_mnt); +} + +/** + * nfsd4_setup_inter_ssc + * + * Verify COPY destination stateid. + * Connect to the source server with NFSv4.1. + * Create the source struct file for nfsd_copy_range. 
+ * Called with COPY cstate: + * SAVED_FH: source filehandle + * CURRENT_FH: destination filehandle + * + * Returns errno (not nfserrxxx) + */ +static __be32 +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy, struct vfsmount **mount) +{ + struct svc_fh *s_fh = NULL; + stateid_t *s_stid = ©->cp_src_stateid; + __be32 status = -EINVAL; + + /* Verify the destination stateid and set dst struct file*/ + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + ©->cp_dst_stateid, + WR_STATE, ©->nf_dst, NULL); + if (status) + goto out; + + status = nfsd4_interssc_connect(©->cp_src, rqstp, mount); + if (status) + goto out; + + s_fh = &cstate->save_fh; + + copy->c_fh.size = s_fh->fh_handle.fh_size; + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + copy->stateid.seqid = s_stid->si_generation; + memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, + sizeof(stateid_opaque_t)); + + status = 0; +out: + return status; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, + struct nfsd_file *dst) +{ + nfs42_ssc_close(src->nf_file); + nfsd_file_put(src); + nfsd_file_put(dst); + mntput(ss_mnt); +} + +#else /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy, + struct vfsmount **mount) +{ + *mount = NULL; + return -EINVAL; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, + struct nfsd_file *dst) +{ +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ +} + +static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid) +{ + return NULL; +} +#endif /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy) +{ + return nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, + ©->nf_src, ©->cp_dst_stateid, + ©->nf_dst); +} + +static void +nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) +{ + nfsd_file_put(src); + nfsd_file_put(dst); +} static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { @@ -1209,12 +1411,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) status = nfs_ok; } - nfsd_file_put(copy->nf_src); - nfsd_file_put(copy->nf_dst); + if (!copy->cp_intra) /* Inter server SSC */ + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, + copy->nf_dst); + else + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); + return status; } -static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) { dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; @@ -1224,8 +1430,17 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->nf_src = nfsd_file_get(src->nf_src); + dst->cp_intra = src->cp_intra; + if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + dst->nf_src = nfsd_file_get(src->nf_src); + memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); + memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); + memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); + dst->ss_mnt = 
src->ss_mnt; + + return 0; } static void cleanup_async_copy(struct nfsd4_copy *copy) @@ -1244,7 +1459,25 @@ static int nfsd4_do_async_copy(void *data) struct nfsd4_copy *copy = (struct nfsd4_copy *)data; struct nfsd4_copy *cb_copy; + if (!copy->cp_intra) { /* Inter server SSC */ + copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); + if (!copy->nf_src) { + copy->nfserr = nfserr_serverfault; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, + ©->stateid); + if (IS_ERR(copy->nf_src->nf_file)) { + kfree(copy->nf_src); + copy->nfserr = nfserr_offload_denied; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + } + copy->nfserr = nfsd4_do_copy(copy, 0); +do_callback: cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!cb_copy) goto out; @@ -1256,6 +1489,8 @@ static int nfsd4_do_async_copy(void *data) &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); nfsd4_run_cb(&cb_copy->cp_cb); out: + if (!copy->cp_intra) + kfree(copy->nf_src); cleanup_async_copy(copy); return 0; } @@ -1268,11 +1503,20 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy *async_copy = NULL; - status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, - ©->nf_src, ©->cp_dst_stateid, - ©->nf_dst); - if (status) - goto out; + if (!copy->cp_intra) { /* Inter server SSC */ + if (!inter_copy_offload_enable || copy->cp_synchronous) { + status = nfserr_notsupp; + goto out; + } + status = nfsd4_setup_inter_ssc(rqstp, cstate, copy, + ©->ss_mnt); + if (status) + return nfserr_offload_denied; + } else { + status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); + if (status) + return status; + } copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, @@ -1283,15 +1527,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) - goto out; - if (!nfs4_init_copy_state(nn, copy)) { - kfree(async_copy); - goto out; - } + goto out_err; + if (!nfs4_init_copy_state(nn, copy)) + goto out_err; refcount_set(&async_copy->refcount, 1); memcpy(©->cp_res.cb_stateid, ©->cp_stateid, sizeof(copy->cp_stateid)); - dup_copy_fields(copy, async_copy); + status = dup_copy_fields(copy, async_copy); + if (status) + goto out_err; async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) @@ -1302,13 +1546,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_unlock(&async_copy->cp_clp->async_lock); wake_up_process(async_copy->copy_task); status = nfs_ok; - } else + } else { status = nfsd4_do_copy(copy, 1); + } out: return status; out_err: if (async_copy) cleanup_async_copy(async_copy); + status = nfserrno(-ENOMEM); + if (!copy->cp_intra) + nfsd4_interssc_disconnect(copy->ss_mnt); goto out; } @@ -1319,7 +1567,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(©->cp_stateid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(©->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(©->refcount); spin_unlock(&clp->async_lock); @@ -1335,17 +1583,18 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp, union nfsd4_op_u *u) { struct nfsd4_offload_status *os = &u->offload_status; - __be32 status = 0; struct nfsd4_copy *copy; struct nfs4_client *clp = 
cstate->clp; copy = find_async_copy(clp, &os->stateid); - if (copy) - nfsd4_stop_copy(copy); - else - status = nfserr_bad_stateid; + if (!copy) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - return status; + return manage_cpntf_state(nn, &os->stateid, clp, NULL); + } else + nfsd4_stop_copy(copy); + + return nfs_ok; } static __be32 diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ed5e80b3ac1f..296765e693d0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5681,8 +5681,9 @@ _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) * copy stateid. Look up the copy notify stateid from the * idr structure and take a reference on it. */ -static __be32 _find_cpntf_state(struct nfsd_net *nn, stateid_t *st, - struct nfs4_cpntf_state **cps) +__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_client *clp, + struct nfs4_cpntf_state **cps) { copy_stateid_t *cps_t; struct nfs4_cpntf_state *state = NULL; @@ -5696,12 +5697,16 @@ static __be32 _find_cpntf_state(struct nfsd_net *nn, stateid_t *st, cp_stateid); if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) return nfserr_bad_stateid; - refcount_inc(&state->cp_stateid.sc_count); + if (!clp) + refcount_inc(&state->cp_stateid.sc_count); + else + _free_cpntf_state_locked(nn, state); } spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; - *cps = state; + if (!clp && state) + *cps = state; return 0; } @@ -5712,7 +5717,7 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, struct nfs4_cpntf_state *cps = NULL; struct nfsd4_compound_state cstate; - status = _find_cpntf_state(nn, st, &cps); + status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e8bee8ff30c5..c720097a9149 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -31,6 +31,12 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC +bool inter_copy_offload_enable; +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 35eb7170aefc..ffc590de016b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -680,6 +680,9 @@ extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_client *clp, + struct nfs4_cpntf_state **cps); static inline void get_nfs4_file(struct nfs4_file *fi) { refcount_inc(&fi->fi_ref); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index b16f602af8f1..db63d39b1507 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -549,7 +549,12 @@ struct nfsd4_copy { struct task_struct *copy_task; refcount_t refcount; bool stopped; + + struct vfsmount *ss_mnt; + struct nfs_fh c_fh; + nfs4_stateid stateid; }; +extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ From 5277a79e2dee458f0185e4ebde1cd4e128f014e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Dec 2019 10:59:36 +0300 Subject: [PATCH 009/658] nfsd: unlock on error in manage_cpntf_state() We are holding the "nn->s2s_cp_lock" so we can't return directly without unlocking first. 
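The fix is the standard unlock-on-error shape; a generic stand-alone sketch (pthread mutex standing in for the spinlock, names illustrative):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Never return with the lock held; error paths fall through to a
     * single unlock site instead of returning directly.
     */
    static int lookup_locked(int key, int *out)
    {
        int err = 0;

        pthread_mutex_lock(&lock);
        if (key < 0) {              /* bad-stateid analogue */
            err = -1;
            goto unlock;            /* was: return -1 (leaked the lock) */
        }
        *out = key;
    unlock:
        pthread_mutex_unlock(&lock);
        return err;
    }

    int main(void)
    {
        int v = 0;
        return lookup_locked(5, &v) || v != 5;
    }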
Fixes: f3dee17721a0 ("NFSD check stateids against copy stateids") Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 296765e693d0..390ad454a229 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5695,13 +5695,16 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) - return nfserr_bad_stateid; + if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { + state = NULL; + goto unlock; + } if (!clp) refcount_inc(&state->cp_stateid.sc_count); else _free_cpntf_state_locked(nn, state); } +unlock: spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; From 10db651210b2c618eb90a7fea4d6c5132bba7982 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:52 -0500 Subject: [PATCH 010/658] NFSD fix mismatching type in nfsd4_set_netaddr Fix __be32 and u32 mismatch in return and assignment. Reported-by: kbuild test robot Fixes: dbd4c2dd8f13 ("NFSD add COPY_NOTIFY operation") Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 0ff6ef964a48..c679afd9fee9 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -388,7 +388,7 @@ void nfsd_lockd_shutdown(void); extern const u32 nfsd_suppattrs[3][3]; -static inline u32 nfsd4_set_netaddr(struct sockaddr *addr, +static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr, struct nfs42_netaddr *netaddr) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; From 3f9544ca62bc13cf1c145d1deae7bf3270730e0a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:53 -0500 Subject: [PATCH 011/658] NFSD: fix seqid in copy stateid s_stid->si_generation is a u32, copy->stateid.seqid is a __be32, so we should be byte-swapping here if necessary. This effectively undoes the byte-swap performed when reading s_stid->s_generation in nfsd4_decode_copy(). Without this second swap, the stateid we sent to the source in READ could be different from the one the client provided us in the COPY. We didn't spot this in testing since our implementation always uses a 0 in the seqid field. But other implementations might not do that. You'd think we should just skip the byte-swapping entirely, but the s_stid field can be used for either our own stateids (in the intra-server case) or foreign stateids (in the inter-server case), and the former are interpreted by us and need byte-swapping. Reported-by: kbuild test robot Fixes: d5e54eeb0e3d ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Olga Kornievskaia Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index fc72f5729732..42fee1f94a84 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1280,7 +1280,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, copy->c_fh.size = s_fh->fh_handle.fh_size; memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); - copy->stateid.seqid = s_stid->si_generation; + copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); From b8290ca250fb773aeaf76995af84825e7509d0d3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:53 -0500 Subject: [PATCH 012/658] NFSD fix nfserro errno mismatch There is mismatch between __be32 and u32 in nfserr and errno. Reported-by: kbuild test robot Fixes: d5e54eeb0e3d ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 42fee1f94a84..d012f0894fab 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1169,7 +1169,8 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, size_t tmp_addrlen, match_netid_len = 3; char *startsep = "", *endsep = "", *match_netid = "tcp"; char *ipaddr, *dev_name, *raw_data; - int len, raw_len, status = -EINVAL; + int len, raw_len; + __be32 status = nfserr_inval; naddr = &nss->u.nl4_addr; tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, @@ -1207,7 +1208,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr); - status = -ENODEV; + status = nfserr_nodev; type = get_fs_type("nfs"); if (!type) goto out_free_rawdata; @@ -1253,8 +1254,6 @@ nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) * Called with COPY cstate: * SAVED_FH: source filehandle * CURRENT_FH: destination filehandle - * - * Returns errno (not nfserrxxx) */ static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, @@ -1263,7 +1262,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, { struct svc_fh *s_fh = NULL; stateid_t *s_stid = ©->cp_src_stateid; - __be32 status = -EINVAL; + __be32 status = nfserr_inval; /* Verify the destination stateid and set dst struct file*/ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -1308,7 +1307,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct vfsmount **mount) { *mount = NULL; - return -EINVAL; + return nfserr_inval; } static void From 2e577f0faca4640348c398cb85d60a1eedac4b1e Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:54 -0500 Subject: [PATCH 013/658] NFSD fixing possible null pointer derefering in copy offload Static checker revealed possible error path leading to possible NULL pointer dereferencing. Reported-by: Dan Carpenter Fixes: e0639dc5805a: ("NFSD introduce async copy feature") Signed-off-by: Olga Kornievskaia Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d012f0894fab..d33c39c18cdd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1446,7 +1446,8 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); - nfsd_file_put(copy->nf_src); + if (copy->cp_intra) + nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); list_del(©->copies); spin_unlock(©->cp_clp->async_lock); From d781e3df710745fbbaee4eb07fd5b64331a1b175 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 6 Dec 2019 16:07:32 -0500 Subject: [PATCH 014/658] nfsd4: avoid NULL deference on strange COPY compounds With cross-server COPY we've introduced the possibility that the current or saved filehandle might not have fh_dentry/fh_export filled in, but we missed a place that assumed it was. I think this could be triggered by a compound like: PUTFH(foreign filehandle) GETATTR SAVEFH COPY First, check_if_stalefh_allowed sets no_verify on the first (PUTFH) op. Then op_func = nfsd4_putfh runs and leaves current_fh->fh_export NULL. need_wrongsec_check returns true, since this PUTFH has OP_IS_PUTFH_LIKE set and GETATTR does not have OP_HANDLES_WRONGSEC set. We should probably also consider tightening the checks in check_if_stalefh_allowed and double-checking that we don't assume the filehandle is verified elsewhere in the compound. But I think this fixes the immediate issue. Reported-by: Dan Carpenter Fixes: 4e48f1cccab3 "NFSD: allow inter server COPY to have... " Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d33c39c18cdd..30c75b961d68 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2368,7 +2368,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_flags & OP_CLEAR_STATEID) clear_current_stateid(cstate); - if (need_wrongsec_check(rqstp)) + if (current_fh->fh_export && + need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } encode_op: From c807da539e8276cc1bd2fb08df5ed160e329b40a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 4 Dec 2019 13:29:36 +0100 Subject: [PATCH 015/658] i2c: remove unneeded 'extern' from function declatations According to coding-style.rst, extern should not be specified for exported functions. Signed-off-by: Luca Ceresoli Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 115 +++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 60 deletions(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 582ef05ec07e..f834687989f7 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -50,8 +50,8 @@ struct property_entry; * transmit an arbitrary number of messages without interruption. * @count must be be less than 64k since msg.len is u16. */ -extern int i2c_transfer_buffer_flags(const struct i2c_client *client, - char *buf, int count, u16 flags); +int i2c_transfer_buffer_flags(const struct i2c_client *client, + char *buf, int count, u16 flags); /** * i2c_master_recv - issue a single I2C message in master receive mode @@ -115,11 +115,9 @@ static inline int i2c_master_send_dmasafe(const struct i2c_client *client, /* Transfer num messages. 
*/ -extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num); +int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* Unlocked flavor */ -extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num); +int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* This is the very generalized SMBus access routine. You probably do not want to use this, though; one of the functions below may be much easier, @@ -138,16 +136,14 @@ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, /* Now follow the 'nice' access routines. These also document the calling conventions of i2c_smbus_xfer. */ -extern s32 i2c_smbus_read_byte(const struct i2c_client *client); -extern s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value); -extern s32 i2c_smbus_read_byte_data(const struct i2c_client *client, - u8 command); -extern s32 i2c_smbus_write_byte_data(const struct i2c_client *client, - u8 command, u8 value); -extern s32 i2c_smbus_read_word_data(const struct i2c_client *client, - u8 command); -extern s32 i2c_smbus_write_word_data(const struct i2c_client *client, - u8 command, u16 value); +s32 i2c_smbus_read_byte(const struct i2c_client *client); +s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value); +s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command); +s32 i2c_smbus_write_byte_data(const struct i2c_client *client, + u8 command, u8 value); +s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command); +s32 i2c_smbus_write_word_data(const struct i2c_client *client, + u8 command, u16 value); static inline s32 i2c_smbus_read_word_swapped(const struct i2c_client *client, u8 command) @@ -165,19 +161,18 @@ i2c_smbus_write_word_swapped(const struct i2c_client *client, } /* Returns the number of read bytes */ -extern s32 i2c_smbus_read_block_data(const struct i2c_client *client, - u8 command, u8 *values); -extern s32 i2c_smbus_write_block_data(const struct i2c_client *client, - u8 command, u8 length, const u8 *values); +s32 i2c_smbus_read_block_data(const struct i2c_client *client, + u8 command, u8 *values); +s32 i2c_smbus_write_block_data(const struct i2c_client *client, + u8 command, u8 length, const u8 *values); /* Returns the number of read bytes */ -extern s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, - u8 command, u8 length, u8 *values); -extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, - u8 command, u8 length, - const u8 *values); -extern s32 -i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, - u8 command, u8 length, u8 *values); +s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, + u8 command, u8 length, u8 *values); +s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, + u8 command, u8 length, const u8 *values); +s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, + u8 command, u8 length, + u8 *values); int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id); #endif /* I2C */ @@ -337,10 +332,10 @@ struct i2c_client { }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) -extern struct i2c_client *i2c_verify_client(struct device *dev); -extern struct i2c_adapter *i2c_verify_adapter(struct device *dev); -extern const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, - const struct i2c_client *client); +struct i2c_client *i2c_verify_client(struct device *dev); 
+struct i2c_adapter *i2c_verify_adapter(struct device *dev); +const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, + const struct i2c_client *client); static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { @@ -369,9 +364,9 @@ enum i2c_slave_event { I2C_SLAVE_STOP, }; -extern int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); -extern int i2c_slave_unregister(struct i2c_client *client); -extern bool i2c_detect_slave_mode(struct device *dev); +int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); +int i2c_slave_unregister(struct i2c_client *client); +bool i2c_detect_slave_mode(struct device *dev); static inline int i2c_slave_event(struct i2c_client *client, enum i2c_slave_event event, u8 *val) @@ -440,10 +435,10 @@ struct i2c_board_info { * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ -extern struct i2c_client * +struct i2c_client * i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); -extern struct i2c_client * +struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); /* If you don't know the exact address of an I2C device, use this variant @@ -452,33 +447,33 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf * it must return 1 on successful probe, 0 otherwise. If it is not provided, * a default probing method is used. */ -extern struct i2c_client * +struct i2c_client * i2c_new_scanned_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); -extern struct i2c_client * +struct i2c_client * i2c_new_probed_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); /* Common custom probe functions */ -extern int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); +int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); -extern struct i2c_client * +struct i2c_client * i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address); -extern struct i2c_client * +struct i2c_client * devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address); -extern struct i2c_client * +struct i2c_client * i2c_new_ancillary_device(struct i2c_client *client, - const char *name, - u16 default_addr); + const char *name, + u16 default_addr); -extern void i2c_unregister_device(struct i2c_client *client); +void i2c_unregister_device(struct i2c_client *client); #endif /* I2C */ /* Mainboard arch_initcall() code should register all its I2C devices. @@ -486,7 +481,7 @@ extern void i2c_unregister_device(struct i2c_client *client); * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO -extern int +int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else @@ -840,12 +835,12 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) /* administration... 
*/ #if IS_ENABLED(CONFIG_I2C) -extern int i2c_add_adapter(struct i2c_adapter *adap); -extern void i2c_del_adapter(struct i2c_adapter *adap); -extern int i2c_add_numbered_adapter(struct i2c_adapter *adap); +int i2c_add_adapter(struct i2c_adapter *adap); +void i2c_del_adapter(struct i2c_adapter *adap); +int i2c_add_numbered_adapter(struct i2c_adapter *adap); -extern int i2c_register_driver(struct module *owner, struct i2c_driver *driver); -extern void i2c_del_driver(struct i2c_driver *driver); +int i2c_register_driver(struct module *owner, struct i2c_driver *driver); +void i2c_del_driver(struct i2c_driver *driver); /* use a define to avoid include chaining to get THIS_MODULE */ #define i2c_add_driver(driver) \ @@ -858,12 +853,12 @@ static inline bool i2c_client_has_driver(struct i2c_client *client) /* call the i2c_client->command() of all attached clients with * the given arguments */ -extern void i2c_clients_command(struct i2c_adapter *adap, - unsigned int cmd, void *arg); +void i2c_clients_command(struct i2c_adapter *adap, + unsigned int cmd, void *arg); -extern struct i2c_adapter *i2c_get_adapter(int nr); -extern void i2c_put_adapter(struct i2c_adapter *adap); -extern unsigned int i2c_adapter_depth(struct i2c_adapter *adapter); +struct i2c_adapter *i2c_get_adapter(int nr); +void i2c_put_adapter(struct i2c_adapter *adap); +unsigned int i2c_adapter_depth(struct i2c_adapter *adapter); void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults); @@ -935,15 +930,15 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); #if IS_ENABLED(CONFIG_OF) /* must call put_device() when done with returned i2c_client device */ -extern struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); +struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); /* must call put_device() when done with returned i2c_adapter device */ -extern struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node); +struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node); /* must call i2c_put_adapter() when done with returned i2c_adapter device */ struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node); -extern const struct of_device_id +const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, struct i2c_client *client); From 54a19fd4a6402ef47fce5c3a5374c71f52373c40 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Nov 2019 16:23:06 +0100 Subject: [PATCH 016/658] i2c: core: Use DEVICE_ATTR_*() helper macros Convert the i2c core sysfs attributes from DEVICE_ATTR() to DEVICE_ATTR_*(), to reduce boilerplate. This requires renaming some functions. Although no suitable macro exists for the delete_device attribute, rename i2c_sysfs_delete_device() to delete_device_store() for consistency. 
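For context, a minimal sketch of the naming contract the DEVICE_ATTR_*() helpers rely on (this is an illustration, not part of the patch that follows): DEVICE_ATTR_RO(foo) declares dev_attr_foo with mode 0444 and wires it to a callback that must be named foo_show(), which is why the sysfs callbacks are renamed rather than merely re-registered.

	/* Illustration only: a read-only attribute using the helper macro. */
	static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
				char *buf)
	{
		/* print a single example value for sysfs reads */
		return sprintf(buf, "example\n");
	}
	static DEVICE_ATTR_RO(foo);	/* equivalent to DEVICE_ATTR(foo, 0444, foo_show, NULL) */
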
Signed-off-by: Geert Uytterhoeven Reviewed-by: Luca Ceresoli Reviewed-by: Kieran Bingham Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9f8dcd3f8385..a1eb28a3cc54 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -449,15 +449,15 @@ static void i2c_client_dev_release(struct device *dev) } static ssize_t -show_name(struct device *dev, struct device_attribute *attr, char *buf) +name_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", dev->type == &i2c_client_type ? to_i2c_client(dev)->name : to_i2c_adapter(dev)->name); } -static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); +static DEVICE_ATTR_RO(name); static ssize_t -show_modalias(struct device *dev, struct device_attribute *attr, char *buf) +modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = to_i2c_client(dev); int len; @@ -472,7 +472,7 @@ show_modalias(struct device *dev, struct device_attribute *attr, char *buf) return sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name); } -static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL); +static DEVICE_ATTR_RO(modalias); static struct attribute *i2c_dev_attrs[] = { &dev_attr_name.attr, @@ -1016,8 +1016,8 @@ EXPORT_SYMBOL_GPL(i2c_adapter_depth); * the user to provide incorrect parameters. */ static ssize_t -i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +new_device_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); struct i2c_board_info info; @@ -1072,7 +1072,7 @@ i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device); +static DEVICE_ATTR_WO(new_device); /* * And of course let the users delete the devices they instantiated, if @@ -1084,8 +1084,8 @@ static DEVICE_ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device); * the user to delete the wrong device. */ static ssize_t -i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +delete_device_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); struct i2c_client *client, *next; @@ -1128,7 +1128,7 @@ i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr, return res; } static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL, - i2c_sysfs_delete_device); + delete_device_store); static struct attribute *i2c_adapter_attrs[] = { &dev_attr_name.attr, From 8b183f5b85e0be4f94fa9309da5504bb7ba3367e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 5 Dec 2019 14:39:56 +0100 Subject: [PATCH 017/658] dt-bindings: i2c: rcar: Document r8a77961 support Document support for the I2C controller in the Renesas R-Car M3-W+ (R8A77961) SoC. Update all references to R-Car M3-W from "r8a7796" to "r8a77960", to avoid confusion between R-Car M3-W (R8A77960) and M3-W+. No driver update is needed. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,i2c.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt index 0660a3eb2547..c359965d0724 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt @@ -17,7 +17,8 @@ Required properties: "renesas,i2c-r8a7793" if the device is a part of a R8A7793 SoC. "renesas,i2c-r8a7794" if the device is a part of a R8A7794 SoC. "renesas,i2c-r8a7795" if the device is a part of a R8A7795 SoC. - "renesas,i2c-r8a7796" if the device is a part of a R8A7796 SoC. + "renesas,i2c-r8a7796" if the device is a part of a R8A77960 SoC. + "renesas,i2c-r8a77961" if the device is a part of a R8A77961 SoC. "renesas,i2c-r8a77965" if the device is a part of a R8A77965 SoC. "renesas,i2c-r8a77970" if the device is a part of a R8A77970 SoC. "renesas,i2c-r8a77980" if the device is a part of a R8A77980 SoC. From 15c0677162b77636899ac668f3bc2055c941b9b7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 5 Dec 2019 14:40:25 +0100 Subject: [PATCH 018/658] dt-bindings: i2c: sh_mobile: Document r8a77961 support Document support for the IIC controller in the Renesas R-Car M3-W+ (R8A77961) SoC. No driver update is needed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,iic.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt index 64d11ffb07c4..ffe085c9947e 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,iic.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,iic.txt @@ -17,6 +17,7 @@ Required properties: - "renesas,iic-r8a7794" (R-Car E2) - "renesas,iic-r8a7795" (R-Car H3) - "renesas,iic-r8a7796" (R-Car M3-W) + - "renesas,iic-r8a77961" (R-Car M3-W+) - "renesas,iic-r8a77965" (R-Car M3-N) - "renesas,iic-r8a77990" (R-Car E3) - "renesas,iic-sh73a0" (SH-Mobile AG5) From 09a80f2aef06b7c86143f5c14efd3485e0d2c139 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Dec 2019 12:33:33 -0500 Subject: [PATCH 019/658] nfsd: Return the correct number of bytes written to the file We must allow for the fact that iov_iter_write() could have returned a short write (e.g. if there was an ENOSPC issue). Fixes: d890be159a71 "nfsd: Add I/O trace points in the NFSv4 write path" Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c0dc491537a6..f0bca0e87d0c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -975,6 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; + *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); From fc5fc5d7cc9e0c7d2ca5c751cf5450c6bb635e57 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Thu, 19 Dec 2019 17:29:20 +0800 Subject: [PATCH 020/658] nfsd4: Remove unneeded semicolon Fixes coccicheck warning: fs/nfsd/nfs4state.c:3376:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 390ad454a229..be141136aec2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3422,7 +3422,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir) case NFS4_CDFC4_BACK_OR_BOTH: *dir = NFS4_CDFC4_BOTH; return nfs_ok; - }; + } return nfserr_inval; } From 57f64034966fb945fc958f95f0c51e47af590344 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Dec 2019 14:57:23 -0500 Subject: [PATCH 021/658] nfsd: Clone should commit src file metadata too vfs_clone_file_range() can modify the metadata on the source file too, so we need to commit that to stable storage as well. Reported-by: Dave Chinner Signed-off-by: Trond Myklebust Acked-by: Dave Chinner Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f0bca0e87d0c..82cf80dde5c7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -280,19 +280,25 @@ out: * Commit metadata changes to stable storage. */ static int -commit_metadata(struct svc_fh *fhp) +commit_inode_metadata(struct inode *inode) { - struct inode *inode = d_inode(fhp->fh_dentry); const struct export_operations *export_ops = inode->i_sb->s_export_op; - if (!EX_ISSYNC(fhp->fh_export)) - return 0; - if (export_ops->commit_metadata) return export_ops->commit_metadata(inode); return sync_inode_metadata(inode, 1); } +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = d_inode(fhp->fh_dentry); + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + return commit_inode_metadata(inode); +} + /* * Go over the attributes and take care of the small differences between * NFS semantics and what Linux expects. @@ -537,6 +543,9 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, if (sync) { loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + + if (!status) + status = commit_inode_metadata(file_inode(src)); if (status < 0) return nfserrno(status); } From fc1b20659597015a30e8ea032f168e97848c1d39 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Tue, 17 Dec 2019 16:50:47 -0600 Subject: [PATCH 022/658] nfsd: remove unnecessary assertion in nfsd4_encode_replay The replay variable is set in the only caller of nfsd4_encode_replay. The assertion is unnecessary and the patch removes this check. Signed-off-by: Aditya Pakki Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1d8a08b84e32..85949fd6ae7f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4649,8 +4649,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) __be32 *p; struct nfs4_replay *rp = op->replay; - BUG_ON(!rp); - p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); if (!p) { WARN_ON_ONCE(1); From b3f255ef6bffc18a28c3b6295357f2a3380c033f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Oct 2017 11:25:34 +0200 Subject: [PATCH 023/658] nfsd: use ktime_get_seconds() for timestamps The delegation logic in nfsd uses the somewhat inefficient seconds_since_boot() function to record time intervals. Signed-off-by: Arnd Bergmann Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index be141136aec2..7bba1e150a4a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -854,7 +854,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid) static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; - time_t swap_time; + time64_t swap_time; int new; /* index into 'set' */ DECLARE_BITMAP(set[2], 256); } blocked_delegations; @@ -866,15 +866,15 @@ static int delegation_blocked(struct knfsd_fh *fh) if (bd->entries == 0) return 0; - if (seconds_since_boot() - bd->swap_time > 30) { + if (ktime_get_seconds() - bd->swap_time > 30) { spin_lock(&blocked_delegations_lock); - if (seconds_since_boot() - bd->swap_time > 30) { + if (ktime_get_seconds() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; memset(bd->set[bd->new], 0, sizeof(bd->set[0])); bd->new = 1-bd->new; - bd->swap_time = seconds_since_boot(); + bd->swap_time = ktime_get_seconds(); } spin_unlock(&blocked_delegations_lock); } @@ -904,7 +904,7 @@ static void block_delegations(struct knfsd_fh *fh) __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) - bd->swap_time = seconds_since_boot(); + bd->swap_time = ktime_get_seconds(); bd->entries += 1; spin_unlock(&blocked_delegations_lock); } From e29f470396a7447764e73289fd63a4591a0b443b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 14:52:43 +0100 Subject: [PATCH 024/658] nfsd: print 64-bit timestamps in client_info_show The nii_time field gets truncated to 'time_t' on 32-bit architectures before printing. Remove the use of 'struct timespec' to product the correct output beyond 2038. Signed-off-by: Arnd Bergmann Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 5 ++--- fs/nfsd/state.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7bba1e150a4a..27e1c12618b8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2341,7 +2341,7 @@ static int client_info_show(struct seq_file *m, void *v) clp->cl_nii_domain.len); seq_printf(m, "\nImplementation name: "); seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); - seq_printf(m, "\nImplementation time: [%ld, %ld]\n", + seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } drop_client(clp); @@ -2995,8 +2995,7 @@ static __be32 copy_impl_id(struct nfs4_client *clp, xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); if (!clp->cl_nii_name.data) return nfserr_jukebox; - clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec; - clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec; + clp->cl_nii_time = exid->nii_time; return 0; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ffc590de016b..6cf442033786 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -340,7 +340,7 @@ struct nfs4_client { /* NFSv4.1 client implementation id: */ struct xdr_netobj cl_nii_domain; struct xdr_netobj cl_nii_name; - struct timespec cl_nii_time; + struct timespec64 cl_nii_time; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; From 92c5e46911c0c1b20fa2294298aaf008c3afe975 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 14:55:32 +0100 Subject: [PATCH 025/658] nfsd: handle nfs3 timestamps as unsigned The decode_time3 function behaves differently on 32-bit and 64-bit architectures: on the former, a 32-bit timestamp gets converted into an signed number and then into a timestamp between 1902 and 2038, while on the latter it is interpreted as unsigned in the range 1970-2106. Change all the remaining 'timespec' in nfsd to 'timespec64' to make the behavior the same, and use the current interpretation of the dominant 64-bit architectures. Signed-off-by: Arnd Bergmann Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs3xdr.c | 20 ++++++++------------ fs/nfsd/nfsfh.h | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 195ab7a0fc89..c997b710af27 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -32,14 +32,14 @@ static u32 nfs3_ftypes[] = { * XDR functions for basic NFS types */ static __be32 * -encode_time3(__be32 *p, struct timespec *time) +encode_time3(__be32 *p, struct timespec64 *time) { *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); return p; } static __be32 * -decode_time3(__be32 *p, struct timespec *time) +decode_time3(__be32 *p, struct timespec64 *time) { time->tv_sec = ntohl(*p++); time->tv_nsec = ntohl(*p++); @@ -167,7 +167,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct timespec ts; *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); @@ -183,12 +182,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl((u32) MINOR(stat->rdev)); p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, stat->ino); - ts = timespec64_to_timespec(stat->atime); - p = encode_time3(p, &ts); - ts = timespec64_to_timespec(stat->mtime); - p = encode_time3(p, &ts); - ts = timespec64_to_timespec(stat->ctime); - p = encode_time3(p, &ts); + p = encode_time3(p, &stat->atime); + p = encode_time3(p, &stat->mtime); + p = encode_time3(p, &stat->ctime); return p; } @@ -277,8 +273,8 @@ void fill_pre_wcc(struct svc_fh *fhp) stat.size = inode->i_size; } - fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime); - fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime); + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; fhp->fh_pre_size = stat.size; fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); fhp->fh_pre_saved = true; @@ -330,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if ((args->check_guard = ntohl(*p++)) != 0) { - struct timespec time; + struct timespec64 time; p = decode_time3(p, &time); args->guardtime = time.tv_sec; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index b9c75680bc31..56cfbc361561 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -42,8 +42,8 @@ typedef struct svc_fh { /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ - struct timespec fh_pre_mtime; /* mtime before oper */ - struct timespec fh_pre_ctime; /* ctime before oper */ + struct timespec64 fh_pre_mtime; /* mtime before oper */ + struct timespec64 fh_pre_ctime; /* ctime before oper */ /* * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) * to find out if it is valid. From e4598e38ee7e708d751081799f27d3f11a3e687d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 15:14:03 +0100 Subject: [PATCH 026/658] nfsd: use timespec64 in encode_time_delta The values in encode_time_delta are always small and don't overflow the range of 'struct timespec', so changing it has no effect. Change it to timespec64 as a prerequisite for removing the timespec definition later. Signed-off-by: Arnd Bergmann Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 85949fd6ae7f..9761512674a0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2099,11 +2099,11 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, */ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) { - struct timespec ts; + struct timespec64 ts; u32 ns; ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); - ts = ns_to_timespec(ns); + ts = ns_to_timespec64(ns); p = xdr_encode_hyper(p, ts.tv_sec); *p++ = cpu_to_be32(ts.tv_nsec); From 9cc7680149b238223bbb8bcf3272f6bd47f1fbfb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 15:53:13 +0100 Subject: [PATCH 027/658] nfsd: make 'boot_time' 64-bit wide The local boot time variable gets truncated to time_t at the moment, which can lead to slightly odd behavior on 32-bit architectures. Use ktime_get_real_seconds() instead of get_seconds() to always get a 64-bit result, and keep it that way wherever possible. It still gets truncated in a few places: - When assigning to cl_clientid.cl_boot, this is already documented and is only used as a unique identifier. - In clients_still_reclaiming(), the truncation is to 'unsigned long' in order to use the 'time_before() helper. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4recover.c | 8 ++++---- fs/nfsd/nfs4state.c | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 9a4ef815fb8c..29bbe28eda53 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -40,7 +40,7 @@ struct nfsd_net { struct lock_manager nfsd4_manager; bool grace_ended; - time_t boot_time; + time64_t boot_time; /* internal mount of the "nfsd" pseudofilesystem: */ struct vfsmount *nfsd_mnt; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2481e7662128..a8fb18609146 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1445,7 +1445,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn) } cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; - cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; + cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); if (!ret) ret = cup->cu_u.cu_msg.cm_status; @@ -1782,7 +1782,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp) } static char * -nfsd4_cltrack_grace_start(time_t grace_start) +nfsd4_cltrack_grace_start(time64_t grace_start) { int copied; size_t len; @@ -1795,7 +1795,7 @@ nfsd4_cltrack_grace_start(time_t grace_start) if (!result) return result; - copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", + copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld", grace_start); if (copied >= len) { /* just return nothing if output was truncated */ @@ -2004,7 +2004,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) char *legacy; char timestr[22]; /* FIXME: better way to determine max size? 
*/ - sprintf(timestr, "%ld", nn->boot_time); + sprintf(timestr, "%lld", nn->boot_time); legacy = nfsd4_cltrack_legacy_topdir(); nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); kfree(legacy); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 27e1c12618b8..a178374bacd5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -748,7 +748,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, { int new_id; - stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time; + stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; stid->sc_type = sc_type; @@ -1911,7 +1911,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) */ if (clid->cl_boot == (u32)nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", + dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n", clid->cl_boot, clid->cl_id, nn->boot_time); return 1; } @@ -2271,7 +2271,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { - clp->cl_clientid.cl_boot = nn->boot_time; + clp->cl_clientid.cl_boot = (u32)nn->boot_time; clp->cl_clientid.cl_id = nn->clientid_counter++; gen_confirm(clp, nn); } @@ -5233,9 +5233,9 @@ nfsd4_end_grace(struct nfsd_net *nn) */ static bool clients_still_reclaiming(struct nfsd_net *nn) { - unsigned long now = get_seconds(); - unsigned long double_grace_period_end = nn->boot_time + - 2 * nn->nfsd4_lease; + unsigned long now = (unsigned long) ktime_get_real_seconds(); + unsigned long double_grace_period_end = (unsigned long)nn->boot_time + + 2 * (unsigned long)nn->nfsd4_lease; if (nn->track_reclaim_completes && atomic_read(&nn->nr_reclaim_complete) == @@ -7792,7 +7792,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; - nn->boot_time = get_seconds(); + nn->boot_time = ktime_get_real_seconds(); nn->grace_ended = false; nn->nfsd4_manager.block_opens = true; INIT_LIST_HEAD(&nn->nfsd4_manager.list); From 2a1aa48929af78bfe8c4d4fe4b1ed8996af47cee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 17:50:18 +0100 Subject: [PATCH 028/658] nfsd: pass a 64-bit guardtime to nfsd_setattr() Guardtime handling in nfs3 differs between 32-bit and 64-bit architectures, and uses the deprecated time_t type. Change it to using time64_t, which behaves the same way on 64-bit and 32-bit architectures, treating the number as an unsigned 32-bit entity with a range of year 1970 to 2106 consistently, and avoiding the y2038 overflow. Signed-off-by: Arnd Bergmann Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfsproc.c | 4 ++-- fs/nfsd/vfs.c | 4 ++-- fs/nfsd/vfs.h | 2 +- fs/nfsd/xdr3.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 30c75b961d68..f39cac6e08db 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -984,7 +984,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, - 0, (time_t)0); + 0, (time64_t)0); out: fh_drop_write(&cstate->current_fh); return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a178374bacd5..36bbb18abf77 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4721,7 +4721,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); + return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c83ddac22f38..aa013b736073 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -113,7 +113,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0); + nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); done: return nfsd_return_attrs(nfserr, resp); } @@ -380,7 +380,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0); + nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0); } out_unlock: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 82cf80dde5c7..fe8c5d0e8c5f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -364,7 +364,7 @@ out_nfserrno: */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) + int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; @@ -1133,7 +1133,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) - return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); /* Callers expect file metadata to be committed here */ return nfserrno(commit_metadata(resfhp)); } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index cc110a10bfe8..bbb485177b25 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -48,7 +48,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time_t); + struct iattr *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 99ff9f403ff1..0fa12988fb6a 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -14,7 +14,7 @@ struct nfsd3_sattrargs { struct svc_fh fh; struct iattr attrs; int check_guard; - time_t guardtime; + time64_t guardtime; }; struct nfsd3_diropargs { From b6356d420296e4fd5045ab3b404bfeb9f659ce08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 18:06:52 +0100 Subject: [PATCH 029/658] nfsd: use time64_t in 
nfsd_proc_setattr() check Change to time64_t and ktime_get_real_seconds() to make the logic work correctly on 32-bit architectures beyond 2038. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index aa013b736073..b25c90be29fb 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -94,7 +94,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) * Solaris, at least, doesn't seem to care what the time * request is. We require it be within 30 minutes of now. */ - time_t delta = iap->ia_atime.tv_sec - get_seconds(); + time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds(); nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); if (nfserr) From 2561c92b12f4f4e386d453556685f75775c0938b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 22:32:20 +0100 Subject: [PATCH 030/658] nfsd: fix delay timer on 32-bit architectures The nfsd4_cb_layout_done() function takes a 'time_t' value, multiplied by NSEC_PER_SEC*2 to get a nanosecond value. This works fine on 64-bit architectures, but on 32-bit, any value over 1 second results in a signed integer overflow with unexpected results. Cast one input to a 64-bit type in order to produce the same result that we have on 64-bit architectures, regarless of the type of nfsd4_lease. Fixes: 6b9b21073d3b ("nfsd: give up on CB_LAYOUTRECALLs after two lease periods") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4layouts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2681c70283ce..e12409eca7cc 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ From 9594497f2c78993cb66b696122f7c65528ace985 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 14:43:17 +0100 Subject: [PATCH 031/658] nfsd: fix jiffies/time_t mixup in LRU list The nfsd4_blocked_lock->nbl_time timestamp is recorded in jiffies, but then compared to a CLOCK_REALTIME timestamp later on, which makes no sense. For consistency with the other timestamps, change this to use a time_t. This is a change in behavior, which may cause regressions, but the current code is not sensible. On a system with CONFIG_HZ=1000, the 'time_after((unsigned long)nbl->nbl_time, (unsigned long)cutoff))' check is false for roughly the first 18 days of uptime and then true for the next 49 days. Fixes: 7919d0a27f1e ("nfsd: add a LRU list for blocked locks") Signed-off-by: Arnd Bergmann Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/state.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 36bbb18abf77..f680b1530f38 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6700,7 +6700,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = get_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6cf442033786..df6bd2cb257f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -626,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; From 20b7d86f29d39e8ae19bb29c24ffee70dc385ddf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 16:31:52 +0100 Subject: [PATCH 032/658] nfsd: use boottime for lease expiry calculation A couple of time_t variables are only used to track the state of the lease time and its expiration. The code correctly uses the 'time_after()' macro to make this work on 32-bit architectures even beyond year 2038, but the get_seconds() function and the time_t type itself are deprecated as they behave inconsistently between 32-bit and 64-bit architectures and often lead to code that is not y2038 safe. As a minor issue, using get_seconds() leads to problems with concurrent settimeofday() or clock_settime() calls, in the worst case timeout never triggering after the time has been set backwards. Change nfsd to use time64_t and ktime_get_boottime_seconds() here. This is clearly excessive, as boottime by itself means we never go beyond 32 bits, but it does mean we handle this correctly and consistently without having to worry about corner cases and should be no more expensive than the previous implementation on 64-bit architectures. The max_cb_time() function gets changed in order to avoid an expensive 64-bit division operation, but as the lease time is at most one hour, there is no change in behavior. Also do the same for server-to-server copy expiration time. Signed-off-by: Arnd Bergmann [bfields@redhat.com: fix up copy expiration] Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 4 ++-- fs/nfsd/nfs4callback.c | 11 +++++++++- fs/nfsd/nfs4state.c | 48 +++++++++++++++++++----------------------- fs/nfsd/nfsctl.c | 6 +++--- fs/nfsd/state.h | 10 ++++----- 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 29bbe28eda53..2baf32311e00 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -92,8 +92,8 @@ struct nfsd_net { bool in_grace; const struct nfsd4_client_tracking_ops *client_tracking_ops; - time_t nfsd4_lease; - time_t nfsd4_grace; + time64_t nfsd4_lease; + time64_t nfsd4_grace; bool somebody_reclaimed; bool track_reclaim_completes; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 24534db87e86..c3b11a715082 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -823,7 +823,16 @@ static const struct rpc_program cb_program = { static int max_cb_time(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - return max(nn->nfsd4_lease/10, (time_t)1) * HZ; + + /* + * nfsd4_lease is set to at most one hour in __nfsd4_write_time, + * so we can use 32-bit math on it. Warn if that assumption + * ever stops being true. + */ + if (WARN_ON_ONCE(nn->nfsd4_lease > 3600)) + return 360 * HZ; + + return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ; } static struct workqueue_struct *callback_wq; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f680b1530f38..1ad68b49a101 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -171,7 +171,7 @@ renew_client_locked(struct nfs4_client *clp) clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); list_move_tail(&clp->cl_lru, &nn->client_lru); - clp->cl_time = get_seconds(); + clp->cl_time = ktime_get_boottime_seconds(); } static void put_client_renew_locked(struct nfs4_client *clp) @@ -776,7 +776,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); if (!cps) return NULL; - cps->cpntf_time = get_seconds(); + cps->cpntf_time = ktime_get_boottime_seconds(); refcount_set(&cps->cp_stateid.sc_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; @@ -2661,7 +2661,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, gen_clid(clp, nn); kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); - clp->cl_time = get_seconds(); + clp->cl_time = ktime_get_boottime_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); @@ -4331,7 +4331,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) last = oo->oo_last_closed_stid; oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); - oo->oo_time = get_seconds(); + oo->oo_time = ktime_get_boottime_seconds(); spin_unlock(&nn->client_lock); if (last) nfs4_put_stid(&last->st_stid); @@ -4426,7 +4426,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) */ spin_lock(&state_lock); if (dp->dl_time == 0) { - dp->dl_time = get_seconds(); + dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } spin_unlock(&state_lock); @@ -5233,9 +5233,8 @@ nfsd4_end_grace(struct nfsd_net *nn) */ static bool clients_still_reclaiming(struct nfsd_net *nn) { - unsigned long now = (unsigned long) ktime_get_real_seconds(); - unsigned long double_grace_period_end = (unsigned long)nn->boot_time + - 2 * (unsigned long)nn->nfsd4_lease; + time64_t 
double_grace_period_end = nn->boot_time + + 2 * nn->nfsd4_lease; if (nn->track_reclaim_completes && atomic_read(&nn->nr_reclaim_complete) == @@ -5248,12 +5247,12 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) * If we've given them *two* lease times to reclaim, and they're * still not done, give up: */ - if (time_after(now, double_grace_period_end)) + if (ktime_get_boottime_seconds() > double_grace_period_end) return false; return true; } -static time_t +static time64_t nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; @@ -5262,8 +5261,8 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - nn->nfsd4_lease; - time_t t, new_timeo = nn->nfsd4_lease; + time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; + time64_t t, new_timeo = nn->nfsd4_lease; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; @@ -5282,8 +5281,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - !time_after((unsigned long)cps->cpntf_time, - (unsigned long)cutoff)) + cps->cpntf_time > cutoff) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); @@ -5291,7 +5289,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { + if (clp->cl_time > cutoff) { t = clp->cl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5314,7 +5312,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { + if (dp->dl_time > cutoff) { t = dp->dl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5334,8 +5332,7 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (time_after((unsigned long)oo->oo_time, - (unsigned long)cutoff)) { + if (oo->oo_time > cutoff) { t = oo->oo_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5365,8 +5362,7 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (time_after((unsigned long)nbl->nbl_time, - (unsigned long)cutoff)) { + if (nbl->nbl_time > cutoff) { t = nbl->nbl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5383,7 +5379,7 @@ nfs4_laundromat(struct nfsd_net *nn) free_blocked_lock(nbl); } out: - new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); + new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); return new_timeo; } @@ -5393,13 +5389,13 @@ static void laundromat_main(struct work_struct *); static void laundromat_main(struct work_struct *laundry) { - time_t t; + time64_t t; struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); t = nfs4_laundromat(nn); - dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); + dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t); queue_delayed_work(laundry_wq, &nn->laundromat_work, 
t*HZ); } @@ -5723,7 +5719,7 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (status) return status; - cps->cpntf_time = get_seconds(); + cps->cpntf_time = ktime_get_boottime_seconds(); memset(&cstate, 0, sizeof(cstate)); status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); if (status) @@ -6700,7 +6696,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = get_seconds(); + nbl->nbl_time = ktime_get_boottime_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); @@ -7861,7 +7857,7 @@ nfs4_state_start_net(struct net *net) nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) goto skip_grace; - printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", + printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 11b42c523f04..aace740d5a92 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -956,7 +956,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, - time_t *time, struct nfsd_net *nn) + time64_t *time, struct nfsd_net *nn) { char *mesg = buf; int rv, i; @@ -984,11 +984,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, *time = i; } - return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time); } static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, - time_t *time, struct nfsd_net *nn) + time64_t *time, struct nfsd_net *nn) { ssize_t rv; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index df6bd2cb257f..68d3f30ee760 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -121,7 +121,7 @@ struct nfs4_cpntf_state { struct list_head cp_list; /* per parent nfs4_stid */ stateid_t cp_p_stateid; /* copy of parent's stateid */ clientid_t cp_p_clid; /* copy of parent's clid */ - time_t cpntf_time; /* last time stateid used */ + time64_t cpntf_time; /* last time stateid used */ }; /* @@ -152,7 +152,7 @@ struct nfs4_delegation { struct list_head dl_recall_lru; /* delegation recalled */ struct nfs4_clnt_odstate *dl_clnt_odstate; u32 dl_type; - time_t dl_time; + time64_t dl_time; /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; @@ -330,7 +330,7 @@ struct nfs4_client { #endif struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ - time_t cl_time; /* time of last lease renewal */ + time64_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ @@ -469,7 +469,7 @@ struct nfs4_openowner { */ struct list_head oo_close_lru; struct nfs4_ol_stateid *oo_last_closed_stid; - time_t oo_time; /* time of placement on so_close_lru */ + time64_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 unsigned char oo_flags; }; @@ -626,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head 
nbl_lru; - time_t nbl_time; + time64_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; From 9104ae494e67305c11f5793b82cc768e0f09cea9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 16:45:30 +0100 Subject: [PATCH 033/658] nfsd: use ktime_get_real_seconds() in nfs4_verifier gen_confirm() generates a unique identifier based on the current time. This overflows in year 2038, but that is harmless since it generally does not lead to duplicates, as long as the time has been initialized by a real-time clock or NTP. Using ktime_get_boottime_seconds() or ktime_get_seconds() would avoid the overflow, but it would be more likely to result in non-unique numbers. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1ad68b49a101..65cfe9ab47be 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2264,7 +2264,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) * This is opaque to client, so no need to byte-swap. Use * __force to keep sparse happy */ - verf[0] = (__force __be32)get_seconds(); + verf[0] = (__force __be32)(u32)ktime_get_real_seconds(); verf[1] = (__force __be32)nn->clverifier_counter++; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } From 364d5814b9dd40f6219e7673466c2e18becd62ca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 16:51:55 +0100 Subject: [PATCH 034/658] nfsd: remove nfs4_reset_lease() declarations The function was removed a long time ago, but the declaration and a dummy implementation are still there, referencing the deprecated time_t type. Remove both. Fixes: f958a1320ff7 ("nfsd4: remove unnecessary lease-setting function") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index c679afd9fee9..2ab5569126b8 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -143,7 +143,6 @@ int nfs4_state_start(void); int nfs4_state_start_net(struct net *net); void nfs4_state_shutdown(void); void nfs4_state_shutdown_net(struct net *net); -void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); @@ -154,7 +153,6 @@ static inline int nfs4_state_start(void) { return 0; } static inline int nfs4_state_start_net(struct net *net) { return 0; } static inline void nfs4_state_shutdown(void) { } static inline void nfs4_state_shutdown_net(struct net *net) { } -static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } static inline char * nfs4_recoverydir(void) {return NULL; } static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) From 567ec716efab543c55a0d1abf2303f193ea9d544 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 19 Dec 2019 11:55:58 +0100 Subject: [PATCH 035/658] eeprom: at24: update the license tag The current GPL v2.0 or later SPDX tag is 'GPL-2.0-or-later' as defined at https://spdx.org/licenses/. 
Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 0681d5fdd538..1d7270bbf29f 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0-or-later /* * at24.c - handle most I2C EEPROMs * From 69afc4b623088665677be09ea59627aaf080bff7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 2 Jan 2020 17:56:32 +0100 Subject: [PATCH 036/658] eeprom: at24: sort headers alphabetically For consistency and easier maintenance: sort the headers alphabetically. Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 1d7270bbf29f..ffecabd5d527 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -6,23 +6,23 @@ * Copyright (C) 2008 Wolfram Sang, Pengutronix */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* Address pointer is 16 bit. */ #define AT24_FLAG_ADDR16 BIT(7) From 348001433fad54033af24af837451d7a6563bf33 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 6 Mar 2019 23:15:35 +0000 Subject: [PATCH 037/658] i2c: mux: pca9541: use the BIT macro Because it looks nice! Reviewed-by: Guenter Roeck Reviewed-by: Vladimir Zapolskiy Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-pca9541.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index 50e1fb4aedf5..6daec8d3d331 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -16,6 +16,7 @@ * warranty of any kind, whether express or implied. 
*/ +#include #include #include #include @@ -42,20 +43,20 @@ #define PCA9541_CONTROL 0x01 #define PCA9541_ISTAT 0x02 -#define PCA9541_CTL_MYBUS (1 << 0) -#define PCA9541_CTL_NMYBUS (1 << 1) -#define PCA9541_CTL_BUSON (1 << 2) -#define PCA9541_CTL_NBUSON (1 << 3) -#define PCA9541_CTL_BUSINIT (1 << 4) -#define PCA9541_CTL_TESTON (1 << 6) -#define PCA9541_CTL_NTESTON (1 << 7) +#define PCA9541_CTL_MYBUS BIT(0) +#define PCA9541_CTL_NMYBUS BIT(1) +#define PCA9541_CTL_BUSON BIT(2) +#define PCA9541_CTL_NBUSON BIT(3) +#define PCA9541_CTL_BUSINIT BIT(4) +#define PCA9541_CTL_TESTON BIT(6) +#define PCA9541_CTL_NTESTON BIT(7) -#define PCA9541_ISTAT_INTIN (1 << 0) -#define PCA9541_ISTAT_BUSINIT (1 << 1) -#define PCA9541_ISTAT_BUSOK (1 << 2) -#define PCA9541_ISTAT_BUSLOST (1 << 3) -#define PCA9541_ISTAT_MYTEST (1 << 6) -#define PCA9541_ISTAT_NMYTEST (1 << 7) +#define PCA9541_ISTAT_INTIN BIT(0) +#define PCA9541_ISTAT_BUSINIT BIT(1) +#define PCA9541_ISTAT_BUSOK BIT(2) +#define PCA9541_ISTAT_BUSLOST BIT(3) +#define PCA9541_ISTAT_MYTEST BIT(6) +#define PCA9541_ISTAT_NMYTEST BIT(7) #define BUSON (PCA9541_CTL_BUSON | PCA9541_CTL_NBUSON) #define MYBUS (PCA9541_CTL_MYBUS | PCA9541_CTL_NMYBUS) From 0d1569a763faef9df0f08c19c31c6aea84a302ff Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Wed, 25 Dec 2019 18:36:22 +0800 Subject: [PATCH 038/658] dt-bindings: i2c: support property idle-state This supports property idle-state Reviewed-by: Rob Herring Signed-off-by: Biwen Li Signed-off-by: Peter Rosin --- Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt index 30ac6a60f041..7abda506b828 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt @@ -25,6 +25,8 @@ Required Properties: Optional Properties: - reset-gpios: Reference to the GPIO connected to the reset input. + - idle-state: if present, overrides i2c-mux-idle-disconnect, + Please refer to Documentation/devicetree/bindings/mux/mux-controller.txt - i2c-mux-idle-disconnect: Boolean; if defined, forces mux to disconnect all children in idle state. This is necessary for example, if there are several multiplexers on the bus and the devices behind them use same I2C addresses. From e65e228eb0963be32180db3039f0a1467b426aae Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Wed, 25 Dec 2019 18:36:23 +0800 Subject: [PATCH 039/658] i2c: mux: pca954x: support property idle-state This supports property idle-state,if present, overrides i2c-mux-idle-disconnect. My use cases: - Use the property idle-state to fix an errata on LS2085ARDB and LS2088ARDB. - Errata id: E-00013(board LS2085ARDB and LS2088ARDB revision on Rev.B, Rev.C and Rev.D). - About E-00013: - Description: I2C1 and I2C3 buses are missing pull-up. - Impact: When the PCA954x device is tri-stated, the I2C bus will float. This makes the I2C bus and its associated downstream devices inaccessible. - Hardware fix: Populate resistors R189 and R190 for I2C1 and resistors R228 and R229 for I2C3. - Software fix: Remove the tri-state option from the PCA954x driver(PCA954x always on enable status, specify a channel zero in dts to fix the errata E-00013). 
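[Editorial illustration, not part of the patch] A minimal, self-contained C sketch of the precedence described above: a device-tree "idle-state" value wins, the older boolean i2c-mux-idle-disconnect comes next, and the mux is otherwise left as-is. The constants and the resolve_idle_state() helper are stand-ins written for this example (values chosen to mirror the dt-bindings MUX_IDLE_* convention), not code taken from the driver.

#include <stdbool.h>
#include <stdio.h>

/* Local stand-ins assumed to mirror the dt-bindings MUX_IDLE_* values. */
#define MUX_IDLE_AS_IS		(-1)
#define MUX_IDLE_DISCONNECT	(-2)

/* Returns the channel to park on when idle (>= 0), or a MUX_IDLE_* code. */
static int resolve_idle_state(bool has_idle_state, int idle_state,
			      bool idle_disconnect)
{
	if (has_idle_state)
		return idle_state;	/* e.g. channel 0 on the E-00013 boards */
	if (idle_disconnect)
		return MUX_IDLE_DISCONNECT;
	return MUX_IDLE_AS_IS;
}

int main(void)
{
	/* LS2088ARDB-style setup: keep channel 0 selected while idle. */
	printf("idle state: %d\n", resolve_idle_state(true, 0, false));
	return 0;
}
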
Tested-by: Ioana Ciornei Signed-off-by: Biwen Li Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-pca954x.c | 69 +++++++++++++++++++---------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 923aa3a5a3dc..a0d926ae3f86 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -86,7 +86,7 @@ struct pca954x { u8 last_chan; /* last register value */ /* MUX_IDLE_AS_IS, MUX_IDLE_DISCONNECT or >= 0 for channel */ - s8 idle_state; + s32 idle_state; struct i2c_client *client; @@ -229,20 +229,23 @@ static int pca954x_reg_write(struct i2c_adapter *adap, I2C_SMBUS_BYTE, &dummy); } +static u8 pca954x_regval(struct pca954x *data, u8 chan) +{ + /* We make switches look like muxes, not sure how to be smarter. */ + if (data->chip->muxtype == pca954x_ismux) + return chan | data->chip->enable; + else + return 1 << chan; +} + static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) { struct pca954x *data = i2c_mux_priv(muxc); struct i2c_client *client = data->client; - const struct chip_desc *chip = data->chip; u8 regval; int ret = 0; - /* we make switches look like muxes, not sure how to be smarter */ - if (chip->muxtype == pca954x_ismux) - regval = chan | chip->enable; - else - regval = 1 << chan; - + regval = pca954x_regval(data, chan); /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { ret = pca954x_reg_write(muxc->parent, client, regval); @@ -256,7 +259,7 @@ static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan) { struct pca954x *data = i2c_mux_priv(muxc); struct i2c_client *client = data->client; - s8 idle_state; + s32 idle_state; idle_state = READ_ONCE(data->idle_state); if (idle_state >= 0) @@ -402,6 +405,22 @@ static void pca954x_cleanup(struct i2c_mux_core *muxc) i2c_mux_del_adapters(muxc); } +static int pca954x_init(struct i2c_client *client, struct pca954x *data) +{ + int ret; + + if (data->idle_state >= 0) + data->last_chan = pca954x_regval(data, data->idle_state); + else + data->last_chan = 0; /* Disconnect multiplexer */ + + ret = i2c_smbus_write_byte(client, data->last_chan); + if (ret < 0) + data->last_chan = 0; + + return ret; +} + /* * I2C init/probing/exit functions */ @@ -411,7 +430,6 @@ static int pca954x_probe(struct i2c_client *client, struct i2c_adapter *adap = client->adapter; struct device *dev = &client->dev; struct device_node *np = dev->of_node; - bool idle_disconnect_dt; struct gpio_desc *gpio; struct i2c_mux_core *muxc; struct pca954x *data; @@ -462,23 +480,24 @@ static int pca954x_probe(struct i2c_client *client, } } - /* Write the mux register at addr to verify + data->idle_state = MUX_IDLE_AS_IS; + if (of_property_read_u32(np, "idle-state", &data->idle_state)) { + if (np && of_property_read_bool(np, "i2c-mux-idle-disconnect")) + data->idle_state = MUX_IDLE_DISCONNECT; + } + + /* + * Write the mux register at addr to verify * that the mux is in fact present. This also - * initializes the mux to disconnected state. + * initializes the mux to a channel + * or disconnected state. 
*/ - if (i2c_smbus_write_byte(client, 0) < 0) { + ret = pca954x_init(client, data); + if (ret < 0) { dev_warn(dev, "probe failed\n"); return -ENODEV; } - data->last_chan = 0; /* force the first selection */ - data->idle_state = MUX_IDLE_AS_IS; - - idle_disconnect_dt = np && - of_property_read_bool(np, "i2c-mux-idle-disconnect"); - if (idle_disconnect_dt) - data->idle_state = MUX_IDLE_DISCONNECT; - ret = pca954x_irq_setup(muxc); if (ret) goto fail_cleanup; @@ -530,9 +549,13 @@ static int pca954x_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct i2c_mux_core *muxc = i2c_get_clientdata(client); struct pca954x *data = i2c_mux_priv(muxc); + int ret; - data->last_chan = 0; - return i2c_smbus_write_byte(client, 0); + ret = pca954x_init(client, data); + if (ret < 0) + dev_err(&client->dev, "failed to verify mux presence\n"); + + return ret; } #endif From 384a7ccaa37dff7c84123547dcfd98e2dd4144ba Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 25 Dec 2019 11:19:34 +0800 Subject: [PATCH 040/658] nfsd: use true,false for bool variable in vfs.c Fixes coccicheck warning: fs/nfsd/vfs.c:1389:5-13: WARNING: Assignment of 0/1 to bool variable fs/nfsd/vfs.c:1398:5-13: WARNING: Assignment of 0/1 to bool variable fs/nfsd/vfs.c:1415:2-10: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fe8c5d0e8c5f..3aeffb26fca5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1396,7 +1396,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; break; } /* fall through */ @@ -1405,7 +1405,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; goto set_attr; } /* fall through */ @@ -1422,7 +1422,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } if (created) - *created = 1; + *created = true; nfsd_check_ignore_resizing(iap); From 500c248171408b80a6c87d278085ad1c8be762c2 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 25 Dec 2019 11:19:35 +0800 Subject: [PATCH 041/658] nfsd: use true,false for bool variable in nfs4proc.c Fixes coccicheck warning: fs/nfsd/nfs4proc.c:235:1-18: WARNING: Assignment of 0/1 to bool variable fs/nfsd/nfs4proc.c:368:1-17: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f39cac6e08db..1e14b3ed5674 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -233,7 +233,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru if (!*resfh) return nfserr_jukebox; fh_init(*resfh, NFS4_FHSIZE); - open->op_truncate = 0; + open->op_truncate = false; if (open->op_create) { /* FIXME: check session persistence and pnfs flags. 
@@ -366,7 +366,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; - open->op_created = 0; + open->op_created = false; /* * RFC5661 18.51.3 * Before RECLAIM_COMPLETE done, server should deny new lock From e44b4bf2648ef05ff1cde9f354079bce58a5ebde Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 25 Dec 2019 11:19:36 +0800 Subject: [PATCH 042/658] nfsd: use true,false for bool variable in nfssvc.c Fixes coccicheck warning: fs/nfsd/nfssvc.c:394:2-14: WARNING: Assignment of 0/1 to bool variable fs/nfsd/nfssvc.c:407:2-14: WARNING: Assignment of 0/1 to bool variable fs/nfsd/nfssvc.c:422:2-14: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c720097a9149..7900f3494ecb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -397,7 +397,7 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre ret = lockd_up(net, cred); if (ret) goto out_socks; - nn->lockd_up = 1; + nn->lockd_up = true; } ret = nfs4_state_start_net(net); @@ -410,7 +410,7 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre out_lockd: if (nn->lockd_up) { lockd_down(net); - nn->lockd_up = 0; + nn->lockd_up = false; } out_socks: nfsd_shutdown_generic(); @@ -425,7 +425,7 @@ static void nfsd_shutdown_net(struct net *net) nfs4_state_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); - nn->lockd_up = 0; + nn->lockd_up = false; } nn->nfsd_net_up = false; nfsd_shutdown_generic(); From a44c03b5879ca0da079ccb03b378d79299e2afd5 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 9 Dec 2019 10:19:59 +0000 Subject: [PATCH 043/658] dt-bindings: i2c: at91: cosmetic formatting of compatible list Format the list of compatibles with one compatible per line. Suggested-by: Peter Rosin Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-at91.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-at91.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt index 2210f4359c45..d35cd638d054 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-at91.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt @@ -1,10 +1,16 @@ I2C for Atmel platforms Required properties : -- compatible : Must be "atmel,at91rm9200-i2c", "atmel,at91sam9261-i2c", - "atmel,at91sam9260-i2c", "atmel,at91sam9g20-i2c", "atmel,at91sam9g10-i2c", - "atmel,at91sam9x5-i2c", "atmel,sama5d4-i2c", "atmel,sama5d2-i2c" or - "microchip,sam9x60-i2c" +- compatible : Must be one of: + "atmel,at91rm9200-i2c", + "atmel,at91sam9261-i2c", + "atmel,at91sam9260-i2c", + "atmel,at91sam9g20-i2c", + "atmel,at91sam9g10-i2c", + "atmel,at91sam9x5-i2c", + "atmel,sama5d4-i2c", + "atmel,sama5d2-i2c", + "microchip,sam9x60-i2c". - reg: physical base address of the controller and length of memory mapped region. - interrupts: interrupt number to the cpu. From 42f36457f98dbf329748231ae24499f99117fcb1 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 9 Dec 2019 10:20:05 +0000 Subject: [PATCH 044/658] i2c: at91: remote default value initialization Platform data structs are initialized by default with zero values. 
Thus it becomes redundant to initialize them manually to zero (false). Remove extra false initialization for field members in structs. Reported-by: Wolfram Sang Signed-off-by: Eugen Hristev Reviewed-by: Ludovic Desroches Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-at91-core.c | 39 ------------------------------ 1 file changed, 39 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91-core.c b/drivers/i2c/busses/i2c-at91-core.c index e13af4874976..2b10fa1f8a18 100644 --- a/drivers/i2c/busses/i2c-at91-core.c +++ b/drivers/i2c/busses/i2c-at91-core.c @@ -66,55 +66,26 @@ static struct at91_twi_pdata at91rm9200_config = { .clk_max_div = 5, .clk_offset = 3, .has_unre_flag = true, - .has_alt_cmd = false, - .has_hold_field = false, - .has_dig_filtr = false, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static struct at91_twi_pdata at91sam9261_config = { .clk_max_div = 5, .clk_offset = 4, - .has_unre_flag = false, - .has_alt_cmd = false, - .has_hold_field = false, - .has_dig_filtr = false, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static struct at91_twi_pdata at91sam9260_config = { .clk_max_div = 7, .clk_offset = 4, - .has_unre_flag = false, - .has_alt_cmd = false, - .has_hold_field = false, - .has_dig_filtr = false, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static struct at91_twi_pdata at91sam9g20_config = { .clk_max_div = 7, .clk_offset = 4, - .has_unre_flag = false, - .has_alt_cmd = false, - .has_hold_field = false, - .has_dig_filtr = false, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static struct at91_twi_pdata at91sam9g10_config = { .clk_max_div = 7, .clk_offset = 4, - .has_unre_flag = false, - .has_alt_cmd = false, - .has_hold_field = false, - .has_dig_filtr = false, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static const struct platform_device_id at91_twi_devtypes[] = { @@ -142,23 +113,13 @@ static const struct platform_device_id at91_twi_devtypes[] = { static struct at91_twi_pdata at91sam9x5_config = { .clk_max_div = 7, .clk_offset = 4, - .has_unre_flag = false, - .has_alt_cmd = false, - .has_hold_field = false, - .has_dig_filtr = false, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static struct at91_twi_pdata sama5d4_config = { .clk_max_div = 7, .clk_offset = 4, - .has_unre_flag = false, - .has_alt_cmd = false, .has_hold_field = true, .has_dig_filtr = true, - .has_adv_dig_filtr = false, - .has_ana_filtr = false, }; static struct at91_twi_pdata sama5d2_config = { From bc0757a51c5a3ef4e76ba6d22310a6e49bc4d0e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Thu, 12 Dec 2019 01:12:50 +0100 Subject: [PATCH 045/658] i2c: tiny-usb: Correct I2C fault codes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes the I2C fault codes according to the specified values in Documentation/i2c/fault-codes. 
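[Editorial illustration, not part of the patch] A small sketch of the mapping this change follows, as I read Documentation/i2c/fault-codes: an address phase that gets no ACK is reported as -ENXIO, while other low-level transfer failures are reported as -EIO. The enum and helper below are invented for the example only.

#include <errno.h>
#include <stdio.h>

enum xfer_status { XFER_OK, XFER_ADDR_NAK, XFER_IO_ERROR };

static int i2c_status_to_errno(enum xfer_status status)
{
	switch (status) {
	case XFER_OK:
		return 0;
	case XFER_ADDR_NAK:
		return -ENXIO;	/* no device ACKed its address */
	default:
		return -EIO;	/* data-phase or transport failure */
	}
}

int main(void)
{
	printf("address NAK -> %d, I/O failure -> %d\n",
	       i2c_status_to_errno(XFER_ADDR_NAK),
	       i2c_status_to_errno(XFER_IO_ERROR));
	return 0;
}
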
Signed-off-by: Christoph Müllner Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tiny-usb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index 43e3603489ee..7279ca0eaa2d 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -84,7 +84,7 @@ static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) pmsg->buf, pmsg->len) != pmsg->len) { dev_err(&adapter->dev, "failure reading data\n"); - ret = -EREMOTEIO; + ret = -EIO; goto out; } } else { @@ -94,7 +94,7 @@ static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) pmsg->buf, pmsg->len) != pmsg->len) { dev_err(&adapter->dev, "failure writing data\n"); - ret = -EREMOTEIO; + ret = -EIO; goto out; } } @@ -102,13 +102,13 @@ static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) /* read status */ if (usb_read(adapter, CMD_GET_STATUS, 0, 0, pstatus, 1) != 1) { dev_err(&adapter->dev, "failure reading status\n"); - ret = -EREMOTEIO; + ret = -EIO; goto out; } dev_dbg(&adapter->dev, " status = %d\n", *pstatus); if (*pstatus == STATUS_ADDRESS_NAK) { - ret = -EREMOTEIO; + ret = -ENXIO; goto out; } } From f4b5af78cb7e87983898b4de5a7e4c2b68ac653b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?= Date: Tue, 17 Dec 2019 16:14:09 +0800 Subject: [PATCH 046/658] dt-bindings: I2C: Add X1000 bindings. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the I2C bindings for the X1000 Soc from Ingenic. Signed-off-by: 周琰杰 (Zhou Yanjie) Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-jz4780.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt index 3738cfbf863f..d229eff5ca1b 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt @@ -1,7 +1,9 @@ * Ingenic JZ4780 I2C Bus controller Required properties: -- compatible: should be "ingenic,jz4780-i2c" +- compatible: should be one of the following: + - "ingenic,jz4780-i2c" for the JZ4780 + - "ingenic,x1000-i2c" for the X1000 - reg: Should contain the address & size of the I2C controller registers. - interrupts: Should specify the interrupt provided by parent. - clocks: Should contain a single clock specifier for the JZ4780 I2C clock. From 21575a7a8d4c2593ca8d77b3793b35ab3464d99f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?= Date: Tue, 17 Dec 2019 16:14:10 +0800 Subject: [PATCH 047/658] I2C: JZ4780: Add support for the X1000. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for probing i2c driver on the X1000 Soc from Ingenic. call the corresponding fifo parameter according to the device model obtained from the devicetree. Signed-off-by: 周琰杰 (Zhou Yanjie) Acked-by: Paul Cercueil Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 156 ++++++++++++++++++++++++-------- 1 file changed, 116 insertions(+), 40 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 25dcd73acd63..16a67a64284a 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -4,6 +4,7 @@ * * Copyright (C) 2006 - 2009 Ingenic Semiconductor Inc. 
* Copyright (C) 2015 Imagination Technologies + * Copyright (C) 2019 周琰杰 (Zhou Yanjie) */ #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +57,7 @@ #define JZ4780_I2C_ACKGC 0x98 #define JZ4780_I2C_ENSTA 0x9C #define JZ4780_I2C_SDAHD 0xD0 +#define X1000_I2C_SDAHD 0x7C #define JZ4780_I2C_CTRL_STPHLD BIT(7) #define JZ4780_I2C_CTRL_SLVDIS BIT(6) @@ -73,6 +76,8 @@ #define JZ4780_I2C_STA_TFNF BIT(1) #define JZ4780_I2C_STA_ACT BIT(0) +#define X1000_I2C_DC_STOP BIT(9) + static const char * const jz4780_i2c_abrt_src[] = { "ABRT_7B_ADDR_NOACK", "ABRT_10ADDR1_NOACK", @@ -130,18 +135,33 @@ static const char * const jz4780_i2c_abrt_src[] = { #define JZ4780_I2CFLCNT_ADJUST(n) (((n) - 1) < 8 ? 8 : ((n) - 1)) #define JZ4780_I2C_FIFO_LEN 16 -#define TX_LEVEL 3 -#define RX_LEVEL (JZ4780_I2C_FIFO_LEN - TX_LEVEL - 1) + +#define X1000_I2C_FIFO_LEN 64 #define JZ4780_I2C_TIMEOUT 300 #define BUFSIZE 200 +enum ingenic_i2c_version { + ID_JZ4780, + ID_X1000, +}; + +/* ingenic_i2c_config: SoC specific config data. */ +struct ingenic_i2c_config { + enum ingenic_i2c_version version; + + int fifosize; + int tx_level; + int rx_level; +}; + struct jz4780_i2c { void __iomem *iomem; int irq; struct clk *clk; struct i2c_adapter adap; + const struct ingenic_i2c_config *cdata; /* lock to protect rbuf and wbuf between xfer_rd/wr and irq handler */ spinlock_t lock; @@ -340,11 +360,18 @@ static int jz4780_i2c_set_speed(struct jz4780_i2c *i2c) if (hold_time >= 0) { /*i2c hold time enable */ - hold_time |= JZ4780_I2C_SDAHD_HDENB; - jz4780_i2c_writew(i2c, JZ4780_I2C_SDAHD, hold_time); + if (i2c->cdata->version >= ID_X1000) { + jz4780_i2c_writew(i2c, X1000_I2C_SDAHD, hold_time); + } else { + hold_time |= JZ4780_I2C_SDAHD_HDENB; + jz4780_i2c_writew(i2c, JZ4780_I2C_SDAHD, hold_time); + } } else { /* disable hold time */ - jz4780_i2c_writew(i2c, JZ4780_I2C_SDAHD, 0); + if (i2c->cdata->version >= ID_X1000) + jz4780_i2c_writew(i2c, X1000_I2C_SDAHD, 0); + else + jz4780_i2c_writew(i2c, JZ4780_I2C_SDAHD, 0); } return 0; @@ -359,9 +386,11 @@ static int jz4780_i2c_cleanup(struct jz4780_i2c *i2c) spin_lock_irqsave(&i2c->lock, flags); /* can send stop now if need */ - tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); - tmp &= ~JZ4780_I2C_CTRL_STPHLD; - jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + if (i2c->cdata->version < ID_X1000) { + tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); + tmp &= ~JZ4780_I2C_CTRL_STPHLD; + jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + } /* disable all interrupts first */ jz4780_i2c_writew(i2c, JZ4780_I2C_INTM, 0); @@ -399,11 +428,19 @@ static int jz4780_i2c_prepare(struct jz4780_i2c *i2c) return jz4780_i2c_enable(i2c); } -static void jz4780_i2c_send_rcmd(struct jz4780_i2c *i2c, int cmd_count) +static void jz4780_i2c_send_rcmd(struct jz4780_i2c *i2c, + int cmd_count, + int cmd_left) { int i; - for (i = 0; i < cmd_count; i++) + for (i = 0; i < cmd_count - 1; i++) + jz4780_i2c_writew(i2c, JZ4780_I2C_DC, JZ4780_I2C_DC_READ); + + if ((cmd_left == 0) && (i2c->cdata->version >= ID_X1000)) + jz4780_i2c_writew(i2c, JZ4780_I2C_DC, + JZ4780_I2C_DC_READ | X1000_I2C_DC_STOP); + else jz4780_i2c_writew(i2c, JZ4780_I2C_DC, JZ4780_I2C_DC_READ); } @@ -458,37 +495,44 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id) rd_left = i2c->rd_total_len - i2c->rd_data_xfered; - if (rd_left <= JZ4780_I2C_FIFO_LEN) + if (rd_left <= i2c->cdata->fifosize) jz4780_i2c_writew(i2c, JZ4780_I2C_RXTL, rd_left - 1); } if (intst & JZ4780_I2C_INTST_TXEMP) { if (i2c->is_write == 0) { int cmd_left = 
i2c->rd_total_len - i2c->rd_cmd_xfered; - int max_send = (JZ4780_I2C_FIFO_LEN - 1) + int max_send = (i2c->cdata->fifosize - 1) - (i2c->rd_cmd_xfered - i2c->rd_data_xfered); int cmd_to_send = min(cmd_left, max_send); if (i2c->rd_cmd_xfered != 0) cmd_to_send = min(cmd_to_send, - JZ4780_I2C_FIFO_LEN - - TX_LEVEL - 1); + i2c->cdata->fifosize + - i2c->cdata->tx_level - 1); if (cmd_to_send) { - jz4780_i2c_send_rcmd(i2c, cmd_to_send); i2c->rd_cmd_xfered += cmd_to_send; + cmd_left = i2c->rd_total_len - + i2c->rd_cmd_xfered; + jz4780_i2c_send_rcmd(i2c, + cmd_to_send, cmd_left); + } - cmd_left = i2c->rd_total_len - i2c->rd_cmd_xfered; if (cmd_left == 0) { intmsk = jz4780_i2c_readw(i2c, JZ4780_I2C_INTM); intmsk &= ~JZ4780_I2C_INTM_MTXEMP; jz4780_i2c_writew(i2c, JZ4780_I2C_INTM, intmsk); - tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); - tmp &= ~JZ4780_I2C_CTRL_STPHLD; - jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + if (i2c->cdata->version < ID_X1000) { + tmp = jz4780_i2c_readw(i2c, + JZ4780_I2C_CTRL); + tmp &= ~JZ4780_I2C_CTRL_STPHLD; + jz4780_i2c_writew(i2c, + JZ4780_I2C_CTRL, tmp); + } } } else { unsigned short data; @@ -497,23 +541,26 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id) i2c_sta = jz4780_i2c_readw(i2c, JZ4780_I2C_STA); while ((i2c_sta & JZ4780_I2C_STA_TFNF) && - (i2c->wt_len > 0)) { + (i2c->wt_len > 0)) { i2c_sta = jz4780_i2c_readw(i2c, JZ4780_I2C_STA); data = *i2c->wbuf; data &= ~JZ4780_I2C_DC_READ; - jz4780_i2c_writew(i2c, JZ4780_I2C_DC, - data); + if ((!i2c->stop_hold) && (i2c->cdata->version >= + ID_X1000)) + data |= X1000_I2C_DC_STOP; + jz4780_i2c_writew(i2c, JZ4780_I2C_DC, data); i2c->wbuf++; i2c->wt_len--; } if (i2c->wt_len == 0) { - if (!i2c->stop_hold) { + if ((!i2c->stop_hold) && (i2c->cdata->version < + ID_X1000)) { tmp = jz4780_i2c_readw(i2c, - JZ4780_I2C_CTRL); + JZ4780_I2C_CTRL); tmp &= ~JZ4780_I2C_CTRL_STPHLD; - jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, - tmp); + jz4780_i2c_writew(i2c, + JZ4780_I2C_CTRL, tmp); } jz4780_i2c_trans_done(i2c); @@ -567,20 +614,22 @@ static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, i2c->rd_data_xfered = 0; i2c->rd_cmd_xfered = 0; - if (len <= JZ4780_I2C_FIFO_LEN) + if (len <= i2c->cdata->fifosize) jz4780_i2c_writew(i2c, JZ4780_I2C_RXTL, len - 1); else - jz4780_i2c_writew(i2c, JZ4780_I2C_RXTL, RX_LEVEL); + jz4780_i2c_writew(i2c, JZ4780_I2C_RXTL, i2c->cdata->rx_level); - jz4780_i2c_writew(i2c, JZ4780_I2C_TXTL, TX_LEVEL); + jz4780_i2c_writew(i2c, JZ4780_I2C_TXTL, i2c->cdata->tx_level); jz4780_i2c_writew(i2c, JZ4780_I2C_INTM, JZ4780_I2C_INTM_MRXFL | JZ4780_I2C_INTM_MTXEMP | JZ4780_I2C_INTM_MTXABT | JZ4780_I2C_INTM_MRXOF); - tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); - tmp |= JZ4780_I2C_CTRL_STPHLD; - jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + if (i2c->cdata->version < ID_X1000) { + tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); + tmp |= JZ4780_I2C_CTRL_STPHLD; + jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + } spin_unlock_irqrestore(&i2c->lock, flags); @@ -626,14 +675,16 @@ static inline int jz4780_i2c_xfer_write(struct jz4780_i2c *i2c, i2c->wbuf = buf; i2c->wt_len = len; - jz4780_i2c_writew(i2c, JZ4780_I2C_TXTL, TX_LEVEL); + jz4780_i2c_writew(i2c, JZ4780_I2C_TXTL, i2c->cdata->tx_level); jz4780_i2c_writew(i2c, JZ4780_I2C_INTM, JZ4780_I2C_INTM_MTXEMP | JZ4780_I2C_INTM_MTXABT); - tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); - tmp |= JZ4780_I2C_CTRL_STPHLD; - jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + if (i2c->cdata->version < ID_X1000) { + tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); + tmp |= 
JZ4780_I2C_CTRL_STPHLD; + jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + } spin_unlock_irqrestore(&i2c->lock, flags); @@ -716,8 +767,25 @@ static const struct i2c_algorithm jz4780_i2c_algorithm = { .functionality = jz4780_i2c_functionality, }; +static const struct ingenic_i2c_config jz4780_i2c_config = { + .version = ID_JZ4780, + + .fifosize = JZ4780_I2C_FIFO_LEN, + .tx_level = JZ4780_I2C_FIFO_LEN / 2, + .rx_level = JZ4780_I2C_FIFO_LEN / 2 - 1, +}; + +static const struct ingenic_i2c_config x1000_i2c_config = { + .version = ID_X1000, + + .fifosize = X1000_I2C_FIFO_LEN, + .tx_level = X1000_I2C_FIFO_LEN / 2, + .rx_level = X1000_I2C_FIFO_LEN / 2 - 1, +}; + static const struct of_device_id jz4780_i2c_of_matches[] = { - { .compatible = "ingenic,jz4780-i2c", }, + { .compatible = "ingenic,jz4780-i2c", .data = &jz4780_i2c_config }, + { .compatible = "ingenic,x1000-i2c", .data = &x1000_i2c_config }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, jz4780_i2c_of_matches); @@ -734,6 +802,12 @@ static int jz4780_i2c_probe(struct platform_device *pdev) if (!i2c) return -ENOMEM; + i2c->cdata = device_get_match_data(&pdev->dev); + if (!i2c->cdata) { + dev_err(&pdev->dev, "Error: No device match found\n"); + return -ENODEV; + } + i2c->adap.owner = THIS_MODULE; i2c->adap.algo = &jz4780_i2c_algorithm; i2c->adap.algo_data = i2c; @@ -777,9 +851,11 @@ static int jz4780_i2c_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Bus frequency is %d KHz\n", i2c->speed); - tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); - tmp &= ~JZ4780_I2C_CTRL_STPHLD; - jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + if (i2c->cdata->version < ID_X1000) { + tmp = jz4780_i2c_readw(i2c, JZ4780_I2C_CTRL); + tmp &= ~JZ4780_I2C_CTRL_STPHLD; + jz4780_i2c_writew(i2c, JZ4780_I2C_CTRL, tmp); + } jz4780_i2c_writew(i2c, JZ4780_I2C_INTM, 0x0); From 14f4957313fbf575c7bbd37d45cb148e11fdbc20 Mon Sep 17 00:00:00 2001 From: Khouloud Touil Date: Tue, 7 Jan 2020 10:29:18 +0100 Subject: [PATCH 048/658] dt-bindings: nvmem: new optional property wp-gpios Several memories have a write-protect pin, that when pulled high, it blocks the write operation. On some boards, this pin is connected to a GPIO and pulled high by default, which forces the user to manually change its state before writing. Instead of modifying all the memory drivers to check this pin, make the NVMEM subsystem check if the write-protect GPIO being passed through the nvmem_config or defined in the device tree and pull it low whenever writing to the memory. Add a new optional property to the device tree binding document, which allows to specify the GPIO line to which the write-protect pin is connected. Signed-off-by: Khouloud Touil Reviewed-by: Linus Walleij Reviewed-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/nvmem/nvmem.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml index 1c75a059206c..b43c6c65294e 100644 --- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml +++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml @@ -34,6 +34,14 @@ properties: description: Mark the provider as read only. + wp-gpios: + description: + GPIO to which the write-protect pin of the chip is connected. + The write-protect GPIO is asserted, when it's driven high + (logical '1') to block the write operation. It's deasserted, + when it's driven low (logical '0') to allow writing. 
+ maxItems: 1 + patternProperties: "^.*@[0-9a-f]+$": type: object @@ -63,9 +71,12 @@ patternProperties: examples: - | + #include + qfprom: eeprom@700000 { #address-cells = <1>; #size-cells = <1>; + wp-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>; /* ... */ From 2a127da461a9d8d97782d6e82b227041393eb4d2 Mon Sep 17 00:00:00 2001 From: Khouloud Touil Date: Tue, 7 Jan 2020 10:29:19 +0100 Subject: [PATCH 049/658] nvmem: add support for the write-protect pin The write-protect pin handling looks like a standard property that could benefit other users if available in the core nvmem framework. Instead of modifying all the memory drivers to check this pin, make the NVMEM subsystem check if the write-protect GPIO being passed through the nvmem_config or defined in the device tree and pull it low whenever writing to the memory. There was a suggestion for introducing the gpiodesc from pdata, but as pdata is already removed it could be replaced by adding it to nvmem_config. Reference: https://lists.96boards.org/pipermail/dev/2018-August/001056.html Signed-off-by: Khouloud Touil Reviewed-by: Linus Walleij Acked-by: Srinivas Kandagatla Signed-off-by: Bartosz Golaszewski --- drivers/nvmem/core.c | 19 +++++++++++++++++-- drivers/nvmem/nvmem.h | 2 ++ include/linux/nvmem-provider.h | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 9f1ee9c766ec..3e1c94c4eee8 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include "nvmem.h" @@ -54,8 +55,14 @@ static int nvmem_reg_read(struct nvmem_device *nvmem, unsigned int offset, static int nvmem_reg_write(struct nvmem_device *nvmem, unsigned int offset, void *val, size_t bytes) { - if (nvmem->reg_write) - return nvmem->reg_write(nvmem->priv, offset, val, bytes); + int ret; + + if (nvmem->reg_write) { + gpiod_set_value_cansleep(nvmem->wp_gpio, 0); + ret = nvmem->reg_write(nvmem->priv, offset, val, bytes); + gpiod_set_value_cansleep(nvmem->wp_gpio, 1); + return ret; + } return -EINVAL; } @@ -338,6 +345,14 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) kfree(nvmem); return ERR_PTR(rval); } + if (config->wp_gpio) + nvmem->wp_gpio = config->wp_gpio; + else + nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp", + GPIOD_OUT_HIGH); + if (IS_ERR(nvmem->wp_gpio)) + return PTR_ERR(nvmem->wp_gpio); + kref_init(&nvmem->refcnt); INIT_LIST_HEAD(&nvmem->cells); diff --git a/drivers/nvmem/nvmem.h b/drivers/nvmem/nvmem.h index eb8ed7121fa3..be0d66d75c8a 100644 --- a/drivers/nvmem/nvmem.h +++ b/drivers/nvmem/nvmem.h @@ -9,6 +9,7 @@ #include #include #include +#include struct nvmem_device { struct module *owner; @@ -26,6 +27,7 @@ struct nvmem_device { struct list_head cells; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; + struct gpio_desc *wp_gpio; void *priv; }; diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index fe051323be0a..6d6f8e5d24c9 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -11,6 +11,7 @@ #include #include +#include struct nvmem_device; struct nvmem_cell_info; @@ -45,6 +46,7 @@ enum nvmem_type { * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. * @priv: User context passed to read/write callbacks. + * @wp-gpio: Write protect pin * * Note: A default "nvmem" name will be assigned to the device if * no name is specified in its configuration. 
In such case "" is @@ -58,6 +60,7 @@ struct nvmem_config { const char *name; int id; struct module *owner; + struct gpio_desc *wp_gpio; const struct nvmem_cell_info *cells; int ncells; enum nvmem_type type; From 6c57a664c4871549ceb85ccde9600bd5287dd319 Mon Sep 17 00:00:00 2001 From: Khouloud Touil Date: Tue, 7 Jan 2020 10:29:20 +0100 Subject: [PATCH 050/658] dt-bindings: at24: make wp-gpios a reference to the property defined by nvmem NVMEM framework is an interface for the at24 EEPROMs as well as for other drivers, instead of passing the wp-gpios over the different drivers each time, it would be better to pass it over the NVMEM subsystem once and for all. Making wp-gpios a reference to the property defined by nvmem. Signed-off-by: Khouloud Touil Reviewed-by: Linus Walleij Reviewed-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index e8778560d966..767959941399 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -145,10 +145,7 @@ properties: over reads to the next slave address. Please consult the manual of your device. - wp-gpios: - description: - GPIO to which the write-protect pin of the chip is connected. - maxItems: 1 + wp-gpios: true address-width: allOf: From 1c89074bf85068d1b86f2e0f0c2110fdd9b83c9f Mon Sep 17 00:00:00 2001 From: Khouloud Touil Date: Thu, 9 Jan 2020 10:51:13 +0100 Subject: [PATCH 051/658] eeprom: at24: remove the write-protect pin support NVMEM framework is an interface for the at24 EEPROMs as well as for other drivers, instead of passing the wp-gpios over the different drivers each time, it would be better to pass it over the NVMEM subsystem once and for all. Removing the support for the write-protect pin after adding it to the NVMEM subsystem. Signed-off-by: Khouloud Touil Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index ffecabd5d527..896c1fe3c44b 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -89,8 +88,6 @@ struct at24_data { struct nvmem_device *nvmem; - struct gpio_desc *wp_gpio; - /* * Some chips tie up multiple I2C addresses; dummy devices reserve * them for us, and we'll use them with SMBus calls. @@ -457,12 +454,10 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) * from this host, but not from other I2C masters. 
*/ mutex_lock(&at24->lock); - gpiod_set_value_cansleep(at24->wp_gpio, 0); while (count) { ret = at24_regmap_write(at24, buf, off, count); if (ret < 0) { - gpiod_set_value_cansleep(at24->wp_gpio, 1); mutex_unlock(&at24->lock); pm_runtime_put(dev); return ret; @@ -472,7 +467,6 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) count -= ret; } - gpiod_set_value_cansleep(at24->wp_gpio, 1); mutex_unlock(&at24->lock); pm_runtime_put(dev); @@ -662,9 +656,6 @@ static int at24_probe(struct i2c_client *client) at24->client[0].client = client; at24->client[0].regmap = regmap; - at24->wp_gpio = devm_gpiod_get_optional(dev, "wp", GPIOD_OUT_HIGH); - if (IS_ERR(at24->wp_gpio)) - return PTR_ERR(at24->wp_gpio); writable = !(flags & AT24_FLAG_READONLY); if (writable) { From a99d2c6ccd15570a74a39992efa82fd169026b4c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 10 Jan 2020 09:19:29 +0100 Subject: [PATCH 052/658] nvmem: fix a 'makes pointer from integer without a cast' build warning nvmem_register() returns a pointer, not a long int. Use ERR_CAST() to cast the struct gpio_desc pointer to struct nvmem_device. Reported-by: kbuild test robot Fixes: 2a127da461a9 ("nvmem: add support for the write-protect pin") Signed-off-by: Bartosz Golaszewski Acked-by: Srinivas Kandagatla --- drivers/nvmem/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 3e1c94c4eee8..408ce702347e 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -351,7 +351,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp", GPIOD_OUT_HIGH); if (IS_ERR(nvmem->wp_gpio)) - return PTR_ERR(nvmem->wp_gpio); + return ERR_CAST(nvmem->wp_gpio); kref_init(&nvmem->refcnt); From 56de4e8f9146680bcd048a29888f7438d5e58c55 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 13 Dec 2019 13:21:30 -0500 Subject: [PATCH 053/658] perf: Make struct ring_buffer less ambiguous eBPF requires needing to know the size of the perf ring buffer structure. But it unfortunately has the same name as the generic ring buffer used by tracing and oprofile. To make it less ambiguous, rename the perf ring buffer structure to "perf_buffer". As other parts of the ring buffer code has "perf_" as the prefix, it only makes sense to give the ring buffer the "perf_" prefix as well. 
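[Editorial illustration, not part of the patch] A tiny sketch of why the shared name matters once one consumer (for instance eBPF/BTF tooling that wants the size of perf's buffer) has to see both subsystems' types: two different definitions of the same struct tag cannot coexist in one translation unit, so giving perf's structure its own tag removes the ambiguity. The struct bodies below are invented stand-ins, not the real layouts.

#include <stdio.h>

/* Invented stand-ins: perf's buffer and tracing's buffer as distinct tags. */
struct perf_buffer { int nr_pages; };
struct trace_ring_buffer { int nr_cpus; };

int main(void)
{
	/* With distinct tags, both sizes are unambiguous in one translation unit. */
	printf("perf: %zu, tracing: %zu\n",
	       sizeof(struct perf_buffer), sizeof(struct trace_ring_buffer));
	return 0;
}
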
Link: https://lore.kernel.org/r/20191213153553.GE20583@krava Acked-by: Peter Zijlstra Suggested-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) --- include/linux/perf_event.h | 6 ++--- kernel/events/core.c | 42 ++++++++++++++--------------- kernel/events/internal.h | 34 +++++++++++------------ kernel/events/ring_buffer.c | 54 ++++++++++++++++++------------------- 4 files changed, 68 insertions(+), 68 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6d4c22aee384..cf65763af0cb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -582,7 +582,7 @@ struct swevent_hlist { #define PERF_ATTACH_ITRACE 0x10 struct perf_cgroup; -struct ring_buffer; +struct perf_buffer; struct pmu_event_list { raw_spinlock_t lock; @@ -694,7 +694,7 @@ struct perf_event { struct mutex mmap_mutex; atomic_t mmap_count; - struct ring_buffer *rb; + struct perf_buffer *rb; struct list_head rb_entry; unsigned long rcu_batches; int rcu_pending; @@ -854,7 +854,7 @@ struct perf_cpu_context { struct perf_output_handle { struct perf_event *event; - struct ring_buffer *rb; + struct perf_buffer *rb; unsigned long wakeup; unsigned long size; u64 aux_flags; diff --git a/kernel/events/core.c b/kernel/events/core.c index a1f8bde19b56..455451d24b4a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4373,7 +4373,7 @@ static void free_event_rcu(struct rcu_head *head) } static void ring_buffer_attach(struct perf_event *event, - struct ring_buffer *rb); + struct perf_buffer *rb); static void detach_sb_event(struct perf_event *event) { @@ -5054,7 +5054,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static __poll_t perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; - struct ring_buffer *rb; + struct perf_buffer *rb; __poll_t events = EPOLLHUP; poll_wait(file, &event->waitq, wait); @@ -5296,7 +5296,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon return perf_event_set_bpf_prog(event, arg); case PERF_EVENT_IOC_PAUSE_OUTPUT: { - struct ring_buffer *rb; + struct perf_buffer *rb; rcu_read_lock(); rb = rcu_dereference(event->rb); @@ -5432,7 +5432,7 @@ static void calc_timer_values(struct perf_event *event, static void perf_event_init_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; - struct ring_buffer *rb; + struct perf_buffer *rb; rcu_read_lock(); rb = rcu_dereference(event->rb); @@ -5464,7 +5464,7 @@ void __weak arch_perf_update_userpage( void perf_event_update_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; - struct ring_buffer *rb; + struct perf_buffer *rb; u64 enabled, running, now; rcu_read_lock(); @@ -5515,7 +5515,7 @@ EXPORT_SYMBOL_GPL(perf_event_update_userpage); static vm_fault_t perf_mmap_fault(struct vm_fault *vmf) { struct perf_event *event = vmf->vma->vm_file->private_data; - struct ring_buffer *rb; + struct perf_buffer *rb; vm_fault_t ret = VM_FAULT_SIGBUS; if (vmf->flags & FAULT_FLAG_MKWRITE) { @@ -5548,9 +5548,9 @@ unlock: } static void ring_buffer_attach(struct perf_event *event, - struct ring_buffer *rb) + struct perf_buffer *rb) { - struct ring_buffer *old_rb = NULL; + struct perf_buffer *old_rb = NULL; unsigned long flags; if (event->rb) { @@ -5608,7 +5608,7 @@ static void ring_buffer_attach(struct perf_event *event, static void ring_buffer_wakeup(struct perf_event *event) { - struct ring_buffer *rb; + struct perf_buffer *rb; rcu_read_lock(); rb = rcu_dereference(event->rb); 
@@ -5619,9 +5619,9 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -struct ring_buffer *ring_buffer_get(struct perf_event *event) +struct perf_buffer *ring_buffer_get(struct perf_event *event) { - struct ring_buffer *rb; + struct perf_buffer *rb; rcu_read_lock(); rb = rcu_dereference(event->rb); @@ -5634,7 +5634,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event) return rb; } -void ring_buffer_put(struct ring_buffer *rb) +void ring_buffer_put(struct perf_buffer *rb) { if (!refcount_dec_and_test(&rb->refcount)) return; @@ -5672,7 +5672,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) { struct perf_event *event = vma->vm_file->private_data; - struct ring_buffer *rb = ring_buffer_get(event); + struct perf_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); @@ -5790,8 +5790,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; - struct ring_buffer *rb = NULL; unsigned long vma_size; unsigned long nr_pages; long user_extra = 0, extra = 0; @@ -6266,7 +6266,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event, size_t size) { struct perf_event *sampler = event->aux_event; - struct ring_buffer *rb; + struct perf_buffer *rb; data->aux_size = 0; @@ -6299,7 +6299,7 @@ out: return data->aux_size; } -long perf_pmu_snapshot_aux(struct ring_buffer *rb, +long perf_pmu_snapshot_aux(struct perf_buffer *rb, struct perf_event *event, struct perf_output_handle *handle, unsigned long size) @@ -6338,8 +6338,8 @@ static void perf_aux_sample_output(struct perf_event *event, struct perf_sample_data *data) { struct perf_event *sampler = event->aux_event; + struct perf_buffer *rb; unsigned long pad; - struct ring_buffer *rb; long size; if (WARN_ON_ONCE(!sampler || !data->aux_size)) @@ -6707,7 +6707,7 @@ void perf_output_sample(struct perf_output_handle *handle, int wakeup_events = event->attr.wakeup_events; if (wakeup_events) { - struct ring_buffer *rb = handle->rb; + struct perf_buffer *rb = handle->rb; int events = local_inc_return(&rb->events); if (events >= wakeup_events) { @@ -7150,7 +7150,7 @@ void perf_event_exec(void) } struct remote_output { - struct ring_buffer *rb; + struct perf_buffer *rb; int err; }; @@ -7158,7 +7158,7 @@ static void __perf_event_output_stop(struct perf_event *event, void *data) { struct perf_event *parent = event->parent; struct remote_output *ro = data; - struct ring_buffer *rb = ro->rb; + struct perf_buffer *rb = ro->rb; struct stop_event_data sd = { .event = event, }; @@ -10998,7 +10998,7 @@ err_size: static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { - struct ring_buffer *rb = NULL; + struct perf_buffer *rb = NULL; int ret = -EINVAL; if (!output_event) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 747d67f130cb..f16f66b6b655 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -10,7 +10,7 @@ #define RING_BUFFER_WRITABLE 0x01 -struct ring_buffer { +struct perf_buffer { refcount_t refcount; struct rcu_head rcu_head; #ifdef CONFIG_PERF_USE_VMALLOC @@ -58,17 +58,17 @@ struct ring_buffer { void *data_pages[0]; }; -extern void rb_free(struct ring_buffer *rb); +extern void rb_free(struct perf_buffer 
*rb); static inline void rb_free_rcu(struct rcu_head *rcu_head) { - struct ring_buffer *rb; + struct perf_buffer *rb; - rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb = container_of(rcu_head, struct perf_buffer, rcu_head); rb_free(rb); } -static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause) +static inline void rb_toggle_paused(struct perf_buffer *rb, bool pause) { if (!pause && rb->nr_pages) rb->paused = 0; @@ -76,16 +76,16 @@ static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause) rb->paused = 1; } -extern struct ring_buffer * +extern struct perf_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); -extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, +extern int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, pgoff_t pgoff, int nr_pages, long watermark, int flags); -extern void rb_free_aux(struct ring_buffer *rb); -extern struct ring_buffer *ring_buffer_get(struct perf_event *event); -extern void ring_buffer_put(struct ring_buffer *rb); +extern void rb_free_aux(struct perf_buffer *rb); +extern struct perf_buffer *ring_buffer_get(struct perf_event *event); +extern void ring_buffer_put(struct perf_buffer *rb); -static inline bool rb_has_aux(struct ring_buffer *rb) +static inline bool rb_has_aux(struct perf_buffer *rb) { return !!rb->aux_nr_pages; } @@ -94,7 +94,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head, unsigned long size, u64 flags); extern struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); +perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff); #ifdef CONFIG_PERF_USE_VMALLOC /* @@ -103,25 +103,25 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); * Required for architectures that have d-cache aliasing issues. 
*/ -static inline int page_order(struct ring_buffer *rb) +static inline int page_order(struct perf_buffer *rb) { return rb->page_order; } #else -static inline int page_order(struct ring_buffer *rb) +static inline int page_order(struct perf_buffer *rb) { return 0; } #endif -static inline unsigned long perf_data_size(struct ring_buffer *rb) +static inline unsigned long perf_data_size(struct perf_buffer *rb) { return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } -static inline unsigned long perf_aux_size(struct ring_buffer *rb) +static inline unsigned long perf_aux_size(struct perf_buffer *rb) { return rb->aux_nr_pages << PAGE_SHIFT; } @@ -141,7 +141,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb) buf += written; \ handle->size -= written; \ if (!handle->size) { \ - struct ring_buffer *rb = handle->rb; \ + struct perf_buffer *rb = handle->rb; \ \ handle->page++; \ handle->page &= rb->nr_pages - 1; \ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 7ffd5c763f93..192b8abc6330 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -35,7 +35,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle) */ static void perf_output_get_handle(struct perf_output_handle *handle) { - struct ring_buffer *rb = handle->rb; + struct perf_buffer *rb = handle->rb; preempt_disable(); @@ -49,7 +49,7 @@ static void perf_output_get_handle(struct perf_output_handle *handle) static void perf_output_put_handle(struct perf_output_handle *handle) { - struct ring_buffer *rb = handle->rb; + struct perf_buffer *rb = handle->rb; unsigned long head; unsigned int nest; @@ -150,7 +150,7 @@ __perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size, bool backward) { - struct ring_buffer *rb; + struct perf_buffer *rb; unsigned long tail, offset, head; int have_lost, page_shift; struct { @@ -301,7 +301,7 @@ void perf_output_end(struct perf_output_handle *handle) } static void -ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) +ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) { long max_size = perf_data_size(rb); @@ -361,7 +361,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, { struct perf_event *output_event = event; unsigned long aux_head, aux_tail; - struct ring_buffer *rb; + struct perf_buffer *rb; unsigned int nest; if (output_event->parent) @@ -449,7 +449,7 @@ err: } EXPORT_SYMBOL_GPL(perf_aux_output_begin); -static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb) +static __always_inline bool rb_need_aux_wakeup(struct perf_buffer *rb) { if (rb->aux_overwrite) return false; @@ -475,7 +475,7 @@ static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb) void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED); - struct ring_buffer *rb = handle->rb; + struct perf_buffer *rb = handle->rb; unsigned long aux_head; /* in overwrite mode, driver provides aux_head via handle */ @@ -532,7 +532,7 @@ EXPORT_SYMBOL_GPL(perf_aux_output_end); */ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { - struct ring_buffer *rb = handle->rb; + struct perf_buffer *rb = handle->rb; if (size > handle->size) return -ENOSPC; @@ -569,8 +569,8 @@ long perf_output_copy_aux(struct perf_output_handle *aux_handle, struct perf_output_handle *handle, unsigned long from, unsigned long to) { + struct perf_buffer *rb = aux_handle->rb; 
unsigned long tocopy, remainder, len = 0; - struct ring_buffer *rb = aux_handle->rb; void *addr; from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1; @@ -626,7 +626,7 @@ static struct page *rb_alloc_aux_page(int node, int order) return page; } -static void rb_free_aux_page(struct ring_buffer *rb, int idx) +static void rb_free_aux_page(struct perf_buffer *rb, int idx) { struct page *page = virt_to_page(rb->aux_pages[idx]); @@ -635,7 +635,7 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx) __free_page(page); } -static void __rb_free_aux(struct ring_buffer *rb) +static void __rb_free_aux(struct perf_buffer *rb) { int pg; @@ -662,7 +662,7 @@ static void __rb_free_aux(struct ring_buffer *rb) } } -int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, +int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, pgoff_t pgoff, int nr_pages, long watermark, int flags) { bool overwrite = !(flags & RING_BUFFER_WRITABLE); @@ -753,7 +753,7 @@ out: return ret; } -void rb_free_aux(struct ring_buffer *rb) +void rb_free_aux(struct perf_buffer *rb) { if (refcount_dec_and_test(&rb->aux_refcount)) __rb_free_aux(rb); @@ -766,7 +766,7 @@ void rb_free_aux(struct ring_buffer *rb) */ static struct page * -__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff) { if (pgoff > rb->nr_pages) return NULL; @@ -798,13 +798,13 @@ static void perf_mmap_free_page(void *addr) __free_page(page); } -struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) +struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { - struct ring_buffer *rb; + struct perf_buffer *rb; unsigned long size; int i; - size = sizeof(struct ring_buffer); + size = sizeof(struct perf_buffer); size += nr_pages * sizeof(void *); if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) @@ -843,7 +843,7 @@ fail: return NULL; } -void rb_free(struct ring_buffer *rb) +void rb_free(struct perf_buffer *rb) { int i; @@ -854,13 +854,13 @@ void rb_free(struct ring_buffer *rb) } #else -static int data_page_nr(struct ring_buffer *rb) +static int data_page_nr(struct perf_buffer *rb) { return rb->nr_pages << page_order(rb); } static struct page * -__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff) { /* The '>' counts in the user page. 
*/ if (pgoff > data_page_nr(rb)) @@ -878,11 +878,11 @@ static void perf_mmap_unmark_page(void *addr) static void rb_free_work(struct work_struct *work) { - struct ring_buffer *rb; + struct perf_buffer *rb; void *base; int i, nr; - rb = container_of(work, struct ring_buffer, work); + rb = container_of(work, struct perf_buffer, work); nr = data_page_nr(rb); base = rb->user_page; @@ -894,18 +894,18 @@ static void rb_free_work(struct work_struct *work) kfree(rb); } -void rb_free(struct ring_buffer *rb) +void rb_free(struct perf_buffer *rb) { schedule_work(&rb->work); } -struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) +struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { - struct ring_buffer *rb; + struct perf_buffer *rb; unsigned long size; void *all_buf; - size = sizeof(struct ring_buffer); + size = sizeof(struct perf_buffer); size += sizeof(void *); rb = kzalloc(size, GFP_KERNEL); @@ -939,7 +939,7 @@ fail: #endif struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff) { if (rb->aux_nr_pages) { /* above AUX space */ From 1c5eb4481e0151d579f738175497f998840f7bbc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 9 Jan 2020 18:53:48 -0500 Subject: [PATCH 054/658] tracing: Rename trace_buffer to array_buffer As we are working to remove the generic "ring_buffer" name that is used by both tracing and perf, the ring_buffer name for tracing will be renamed to trace_buffer, and perf's ring buffer will be renamed to perf_buffer. As there already exists a trace_buffer that is used by the trace_arrays, it needs to be first renamed to array_buffer. Link: https://lore.kernel.org/r/20191213153553.GE20583@krava Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 4 +- kernel/trace/blktrace.c | 4 +- kernel/trace/ftrace.c | 8 +- kernel/trace/trace.c | 230 +++++++++++++-------------- kernel/trace/trace.h | 16 +- kernel/trace/trace_branch.c | 4 +- kernel/trace/trace_events.c | 18 +-- kernel/trace/trace_events_hist.c | 2 +- kernel/trace/trace_functions.c | 8 +- kernel/trace/trace_functions_graph.c | 14 +- kernel/trace/trace_hwlat.c | 2 +- kernel/trace/trace_irqsoff.c | 8 +- kernel/trace/trace_kdb.c | 8 +- kernel/trace/trace_mmiotrace.c | 12 +- kernel/trace/trace_output.c | 2 +- kernel/trace/trace_sched_wakeup.c | 20 +-- kernel/trace/trace_selftest.c | 26 +-- kernel/trace/trace_syscalls.c | 4 +- 18 files changed, 195 insertions(+), 195 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 4c6e15605766..f70e5bc7e8db 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -11,7 +11,7 @@ #include struct trace_array; -struct trace_buffer; +struct array_buffer; struct tracer; struct dentry; struct bpf_prog; @@ -79,7 +79,7 @@ struct trace_entry { struct trace_iterator { struct trace_array *tr; struct tracer *trace; - struct trace_buffer *trace_buffer; + struct array_buffer *array_buffer; void *private; int cpu_file; struct mutex mutex; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 475e29498bca..3b926f62ed83 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -75,7 +75,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, ssize_t cgid_len = cgid ? 
sizeof(cgid) : 0; if (blk_tracer) { - buffer = blk_tr->trace_buffer.buffer; + buffer = blk_tr->array_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, @@ -248,7 +248,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (blk_tracer) { tracing_record_cmdline(current); - buffer = blk_tr->trace_buffer.buffer; + buffer = blk_tr->array_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9bf1f2cd515e..3f0ae07e72ef 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -146,7 +146,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; - if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) + if (tr && this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid)) return; op->saved_func(ip, parent_ip, op, regs); @@ -6922,7 +6922,7 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->function_pids); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, trace_ignore_this_task(pid_list, next)); } @@ -6976,7 +6976,7 @@ static void clear_ftrace_pids(struct trace_array *tr) unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; + per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = false; rcu_assign_pointer(tr->function_pids, NULL); @@ -7100,7 +7100,7 @@ static void ignore_task_cpu(void *data) pid_list = rcu_dereference_protected(tr->function_pids, mutex_is_locked(&ftrace_lock)); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, trace_ignore_this_task(pid_list, current)); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ddb7e7f5fe8d..67084b7945ff 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -603,7 +603,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, return read; } -static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) +static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) { u64 ts; @@ -619,7 +619,7 @@ static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) u64 ftrace_now(int cpu) { - return buffer_ftrace_now(&global_trace.trace_buffer, cpu); + return buffer_ftrace_now(&global_trace.array_buffer, cpu); } /** @@ -796,8 +796,8 @@ __trace_buffer_lock_reserve(struct ring_buffer *buffer, void tracer_tracing_on(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - ring_buffer_record_on(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + ring_buffer_record_on(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to @@ -865,7 +865,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) alloc = sizeof(*entry) + size + 2; /* possible \n added */ local_save_flags(irq_flags); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, irq_flags, pc); if (!event) @@ -913,7 +913,7 @@ int __trace_bputs(unsigned long ip, const char *str) return 0; local_save_flags(irq_flags); - buffer = global_trace.trace_buffer.buffer; + buffer 
= global_trace.array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, irq_flags, pc); if (!event) @@ -1036,9 +1036,9 @@ void *tracing_cond_snapshot_data(struct trace_array *tr) } EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); -static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, - struct trace_buffer *size_buf, int cpu_id); -static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); +static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, + struct array_buffer *size_buf, int cpu_id); +static void set_buffer_entries(struct array_buffer *buf, unsigned long val); int tracing_alloc_snapshot_instance(struct trace_array *tr) { @@ -1048,7 +1048,7 @@ int tracing_alloc_snapshot_instance(struct trace_array *tr) /* allocate spare buffer */ ret = resize_buffer_duplicate_size(&tr->max_buffer, - &tr->trace_buffer, RING_BUFFER_ALL_CPUS); + &tr->array_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; @@ -1251,8 +1251,8 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); void tracer_tracing_off(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - ring_buffer_record_off(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + ring_buffer_record_off(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to @@ -1294,8 +1294,8 @@ void disable_trace_on_warning(void) */ bool tracer_tracing_is_on(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - return ring_buffer_record_is_on(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + return ring_buffer_record_is_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } @@ -1590,8 +1590,8 @@ void latency_fsnotify(struct trace_array *tr) static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct trace_buffer *trace_buf = &tr->trace_buffer; - struct trace_buffer *max_buf = &tr->max_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; + struct array_buffer *max_buf = &tr->max_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); @@ -1649,8 +1649,8 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, arch_spin_lock(&tr->max_lock); - /* Inherit the recordable setting from trace_buffer */ - if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + /* Inherit the recordable setting from array_buffer */ + if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) ring_buffer_record_on(tr->max_buffer.buffer); else ring_buffer_record_off(tr->max_buffer.buffer); @@ -1659,7 +1659,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) goto out_unlock; #endif - swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); + swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -1692,7 +1692,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); + ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { /* @@ -1718,7 +1718,7 @@ static int wait_on_pipe(struct trace_iterator *iter, int full) if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, + return 
ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); } @@ -1769,7 +1769,7 @@ static int run_tracer_selftest(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); tr->current_trace = type; @@ -1795,7 +1795,7 @@ static int run_tracer_selftest(struct tracer *type) return -1; } /* Only reset on passing, to avoid touching corrupted buffers */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { @@ -1962,7 +1962,7 @@ int __init register_tracer(struct tracer *type) return ret; } -static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) +static void tracing_reset_cpu(struct array_buffer *buf, int cpu) { struct ring_buffer *buffer = buf->buffer; @@ -1978,7 +1978,7 @@ static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) ring_buffer_record_enable(buffer); } -void tracing_reset_online_cpus(struct trace_buffer *buf) +void tracing_reset_online_cpus(struct array_buffer *buf) { struct ring_buffer *buffer = buf->buffer; int cpu; @@ -2008,7 +2008,7 @@ void tracing_reset_all_online_cpus(void) if (!tr->clear_trace) continue; tr->clear_trace = false; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif @@ -2117,7 +2117,7 @@ void tracing_start(void) /* Prevent the buffers from switching */ arch_spin_lock(&global_trace.max_lock); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2156,7 +2156,7 @@ static void tracing_start_tr(struct trace_array *tr) goto out; } - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2182,7 +2182,7 @@ void tracing_stop(void) /* Prevent the buffers from switching */ arch_spin_lock(&global_trace.max_lock); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -2211,7 +2211,7 @@ static void tracing_stop_tr(struct trace_array *tr) if (tr->stop_count++) goto out; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -2572,7 +2572,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, struct ring_buffer_event *entry; int val; - *current_rb = trace_file->tr->trace_buffer.buffer; + *current_rb = trace_file->tr->array_buffer.buffer; if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && @@ -2845,7 +2845,7 @@ trace_function(struct trace_array *tr, int pc) { struct trace_event_call *call = &event_function; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -2971,7 +2971,7 @@ static inline void ftrace_trace_stack(struct trace_array *tr, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { __ftrace_trace_stack(buffer, flags, skip, pc, NULL); @@ -3009,7 +3009,7 @@ void trace_dump_stack(int 
skip) /* Skip 1 to skip this function. */ skip++; #endif - __ftrace_trace_stack(global_trace.trace_buffer.buffer, + __ftrace_trace_stack(global_trace.array_buffer.buffer, flags, skip, preempt_count(), NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3154,7 +3154,7 @@ void trace_printk_init_buffers(void) * directly here. If the global_trace.buffer is already * allocated here, then this was called by module code. */ - if (global_trace.trace_buffer.buffer) + if (global_trace.array_buffer.buffer) tracing_start_cmdline_record(); } EXPORT_SYMBOL_GPL(trace_printk_init_buffers); @@ -3217,7 +3217,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, flags, pc); if (!event) @@ -3302,7 +3302,7 @@ __printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); } __printf(3, 0) @@ -3367,7 +3367,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); else - event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts, + event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, lost_events); if (event) { @@ -3382,7 +3382,7 @@ static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, unsigned long *missing_events, u64 *ent_ts) { - struct ring_buffer *buffer = iter->trace_buffer->buffer; + struct ring_buffer *buffer = iter->array_buffer->buffer; struct trace_entry *ent, *next = NULL; unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; @@ -3459,7 +3459,7 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter) static void trace_consume(struct trace_iterator *iter) { - ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts, + ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, &iter->lost_events); } @@ -3497,7 +3497,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) unsigned long entries = 0; u64 ts; - per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0; + per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; buf_iter = trace_buffer_iter(iter, cpu); if (!buf_iter) @@ -3511,13 +3511,13 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) * by the timestamp being before the start of the buffer. 
*/ while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { - if (ts >= iter->trace_buffer->time_start) + if (ts >= iter->array_buffer->time_start) break; entries++; ring_buffer_read(buf_iter, NULL); } - per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries; + per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; } /* @@ -3602,7 +3602,7 @@ static void s_stop(struct seq_file *m, void *p) } static void -get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, +get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, unsigned long *entries, int cpu) { unsigned long count; @@ -3624,7 +3624,7 @@ get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, } static void -get_total_entries(struct trace_buffer *buf, +get_total_entries(struct array_buffer *buf, unsigned long *total, unsigned long *entries) { unsigned long t, e; @@ -3647,7 +3647,7 @@ unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) if (!tr) tr = &global_trace; - get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu); + get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); return entries; } @@ -3659,7 +3659,7 @@ unsigned long trace_total_entries(struct trace_array *tr) if (!tr) tr = &global_trace; - get_total_entries(&tr->trace_buffer, &total, &entries); + get_total_entries(&tr->array_buffer, &total, &entries); return entries; } @@ -3676,7 +3676,7 @@ static void print_lat_help_header(struct seq_file *m) "# \\ / ||||| \\ | / \n"); } -static void print_event_info(struct trace_buffer *buf, struct seq_file *m) +static void print_event_info(struct array_buffer *buf, struct seq_file *m) { unsigned long total; unsigned long entries; @@ -3687,7 +3687,7 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m) seq_puts(m, "#\n"); } -static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, +static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; @@ -3698,7 +3698,7 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, seq_printf(m, "# | | %s | | |\n", tgid ? 
" | " : ""); } -static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, +static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; @@ -3720,7 +3720,7 @@ void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); - struct trace_buffer *buf = iter->trace_buffer; + struct array_buffer *buf = iter->array_buffer; struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); struct tracer *type = iter->trace; unsigned long entries; @@ -3795,7 +3795,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter) cpumask_test_cpu(iter->cpu, iter->started)) return; - if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) + if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) return; if (cpumask_available(iter->started)) @@ -3929,7 +3929,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } return 1; @@ -3941,7 +3941,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } } @@ -4031,10 +4031,10 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->trace_buffer, + print_func_help_header_irq(iter->array_buffer, m, trace_flags); else - print_func_help_header(iter->trace_buffer, m, + print_func_help_header(iter->array_buffer, m, trace_flags); } } @@ -4192,10 +4192,10 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) #ifdef CONFIG_TRACER_MAX_TRACE /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) - iter->trace_buffer = &tr->max_buffer; + iter->array_buffer = &tr->max_buffer; else #endif - iter->trace_buffer = &tr->trace_buffer; + iter->array_buffer = &tr->array_buffer; iter->snapshot = snapshot; iter->pos = -1; iter->cpu_file = tracing_get_cpu(inode); @@ -4206,7 +4206,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->trace_buffer->buffer)) + if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ @@ -4220,7 +4220,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, + ring_buffer_read_prepare(iter->array_buffer->buffer, cpu, GFP_KERNEL); } ring_buffer_read_prepare_sync(); @@ -4231,7 +4231,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, + ring_buffer_read_prepare(iter->array_buffer->buffer, cpu, GFP_KERNEL); ring_buffer_read_prepare_sync(); ring_buffer_read_start(iter->buffer_iter[cpu]); @@ -4357,7 +4357,7 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); - struct trace_buffer *trace_buf = &tr->trace_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE if (tr->current_trace->print_max) @@ -4578,13 +4578,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, */ if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); - ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu); + atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); - ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); + atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); } } arch_spin_unlock(&tr->max_lock); @@ -4726,7 +4726,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) ftrace_pid_follow_fork(tr, enabled); if (mask == TRACE_ITER_OVERWRITE) { - ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); + ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); #endif @@ -5534,11 +5534,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int tracer_init(struct tracer *t, struct trace_array *tr) { - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); return t->init(tr); } -static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) +static void set_buffer_entries(struct array_buffer *buf, unsigned long val) { int cpu; @@ -5548,8 +5548,8 @@ static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) #ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ -static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, - struct trace_buffer *size_buf, int cpu_id) +static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, + struct array_buffer *size_buf, int cpu_id) { int cpu, ret = 0; @@ -5587,10 +5587,10 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ring_buffer_expanded = true; /* May be called before buffers are initialized */ - if (!tr->trace_buffer.buffer) + if (!tr->array_buffer.buffer) return 0; - ret = ring_buffer_resize(tr->trace_buffer.buffer, size, 
cpu); + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) return ret; @@ -5601,8 +5601,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); if (ret < 0) { - int r = resize_buffer_duplicate_size(&tr->trace_buffer, - &tr->trace_buffer, cpu); + int r = resize_buffer_duplicate_size(&tr->array_buffer, + &tr->array_buffer, cpu); if (r < 0) { /* * AARGH! We are left with different @@ -5633,9 +5633,9 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->trace_buffer, size); + set_buffer_entries(&tr->array_buffer, size); else - per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size; + per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size; return ret; } @@ -5979,7 +5979,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; iter->tr = tr; - iter->trace_buffer = &tr->trace_buffer; + iter->array_buffer = &tr->array_buffer; iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); filp->private_data = iter; @@ -6039,7 +6039,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl */ return EPOLLIN | EPOLLRDNORM; else - return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file, + return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, filp, poll_table); } @@ -6356,8 +6356,8 @@ tracing_entries_read(struct file *filp, char __user *ubuf, for_each_tracing_cpu(cpu) { /* fill in the size from first enabled cpu */ if (size == 0) - size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries; - if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) { + size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; + if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { buf_size_same = 0; break; } @@ -6373,7 +6373,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -6420,7 +6420,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { - size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10; + size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; if (!ring_buffer_expanded) expanded_size += trace_buf_size >> 10; } @@ -6499,7 +6499,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt < FAULTED_SIZE) size += FAULTED_SIZE - cnt; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (unlikely(!event)) @@ -6579,7 +6579,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, irq_flags, preempt_count()); if (!event) @@ -6634,13 +6634,13 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tr->clock_id = i; - ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func); + ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); /* * New clock may not be 
consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) @@ -6703,7 +6703,7 @@ static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) mutex_lock(&trace_types_lock); - if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer)) + if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) seq_puts(m, "delta [absolute]\n"); else seq_puts(m, "[delta] absolute\n"); @@ -6748,7 +6748,7 @@ int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) goto out; } - ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); + ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) @@ -6797,7 +6797,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = 0; iter->tr = tr; - iter->trace_buffer = &tr->max_buffer; + iter->array_buffer = &tr->max_buffer; iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; @@ -6860,7 +6860,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, #endif if (tr->allocated_snapshot) ret = resize_buffer_duplicate_size(&tr->max_buffer, - &tr->trace_buffer, iter->cpu_file); + &tr->array_buffer, iter->cpu_file); else ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) @@ -6935,7 +6935,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) } info->iter.snapshot = true; - info->iter.trace_buffer = &info->iter.tr->max_buffer; + info->iter.array_buffer = &info->iter.tr->max_buffer; return ret; } @@ -7310,7 +7310,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) info->iter.tr = tr; info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; - info->iter.trace_buffer = &tr->trace_buffer; + info->iter.array_buffer = &tr->array_buffer; info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; @@ -7355,7 +7355,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, #endif if (!info->spare) { - info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, + info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, iter->cpu_file); if (IS_ERR(info->spare)) { ret = PTR_ERR(info->spare); @@ -7373,7 +7373,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, again: trace_access_lock(iter->cpu_file); - ret = ring_buffer_read_page(iter->trace_buffer->buffer, + ret = ring_buffer_read_page(iter->array_buffer->buffer, &info->spare, count, iter->cpu_file, 0); @@ -7423,7 +7423,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); if (info->spare) - ring_buffer_free_read_page(iter->trace_buffer->buffer, + ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); kfree(info); @@ -7528,7 +7528,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, again: trace_access_lock(iter->cpu_file); - entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { struct page *page; @@ -7541,7 +7541,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } refcount_set(&ref->refcount, 1); - ref->buffer = 
iter->trace_buffer->buffer; + ref->buffer = iter->array_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { ret = PTR_ERR(ref->page); @@ -7569,7 +7569,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.nr_pages++; *ppos += PAGE_SIZE; - entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); } trace_access_unlock(iter->cpu_file); @@ -7613,7 +7613,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; - struct trace_buffer *trace_buf = &tr->trace_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; @@ -8272,7 +8272,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; unsigned long val; int ret; @@ -8362,7 +8362,7 @@ static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); static int -allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) +allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) { enum ring_buffer_flags rb_flags; @@ -8382,8 +8382,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size } /* Allocate the first page for all buffers */ - set_buffer_entries(&tr->trace_buffer, - ring_buffer_size(tr->trace_buffer.buffer, 0)); + set_buffer_entries(&tr->array_buffer, + ring_buffer_size(tr->array_buffer.buffer, 0)); return 0; } @@ -8392,7 +8392,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; - ret = allocate_trace_buffer(tr, &tr->trace_buffer, size); + ret = allocate_trace_buffer(tr, &tr->array_buffer, size); if (ret) return ret; @@ -8400,10 +8400,10 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? 
size : 1); if (WARN_ON(ret)) { - ring_buffer_free(tr->trace_buffer.buffer); - tr->trace_buffer.buffer = NULL; - free_percpu(tr->trace_buffer.data); - tr->trace_buffer.data = NULL; + ring_buffer_free(tr->array_buffer.buffer); + tr->array_buffer.buffer = NULL; + free_percpu(tr->array_buffer.data); + tr->array_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; @@ -8417,7 +8417,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } -static void free_trace_buffer(struct trace_buffer *buf) +static void free_trace_buffer(struct array_buffer *buf) { if (buf->buffer) { ring_buffer_free(buf->buffer); @@ -8432,7 +8432,7 @@ static void free_trace_buffers(struct trace_array *tr) if (!tr) return; - free_trace_buffer(&tr->trace_buffer); + free_trace_buffer(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); @@ -9036,13 +9036,13 @@ void trace_init_global_iter(struct trace_iterator *iter) iter->tr = &global_trace; iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; - iter->trace_buffer = &global_trace.trace_buffer; + iter->array_buffer = &global_trace.array_buffer; if (iter->trace && iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->trace_buffer->buffer)) + if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ @@ -9083,7 +9083,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ; @@ -9151,7 +9151,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tr->trace_flags |= old_userobj; for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); printk_nmi_direct_exit(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 63bf60f79398..fd679fe92c1f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -176,7 +176,7 @@ struct trace_array_cpu { struct tracer; struct trace_option_dentry; -struct trace_buffer { +struct array_buffer { struct trace_array *tr; struct ring_buffer *buffer; struct trace_array_cpu __percpu *data; @@ -249,7 +249,7 @@ struct cond_snapshot { struct trace_array { struct list_head list; char *name; - struct trace_buffer trace_buffer; + struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum @@ -257,12 +257,12 @@ struct trace_array { * Some tracers will use this to store a maximum trace while * it continues examining live traces. * - * The buffers for the max_buffer are set up the same as the trace_buffer + * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped - * with the buffer of the trace_buffer and the buffers are reset for - * the trace_buffer so the tracing can continue. + * with the buffer of the array_buffer and the buffers are reset for + * the array_buffer so the tracing can continue. 
*/ - struct trace_buffer max_buffer; + struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) @@ -685,7 +685,7 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void tracing_reset_online_cpus(struct trace_buffer *buf); +void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -1057,7 +1057,7 @@ struct ftrace_func_command { extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { - return !this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid); + return !this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 88e158d27965..d5989284a99a 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -55,12 +55,12 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) raw_local_irq_save(flags); current->trace_recursion |= TRACE_BRANCH_BIT; - data = this_cpu_ptr(tr->trace_buffer.data); + data = this_cpu_ptr(tr->array_buffer.data); if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, sizeof(*entry), flags, pc); if (!event) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a5b614cc3887..ac557f685f0b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -237,7 +237,7 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) if (!pid_list) return false; - data = this_cpu_ptr(tr->trace_buffer.data); + data = this_cpu_ptr(tr->array_buffer.data); return data->ignore_pid; } @@ -546,7 +546,7 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, prev) && trace_ignore_this_task(pid_list, next)); } @@ -560,7 +560,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, next)); } @@ -571,12 +571,12 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) struct trace_pid_list *pid_list; /* Nothing to do if we are already tracing */ - if (!this_cpu_read(tr->trace_buffer.data->ignore_pid)) + if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, task)); } @@ -587,13 +587,13 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) struct trace_pid_list *pid_list; /* Nothing to do if we are not tracing */ - if (this_cpu_read(tr->trace_buffer.data->ignore_pid)) + if (this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); /* Set tracing if 
current is enabled */ - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, current)); } @@ -625,7 +625,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr) } for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false; + per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; rcu_assign_pointer(tr->filtered_pids, NULL); @@ -1594,7 +1594,7 @@ static void ignore_task_cpu(void *data) pid_list = rcu_dereference_protected(tr->filtered_pids, mutex_is_locked(&event_mutex)); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, current)); } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f62de5f43e79..94c581c1a897 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -895,7 +895,7 @@ static notrace void trace_event_raw_event_synth(void *__data, * Avoid ring buffer recursion detection, as this event * is being performed within another event. */ - buffer = trace_file->tr->trace_buffer.buffer; + buffer = trace_file->tr->array_buffer.buffer; ring_buffer_nest_start(buffer); entry = trace_event_buffer_reserve(&fbuffer, trace_file, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b611cd36e22d..8a4c8d5c2c98 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -101,7 +101,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->trace_buffer.cpu = get_cpu(); + tr->array_buffer.cpu = get_cpu(); put_cpu(); tracing_start_cmdline_record(); @@ -118,7 +118,7 @@ static void function_trace_reset(struct trace_array *tr) static void function_trace_start(struct trace_array *tr) { - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); } static void @@ -143,7 +143,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, goto out; cpu = smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (!atomic_read(&data->disabled)) { local_save_flags(flags); trace_function(tr, ip, parent_ip, flags, pc); @@ -192,7 +192,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, */ local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 78af97163147..79b2c2df00c5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -101,7 +101,7 @@ int __trace_graph_entry(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -171,7 +171,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -221,7 +221,7 
@@ void __trace_graph_return(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, @@ -252,7 +252,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -444,9 +444,9 @@ get_return_for_leaf(struct trace_iterator *iter, * We need to consume the current entry to see * the next one. */ - ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, + ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, NULL, NULL); - event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu, + event = ring_buffer_peek(iter->array_buffer->buffer, iter->cpu, NULL, NULL); } @@ -503,7 +503,7 @@ print_graph_rel_time(struct trace_iterator *iter, struct trace_seq *s) { unsigned long long usecs; - usecs = iter->ts - iter->trace_buffer->time_start; + usecs = iter->ts - iter->array_buffer->time_start; do_div(usecs, NSEC_PER_USEC); trace_seq_printf(s, "%9llu us | ", usecs); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 6638d63f0921..fc62a6049bd3 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -104,7 +104,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) { struct trace_array *tr = hwlat_trace; struct trace_event_call *call = &event_hwlat; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; unsigned long flags; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a745b0cee5d3..10bbb0f381d5 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -122,7 +122,7 @@ static int func_prolog_dec(struct trace_array *tr, if (!irqs_disabled_flags(*flags) && !preempt_count()) return 0; - *data = per_cpu_ptr(tr->trace_buffer.data, cpu); + *data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (likely(disabled == 1)) @@ -167,7 +167,7 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) per_cpu(tracing_cpu, cpu) = 0; tr->max_latency = 0; - tracing_reset_online_cpus(&irqsoff_trace->trace_buffer); + tracing_reset_online_cpus(&irqsoff_trace->array_buffer); return start_irqsoff_tracer(irqsoff_trace, set); } @@ -382,7 +382,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) if (per_cpu(tracing_cpu, cpu)) return; - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (unlikely(!data) || atomic_read(&data->disabled)) return; @@ -420,7 +420,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) if (!tracer_enabled || !tracing_is_enabled()) return; - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (unlikely(!data) || !data->critical_start || atomic_read(&data->disabled)) diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index cca65044c14c..9da76104f7a2 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -43,7 +43,7 @@ static void 
ftrace_dump_buf(int skip_entries, long cpu_file) if (cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter.buffer_iter[cpu] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, + ring_buffer_read_prepare(iter.array_buffer->buffer, cpu, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu]); tracing_iter_reset(&iter, cpu); @@ -51,7 +51,7 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file) } else { iter.cpu_file = cpu_file; iter.buffer_iter[cpu_file] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, + ring_buffer_read_prepare(iter.array_buffer->buffer, cpu_file, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu_file]); tracing_iter_reset(&iter, cpu_file); @@ -124,7 +124,7 @@ static int kdb_ftdump(int argc, const char **argv) iter.buffer_iter = buffer_iter; for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } /* A negative skip_entries means skip all but the last entries */ @@ -139,7 +139,7 @@ static int kdb_ftdump(int argc, const char **argv) ftrace_dump_buf(skip_entries, cpu_file); for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } kdb_trap_printk--; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index b0388016b687..c30137148759 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -32,7 +32,7 @@ static void mmio_reset_data(struct trace_array *tr) overrun_detected = false; prev_overruns = 0; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); } static int mmio_trace_init(struct trace_array *tr) @@ -122,7 +122,7 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { unsigned long cnt = atomic_xchg(&dropped_count, 0); - unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer); + unsigned long over = ring_buffer_overruns(iter->array_buffer->buffer); if (over > prev_overruns) cnt += over - prev_overruns; @@ -297,7 +297,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct mmiotrace_rw *rw) { struct trace_event_call *call = &event_mmiotrace_rw; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; int pc = preempt_count(); @@ -318,7 +318,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; - struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); + struct trace_array_cpu *data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id()); __trace_mmiotrace_rw(tr, data, rw); } @@ -327,7 +327,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct mmiotrace_map *map) { struct trace_event_call *call = &event_mmiotrace_map; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; int pc = preempt_count(); @@ -351,7 +351,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map) struct trace_array_cpu *data; preempt_disable(); - data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); + data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id()); 
__trace_mmiotrace_map(tr, data, map); preempt_enable(); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d9b4b7c22db4..b4909082f6a4 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -538,7 +538,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) struct trace_array *tr = iter->tr; unsigned long verbose = tr->trace_flags & TRACE_ITER_VERBOSE; unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; - unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start; + unsigned long long abs_ts = iter->ts - iter->array_buffer->time_start; unsigned long long rel_ts = next_ts - iter->ts; struct trace_seq *s = &iter->seq; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 617e297f46dc..510fda2fcd24 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -82,7 +82,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (cpu != wakeup_current_cpu) goto out_enable; - *data = per_cpu_ptr(tr->trace_buffer.data, cpu); + *data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (unlikely(disabled != 1)) goto out; @@ -378,7 +378,7 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags, int pc) { struct trace_event_call *call = &event_context_switch; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -408,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, sizeof(*entry), flags, pc); @@ -459,7 +459,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (likely(disabled != 1)) goto out; @@ -471,7 +471,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out_unlock; /* The task we are waiting for is waking up */ - data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); @@ -494,7 +494,7 @@ out_unlock: arch_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); } static void __wakeup_reset(struct trace_array *tr) @@ -513,7 +513,7 @@ static void wakeup_reset(struct trace_array *tr) { unsigned long flags; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); local_irq_save(flags); arch_spin_lock(&wakeup_lock); @@ -551,7 +551,7 @@ probe_wakeup(void *ignore, struct task_struct *p) return; pc = preempt_count(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if 
(unlikely(disabled != 1)) goto out; @@ -583,7 +583,7 @@ probe_wakeup(void *ignore, struct task_struct *p) local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); __trace_stack(wakeup_trace, flags, 0, pc); @@ -598,7 +598,7 @@ probe_wakeup(void *ignore, struct task_struct *p) out_locked: arch_spin_unlock(&wakeup_lock); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); } static void start_wakeup_tracer(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 69ee8ef12cee..b5e3496cf803 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -23,7 +23,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) return 0; } -static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) +static int trace_test_buffer_cpu(struct array_buffer *buf, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; @@ -60,7 +60,7 @@ static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) * Test the trace buffer to see if all the elements * are still sane. */ -static int __maybe_unused trace_test_buffer(struct trace_buffer *buf, unsigned long *count) +static int __maybe_unused trace_test_buffer(struct array_buffer *buf, unsigned long *count) { unsigned long flags, cnt = 0; int cpu, ret = 0; @@ -362,7 +362,7 @@ static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* we should have nothing in the buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); if (ret) goto out; @@ -383,7 +383,7 @@ static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); ftrace_enabled = 1; tracing_start(); @@ -682,7 +682,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); ftrace_enabled = 1; trace->reset(tr); @@ -768,7 +768,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); set_graph_array(tr); ret = register_ftrace_graph(&fgraph_ops); if (ret) { @@ -790,7 +790,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); /* Need to also simulate the tr->reset to remove this fgraph_ops */ tracing_stop_cmdline_record(); @@ -848,7 +848,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. 
*/ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -910,7 +910,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -976,7 +976,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (ret) goto out; @@ -1006,7 +1006,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (ret) goto out; @@ -1136,7 +1136,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); @@ -1177,7 +1177,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); trace->reset(tr); tracing_start(); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 16fa218556fa..bd92843c2b0e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -345,7 +345,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) local_save_flags(irq_flags); pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) @@ -391,7 +391,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) local_save_flags(irq_flags); pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), irq_flags, pc); From 13292494379f92f532de71b31a54018336adc589 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 13 Dec 2019 13:58:57 -0500 Subject: [PATCH 055/658] tracing: Make struct ring_buffer less ambiguous As there's two struct ring_buffers in the kernel, it causes some confusion. The other one being the perf ring buffer. It was agreed upon that as neither of the ring buffers are generic enough to be used globally, they should be renamed as: perf's ring_buffer -> perf_buffer ftrace's ring_buffer -> trace_buffer This implements the changes to the ring buffer that ftrace uses. 
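To make the end state of the rename concrete, the stand-alone model below shows how the three buffer types relate once this patch and the array_buffer rename above are both applied. It is illustrative only: the "owner" field and the main() driver are invented for the example, and the real kernel definitions carry many more members.

/*
 * Minimal model of the naming scheme described in the changelog; these are
 * not the kernel definitions themselves.
 */
#include <stdio.h>

struct trace_buffer { const char *owner; };	/* was ftrace's struct ring_buffer */
struct perf_buffer  { const char *owner; };	/* was perf's struct ring_buffer   */

struct array_buffer {				/* was struct trace_buffer         */
	struct trace_buffer *buffer;
};

struct trace_array {
	struct array_buffer array_buffer;	/* was the "trace_buffer" member   */
};

int main(void)
{
	struct trace_buffer rb = { .owner = "ftrace" };
	struct perf_buffer  pb = { .owner = "perf" };
	struct trace_array  tr = { .array_buffer = { .buffer = &rb } };

	/* The two subsystems no longer share one ambiguous type name. */
	printf("%s vs %s\n", tr.array_buffer.buffer->owner, pb.owner);
	return 0;
}
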
Link: https://lore.kernel.org/r/20191213140531.116b3200@gandalf.local.home Signed-off-by: Steven Rostedt (VMware) --- drivers/oprofile/cpu_buffer.c | 2 +- include/linux/ring_buffer.h | 104 +++++++++++----------- include/linux/trace_events.h | 4 +- include/trace/trace_events.h | 2 +- kernel/trace/blktrace.c | 4 +- kernel/trace/ring_buffer.c | 124 +++++++++++++-------------- kernel/trace/ring_buffer_benchmark.c | 2 +- kernel/trace/trace.c | 70 +++++++-------- kernel/trace/trace.h | 22 ++--- kernel/trace/trace_branch.c | 2 +- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_events_hist.c | 2 +- kernel/trace/trace_functions_graph.c | 4 +- kernel/trace/trace_hwlat.c | 2 +- kernel/trace/trace_kprobe.c | 4 +- kernel/trace/trace_mmiotrace.c | 4 +- kernel/trace/trace_sched_wakeup.c | 4 +- kernel/trace/trace_syscalls.c | 4 +- kernel/trace/trace_uprobe.c | 2 +- 19 files changed, 182 insertions(+), 182 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index eda2633a393d..9210a95cb4e6 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -32,7 +32,7 @@ #define OP_BUFFER_FLAGS 0 -static struct ring_buffer *op_ring_buffer; +static struct trace_buffer *op_ring_buffer; DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer); static void wq_sync_buffer(struct work_struct *work); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1a40277b512c..df0124eabece 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -6,7 +6,7 @@ #include #include -struct ring_buffer; +struct trace_buffer; struct ring_buffer_iter; /* @@ -77,13 +77,13 @@ u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); * else * ring_buffer_unlock_commit(buffer, event); */ -void ring_buffer_discard_commit(struct ring_buffer *buffer, +void ring_buffer_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event); /* * size is in bytes for each per CPU buffer. 
*/ -struct ring_buffer * +struct trace_buffer * __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); /* @@ -97,38 +97,38 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full); -__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); +__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); #define RING_BUFFER_ALL_CPUS -1 -void ring_buffer_free(struct ring_buffer *buffer); +void ring_buffer_free(struct trace_buffer *buffer); -int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu); +int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu); -void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val); +void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val); -struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, +struct ring_buffer_event *ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length); -int ring_buffer_unlock_commit(struct ring_buffer *buffer, +int ring_buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event); -int ring_buffer_write(struct ring_buffer *buffer, +int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data); -void ring_buffer_nest_start(struct ring_buffer *buffer); -void ring_buffer_nest_end(struct ring_buffer *buffer); +void ring_buffer_nest_start(struct trace_buffer *buffer); +void ring_buffer_nest_end(struct trace_buffer *buffer); struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags); +ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags); void ring_buffer_read_prepare_sync(void); void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); @@ -140,59 +140,59 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); void ring_buffer_iter_reset(struct ring_buffer_iter *iter); int ring_buffer_iter_empty(struct ring_buffer_iter *iter); -unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu); -void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); -void ring_buffer_reset(struct ring_buffer *buffer); +void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu); +void ring_buffer_reset(struct trace_buffer *buffer); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP -int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, - struct ring_buffer *buffer_b, int cpu); +int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, + struct trace_buffer *buffer_b, int cpu); #else static inline int -ring_buffer_swap_cpu(struct ring_buffer *buffer_a, - struct ring_buffer *buffer_b, int cpu) +ring_buffer_swap_cpu(struct trace_buffer *buffer_a, + struct trace_buffer *buffer_b, int cpu) { return -ENODEV; } #endif -bool 
ring_buffer_empty(struct ring_buffer *buffer); -bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); +bool ring_buffer_empty(struct trace_buffer *buffer); +bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu); -void ring_buffer_record_disable(struct ring_buffer *buffer); -void ring_buffer_record_enable(struct ring_buffer *buffer); -void ring_buffer_record_off(struct ring_buffer *buffer); -void ring_buffer_record_on(struct ring_buffer *buffer); -bool ring_buffer_record_is_on(struct ring_buffer *buffer); -bool ring_buffer_record_is_set_on(struct ring_buffer *buffer); -void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); -void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); +void ring_buffer_record_disable(struct trace_buffer *buffer); +void ring_buffer_record_enable(struct trace_buffer *buffer); +void ring_buffer_record_off(struct trace_buffer *buffer); +void ring_buffer_record_on(struct trace_buffer *buffer); +bool ring_buffer_record_is_on(struct trace_buffer *buffer); +bool ring_buffer_record_is_set_on(struct trace_buffer *buffer); +void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu); +void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu); -u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_entries(struct ring_buffer *buffer); -unsigned long ring_buffer_overruns(struct ring_buffer *buffer); -unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu); +u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_entries(struct trace_buffer *buffer); +unsigned long ring_buffer_overruns(struct trace_buffer *buffer); +unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); -void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, +u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu); +void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts); -void ring_buffer_set_clock(struct ring_buffer *buffer, +void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)); -void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); -bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); +void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); +bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu); -size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu); +size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); +size_t 
ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu); -void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data); -int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, +void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data); +int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, size_t len, int cpu, int full); struct trace_seq; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f70e5bc7e8db..5f7b2b1fce24 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -153,7 +153,7 @@ void tracing_generic_entry_update(struct trace_entry *entry, struct trace_event_file; struct ring_buffer_event * -trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, +trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc); @@ -210,7 +210,7 @@ extern int trace_event_reg(struct trace_event_call *event, enum trace_reg type, void *data); struct trace_event_buffer { - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct ring_buffer_event *event; struct trace_event_file *trace_file; void *entry; diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 472b33d23a10..13a58d453992 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -570,7 +570,7 @@ static inline notrace int trace_event_get_offsets_##call( \ * enum event_trigger_type __tt = ETT_NONE; * struct ring_buffer_event *event; * struct trace_event_raw_ *entry; <-- defined in stage 1 - * struct ring_buffer *buffer; + * struct trace_buffer *buffer; * unsigned long irq_flags; * int __data_size; * int pc; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3b926f62ed83..0735ae8545d8 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -68,7 +68,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, { struct blk_io_trace *t; struct ring_buffer_event *event = NULL; - struct ring_buffer *buffer = NULL; + struct trace_buffer *buffer = NULL; int pc = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; @@ -215,7 +215,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; - struct ring_buffer *buffer = NULL; + struct trace_buffer *buffer = NULL; struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f655371eaf6..f846de2aa435 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -443,7 +443,7 @@ enum { struct ring_buffer_per_cpu { int cpu; atomic_t record_disabled; - struct ring_buffer *buffer; + struct trace_buffer *buffer; raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; @@ -482,7 +482,7 @@ struct ring_buffer_per_cpu { struct rb_irq_work irq_work; }; -struct ring_buffer { +struct trace_buffer { unsigned flags; int cpus; atomic_t record_disabled; @@ -518,7 +518,7 @@ struct ring_buffer_iter { * * Returns the number of pages used by a per_cpu buffer of the ring buffer. 
*/ -size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu) +size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) { return buffer->buffers[cpu]->nr_pages; } @@ -530,7 +530,7 @@ size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu) * * Returns the number of pages that have content in the ring buffer. */ -size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu) +size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; size_t cnt; @@ -573,7 +573,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full) +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); DEFINE_WAIT(wait); @@ -684,7 +684,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full) * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, * zero otherwise. */ -__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, +__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table) { struct ring_buffer_per_cpu *cpu_buffer; @@ -742,13 +742,13 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 -static inline u64 rb_time_stamp(struct ring_buffer *buffer) +static inline u64 rb_time_stamp(struct trace_buffer *buffer) { /* shift to debug/test normalization and TIME_EXTENTS */ return buffer->clock() << DEBUG_SHIFT; } -u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu) { u64 time; @@ -760,7 +760,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); -void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, +void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts) { /* Just stupid testing the normalize function and deltas */ @@ -1283,7 +1283,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, } static struct ring_buffer_per_cpu * -rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu) +rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; @@ -1374,10 +1374,10 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ -struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, +struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; long nr_pages; int bsize; int cpu; @@ -1447,7 +1447,7 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc); * @buffer: the buffer to free. 
*/ void -ring_buffer_free(struct ring_buffer *buffer) +ring_buffer_free(struct trace_buffer *buffer) { int cpu; @@ -1463,18 +1463,18 @@ ring_buffer_free(struct ring_buffer *buffer) } EXPORT_SYMBOL_GPL(ring_buffer_free); -void ring_buffer_set_clock(struct ring_buffer *buffer, +void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)) { buffer->clock = clock; } -void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) +void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) { buffer->time_stamp_abs = abs; } -bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) +bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) { return buffer->time_stamp_abs; } @@ -1712,7 +1712,7 @@ static void update_pages_handler(struct work_struct *work) * * Returns 0 on success and < 0 on failure. */ -int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, +int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; @@ -1891,7 +1891,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } EXPORT_SYMBOL_GPL(ring_buffer_resize); -void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) +void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) { mutex_lock(&buffer->mutex); if (val) @@ -2206,7 +2206,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, { struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; - struct ring_buffer *buffer = cpu_buffer->buffer; + struct trace_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; @@ -2609,7 +2609,7 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, } static __always_inline void -rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { size_t nr_pages; size_t dirty; @@ -2733,7 +2733,7 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) * Call this function before calling another ring_buffer_lock_reserve() and * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). */ -void ring_buffer_nest_start(struct ring_buffer *buffer) +void ring_buffer_nest_start(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -2753,7 +2753,7 @@ void ring_buffer_nest_start(struct ring_buffer *buffer) * Must be called after ring_buffer_nest_start() and after the * ring_buffer_unlock_commit(). */ -void ring_buffer_nest_end(struct ring_buffer *buffer) +void ring_buffer_nest_end(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -2775,7 +2775,7 @@ void ring_buffer_nest_end(struct ring_buffer *buffer) * * Must be paired with ring_buffer_lock_reserve. */ -int ring_buffer_unlock_commit(struct ring_buffer *buffer, +int ring_buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; @@ -2868,7 +2868,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } static __always_inline struct ring_buffer_event * -rb_reserve_next_event(struct ring_buffer *buffer, +rb_reserve_next_event(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, unsigned long length) { @@ -2961,7 +2961,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, * If NULL is returned, then nothing has been allocated or locked. 
*/ struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) +ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -3062,7 +3062,7 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * If this function is called, do not call ring_buffer_unlock_commit on * the event. */ -void ring_buffer_discard_commit(struct ring_buffer *buffer, +void ring_buffer_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3113,7 +3113,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); * Note, like ring_buffer_lock_reserve, the length is the length of the data * and not the length of the event which would hold the header. */ -int ring_buffer_write(struct ring_buffer *buffer, +int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data) { @@ -3193,7 +3193,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * * The caller should call synchronize_rcu() after this. */ -void ring_buffer_record_disable(struct ring_buffer *buffer) +void ring_buffer_record_disable(struct trace_buffer *buffer) { atomic_inc(&buffer->record_disabled); } @@ -3206,7 +3206,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable); * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ -void ring_buffer_record_enable(struct ring_buffer *buffer) +void ring_buffer_record_enable(struct trace_buffer *buffer) { atomic_dec(&buffer->record_disabled); } @@ -3223,7 +3223,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable); * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ -void ring_buffer_record_off(struct ring_buffer *buffer) +void ring_buffer_record_off(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; @@ -3246,7 +3246,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off); * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ -void ring_buffer_record_on(struct ring_buffer *buffer) +void ring_buffer_record_on(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; @@ -3264,7 +3264,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_on); * * Returns true if the ring buffer is in a state that it accepts writes. */ -bool ring_buffer_record_is_on(struct ring_buffer *buffer) +bool ring_buffer_record_is_on(struct trace_buffer *buffer) { return !atomic_read(&buffer->record_disabled); } @@ -3280,7 +3280,7 @@ bool ring_buffer_record_is_on(struct ring_buffer *buffer) * ring_buffer_record_disable(), as that is a temporary disabling of * the ring buffer. */ -bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) +bool ring_buffer_record_is_set_on(struct trace_buffer *buffer) { return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); } @@ -3295,7 +3295,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * * The caller should call synchronize_rcu() after this. */ -void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3315,7 +3315,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). 
*/ -void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3345,7 +3345,7 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; @@ -3378,7 +3378,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3398,7 +3398,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); * @buffer: The ring buffer * @cpu: The per CPU buffer to get the entries from. */ -unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3417,7 +3417,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ -unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3440,7 +3440,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long -ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3462,7 +3462,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long -ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3483,7 +3483,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); * @cpu: The per CPU buffer to get the number of events read */ unsigned long -ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3502,7 +3502,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); * Returns the total number of entries in the ring buffer * (all CPU entries) */ -unsigned long ring_buffer_entries(struct ring_buffer *buffer) +unsigned long ring_buffer_entries(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long entries = 0; @@ -3525,7 +3525,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries); * Returns the total number of overruns in the ring buffer * (all CPU entries) */ -unsigned long ring_buffer_overruns(struct ring_buffer *buffer) +unsigned long ring_buffer_overruns(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long overruns = 0; @@ -3949,7 +3949,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_peek); static struct ring_buffer_event * rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct 
ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; int nr_loops = 0; @@ -4077,7 +4077,7 @@ rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) * not consume the data. */ struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; @@ -4141,7 +4141,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) * and eventually empty the ring buffer if the producer is slower. */ struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer; @@ -4201,7 +4201,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * -ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags) +ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; @@ -4332,7 +4332,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read); * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. */ -unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { /* * Earlier, this method returned @@ -4398,7 +4398,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer to reset a per cpu buffer of * @cpu: The CPU buffer to be reset */ -void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; unsigned long flags; @@ -4435,7 +4435,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); * ring_buffer_reset - reset a ring buffer * @buffer: The ring buffer to reset all cpu buffers */ -void ring_buffer_reset(struct ring_buffer *buffer) +void ring_buffer_reset(struct trace_buffer *buffer) { int cpu; @@ -4448,7 +4448,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); * rind_buffer_empty - is the ring buffer empty? * @buffer: The ring buffer to test */ -bool ring_buffer_empty(struct ring_buffer *buffer) +bool ring_buffer_empty(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; @@ -4478,7 +4478,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); * @buffer: The ring buffer * @cpu: The CPU buffer to test */ -bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) +bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; @@ -4510,8 +4510,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); * it is expected that the tracer handles the cpu buffer not being * used at the moment. 
*/ -int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, - struct ring_buffer *buffer_b, int cpu) +int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, + struct trace_buffer *buffer_b, int cpu) { struct ring_buffer_per_cpu *cpu_buffer_a; struct ring_buffer_per_cpu *cpu_buffer_b; @@ -4590,7 +4590,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); * Returns: * The page allocated, or ERR_PTR */ -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) +void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = NULL; @@ -4637,7 +4637,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); * * Free a page allocated from ring_buffer_alloc_read_page. */ -void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct buffer_data_page *bpage = data; @@ -4697,7 +4697,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * >=0 if data has been transferred, returns the offset of consumed data. * <0 if no data has been transferred. */ -int ring_buffer_read_page(struct ring_buffer *buffer, +int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; @@ -4868,12 +4868,12 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_page); */ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; long nr_pages_same; int cpu_i; unsigned long nr_pages; - buffer = container_of(node, struct ring_buffer, node); + buffer = container_of(node, struct trace_buffer, node); if (cpumask_test_cpu(cpu, buffer->cpumask)) return 0; @@ -4923,7 +4923,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) static struct task_struct *rb_threads[NR_CPUS] __initdata; struct rb_test_data { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long events; unsigned long bytes_written; unsigned long bytes_alloc; @@ -5065,7 +5065,7 @@ static __init int rb_hammer_test(void *arg) static __init int test_ringbuffer(void) { struct task_struct *rb_hammer; - struct ring_buffer *buffer; + struct trace_buffer *buffer; int cpu; int ret = 0; diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 32149e46551c..8df0aa810950 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -29,7 +29,7 @@ static int reader_finish; static DECLARE_COMPLETION(read_start); static DECLARE_COMPLETION(read_done); -static struct ring_buffer *buffer; +static struct trace_buffer *buffer; static struct task_struct *producer; static struct task_struct *consumer; static unsigned long read; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 67084b7945ff..b4a07d7ed82a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -163,7 +163,7 @@ static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ static int tracing_set_tracer(struct trace_array *tr, const char *buf); -static void ftrace_trace_userstack(struct ring_buffer *buffer, +static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc); #define MAX_TRACER_SIZE 100 @@ -338,7 +338,7 @@ int tracing_check_open_get_tr(struct trace_array *tr) } int call_filter_check_discard(struct trace_event_call *call, 
void *rec, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event) { if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && @@ -747,22 +747,22 @@ static inline void trace_access_lock_init(void) #endif #ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct ring_buffer *buffer, +static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs); #else -static inline void __ftrace_trace_stack(struct ring_buffer *buffer, +static inline void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -780,7 +780,7 @@ trace_event_setup(struct ring_buffer_event *event, } static __always_inline struct ring_buffer_event * -__trace_buffer_lock_reserve(struct ring_buffer *buffer, +__trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc) @@ -825,7 +825,7 @@ EXPORT_SYMBOL_GPL(tracing_on); static __always_inline void -__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { __this_cpu_write(trace_taskinfo_save, true); @@ -848,7 +848,7 @@ __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *eve int __trace_puts(unsigned long ip, const char *str, int size) { struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; int alloc; @@ -898,7 +898,7 @@ EXPORT_SYMBOL_GPL(__trace_puts); int __trace_bputs(unsigned long ip, const char *str) { struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); @@ -1964,7 +1964,7 @@ int __init register_tracer(struct tracer *type) static void tracing_reset_cpu(struct array_buffer *buf, int cpu) { - struct ring_buffer *buffer = buf->buffer; + struct trace_buffer *buffer = buf->buffer; if (!buffer) return; @@ -1980,7 +1980,7 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu) void tracing_reset_online_cpus(struct array_buffer *buf) { - struct ring_buffer *buffer = buf->buffer; + struct trace_buffer *buffer = buf->buffer; int cpu; if (!buffer) @@ -2098,7 +2098,7 @@ int is_tracing_stopped(void) */ void tracing_start(void) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; if (tracing_disabled) @@ -2135,7 +2135,7 @@ void tracing_start(void) static void tracing_start_tr(struct trace_array *tr) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; if (tracing_disabled) @@ -2172,7 +2172,7 @@ static void tracing_start_tr(struct trace_array *tr) */ void tracing_stop(void) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; raw_spin_lock_irqsave(&global_trace.start_lock, flags); @@ -2200,7 +2200,7 @@ void tracing_stop(void) static void tracing_stop_tr(struct trace_array *tr) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long 
flags; /* If global, we need to also stop the max tracer */ @@ -2442,7 +2442,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * -trace_buffer_lock_reserve(struct ring_buffer *buffer, +trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc) @@ -2561,10 +2561,10 @@ void trace_buffered_event_disable(void) preempt_enable(); } -static struct ring_buffer *temp_buffer; +static struct trace_buffer *temp_buffer; struct ring_buffer_event * -trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, +trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc) @@ -2689,7 +2689,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); # define STACK_SKIP 3 void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs) @@ -2710,7 +2710,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. */ void -trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, +trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event) { __buffer_unlock_commit(buffer, event); @@ -2845,7 +2845,7 @@ trace_function(struct trace_array *tr, int pc) { struct trace_event_call *call = &event_function; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -2883,7 +2883,7 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); -static void __ftrace_trace_stack(struct ring_buffer *buffer, +static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -2958,7 +2958,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, } static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -2971,7 +2971,7 @@ static inline void ftrace_trace_stack(struct trace_array *tr, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { __ftrace_trace_stack(buffer, flags, skip, pc, NULL); @@ -3018,7 +3018,7 @@ EXPORT_SYMBOL_GPL(trace_dump_stack); static DEFINE_PER_CPU(int, user_stack_count); static void -ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) +ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3063,7 +3063,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ -static void ftrace_trace_userstack(struct ring_buffer *buffer, +static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) { } @@ -3188,7 +3188,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { struct 
trace_event_call *call = &event_bprint; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; unsigned long flags; @@ -3245,7 +3245,7 @@ EXPORT_SYMBOL_GPL(trace_vbprintk); __printf(3, 0) static int -__trace_array_vprintk(struct ring_buffer *buffer, +__trace_array_vprintk(struct trace_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { struct trace_event_call *call = &event_print; @@ -3326,7 +3326,7 @@ int trace_array_printk(struct trace_array *tr, EXPORT_SYMBOL_GPL(trace_array_printk); __printf(3, 4) -int trace_array_printk_buf(struct ring_buffer *buffer, +int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...) { int ret; @@ -3382,7 +3382,7 @@ static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, unsigned long *missing_events, u64 *ent_ts) { - struct ring_buffer *buffer = iter->array_buffer->buffer; + struct trace_buffer *buffer = iter->array_buffer->buffer; struct trace_entry *ent, *next = NULL; unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; @@ -6470,7 +6470,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; enum event_trigger_type tt = ETT_NONE; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; ssize_t written; @@ -6550,7 +6550,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, { struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct raw_data_entry *entry; unsigned long irq_flags; ssize_t written; @@ -7433,7 +7433,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) } struct buffer_ref { - struct ring_buffer *buffer; + struct trace_buffer *buffer; void *page; int cpu; refcount_t refcount; @@ -8272,7 +8272,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; unsigned long val; int ret; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fd679fe92c1f..4812a36affac 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -178,7 +178,7 @@ struct trace_option_dentry; struct array_buffer { struct trace_array *tr; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; @@ -705,7 +705,7 @@ struct dentry *tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * -trace_buffer_lock_reserve(struct ring_buffer *buffer, +trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, @@ -717,7 +717,7 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); @@ -873,7 +873,7 @@ trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const 
char *fmt, va_list args); -int trace_array_printk_buf(struct ring_buffer *buffer, +int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); @@ -1367,17 +1367,17 @@ struct trace_subsystem_dir { }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { @@ -1390,7 +1390,7 @@ void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void -__trace_event_discard_commit(struct ring_buffer *buffer, +__trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { @@ -1416,7 +1416,7 @@ __trace_event_discard_commit(struct ring_buffer *buffer, */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) @@ -1451,7 +1451,7 @@ __event_trigger_test_discard(struct trace_event_file *file, */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { @@ -1482,7 +1482,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index d5989284a99a..eff099123aa2 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -32,10 +32,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) { struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; + struct trace_buffer *buffer; struct trace_array_cpu *data; struct ring_buffer_event *event; struct trace_branch *entry; - struct ring_buffer *buffer; unsigned long flags; int pc; const char *p; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ac557f685f0b..a16d1b601c5c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3391,8 +3391,8 @@ static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; struct ftrace_entry *entry; unsigned long flags; long disabled; diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 94c581c1a897..0454abaeb486 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -879,7 +879,7 @@ static notrace void trace_event_raw_event_synth(void *__data, struct trace_event_file *trace_file = 
__data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct synth_event *event; unsigned int i, n_u64; int fields_size = 0; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 79b2c2df00c5..7d71546ba00a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -101,7 +101,7 @@ int __trace_graph_entry(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -221,7 +221,7 @@ void __trace_graph_return(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index fc62a6049bd3..b44446bf0872 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -104,7 +104,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) { struct trace_array *tr = hwlat_trace; struct trace_event_call *call = &event_hwlat; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; unsigned long flags; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7f890262c8a3..477b6b011e7d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1175,8 +1175,8 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct trace_event_file *trace_file) { struct kprobe_trace_entry_head *entry; + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; int size, dsize, pc; unsigned long irq_flags; struct trace_event_call *call = trace_probe_event_call(&tk->tp); @@ -1223,8 +1223,8 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct trace_event_file *trace_file) { struct kretprobe_trace_entry_head *entry; + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; int size, pc, dsize; unsigned long irq_flags; struct trace_event_call *call = trace_probe_event_call(&tk->tp); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index c30137148759..84582bf1ed5f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -297,7 +297,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct mmiotrace_rw *rw) { struct trace_event_call *call = &event_mmiotrace_rw; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; int pc = preempt_count(); @@ -327,7 +327,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct mmiotrace_map *map) { struct trace_event_call *call = &event_mmiotrace_map; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; int pc = 
preempt_count(); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 510fda2fcd24..97b10bb31a1f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -378,7 +378,7 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags, int pc) { struct trace_event_call *call = &event_context_switch; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -408,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; - struct ring_buffer *buffer = tr->array_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, sizeof(*entry), flags, pc); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index bd92843c2b0e..837ad4818bb4 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -317,7 +317,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long irq_flags; unsigned long args[6]; int pc; @@ -367,7 +367,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long irq_flags; int pc; int syscall_nr; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 352073d36585..6c75d94f5c2f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -938,8 +938,8 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); From 76db5a27a827c2c89e5120a3d486472da847863b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:03:32 +0900 Subject: [PATCH 056/658] bootconfig: Add Extra Boot Config support Extra Boot Config (XBC) allows admin to pass a tree-structured boot configuration file when boot up the kernel. This extends the kernel command line in an efficient way. Boot config will contain some key-value commands, e.g. key.word = value1 another.key.word = value2 It can fold same keys with braces, also you can write array data. For example, key { word1 { setting1 = data setting2 } word2.array = "val1", "val2" } User can access these key-value pair and tree structure via SKC APIs. 
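As an illustration only (not part of this patch), kernel init code could read the example tree above through the XBC APIs added below in include/linux/bootconfig.h (referred to as SKC above). The function name my_feature_init and the pr_info() reporting are made up, and the sketch assumes the boot config text has already been parsed with xbc_init():

	#include <linux/bootconfig.h>
	#include <linux/init.h>
	#include <linux/printk.h>

	static int __init my_feature_init(void)
	{
		struct xbc_node *anode;
		const char *val;

		/* single value: "key.word = value1" from the example */
		val = xbc_find_value("key.word", NULL);
		if (val)
			pr_info("key.word = %s\n", val);

		/* array: key { word2.array = "val1", "val2" } */
		xbc_node_for_each_array_value(NULL, "key.word2.array",
					      anode, val)
			pr_info("key.word2.array entry: %s\n", val);

		return 0;
	}

The APIs are all __init because the parsed node tree lives in __initdata and is discarded after boot.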
Link: http://lkml.kernel.org/r/157867221257.17873.1775090991929862549.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- MAINTAINERS | 6 + include/linux/bootconfig.h | 224 +++++++++++ init/Kconfig | 11 + lib/Kconfig | 3 + lib/Makefile | 2 + lib/bootconfig.c | 803 +++++++++++++++++++++++++++++++++++++ 6 files changed, 1049 insertions(+) create mode 100644 include/linux/bootconfig.h create mode 100644 lib/bootconfig.c diff --git a/MAINTAINERS b/MAINTAINERS index 4017e6b760be..8597285eb7c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15770,6 +15770,12 @@ W: http://www.stlinux.com S: Supported F: drivers/net/ethernet/stmicro/stmmac/ +EXTRA BOOT CONFIG +M: Masami Hiramatsu +S: Maintained +F: lib/bootconfig.c +F: include/linux/bootconfig.h + SUN3/3X M: Sam Creasey W: http://sammy.net/sun3/ diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h new file mode 100644 index 000000000000..7e18c939663e --- /dev/null +++ b/include/linux/bootconfig.h @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_XBC_H +#define _LINUX_XBC_H +/* + * Extra Boot Config + * Copyright (C) 2019 Linaro Ltd. + * Author: Masami Hiramatsu + */ + +#include +#include + +/* XBC tree node */ +struct xbc_node { + u16 next; + u16 child; + u16 parent; + u16 data; +} __attribute__ ((__packed__)); + +#define XBC_KEY 0 +#define XBC_VALUE (1 << 15) +/* Maximum size of boot config is 32KB - 1 */ +#define XBC_DATA_MAX (XBC_VALUE - 1) + +#define XBC_NODE_MAX 1024 +#define XBC_KEYLEN_MAX 256 +#define XBC_DEPTH_MAX 16 + +/* Node tree access raw APIs */ +struct xbc_node * __init xbc_root_node(void); +int __init xbc_node_index(struct xbc_node *node); +struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node); +struct xbc_node * __init xbc_node_get_child(struct xbc_node *node); +struct xbc_node * __init xbc_node_get_next(struct xbc_node *node); +const char * __init xbc_node_get_data(struct xbc_node *node); + +/** + * xbc_node_is_value() - Test the node is a value node + * @node: An XBC node. + * + * Test the @node is a value node and return true if a value node, false if not. + */ +static inline __init bool xbc_node_is_value(struct xbc_node *node) +{ + return node->data & XBC_VALUE; +} + +/** + * xbc_node_is_key() - Test the node is a key node + * @node: An XBC node. + * + * Test the @node is a key node and return true if a key node, false if not. + */ +static inline __init bool xbc_node_is_key(struct xbc_node *node) +{ + return !xbc_node_is_value(node); +} + +/** + * xbc_node_is_array() - Test the node is an arraied value node + * @node: An XBC node. + * + * Test the @node is an arraied value node. + */ +static inline __init bool xbc_node_is_array(struct xbc_node *node) +{ + return xbc_node_is_value(node) && node->next != 0; +} + +/** + * xbc_node_is_leaf() - Test the node is a leaf key node + * @node: An XBC node. + * + * Test the @node is a leaf key node which is a key node and has a value node + * or no child. Returns true if it is a leaf node, or false if not. 
+ */ +static inline __init bool xbc_node_is_leaf(struct xbc_node *node) +{ + return xbc_node_is_key(node) && + (!node->child || xbc_node_is_value(xbc_node_get_child(node))); +} + +/* Tree-based key-value access APIs */ +struct xbc_node * __init xbc_node_find_child(struct xbc_node *parent, + const char *key); + +const char * __init xbc_node_find_value(struct xbc_node *parent, + const char *key, + struct xbc_node **vnode); + +struct xbc_node * __init xbc_node_find_next_leaf(struct xbc_node *root, + struct xbc_node *leaf); + +const char * __init xbc_node_find_next_key_value(struct xbc_node *root, + struct xbc_node **leaf); + +/** + * xbc_find_value() - Find a value which matches the key + * @key: Search key + * @vnode: A container pointer of XBC value node. + * + * Search a value whose key matches @key from whole of XBC tree and return + * the value if found. Found value node is stored in *@vnode. + * Note that this can return 0-length string and store NULL in *@vnode for + * key-only (non-value) entry. + */ +static inline const char * __init +xbc_find_value(const char *key, struct xbc_node **vnode) +{ + return xbc_node_find_value(NULL, key, vnode); +} + +/** + * xbc_find_node() - Find a node which matches the key + * @key: Search key + * + * Search a (key) node whose key matches @key from whole of XBC tree and + * return the node if found. If not found, returns NULL. + */ +static inline struct xbc_node * __init xbc_find_node(const char *key) +{ + return xbc_node_find_child(NULL, key); +} + +/** + * xbc_array_for_each_value() - Iterate value nodes on an array + * @anode: An XBC arraied value node + * @value: A value + * + * Iterate array value nodes and values starts from @anode. This is expected to + * be used with xbc_find_value() and xbc_node_find_value(), so that user can + * process each array entry node. + */ +#define xbc_array_for_each_value(anode, value) \ + for (value = xbc_node_get_data(anode); anode != NULL ; \ + anode = xbc_node_get_next(anode), \ + value = anode ? xbc_node_get_data(anode) : NULL) + +/** + * xbc_node_for_each_child() - Iterate child nodes + * @parent: An XBC node. + * @child: Iterated XBC node. + * + * Iterate child nodes of @parent. Each child nodes are stored to @child. + */ +#define xbc_node_for_each_child(parent, child) \ + for (child = xbc_node_get_child(parent); child != NULL ; \ + child = xbc_node_get_next(child)) + +/** + * xbc_node_for_each_array_value() - Iterate array entries of geven key + * @node: An XBC node. + * @key: A key string searched under @node + * @anode: Iterated XBC node of array entry. + * @value: Iterated value of array entry. + * + * Iterate array entries of given @key under @node. Each array entry node + * is stroed to @anode and @value. If the @node doesn't have @key node, + * it does nothing. + * Note that even if the found key node has only one value (not array) + * this executes block once. Hoever, if the found key node has no value + * (key-only node), this does nothing. So don't use this for testing the + * key-value pair existence. + */ +#define xbc_node_for_each_array_value(node, key, anode, value) \ + for (value = xbc_node_find_value(node, key, &anode); value != NULL; \ + anode = xbc_node_get_next(anode), \ + value = anode ? xbc_node_get_data(anode) : NULL) + +/** + * xbc_node_for_each_key_value() - Iterate key-value pairs under a node + * @node: An XBC node. + * @knode: Iterated key node + * @value: Iterated value string + * + * Iterate key-value pairs under @node. 
Each key node and value string are + * stored in @knode and @value respectively. + */ +#define xbc_node_for_each_key_value(node, knode, value) \ + for (knode = NULL, value = xbc_node_find_next_key_value(node, &knode);\ + knode != NULL; value = xbc_node_find_next_key_value(node, &knode)) + +/** + * xbc_for_each_key_value() - Iterate key-value pairs + * @knode: Iterated key node + * @value: Iterated value string + * + * Iterate key-value pairs in whole XBC tree. Each key node and value string + * are stored in @knode and @value respectively. + */ +#define xbc_for_each_key_value(knode, value) \ + xbc_node_for_each_key_value(NULL, knode, value) + +/* Compose partial key */ +int __init xbc_node_compose_key_after(struct xbc_node *root, + struct xbc_node *node, char *buf, size_t size); + +/** + * xbc_node_compose_key() - Compose full key string of the XBC node + * @node: An XBC node. + * @buf: A buffer to store the key. + * @size: The size of the @buf. + * + * Compose the full-length key of the @node into @buf. Returns the total + * length of the key stored in @buf. Or returns -EINVAL if @node is NULL, + * and -ERANGE if the key depth is deeper than max depth. + */ +static inline int __init xbc_node_compose_key(struct xbc_node *node, + char *buf, size_t size) +{ + return xbc_node_compose_key_after(NULL, node, buf, size); +} + +/* XBC node initializer */ +int __init xbc_init(char *buf); + +/* XBC cleanup data structures */ +void __init xbc_destroy_all(void); + +/* Debug dump functions */ +void __init xbc_debug_dump(void); + +#endif diff --git a/init/Kconfig b/init/Kconfig index a34064a031a5..63450d3bbf12 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1215,6 +1215,17 @@ source "usr/Kconfig" endif +config BOOT_CONFIG + bool "Boot config support" + select LIBXBC + default y + help + Extra boot config allows system admin to pass a config file as + complemental extension of kernel cmdline when booting. + The boot config file is usually attached at the end of initramfs. + + If unsure, say Y. + choice prompt "Compiler optimization level" default CC_OPTIMIZE_FOR_PERFORMANCE diff --git a/lib/Kconfig b/lib/Kconfig index 6e790dc55c5b..10012b646009 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -566,6 +566,9 @@ config DIMLIB config LIBFDT bool +config LIBXBC + bool + config OID_REGISTRY tristate help diff --git a/lib/Makefile b/lib/Makefile index 93217d44237f..75a64d2552a2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -228,6 +228,8 @@ $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) +lib-$(CONFIG_LIBXBC) += bootconfig.o + obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o diff --git a/lib/bootconfig.c b/lib/bootconfig.c new file mode 100644 index 000000000000..055014e233a5 --- /dev/null +++ b/lib/bootconfig.c @@ -0,0 +1,803 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Extra Boot Config + * Masami Hiramatsu + */ + +#define pr_fmt(fmt) "bootconfig: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* + * Extra Boot Config (XBC) is given as tree-structured ascii text of + * key-value pairs on memory. + * xbc_parse() parses the text to build a simple tree. Each tree node is + * simply a key word or a value. A key node may have a next key node or/and + * a child node (both key and value). A value node may have a next value + * node (for array). 
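For instance, a hypothetical fragment in the syntax used by the samples later in this series, such as

	key { word1 = 1; word2 = 2, 3 }

would be built as a key node "key" whose child is the key node "word1"; "word1" points to its sibling "word2" via next, each word's child is its first value node, and the value "2" points to "3" via next to form the array.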
+ */ + +static struct xbc_node xbc_nodes[XBC_NODE_MAX] __initdata; +static int xbc_node_num __initdata; +static char *xbc_data __initdata; +static size_t xbc_data_size __initdata; +static struct xbc_node *last_parent __initdata; + +static int __init xbc_parse_error(const char *msg, const char *p) +{ + int pos = p - xbc_data; + + pr_err("Parse error at pos %d: %s\n", pos, msg); + return -EINVAL; +} + +/** + * xbc_root_node() - Get the root node of extended boot config + * + * Return the address of root node of extended boot config. If the + * extended boot config is not initiized, return NULL. + */ +struct xbc_node * __init xbc_root_node(void) +{ + if (unlikely(!xbc_data)) + return NULL; + + return xbc_nodes; +} + +/** + * xbc_node_index() - Get the index of XBC node + * @node: A target node of getting index. + * + * Return the index number of @node in XBC node list. + */ +int __init xbc_node_index(struct xbc_node *node) +{ + return node - &xbc_nodes[0]; +} + +/** + * xbc_node_get_parent() - Get the parent XBC node + * @node: An XBC node. + * + * Return the parent node of @node. If the node is top node of the tree, + * return NULL. + */ +struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node) +{ + return node->parent == XBC_NODE_MAX ? NULL : &xbc_nodes[node->parent]; +} + +/** + * xbc_node_get_child() - Get the child XBC node + * @node: An XBC node. + * + * Return the first child node of @node. If the node has no child, return + * NULL. + */ +struct xbc_node * __init xbc_node_get_child(struct xbc_node *node) +{ + return node->child ? &xbc_nodes[node->child] : NULL; +} + +/** + * xbc_node_get_next() - Get the next sibling XBC node + * @node: An XBC node. + * + * Return the NEXT sibling node of @node. If the node has no next sibling, + * return NULL. Note that even if this returns NULL, it doesn't mean @node + * has no siblings. (You also has to check whether the parent's child node + * is @node or not.) + */ +struct xbc_node * __init xbc_node_get_next(struct xbc_node *node) +{ + return node->next ? &xbc_nodes[node->next] : NULL; +} + +/** + * xbc_node_get_data() - Get the data of XBC node + * @node: An XBC node. + * + * Return the data (which is always a null terminated string) of @node. + * If the node has invalid data, warn and return NULL. + */ +const char * __init xbc_node_get_data(struct xbc_node *node) +{ + int offset = node->data & ~XBC_VALUE; + + if (WARN_ON(offset >= xbc_data_size)) + return NULL; + + return xbc_data + offset; +} + +static bool __init +xbc_node_match_prefix(struct xbc_node *node, const char **prefix) +{ + const char *p = xbc_node_get_data(node); + int len = strlen(p); + + if (strncmp(*prefix, p, len)) + return false; + + p = *prefix + len; + if (*p == '.') + p++; + else if (*p != '\0') + return false; + *prefix = p; + + return true; +} + +/** + * xbc_node_find_child() - Find a child node which matches given key + * @parent: An XBC node. + * @key: A key string. + * + * Search a node under @parent which matches @key. The @key can contain + * several words jointed with '.'. If @parent is NULL, this searches the + * node from whole tree. Return NULL if no node is matched. 
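A minimal sketch of how an early-boot caller might use this lookup (the key names here are made up for illustration):

	struct xbc_node *node;

	/* Find the "ftrace.event" key node anywhere in the tree. */
	node = xbc_find_node("ftrace.event");
	if (node)
		/* Then search only among its children. */
		node = xbc_node_find_child(node, "enable");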
+ */ +struct xbc_node * __init +xbc_node_find_child(struct xbc_node *parent, const char *key) +{ + struct xbc_node *node; + + if (parent) + node = xbc_node_get_child(parent); + else + node = xbc_root_node(); + + while (node && xbc_node_is_key(node)) { + if (!xbc_node_match_prefix(node, &key)) + node = xbc_node_get_next(node); + else if (*key != '\0') + node = xbc_node_get_child(node); + else + break; + } + + return node; +} + +/** + * xbc_node_find_value() - Find a value node which matches given key + * @parent: An XBC node. + * @key: A key string. + * @vnode: A container pointer of found XBC node. + * + * Search a value node under @parent whose (parent) key node matches @key, + * store it in *@vnode, and returns the value string. + * The @key can contain several words jointed with '.'. If @parent is NULL, + * this searches the node from whole tree. Return the value string if a + * matched key found, return NULL if no node is matched. + * Note that this returns 0-length string and stores NULL in *@vnode if the + * key has no value. And also it will return the value of the first entry if + * the value is an array. + */ +const char * __init +xbc_node_find_value(struct xbc_node *parent, const char *key, + struct xbc_node **vnode) +{ + struct xbc_node *node = xbc_node_find_child(parent, key); + + if (!node || !xbc_node_is_key(node)) + return NULL; + + node = xbc_node_get_child(node); + if (node && !xbc_node_is_value(node)) + return NULL; + + if (vnode) + *vnode = node; + + return node ? xbc_node_get_data(node) : ""; +} + +/** + * xbc_node_compose_key_after() - Compose partial key string of the XBC node + * @root: Root XBC node + * @node: Target XBC node. + * @buf: A buffer to store the key. + * @size: The size of the @buf. + * + * Compose the partial key of the @node into @buf, which is starting right + * after @root (@root is not included.) If @root is NULL, this returns full + * key words of @node. + * Returns the total length of the key stored in @buf. Returns -EINVAL + * if @node is NULL or @root is not the ancestor of @node or @root is @node, + * or returns -ERANGE if the key depth is deeper than max depth. + * This is expected to be used with xbc_find_node() to list up all (child) + * keys under given key. + */ +int __init xbc_node_compose_key_after(struct xbc_node *root, + struct xbc_node *node, + char *buf, size_t size) +{ + u16 keys[XBC_DEPTH_MAX]; + int depth = 0, ret = 0, total = 0; + + if (!node || node == root) + return -EINVAL; + + if (xbc_node_is_value(node)) + node = xbc_node_get_parent(node); + + while (node && node != root) { + keys[depth++] = xbc_node_index(node); + if (depth == XBC_DEPTH_MAX) + return -ERANGE; + node = xbc_node_get_parent(node); + } + if (!node && root) + return -EINVAL; + + while (--depth >= 0) { + node = xbc_nodes + keys[depth]; + ret = snprintf(buf, size, "%s%s", xbc_node_get_data(node), + depth ? "." : ""); + if (ret < 0) + return ret; + if (ret > size) { + size = 0; + } else { + size -= ret; + buf += ret; + } + total += ret; + } + + return total; +} + +/** + * xbc_node_find_next_leaf() - Find the next leaf node under given node + * @root: An XBC root node + * @node: An XBC node which starts from. + * + * Search the next leaf node (which means the terminal key node) of @node + * under @root node (including @root node itself). + * Return the next node or NULL if next leaf node is not found. 
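Correspondingly, a hypothetical value lookup could look like this (key name and messages are illustrative only):

	struct xbc_node *vnode = NULL;
	const char *val;

	val = xbc_node_find_value(NULL, "kernel.audit", &vnode);
	if (!val)
		pr_info("kernel.audit is not set\n");
	else if (vnode && xbc_node_is_array(vnode))
		xbc_array_for_each_value(vnode, val)
			pr_info("audit option: %s\n", val);
	else
		pr_info("audit = '%s'\n", val);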
+ */ +struct xbc_node * __init xbc_node_find_next_leaf(struct xbc_node *root, + struct xbc_node *node) +{ + if (unlikely(!xbc_data)) + return NULL; + + if (!node) { /* First try */ + node = root; + if (!node) + node = xbc_nodes; + } else { + if (node == root) /* @root was a leaf, no child node. */ + return NULL; + + while (!node->next) { + node = xbc_node_get_parent(node); + if (node == root) + return NULL; + /* User passed a node which is not uder parent */ + if (WARN_ON(!node)) + return NULL; + } + node = xbc_node_get_next(node); + } + + while (node && !xbc_node_is_leaf(node)) + node = xbc_node_get_child(node); + + return node; +} + +/** + * xbc_node_find_next_key_value() - Find the next key-value pair nodes + * @root: An XBC root node + * @leaf: A container pointer of XBC node which starts from. + * + * Search the next leaf node (which means the terminal key node) of *@leaf + * under @root node. Returns the value and update *@leaf if next leaf node + * is found, or NULL if no next leaf node is found. + * Note that this returns 0-length string if the key has no value, or + * the value of the first entry if the value is an array. + */ +const char * __init xbc_node_find_next_key_value(struct xbc_node *root, + struct xbc_node **leaf) +{ + /* tip must be passed */ + if (WARN_ON(!leaf)) + return NULL; + + *leaf = xbc_node_find_next_leaf(root, *leaf); + if (!*leaf) + return NULL; + if ((*leaf)->child) + return xbc_node_get_data(xbc_node_get_child(*leaf)); + else + return ""; /* No value key */ +} + +/* XBC parse and tree build */ + +static struct xbc_node * __init xbc_add_node(char *data, u32 flag) +{ + struct xbc_node *node; + unsigned long offset; + + if (xbc_node_num == XBC_NODE_MAX) + return NULL; + + node = &xbc_nodes[xbc_node_num++]; + offset = data - xbc_data; + node->data = (u16)offset; + if (WARN_ON(offset >= XBC_DATA_MAX)) + return NULL; + node->data |= flag; + node->child = 0; + node->next = 0; + + return node; +} + +static inline __init struct xbc_node *xbc_last_sibling(struct xbc_node *node) +{ + while (node->next) + node = xbc_node_get_next(node); + + return node; +} + +static struct xbc_node * __init xbc_add_sibling(char *data, u32 flag) +{ + struct xbc_node *sib, *node = xbc_add_node(data, flag); + + if (node) { + if (!last_parent) { + node->parent = XBC_NODE_MAX; + sib = xbc_last_sibling(xbc_nodes); + sib->next = xbc_node_index(node); + } else { + node->parent = xbc_node_index(last_parent); + if (!last_parent->child) { + last_parent->child = xbc_node_index(node); + } else { + sib = xbc_node_get_child(last_parent); + sib = xbc_last_sibling(sib); + sib->next = xbc_node_index(node); + } + } + } + + return node; +} + +static inline __init struct xbc_node *xbc_add_child(char *data, u32 flag) +{ + struct xbc_node *node = xbc_add_sibling(data, flag); + + if (node) + last_parent = node; + + return node; +} + +static inline __init bool xbc_valid_keyword(char *key) +{ + if (key[0] == '\0') + return false; + + while (isalnum(*key) || *key == '-' || *key == '_') + key++; + + return *key == '\0'; +} + +static char *skip_comment(char *p) +{ + char *ret; + + ret = strchr(p, '\n'); + if (!ret) + ret = p + strlen(p); + else + ret++; + + return ret; +} + +static char *skip_spaces_until_newline(char *p) +{ + while (isspace(*p) && *p != '\n') + p++; + return p; +} + +static int __init __xbc_open_brace(void) +{ + /* Mark the last key as open brace */ + last_parent->next = XBC_NODE_MAX; + + return 0; +} + +static int __init __xbc_close_brace(char *p) +{ + struct xbc_node *node; + + if (!last_parent 
|| last_parent->next != XBC_NODE_MAX) + return xbc_parse_error("Unexpected closing brace", p); + + node = last_parent; + node->next = 0; + do { + node = xbc_node_get_parent(node); + } while (node && node->next != XBC_NODE_MAX); + last_parent = node; + + return 0; +} + +/* + * Return delimiter or error, no node added. As same as lib/cmdline.c, + * you can use " around spaces, but can't escape " for value. + */ +static int __init __xbc_parse_value(char **__v, char **__n) +{ + char *p, *v = *__v; + int c, quotes = 0; + + v = skip_spaces(v); + while (*v == '#') { + v = skip_comment(v); + v = skip_spaces(v); + } + if (*v == '"' || *v == '\'') { + quotes = *v; + v++; + } + p = v - 1; + while ((c = *++p)) { + if (!isprint(c) && !isspace(c)) + return xbc_parse_error("Non printable value", p); + if (quotes) { + if (c != quotes) + continue; + quotes = 0; + *p++ = '\0'; + p = skip_spaces_until_newline(p); + c = *p; + if (c && !strchr(",;\n#}", c)) + return xbc_parse_error("No value delimiter", p); + if (*p) + p++; + break; + } + if (strchr(",;\n#}", c)) { + v = strim(v); + *p++ = '\0'; + break; + } + } + if (quotes) + return xbc_parse_error("No closing quotes", p); + if (c == '#') { + p = skip_comment(p); + c = '\n'; /* A comment must be treated as a newline */ + } + *__n = p; + *__v = v; + + return c; +} + +static int __init xbc_parse_array(char **__v) +{ + struct xbc_node *node; + char *next; + int c = 0; + + do { + c = __xbc_parse_value(__v, &next); + if (c < 0) + return c; + + node = xbc_add_sibling(*__v, XBC_VALUE); + if (!node) + return -ENOMEM; + *__v = next; + } while (c == ','); + node->next = 0; + + return c; +} + +static inline __init +struct xbc_node *find_match_node(struct xbc_node *node, char *k) +{ + while (node) { + if (!strcmp(xbc_node_get_data(node), k)) + break; + node = xbc_node_get_next(node); + } + return node; +} + +static int __init __xbc_add_key(char *k) +{ + struct xbc_node *node; + + if (!xbc_valid_keyword(k)) + return xbc_parse_error("Invalid keyword", k); + + if (unlikely(xbc_node_num == 0)) + goto add_node; + + if (!last_parent) /* the first level */ + node = find_match_node(xbc_nodes, k); + else + node = find_match_node(xbc_node_get_child(last_parent), k); + + if (node) + last_parent = node; + else { +add_node: + node = xbc_add_child(k, XBC_KEY); + if (!node) + return -ENOMEM; + } + return 0; +} + +static int __init __xbc_parse_keys(char *k) +{ + char *p; + int ret; + + k = strim(k); + while ((p = strchr(k, '.'))) { + *p++ = '\0'; + ret = __xbc_add_key(k); + if (ret) + return ret; + k = p; + } + + return __xbc_add_key(k); +} + +static int __init xbc_parse_kv(char **k, char *v) +{ + struct xbc_node *prev_parent = last_parent; + struct xbc_node *node; + char *next; + int c, ret; + + ret = __xbc_parse_keys(*k); + if (ret) + return ret; + + c = __xbc_parse_value(&v, &next); + if (c < 0) + return c; + + node = xbc_add_sibling(v, XBC_VALUE); + if (!node) + return -ENOMEM; + + if (c == ',') { /* Array */ + c = xbc_parse_array(&next); + if (c < 0) + return c; + } + + last_parent = prev_parent; + + if (c == '}') { + ret = __xbc_close_brace(next - 1); + if (ret < 0) + return ret; + } + + *k = next; + + return 0; +} + +static int __init xbc_parse_key(char **k, char *n) +{ + struct xbc_node *prev_parent = last_parent; + int ret; + + *k = strim(*k); + if (**k != '\0') { + ret = __xbc_parse_keys(*k); + if (ret) + return ret; + last_parent = prev_parent; + } + *k = n; + + return 0; +} + +static int __init xbc_open_brace(char **k, char *n) +{ + int ret; + + ret = __xbc_parse_keys(*k); 
+ if (ret) + return ret; + *k = n; + + return __xbc_open_brace(); +} + +static int __init xbc_close_brace(char **k, char *n) +{ + int ret; + + ret = xbc_parse_key(k, n); + if (ret) + return ret; + /* k is updated in xbc_parse_key() */ + + return __xbc_close_brace(n - 1); +} + +static int __init xbc_verify_tree(void) +{ + int i, depth, len, wlen; + struct xbc_node *n, *m; + + /* Empty tree */ + if (xbc_node_num == 0) + return -ENOENT; + + for (i = 0; i < xbc_node_num; i++) { + if (xbc_nodes[i].next > xbc_node_num) { + return xbc_parse_error("No closing brace", + xbc_node_get_data(xbc_nodes + i)); + } + } + + /* Key tree limitation check */ + n = &xbc_nodes[0]; + depth = 1; + len = 0; + + while (n) { + wlen = strlen(xbc_node_get_data(n)) + 1; + len += wlen; + if (len > XBC_KEYLEN_MAX) + return xbc_parse_error("Too long key length", + xbc_node_get_data(n)); + + m = xbc_node_get_child(n); + if (m && xbc_node_is_key(m)) { + n = m; + depth++; + if (depth > XBC_DEPTH_MAX) + return xbc_parse_error("Too many key words", + xbc_node_get_data(n)); + continue; + } + len -= wlen; + m = xbc_node_get_next(n); + while (!m) { + n = xbc_node_get_parent(n); + if (!n) + break; + len -= strlen(xbc_node_get_data(n)) + 1; + depth--; + m = xbc_node_get_next(n); + } + n = m; + } + + return 0; +} + +/** + * xbc_destroy_all() - Clean up all parsed bootconfig + * + * This clears all data structures of parsed bootconfig on memory. + * If you need to reuse xbc_init() with new boot config, you can + * use this. + */ +void __init xbc_destroy_all(void) +{ + xbc_data = NULL; + xbc_data_size = 0; + xbc_node_num = 0; + memset(xbc_nodes, 0, sizeof(xbc_nodes)); +} + +/** + * xbc_init() - Parse given XBC file and build XBC internal tree + * @buf: boot config text + * + * This parses the boot config text in @buf. @buf must be a + * null terminated string and smaller than XBC_DATA_MAX. + * Return 0 if succeeded, or -errno if there is any error. + */ +int __init xbc_init(char *buf) +{ + char *p, *q; + int ret, c; + + if (xbc_data) + return -EBUSY; + + ret = strlen(buf); + if (ret > XBC_DATA_MAX - 1 || ret == 0) + return -ERANGE; + + xbc_data = buf; + xbc_data_size = ret + 1; + last_parent = NULL; + + p = buf; + do { + q = strpbrk(p, "{}=;\n#"); + if (!q) { + p = skip_spaces(p); + if (*p != '\0') + ret = xbc_parse_error("No delimiter", p); + break; + } + + c = *q; + *q++ = '\0'; + switch (c) { + case '=': + ret = xbc_parse_kv(&p, q); + break; + case '{': + ret = xbc_open_brace(&p, q); + break; + case '#': + q = skip_comment(q); + /* fall through */ + case ';': + case '\n': + ret = xbc_parse_key(&p, q); + break; + case '}': + ret = xbc_close_brace(&p, q); + break; + } + } while (!ret); + + if (!ret) + ret = xbc_verify_tree(); + + if (ret < 0) + xbc_destroy_all(); + + return ret; +} + +/** + * xbc_debug_dump() - Dump current XBC node list + * + * Dump the current XBC node list on printk buffer for debug. + */ +void __init xbc_debug_dump(void) +{ + int i; + + for (i = 0; i < xbc_node_num; i++) { + pr_debug("[%d] %s (%s) .next=%d, .child=%d .parent=%d\n", i, + xbc_node_get_data(xbc_nodes + i), + xbc_node_is_value(xbc_nodes + i) ? "value" : "key", + xbc_nodes[i].next, xbc_nodes[i].child, + xbc_nodes[i].parent); + } +} From 7684b8582c24537dbe079a7d40e1d7e57ca56939 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:03:44 +0900 Subject: [PATCH 057/658] bootconfig: Load boot config from the tail of initrd Load the extended boot config data from the tail of initrd image. 
If there is an SKC data there, it has [(u32)size][(u32)checksum] header (in really, this is a footer) at the end of initrd. If the checksum (simple sum of bytes) is match, this starts parsing it from there. Link: http://lkml.kernel.org/r/157867222435.17873.9936667353335606867.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/Kconfig | 1 + init/main.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 63450d3bbf12..ffd240fb88c3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1217,6 +1217,7 @@ endif config BOOT_CONFIG bool "Boot config support" + depends on BLK_DEV_INITRD select LIBXBC default y help diff --git a/init/main.c b/init/main.c index 2cd736059416..59c418a57f92 100644 --- a/init/main.c +++ b/init/main.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -245,6 +246,58 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); +#ifdef CONFIG_BOOT_CONFIG +u32 boot_config_checksum(unsigned char *p, u32 size) +{ + u32 ret = 0; + + while (size--) + ret += *p++; + + return ret; +} + +static void __init setup_boot_config(void) +{ + u32 size, csum; + char *data, *copy; + u32 *hdr; + + if (!initrd_end) + return; + + hdr = (u32 *)(initrd_end - 8); + size = hdr[0]; + csum = hdr[1]; + + if (size >= XBC_DATA_MAX) + return; + + data = ((void *)hdr) - size; + if ((unsigned long)data < initrd_start) + return; + + if (boot_config_checksum((unsigned char *)data, size) != csum) + return; + + copy = memblock_alloc(size + 1, SMP_CACHE_BYTES); + if (!copy) { + pr_err("Failed to allocate memory for boot config\n"); + return; + } + + memcpy(copy, data, size); + copy[size] = '\0'; + + if (xbc_init(copy) < 0) + pr_err("Failed to parse boot config\n"); + else + pr_info("Load boot config: %d bytes\n", size); +} +#else +#define setup_boot_config() do { } while (0) +#endif + /* Change NUL term back to "=", to make "param" the whole string. */ static int __init repair_env_string(char *param, char *val, const char *unused, void *arg) @@ -595,6 +648,7 @@ asmlinkage __visible void __init start_kernel(void) pr_notice("%s", linux_banner); early_security_init(); setup_arch(&command_line); + setup_boot_config(); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); From 950313ebf79c65702f4c15d29328147766d1f1fd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:03:56 +0900 Subject: [PATCH 058/658] tools: bootconfig: Add bootconfig command Add "bootconfig" command which operates the bootconfig config-data on initrd image. User can add/delete/verify the boot config on initrd image using this command. e.g. Add a boot config to initrd image # bootconfig -a myboot.conf /boot/initrd.img Remove it. # bootconfig -d /boot/initrd.img Or verify (and show) it. 
# bootconfig /boot/initrd.img Link: http://lkml.kernel.org/r/157867223582.17873.14342161849213219982.stgit@devnote2 Signed-off-by: Masami Hiramatsu [ Removed extra blank line at end of bootconfig.c ] Signed-off-by: Steven Rostedt (VMware) --- MAINTAINERS | 1 + tools/Makefile | 11 +- tools/bootconfig/.gitignore | 1 + tools/bootconfig/Makefile | 20 ++ tools/bootconfig/include/linux/bootconfig.h | 7 + tools/bootconfig/include/linux/bug.h | 12 + tools/bootconfig/include/linux/ctype.h | 7 + tools/bootconfig/include/linux/errno.h | 7 + tools/bootconfig/include/linux/kernel.h | 18 + tools/bootconfig/include/linux/printk.h | 17 + tools/bootconfig/include/linux/string.h | 32 ++ tools/bootconfig/main.c | 353 ++++++++++++++++++++ 12 files changed, 481 insertions(+), 5 deletions(-) create mode 100644 tools/bootconfig/.gitignore create mode 100644 tools/bootconfig/Makefile create mode 100644 tools/bootconfig/include/linux/bootconfig.h create mode 100644 tools/bootconfig/include/linux/bug.h create mode 100644 tools/bootconfig/include/linux/ctype.h create mode 100644 tools/bootconfig/include/linux/errno.h create mode 100644 tools/bootconfig/include/linux/kernel.h create mode 100644 tools/bootconfig/include/linux/printk.h create mode 100644 tools/bootconfig/include/linux/string.h create mode 100644 tools/bootconfig/main.c diff --git a/MAINTAINERS b/MAINTAINERS index 8597285eb7c8..06005006de7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15775,6 +15775,7 @@ M: Masami Hiramatsu S: Maintained F: lib/bootconfig.c F: include/linux/bootconfig.h +F: tools/bootconfig/* SUN3/3X M: Sam Creasey diff --git a/tools/Makefile b/tools/Makefile index 7e42f7b8bfa7..bd778812e915 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -28,6 +28,7 @@ help: @echo ' pci - PCI tools' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' + @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @@ -63,7 +64,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE +cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE $(call descend,$@) liblockdep: FORCE @@ -96,7 +97,7 @@ kvm_stat: FORCE $(call descend,kvm/$@) all: acpi cgroup cpupower gpio hv firewire liblockdep \ - perf selftests spi turbostat usb \ + perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ pci debugging @@ -107,7 +108,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -141,7 +142,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: +cgroup_clean 
hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -176,7 +177,7 @@ build_clean: $(call descend,build,clean) clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ - perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ + perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ diff --git a/tools/bootconfig/.gitignore b/tools/bootconfig/.gitignore new file mode 100644 index 000000000000..e7644dfaa4a7 --- /dev/null +++ b/tools/bootconfig/.gitignore @@ -0,0 +1 @@ +bootconfig diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile new file mode 100644 index 000000000000..681b7aef3e44 --- /dev/null +++ b/tools/bootconfig/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for bootconfig command + +bindir ?= /usr/bin + +HEADER = include/linux/bootconfig.h +CFLAGS = -Wall -g -I./include + +PROGS = bootconfig + +all: $(PROGS) + +bootconfig: ../../lib/bootconfig.c main.c $(HEADER) + $(CC) $(filter %.c,$^) $(CFLAGS) -o $@ + +install: $(PROGS) + install bootconfig $(DESTDIR)$(bindir) + +clean: + $(RM) -f *.o bootconfig diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h new file mode 100644 index 000000000000..078cbd2ba651 --- /dev/null +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BOOTCONFIG_LINUX_BOOTCONFIG_H +#define _BOOTCONFIG_LINUX_BOOTCONFIG_H + +#include "../../../../include/linux/bootconfig.h" + +#endif diff --git a/tools/bootconfig/include/linux/bug.h b/tools/bootconfig/include/linux/bug.h new file mode 100644 index 000000000000..7b65a389c0dd --- /dev/null +++ b/tools/bootconfig/include/linux/bug.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_BUG_H +#define _SKC_LINUX_BUG_H + +#include +#include + +#define WARN_ON(cond) \ + ((cond) ? 
printf("Internal warning(%s:%d, %s): %s\n", \ + __FILE__, __LINE__, __func__, #cond) : 0) + +#endif diff --git a/tools/bootconfig/include/linux/ctype.h b/tools/bootconfig/include/linux/ctype.h new file mode 100644 index 000000000000..c56ecc136448 --- /dev/null +++ b/tools/bootconfig/include/linux/ctype.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_CTYPE_H +#define _SKC_LINUX_CTYPE_H + +#include + +#endif diff --git a/tools/bootconfig/include/linux/errno.h b/tools/bootconfig/include/linux/errno.h new file mode 100644 index 000000000000..5d9f91ec2fda --- /dev/null +++ b/tools/bootconfig/include/linux/errno.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_ERRNO_H +#define _SKC_LINUX_ERRNO_H + +#include + +#endif diff --git a/tools/bootconfig/include/linux/kernel.h b/tools/bootconfig/include/linux/kernel.h new file mode 100644 index 000000000000..2d93320aa374 --- /dev/null +++ b/tools/bootconfig/include/linux/kernel.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_KERNEL_H +#define _SKC_LINUX_KERNEL_H + +#include +#include + +#include + +typedef unsigned short u16; +typedef unsigned int u32; + +#define unlikely(cond) (cond) + +#define __init +#define __initdata + +#endif diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h new file mode 100644 index 000000000000..017bcd6912a5 --- /dev/null +++ b/tools/bootconfig/include/linux/printk.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_PRINTK_H +#define _SKC_LINUX_PRINTK_H + +#include + +/* controllable printf */ +extern int pr_output; +#define printk(fmt, ...) \ + (pr_output ? printf(fmt, __VA_ARGS__) : 0) + +#define pr_err printk +#define pr_warn printk +#define pr_info printk +#define pr_debug printk + +#endif diff --git a/tools/bootconfig/include/linux/string.h b/tools/bootconfig/include/linux/string.h new file mode 100644 index 000000000000..8267af75153a --- /dev/null +++ b/tools/bootconfig/include/linux/string.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_STRING_H +#define _SKC_LINUX_STRING_H + +#include + +/* Copied from lib/string.c */ +static inline char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +static inline char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} + +#endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c new file mode 100644 index 000000000000..b8f174fd2a0a --- /dev/null +++ b/tools/bootconfig/main.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Boot config tool for initrd image + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +int pr_output = 1; + +static int xbc_show_array(struct xbc_node *node) +{ + const char *val; + int i = 0; + + xbc_array_for_each_value(node, val) { + printf("\"%s\"%s", val, node->next ? 
", " : ";\n"); + i++; + } + return i; +} + +static void xbc_show_compact_tree(void) +{ + struct xbc_node *node, *cnode; + int depth = 0, i; + + node = xbc_root_node(); + while (node && xbc_node_is_key(node)) { + for (i = 0; i < depth; i++) + printf("\t"); + cnode = xbc_node_get_child(node); + while (cnode && xbc_node_is_key(cnode) && !cnode->next) { + printf("%s.", xbc_node_get_data(node)); + node = cnode; + cnode = xbc_node_get_child(node); + } + if (cnode && xbc_node_is_key(cnode)) { + printf("%s {\n", xbc_node_get_data(node)); + depth++; + node = cnode; + continue; + } else if (cnode && xbc_node_is_value(cnode)) { + printf("%s = ", xbc_node_get_data(node)); + if (cnode->next) + xbc_show_array(cnode); + else + printf("\"%s\";\n", xbc_node_get_data(cnode)); + } else { + printf("%s;\n", xbc_node_get_data(node)); + } + + if (node->next) { + node = xbc_node_get_next(node); + continue; + } + while (!node->next) { + node = xbc_node_get_parent(node); + if (!node) + return; + if (!xbc_node_get_child(node)->next) + continue; + depth--; + for (i = 0; i < depth; i++) + printf("\t"); + printf("}\n"); + } + node = xbc_node_get_next(node); + } +} + +/* Simple real checksum */ +int checksum(unsigned char *buf, int len) +{ + int i, sum = 0; + + for (i = 0; i < len; i++) + sum += buf[i]; + + return sum; +} + +#define PAGE_SIZE 4096 + +int load_xbc_fd(int fd, char **buf, int size) +{ + int ret; + + *buf = malloc(size + 1); + if (!*buf) + return -ENOMEM; + + ret = read(fd, *buf, size); + if (ret < 0) + return -errno; + (*buf)[size] = '\0'; + + return ret; +} + +/* Return the read size or -errno */ +int load_xbc_file(const char *path, char **buf) +{ + struct stat stat; + int fd, ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + ret = fstat(fd, &stat); + if (ret < 0) + return -errno; + + ret = load_xbc_fd(fd, buf, stat.st_size); + + close(fd); + + return ret; +} + +int load_xbc_from_initrd(int fd, char **buf) +{ + struct stat stat; + int ret; + u32 size = 0, csum = 0, rcsum; + + ret = fstat(fd, &stat); + if (ret < 0) + return -errno; + + if (stat.st_size < 8) + return 0; + + if (lseek(fd, -8, SEEK_END) < 0) { + printf("Faile to lseek: %d\n", -errno); + return -errno; + } + + if (read(fd, &size, sizeof(u32)) < 0) + return -errno; + + if (read(fd, &csum, sizeof(u32)) < 0) + return -errno; + + /* Wrong size, maybe no boot config here */ + if (stat.st_size < size + 8) + return 0; + + if (lseek(fd, stat.st_size - 8 - size, SEEK_SET) < 0) { + printf("Faile to lseek: %d\n", -errno); + return -errno; + } + + ret = load_xbc_fd(fd, buf, size); + if (ret < 0) + return ret; + + /* Wrong Checksum, maybe no boot config here */ + rcsum = checksum((unsigned char *)*buf, size); + if (csum != rcsum) { + printf("checksum error: %d != %d\n", csum, rcsum); + return 0; + } + + ret = xbc_init(*buf); + /* Wrong data, maybe no boot config here */ + if (ret < 0) + return 0; + + return size; +} + +int show_xbc(const char *path) +{ + int ret, fd; + char *buf = NULL; + + fd = open(path, O_RDONLY); + if (fd < 0) { + printf("Failed to open initrd %s: %d\n", path, fd); + return -errno; + } + + ret = load_xbc_from_initrd(fd, &buf); + if (ret < 0) + printf("Failed to load a boot config from initrd: %d\n", ret); + else + xbc_show_compact_tree(); + + close(fd); + free(buf); + + return ret; +} + +int delete_xbc(const char *path) +{ + struct stat stat; + int ret = 0, fd, size; + char *buf = NULL; + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("Failed to open initrd %s: %d\n", path, fd); + return -errno; + } + + /* + * 
Suppress error messages in xbc_init() because it can be just a + * data which concidentally matches the size and checksum footer. + */ + pr_output = 0; + size = load_xbc_from_initrd(fd, &buf); + pr_output = 1; + if (size < 0) { + ret = size; + printf("Failed to load a boot config from initrd: %d\n", ret); + } else if (size > 0) { + ret = fstat(fd, &stat); + if (!ret) + ret = ftruncate(fd, stat.st_size - size - 8); + if (ret) + ret = -errno; + } /* Ignore if there is no boot config in initrd */ + + close(fd); + free(buf); + + return ret; +} + +int apply_xbc(const char *path, const char *xbc_path) +{ + u32 size, csum; + char *buf, *data; + int ret, fd; + + ret = load_xbc_file(xbc_path, &buf); + if (ret < 0) { + printf("Failed to load %s : %d\n", xbc_path, ret); + return ret; + } + size = strlen(buf) + 1; + csum = checksum((unsigned char *)buf, size); + + /* Prepare xbc_path data */ + data = malloc(size + 8); + if (!data) + return -ENOMEM; + strcpy(data, buf); + *(u32 *)(data + size) = size; + *(u32 *)(data + size + 4) = csum; + + /* Check the data format */ + ret = xbc_init(buf); + if (ret < 0) { + printf("Failed to parse %s: %d\n", xbc_path, ret); + free(data); + free(buf); + return ret; + } + printf("Apply %s to %s\n", xbc_path, path); + printf("\tSize: %u bytes\n", (unsigned int)size); + printf("\tChecksum: %d\n", (unsigned int)csum); + + /* TODO: Check the options by schema */ + xbc_destroy_all(); + free(buf); + + /* Remove old boot config if exists */ + ret = delete_xbc(path); + if (ret < 0) { + printf("Failed to delete previous boot config: %d\n", ret); + return ret; + } + + /* Apply new one */ + fd = open(path, O_RDWR | O_APPEND); + if (fd < 0) { + printf("Failed to open %s: %d\n", path, fd); + return fd; + } + /* TODO: Ensure the @path is initramfs/initrd image */ + ret = write(fd, data, size + 8); + if (ret < 0) { + printf("Failed to apply a boot config: %d\n", ret); + return ret; + } + close(fd); + free(data); + + return 0; +} + +int usage(void) +{ + printf("Usage: bootconfig [OPTIONS] \n" + " Apply, delete or show boot config to initrd.\n" + " Options:\n" + " -a : Apply boot config to initrd\n" + " -d : Delete boot config file from initrd\n\n" + " If no option is given, show current applied boot config.\n"); + return -1; +} + +int main(int argc, char **argv) +{ + char *path = NULL; + char *apply = NULL; + bool delete = false; + int opt; + + while ((opt = getopt(argc, argv, "hda:")) != -1) { + switch (opt) { + case 'd': + delete = true; + break; + case 'a': + apply = optarg; + break; + case 'h': + default: + return usage(); + } + } + + if (apply && delete) { + printf("Error: You can not specify both -a and -d at once.\n"); + return usage(); + } + + if (optind >= argc) { + printf("Error: No initrd is specified.\n"); + return usage(); + } + + path = argv[optind]; + + if (apply) + return apply_xbc(path, apply); + else if (delete) + return delete_xbc(path); + + return show_xbc(path); +} From 081c65360bd817672d0753fdf68ab34802d7a81d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:04:07 +0900 Subject: [PATCH 059/658] tools: bootconfig: Add bootconfig test script Add a bootconfig test script to ensure the tool and boot config parser are working correctly. 
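The script is hooked up to the tool's Makefile (see the "test" target in the diff below), so the whole suite can be run from tools/bootconfig with "make test". That builds the bootconfig binary, creates a temporary initrd image, exercises the apply/delete/show paths as well as the bad-*/good-* sample configs, and prints "All tests passed" (or the number of failures) at the end.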
Link: http://lkml.kernel.org/r/157867224728.17873.18114241801246589416.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/Makefile | 3 + .../samples/bad-array-space-comment.bconf | 5 + tools/bootconfig/samples/bad-array.bconf | 2 + tools/bootconfig/samples/bad-dotword.bconf | 4 + tools/bootconfig/samples/bad-empty.bconf | 1 + tools/bootconfig/samples/bad-keyerror.bconf | 2 + tools/bootconfig/samples/bad-longkey.bconf | 1 + tools/bootconfig/samples/bad-manywords.bconf | 1 + tools/bootconfig/samples/bad-no-keyword.bconf | 2 + .../bootconfig/samples/bad-nonprintable.bconf | 2 + tools/bootconfig/samples/bad-spaceword.bconf | 2 + tools/bootconfig/samples/bad-tree.bconf | 5 + tools/bootconfig/samples/bad-value.bconf | 3 + tools/bootconfig/samples/escaped.bconf | 3 + .../samples/good-array-space-comment.bconf | 4 + .../samples/good-comment-after-value.bconf | 1 + .../bootconfig/samples/good-printables.bconf | 2 + tools/bootconfig/samples/good-simple.bconf | 11 ++ tools/bootconfig/samples/good-single.bconf | 4 + .../samples/good-space-after-value.bconf | 1 + tools/bootconfig/samples/good-tree.bconf | 12 ++ tools/bootconfig/test-bootconfig.sh | 105 ++++++++++++++++++ 22 files changed, 176 insertions(+) create mode 100644 tools/bootconfig/samples/bad-array-space-comment.bconf create mode 100644 tools/bootconfig/samples/bad-array.bconf create mode 100644 tools/bootconfig/samples/bad-dotword.bconf create mode 100644 tools/bootconfig/samples/bad-empty.bconf create mode 100644 tools/bootconfig/samples/bad-keyerror.bconf create mode 100644 tools/bootconfig/samples/bad-longkey.bconf create mode 100644 tools/bootconfig/samples/bad-manywords.bconf create mode 100644 tools/bootconfig/samples/bad-no-keyword.bconf create mode 100644 tools/bootconfig/samples/bad-nonprintable.bconf create mode 100644 tools/bootconfig/samples/bad-spaceword.bconf create mode 100644 tools/bootconfig/samples/bad-tree.bconf create mode 100644 tools/bootconfig/samples/bad-value.bconf create mode 100644 tools/bootconfig/samples/escaped.bconf create mode 100644 tools/bootconfig/samples/good-array-space-comment.bconf create mode 100644 tools/bootconfig/samples/good-comment-after-value.bconf create mode 100644 tools/bootconfig/samples/good-printables.bconf create mode 100644 tools/bootconfig/samples/good-simple.bconf create mode 100644 tools/bootconfig/samples/good-single.bconf create mode 100644 tools/bootconfig/samples/good-space-after-value.bconf create mode 100644 tools/bootconfig/samples/good-tree.bconf create mode 100755 tools/bootconfig/test-bootconfig.sh diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile index 681b7aef3e44..a6146ac64458 100644 --- a/tools/bootconfig/Makefile +++ b/tools/bootconfig/Makefile @@ -16,5 +16,8 @@ bootconfig: ../../lib/bootconfig.c main.c $(HEADER) install: $(PROGS) install bootconfig $(DESTDIR)$(bindir) +test: bootconfig + ./test-bootconfig.sh + clean: $(RM) -f *.o bootconfig diff --git a/tools/bootconfig/samples/bad-array-space-comment.bconf b/tools/bootconfig/samples/bad-array-space-comment.bconf new file mode 100644 index 000000000000..fda19e47d0db --- /dev/null +++ b/tools/bootconfig/samples/bad-array-space-comment.bconf @@ -0,0 +1,5 @@ +key = # comment + "value1", # comment1 + "value2" # comment2 +, + "value3" diff --git a/tools/bootconfig/samples/bad-array.bconf b/tools/bootconfig/samples/bad-array.bconf new file mode 100644 index 000000000000..0174af019d7f --- /dev/null +++ b/tools/bootconfig/samples/bad-array.bconf @@ -0,0 
+1,2 @@ +# Array must be comma separated. +key = "value1" "value2" diff --git a/tools/bootconfig/samples/bad-dotword.bconf b/tools/bootconfig/samples/bad-dotword.bconf new file mode 100644 index 000000000000..ba5557b2bdd3 --- /dev/null +++ b/tools/bootconfig/samples/bad-dotword.bconf @@ -0,0 +1,4 @@ +# do not start keyword with . +key { + .word = 1 +} diff --git a/tools/bootconfig/samples/bad-empty.bconf b/tools/bootconfig/samples/bad-empty.bconf new file mode 100644 index 000000000000..2ba3f6cc6a47 --- /dev/null +++ b/tools/bootconfig/samples/bad-empty.bconf @@ -0,0 +1 @@ +# Wrong boot config: comment only diff --git a/tools/bootconfig/samples/bad-keyerror.bconf b/tools/bootconfig/samples/bad-keyerror.bconf new file mode 100644 index 000000000000..b6e247a099d0 --- /dev/null +++ b/tools/bootconfig/samples/bad-keyerror.bconf @@ -0,0 +1,2 @@ +# key word can not contain "," +key,word diff --git a/tools/bootconfig/samples/bad-longkey.bconf b/tools/bootconfig/samples/bad-longkey.bconf new file mode 100644 index 000000000000..eb97369f91a8 --- /dev/null +++ b/tools/bootconfig/samples/bad-longkey.bconf @@ -0,0 +1 @@ +key_word_is_too_long01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 diff --git a/tools/bootconfig/samples/bad-manywords.bconf b/tools/bootconfig/samples/bad-manywords.bconf new file mode 100644 index 000000000000..8db81967c48a --- /dev/null +++ b/tools/bootconfig/samples/bad-manywords.bconf @@ -0,0 +1 @@ +key1.is2.too3.long4.5.6.7.8.9.10.11.12.13.14.15.16.17 diff --git a/tools/bootconfig/samples/bad-no-keyword.bconf b/tools/bootconfig/samples/bad-no-keyword.bconf new file mode 100644 index 000000000000..eff26808566c --- /dev/null +++ b/tools/bootconfig/samples/bad-no-keyword.bconf @@ -0,0 +1,2 @@ +# No keyword +{} diff --git a/tools/bootconfig/samples/bad-nonprintable.bconf b/tools/bootconfig/samples/bad-nonprintable.bconf new file mode 100644 index 000000000000..3bb1a2864e52 --- /dev/null +++ b/tools/bootconfig/samples/bad-nonprintable.bconf @@ -0,0 +1,2 @@ +# Non printable +key = "" diff --git a/tools/bootconfig/samples/bad-spaceword.bconf b/tools/bootconfig/samples/bad-spaceword.bconf new file mode 100644 index 000000000000..90c703d32a9a --- /dev/null +++ b/tools/bootconfig/samples/bad-spaceword.bconf @@ -0,0 +1,2 @@ +# No space between words +key . 
word diff --git a/tools/bootconfig/samples/bad-tree.bconf b/tools/bootconfig/samples/bad-tree.bconf new file mode 100644 index 000000000000..5a6038edcd55 --- /dev/null +++ b/tools/bootconfig/samples/bad-tree.bconf @@ -0,0 +1,5 @@ +# brace is not closing +tree { + node { + value = 1 +} diff --git a/tools/bootconfig/samples/bad-value.bconf b/tools/bootconfig/samples/bad-value.bconf new file mode 100644 index 000000000000..a1217fed86cc --- /dev/null +++ b/tools/bootconfig/samples/bad-value.bconf @@ -0,0 +1,3 @@ +# Quotes error +value = "data + diff --git a/tools/bootconfig/samples/escaped.bconf b/tools/bootconfig/samples/escaped.bconf new file mode 100644 index 000000000000..9f72043b3216 --- /dev/null +++ b/tools/bootconfig/samples/escaped.bconf @@ -0,0 +1,3 @@ +key1 = "A\B\C" +key2 = '\'\'' +key3 = "\\" diff --git a/tools/bootconfig/samples/good-array-space-comment.bconf b/tools/bootconfig/samples/good-array-space-comment.bconf new file mode 100644 index 000000000000..45b938dc0695 --- /dev/null +++ b/tools/bootconfig/samples/good-array-space-comment.bconf @@ -0,0 +1,4 @@ +key = # comment + "value1", # comment1 + "value2" , # comment2 + "value3" diff --git a/tools/bootconfig/samples/good-comment-after-value.bconf b/tools/bootconfig/samples/good-comment-after-value.bconf new file mode 100644 index 000000000000..0d92a853df72 --- /dev/null +++ b/tools/bootconfig/samples/good-comment-after-value.bconf @@ -0,0 +1 @@ +key = "value" # comment diff --git a/tools/bootconfig/samples/good-printables.bconf b/tools/bootconfig/samples/good-printables.bconf new file mode 100644 index 000000000000..ebb985a66ed8 --- /dev/null +++ b/tools/bootconfig/samples/good-printables.bconf @@ -0,0 +1,2 @@ +key = " + !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" diff --git a/tools/bootconfig/samples/good-simple.bconf b/tools/bootconfig/samples/good-simple.bconf new file mode 100644 index 000000000000..37dd6d21c176 --- /dev/null +++ b/tools/bootconfig/samples/good-simple.bconf @@ -0,0 +1,11 @@ +# A good simple bootconfig + +key.word1 = 1 +key.word2=2 +key.word3 = 3; + +key { +word4 = 4 } + +key { word5 = 5; word6 = 6 } + diff --git a/tools/bootconfig/samples/good-single.bconf b/tools/bootconfig/samples/good-single.bconf new file mode 100644 index 000000000000..98e55ad8b711 --- /dev/null +++ b/tools/bootconfig/samples/good-single.bconf @@ -0,0 +1,4 @@ +# single key style +key = 1 +key2 = 2 +key3 = "alpha", "beta" diff --git a/tools/bootconfig/samples/good-space-after-value.bconf b/tools/bootconfig/samples/good-space-after-value.bconf new file mode 100644 index 000000000000..56c15cbc5741 --- /dev/null +++ b/tools/bootconfig/samples/good-space-after-value.bconf @@ -0,0 +1 @@ +key = "value" diff --git a/tools/bootconfig/samples/good-tree.bconf b/tools/bootconfig/samples/good-tree.bconf new file mode 100644 index 000000000000..f2ddefc8b52a --- /dev/null +++ b/tools/bootconfig/samples/good-tree.bconf @@ -0,0 +1,12 @@ +key { + word { + tree { + value = "0"} + } + word2 { + tree { + value = 1,2 } + } +} +other.tree { + value = 2; value2 = 3;} diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh new file mode 100755 index 000000000000..87725e8723f8 --- /dev/null +++ b/tools/bootconfig/test-bootconfig.sh @@ -0,0 +1,105 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +echo "Boot config test script" + +BOOTCONF=./bootconfig +INITRD=`mktemp initrd-XXXX` +TEMPCONF=`mktemp temp-XXXX.bconf` +NG=0 + +cleanup() { + rm -f $INITRD $TEMPCONF + exit $NG 
+} + +trap cleanup EXIT TERM + +NO=1 + +xpass() { # pass test command + echo "test case $NO ($3)... " + if ! ($@ && echo "\t\t[OK]"); then + echo "\t\t[NG]"; NG=$((NG + 1)) + fi + NO=$((NO + 1)) +} + +xfail() { # fail test command + echo "test case $NO ($3)... " + if ! (! $@ && echo "\t\t[OK]"); then + echo "\t\t[NG]"; NG=$((NG + 1)) + fi + NO=$((NO + 1)) +} + +echo "Basic command test" +xpass $BOOTCONF $INITRD + +echo "Delete command should success without bootconfig" +xpass $BOOTCONF -d $INITRD + +dd if=/dev/zero of=$INITRD bs=4096 count=1 +echo "key = value;" > $TEMPCONF +bconf_size=$(stat -c %s $TEMPCONF) +initrd_size=$(stat -c %s $INITRD) + +echo "Apply command test" +xpass $BOOTCONF -a $TEMPCONF $INITRD +new_size=$(stat -c %s $INITRD) + +echo "File size check" +xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9) + +echo "Apply command repeat test" +xpass $BOOTCONF -a $TEMPCONF $INITRD + +echo "File size check" +xpass test $new_size -eq $(stat -c %s $INITRD) + +echo "Delete command check" +xpass $BOOTCONF -d $INITRD + +echo "File size check" +new_size=$(stat -c %s $INITRD) +xpass test $new_size -eq $initrd_size + +echo "Max node number check" + +echo -n > $TEMPCONF +for i in `seq 1 1024` ; do + echo "node$i" >> $TEMPCONF +done +xpass $BOOTCONF -a $TEMPCONF $INITRD + +echo "badnode" >> $TEMPCONF +xfail $BOOTCONF -a $TEMPCONF $INITRD + +echo "Max filesize check" + +# Max size is 32767 (including terminal byte) +echo -n "data = \"" > $TEMPCONF +dd if=/dev/urandom bs=768 count=32 | base64 -w0 >> $TEMPCONF +echo "\"" >> $TEMPCONF +xfail $BOOTCONF -a $TEMPCONF $INITRD + +truncate -s 32764 $TEMPCONF +echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') +xpass $BOOTCONF -a $TEMPCONF $INITRD + +echo "=== expected failure cases ===" +for i in samples/bad-* ; do + xfail $BOOTCONF -a $i $INITRD +done + +echo "=== expected success cases ===" +for i in samples/good-* ; do + xpass $BOOTCONF -a $i $INITRD +done + +echo +if [ $NG -eq 0 ]; then + echo "All tests passed" +else + echo "$NG tests failed" +fi From c1a3c36017d4c4a4c61aa5f86704b7e80b92d3f7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:04:19 +0900 Subject: [PATCH 060/658] proc: bootconfig: Add /proc/bootconfig to show boot config list Add /proc/bootconfig which shows the list of key-value pairs in boot config. Since after boot, all boot configs and tree are removed, this interface just keep a copy of key-value pairs in text. 
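With the formatting implemented below, reading /proc/bootconfig gives one entry per line, for example (hypothetical keys and values):

	feature.option.enable = ""
	kernel.trace_options = "sym-addr", "stacktrace"

i.e. a key-only entry is shown with an empty string and array values are printed comma-separated on a single line.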
Link: http://lkml.kernel.org/r/157867225967.17873.12155805787236073787.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- MAINTAINERS | 1 + fs/proc/Makefile | 1 + fs/proc/bootconfig.c | 89 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 fs/proc/bootconfig.c diff --git a/MAINTAINERS b/MAINTAINERS index 06005006de7c..903e8a7ed0bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15774,6 +15774,7 @@ EXTRA BOOT CONFIG M: Masami Hiramatsu S: Maintained F: lib/bootconfig.c +F: fs/proc/bootconfig.c F: include/linux/bootconfig.h F: tools/bootconfig/* diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ead487e80510..bd08616ed8ba 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -33,3 +33,4 @@ proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o +proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c new file mode 100644 index 000000000000..9955d75c0585 --- /dev/null +++ b/fs/proc/bootconfig.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * /proc/bootconfig - Extra boot configuration + */ +#include +#include +#include +#include +#include +#include +#include + +static char *saved_boot_config; + +static int boot_config_proc_show(struct seq_file *m, void *v) +{ + if (saved_boot_config) + seq_puts(m, saved_boot_config); + return 0; +} + +/* Rest size of buffer */ +#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0) + +/* Return the needed total length if @size is 0 */ +static int __init copy_xbc_key_value_list(char *dst, size_t size) +{ + struct xbc_node *leaf, *vnode; + const char *val; + char *key, *end = dst + size; + int ret = 0; + + key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); + + xbc_for_each_key_value(leaf, val) { + ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); + if (ret < 0) + break; + ret = snprintf(dst, rest(dst, end), "%s = ", key); + if (ret < 0) + break; + dst += ret; + vnode = xbc_node_get_child(leaf); + if (vnode && xbc_node_is_array(vnode)) { + xbc_array_for_each_value(vnode, val) { + ret = snprintf(dst, rest(dst, end), "\"%s\"%s", + val, vnode->next ? ", " : "\n"); + if (ret < 0) + goto out; + dst += ret; + } + } else { + ret = snprintf(dst, rest(dst, end), "\"%s\"\n", val); + if (ret < 0) + break; + dst += ret; + } + } +out: + kfree(key); + + return ret < 0 ? ret : dst - (end - size); +} + +static int __init proc_boot_config_init(void) +{ + int len; + + len = copy_xbc_key_value_list(NULL, 0); + if (len < 0) + return len; + + if (len > 0) { + saved_boot_config = kzalloc(len + 1, GFP_KERNEL); + if (!saved_boot_config) + return -ENOMEM; + + len = copy_xbc_key_value_list(saved_boot_config, len + 1); + if (len < 0) { + kfree(saved_boot_config); + return len; + } + } + + proc_create_single("bootconfig", 0, NULL, boot_config_proc_show); + + return 0; +} +fs_initcall(proc_boot_config_init); From 0068c92a92707789b8711e40d584a2433481a29d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:04:31 +0900 Subject: [PATCH 061/658] init/main.c: Alloc initcall_command_line in do_initcall() and free it Since initcall_command_line is used as a temporary buffer, it could be freed after usage. Allocate it in do_initcall() and free it after used. 
Link: http://lkml.kernel.org/r/157867227145.17873.17513760552008505454.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/init/main.c b/init/main.c index 59c418a57f92..0b4e0c8ccf16 100644 --- a/init/main.c +++ b/init/main.c @@ -137,8 +137,6 @@ char __initdata boot_command_line[COMMAND_LINE_SIZE]; char *saved_command_line; /* Command line for parameter parsing */ static char *static_command_line; -/* Command line for per-initcall parameter parsing */ -static char *initcall_command_line; static char *execute_command; static char *ramdisk_execute_command; @@ -433,10 +431,6 @@ static void __init setup_command_line(char *command_line) if (!saved_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); - initcall_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!initcall_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); - static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); @@ -1044,13 +1038,12 @@ static const char *initcall_level_names[] __initdata = { "late", }; -static void __init do_initcall_level(int level) +static void __init do_initcall_level(int level, char *command_line) { initcall_entry_t *fn; - strcpy(initcall_command_line, saved_command_line); parse_args(initcall_level_names[level], - initcall_command_line, __start___param, + command_line, __start___param, __stop___param - __start___param, level, level, NULL, &repair_env_string); @@ -1063,9 +1056,20 @@ static void __init do_initcall_level(int level) static void __init do_initcalls(void) { int level; + size_t len = strlen(saved_command_line) + 1; + char *command_line; - for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) - do_initcall_level(level); + command_line = kzalloc(len, GFP_KERNEL); + if (!command_line) + panic("%s: Failed to allocate %zu bytes\n", __func__, len); + + for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) { + /* Parser modifies command_line, restore it each time */ + strcpy(command_line, saved_command_line); + do_initcall_level(level, command_line); + } + + kfree(command_line); } /* From 51887d03aca101a24ab049179d1ab430464a24e6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:04:43 +0900 Subject: [PATCH 062/658] bootconfig: init: Allow admin to use bootconfig for kernel command line Since the current kernel command line is too short to describe many options which supported by kernel, allow user to use boot config to setup (add) the command line options. All kernel parameters under "kernel." keywords will be used for setting up extra kernel command line. For example, kernel { audit = on audit_backlog_limit = 256 } Note that you can not specify some early parameters (like console etc.) by this method, since it is loaded after early parameters parsed. 
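With the helpers added below, the example above would be expanded into an ordinary parameter string and placed in front of the command line, roughly:

	audit="on" audit_backlog_limit="256" <original kernel cmdline>

(the quoting comes from xbc_snprint_cmdline() and the leading position from the setup_command_line() change in this patch).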
Link: http://lkml.kernel.org/r/157867228333.17873.11962796367032622466.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 5 deletions(-) diff --git a/init/main.c b/init/main.c index 0b4e0c8ccf16..c0017d9d16e7 100644 --- a/init/main.c +++ b/init/main.c @@ -137,6 +137,8 @@ char __initdata boot_command_line[COMMAND_LINE_SIZE]; char *saved_command_line; /* Command line for parameter parsing */ static char *static_command_line; +/* Untouched extra command line */ +static char *extra_command_line; static char *execute_command; static char *ramdisk_execute_command; @@ -245,6 +247,83 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); #ifdef CONFIG_BOOT_CONFIG + +char xbc_namebuf[XBC_KEYLEN_MAX] __initdata; + +#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0) + +static int __init xbc_snprint_cmdline(char *buf, size_t size, + struct xbc_node *root) +{ + struct xbc_node *knode, *vnode; + char *end = buf + size; + char c = '\"'; + const char *val; + int ret; + + xbc_node_for_each_key_value(root, knode, val) { + ret = xbc_node_compose_key_after(root, knode, + xbc_namebuf, XBC_KEYLEN_MAX); + if (ret < 0) + return ret; + + vnode = xbc_node_get_child(knode); + ret = snprintf(buf, rest(buf, end), "%s%c", xbc_namebuf, + vnode ? '=' : ' '); + if (ret < 0) + return ret; + buf += ret; + if (!vnode) + continue; + + c = '\"'; + xbc_array_for_each_value(vnode, val) { + ret = snprintf(buf, rest(buf, end), "%c%s", c, val); + if (ret < 0) + return ret; + buf += ret; + c = ','; + } + if (rest(buf, end) > 2) + strcpy(buf, "\" "); + buf += 2; + } + + return buf - (end - size); +} +#undef rest + +/* Make an extra command line under given key word */ +static char * __init xbc_make_cmdline(const char *key) +{ + struct xbc_node *root; + char *new_cmdline; + int ret, len = 0; + + root = xbc_find_node(key); + if (!root) + return NULL; + + /* Count required buffer size */ + len = xbc_snprint_cmdline(NULL, 0, root); + if (len <= 0) + return NULL; + + new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES); + if (!new_cmdline) { + pr_err("Failed to allocate memory for extra kernel cmdline.\n"); + return NULL; + } + + ret = xbc_snprint_cmdline(new_cmdline, len + 1, root); + if (ret < 0 || ret > len) { + pr_err("Failed to print extra kernel cmdline.\n"); + return NULL; + } + + return new_cmdline; +} + u32 boot_config_checksum(unsigned char *p, u32 size) { u32 ret = 0; @@ -289,8 +368,11 @@ static void __init setup_boot_config(void) if (xbc_init(copy) < 0) pr_err("Failed to parse boot config\n"); - else + else { pr_info("Load boot config: %d bytes\n", size); + /* keys starting with "kernel." 
are passed via cmdline */ + extra_command_line = xbc_make_cmdline("kernel"); + } } #else #define setup_boot_config() do { } while (0) @@ -425,7 +507,12 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } */ static void __init setup_command_line(char *command_line) { - size_t len = strlen(boot_command_line) + 1; + size_t len, xlen = 0; + + if (extra_command_line) + xlen = strlen(extra_command_line); + + len = xlen + strlen(boot_command_line) + 1; saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!saved_command_line) @@ -435,8 +522,17 @@ static void __init setup_command_line(char *command_line) if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); - strcpy(saved_command_line, boot_command_line); - strcpy(static_command_line, command_line); + if (xlen) { + /* + * We have to put extra_command_line before boot command + * lines because there could be dashes (separator of init + * command line) in the command lines. + */ + strcpy(saved_command_line, extra_command_line); + strcpy(static_command_line, extra_command_line); + } + strcpy(saved_command_line + xlen, boot_command_line); + strcpy(static_command_line + xlen, command_line); } /* @@ -652,7 +748,7 @@ asmlinkage __visible void __init start_kernel(void) build_all_zonelists(NULL); page_alloc_init(); - pr_notice("Kernel command line: %s\n", boot_command_line); + pr_notice("Kernel command line: %s\n", saved_command_line); /* parameters may set static keys */ jump_label_init(); parse_early_param(); From 1319916209ce8f55a4f4f848e74500633e24bb99 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:04:55 +0900 Subject: [PATCH 063/658] bootconfig: init: Allow admin to use bootconfig for init command line Since the current kernel command line is too short to describe long and many options for init (e.g. systemd command line options), this allows admin to use boot config for init command line. All init command line under "init." keywords will be passed to init. For example, init.systemd { unified_cgroup_hierarchy = 1 debug_shell default_timeout_start_sec = 60 } Link: http://lkml.kernel.org/r/157867229521.17873.654222294326542349.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index c0017d9d16e7..dd7da62d99a5 100644 --- a/init/main.c +++ b/init/main.c @@ -139,6 +139,8 @@ char *saved_command_line; static char *static_command_line; /* Untouched extra command line */ static char *extra_command_line; +/* Extra init arguments */ +static char *extra_init_args; static char *execute_command; static char *ramdisk_execute_command; @@ -372,6 +374,8 @@ static void __init setup_boot_config(void) pr_info("Load boot config: %d bytes\n", size); /* keys starting with "kernel." are passed via cmdline */ extra_command_line = xbc_make_cmdline("kernel"); + /* Also, "init." 
keys are init arguments */ + extra_init_args = xbc_make_cmdline("init"); } } #else @@ -507,16 +511,18 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } */ static void __init setup_command_line(char *command_line) { - size_t len, xlen = 0; + size_t len, xlen = 0, ilen = 0; if (extra_command_line) xlen = strlen(extra_command_line); + if (extra_init_args) + ilen = strlen(extra_init_args) + 4; /* for " -- " */ len = xlen + strlen(boot_command_line) + 1; - saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES); + saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES); if (!saved_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) @@ -533,6 +539,22 @@ static void __init setup_command_line(char *command_line) } strcpy(saved_command_line + xlen, boot_command_line); strcpy(static_command_line + xlen, command_line); + + if (ilen) { + /* + * Append supplemental init boot args to saved_command_line + * so that user can check what command line options passed + * to init. + */ + len = strlen(saved_command_line); + if (!strstr(boot_command_line, " -- ")) { + strcpy(saved_command_line + len, " -- "); + len += 4; + } else + saved_command_line[len++] = ' '; + + strcpy(saved_command_line + len, extra_init_args); + } } /* @@ -759,6 +781,9 @@ asmlinkage __visible void __init start_kernel(void) if (!IS_ERR_OR_NULL(after_dashes)) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg); + if (extra_init_args) + parse_args("Setting extra init args", extra_init_args, + NULL, 0, -1, -1, NULL, set_init_arg); /* * These use large bootmem allocations and must precede From 7b9b816f4b9a3513602454b52c77f371388a2485 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:05:06 +0900 Subject: [PATCH 064/658] Documentation: bootconfig: Add a doc for extended boot config Add a documentation for extended boot config under admin-guide, since it is including the syntax of boot config. Link: http://lkml.kernel.org/r/157867230658.17873.9309879174829924324.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 184 +++++++++++++++++++++++ Documentation/admin-guide/index.rst | 1 + MAINTAINERS | 1 + 3 files changed, 186 insertions(+) create mode 100644 Documentation/admin-guide/bootconfig.rst diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst new file mode 100644 index 000000000000..f7475df2a718 --- /dev/null +++ b/Documentation/admin-guide/bootconfig.rst @@ -0,0 +1,184 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +Boot Configuration +================== + +:Author: Masami Hiramatsu + +Overview +======== + +The boot configuration is expanding current kernel cmdline to support +additional key-value data when boot the kernel in an efficient way. +This allows adoministrators to pass a structured-Key config file. + +Config File Syntax +================== + +The boot config syntax is a simple structured key-value. Each key consists +of dot-connected-words, and key and value are connected by "=". The value +has to be terminated by semi-colon (``;``) or newline (``\n``). +For array value, array entries are separated by comma (``,``). 
::
+
+KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
+
+Each key word must contain only alphabetic characters, numbers, dash (``-``)
+or underscore (``_``). Each value may contain only printable characters or
+spaces, except for delimiters such as semi-colon (``;``), new-line (``\n``),
+comma (``,``), hash (``#``) and closing brace (``}``).
+
+If you want to use those delimiters in a value, you can use either double-
+quotes (``"VALUE"``) or single-quotes (``'VALUE'``) to quote it. Note that
+you can not escape these quotes.
+
+There can be a key which doesn't have a value or has an empty value. Those keys
+are used for checking whether the key exists or not (like a boolean).
+
+Key-Value Syntax
+----------------
+
+The boot config file syntax allows users to merge keys which share the same
+leading words by using braces. For example::
+
+ foo.bar.baz = value1
+ foo.bar.qux.quux = value2
+
+These can also be written as::
+
+ foo.bar {
+ baz = value1
+ qux.quux = value2
+ }
+
+Or, more compactly, as::
+
+ foo.bar { baz = value1; qux.quux = value2 }
+
+In both styles, the same key words are automatically merged when parsed
+at boot time. So you can append similar trees or key-values.
+
+Comments
+--------
+
+The config syntax accepts shell-script style comments. A comment starts
+with hash ("#") and everything up to the newline ("\n") is ignored.
+
+::
+
+ # comment line
+ foo = value # value is set to foo.
+ bar = 1, # 1st element
+ 2, # 2nd element
+ 3 # 3rd element
+
+This is parsed as below::
+
+ foo = value
+ bar = 1, 2, 3
+
+Note that you can not put a comment between a value and its delimiter (``,`` or
+``;``). This means the following config has a syntax error::
+
+ key = 1 # comment
+ ,2
+
+
+/proc/bootconfig
+================
+
+/proc/bootconfig is a user-space interface of the boot config.
+Unlike /proc/cmdline, this file shows the key-value style list.
+Each key-value pair is shown on each line with the following style::
+
+ KEY[.WORDS...] = "[VALUE]"[,"VALUE2"...]
+
+
+Boot Kernel With a Boot Config
+==============================
+
+Since the boot configuration file is loaded with the initrd, it will be added
+to the end of the initrd (initramfs) image file. The Linux kernel decodes
+the last part of the initrd image in memory to get the boot configuration
+data.
+Because of this "piggyback" method, there is no need to change or
+update the boot loader and the kernel image itself.
+
+For this operation, the Linux kernel provides the "bootconfig" command under
+tools/bootconfig, which allows the administrator to apply or delete the config
+file to/from an initrd image. You can build it with the following command::
+
+ # make -C tools/bootconfig
+
+To add your boot config file to an initrd image, run bootconfig as below
+(old data is removed automatically if it exists)::
+
+ # tools/bootconfig/bootconfig -a your-config /boot/initrd.img-X.Y.Z
+
+To remove the config from the image, you can use the -d option as below::
+
+ # tools/bootconfig/bootconfig -d /boot/initrd.img-X.Y.Z
+
+
+Config File Limitation
+======================
+
+Currently the maximum config size is 32KB and the total number of key words
+(not key-value entries) must be under 1024 nodes.
+Note: this is not the number of entries but nodes; an entry consumes at least
+2 nodes (a key word and a value). So theoretically, it can hold up to 512
+key-value pairs. If keys contain 3 words on average, it can contain 256
+key-value pairs. In most cases, the number of config items will be under 100
+entries and smaller than 8KB, so this should be enough.
+If the number of nodes exceeds 1024, the parser returns an error even if the
+file size is smaller than 32KB.
+Anyway, since the bootconfig command verifies this when appending a boot config
+to the initrd image, the user can notice it before boot.
+
+
+Bootconfig APIs
+===============
+
+Users can query or loop over key-value pairs; it is also possible to find a
+root (prefix) key node and look up key-values under that node.
+
+If you have a key string, you can query the value directly with the key
+using xbc_find_value(). If you want to know what keys exist in the boot
+config tree, you can use xbc_for_each_key_value() to iterate over key-value
+pairs.
+Note that you need to use xbc_array_for_each_value() for accessing each
+array's values, e.g.::
+
+ vnode = NULL;
+ xbc_find_value("key.word", &vnode);
+ if (vnode && xbc_node_is_array(vnode))
+    xbc_array_for_each_value(vnode, value) {
+       printk("%s ", value);
+    }
+
+If you want to focus on keys which have a prefix string, you can use
+xbc_find_node() to find the node matching the prefix key words, and iterate
+over keys under that prefix node with xbc_node_for_each_key_value().
+
+But the most typical usage is to get the named value or the named array under
+a prefix, as below::
+
+ root = xbc_find_node("key.prefix");
+ value = xbc_node_find_value(root, "option", &vnode);
+ ...
+ xbc_node_for_each_array_value(root, "array-option", value, anode) {
+ ...
+ }
+
+This accesses a value of "key.prefix.option" and an array of
+"key.prefix.array-option".
+
+Locking is not needed, since after initialization the config becomes
+read-only. All data and keys must be copied if you need to modify them.
+
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/bootconfig.h
+.. kernel-doc:: lib/bootconfig.c
+
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 4405b7485312..9e0f1e3fd152 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -64,6 +64,7 @@ configure specific aspects of kernel behavior to your liking. binderfs binfmt-misc blockdev/index + bootconfig braille-console btmrvl cgroup-v1/index diff --git a/MAINTAINERS b/MAINTAINERS index 903e8a7ed0bf..47873f2e6696 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15777,6 +15777,7 @@ F: lib/bootconfig.c F: fs/proc/bootconfig.c F: include/linux/bootconfig.h F: tools/bootconfig/* +F: Documentation/admin-guide/bootconfig.rst SUN3/3X M: Sam Creasey From d8d0c245a7fdd176e2cf6317b3fddda650059d06 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:05:18 +0900 Subject: [PATCH 065/658] tracing: Apply soft-disabled and filter to tracepoints printk Apply the soft-disabled state and the filter rules of the trace events to the printk output of tracepoints (a.k.a. the tp_printk kernel parameter), in the same way as the trace buffer output.
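As a usage sketch (editorial note; the event, filter, and paths below are illustrative examples, not part of the patch): after this change, a filter set through tracefs also constrains what tp_printk emits to the printk buffer:

    # boot with: tp_printk trace_event=sched:sched_process_exec
    # echo 'pid < 128' > /sys/kernel/tracing/events/sched/sched_process_exec/filter

Only exec events matching the filter are printed via printk, and soft-disabled events produce no printk output, matching what is recorded in the trace buffer.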
Link: http://lkml.kernel.org/r/157867231876.17873.15825819592284704068.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b4a07d7ed82a..b4294eb020f8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2610,6 +2610,7 @@ static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) { struct trace_event_call *event_call; + struct trace_event_file *file; struct trace_event *event; unsigned long flags; struct trace_iterator *iter = tracepoint_print_iter; @@ -2623,6 +2624,12 @@ static void output_printk(struct trace_event_buffer *fbuffer) !event_call->event.funcs->trace) return; + file = fbuffer->trace_file; + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || + (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && + !filter_match_preds(file->filter, fbuffer->entry))) + return; + event = &fbuffer->trace_file->event_call->event; spin_lock_irqsave(&tracepoint_iter_lock, flags); From 8cfcf15503f607e9597de19afeaa621897ae397e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:05:31 +0900 Subject: [PATCH 066/658] tracing: kprobes: Output kprobe event to printk buffer Since kprobe-events use event_trigger_unlock_commit_regs() directly, that events doesn't show up in printk buffer if "tp_printk" is set. Use trace_event_buffer_commit() in kprobe events so that it can invoke output_printk() as same as other trace events. Link: http://lkml.kernel.org/r/157867233085.17873.5210928676787339604.stgit@devnote2 Signed-off-by: Masami Hiramatsu [ Adjusted data var declaration placement in __kretprobe_trace_func() ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 1 + kernel/trace/trace.c | 4 +-- kernel/trace/trace_events.c | 1 + kernel/trace/trace_kprobe.c | 57 ++++++++++++++++++------------------ 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5f7b2b1fce24..20948ee56f8c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -216,6 +216,7 @@ struct trace_event_buffer { void *entry; unsigned long flags; int pc; + struct pt_regs *regs; }; void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b4294eb020f8..cb850d2c4bfa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2680,9 +2680,9 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); - event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, + event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc); + fbuffer->flags, fbuffer->pc, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a16d1b601c5c..dfb736a964d6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -272,6 +272,7 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, if (!fbuffer->event) return NULL; + fbuffer->regs = NULL; fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 477b6b011e7d..33a6a661904b 100644 --- 
a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1175,35 +1175,35 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct trace_event_file *trace_file) { struct kprobe_trace_entry_head *entry; - struct trace_buffer *buffer; - struct ring_buffer_event *event; - int size, dsize, pc; - unsigned long irq_flags; struct trace_event_call *call = trace_probe_event_call(&tk->tp); + struct trace_event_buffer fbuffer; + int dsize; WARN_ON(call != trace_file->event_call); if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(irq_flags); - pc = preempt_count(); + local_save_flags(fbuffer.flags); + fbuffer.pc = preempt_count(); + fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); - size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, - size, irq_flags, pc); - if (!event) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.flags, fbuffer.pc); + if (!fbuffer.event) return; - entry = ring_buffer_event_data(event); + fbuffer.regs = regs; + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->ip = (unsigned long)tk->rp.kp.addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); - event_trigger_unlock_commit_regs(trace_file, buffer, event, - entry, irq_flags, pc, regs); + trace_event_buffer_commit(&fbuffer); } static void @@ -1223,36 +1223,35 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct trace_event_file *trace_file) { struct kretprobe_trace_entry_head *entry; - struct trace_buffer *buffer; - struct ring_buffer_event *event; - int size, pc, dsize; - unsigned long irq_flags; + struct trace_event_buffer fbuffer; struct trace_event_call *call = trace_probe_event_call(&tk->tp); + int dsize; WARN_ON(call != trace_file->event_call); if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(irq_flags); - pc = preempt_count(); + local_save_flags(fbuffer.flags); + fbuffer.pc = preempt_count(); + fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); - size = sizeof(*entry) + tk->tp.size + dsize; - - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, - size, irq_flags, pc); - if (!event) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.flags, fbuffer.pc); + if (!fbuffer.event) return; - entry = ring_buffer_event_data(event); + fbuffer.regs = regs; + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); - event_trigger_unlock_commit_regs(trace_file, buffer, event, - entry, irq_flags, pc, regs); + trace_event_buffer_commit(&fbuffer); } static void From d8d4c6d0e79c418f8c63f3c82429b1462f196155 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:05:42 +0900 Subject: [PATCH 067/658] tracing: kprobes: Register to dynevent earlier stage Register kprobe event to dynevent in subsys_initcall level. This will allow kernel to register new kprobe events in fs_initcall level via trace_run_command. 
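For illustration only (a hypothetical caller, not part of this patch): code running at fs_initcall level inside trace_kprobe.c could now define a probe event, because the dynevent ops are already registered at subsys_initcall time:

	/* Hypothetical example: define a kprobe event from an fs_initcall.
	 * This works only because trace_kprobe_ops is registered with
	 * dyn_event at subsys_initcall time by this patch.
	 */
	static int __init define_boot_kprobe_event(void)
	{
		return trace_run_command("p:kprobes/boot_vfs_read vfs_read",
					 create_or_delete_trace_kprobe);
	}
	fs_initcall(define_boot_kprobe_event);

This is the pattern the boot-time tracing patches later in this series rely on.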
Link: http://lkml.kernel.org/r/157867234213.17873.18039000024374948737.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 33a6a661904b..8113d6aa7bc5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1685,11 +1685,12 @@ static __init void setup_boot_kprobe_events(void) enable_boot_kprobe_events(); } -/* Make a tracefs interface for controlling probe points */ -static __init int init_kprobe_trace(void) +/* + * Register dynevent at subsys_initcall. This allows kernel to setup kprobe + * events in fs_initcall without tracefs. + */ +static __init int init_kprobe_trace_early(void) { - struct dentry *d_tracer; - struct dentry *entry; int ret; ret = dyn_event_register(&trace_kprobe_ops); @@ -1699,6 +1700,16 @@ static __init int init_kprobe_trace(void) if (register_module_notifier(&trace_kprobe_module_nb)) return -EINVAL; + return 0; +} +subsys_initcall(init_kprobe_trace_early); + +/* Make a tracefs interface for controlling probe points */ +static __init int init_kprobe_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + d_tracer = tracing_init_dentry(); if (IS_ERR(d_tracer)) return 0; From b05e89ae7cf3bcabc52399cb833ecc9aaa51ae04 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:05:53 +0900 Subject: [PATCH 068/658] tracing: Accept different type for synthetic event fields Make the synthetic event accepts a different type field to record. However, the size and signed flag must be same. Link: http://lkml.kernel.org/r/157867235358.17873.61732996461602171.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0454abaeb486..4f4759c6e972 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4110,8 +4110,11 @@ static int check_synth_field(struct synth_event *event, field = event->fields[field_pos]; - if (strcmp(field->type, hist_field->type) != 0) - return -EINVAL; + if (strcmp(field->type, hist_field->type) != 0) { + if (field->size != hist_field->size || + field->is_signed != hist_field->is_signed) + return -EINVAL; + } return 0; } From 48ac9488a597eb6116472b6cc0bd875e245e252c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:06:05 +0900 Subject: [PATCH 069/658] tracing: Add NULL trace-array check in print_synth_event() Add NULL trace-array check in print_synth_event(), because if we enable tp_printk option, iter->tr can be NULL. 
Link: http://lkml.kernel.org/r/157867236536.17873.12529350542460184019.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 4f4759c6e972..1cb4c4c8e5b7 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -833,7 +833,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, fmt = synth_field_fmt(se->fields[i]->type); /* parameter types */ - if (tr->trace_flags & TRACE_ITER_VERBOSE) + if (tr && tr->trace_flags & TRACE_ITER_VERBOSE) trace_seq_printf(s, "%s ", fmt); snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); From 9c5b9d3d65e485826fb935453f01171b1a337aa8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:06:17 +0900 Subject: [PATCH 070/658] tracing/boot: Add boot-time tracing Setup tracing options via extra boot config in addition to kernel command line. This adds following commands support. These are applied to the global trace instance. - ftrace.options = OPT1[,OPT2...] Enable given ftrace options. - ftrace.trace_clock = CLOCK Set given CLOCK to ftrace's trace_clock. - ftrace.buffer_size = SIZE Configure ftrace buffer size to SIZE. You can use "KB" or "MB" for that SIZE. - ftrace.events = EVENT[, EVENT2...] Enable given events on boot. You can use a wild card in EVENT. - ftrace.tracer = TRACER Set TRACER to current tracer on boot. (e.g. function) Note that this is NOT replacing the kernel parameters, because this boot config based setting is later than that. If you want to trace earlier boot events, you still need kernel parameters. Link: http://lkml.kernel.org/r/157867237723.17873.17494943526320587488.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 9 +++ kernel/trace/Makefile | 1 + kernel/trace/trace.c | 10 ++-- kernel/trace/trace_boot.c | 113 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 kernel/trace/trace_boot.c diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 25a0fcfa7a5d..75326d8ab1af 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -814,6 +814,15 @@ config GCOV_PROFILE_FTRACE Note that on a kernel compiled with this config, ftrace will run significantly slower. +config BOOTTIME_TRACING + bool "Boot-time Tracing support" + depends on BOOT_CONFIG && TRACING + default y + help + Enable developer to setup ftrace subsystem via supplemental + kernel cmdline at boot time for debugging (tracing) driver + initialization and boot process. 
+ endif # FTRACE endif # TRACING_SUPPORT diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e63db62225f..395e2db9c742 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -83,6 +83,7 @@ endif obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o +obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cb850d2c4bfa..6c996d1b1687 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -162,7 +162,7 @@ union trace_eval_map_item { static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ -static int tracing_set_tracer(struct trace_array *tr, const char *buf); +int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc); @@ -4747,7 +4747,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) return 0; } -static int trace_set_options(struct trace_array *tr, char *option) +int trace_set_options(struct trace_array *tr, char *option) { char *cmp; int neg = 0; @@ -5647,8 +5647,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, return ret; } -static ssize_t tracing_resize_ring_buffer(struct trace_array *tr, - unsigned long size, int cpu_id) +ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id) { int ret = size; @@ -5727,7 +5727,7 @@ static void add_tracer_options(struct trace_array *tr, struct tracer *t) create_trace_option_files(tr, t); } -static int tracing_set_tracer(struct trace_array *tr, const char *buf) +int tracing_set_tracer(struct trace_array *tr, const char *buf) { struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c new file mode 100644 index 000000000000..4b41310184df --- /dev/null +++ b/kernel/trace/trace_boot.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_boot.c + * Tracing kernel boot-time + */ + +#define pr_fmt(fmt) "trace_boot: " fmt + +#include +#include +#include + +#include "trace.h" + +#define MAX_BUF_LEN 256 + +extern int trace_set_options(struct trace_array *tr, char *option); +extern int tracing_set_tracer(struct trace_array *tr, const char *buf); +extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id); + +static void __init +trace_boot_set_ftrace_options(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + const char *p; + char buf[MAX_BUF_LEN]; + unsigned long v = 0; + + /* Common ftrace options */ + xbc_node_for_each_array_value(node, "options", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) { + pr_err("String is too long: %s\n", p); + continue; + } + + if (trace_set_options(tr, buf) < 0) + pr_err("Failed to set option: %s\n", buf); + } + + p = xbc_node_find_value(node, "trace_clock", NULL); + if (p && *p != '\0') { + if (tracing_set_clock(tr, p) < 0) + pr_err("Failed to set trace clock: %s\n", p); + } + + p = xbc_node_find_value(node, "buffer_size", NULL); + if (p && *p != '\0') { + v = memparse(p, NULL); + if (v < PAGE_SIZE) + pr_err("Buffer size is too small: %s\n", p); + if (tracing_resize_ring_buffer(tr, v, RING_BUFFER_ALL_CPUS) < 0) + pr_err("Failed to resize trace buffer to %s\n", p); + } +} + +#ifdef CONFIG_EVENT_TRACING +extern int 
ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); + +static void __init +trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p; + + xbc_node_for_each_array_value(node, "events", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) { + pr_err("String is too long: %s\n", p); + continue; + } + + if (ftrace_set_clr_event(tr, buf, 1) < 0) + pr_err("Failed to enable event: %s\n", p); + } +} +#else +#define trace_boot_enable_events(tr, node) do {} while (0) +#endif + +static void __init +trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node) +{ + const char *p; + + p = xbc_node_find_value(node, "tracer", NULL); + if (p && *p != '\0') { + if (tracing_set_tracer(tr, p) < 0) + pr_err("Failed to set given tracer: %s\n", p); + } +} + +static int __init trace_boot_init(void) +{ + struct xbc_node *trace_node; + struct trace_array *tr; + + trace_node = xbc_find_node("ftrace"); + if (!trace_node) + return 0; + + tr = top_trace_array(); + if (!tr) + return 0; + + trace_boot_set_ftrace_options(tr, trace_node); + trace_boot_enable_events(tr, trace_node); + trace_boot_enable_tracer(tr, trace_node); + + return 0; +} + +fs_initcall(trace_boot_init); From 81a59555ff1593642824414267e1859024bd0162 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:06:29 +0900 Subject: [PATCH 071/658] tracing/boot: Add per-event settings Add per-event settings for boottime tracing. User can set filter, actions and enable on each event on boot. The event entries are under ftrace.event.GROUP.EVENT node (note that the option key includes event's group name and event name.) This supports below configs. - ftrace.event.GROUP.EVENT.enable Enables GROUP:EVENT tracing. - ftrace.event.GROUP.EVENT.filter = FILTER Set FILTER rule to the GROUP:EVENT. - ftrace.event.GROUP.EVENT.actions = ACTION[, ACTION2...] Set ACTIONs to the GROUP:EVENT. For example, ftrace.event.sched.sched_process_exec { filter = "pid < 128" enable } this will enable tracing "sched:sched_process_exec" event with "pid < 128" filter. 
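For comparison (editorial, illustrative only), the example above corresponds roughly to the following run-time tracefs operations, which this patch lets the kernel apply from the boot config before user space runs:

    # echo 'pid < 128' > /sys/kernel/tracing/events/sched/sched_process_exec/filter
    # echo 1 > /sys/kernel/tracing/events/sched/sched_process_exec/enable

The "actions" values map to the per-event "trigger" file in the same way.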
Link: http://lkml.kernel.org/r/157867238942.17873.11177628789184546198.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 60 +++++++++++++++++++++++++++++ kernel/trace/trace_events_trigger.c | 2 +- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 4b41310184df..37524031533e 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -56,6 +56,7 @@ trace_boot_set_ftrace_options(struct trace_array *tr, struct xbc_node *node) #ifdef CONFIG_EVENT_TRACING extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); +extern int trigger_process_regex(struct trace_event_file *file, char *buff); static void __init trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) @@ -74,8 +75,66 @@ trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) pr_err("Failed to enable event: %s\n", p); } } + +static void __init +trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, + struct xbc_node *enode) +{ + struct trace_event_file *file; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p, *group, *event; + + group = xbc_node_get_data(gnode); + event = xbc_node_get_data(enode); + + mutex_lock(&event_mutex); + file = find_event_file(tr, group, event); + if (!file) { + pr_err("Failed to find event: %s:%s\n", group, event); + goto out; + } + + p = xbc_node_find_value(enode, "filter", NULL); + if (p && *p != '\0') { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + pr_err("filter string is too long: %s\n", p); + else if (apply_event_filter(file, buf) < 0) + pr_err("Failed to apply filter: %s\n", buf); + } + + xbc_node_for_each_array_value(enode, "actions", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + pr_err("action string is too long: %s\n", p); + else if (trigger_process_regex(file, buf) < 0) + pr_err("Failed to apply an action: %s\n", buf); + } + + if (xbc_node_find_value(enode, "enable", NULL)) { + if (trace_event_enable_disable(file, 1, 0) < 0) + pr_err("Failed to enable event node: %s:%s\n", + group, event); + } +out: + mutex_unlock(&event_mutex); +} + +static void __init +trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *gnode, *enode; + + node = xbc_node_find_child(node, "event"); + if (!node) + return; + /* per-event key starts with "event.GROUP.EVENT" */ + xbc_node_for_each_child(node, gnode) + xbc_node_for_each_child(gnode, enode) + trace_boot_init_one_event(tr, gnode, enode); +} #else #define trace_boot_enable_events(tr, node) do {} while (0) +#define trace_boot_init_events(tr, node) do {} while (0) #endif static void __init @@ -104,6 +163,7 @@ static int __init trace_boot_init(void) return 0; trace_boot_set_ftrace_options(tr, trace_node); + trace_boot_init_events(tr, trace_node); trace_boot_enable_events(tr, trace_node); trace_boot_enable_tracer(tr, trace_node); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 2cd53ca21b51..d8ada4c6f3f7 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -213,7 +213,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) return ret; } -static int trigger_process_regex(struct trace_event_file *file, char *buff) +int trigger_process_regex(struct trace_event_file *file, char *buff) { char *command, *next = buff; struct event_command *p; From 
4d655281eb1bb59fad021c0f68afd033f8d0320d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:06:41 +0900 Subject: [PATCH 072/658] tracing/boot Add kprobe event support Add kprobe event support on event node to boot-time tracing. If the group name of event is "kprobes", the boot-time tracing defines new probe event according to "probes" values. - ftrace.event.kprobes.EVENT.probes = PROBE[, PROBE2...] Defines new kprobe event based on PROBEs. It is able to define multiple probes on one event, but those must have same type of arguments. For example, ftrace.events.kprobes.myevent { probes = "vfs_read $arg1 $arg2"; enable; } This will add kprobes:myevent on vfs_read with the 1st and the 2nd arguments. Link: http://lkml.kernel.org/r/157867240104.17873.9712052065426433111.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 46 +++++++++++++++++++++++++++++++++++++ kernel/trace/trace_kprobe.c | 5 ++++ 2 files changed, 51 insertions(+) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 37524031533e..a11dc60299fb 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -76,6 +76,48 @@ trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) } } +#ifdef CONFIG_KPROBE_EVENTS +extern int trace_kprobe_run_command(const char *command); + +static int __init +trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) +{ + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *val; + char *p; + int len; + + len = snprintf(buf, ARRAY_SIZE(buf) - 1, "p:kprobes/%s ", event); + if (len >= ARRAY_SIZE(buf)) { + pr_err("Event name is too long: %s\n", event); + return -E2BIG; + } + p = buf + len; + len = ARRAY_SIZE(buf) - len; + + xbc_node_for_each_array_value(node, "probes", anode, val) { + if (strlcpy(p, val, len) >= len) { + pr_err("Probe definition is too long: %s\n", val); + return -E2BIG; + } + if (trace_kprobe_run_command(buf) < 0) { + pr_err("Failed to add probe: %s\n", buf); + return -EINVAL; + } + } + + return 0; +} +#else +static inline int __init +trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) +{ + pr_err("Kprobe event is not supported.\n"); + return -ENOTSUPP; +} +#endif + static void __init trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, struct xbc_node *enode) @@ -88,6 +130,10 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, group = xbc_node_get_data(gnode); event = xbc_node_get_data(enode); + if (!strcmp(group, "kprobes")) + if (trace_boot_add_kprobe_event(enode, event) < 0) + return; + mutex_lock(&event_mutex); file = find_event_file(tr, group, event); if (!file) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8113d6aa7bc5..283b7c437440 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -902,6 +902,11 @@ static int create_or_delete_trace_kprobe(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } +int trace_kprobe_run_command(const char *command) +{ + return trace_run_command(command, create_or_delete_trace_kprobe); +} + static int trace_kprobe_release(struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); From 3fbe2d6e1fce255d918b622fb2af22e98364a154 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:06:52 +0900 Subject: [PATCH 073/658] tracing/boot: Add synthetic event support Add synthetic event node support to boot time tracing. 
The synthetic event is a kind of event node, but the group name is "synthetic". - ftrace.event.synthetic.EVENT.fields = FIELD[, FIELD2...] Defines new synthetic event with FIELDs. Each field should be "type varname". The synthetic node requires "fields" string arraies, which defines the fields as same as tracing/synth_events interface. Link: http://lkml.kernel.org/r/157867241236.17873.12411615143321557709.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 47 ++++++++++++++++++++++++++++++++ kernel/trace/trace_events_hist.c | 5 ++++ 2 files changed, 52 insertions(+) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index a11dc60299fb..3054921b0877 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -118,6 +118,50 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) } #endif +#ifdef CONFIG_HIST_TRIGGERS +extern int synth_event_run_command(const char *command); + +static int __init +trace_boot_add_synth_event(struct xbc_node *node, const char *event) +{ + struct xbc_node *anode; + char buf[MAX_BUF_LEN], *q; + const char *p; + int len, delta, ret; + + len = ARRAY_SIZE(buf); + delta = snprintf(buf, len, "%s", event); + if (delta >= len) { + pr_err("Event name is too long: %s\n", event); + return -E2BIG; + } + len -= delta; q = buf + delta; + + xbc_node_for_each_array_value(node, "fields", anode, p) { + delta = snprintf(q, len, " %s;", p); + if (delta >= len) { + pr_err("fields string is too long: %s\n", p); + return -E2BIG; + } + len -= delta; q += delta; + } + + ret = synth_event_run_command(buf); + if (ret < 0) + pr_err("Failed to add synthetic event: %s\n", buf); + + + return ret; +} +#else +static inline int __init +trace_boot_add_synth_event(struct xbc_node *node, const char *event) +{ + pr_err("Synthetic event is not supported.\n"); + return -ENOTSUPP; +} +#endif + static void __init trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, struct xbc_node *enode) @@ -133,6 +177,9 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, if (!strcmp(group, "kprobes")) if (trace_boot_add_kprobe_event(enode, event) < 0) return; + if (!strcmp(group, "synthetic")) + if (trace_boot_add_synth_event(enode, event) < 0) + return; mutex_lock(&event_mutex); file = find_event_file(tr, group, event); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1cb4c4c8e5b7..8e90f1ada437 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1384,6 +1384,11 @@ static int create_or_delete_synth_event(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } +int synth_event_run_command(const char *command) +{ + return trace_run_command(command, create_or_delete_synth_event); +} + static int synth_event_create(int argc, const char **argv) { const char *name = argv[0]; From 4f712a4d04a4e49167118b41d8ea96df70a98985 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:07:04 +0900 Subject: [PATCH 074/658] tracing/boot: Add instance node support Add instance node support to boot-time tracing. User can set some options and event nodes under instance node. - ftrace.instance.INSTANCE[...] Add new INSTANCE instance. Some options and event nodes are acceptable for instance node. 
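For example (an illustrative configuration; the instance name "foo" and the events are arbitrary), a separate trace instance with its own tracer and events could be described as:

    ftrace.instance.foo {
        tracer = function
        events = "sched:sched_switch", "sched:sched_wakeup"
    }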
Link: http://lkml.kernel.org/r/157867242413.17873.9814204526141500278.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 43 ++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 3054921b0877..f5db30d25b0b 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -20,7 +20,7 @@ extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id); static void __init -trace_boot_set_ftrace_options(struct trace_array *tr, struct xbc_node *node) +trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) { struct xbc_node *anode; const char *p; @@ -242,6 +242,40 @@ trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node) } } +static void __init +trace_boot_init_one_instance(struct trace_array *tr, struct xbc_node *node) +{ + trace_boot_set_instance_options(tr, node); + trace_boot_init_events(tr, node); + trace_boot_enable_events(tr, node); + trace_boot_enable_tracer(tr, node); +} + +static void __init +trace_boot_init_instances(struct xbc_node *node) +{ + struct xbc_node *inode; + struct trace_array *tr; + const char *p; + + node = xbc_node_find_child(node, "instance"); + if (!node) + return; + + xbc_node_for_each_child(node, inode) { + p = xbc_node_get_data(inode); + if (!p || *p == '\0') + continue; + + tr = trace_array_get_by_name(p); + if (IS_ERR(tr)) { + pr_err("Failed to get trace instance %s\n", p); + continue; + } + trace_boot_init_one_instance(tr, inode); + } +} + static int __init trace_boot_init(void) { struct xbc_node *trace_node; @@ -255,10 +289,9 @@ static int __init trace_boot_init(void) if (!tr) return 0; - trace_boot_set_ftrace_options(tr, trace_node); - trace_boot_init_events(tr, trace_node); - trace_boot_enable_events(tr, trace_node); - trace_boot_enable_tracer(tr, trace_node); + /* Global trace array is also one instance */ + trace_boot_init_one_instance(tr, trace_node); + trace_boot_init_instances(trace_node); return 0; } From 9d15dbbde1048e490f3773e67d7ef7604bab1409 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:07:16 +0900 Subject: [PATCH 075/658] tracing/boot: Add cpu_mask option support Add ftrace.cpumask option support to boot-time tracing. This sets cpumask for each instance. - ftrace.[instance.INSTANCE.]cpumask = CPUMASK; Set the trace cpumask. Note that the CPUMASK should be a string which /tracing_cpumask can accepts. 
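For example (illustrative; the mask value is arbitrary), restricting the global trace instance to CPUs 0 and 1 would look like:

    ftrace.cpumask = 3

The value uses the same hexadecimal CPU-mask format that the tracing_cpumask file accepts (bit 0 = CPU 0, bit 1 = CPU 1).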
Link: http://lkml.kernel.org/r/157867243625.17873.13613922641273149372.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 42 +++++++++++++++++++++++++++------------ kernel/trace/trace_boot.c | 14 +++++++++++++ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6c996d1b1687..106bbc0988fe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4561,20 +4561,13 @@ out_err: return count; } -static ssize_t -tracing_cpumask_write(struct file *filp, const char __user *ubuf, - size_t count, loff_t *ppos) +int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new) { - struct trace_array *tr = file_inode(filp)->i_private; - cpumask_var_t tracing_cpumask_new; - int err, cpu; + int cpu; - if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) - return -ENOMEM; - - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); - if (err) - goto err_unlock; + if (!tr) + return -EINVAL; local_irq_disable(); arch_spin_lock(&tr->max_lock); @@ -4598,11 +4591,34 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); + + return 0; +} + +static ssize_t +tracing_cpumask_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct trace_array *tr = file_inode(filp)->i_private; + cpumask_var_t tracing_cpumask_new; + int err; + + if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) + return -ENOMEM; + + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + if (err) + goto err_free; + + err = tracing_set_cpumask(tr, tracing_cpumask_new); + if (err) + goto err_free; + free_cpumask_var(tracing_cpumask_new); return count; -err_unlock: +err_free: free_cpumask_var(tracing_cpumask_new); return err; diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index f5db30d25b0b..81d923c16a4d 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -18,6 +18,8 @@ extern int trace_set_options(struct trace_array *tr, char *option); extern int tracing_set_tracer(struct trace_array *tr, const char *buf); extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id); +extern int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new); static void __init trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) @@ -52,6 +54,18 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) if (tracing_resize_ring_buffer(tr, v, RING_BUFFER_ALL_CPUS) < 0) pr_err("Failed to resize trace buffer to %s\n", p); } + + p = xbc_node_find_value(node, "cpumask", NULL); + if (p && *p != '\0') { + cpumask_var_t new_mask; + + if (alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + if (cpumask_parse(p, new_mask) < 0 || + tracing_set_cpumask(tr, new_mask) < 0) + pr_err("Failed to set new CPU mask %s\n", p); + free_cpumask_var(new_mask); + } + } } #ifdef CONFIG_EVENT_TRACING From fe1efe9252f938f0cc624e4ac817c27bcaef6ed0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:07:28 +0900 Subject: [PATCH 076/658] tracing/boot: Add function tracer filter options Add below function-tracer filter options to boot-time tracing. 
- ftrace.[instance.INSTANCE.]ftrace.filters This will take an array of tracing function filter rules - ftrace.[instance.INSTANCE.]ftrace.notraces This will take an array of NON-tracing function filter rules Link: http://lkml.kernel.org/r/157867244841.17873.10933616628243103561.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 81d923c16a4d..fa9603dc6469 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -244,11 +244,51 @@ trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) #define trace_boot_init_events(tr, node) do {} while (0) #endif +#ifdef CONFIG_DYNAMIC_FTRACE +extern bool ftrace_filter_param __initdata; +extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +static void __init +trace_boot_set_ftrace_filter(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + const char *p; + char *q; + + xbc_node_for_each_array_value(node, "ftrace.filters", anode, p) { + q = kstrdup(p, GFP_KERNEL); + if (!q) + return; + if (ftrace_set_filter(tr->ops, q, strlen(q), 0) < 0) + pr_err("Failed to add %s to ftrace filter\n", p); + else + ftrace_filter_param = true; + kfree(q); + } + xbc_node_for_each_array_value(node, "ftrace.notraces", anode, p) { + q = kstrdup(p, GFP_KERNEL); + if (!q) + return; + if (ftrace_set_notrace(tr->ops, q, strlen(q), 0) < 0) + pr_err("Failed to add %s to ftrace filter\n", p); + else + ftrace_filter_param = true; + kfree(q); + } +} +#else +#define trace_boot_set_ftrace_filter(tr, node) do {} while (0) +#endif + static void __init trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node) { const char *p; + trace_boot_set_ftrace_filter(tr, node); + p = xbc_node_find_value(node, "tracer", NULL); if (p && *p != '\0') { if (tracing_set_tracer(tr, p) < 0) From 47781947947a8f5e5e1205714fda64563d2b8f05 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jan 2020 01:07:40 +0900 Subject: [PATCH 077/658] Documentation: tracing: Add boot-time tracing document Add a documentation about boot-time tracing options in boot config. Link: http://lkml.kernel.org/r/157867246028.17873.8047384554383977870.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 2 + Documentation/trace/boottime-trace.rst | 184 +++++++++++++++++++++++ Documentation/trace/index.rst | 1 + 3 files changed, 187 insertions(+) create mode 100644 Documentation/trace/boottime-trace.rst diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index f7475df2a718..c8f7cd4cf44e 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -1,5 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0 +.. _bootconfig: + ================== Boot Configuration ================== diff --git a/Documentation/trace/boottime-trace.rst b/Documentation/trace/boottime-trace.rst new file mode 100644 index 000000000000..1d10fdebf1b2 --- /dev/null +++ b/Documentation/trace/boottime-trace.rst @@ -0,0 +1,184 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +================= +Boot-time tracing +================= + +:Author: Masami Hiramatsu + +Overview +======== + +Boot-time tracing allows users to trace boot-time process including +device initialization with full features of ftrace including per-event +filter and actions, histograms, kprobe-events and synthetic-events, +and trace instances. +Since kernel cmdline is not enough to control these complex features, +this uses bootconfig file to describe tracing feature programming. + +Options in the Boot Config +========================== + +Here is the list of available options list for boot time tracing in +boot config file [1]_. All options are under "ftrace." or "kernel." +refix. See kernel parameters for the options which starts +with "kernel." prefix [2]_. + +.. [1] See :ref:`Documentation/admin-guide/bootconfig.rst ` +.. [2] See :ref:`Documentation/admin-guide/kernel-parameters.rst ` + +Ftrace Global Options +--------------------- + +Ftrace global options have "kernel." prefix in boot config, which means +these options are passed as a part of kernel legacy command line. + +kernel.tp_printk + Output trace-event data on printk buffer too. + +kernel.dump_on_oops [= MODE] + Dump ftrace on Oops. If MODE = 1 or omitted, dump trace buffer + on all CPUs. If MODE = 2, dump a buffer on a CPU which kicks Oops. + +kernel.traceoff_on_warning + Stop tracing if WARN_ON() occurs. + +kernel.fgraph_max_depth = MAX_DEPTH + Set MAX_DEPTH to maximum depth of fgraph tracer. + +kernel.fgraph_filters = FILTER[, FILTER2...] + Add fgraph tracing function filters. + +kernel.fgraph_notraces = FILTER[, FILTER2...] + Add fgraph non tracing function filters. + + +Ftrace Per-instance Options +--------------------------- + +These options can be used for each instance including global ftrace node. + +ftrace.[instance.INSTANCE.]options = OPT1[, OPT2[...]] + Enable given ftrace options. + +ftrace.[instance.INSTANCE.]trace_clock = CLOCK + Set given CLOCK to ftrace's trace_clock. + +ftrace.[instance.INSTANCE.]buffer_size = SIZE + Configure ftrace buffer size to SIZE. You can use "KB" or "MB" + for that SIZE. + +ftrace.[instance.INSTANCE.]alloc_snapshot + Allocate snapshot buffer. + +ftrace.[instance.INSTANCE.]cpumask = CPUMASK + Set CPUMASK as trace cpu-mask. + +ftrace.[instance.INSTANCE.]events = EVENT[, EVENT2[...]] + Enable given events on boot. You can use a wild card in EVENT. + +ftrace.[instance.INSTANCE.]tracer = TRACER + Set TRACER to current tracer on boot. (e.g. function) + +ftrace.[instance.INSTANCE.]ftrace.filters + This will take an array of tracing function filter rules + +ftrace.[instance.INSTANCE.]ftrace.notraces + This will take an array of NON-tracing function filter rules + + +Ftrace Per-Event Options +------------------------ + +These options are setting per-event options. + +ftrace.[instance.INSTANCE.]event.GROUP.EVENT.enable + Enables GROUP:EVENT tracing. + +ftrace.[instance.INSTANCE.]event.GROUP.EVENT.filter = FILTER + Set FILTER rule to the GROUP:EVENT. + +ftrace.[instance.INSTANCE.]event.GROUP.EVENT.actions = ACTION[, ACTION2[...]] + Set ACTIONs to the GROUP:EVENT. + +ftrace.[instance.INSTANCE.]event.kprobes.EVENT.probes = PROBE[, PROBE2[...]] + Defines new kprobe event based on PROBEs. It is able to define + multiple probes on one event, but those must have same type of + arguments. This option is available only for the event which + group name is "kprobes". 
+ +ftrace.[instance.INSTANCE.]event.synthetic.EVENT.fields = FIELD[, FIELD2[...]] + Defines new synthetic event with FIELDs. Each field should be + "type varname". + +Note that kprobe and synthetic event definitions can be written under +instance node, but those are also visible from other instances. So please +take care for event name conflict. + + +Examples +======== + +For example, to add filter and actions for each event, define kprobe +events, and synthetic events with histogram, write a boot config like +below:: + + ftrace.event { + task.task_newtask { + filter = "pid < 128" + enable + } + kprobes.vfs_read { + probes = "vfs_read $arg1 $arg2" + filter = "common_pid < 200" + enable + } + synthetic.initcall_latency { + fields = "unsigned long func", "u64 lat" + actions = "hist:keys=func.sym,lat:vals=lat:sort=lat" + } + initcall.initcall_start { + actions = "hist:keys=func:ts0=common_timestamp.usecs" + } + initcall.initcall_finish { + actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)" + } + } + +Also, boottime tracing supports "instance" node, which allows us to run +several tracers for different purpose at once. For example, one tracer +is for tracing functions start with "user\_", and others tracing "kernel\_" +functions, you can write boot config as below:: + + ftrace.instance { + foo { + tracer = "function" + ftrace.filters = "user_*" + } + bar { + tracer = "function" + ftrace.filters = "kernel_*" + } + } + +The instance node also accepts event nodes so that each instance +can customize its event tracing. + +This boot-time tracing also supports ftrace kernel parameters via boot +config. +For example, following kernel parameters:: + + trace_options=sym-addr trace_event=initcall:* tp_printk trace_buf_size=1M ftrace=function ftrace_filter="vfs*" + +This can be written in boot config like below:: + + kernel { + trace_options = sym-addr + trace_event = "initcall:*" + tp_printk + trace_buf_size = 1M + ftrace = function + ftrace_filter = "vfs*" + } + +Note that parameters start with "kernel" prefix instead of "ftrace". diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 04acd277c5f6..fa9e1c730f6a 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -19,6 +19,7 @@ Linux Tracing Technologies events-msr mmiotrace histogram + boottime-trace hwlat_detector intel_th stm From 3b42a4c83a31d8f1d8a7cb7eb2f4ee809d42c69d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Dec 2019 11:31:43 +0900 Subject: [PATCH 078/658] tracing: trigger: Replace unneeded RCU-list traversals With CONFIG_PROVE_RCU_LIST, I had many suspicious RCU warnings when I ran ftracetest trigger testcases. ----- # dmesg -c > /dev/null # ./ftracetest test.d/trigger ... 
# dmesg | grep "RCU-list traversed" | cut -f 2 -d ] | cut -f 2 -d " " kernel/trace/trace_events_hist.c:6070 kernel/trace/trace_events_hist.c:1760 kernel/trace/trace_events_hist.c:5911 kernel/trace/trace_events_trigger.c:504 kernel/trace/trace_events_hist.c:1810 kernel/trace/trace_events_hist.c:3158 kernel/trace/trace_events_hist.c:3105 kernel/trace/trace_events_hist.c:5518 kernel/trace/trace_events_hist.c:5998 kernel/trace/trace_events_hist.c:6019 kernel/trace/trace_events_hist.c:6044 kernel/trace/trace_events_trigger.c:1500 kernel/trace/trace_events_trigger.c:1540 kernel/trace/trace_events_trigger.c:539 kernel/trace/trace_events_trigger.c:584 ----- I investigated those warnings and found that the RCU-list traversals in event trigger and hist didn't need to use RCU version because those were called only under event_mutex. I also checked other RCU-list traversals related to event trigger list, and found that most of them were called from event_hist_trigger_func() or hist_unregister_trigger() or register/unregister functions except for a few cases. Replace these unneeded RCU-list traversals with normal list traversal macro and lockdep_assert_held() to check the event_mutex is held. Link: http://lkml.kernel.org/r/157680910305.11685.15110237954275915782.stgit@devnote2 Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 41 +++++++++++++++++++++-------- kernel/trace/trace_events_trigger.c | 20 ++++++++++---- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 8e90f1ada437..117a1202a6b9 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1771,11 +1771,13 @@ static struct hist_field *find_var(struct hist_trigger_data *hist_data, struct event_trigger_data *test; struct hist_field *hist_field; + lockdep_assert_held(&event_mutex); + hist_field = find_var_field(hist_data, var_name); if (hist_field) return hist_field; - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { test_data = test->private_data; hist_field = find_var_field(test_data, var_name); @@ -1825,7 +1827,9 @@ static struct hist_field *find_file_var(struct trace_event_file *file, struct event_trigger_data *test; struct hist_field *hist_field; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { test_data = test->private_data; hist_field = find_var_field(test_data, var_name); @@ -3120,7 +3124,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, { struct event_trigger_data *test; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (test->private_data == hist_data) return test->filter_str; @@ -3171,9 +3177,11 @@ find_compatible_hist(struct hist_trigger_data *target_hist_data, struct event_trigger_data *test; unsigned int n_keys; + lockdep_assert_held(&event_mutex); + n_keys = target_hist_data->n_fields - target_hist_data->n_vals; - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { hist_data = 
test->private_data; @@ -5536,7 +5544,7 @@ static int hist_show(struct seq_file *m, void *v) goto out_unlock; } - list_for_each_entry_rcu(data, &event_file->triggers, list) { + list_for_each_entry(data, &event_file->triggers, list) { if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) hist_trigger_show(m, data, n++); } @@ -5929,7 +5937,9 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (hist_data->attrs->name && !named_data) goto new; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -6013,10 +6023,12 @@ static bool have_hist_trigger_match(struct event_trigger_data *data, struct event_trigger_data *test, *named_data = NULL; bool match = false; + lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (hist_trigger_match(data, test, named_data, false)) { match = true; @@ -6034,10 +6046,12 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data, struct hist_trigger_data *hist_data = data->private_data; struct event_trigger_data *test, *named_data = NULL; + lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -6059,10 +6073,12 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, *named_data = NULL; bool unregistered = false; + lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -6088,7 +6104,9 @@ static bool hist_file_check_refs(struct trace_event_file *file) struct hist_trigger_data *hist_data; struct event_trigger_data *test; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { hist_data = test->private_data; if (check_var_refs(hist_data)) @@ -6331,7 +6349,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec, struct enable_trigger_data *enable_data = data->private_data; struct event_trigger_data *test; - list_for_each_entry_rcu(test, &enable_data->file->triggers, list) { + list_for_each_entry_rcu(test, &enable_data->file->triggers, list, + lockdep_is_held(&event_mutex)) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (enable_data->enable) test->paused = false; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d8ada4c6f3f7..60959c31791d 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -501,7 +501,9 @@ void update_cond_flag(struct trace_event_file *file) struct event_trigger_data *data; bool 
set_cond = false; - list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { if (data->filter || event_command_post_trigger(data->cmd_ops) || event_command_needs_rec(data->cmd_ops)) { set_cond = true; @@ -536,7 +538,9 @@ static int register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test; int ret = 0; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) { ret = -EEXIST; goto out; @@ -581,7 +585,9 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data; bool unregistered = false; - list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { unregistered = true; list_del_rcu(&data->list); @@ -1497,7 +1503,9 @@ int event_enable_register_trigger(char *glob, struct event_trigger_data *test; int ret = 0; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { test_enable_data = test->private_data; if (test_enable_data && (test->cmd_ops->trigger_type == @@ -1537,7 +1545,9 @@ void event_enable_unregister_trigger(char *glob, struct event_trigger_data *data; bool unregistered = false; - list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { enable_data = data->private_data; if (enable_data && (data->cmd_ops->trigger_type == From fe402bd0904979ca4417c0d499b471484e588a9e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 8 Jan 2020 00:29:01 +0100 Subject: [PATCH 079/658] i2c: meson: implement the master_xfer_atomic callback Boards with some of the 32-bit SoCs (mostly Meson8 and Meson8m2) use a Ricoh RN5T618 PMU which acts as system power controller. The driver for the system power controller may need to the I2C bus just before shutting down or rebooting the system. At this stage the interrupts may be disabled already. Implement the master_xfer_atomic callback so the driver for the RN5T618 PMU can communicate properly with the PMU when shutting down or rebooting the board. The CTRL register has a status bit which can be polled to determine when processing has completed. According to the public S805 datasheet the value 0 means "idle" and 1 means "running". Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong [wsa: converted some whitespace alignment] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-meson.c | 97 +++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 32 deletions(-) diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index 1e2647f9a2a7..06b3bed78421 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -213,6 +214,30 @@ static void meson_i2c_prepare_xfer(struct meson_i2c *i2c) writel(i2c->tokens[1], i2c->regs + REG_TOK_LIST1); } +static void meson_i2c_transfer_complete(struct meson_i2c *i2c, u32 ctrl) +{ + if (ctrl & REG_CTRL_ERROR) { + /* + * The bit is set when the IGNORE_NAK bit is cleared + * and the device didn't respond. 
In this case, the + * I2C controller automatically generates a STOP + * condition. + */ + dev_dbg(i2c->dev, "error bit set\n"); + i2c->error = -ENXIO; + i2c->state = STATE_IDLE; + } else { + if (i2c->state == STATE_READ && i2c->count) + meson_i2c_get_data(i2c, i2c->msg->buf + i2c->pos, + i2c->count); + + i2c->pos += i2c->count; + + if (i2c->pos >= i2c->msg->len) + i2c->state = STATE_IDLE; + } +} + static irqreturn_t meson_i2c_irq(int irqno, void *dev_id) { struct meson_i2c *i2c = dev_id; @@ -232,27 +257,9 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id) return IRQ_NONE; } - if (ctrl & REG_CTRL_ERROR) { - /* - * The bit is set when the IGNORE_NAK bit is cleared - * and the device didn't respond. In this case, the - * I2C controller automatically generates a STOP - * condition. - */ - dev_dbg(i2c->dev, "error bit set\n"); - i2c->error = -ENXIO; - i2c->state = STATE_IDLE; - complete(&i2c->done); - goto out; - } + meson_i2c_transfer_complete(i2c, ctrl); - if (i2c->state == STATE_READ && i2c->count) - meson_i2c_get_data(i2c, i2c->msg->buf + i2c->pos, i2c->count); - - i2c->pos += i2c->count; - - if (i2c->pos >= i2c->msg->len) { - i2c->state = STATE_IDLE; + if (i2c->state == STATE_IDLE) { complete(&i2c->done); goto out; } @@ -279,10 +286,11 @@ static void meson_i2c_do_start(struct meson_i2c *i2c, struct i2c_msg *msg) } static int meson_i2c_xfer_msg(struct meson_i2c *i2c, struct i2c_msg *msg, - int last) + int last, bool atomic) { unsigned long time_left, flags; int ret = 0; + u32 ctrl; i2c->msg = msg; i2c->last = last; @@ -300,13 +308,24 @@ static int meson_i2c_xfer_msg(struct meson_i2c *i2c, struct i2c_msg *msg, i2c->state = (msg->flags & I2C_M_RD) ? STATE_READ : STATE_WRITE; meson_i2c_prepare_xfer(i2c); - reinit_completion(&i2c->done); + + if (!atomic) + reinit_completion(&i2c->done); /* Start the transfer */ meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_START, REG_CTRL_START); - time_left = msecs_to_jiffies(I2C_TIMEOUT_MS); - time_left = wait_for_completion_timeout(&i2c->done, time_left); + if (atomic) { + ret = readl_poll_timeout_atomic(i2c->regs + REG_CTRL, ctrl, + !(ctrl & REG_CTRL_STATUS), + 10, I2C_TIMEOUT_MS * 1000); + } else { + time_left = msecs_to_jiffies(I2C_TIMEOUT_MS); + time_left = wait_for_completion_timeout(&i2c->done, time_left); + + if (!time_left) + ret = -ETIMEDOUT; + } /* * Protect access to i2c struct and registers from interrupt @@ -315,13 +334,14 @@ static int meson_i2c_xfer_msg(struct meson_i2c *i2c, struct i2c_msg *msg, */ spin_lock_irqsave(&i2c->lock, flags); + if (atomic && !ret) + meson_i2c_transfer_complete(i2c, ctrl); + /* Abort any active operation */ meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_START, 0); - if (!time_left) { + if (ret) i2c->state = STATE_IDLE; - ret = -ETIMEDOUT; - } if (i2c->error) ret = i2c->error; @@ -331,8 +351,8 @@ static int meson_i2c_xfer_msg(struct meson_i2c *i2c, struct i2c_msg *msg, return ret; } -static int meson_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num) +static int meson_i2c_xfer_messages(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num, bool atomic) { struct meson_i2c *i2c = adap->algo_data; int i, ret = 0; @@ -340,7 +360,7 @@ static int meson_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, clk_enable(i2c->clk); for (i = 0; i < num; i++) { - ret = meson_i2c_xfer_msg(i2c, msgs + i, i == num - 1); + ret = meson_i2c_xfer_msg(i2c, msgs + i, i == num - 1, atomic); if (ret) break; } @@ -350,14 +370,27 @@ static int meson_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, return ret 
?: i; } +static int meson_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num) +{ + return meson_i2c_xfer_messages(adap, msgs, num, false); +} + +static int meson_i2c_xfer_atomic(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return meson_i2c_xfer_messages(adap, msgs, num, true); +} + static u32 meson_i2c_func(struct i2c_adapter *adap) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static const struct i2c_algorithm meson_i2c_algorithm = { - .master_xfer = meson_i2c_xfer, - .functionality = meson_i2c_func, + .master_xfer = meson_i2c_xfer, + .master_xfer_atomic = meson_i2c_xfer_atomic, + .functionality = meson_i2c_func, }; static int meson_i2c_probe(struct platform_device *pdev) From 821e2c705fb9d4b7f54fd17e7a13614ee515a181 Mon Sep 17 00:00:00 2001 From: Khouloud Touil Date: Tue, 7 Jan 2020 10:29:21 +0100 Subject: [PATCH 080/658] dt-bindings: at25: add reference for the wp-gpios property As the at25 uses the NVMEM subsystem, and the property is now being handled, adding reference for it in the device tree binding document, which allows to specify the GPIO line to which the write-protect pin is connected. Signed-off-by: Khouloud Touil Reviewed-by: Rob Herring Acked-by: Greg Kroah-Hartman Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at25.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/eeprom/at25.txt b/Documentation/devicetree/bindings/eeprom/at25.txt index 42577dd113dd..fcacd97abd0a 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.txt +++ b/Documentation/devicetree/bindings/eeprom/at25.txt @@ -20,6 +20,7 @@ Optional properties: - spi-cpha : SPI shifted clock phase, as per spi-bus bindings. - spi-cpol : SPI inverse clock polarity, as per spi-bus bindings. - read-only : this parameter-less property disables writes to the eeprom +- wp-gpios : GPIO to which the write-protect pin of the chip is connected Obsolete legacy properties can be used in place of "size", "pagesize", "address-width", and "read-only": @@ -36,6 +37,7 @@ Example: spi-max-frequency = <5000000>; spi-cpha; spi-cpol; + wp-gpios = <&gpio1 3 0>; pagesize = <64>; size = <32768>; From 59e7cffe5cca6f15c21d394492fa4739172de1c5 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 5 Jun 2014 20:22:05 +0200 Subject: [PATCH 081/658] ring-bufer: kernel-doc warning fixes Also fixes a couple of typos Link: http://lkml.kernel.org/r/1401992525-10417-1-git-send-email-fabf@skynet.be Cc: Andrew Morton Signed-off-by: Fabian Frederick [ Found this deep in the abyss of my INBOX ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f846de2aa435..46d67ff68795 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1368,6 +1368,7 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. + * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data @@ -4331,6 +4332,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read); /** * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. + * @cpu: The CPU to get ring buffer size from. 
*/ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { @@ -4504,6 +4506,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers * @buffer_a: One buffer to swap with * @buffer_b: The other buffer to swap with + * @cpu: the CPU of the buffers to swap * * This function is useful for tracers that want to take a "snapshot" * of a CPU buffer and has another back up buffer lying around. From cfc585a401764f0d352602d614c19866bb84738a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 14 Jan 2020 16:27:51 -0500 Subject: [PATCH 082/658] ring-buffer: Fix kernel doc for rb_update_event() rb_update_event has changed without the kernel-doc update. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 46d67ff68795..3bab9b0a90b6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2331,11 +2331,11 @@ static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, /** * rb_update_event - update event type and data + * @cpu_buffer: The per cpu buffer of the @event * @event: the event to update - * @type: the type of event - * @length: the size of the event field in the ring buffer + * @info: The info to update the @event with (contains length and delta) * - * Update the type and data fields of the event. The length + * Update the type and data fields of the @event. The length * is the actual size that is written to the ring buffer, * and with this, we can determine what to place into the * data field. From 444a52960c0f109daf78e50e2a07d1f0786e2052 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:49 -0500 Subject: [PATCH 083/658] saner calling conventions for nfs_fs_mount_common() Allow it to take ERR_PTR() for server and return ERR_CAST() of it in such case. All callers used to open-code that... 
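For readers skimming the diff, a minimal sketch of the ERR_PTR()/ERR_CAST() convention being adopted here (illustrative only: the demo_* names are invented, only the <linux/err.h> helpers are real):

#include <linux/err.h>

struct demo_server { int id; };
struct demo_root  { struct demo_server *srv; };

static struct demo_root demo_root_obj;

/* The callee accepts an ERR_PTR()-encoded failure and forwards it itself. */
static struct demo_root *demo_mount_common(struct demo_server *srv)
{
        if (IS_ERR(srv))
                return ERR_CAST(srv);   /* hand the encoded errno back, recast */

        demo_root_obj.srv = srv;
        return &demo_root_obj;
}

/* Callers can now feed a producer's result straight in, without their own
 * IS_ERR() check. */
static struct demo_root *demo_try_mount(struct demo_server *(*create)(void))
{
        return demo_mount_common(create());
}

With that contract in place, the callers' open-coded IS_ERR()/ERR_CAST() blocks become redundant, which is exactly what the hunks below delete.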
Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/nfs4super.c | 16 +--------------- fs/nfs/super.c | 11 ++++------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 2c9cbade561a..beeaed872e6c 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -110,21 +110,12 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, { struct nfs_mount_info *mount_info = info; struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ server = nfs4_create_server(mount_info, &nfs_v4); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); - -out: - return mntroot; + return nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); } static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, @@ -280,11 +271,6 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, /* create a new volume representation */ server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); out: nfs_free_fhandle(mount_info.mntfh); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8d8d04bb9d64..f074c3773f0e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1903,9 +1903,6 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, else server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); - if (IS_ERR(server)) - return ERR_CAST(server); - return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } EXPORT_SYMBOL_GPL(nfs_try_mount); @@ -2666,6 +2663,9 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, }; int error; + if (IS_ERR(server)) + return ERR_CAST(server); + if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; @@ -2814,10 +2814,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, /* create a new volume representation */ server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - if (IS_ERR(server)) - mntroot = ERR_CAST(server); - else - mntroot = nfs_fs_mount_common(server, flags, + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount() = %ld\n", From d0b779d47c92fa17a31cde312d05155941e26651 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:50 -0500 Subject: [PATCH 084/658] nfs: stash server into struct nfs_mount_info Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 3 ++- fs/nfs/nfs4super.c | 10 ++++------ fs/nfs/super.c | 19 ++++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24a65da58aa9..8f4900bd04f7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -142,6 +142,7 @@ struct nfs_mount_info { int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; + struct nfs_server *server; struct nfs_fh *mntfh; }; @@ -397,7 +398,7 @@ struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int 
nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, +struct dentry *nfs_fs_mount_common(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index beeaed872e6c..e7f2fd1925b1 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -109,13 +109,12 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *info) { struct nfs_mount_info *mount_info = info; - struct nfs_server *server; mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ - server = nfs4_create_server(mount_info, &nfs_v4); - return nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); + mount_info->server = nfs4_create_server(mount_info, &nfs_v4); + return nfs_fs_mount_common(flags, dev_name, mount_info, &nfs_v4); } static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, @@ -260,7 +259,6 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, .set_security = nfs_clone_sb_security, .cloned = raw_data, }; - struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); dprintk("--> nfs4_referral_get_sb()\n"); @@ -270,8 +268,8 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, goto out; /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); + mount_info.server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); + mntroot = nfs_fs_mount_common(flags, dev_name, &mount_info, &nfs_v4); out: nfs_free_fhandle(mount_info.mntfh); return mntroot; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f074c3773f0e..379c7b26051d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1896,14 +1896,12 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { - struct nfs_server *server; - if (mount_info->parsed->need_mount) - server = nfs_try_mount_request(mount_info, nfs_mod); + mount_info->server = nfs_try_mount_request(mount_info, nfs_mod); else - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + mount_info->server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); - return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); + return nfs_fs_mount_common(flags, dev_name, mount_info, nfs_mod); } EXPORT_SYMBOL_GPL(nfs_try_mount); @@ -2649,20 +2647,21 @@ static void nfs_set_readahead(struct backing_dev_info *bdi, bdi->io_pages = iomax_pages; } -struct dentry *nfs_fs_mount_common(struct nfs_server *server, - int flags, const char *dev_name, +struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_server *server = mount_info->server; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, .server = server, }; int error; + mount_info->server = NULL; if (IS_ERR(server)) return ERR_CAST(server); @@ -2803,7 +2802,6 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, 
.set_security = nfs_clone_sb_security, .cloned = data, }; - struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; @@ -2812,10 +2810,9 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ - server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); + mount_info.server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - mntroot = nfs_fs_mount_common(server, flags, - dev_name, &mount_info, nfs_mod); + mntroot = nfs_fs_mount_common(flags, dev_name, &mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount() = %ld\n", IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L); From 7643c12e955740ac08abcd159f4205cf0c3eef4e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:51 -0500 Subject: [PATCH 085/658] nfs: lift setting mount_info from nfs4_remote{,_referral}_mount Do that (fhandle allocation, setting struct server up) in nfs4_referral_mount() and nfs4_try_mount() resp. and pass the server and pointer to mount_info into nfs_do_root_mount() so that nfs4_remote_referral_mount()/nfs_remote_mount() could be merged. Since we are moving stuff from ->mount() instances to the points prior to vfs_kern_mount() that would trigger those, we need to make sure that do_nfs_root_mount() will do the corresponding cleanup itself if it doesn't trigger those ->mount() instances. Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/nfs4super.c | 67 ++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index e7f2fd1925b1..ac3e8928643d 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -108,32 +108,37 @@ static struct dentry * nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *info) { - struct nfs_mount_info *mount_info = info; - - mount_info->set_security = nfs_set_sb_security; - - /* Get a volume representation */ - mount_info->server = nfs4_create_server(mount_info, &nfs_v4); - return nfs_fs_mount_common(flags, dev_name, mount_info, &nfs_v4); + return nfs_fs_mount_common(flags, dev_name, info, &nfs_v4); } static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) + struct nfs_server *server, int flags, + struct nfs_mount_info *info, + const char *hostname) { struct vfsmount *root_mnt; char *root_devname; size_t len; + if (IS_ERR(server)) + return ERR_CAST(server); + len = strlen(hostname) + 5; root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) + if (root_devname == NULL) { + nfs_free_server(server); return ERR_PTR(-ENOMEM); + } /* Does hostname needs to be enclosed in brackets? 
*/ if (strchr(hostname, ':')) snprintf(root_devname, len, "[%s]:/", hostname); else snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + info->server = server; + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, info); + if (info->server) + nfs_free_server(info->server); + info->server = NULL; kfree(root_devname); return root_mnt; } @@ -234,11 +239,15 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, struct dentry *res; struct nfs_parsed_mount_data *data = mount_info->parsed; + mount_info->set_security = nfs_set_sb_security; + dfprintk(MOUNT, "--> nfs4_try_mount()\n"); export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, + nfs4_create_server(mount_info, &nfs_v4), + flags, mount_info, data->nfs_server.hostname); data->nfs_server.export_path = export_path; @@ -254,25 +263,7 @@ static struct dentry * nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - dprintk("--> nfs4_referral_get_sb()\n"); - - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mount_info.mntfh == NULL) - goto out; - - /* create a new volume representation */ - mount_info.server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - mntroot = nfs_fs_mount_common(flags, dev_name, &mount_info, &nfs_v4); -out: - nfs_free_fhandle(mount_info.mntfh); - return mntroot; + return nfs_fs_mount_common(flags, dev_name, raw_data, &nfs_v4); } /* @@ -282,23 +273,35 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_clone_mount *data = raw_data; + struct nfs_mount_info mount_info = { + .fill_super = nfs_fill_super, + .set_security = nfs_clone_sb_security, + .cloned = data, + }; char *export_path; struct vfsmount *root_mnt; struct dentry *res; dprintk("--> nfs4_referral_mount()\n"); + mount_info.mntfh = nfs_alloc_fhandle(); + if (!mount_info.mntfh) + return ERR_PTR(-ENOMEM); + export_path = data->mnt_path; data->mnt_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); + nfs4_create_referral_server(mount_info.cloned, + mount_info.mntfh), + flags, &mount_info, data->hostname); data->mnt_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); dprintk("<-- nfs4_referral_mount() = %d%s\n", PTR_ERR_OR_ZERO(res), IS_ERR(res) ? " [error]" : ""); + + nfs_free_fhandle(mount_info.mntfh); return res; } From 15a9c4eff67298af0b6c5976ede86afa1bcf780a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:52 -0500 Subject: [PATCH 086/658] nfs: fold nfs4_remote_fs_type and nfs4_remote_referral_fs_type They are identical now. 
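The fold works because the previous patch moved server creation out of the two ->mount() instances and parks the result in the mount info: whoever consumes it clears the pointer, and the producer frees whatever is still parked afterwards. A minimal sketch of that hand-off pattern (illustrative only, demo_* names invented, only standard <linux/slab.h> helpers used):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_info { void *server; };

/* Consumer (stand-in for a ->mount() instance): takes ownership by
 * clearing the slot, then releases the object itself. */
static void demo_consume(struct demo_info *info)
{
        void *server = info->server;

        info->server = NULL;
        kfree(server);
}

/* Producer: parks the object, gives the consumer a chance to run, then
 * frees only what is still parked (kfree(NULL) is a no-op). */
static int demo_handoff(struct demo_info *info, bool consumer_runs)
{
        info->server = kzalloc(32, GFP_KERNEL);
        if (!info->server)
                return -ENOMEM;

        if (consumer_runs)
                demo_consume(info);

        kfree(info->server);
        info->server = NULL;
        return 0;
}

In the series itself the same idea appears as the "if (info->server) nfs_free_server(info->server); info->server = NULL;" sequence after vfs_kern_mount(), which covers the case where the ->mount() instance never ran.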
Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/nfs4super.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index ac3e8928643d..54dbb4561ccc 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -22,8 +22,6 @@ static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, @@ -33,14 +31,6 @@ static struct file_system_type nfs4_remote_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; -static struct file_system_type nfs4_remote_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; - struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -111,8 +101,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, return nfs_fs_mount_common(flags, dev_name, info, &nfs_v4); } -static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - struct nfs_server *server, int flags, +static struct vfsmount *nfs_do_root_mount(struct nfs_server *server, int flags, struct nfs_mount_info *info, const char *hostname) { @@ -135,7 +124,7 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, else snprintf(root_devname, len, "%s:/", hostname); info->server = server; - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, info); + root_mnt = vfs_kern_mount(&nfs4_remote_fs_type, flags, root_devname, info); if (info->server) nfs_free_server(info->server); info->server = NULL; @@ -245,7 +234,7 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, + root_mnt = nfs_do_root_mount( nfs4_create_server(mount_info, &nfs_v4), flags, mount_info, data->nfs_server.hostname); @@ -259,13 +248,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, return res; } -static struct dentry * -nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - return nfs_fs_mount_common(flags, dev_name, raw_data, &nfs_v4); -} - /* * Create an NFS4 server record on referral traversal */ @@ -290,7 +272,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, export_path = data->mnt_path; data->mnt_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + root_mnt = nfs_do_root_mount( nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh), flags, &mount_info, data->hostname); From 6654f8e24668c18bfd30a0430d3d94e8c168d8a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:53 -0500 Subject: [PATCH 087/658] nfs: don't bother setting/restoring export_path around do_nfs_root_mount() nothing in it will be looking at that thing anyway Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/nfs4super.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfs/nfs4super.c 
b/fs/nfs/nfs4super.c index 54dbb4561ccc..8146edf4ce3a 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -233,12 +233,10 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); export_path = data->nfs_server.export_path; - data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount( nfs4_create_server(mount_info, &nfs_v4), flags, mount_info, data->nfs_server.hostname); - data->nfs_server.export_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); @@ -271,12 +269,10 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, return ERR_PTR(-ENOMEM); export_path = data->mnt_path; - data->mnt_path = "/"; root_mnt = nfs_do_root_mount( nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh), flags, &mount_info, data->hostname); - data->mnt_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); dprintk("<-- nfs4_referral_mount() = %d%s\n", From 4e357761bd44bbba3c3af226c2cc216beff0935c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:54 -0500 Subject: [PATCH 088/658] nfs4: fold nfs_do_root_mount/nfs_follow_remote_path Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/nfs4super.c | 88 +++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 51 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 8146edf4ce3a..c489942b9069 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -101,37 +101,6 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, return nfs_fs_mount_common(flags, dev_name, info, &nfs_v4); } -static struct vfsmount *nfs_do_root_mount(struct nfs_server *server, int flags, - struct nfs_mount_info *info, - const char *hostname) -{ - struct vfsmount *root_mnt; - char *root_devname; - size_t len; - - if (IS_ERR(server)) - return ERR_CAST(server); - - len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) { - nfs_free_server(server); - return ERR_PTR(-ENOMEM); - } - /* Does hostname needs to be enclosed in brackets? */ - if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); - else - snprintf(root_devname, len, "%s:/", hostname); - info->server = server; - root_mnt = vfs_kern_mount(&nfs4_remote_fs_type, flags, root_devname, info); - if (info->server) - nfs_free_server(info->server); - info->server = NULL; - kfree(root_devname); - return root_mnt; -} - struct nfs_referral_count { struct list_head list; const struct task_struct *task; @@ -198,11 +167,38 @@ static void nfs_referral_loop_unprotect(void) kfree(p); } -static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, - const char *export_path) +static struct dentry *do_nfs4_mount(struct nfs_server *server, int flags, + struct nfs_mount_info *info, + const char *hostname, + const char *export_path) { + struct vfsmount *root_mnt; struct dentry *dentry; + char *root_devname; int err; + size_t len; + + if (IS_ERR(server)) + return ERR_CAST(server); + + len = strlen(hostname) + 5; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) { + nfs_free_server(server); + return ERR_PTR(-ENOMEM); + } + + /* Does hostname needs to be enclosed in brackets? 
*/ + if (strchr(hostname, ':')) + snprintf(root_devname, len, "[%s]:/", hostname); + else + snprintf(root_devname, len, "%s:/", hostname); + info->server = server; + root_mnt = vfs_kern_mount(&nfs4_remote_fs_type, flags, root_devname, info); + if (info->server) + nfs_free_server(info->server); + info->server = NULL; + kfree(root_devname); if (IS_ERR(root_mnt)) return ERR_CAST(root_mnt); @@ -223,22 +219,17 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; struct nfs_parsed_mount_data *data = mount_info->parsed; + struct dentry *res; mount_info->set_security = nfs_set_sb_security; dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - export_path = data->nfs_server.export_path; - root_mnt = nfs_do_root_mount( - nfs4_create_server(mount_info, &nfs_v4), - flags, mount_info, - data->nfs_server.hostname); - - res = nfs_follow_remote_path(root_mnt, export_path); + res = do_nfs4_mount(nfs4_create_server(mount_info, &nfs_v4), + flags, mount_info, + data->nfs_server.hostname, + data->nfs_server.export_path); dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", PTR_ERR_OR_ZERO(res), @@ -258,8 +249,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, .set_security = nfs_clone_sb_security, .cloned = data, }; - char *export_path; - struct vfsmount *root_mnt; struct dentry *res; dprintk("--> nfs4_referral_mount()\n"); @@ -268,13 +257,10 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, if (!mount_info.mntfh) return ERR_PTR(-ENOMEM); - export_path = data->mnt_path; - root_mnt = nfs_do_root_mount( - nfs4_create_referral_server(mount_info.cloned, - mount_info.mntfh), - flags, &mount_info, data->hostname); + res = do_nfs4_mount(nfs4_create_referral_server(mount_info.cloned, + mount_info.mntfh), + flags, &mount_info, data->hostname, data->mnt_path); - res = nfs_follow_remote_path(root_mnt, export_path); dprintk("<-- nfs4_referral_mount() = %d%s\n", PTR_ERR_OR_ZERO(res), IS_ERR(res) ? " [error]" : ""); From 250d69f6a465e36a6ab8602030267de2586e028f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:55 -0500 Subject: [PATCH 089/658] nfs: lift setting mount_info from nfs_xdev_mount() Do it in nfs_do_submount() instead. As a side benefit, nfs_clone_data doesn't need ->fh and ->fattr anymore. 
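Reduced to its shape, the crossing point now builds the whole context and hands it to vfs_submount(), so the file system's ->mount() instance only has to consume it. A rough sketch (illustrative only: the demo_* names are invented; vfs_submount() is the real helper used in the namespace.c hunk below):

#include <linux/fs.h>
#include <linux/mount.h>

struct demo_mount_info {
        void *server;   /* volume representation, created before the call */
        void *mntfh;    /* file handle of the directory being crossed */
};

extern struct file_system_type demo_prepared_fs_type;

/* Prepare everything at the mountpoint crossing, then let the generic
 * submount machinery invoke demo_prepared_fs_type's ->mount(). */
static struct vfsmount *demo_do_submount(struct dentry *dentry,
                                         const char *devname,
                                         void *server, void *fh)
{
        struct demo_mount_info info = {
                .server = server,
                .mntfh  = fh,
        };

        return vfs_submount(dentry, &demo_prepared_fs_type, devname, &info);
}

As in nfs_do_submount() itself, the prepared info can live on the caller's stack because it is only consumed during the call.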
Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 3 +-- fs/nfs/namespace.c | 35 +++++++++++++++++++++-------------- fs/nfs/super.c | 25 ++++--------------------- 3 files changed, 26 insertions(+), 37 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8f4900bd04f7..b193dd626c0a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -34,8 +34,6 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; - struct nfs_fh *fh; - struct nfs_fattr *fattr; char *hostname; char *mnt_path; struct sockaddr *addr; @@ -405,6 +403,7 @@ struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); void nfs_kill_super(struct super_block *); void nfs_fill_super(struct super_block *, struct nfs_mount_info *); +void nfs_clone_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5e0e9d29f5c5..a76aeb0c2923 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -19,6 +19,7 @@ #include #include #include "internal.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -213,16 +214,6 @@ void nfs_release_automount_timer(void) cancel_delayed_work(&nfs_automount_task); } -/* - * Clone a mountpoint of the appropriate type - */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, - const char *devname, - struct nfs_clone_mount *mountdata) -{ - return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); -} - /** * nfs_do_submount - set up mountpoint when crossing a filesystem boundary * @dentry: parent directory @@ -234,13 +225,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, rpc_authflavor_t authflavor) { + struct super_block *sb = dentry->d_sb; struct nfs_clone_mount mountdata = { - .sb = dentry->d_sb, + .sb = sb, .dentry = dentry, - .fh = fh, - .fattr = fattr, .authflavor = authflavor, }; + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, + .cloned = &mountdata, + .mntfh = fh, + }; + struct nfs_subversion *nfs_mod = NFS_SB(sb)->nfs_client->cl_nfs_mod; + struct nfs_server *server; struct vfsmount *mnt; char *page = (char *) __get_free_page(GFP_USER); char *devname; @@ -248,12 +246,21 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, if (page == NULL) return ERR_PTR(-ENOMEM); + server = nfs_mod->rpc_ops->clone_server(NFS_SB(sb), fh, + fattr, authflavor); + if (IS_ERR(server)) + return ERR_CAST(server); + + mount_info.server = server; + devname = nfs_devname(dentry, page, PAGE_SIZE); if (IS_ERR(devname)) mnt = ERR_CAST(devname); else - mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); + mnt = vfs_submount(dentry, &nfs_xdev_fs_type, devname, &mount_info); + if (mount_info.server) + nfs_free_server(mount_info.server); free_page((unsigned long)page); return mnt; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 379c7b26051d..97dc544eb220 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2395,7 +2395,7 @@ EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock */ -static void nfs_clone_super(struct super_block *sb, +void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) { 
const struct super_block *old_sb = mount_info->cloned->sb; @@ -2796,27 +2796,10 @@ static struct dentry * nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_clone_mount *data = raw_data; - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = data, - }; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; + struct nfs_mount_info *info = raw_data; + struct nfs_subversion *nfs_mod = NFS_SB(info->cloned->sb)->nfs_client->cl_nfs_mod; - dprintk("--> nfs_xdev_mount()\n"); - - mount_info.mntfh = mount_info.cloned->fh; - - /* create a new volume representation */ - mount_info.server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - - mntroot = nfs_fs_mount_common(flags, dev_name, &mount_info, nfs_mod); - - dprintk("<-- nfs_xdev_mount() = %ld\n", - IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L); - return mntroot; + return nfs_fs_mount_common(flags, dev_name, info, nfs_mod); } #if IS_ENABLED(CONFIG_NFS_V4) From 6a3f7a399ebf52164e40773f4fbcb31a4c1ec02a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:56 -0500 Subject: [PATCH 090/658] nfs: stash nfs_subversion reference into nfs_mount_info That will allow to get rid of passing those references around in quite a few places. Moreover, that will allow to merge xdev and remote file_system_type. Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 1 + fs/nfs/namespace.c | 6 +++--- fs/nfs/nfs4super.c | 1 + fs/nfs/super.c | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b193dd626c0a..9888e9c7abe2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -142,6 +142,7 @@ struct nfs_mount_info { struct nfs_clone_mount *cloned; struct nfs_server *server; struct nfs_fh *mntfh; + struct nfs_subversion *nfs_mod; }; extern int nfs_mount(struct nfs_mount_request *info); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index a76aeb0c2923..a00936dd153b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -236,8 +236,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, .set_security = nfs_clone_sb_security, .cloned = &mountdata, .mntfh = fh, + .nfs_mod = NFS_SB(sb)->nfs_client->cl_nfs_mod, }; - struct nfs_subversion *nfs_mod = NFS_SB(sb)->nfs_client->cl_nfs_mod; struct nfs_server *server; struct vfsmount *mnt; char *page = (char *) __get_free_page(GFP_USER); @@ -246,8 +246,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, if (page == NULL) return ERR_PTR(-ENOMEM); - server = nfs_mod->rpc_ops->clone_server(NFS_SB(sb), fh, - fattr, authflavor); + server = mount_info.nfs_mod->rpc_ops->clone_server(NFS_SB(sb), fh, + fattr, authflavor); if (IS_ERR(server)) return ERR_CAST(server); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index c489942b9069..6e5417027021 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -248,6 +248,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, .fill_super = nfs_fill_super, .set_security = nfs_clone_sb_security, .cloned = data, + .nfs_mod = &nfs_v4, }; struct dentry *res; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 97dc544eb220..6189f768aa59 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2761,6 +2761,7 @@ struct dentry *nfs_fs_mount(struct file_system_type 
*fs_type, mntroot = ERR_CAST(nfs_mod); goto out; } + mount_info.nfs_mod = nfs_mod; mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); From a55d3297be587c6baa0242ae0fbb92489e576005 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:57 -0500 Subject: [PATCH 091/658] nfs: don't bother passing nfs_subversion to ->try_mount() and nfs_fs_mount_common() Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 6 ++---- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4super.c | 5 ++--- fs/nfs/super.c | 19 ++++++++----------- include/linux/nfs_xdr.h | 3 +-- 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9888e9c7abe2..4a0ba66bc3aa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -393,12 +393,10 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); -struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, - struct nfs_subversion *); +struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(int, const char *, - struct nfs_mount_info *, struct nfs_subversion *); +struct dentry *nfs_fs_mount_common(int, const char *, struct nfs_mount_info *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a7a73b1d1fec..5d539dce9cef 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -526,7 +526,7 @@ extern const nfs4_stateid invalid_stateid; /* nfs4super.c */ struct nfs_mount_info; extern struct nfs_subversion nfs_v4; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 6e5417027021..2b34d8e124cd 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -98,7 +98,7 @@ static struct dentry * nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *info) { - return nfs_fs_mount_common(flags, dev_name, info, &nfs_v4); + return nfs_fs_mount_common(flags, dev_name, info); } struct nfs_referral_count { @@ -216,8 +216,7 @@ static struct dentry *do_nfs4_mount(struct nfs_server *server, int flags, } struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) + struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct dentry *res; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6189f768aa59..cb0ead628842 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1893,15 +1893,15 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf } struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) + struct nfs_mount_info *mount_info) { + struct nfs_subversion *nfs_mod = 
mount_info->nfs_mod; if (mount_info->parsed->need_mount) mount_info->server = nfs_try_mount_request(mount_info, nfs_mod); else mount_info->server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); - return nfs_fs_mount_common(flags, dev_name, mount_info, nfs_mod); + return nfs_fs_mount_common(flags, dev_name, mount_info); } EXPORT_SYMBOL_GPL(nfs_try_mount); @@ -2648,8 +2648,7 @@ static void nfs_set_readahead(struct backing_dev_info *bdi, } struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) + struct nfs_mount_info *mount_info) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2677,7 +2676,8 @@ struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, sb_mntdata.mntflags |= SB_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); + s = sget(mount_info->nfs_mod->nfs_fs, compare_super, nfs_set_super, + flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; @@ -2763,7 +2763,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, } mount_info.nfs_mod = nfs_mod; - mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); + mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info); put_nfs_version(nfs_mod); out: @@ -2797,10 +2797,7 @@ static struct dentry * nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_mount_info *info = raw_data; - struct nfs_subversion *nfs_mod = NFS_SB(info->cloned->sb)->nfs_client->cl_nfs_mod; - - return nfs_fs_mount_common(flags, dev_name, info, nfs_mod); + return nfs_fs_mount_common(flags, dev_name, raw_data); } #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 72d5695c1b47..3ee2ad642cbc 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1655,8 +1655,7 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); struct vfsmount *(*submount) (struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); - struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *, - struct nfs_subversion *); + struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *, struct inode *); From 82eaed2beef5ce31ecfeb70a33616e6331a51ef2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:58 -0500 Subject: [PATCH 092/658] nfs: merge xdev and remote file_system_type they are identical now... 
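The last few patches follow one pattern: stash what used to be an extra parameter (the nfs_subversion module) in the mount-info context, then let the per-version wrappers collapse. A compressed sketch of the before/after shape (illustrative only, demo_* names invented):

#include <linux/errno.h>

struct demo_subversion;                 /* per-version module, opaque here */

struct demo_mount_info {
        struct demo_subversion *nfs_mod;  /* stashed once by the entry point */
        void *server;
};

/*
 * before: demo_mount_common(flags, dev_name, info, nfs_mod)
 * after:  everything needed already travels inside @info
 */
static int demo_mount_common(int flags, const char *dev_name,
                             struct demo_mount_info *info)
{
        if (!info->nfs_mod)
                return -EINVAL;
        /* ... superblock setup would use info->nfs_mod and info->server ... */
        return 0;
}

Once every ->mount() instance takes only the prepared info, the xdev and remote variants stop differing, which is what lets this patch merge them.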
Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 2 +- fs/nfs/namespace.c | 2 +- fs/nfs/nfs4super.c | 22 +--------------------- fs/nfs/super.c | 14 ++++++++------ 4 files changed, 11 insertions(+), 29 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4a0ba66bc3aa..e338558b7908 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -388,7 +388,7 @@ extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; extern struct file_system_type nfs_fs_type; -extern struct file_system_type nfs_xdev_fs_type; +extern struct file_system_type nfs_prepared_fs_type; #if IS_ENABLED(CONFIG_NFS_V4) extern struct file_system_type nfs4_referral_fs_type; #endif diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index a00936dd153b..e5f4f2d760af 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -257,7 +257,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, if (IS_ERR(devname)) mnt = ERR_CAST(devname); else - mnt = vfs_submount(dentry, &nfs_xdev_fs_type, devname, &mount_info); + mnt = vfs_submount(dentry, &nfs_prepared_fs_type, devname, &mount_info); if (mount_info.server) nfs_free_server(mount_info.server); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 2b34d8e124cd..1358d8078737 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -18,19 +18,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); static void nfs4_evict_inode(struct inode *inode); -static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs4_remote_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; - struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -91,16 +81,6 @@ static void nfs4_evict_inode(struct inode *inode) nfs_clear_inode(inode); } -/* - * Get the superblock for the NFS4 root partition - */ -static struct dentry * -nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) -{ - return nfs_fs_mount_common(flags, dev_name, info); -} - struct nfs_referral_count { struct list_head list; const struct task_struct *task; @@ -194,7 +174,7 @@ static struct dentry *do_nfs4_mount(struct nfs_server *server, int flags, else snprintf(root_devname, len, "%s:/", hostname); info->server = server; - root_mnt = vfs_kern_mount(&nfs4_remote_fs_type, flags, root_devname, info); + root_mnt = vfs_kern_mount(&nfs_prepared_fs_type, flags, root_devname, info); if (info->server) nfs_free_server(info->server); info->server = NULL; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index cb0ead628842..60845dc864e7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -293,7 +293,7 @@ static match_table_t nfs_vers_tokens = { { Opt_vers_err, NULL } }; -static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, +static struct dentry *nfs_prepared_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); struct file_system_type nfs_fs_type = { @@ -306,13 +306,14 @@ struct file_system_type nfs_fs_type = { MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); -struct 
file_system_type nfs_xdev_fs_type = { +struct file_system_type nfs_prepared_fs_type = { .owner = THIS_MODULE, .name = "nfs", - .mount = nfs_xdev_mount, + .mount = nfs_prepared_mount, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +EXPORT_SYMBOL_GPL(nfs_prepared_fs_type); const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, @@ -2791,11 +2792,12 @@ void nfs_kill_super(struct super_block *s) EXPORT_SYMBOL_GPL(nfs_kill_super); /* - * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) + * Internal use only: mount_info is already set up by caller. + * Used for mountpoint crossings and for nfs4 root. */ static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) +nfs_prepared_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { return nfs_fs_mount_common(flags, dev_name, raw_data); } From 1bc3a2cbf239fc6c5984a074cc0d7b6de182407d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:30:59 -0500 Subject: [PATCH 093/658] nfs: unexport nfs_fs_mount_common() Make it static, even. And remove a stale extern of (long-gone) nfs_xdev_mount_common() from internal.h, while we are at it. Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 3 --- fs/nfs/super.c | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e338558b7908..310f81a149b2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -396,10 +396,7 @@ bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(int, const char *, struct nfs_mount_info *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); -struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, - const char *, struct nfs_mount_info *); void nfs_kill_super(struct super_block *); void nfs_fill_super(struct super_block *, struct nfs_mount_info *); void nfs_clone_super(struct super_block *, struct nfs_mount_info *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 60845dc864e7..0bedac041272 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1893,6 +1893,8 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); } +static struct dentry *nfs_fs_mount_common(int, const char *, struct nfs_mount_info *); + struct dentry *nfs_try_mount(int flags, const char *dev_name, struct nfs_mount_info *mount_info) { @@ -2648,7 +2650,7 @@ static void nfs_set_readahead(struct backing_dev_info *bdi, bdi->io_pages = iomax_pages; } -struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, +static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, struct nfs_mount_info *mount_info) { struct super_block *s; @@ -2730,7 +2732,6 @@ error_splat_super: deactivate_locked_super(s); goto out; } -EXPORT_SYMBOL_GPL(nfs_fs_mount_common); struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) From 0c38f2131df9865aa9fb24b7ad30a9657588e0e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:31:00 
-0500 Subject: [PATCH 094/658] nfs: don't pass nfs_subversion to ->create_server() pick it from mount_info Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 4 ++-- fs/nfs/internal.h | 7 ++----- fs/nfs/nfs3_fs.h | 2 +- fs/nfs/nfs3client.c | 5 ++--- fs/nfs/nfs4client.c | 3 +-- fs/nfs/nfs4super.c | 2 +- fs/nfs/super.c | 14 +++++++------- include/linux/nfs_xdr.h | 2 +- 8 files changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 02110a30a49e..a2049747adc4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -951,10 +951,10 @@ EXPORT_SYMBOL_GPL(nfs_free_server); * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info) { struct nfs_server *server; + struct nfs_subversion *nfs_mod = mount_info->nfs_mod; struct nfs_fattr *fattr; int error; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 310f81a149b2..0bb0493785fc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -170,11 +170,8 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); -extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, - struct nfs_subversion *); -extern struct nfs_server *nfs4_create_server( - struct nfs_mount_info *, - struct nfs_subversion *); +extern struct nfs_server *nfs_create_server(struct nfs_mount_info *); +extern struct nfs_server *nfs4_create_server(struct nfs_mount_info *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index f82e11c4cb56..09602dc1889f 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -27,7 +27,7 @@ static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, #endif /* CONFIG_NFS_V3_ACL */ /* nfs3client.c */ -struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_create_server(struct nfs_mount_info *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 223904bc40a7..54727d3d3042 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -46,10 +46,9 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) } #endif -struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info) { - struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + struct nfs_server *server = nfs_create_server(mount_info); /* Create a client RPC handle for the NFS v3 ACL management interface */ if (!IS_ERR(server)) nfs_init_server_aclclient(server); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 460d6251c405..538fd036b69d 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1112,8 +1112,7 @@ static int nfs4_init_server(struct nfs_server *server, */ /*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, struct nfs_fh *mntfh)*/ -struct nfs_server *nfs4_create_server(struct nfs_mount_info 
*mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info) { struct nfs_server *server; bool auth_probe; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1358d8078737..e5d8a76bd144 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -205,7 +205,7 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - res = do_nfs4_mount(nfs4_create_server(mount_info, &nfs_v4), + res = do_nfs4_mount(nfs4_create_server(mount_info), flags, mount_info, data->nfs_server.hostname, data->nfs_server.export_path); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0bedac041272..6239c78d8f54 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1820,8 +1820,7 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return 0; } -static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info) { int status; unsigned int i; @@ -1831,6 +1830,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf struct nfs_parsed_mount_data *args = mount_info->parsed; rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); + struct nfs_subversion *nfs_mod = mount_info->nfs_mod; status = nfs_request_mount(args, mount_info->mntfh, authlist, &authlist_len); @@ -1847,7 +1847,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf args->selected_flavor); if (status) return ERR_PTR(status); - return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + return nfs_mod->rpc_ops->create_server(mount_info); } /* @@ -1874,7 +1874,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); args->selected_flavor = flavor; - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + server = nfs_mod->rpc_ops->create_server(mount_info); if (!IS_ERR(server)) return server; } @@ -1890,7 +1890,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! 
Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); args->selected_flavor = RPC_AUTH_UNIX; - return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + return nfs_mod->rpc_ops->create_server(mount_info); } static struct dentry *nfs_fs_mount_common(int, const char *, struct nfs_mount_info *); @@ -1900,9 +1900,9 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, { struct nfs_subversion *nfs_mod = mount_info->nfs_mod; if (mount_info->parsed->need_mount) - mount_info->server = nfs_try_mount_request(mount_info, nfs_mod); + mount_info->server = nfs_try_mount_request(mount_info); else - mount_info->server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + mount_info->server = nfs_mod->rpc_ops->create_server(mount_info); return nfs_fs_mount_common(flags, dev_name, mount_info); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ee2ad642cbc..17527f6e6360 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1722,7 +1722,7 @@ struct nfs_rpc_ops { struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); void (*free_client) (struct nfs_client *); - struct nfs_server *(*create_server)(struct nfs_mount_info *, struct nfs_subversion *); + struct nfs_server *(*create_server)(struct nfs_mount_info *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); }; From ab88dca311a3722a66f7dee3c352e634ad1e6a25 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:31:01 -0500 Subject: [PATCH 095/658] nfs: get rid of mount_info ->fill_super() The only possible values are nfs_fill_super and nfs_clone_super. The latter is used only when crossing into a submount and it is almost identical to the former; the only differences are * ->s_time_gran unconditionally set to 1 (even for v2 mounts). Regression dating back to 2012, actually. * ->s_blocksize/->s_blocksize_bits set to that of parent. Rather than messing with the method, stash ->s_blocksize_bits in mount_info in submount case and after the (now unconditional) call of nfs_fill_super() override ->s_blocksize/->s_blocksize_bits if that has been set. 
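With that, the override in nfs_fs_mount_common() amounts to the following (sketch; the full hunk is in the diff below):

    if (!s->s_root) {
    	unsigned bsize = mount_info->inherited_bsize;

    	/* initial superblock/root creation, now unconditional */
    	nfs_fill_super(s, mount_info);
    	if (bsize) {
    		/* submount: inherit the parent's block size */
    		s->s_blocksize_bits = bsize;
    		s->s_blocksize = 1U << bsize;
    	}
    	/* ... rest of the setup unchanged ... */
    }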
Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 4 +-- fs/nfs/namespace.c | 2 +- fs/nfs/nfs4super.c | 1 - fs/nfs/super.c | 75 ++++++++++------------------------------------ 4 files changed, 18 insertions(+), 64 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0bb0493785fc..65c8e353cb6b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -136,7 +136,7 @@ struct nfs_mount_request { }; struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); + unsigned int inherited_bsize; int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; @@ -395,8 +395,6 @@ int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_ int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); void nfs_kill_super(struct super_block *); -void nfs_fill_super(struct super_block *, struct nfs_mount_info *); -void nfs_clone_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e5f4f2d760af..30331558bd8e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -232,7 +232,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, .authflavor = authflavor, }; struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, + .inherited_bsize = sb->s_blocksize_bits, .set_security = nfs_clone_sb_security, .cloned = &mountdata, .mntfh = fh, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index e5d8a76bd144..5020a43b31c9 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -224,7 +224,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, { struct nfs_clone_mount *data = raw_data; struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, .set_security = nfs_clone_sb_security, .cloned = data, .nfs_mod = &nfs_v4, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6239c78d8f54..2bcf0f8295e0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2339,29 +2339,9 @@ out: EXPORT_SYMBOL_GPL(nfs_remount); /* - * Initialise the common bits of the superblock + * Finish setting up an NFS superblock */ -static void nfs_initialise_sb(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), - "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - - nfs_super_set_maxbytes(sb, server->maxfilesize); -} - -/* - * Finish setting up an NFS2/3 superblock - */ -void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); @@ -2391,44 +2371,17 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_time_max = S64_MAX; } - nfs_initialise_sb(sb); -} -EXPORT_SYMBOL_GPL(nfs_fill_super); + sb->s_magic = NFS_SUPER_MAGIC; -/* - * Finish setting up a cloned NFS2/3/4 superblock - */ -void nfs_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - const struct 
super_block *old_sb = mount_info->cloned->sb; - struct nfs_server *server = NFS_SB(sb); + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - sb->s_blocksize_bits = old_sb->s_blocksize_bits; - sb->s_blocksize = old_sb->s_blocksize; - sb->s_maxbytes = old_sb->s_maxbytes; - sb->s_xattr = old_sb->s_xattr; - sb->s_op = old_sb->s_op; - sb->s_export_op = old_sb->s_export_op; + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); - if (server->nfs_client->rpc_ops->version != 2) { - /* The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. - */ - sb->s_flags |= SB_POSIXACL; - sb->s_time_gran = 1; - } else - sb->s_time_gran = 1000; - - if (server->nfs_client->rpc_ops->version != 4) { - sb->s_time_min = 0; - sb->s_time_max = U32_MAX; - } else { - sb->s_time_min = S64_MIN; - sb->s_time_max = S64_MAX; - } - - nfs_initialise_sb(sb); + nfs_super_set_maxbytes(sb, server->maxfilesize); } static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) @@ -2701,8 +2654,13 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, } if (!s->s_root) { + unsigned bsize = mount_info->inherited_bsize; /* initial superblock/root creation */ - mount_info->fill_super(s, mount_info); + nfs_fill_super(s, mount_info); + if (bsize) { + s->s_blocksize_bits = bsize; + s->s_blocksize = 1U << bsize; + } nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); if (!(server->flags & NFS_MOUNT_UNSHARED)) s->s_iflags |= SB_I_MULTIROOT; @@ -2737,7 +2695,6 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, .set_security = nfs_set_sb_security, }; struct dentry *mntroot = ERR_PTR(-ENOMEM); From ba8b6148067f51528dedf65b533d5094e25e6205 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:31:02 -0500 Subject: [PATCH 096/658] nfs_clone_sb_security(): simplify the check for server bogosity We used to check ->i_op for being nfs_dir_inode_operations. With separate inode_operations for v3 and v4 that became bogus, but rather than going for protocol-dependent comparison we could've just checked ->i_fop instead; _that_ is the same for all protocol versions. Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2bcf0f8295e0..bec6c341f72c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2578,7 +2578,7 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, unsigned long kflags = 0, kflags_out = 0; /* clone any lsm security options from the parent to the new sb */ - if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) + if (d_inode(mntroot)->i_fop != &nfs_dir_operations) return -ESTALE; if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) From adf2314fe667565258f304a04990d334defabeaf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Dec 2019 07:31:03 -0500 Subject: [PATCH 097/658] nfs: get rid of ->set_security() it's always either nfs_set_sb_security() or nfs_clone_sb_security(), the choice being controlled by mount_info->cloned != NULL. No need to add methods, especially when both instances live right next to the caller and are never accessed anywhere else. 
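Inlined into nfs_fs_mount_common(), the selection reduces to roughly this (sketch; error handling as in the hunk below):

    if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
    	kflags |= SECURITY_LSM_NATIVE_LABELS;
    if (mount_info->cloned) {
    	/* submount: clone LSM options from the parent superblock */
    	if (d_inode(mntroot)->i_fop != &nfs_dir_operations)
    		error = -ESTALE;
    	else
    		error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s,
    						   kflags, &kflags_out);
    } else {
    	/* fresh mount: apply the parsed LSM options */
    	error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts,
    					 kflags, &kflags_out);
    }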
Reviewed-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 3 -- fs/nfs/namespace.c | 1 - fs/nfs/nfs4super.c | 3 -- fs/nfs/super.c | 69 ++++++++++++++-------------------------------- 4 files changed, 21 insertions(+), 55 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 65c8e353cb6b..a467e43fc682 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -137,7 +137,6 @@ struct nfs_mount_request { struct nfs_mount_info { unsigned int inherited_bsize; - int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; struct nfs_server *server; @@ -391,8 +390,6 @@ extern struct file_system_type nfs4_referral_fs_type; #endif bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *); -int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); void nfs_kill_super(struct super_block *); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 30331558bd8e..bfe607374feb 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -233,7 +233,6 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, }; struct nfs_mount_info mount_info = { .inherited_bsize = sb->s_blocksize_bits, - .set_security = nfs_clone_sb_security, .cloned = &mountdata, .mntfh = fh, .nfs_mod = NFS_SB(sb)->nfs_client->cl_nfs_mod, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 5020a43b31c9..f1c2d294073a 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -201,8 +201,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, struct nfs_parsed_mount_data *data = mount_info->parsed; struct dentry *res; - mount_info->set_security = nfs_set_sb_security; - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); res = do_nfs4_mount(nfs4_create_server(mount_info), @@ -224,7 +222,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, { struct nfs_clone_mount *data = raw_data; struct nfs_mount_info mount_info = { - .set_security = nfs_clone_sb_security, .cloned = data, .nfs_mod = &nfs_v4, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bec6c341f72c..de00f89dbe6e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2550,52 +2550,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, } #endif -int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) -{ - int error; - unsigned long kflags = 0, kflags_out = 0; - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) - kflags |= SECURITY_LSM_NATIVE_LABELS; - - error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts, - kflags, &kflags_out); - if (error) - goto err; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && - !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; -err: - return error; -} -EXPORT_SYMBOL_GPL(nfs_set_sb_security); - -int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) -{ - int error; - unsigned long kflags = 0, kflags_out = 0; - - /* clone any lsm security options from the parent to the new sb */ - if (d_inode(mntroot)->i_fop != &nfs_dir_operations) - return -ESTALE; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) 
- kflags |= SECURITY_LSM_NATIVE_LABELS; - - error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags, - &kflags_out); - if (error) - return error; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && - !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; - return 0; -} -EXPORT_SYMBOL_GPL(nfs_clone_sb_security); - static void nfs_set_readahead(struct backing_dev_info *bdi, unsigned long iomax_pages) { @@ -2610,6 +2564,7 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, struct dentry *mntroot = ERR_PTR(-ENOMEM); int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_server *server = mount_info->server; + unsigned long kflags = 0, kflags_out = 0; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, .server = server, @@ -2670,7 +2625,26 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, if (IS_ERR(mntroot)) goto error_splat_super; - error = mount_info->set_security(s, mntroot, mount_info); + + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) + kflags |= SECURITY_LSM_NATIVE_LABELS; + if (mount_info->cloned) { + if (d_inode(mntroot)->i_fop != &nfs_dir_operations) { + error = -ESTALE; + goto error_splat_root; + } + /* clone any lsm security options from the parent to the new sb */ + error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags, + &kflags_out); + } else { + error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts, + kflags, &kflags_out); + } + if (error) + goto error_splat_root; + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && + !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) + NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; if (error) goto error_splat_root; @@ -2695,7 +2669,6 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .set_security = nfs_set_sb_security, }; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod; From 9954bf92c0cddd50a2a470be302e1c1ffdf21d42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:04 -0500 Subject: [PATCH 098/658] NFS: Move mount parameterisation bits into their own file Split various bits relating to mount parameterisation out from fs/nfs/super.c into their own file to form the basis of filesystem context handling for NFS. No other changes are made to the code beyond removing 'static' qualifiers. 
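The split leaves super.c consuming the same (formerly static) entry points, now declared in internal.h; a caller keeps roughly this shape (condensed sketch, allocation and cleanup omitted):

    /* decide between legacy binary mount data and text options ... */
    error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed,
    				    mount_info.mntfh, dev_name);
    if (error == NFS_TEXT_DATA)
    	/* ... text options are parsed by the code that moved to fs_context.c */
    	error = nfs_validate_text_mount_data(raw_data, mount_info.parsed,
    					     dev_name);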
Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/Makefile | 2 +- fs/nfs/fs_context.c | 1414 +++++++++++++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 30 + fs/nfs/super.c | 1411 ------------------------------------------ 4 files changed, 1445 insertions(+), 1412 deletions(-) create mode 100644 fs/nfs/fs_context.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 34cdeaecccf6..2433c3e03cfa 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,7 +9,7 @@ CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ io.o direct.o pagelist.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o nfstrace.o \ - export.o sysfs.o + export.o sysfs.o fs_context.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c new file mode 100644 index 000000000000..c8f99a3c7264 --- /dev/null +++ b/fs/nfs/fs_context.c @@ -0,0 +1,1414 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/fs/nfs/fs_context.c + * + * Copyright (C) 1992 Rick Sladkey + * + * NFS mount handling. + * + * Split from fs/nfs/super.c by David Howells + */ + +#include +#include +#include +#include +#include +#include +#include "nfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_MOUNT + +#if IS_ENABLED(CONFIG_NFS_V3) +#define NFS_DEFAULT_VERSION 3 +#else +#define NFS_DEFAULT_VERSION 2 +#endif + +#define NFS_MAX_CONNECTIONS 16 + +enum { + /* Mount options that take no arguments */ + Opt_soft, Opt_softerr, Opt_hard, + Opt_posix, Opt_noposix, + Opt_cto, Opt_nocto, + Opt_ac, Opt_noac, + Opt_lock, Opt_nolock, + Opt_udp, Opt_tcp, Opt_rdma, + Opt_acl, Opt_noacl, + Opt_rdirplus, Opt_nordirplus, + Opt_sharecache, Opt_nosharecache, + Opt_resvport, Opt_noresvport, + Opt_fscache, Opt_nofscache, + Opt_migration, Opt_nomigration, + + /* Mount options that take integer arguments */ + Opt_port, + Opt_rsize, Opt_wsize, Opt_bsize, + Opt_timeo, Opt_retrans, + Opt_acregmin, Opt_acregmax, + Opt_acdirmin, Opt_acdirmax, + Opt_actimeo, + Opt_namelen, + Opt_mountport, + Opt_mountvers, + Opt_minorversion, + + /* Mount options that take string arguments */ + Opt_nfsvers, + Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, + Opt_addr, Opt_mountaddr, Opt_clientaddr, + Opt_nconnect, + Opt_lookupcache, + Opt_fscache_uniq, + Opt_local_lock, + + /* Special mount options */ + Opt_userspace, Opt_deprecated, Opt_sloppy, + + Opt_err +}; + +static const match_table_t nfs_mount_option_tokens = { + { Opt_userspace, "bg" }, + { Opt_userspace, "fg" }, + { Opt_userspace, "retry=%s" }, + + { Opt_sloppy, "sloppy" }, + + { Opt_soft, "soft" }, + { Opt_softerr, "softerr" }, + { Opt_hard, "hard" }, + { Opt_deprecated, "intr" }, + { Opt_deprecated, "nointr" }, + { Opt_posix, "posix" }, + { Opt_noposix, "noposix" }, + { Opt_cto, "cto" }, + { Opt_nocto, "nocto" }, + { Opt_ac, "ac" }, + { Opt_noac, "noac" }, + { Opt_lock, "lock" }, + { Opt_nolock, "nolock" }, + { Opt_udp, "udp" }, + { Opt_tcp, "tcp" }, + { Opt_rdma, "rdma" }, + { Opt_acl, "acl" }, + { Opt_noacl, "noacl" }, + { Opt_rdirplus, "rdirplus" }, + { Opt_nordirplus, "nordirplus" }, + { Opt_sharecache, "sharecache" }, + { Opt_nosharecache, "nosharecache" }, + { Opt_resvport, "resvport" }, + { Opt_noresvport, "noresvport" }, + { Opt_fscache, "fsc" }, + { Opt_nofscache, "nofsc" }, + { Opt_migration, "migration" }, + { Opt_nomigration, "nomigration" }, + + { Opt_port, "port=%s" }, + { Opt_rsize, "rsize=%s" }, 
+ { Opt_wsize, "wsize=%s" }, + { Opt_bsize, "bsize=%s" }, + { Opt_timeo, "timeo=%s" }, + { Opt_retrans, "retrans=%s" }, + { Opt_acregmin, "acregmin=%s" }, + { Opt_acregmax, "acregmax=%s" }, + { Opt_acdirmin, "acdirmin=%s" }, + { Opt_acdirmax, "acdirmax=%s" }, + { Opt_actimeo, "actimeo=%s" }, + { Opt_namelen, "namlen=%s" }, + { Opt_mountport, "mountport=%s" }, + { Opt_mountvers, "mountvers=%s" }, + { Opt_minorversion, "minorversion=%s" }, + + { Opt_nfsvers, "nfsvers=%s" }, + { Opt_nfsvers, "vers=%s" }, + + { Opt_sec, "sec=%s" }, + { Opt_proto, "proto=%s" }, + { Opt_mountproto, "mountproto=%s" }, + { Opt_addr, "addr=%s" }, + { Opt_clientaddr, "clientaddr=%s" }, + { Opt_mounthost, "mounthost=%s" }, + { Opt_mountaddr, "mountaddr=%s" }, + + { Opt_nconnect, "nconnect=%s" }, + + { Opt_lookupcache, "lookupcache=%s" }, + { Opt_fscache_uniq, "fsc=%s" }, + { Opt_local_lock, "local_lock=%s" }, + + /* The following needs to be listed after all other options */ + { Opt_nfsvers, "v%s" }, + + { Opt_err, NULL } +}; + +enum { + Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, + Opt_xprt_rdma6, + + Opt_xprt_err +}; + +static const match_table_t nfs_xprt_protocol_tokens = { + { Opt_xprt_udp, "udp" }, + { Opt_xprt_udp6, "udp6" }, + { Opt_xprt_tcp, "tcp" }, + { Opt_xprt_tcp6, "tcp6" }, + { Opt_xprt_rdma, "rdma" }, + { Opt_xprt_rdma6, "rdma6" }, + + { Opt_xprt_err, NULL } +}; + +enum { + Opt_sec_none, Opt_sec_sys, + Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, + Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, + Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, + + Opt_sec_err +}; + +static const match_table_t nfs_secflavor_tokens = { + { Opt_sec_none, "none" }, + { Opt_sec_none, "null" }, + { Opt_sec_sys, "sys" }, + + { Opt_sec_krb5, "krb5" }, + { Opt_sec_krb5i, "krb5i" }, + { Opt_sec_krb5p, "krb5p" }, + + { Opt_sec_lkey, "lkey" }, + { Opt_sec_lkeyi, "lkeyi" }, + { Opt_sec_lkeyp, "lkeyp" }, + + { Opt_sec_spkm, "spkm3" }, + { Opt_sec_spkmi, "spkm3i" }, + { Opt_sec_spkmp, "spkm3p" }, + + { Opt_sec_err, NULL } +}; + +enum { + Opt_lookupcache_all, Opt_lookupcache_positive, + Opt_lookupcache_none, + + Opt_lookupcache_err +}; + +static match_table_t nfs_lookupcache_tokens = { + { Opt_lookupcache_all, "all" }, + { Opt_lookupcache_positive, "pos" }, + { Opt_lookupcache_positive, "positive" }, + { Opt_lookupcache_none, "none" }, + + { Opt_lookupcache_err, NULL } +}; + +enum { + Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, + Opt_local_lock_none, + + Opt_local_lock_err +}; + +static match_table_t nfs_local_lock_tokens = { + { Opt_local_lock_all, "all" }, + { Opt_local_lock_flock, "flock" }, + { Opt_local_lock_posix, "posix" }, + { Opt_local_lock_none, "none" }, + + { Opt_local_lock_err, NULL } +}; + +enum { + Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, + Opt_vers_4_1, Opt_vers_4_2, + + Opt_vers_err +}; + +static match_table_t nfs_vers_tokens = { + { Opt_vers_2, "2" }, + { Opt_vers_3, "3" }, + { Opt_vers_4, "4" }, + { Opt_vers_4_0, "4.0" }, + { Opt_vers_4_1, "4.1" }, + { Opt_vers_4_2, "4.2" }, + + { Opt_vers_err, NULL } +}; + +struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) +{ + struct nfs_parsed_mount_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data) { + data->timeo = NFS_UNSPEC_TIMEO; + data->retrans = NFS_UNSPEC_RETRANS; + data->acregmin = NFS_DEF_ACREGMIN; + data->acregmax = NFS_DEF_ACREGMAX; + data->acdirmin = NFS_DEF_ACDIRMIN; + data->acdirmax = NFS_DEF_ACDIRMAX; + data->mount_server.port = NFS_UNSPEC_PORT; + data->nfs_server.port = 
NFS_UNSPEC_PORT; + data->nfs_server.protocol = XPRT_TRANSPORT_TCP; + data->selected_flavor = RPC_AUTH_MAXFLAVOR; + data->minorversion = 0; + data->need_mount = true; + data->net = current->nsproxy->net_ns; + data->lsm_opts = NULL; + } + return data; +} + +void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) +{ + if (data) { + kfree(data->client_address); + kfree(data->mount_server.hostname); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); + security_free_mnt_opts(&data->lsm_opts); + kfree(data); + } +} + +/* + * Sanity-check a server address provided by the mount command. + * + * Address family must be initialized, and address must not be + * the ANY address for that family. + */ +static int nfs_verify_server_address(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *)addr; + return sa->sin_addr.s_addr != htonl(INADDR_ANY); + } + case AF_INET6: { + struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; + return !ipv6_addr_any(sa); + } + } + + dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); + return 0; +} + +/* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + break; + default: + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + nfs_validate_transport_protocol(mnt); + + if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || + mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) + return; + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * Add 'flavor' to 'auth_info' if not already present. + * Returns true if 'flavor' ends up in the list, false otherwise + */ +static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, + rpc_authflavor_t flavor) +{ + unsigned int i; + unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); + + /* make sure this flavor isn't already in the list */ + for (i = 0; i < auth_info->flavor_len; i++) { + if (flavor == auth_info->flavors[i]) + return true; + } + + if (auth_info->flavor_len + 1 >= max_flavor_len) { + dfprintk(MOUNT, "NFS: too many sec= flavors\n"); + return false; + } + + auth_info->flavors[auth_info->flavor_len++] = flavor; + return true; +} + +/* + * Parse the value of the 'sec=' option. 
+ */ +static int nfs_parse_security_flavors(char *value, + struct nfs_parsed_mount_data *mnt) +{ + substring_t args[MAX_OPT_ARGS]; + rpc_authflavor_t pseudoflavor; + char *p; + + dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); + + while ((p = strsep(&value, ":")) != NULL) { + switch (match_token(p, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + pseudoflavor = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + pseudoflavor = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + pseudoflavor = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + pseudoflavor = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + pseudoflavor = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + pseudoflavor = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + pseudoflavor = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + pseudoflavor = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + pseudoflavor = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + pseudoflavor = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + pseudoflavor = RPC_AUTH_GSS_SPKMP; + break; + default: + dfprintk(MOUNT, + "NFS: sec= option '%s' not recognized\n", p); + return 0; + } + + if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) + return 0; + } + + return 1; +} + +static int nfs_parse_version_string(char *string, + struct nfs_parsed_mount_data *mnt, + substring_t *args) +{ + mnt->flags &= ~NFS_MOUNT_VER3; + switch (match_token(string, nfs_vers_tokens, args)) { + case Opt_vers_2: + mnt->version = 2; + break; + case Opt_vers_3: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; + case Opt_vers_4: + /* Backward compatibility option. In future, + * the mount program should always supply + * a NFSv4 minor version number. + */ + mnt->version = 4; + break; + case Opt_vers_4_0: + mnt->version = 4; + mnt->minorversion = 0; + break; + case Opt_vers_4_1: + mnt->version = 4; + mnt->minorversion = 1; + break; + case Opt_vers_4_2: + mnt->version = 4; + mnt->minorversion = 2; + break; + default: + return 0; + } + return 1; +} + +static int nfs_get_option_str(substring_t args[], char **option) +{ + kfree(*option); + *option = match_strdup(args); + return !*option; +} + +static int nfs_get_option_ul(substring_t args[], unsigned long *option) +{ + int rc; + char *string; + + string = match_strdup(args); + if (string == NULL) + return -ENOMEM; + rc = kstrtoul(string, 10, option); + kfree(string); + + return rc; +} + +static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, + unsigned long l_bound, unsigned long u_bound) +{ + int ret; + + ret = nfs_get_option_ul(args, option); + if (ret != 0) + return ret; + if (*option < l_bound || *option > u_bound) + return -ERANGE; + return 0; +} + +/* + * Error-check and convert a string of mount options from user space into + * a data structure. The whole mount string is processed; bad options are + * skipped as they are encountered. If there were no errors, return 1; + * otherwise return 0 (zero). 
+ */ +int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) +{ + char *p, *string; + int rc, sloppy = 0, invalid_option = 0; + unsigned short protofamily = AF_UNSPEC; + unsigned short mountfamily = AF_UNSPEC; + + if (!raw) { + dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); + return 1; + } + dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); + + rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts); + if (rc) + goto out_security_failure; + + while ((p = strsep(&raw, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + unsigned long option; + int token; + + if (!*p) + continue; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + + token = match_token(p, nfs_mount_option_tokens, args); + switch (token) { + + /* + * boolean options: foo/nofoo + */ + case Opt_soft: + mnt->flags |= NFS_MOUNT_SOFT; + mnt->flags &= ~NFS_MOUNT_SOFTERR; + break; + case Opt_softerr: + mnt->flags |= NFS_MOUNT_SOFTERR; + mnt->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_hard: + mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); + break; + case Opt_posix: + mnt->flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + mnt->flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + mnt->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + mnt->flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + mnt->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + mnt->flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + mnt->flags &= ~NFS_MOUNT_NONLM; + mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_nolock: + mnt->flags |= NFS_MOUNT_NONLM; + mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_rdma: + mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ + mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(p); + break; + case Opt_acl: + mnt->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + mnt->flags |= NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_nordirplus: + mnt->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + mnt->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_nosharecache: + mnt->flags |= NFS_MOUNT_UNSHARED; + break; + case Opt_resvport: + mnt->flags &= ~NFS_MOUNT_NORESVPORT; + break; + case Opt_noresvport: + mnt->flags |= NFS_MOUNT_NORESVPORT; + break; + case Opt_fscache: + mnt->options |= NFS_OPTION_FSCACHE; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; + case Opt_nofscache: + mnt->options &= ~NFS_OPTION_FSCACHE; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; + case Opt_migration: + mnt->options |= NFS_OPTION_MIGRATION; + break; + case Opt_nomigration: + mnt->options &= ~NFS_OPTION_MIGRATION; + break; + + /* + * options that take numeric values + */ + case Opt_port: + if (nfs_get_option_ul(args, &option) || + option > USHRT_MAX) + goto out_invalid_value; + mnt->nfs_server.port = option; + break; + case Opt_rsize: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->rsize = option; + break; + case Opt_wsize: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->wsize = option; + break; + case Opt_bsize: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->bsize = option; + break; + case Opt_timeo: + if 
(nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) + goto out_invalid_value; + mnt->timeo = option; + break; + case Opt_retrans: + if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) + goto out_invalid_value; + mnt->retrans = option; + break; + case Opt_acregmin: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->acregmin = option; + break; + case Opt_acregmax: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->acregmax = option; + break; + case Opt_acdirmin: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->acdirmin = option; + break; + case Opt_acdirmax: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->acdirmax = option; + break; + case Opt_actimeo: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->acregmin = mnt->acregmax = + mnt->acdirmin = mnt->acdirmax = option; + break; + case Opt_namelen: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + mnt->namlen = option; + break; + case Opt_mountport: + if (nfs_get_option_ul(args, &option) || + option > USHRT_MAX) + goto out_invalid_value; + mnt->mount_server.port = option; + break; + case Opt_mountvers: + if (nfs_get_option_ul(args, &option) || + option < NFS_MNT_VERSION || + option > NFS_MNT3_VERSION) + goto out_invalid_value; + mnt->mount_server.version = option; + break; + case Opt_minorversion: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + if (option > NFS4_MAX_MINOR_VERSION) + goto out_invalid_value; + mnt->minorversion = option; + break; + + /* + * options that take text values + */ + case Opt_nfsvers: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_version_string(string, mnt, args); + kfree(string); + if (!rc) + goto out_invalid_value; + break; + case Opt_sec: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_security_flavors(string, mnt); + kfree(string); + if (!rc) { + dfprintk(MOUNT, "NFS: unrecognized " + "security flavor\n"); + return 0; + } + break; + case Opt_proto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + + protofamily = AF_INET; + switch (token) { + case Opt_xprt_udp6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_rdma: + /* vector side protocols to TCP */ + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(string); + break; + default: + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); + kfree(string); + return 0; + } + kfree(string); + break; + case Opt_mountproto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + mountfamily = AF_INET; + switch (token) { + case Opt_xprt_udp6: + mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + break; + case 
Opt_xprt_rdma: /* not used for side protocols */ + default: + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); + return 0; + } + break; + case Opt_addr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->nfs_server.addrlen = + rpc_pton(mnt->net, string, strlen(string), + (struct sockaddr *) + &mnt->nfs_server.address, + sizeof(mnt->nfs_server.address)); + kfree(string); + if (mnt->nfs_server.addrlen == 0) + goto out_invalid_address; + break; + case Opt_clientaddr: + if (nfs_get_option_str(args, &mnt->client_address)) + goto out_nomem; + break; + case Opt_mounthost: + if (nfs_get_option_str(args, + &mnt->mount_server.hostname)) + goto out_nomem; + break; + case Opt_mountaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->mount_server.addrlen = + rpc_pton(mnt->net, string, strlen(string), + (struct sockaddr *) + &mnt->mount_server.address, + sizeof(mnt->mount_server.address)); + kfree(string); + if (mnt->mount_server.addrlen == 0) + goto out_invalid_address; + break; + case Opt_nconnect: + if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) + goto out_invalid_value; + mnt->nfs_server.nconnect = option; + break; + case Opt_lookupcache: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_lookupcache_tokens, args); + kfree(string); + switch (token) { + case Opt_lookupcache_all: + mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); + break; + case Opt_lookupcache_positive: + mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; + break; + case Opt_lookupcache_none: + mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; + break; + default: + dfprintk(MOUNT, "NFS: invalid " + "lookupcache argument\n"); + return 0; + } + break; + case Opt_fscache_uniq: + if (nfs_get_option_str(args, &mnt->fscache_uniq)) + goto out_nomem; + mnt->options |= NFS_OPTION_FSCACHE; + break; + case Opt_local_lock: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_local_lock_tokens, + args); + kfree(string); + switch (token) { + case Opt_local_lock_all: + mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_local_lock_flock: + mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; + break; + case Opt_local_lock_posix: + mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; + break; + case Opt_local_lock_none: + mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + default: + dfprintk(MOUNT, "NFS: invalid " + "local_lock argument\n"); + return 0; + } + break; + + /* + * Special options + */ + case Opt_sloppy: + sloppy = 1; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; + case Opt_userspace: + case Opt_deprecated: + dfprintk(MOUNT, "NFS: ignoring mount option " + "'%s'\n", p); + break; + + default: + invalid_option = 1; + dfprintk(MOUNT, "NFS: unrecognized mount option " + "'%s'\n", p); + } + } + + if (!sloppy && invalid_option) + return 0; + + if (mnt->minorversion && mnt->version != 4) + goto out_minorversion_mismatch; + + if (mnt->options & NFS_OPTION_MIGRATION && + (mnt->version != 4 || mnt->minorversion != 0)) + goto out_migration_misuse; + + /* + * verify that any proto=/mountproto= options match the address + * families in the addr=/mountaddr= options. 
+ */ + if (protofamily != AF_UNSPEC && + protofamily != mnt->nfs_server.address.ss_family) + goto out_proto_mismatch; + + if (mountfamily != AF_UNSPEC) { + if (mnt->mount_server.addrlen) { + if (mountfamily != mnt->mount_server.address.ss_family) + goto out_mountproto_mismatch; + } else { + if (mountfamily != mnt->nfs_server.address.ss_family) + goto out_mountproto_mismatch; + } + } + + return 1; + +out_mountproto_mismatch: + printk(KERN_INFO "NFS: mount server address does not match mountproto= " + "option\n"); + return 0; +out_proto_mismatch: + printk(KERN_INFO "NFS: server address does not match proto= option\n"); + return 0; +out_invalid_address: + printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); + return 0; +out_invalid_value: + printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); + return 0; +out_minorversion_mismatch: + printk(KERN_INFO "NFS: mount option vers=%u does not support " + "minorversion=%u\n", mnt->version, mnt->minorversion); + return 0; +out_migration_misuse: + printk(KERN_INFO + "NFS: 'migration' not supported for this NFS version\n"); + return 0; +out_nomem: + printk(KERN_INFO "NFS: not enough memory to parse option\n"); + return 0; +out_security_failure: + printk(KERN_INFO "NFS: security options invalid: %d\n", rc); + return 0; +} + +/* + * Split "dev_name" into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *end; + + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + + /* Is the host name protected with square brakcets? */ + if (*dev_name == '[') { + end = strchr(++dev_name, ']'); + if (end == NULL || end[1] != ':') + goto out_bad_devname; + + len = end - dev_name; + end++; + } else { + char *comma; + + end = strchr(dev_name, ':'); + if (end == NULL) + goto out_bad_devname; + len = end - dev_name; + + /* kill possible hostname list: not supported */ + comma = strchr(dev_name, ','); + if (comma != NULL && comma < end) + len = comma - dev_name; + } + + if (len > maxnamlen) + goto out_hostname; + + /* N.B. 
caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (*hostname == NULL) + goto out_nomem; + len = strlen(++end); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(end, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* + * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + */ +static int nfs23_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name) +{ + struct nfs_mount_data *data = (struct nfs_mount_data *)options; + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; + + if (data == NULL) + goto out_no_data; + + args->version = NFS_DEFAULT_VERSION; + switch (data->version) { + case 1: + data->namlen = 0; /* fall through */ + case 2: + data->bsize = 0; /* fall through */ + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + /* Turn off security negotiation */ + extra_flags |= NFS_MOUNT_SECFLAVOUR; + /* fall through */ + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + /* fall through */ + case 5: + memset(data->context, 0, sizeof(data->context)); + /* fall through */ + case 6: + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; + mntfh->size = data->root.size; + args->version = 3; + } else { + mntfh->size = NFS2_FHSIZE; + args->version = 2; + } + + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + + /* + * Translate to nfs_parsed_mount_data, which nfs_fill_super + * can deal with. + */ + args->flags = data->flags & NFS_MOUNT_FLAGMASK; + args->flags |= extra_flags; + args->rsize = data->rsize; + args->wsize = data->wsize; + args->timeo = data->timeo; + args->retrans = data->retrans; + args->acregmin = data->acregmin; + args->acregmax = data->acregmax; + args->acdirmin = data->acdirmin; + args->acdirmax = data->acdirmax; + args->need_mount = false; + + memcpy(sap, &data->addr, sizeof(data->addr)); + args->nfs_server.addrlen = sizeof(data->addr); + args->nfs_server.port = ntohs(data->addr.sin_port); + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) + goto out_no_address; + + if (!(data->flags & NFS_MOUNT_TCP)) + args->nfs_server.protocol = XPRT_TRANSPORT_UDP; + /* N.B. 
caller will free nfs_server.hostname in all cases */ + args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + args->namlen = data->namlen; + args->bsize = data->bsize; + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + args->selected_flavor = data->pseudoflavor; + else + args->selected_flavor = RPC_AUTH_UNIX; + if (!args->nfs_server.hostname) + goto out_nomem; + + if (!(data->flags & NFS_MOUNT_NONLM)) + args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + else + args->flags |= (NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + /* + * The legacy version 6 binary mount data from userspace has a + * field used only to transport selinux information into the + * the kernel. To continue to support that functionality we + * have a touch of selinux knowledge here in the NFS code. The + * userspace code converted context=blah to just blah so we are + * converting back to the full string selinux understands. + */ + if (data->context[0]){ +#ifdef CONFIG_SECURITY_SELINUX + int rc; + data->context[NFS_MAX_CONTEXT_LEN] = '\0'; + rc = security_add_mnt_opt("context", data->context, + strlen(data->context), &args->lsm_opts); + if (rc) + return rc; +#else + return -EINVAL; +#endif + } + + break; + default: + return NFS_TEXT_DATA; + } + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_no_v3: + dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", + data->version); + return -EINVAL; + +out_no_sec: + dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); + return -ENOMEM; + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; + +out_invalid_fh: + dfprintk(MOUNT, "NFS: invalid root filehandle\n"); + return -EINVAL; +} + +#if IS_ENABLED(CONFIG_NFS_V4) + +static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) +{ + args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| + NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); +} + +/* + * Validate NFSv4 mount options + */ +static int nfs4_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name) +{ + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; + char *c; + + if (data == NULL) + goto out_no_data; + + args->version = 4; + + switch (data->version) { + case 1: + if (data->host_addrlen > sizeof(args->nfs_server.address)) + goto out_no_address; + if (data->host_addrlen == 0) + goto out_no_address; + args->nfs_server.addrlen = data->host_addrlen; + if (copy_from_user(sap, data->host_addr, data->host_addrlen)) + return -EFAULT; + if (!nfs_verify_server_address(sap)) + goto out_no_address; + args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); + + if (data->auth_flavourlen) { + rpc_authflavor_t pseudoflavor; + if (data->auth_flavourlen > 1) + goto out_inval_auth; + if (copy_from_user(&pseudoflavor, + data->auth_flavours, + sizeof(pseudoflavor))) + return -EFAULT; + args->selected_flavor = pseudoflavor; + } else + args->selected_flavor = RPC_AUTH_UNIX; + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + args->nfs_server.hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + args->nfs_server.export_path = c; + 
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + args->client_address = c; + + /* + * Translate to nfs_parsed_mount_data, which nfs4_fill_super + * can deal with. + */ + + args->flags = data->flags & NFS4_MOUNT_FLAGMASK; + args->rsize = data->rsize; + args->wsize = data->wsize; + args->timeo = data->timeo; + args->retrans = data->retrans; + args->acregmin = data->acregmin; + args->acregmax = data->acregmax; + args->acdirmin = data->acdirmin; + args->acdirmax = data->acdirmax; + args->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + + break; + default: + return NFS_TEXT_DATA; + } + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_inval_auth: + dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", + data->auth_flavourlen); + return -EINVAL; + +out_no_address: + dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); + return -EINVAL; + +out_invalid_transport_udp: + dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); + return -EINVAL; +} + +int nfs_validate_mount_data(struct file_system_type *fs_type, + void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name) +{ + if (fs_type == &nfs_fs_type) + return nfs23_validate_mount_data(options, args, mntfh, dev_name); + return nfs4_validate_mount_data(options, args, dev_name); +} +#else +int nfs_validate_mount_data(struct file_system_type *fs_type, + void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name) +{ + return nfs23_validate_mount_data(options, args, mntfh, dev_name); +} +#endif + +int nfs_validate_text_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name) +{ + int port = 0; + int max_namelen = PAGE_SIZE; + int max_pathlen = NFS_MAXPATHLEN; + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + + if (nfs_parse_mount_options((char *)options, args) == 0) + return -EINVAL; + + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (args->version == 4) { +#if IS_ENABLED(CONFIG_NFS_V4) + if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + else + port = NFS_PORT; + max_namelen = NFS4_MAXNAMLEN; + max_pathlen = NFS4_MAXPATHLEN; + nfs_validate_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + nfs4_validate_mount_flags(args); +#else + goto out_v4_not_compiled; +#endif /* CONFIG_NFS_V4 */ + } else { + nfs_set_mount_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + } + + nfs_set_port(sap, &args->nfs_server.port, port); + + return nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + max_namelen, + &args->nfs_server.export_path, + max_pathlen); + +#if !IS_ENABLED(CONFIG_NFS_V4) +out_v4_not_compiled: + dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#else +out_invalid_transport_udp: + dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); + return -EINVAL; +#endif /* !CONFIG_NFS_V4 */ + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; +} diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a467e43fc682..28ab31fc5aa6 100644 --- 
a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -224,6 +225,22 @@ extern const struct svc_version nfs4_callback_version1; extern const struct svc_version nfs4_callback_version4; struct nfs_pageio_descriptor; + +/* mount.c */ +#define NFS_TEXT_DATA 1 + +extern struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void); +extern void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data); +extern int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt); +extern int nfs_validate_mount_data(struct file_system_type *fs_type, + void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name); +extern int nfs_validate_text_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name); + /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); @@ -765,3 +782,16 @@ static inline bool nfs_error_is_fatal_on_server(int err) } return nfs_error_is_fatal(err); } + +/* + * Select between a default port value and a user-specified port value. + * If a zero value is set, then autobind will be used. + */ +static inline void nfs_set_port(struct sockaddr *sap, int *port, + const unsigned short default_port) +{ + if (*port == NFS_UNSPEC_PORT) + *port = default_port; + + rpc_set_port(sap, *port); +} diff --git a/fs/nfs/super.c b/fs/nfs/super.c index de00f89dbe6e..b07585f62c65 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -69,229 +69,6 @@ #include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_TEXT_DATA 1 - -#if IS_ENABLED(CONFIG_NFS_V3) -#define NFS_DEFAULT_VERSION 3 -#else -#define NFS_DEFAULT_VERSION 2 -#endif - -#define NFS_MAX_CONNECTIONS 16 - -enum { - /* Mount options that take no arguments */ - Opt_soft, Opt_softerr, Opt_hard, - Opt_posix, Opt_noposix, - Opt_cto, Opt_nocto, - Opt_ac, Opt_noac, - Opt_lock, Opt_nolock, - Opt_udp, Opt_tcp, Opt_rdma, - Opt_acl, Opt_noacl, - Opt_rdirplus, Opt_nordirplus, - Opt_sharecache, Opt_nosharecache, - Opt_resvport, Opt_noresvport, - Opt_fscache, Opt_nofscache, - Opt_migration, Opt_nomigration, - - /* Mount options that take integer arguments */ - Opt_port, - Opt_rsize, Opt_wsize, Opt_bsize, - Opt_timeo, Opt_retrans, - Opt_acregmin, Opt_acregmax, - Opt_acdirmin, Opt_acdirmax, - Opt_actimeo, - Opt_namelen, - Opt_mountport, - Opt_mountvers, - Opt_minorversion, - - /* Mount options that take string arguments */ - Opt_nfsvers, - Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, - Opt_addr, Opt_mountaddr, Opt_clientaddr, - Opt_nconnect, - Opt_lookupcache, - Opt_fscache_uniq, - Opt_local_lock, - - /* Special mount options */ - Opt_userspace, Opt_deprecated, Opt_sloppy, - - Opt_err -}; - -static const match_table_t nfs_mount_option_tokens = { - { Opt_userspace, "bg" }, - { Opt_userspace, "fg" }, - { Opt_userspace, "retry=%s" }, - - { Opt_sloppy, "sloppy" }, - - { Opt_soft, "soft" }, - { Opt_softerr, "softerr" }, - { Opt_hard, "hard" }, - { Opt_deprecated, "intr" }, - { Opt_deprecated, "nointr" }, - { Opt_posix, "posix" }, - { Opt_noposix, "noposix" }, - { Opt_cto, "cto" }, - { Opt_nocto, "nocto" }, - { Opt_ac, "ac" }, - { Opt_noac, "noac" }, - { Opt_lock, "lock" }, - { Opt_nolock, "nolock" }, - { Opt_udp, "udp" }, - { Opt_tcp, "tcp" }, - { Opt_rdma, "rdma" }, - { Opt_acl, "acl" }, - { Opt_noacl, "noacl" }, - { Opt_rdirplus, "rdirplus" }, - { Opt_nordirplus, "nordirplus" }, - { Opt_sharecache, "sharecache" }, - { Opt_nosharecache, "nosharecache" }, - { 
Opt_resvport, "resvport" }, - { Opt_noresvport, "noresvport" }, - { Opt_fscache, "fsc" }, - { Opt_nofscache, "nofsc" }, - { Opt_migration, "migration" }, - { Opt_nomigration, "nomigration" }, - - { Opt_port, "port=%s" }, - { Opt_rsize, "rsize=%s" }, - { Opt_wsize, "wsize=%s" }, - { Opt_bsize, "bsize=%s" }, - { Opt_timeo, "timeo=%s" }, - { Opt_retrans, "retrans=%s" }, - { Opt_acregmin, "acregmin=%s" }, - { Opt_acregmax, "acregmax=%s" }, - { Opt_acdirmin, "acdirmin=%s" }, - { Opt_acdirmax, "acdirmax=%s" }, - { Opt_actimeo, "actimeo=%s" }, - { Opt_namelen, "namlen=%s" }, - { Opt_mountport, "mountport=%s" }, - { Opt_mountvers, "mountvers=%s" }, - { Opt_minorversion, "minorversion=%s" }, - - { Opt_nfsvers, "nfsvers=%s" }, - { Opt_nfsvers, "vers=%s" }, - - { Opt_sec, "sec=%s" }, - { Opt_proto, "proto=%s" }, - { Opt_mountproto, "mountproto=%s" }, - { Opt_addr, "addr=%s" }, - { Opt_clientaddr, "clientaddr=%s" }, - { Opt_mounthost, "mounthost=%s" }, - { Opt_mountaddr, "mountaddr=%s" }, - - { Opt_nconnect, "nconnect=%s" }, - - { Opt_lookupcache, "lookupcache=%s" }, - { Opt_fscache_uniq, "fsc=%s" }, - { Opt_local_lock, "local_lock=%s" }, - - /* The following needs to be listed after all other options */ - { Opt_nfsvers, "v%s" }, - - { Opt_err, NULL } -}; - -enum { - Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, - Opt_xprt_rdma6, - - Opt_xprt_err -}; - -static const match_table_t nfs_xprt_protocol_tokens = { - { Opt_xprt_udp, "udp" }, - { Opt_xprt_udp6, "udp6" }, - { Opt_xprt_tcp, "tcp" }, - { Opt_xprt_tcp6, "tcp6" }, - { Opt_xprt_rdma, "rdma" }, - { Opt_xprt_rdma6, "rdma6" }, - - { Opt_xprt_err, NULL } -}; - -enum { - Opt_sec_none, Opt_sec_sys, - Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, - Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, - Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, - - Opt_sec_err -}; - -static const match_table_t nfs_secflavor_tokens = { - { Opt_sec_none, "none" }, - { Opt_sec_none, "null" }, - { Opt_sec_sys, "sys" }, - - { Opt_sec_krb5, "krb5" }, - { Opt_sec_krb5i, "krb5i" }, - { Opt_sec_krb5p, "krb5p" }, - - { Opt_sec_lkey, "lkey" }, - { Opt_sec_lkeyi, "lkeyi" }, - { Opt_sec_lkeyp, "lkeyp" }, - - { Opt_sec_spkm, "spkm3" }, - { Opt_sec_spkmi, "spkm3i" }, - { Opt_sec_spkmp, "spkm3p" }, - - { Opt_sec_err, NULL } -}; - -enum { - Opt_lookupcache_all, Opt_lookupcache_positive, - Opt_lookupcache_none, - - Opt_lookupcache_err -}; - -static match_table_t nfs_lookupcache_tokens = { - { Opt_lookupcache_all, "all" }, - { Opt_lookupcache_positive, "pos" }, - { Opt_lookupcache_positive, "positive" }, - { Opt_lookupcache_none, "none" }, - - { Opt_lookupcache_err, NULL } -}; - -enum { - Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, - Opt_local_lock_none, - - Opt_local_lock_err -}; - -static match_table_t nfs_local_lock_tokens = { - { Opt_local_lock_all, "all" }, - { Opt_local_lock_flock, "flock" }, - { Opt_local_lock_posix, "posix" }, - { Opt_local_lock_none, "none" }, - - { Opt_local_lock_err, NULL } -}; - -enum { - Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, - Opt_vers_4_1, Opt_vers_4_2, - - Opt_vers_err -}; - -static match_table_t nfs_vers_tokens = { - { Opt_vers_2, "2" }, - { Opt_vers_3, "3" }, - { Opt_vers_4, "4" }, - { Opt_vers_4_0, "4.0" }, - { Opt_vers_4_1, "4.1" }, - { Opt_vers_4_2, "4.2" }, - - { Opt_vers_err, NULL } -}; static struct dentry *nfs_prepared_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); @@ -332,10 +109,6 @@ const struct super_operations nfs_sops = { EXPORT_SYMBOL_GPL(nfs_sops); #if 
IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, const char *dev_name); - struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -932,141 +705,6 @@ void nfs_umount_begin(struct super_block *sb) } EXPORT_SYMBOL_GPL(nfs_umount_begin); -static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) -{ - struct nfs_parsed_mount_data *data; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data) { - data->timeo = NFS_UNSPEC_TIMEO; - data->retrans = NFS_UNSPEC_RETRANS; - data->acregmin = NFS_DEF_ACREGMIN; - data->acregmax = NFS_DEF_ACREGMAX; - data->acdirmin = NFS_DEF_ACDIRMIN; - data->acdirmax = NFS_DEF_ACDIRMAX; - data->mount_server.port = NFS_UNSPEC_PORT; - data->nfs_server.port = NFS_UNSPEC_PORT; - data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->selected_flavor = RPC_AUTH_MAXFLAVOR; - data->minorversion = 0; - data->need_mount = true; - data->net = current->nsproxy->net_ns; - data->lsm_opts = NULL; - } - return data; -} - -static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) -{ - if (data) { - kfree(data->client_address); - kfree(data->mount_server.hostname); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); - security_free_mnt_opts(&data->lsm_opts); - kfree(data); - } -} - -/* - * Sanity-check a server address provided by the mount command. - * - * Address family must be initialized, and address must not be - * the ANY address for that family. - */ -static int nfs_verify_server_address(struct sockaddr *addr) -{ - switch (addr->sa_family) { - case AF_INET: { - struct sockaddr_in *sa = (struct sockaddr_in *)addr; - return sa->sin_addr.s_addr != htonl(INADDR_ANY); - } - case AF_INET6: { - struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; - return !ipv6_addr_any(sa); - } - } - - dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); - return 0; -} - -/* - * Select between a default port value and a user-specified port value. - * If a zero value is set, then autobind will be used. - */ -static void nfs_set_port(struct sockaddr *sap, int *port, - const unsigned short default_port) -{ - if (*port == NFS_UNSPEC_PORT) - *port = default_port; - - rpc_set_port(sap, *port); -} - -/* - * Sanity check the NFS transport protocol. - * - */ -static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) -{ - switch (mnt->nfs_server.protocol) { - case XPRT_TRANSPORT_UDP: - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: - break; - default: - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - } -} - -/* - * For text based NFSv2/v3 mounts, the mount protocol transport default - * settings should depend upon the specified NFS transport. - */ -static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) -{ - nfs_validate_transport_protocol(mnt); - - if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || - mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) - return; - switch (mnt->nfs_server.protocol) { - case XPRT_TRANSPORT_UDP: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; - } -} - -/* - * Add 'flavor' to 'auth_info' if not already present. 
- * Returns true if 'flavor' ends up in the list, false otherwise - */ -static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, - rpc_authflavor_t flavor) -{ - unsigned int i; - unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); - - /* make sure this flavor isn't already in the list */ - for (i = 0; i < auth_info->flavor_len; i++) { - if (flavor == auth_info->flavors[i]) - return true; - } - - if (auth_info->flavor_len + 1 >= max_flavor_len) { - dfprintk(MOUNT, "NFS: too many sec= flavors\n"); - return false; - } - - auth_info->flavors[auth_info->flavor_len++] = flavor; - return true; -} - /* * Return true if 'match' is in auth_info or auth_info is empty. * Return false otherwise. @@ -1087,627 +725,6 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, } EXPORT_SYMBOL_GPL(nfs_auth_info_match); -/* - * Parse the value of the 'sec=' option. - */ -static int nfs_parse_security_flavors(char *value, - struct nfs_parsed_mount_data *mnt) -{ - substring_t args[MAX_OPT_ARGS]; - rpc_authflavor_t pseudoflavor; - char *p; - - dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); - - while ((p = strsep(&value, ":")) != NULL) { - switch (match_token(p, nfs_secflavor_tokens, args)) { - case Opt_sec_none: - pseudoflavor = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - pseudoflavor = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - pseudoflavor = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - pseudoflavor = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - pseudoflavor = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - pseudoflavor = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - pseudoflavor = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - pseudoflavor = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - pseudoflavor = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - pseudoflavor = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - pseudoflavor = RPC_AUTH_GSS_SPKMP; - break; - default: - dfprintk(MOUNT, - "NFS: sec= option '%s' not recognized\n", p); - return 0; - } - - if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) - return 0; - } - - return 1; -} - -static int nfs_parse_version_string(char *string, - struct nfs_parsed_mount_data *mnt, - substring_t *args) -{ - mnt->flags &= ~NFS_MOUNT_VER3; - switch (match_token(string, nfs_vers_tokens, args)) { - case Opt_vers_2: - mnt->version = 2; - break; - case Opt_vers_3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case Opt_vers_4: - /* Backward compatibility option. In future, - * the mount program should always supply - * a NFSv4 minor version number. 
- */ - mnt->version = 4; - break; - case Opt_vers_4_0: - mnt->version = 4; - mnt->minorversion = 0; - break; - case Opt_vers_4_1: - mnt->version = 4; - mnt->minorversion = 1; - break; - case Opt_vers_4_2: - mnt->version = 4; - mnt->minorversion = 2; - break; - default: - return 0; - } - return 1; -} - -static int nfs_get_option_str(substring_t args[], char **option) -{ - kfree(*option); - *option = match_strdup(args); - return !*option; -} - -static int nfs_get_option_ul(substring_t args[], unsigned long *option) -{ - int rc; - char *string; - - string = match_strdup(args); - if (string == NULL) - return -ENOMEM; - rc = kstrtoul(string, 10, option); - kfree(string); - - return rc; -} - -static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, - unsigned long l_bound, unsigned long u_bound) -{ - int ret; - - ret = nfs_get_option_ul(args, option); - if (ret != 0) - return ret; - if (*option < l_bound || *option > u_bound) - return -ERANGE; - return 0; -} - -/* - * Error-check and convert a string of mount options from user space into - * a data structure. The whole mount string is processed; bad options are - * skipped as they are encountered. If there were no errors, return 1; - * otherwise return 0 (zero). - */ -static int nfs_parse_mount_options(char *raw, - struct nfs_parsed_mount_data *mnt) -{ - char *p, *string; - int rc, sloppy = 0, invalid_option = 0; - unsigned short protofamily = AF_UNSPEC; - unsigned short mountfamily = AF_UNSPEC; - - if (!raw) { - dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); - return 1; - } - dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - - rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts); - if (rc) - goto out_security_failure; - - while ((p = strsep(&raw, ",")) != NULL) { - substring_t args[MAX_OPT_ARGS]; - unsigned long option; - int token; - - if (!*p) - continue; - - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); - - token = match_token(p, nfs_mount_option_tokens, args); - switch (token) { - - /* - * boolean options: foo/nofoo - */ - case Opt_soft: - mnt->flags |= NFS_MOUNT_SOFT; - mnt->flags &= ~NFS_MOUNT_SOFTERR; - break; - case Opt_softerr: - mnt->flags |= NFS_MOUNT_SOFTERR; - mnt->flags &= ~NFS_MOUNT_SOFT; - break; - case Opt_hard: - mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); - break; - case Opt_posix: - mnt->flags |= NFS_MOUNT_POSIX; - break; - case Opt_noposix: - mnt->flags &= ~NFS_MOUNT_POSIX; - break; - case Opt_cto: - mnt->flags &= ~NFS_MOUNT_NOCTO; - break; - case Opt_nocto: - mnt->flags |= NFS_MOUNT_NOCTO; - break; - case Opt_ac: - mnt->flags &= ~NFS_MOUNT_NOAC; - break; - case Opt_noac: - mnt->flags |= NFS_MOUNT_NOAC; - break; - case Opt_lock: - mnt->flags &= ~NFS_MOUNT_NONLM; - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_nolock: - mnt->flags |= NFS_MOUNT_NONLM; - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_rdma: - mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(p); - break; - case Opt_acl: - mnt->flags &= ~NFS_MOUNT_NOACL; - break; - case Opt_noacl: - mnt->flags |= NFS_MOUNT_NOACL; - break; - case Opt_rdirplus: - mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; - break; - case Opt_nordirplus: - mnt->flags |= NFS_MOUNT_NORDIRPLUS; 
- break; - case Opt_sharecache: - mnt->flags &= ~NFS_MOUNT_UNSHARED; - break; - case Opt_nosharecache: - mnt->flags |= NFS_MOUNT_UNSHARED; - break; - case Opt_resvport: - mnt->flags &= ~NFS_MOUNT_NORESVPORT; - break; - case Opt_noresvport: - mnt->flags |= NFS_MOUNT_NORESVPORT; - break; - case Opt_fscache: - mnt->options |= NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; - break; - case Opt_nofscache: - mnt->options &= ~NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; - break; - case Opt_migration: - mnt->options |= NFS_OPTION_MIGRATION; - break; - case Opt_nomigration: - mnt->options &= ~NFS_OPTION_MIGRATION; - break; - - /* - * options that take numeric values - */ - case Opt_port: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - mnt->nfs_server.port = option; - break; - case Opt_rsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->rsize = option; - break; - case Opt_wsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->wsize = option; - break; - case Opt_bsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->bsize = option; - break; - case Opt_timeo: - if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) - goto out_invalid_value; - mnt->timeo = option; - break; - case Opt_retrans: - if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) - goto out_invalid_value; - mnt->retrans = option; - break; - case Opt_acregmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmin = option; - break; - case Opt_acregmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmax = option; - break; - case Opt_acdirmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acdirmin = option; - break; - case Opt_acdirmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acdirmax = option; - break; - case Opt_actimeo: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmin = mnt->acregmax = - mnt->acdirmin = mnt->acdirmax = option; - break; - case Opt_namelen: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->namlen = option; - break; - case Opt_mountport: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - mnt->mount_server.port = option; - break; - case Opt_mountvers: - if (nfs_get_option_ul(args, &option) || - option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) - goto out_invalid_value; - mnt->mount_server.version = option; - break; - case Opt_minorversion: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - if (option > NFS4_MAX_MINOR_VERSION) - goto out_invalid_value; - mnt->minorversion = option; - break; - - /* - * options that take text values - */ - case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_version_string(string, mnt, args); - kfree(string); - if (!rc) - goto out_invalid_value; - break; - case Opt_sec: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_security_flavors(string, mnt); - kfree(string); - if (!rc) { - dfprintk(MOUNT, "NFS: unrecognized " - "security flavor\n"); - return 0; - } - break; - case Opt_proto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); - - protofamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - 
protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_rdma: - /* vector side protocols to TCP */ - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(string); - break; - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - kfree(string); - return 0; - } - kfree(string); - break; - case Opt_mountproto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); - kfree(string); - - mountfamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma: /* not used for side protocols */ - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - return 0; - } - break; - case Opt_addr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - mnt->nfs_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), - (struct sockaddr *) - &mnt->nfs_server.address, - sizeof(mnt->nfs_server.address)); - kfree(string); - if (mnt->nfs_server.addrlen == 0) - goto out_invalid_address; - break; - case Opt_clientaddr: - if (nfs_get_option_str(args, &mnt->client_address)) - goto out_nomem; - break; - case Opt_mounthost: - if (nfs_get_option_str(args, - &mnt->mount_server.hostname)) - goto out_nomem; - break; - case Opt_mountaddr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - mnt->mount_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), - (struct sockaddr *) - &mnt->mount_server.address, - sizeof(mnt->mount_server.address)); - kfree(string); - if (mnt->mount_server.addrlen == 0) - goto out_invalid_address; - break; - case Opt_nconnect: - if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) - goto out_invalid_value; - mnt->nfs_server.nconnect = option; - break; - case Opt_lookupcache: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_lookupcache_tokens, args); - kfree(string); - switch (token) { - case Opt_lookupcache_all: - mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); - break; - case Opt_lookupcache_positive: - mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; - break; - case Opt_lookupcache_none: - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "lookupcache argument\n"); - return 0; - } - break; - case Opt_fscache_uniq: - if (nfs_get_option_str(args, &mnt->fscache_uniq)) - goto out_nomem; - mnt->options |= NFS_OPTION_FSCACHE; - break; - case Opt_local_lock: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_local_lock_tokens, - args); - kfree(string); - switch (token) { - case Opt_local_lock_all: - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - 
break; - case Opt_local_lock_flock: - mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; - break; - case Opt_local_lock_posix: - mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; - break; - case Opt_local_lock_none: - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "local_lock argument\n"); - return 0; - } - break; - - /* - * Special options - */ - case Opt_sloppy: - sloppy = 1; - dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); - break; - case Opt_userspace: - case Opt_deprecated: - dfprintk(MOUNT, "NFS: ignoring mount option " - "'%s'\n", p); - break; - - default: - invalid_option = 1; - dfprintk(MOUNT, "NFS: unrecognized mount option " - "'%s'\n", p); - } - } - - if (!sloppy && invalid_option) - return 0; - - if (mnt->minorversion && mnt->version != 4) - goto out_minorversion_mismatch; - - if (mnt->options & NFS_OPTION_MIGRATION && - (mnt->version != 4 || mnt->minorversion != 0)) - goto out_migration_misuse; - - /* - * verify that any proto=/mountproto= options match the address - * families in the addr=/mountaddr= options. - */ - if (protofamily != AF_UNSPEC && - protofamily != mnt->nfs_server.address.ss_family) - goto out_proto_mismatch; - - if (mountfamily != AF_UNSPEC) { - if (mnt->mount_server.addrlen) { - if (mountfamily != mnt->mount_server.address.ss_family) - goto out_mountproto_mismatch; - } else { - if (mountfamily != mnt->nfs_server.address.ss_family) - goto out_mountproto_mismatch; - } - } - - return 1; - -out_mountproto_mismatch: - printk(KERN_INFO "NFS: mount server address does not match mountproto= " - "option\n"); - return 0; -out_proto_mismatch: - printk(KERN_INFO "NFS: server address does not match proto= option\n"); - return 0; -out_invalid_address: - printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); - return 0; -out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); - return 0; -out_minorversion_mismatch: - printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", mnt->version, mnt->minorversion); - return 0; -out_migration_misuse: - printk(KERN_INFO - "NFS: 'migration' not supported for this NFS version\n"); - return 0; -out_nomem: - printk(KERN_INFO "NFS: not enough memory to parse option\n"); - return 0; -out_security_failure: - printk(KERN_INFO "NFS: security options invalid: %d\n", rc); - return 0; -} - /* * Ensure that a specified authtype in args->auth_info is supported by * the server. Returns 0 and sets args->selected_flavor if it's ok, and @@ -1908,327 +925,6 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, } EXPORT_SYMBOL_GPL(nfs_try_mount); -/* - * Split "dev_name" into "hostname:export_path". - * - * The leftmost colon demarks the split between the server's hostname - * and the export path. If the hostname starts with a left square - * bracket, then it may contain colons. - * - * Note: caller frees hostname and export path, even on error. - */ -static int nfs_parse_devname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) -{ - size_t len; - char *end; - - if (unlikely(!dev_name || !*dev_name)) { - dfprintk(MOUNT, "NFS: device name not specified\n"); - return -EINVAL; - } - - /* Is the host name protected with square brakcets? 
*/ - if (*dev_name == '[') { - end = strchr(++dev_name, ']'); - if (end == NULL || end[1] != ':') - goto out_bad_devname; - - len = end - dev_name; - end++; - } else { - char *comma; - - end = strchr(dev_name, ':'); - if (end == NULL) - goto out_bad_devname; - len = end - dev_name; - - /* kill possible hostname list: not supported */ - comma = strchr(dev_name, ','); - if (comma != NULL && comma < end) - len = comma - dev_name; - } - - if (len > maxnamlen) - goto out_hostname; - - /* N.B. caller will free nfs_server.hostname in all cases */ - *hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (*hostname == NULL) - goto out_nomem; - len = strlen(++end); - if (len > maxpathlen) - goto out_path; - *export_path = kstrndup(end, len, GFP_KERNEL); - if (!*export_path) - goto out_nomem; - - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); - return 0; - -out_bad_devname: - dfprintk(MOUNT, "NFS: device name not in host:path format\n"); - return -EINVAL; - -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); - return -ENOMEM; - -out_hostname: - dfprintk(MOUNT, "NFS: server hostname too long\n"); - return -ENAMETOOLONG; - -out_path: - dfprintk(MOUNT, "NFS: export pathname too long\n"); - return -ENAMETOOLONG; -} - -/* - * Validate the NFS2/NFS3 mount data - * - fills in the mount root filehandle - * - * For option strings, user space handles the following behaviors: - * - * + DNS: mapping server host name to IP address ("addr=" option) - * - * + failure mode: how to behave if a mount request can't be handled - * immediately ("fg/bg" option) - * - * + retry: how often to retry a mount request ("retry=" option) - * - * + breaking back: trying proto=udp after proto=tcp, v2 after v3, - * mountproto=tcp after mountproto=udp, and so on - */ -static int nfs23_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - struct nfs_mount_data *data = (struct nfs_mount_data *)options; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; - - if (data == NULL) - goto out_no_data; - - args->version = NFS_DEFAULT_VERSION; - switch (data->version) { - case 1: - data->namlen = 0; /* fall through */ - case 2: - data->bsize = 0; /* fall through */ - case 3: - if (data->flags & NFS_MOUNT_VER3) - goto out_no_v3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - /* Turn off security negotiation */ - extra_flags |= NFS_MOUNT_SECFLAVOUR; - /* fall through */ - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) - goto out_no_sec; - /* fall through */ - case 5: - memset(data->context, 0, sizeof(data->context)); - /* fall through */ - case 6: - if (data->flags & NFS_MOUNT_VER3) { - if (data->root.size > NFS3_FHSIZE || data->root.size == 0) - goto out_invalid_fh; - mntfh->size = data->root.size; - args->version = 3; - } else { - mntfh->size = NFS2_FHSIZE; - args->version = 2; - } - - - memcpy(mntfh->data, data->root.data, mntfh->size); - if (mntfh->size < sizeof(mntfh->data)) - memset(mntfh->data + mntfh->size, 0, - sizeof(mntfh->data) - mntfh->size); - - /* - * Translate to nfs_parsed_mount_data, which nfs_fill_super - * can deal with. 
- */ - args->flags = data->flags & NFS_MOUNT_FLAGMASK; - args->flags |= extra_flags; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->need_mount = false; - - memcpy(sap, &data->addr, sizeof(data->addr)); - args->nfs_server.addrlen = sizeof(data->addr); - args->nfs_server.port = ntohs(data->addr.sin_port); - if (sap->sa_family != AF_INET || - !nfs_verify_server_address(sap)) - goto out_no_address; - - if (!(data->flags & NFS_MOUNT_TCP)) - args->nfs_server.protocol = XPRT_TRANSPORT_UDP; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); - args->namlen = data->namlen; - args->bsize = data->bsize; - - if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->selected_flavor = data->pseudoflavor; - else - args->selected_flavor = RPC_AUTH_UNIX; - if (!args->nfs_server.hostname) - goto out_nomem; - - if (!(data->flags & NFS_MOUNT_NONLM)) - args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| - NFS_MOUNT_LOCAL_FCNTL); - else - args->flags |= (NFS_MOUNT_LOCAL_FLOCK| - NFS_MOUNT_LOCAL_FCNTL); - /* - * The legacy version 6 binary mount data from userspace has a - * field used only to transport selinux information into the - * the kernel. To continue to support that functionality we - * have a touch of selinux knowledge here in the NFS code. The - * userspace code converted context=blah to just blah so we are - * converting back to the full string selinux understands. - */ - if (data->context[0]){ -#ifdef CONFIG_SECURITY_SELINUX - int rc; - data->context[NFS_MAX_CONTEXT_LEN] = '\0'; - rc = security_add_mnt_opt("context", data->context, - strlen(data->context), &args->lsm_opts); - if (rc) - return rc; -#else - return -EINVAL; -#endif - } - - break; - default: - return NFS_TEXT_DATA; - } - - return 0; - -out_no_data: - dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); - return -EINVAL; - -out_no_v3: - dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", - data->version); - return -EINVAL; - -out_no_sec: - dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); - return -EINVAL; - -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); - return -ENOMEM; - -out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; - -out_invalid_fh: - dfprintk(MOUNT, "NFS: invalid root filehandle\n"); - return -EINVAL; -} - -#if IS_ENABLED(CONFIG_NFS_V4) -static int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - if (fs_type == &nfs_fs_type) - return nfs23_validate_mount_data(options, args, mntfh, dev_name); - return nfs4_validate_mount_data(options, args, dev_name); -} -#else -static int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - return nfs23_validate_mount_data(options, args, mntfh, dev_name); -} -#endif - -static int nfs_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - int port = 0; - int max_namelen = PAGE_SIZE; - int max_pathlen = NFS_MAXPATHLEN; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - - if 
(nfs_parse_mount_options((char *)options, args) == 0) - return -EINVAL; - - if (!nfs_verify_server_address(sap)) - goto out_no_address; - - if (args->version == 4) { -#if IS_ENABLED(CONFIG_NFS_V4) - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - else - port = NFS_PORT; - max_namelen = NFS4_MAXNAMLEN; - max_pathlen = NFS4_MAXPATHLEN; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - nfs4_validate_mount_flags(args); -#else - goto out_v4_not_compiled; -#endif /* CONFIG_NFS_V4 */ - } else { - nfs_set_mount_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - } - - nfs_set_port(sap, &args->nfs_server.port, port); - - return nfs_parse_devname(dev_name, - &args->nfs_server.hostname, - max_namelen, - &args->nfs_server.export_path, - max_pathlen); - -#if !IS_ENABLED(CONFIG_NFS_V4) -out_v4_not_compiled: - dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); - return -EPROTONOSUPPORT; -#else -out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; -#endif /* !CONFIG_NFS_V4 */ - -out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; -} - #define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | NFS_MOUNT_SECURE \ | NFS_MOUNT_TCP \ @@ -2735,113 +1431,6 @@ nfs_prepared_mount(struct file_system_type *fs_type, int flags, #if IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) -{ - args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| - NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); -} - -/* - * Validate NFSv4 mount options - */ -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; - char *c; - - if (data == NULL) - goto out_no_data; - - args->version = 4; - - switch (data->version) { - case 1: - if (data->host_addrlen > sizeof(args->nfs_server.address)) - goto out_no_address; - if (data->host_addrlen == 0) - goto out_no_address; - args->nfs_server.addrlen = data->host_addrlen; - if (copy_from_user(sap, data->host_addr, data->host_addrlen)) - return -EFAULT; - if (!nfs_verify_server_address(sap)) - goto out_no_address; - args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); - - if (data->auth_flavourlen) { - rpc_authflavor_t pseudoflavor; - if (data->auth_flavourlen > 1) - goto out_inval_auth; - if (copy_from_user(&pseudoflavor, - data->auth_flavours, - sizeof(pseudoflavor))) - return -EFAULT; - args->selected_flavor = pseudoflavor; - } else - args->selected_flavor = RPC_AUTH_UNIX; - - c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); - if (IS_ERR(c)) - return PTR_ERR(c); - args->nfs_server.hostname = c; - - c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); - if (IS_ERR(c)) - return PTR_ERR(c); - args->nfs_server.export_path = c; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); - - c = strndup_user(data->client_addr.data, 16); - if (IS_ERR(c)) - return PTR_ERR(c); - args->client_address = c; - - /* - * Translate to nfs_parsed_mount_data, which nfs4_fill_super - * can deal with. 
- */ - - args->flags = data->flags & NFS4_MOUNT_FLAGMASK; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->nfs_server.protocol = data->proto; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - - break; - default: - return NFS_TEXT_DATA; - } - - return 0; - -out_no_data: - dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); - return -EINVAL; - -out_inval_auth: - dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", - data->auth_flavourlen); - return -EINVAL; - -out_no_address: - dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); - return -EINVAL; - -out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; -} - /* * NFS v4 module parameters need to stay in the * NFS client for backwards compatibility From e0a626b1247496971dfbed35d104f77e286c70bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:05 -0500 Subject: [PATCH 099/658] NFS: Constify mount argument match tables The mount argument match tables should never be altered so constify them. Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index c8f99a3c7264..8fbfd526d6b8 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -199,7 +199,7 @@ enum { Opt_lookupcache_err }; -static match_table_t nfs_lookupcache_tokens = { +static const match_table_t nfs_lookupcache_tokens = { { Opt_lookupcache_all, "all" }, { Opt_lookupcache_positive, "pos" }, { Opt_lookupcache_positive, "positive" }, @@ -215,7 +215,7 @@ enum { Opt_local_lock_err }; -static match_table_t nfs_local_lock_tokens = { +static const match_table_t nfs_local_lock_tokens = { { Opt_local_lock_all, "all" }, { Opt_local_lock_flock, "flock" }, { Opt_local_lock_posix, "posix" }, @@ -231,7 +231,7 @@ enum { Opt_vers_err }; -static match_table_t nfs_vers_tokens = { +static const match_table_t nfs_vers_tokens = { { Opt_vers_2, "2" }, { Opt_vers_3, "3" }, { Opt_vers_4, "4" }, From 5eb005caf5383dd328199f0f2114cd7dad731d3d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:06 -0500 Subject: [PATCH 100/658] NFS: Rename struct nfs_parsed_mount_data to struct nfs_fs_context Rename struct nfs_parsed_mount_data to struct nfs_fs_context and rename pointers to it to "ctx". At some point this will be pointed to by an fs_context struct's fs_private pointer. 
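
As a rough illustration of that direction (not part of this patch): once the conversion to the fs_context API lands, the parsed mount options would be reachable from the VFS context through its fs_private pointer, roughly as sketched below. The helper name nfs_fc2context() is an assumption made for this sketch only.

	#include <linux/fs_context.h>

	struct nfs_fs_context;	/* the renamed parsed-mount-data structure */

	/* Recover the NFS-private parsed options from a VFS fs_context. */
	static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
	{
		return fc->fs_private;	/* expected to be set when the fs_context is allocated */
	}

Keeping the NFS-specific state behind fs_private lets the generic fs_context code remain unaware of struct nfs_fs_context.
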
Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 66 +++---- fs/nfs/fs_context.c | 444 ++++++++++++++++++++++---------------------- fs/nfs/internal.h | 14 +- fs/nfs/nfs4client.c | 60 +++--- fs/nfs/nfs4super.c | 6 +- fs/nfs/super.c | 194 +++++++++---------- 6 files changed, 392 insertions(+), 392 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a2049747adc4..0a00df8e71bb 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -658,28 +658,28 @@ EXPORT_SYMBOL_GPL(nfs_init_client); * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data, + const struct nfs_fs_context *cfg, struct nfs_subversion *nfs_mod) { struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { - .hostname = data->nfs_server.hostname, - .addr = (const struct sockaddr *)&data->nfs_server.address, - .addrlen = data->nfs_server.addrlen, + .hostname = cfg->nfs_server.hostname, + .addr = (const struct sockaddr *)&cfg->nfs_server.address, + .addrlen = cfg->nfs_server.addrlen, .nfs_mod = nfs_mod, - .proto = data->nfs_server.protocol, - .net = data->net, + .proto = cfg->nfs_server.protocol, + .net = cfg->net, .timeparms = &timeparms, .cred = server->cred, - .nconnect = data->nfs_server.nconnect, + .nconnect = cfg->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; int error; - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - if (data->flags & NFS_MOUNT_NORESVPORT) + nfs_init_timeout_values(&timeparms, cfg->nfs_server.protocol, + cfg->timeo, cfg->retrans); + if (cfg->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ @@ -690,46 +690,46 @@ static int nfs_init_server(struct nfs_server *server, server->nfs_client = clp; /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->options = data->options; + server->flags = cfg->flags; + server->options = cfg->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); + if (cfg->rsize) + server->rsize = nfs_block_size(cfg->rsize, NULL); + if (cfg->wsize) + server->wsize = nfs_block_size(cfg->wsize, NULL); - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; + server->acregmin = cfg->acregmin * HZ; + server->acregmax = cfg->acregmax * HZ; + server->acdirmin = cfg->acdirmin * HZ; + server->acdirmax = cfg->acdirmax * HZ; /* Start lockd here, before we might error out */ error = nfs_start_lockd(server); if (error < 0) goto error; - server->port = data->nfs_server.port; - server->auth_info = data->auth_info; + server->port = cfg->nfs_server.port; + server->auth_info = cfg->auth_info; error = nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + cfg->selected_flavor); if (error < 0) goto error; /* Preserve the values of mount_server-related mount options */ - if (data->mount_server.addrlen) { - memcpy(&server->mountd_address, &data->mount_server.address, - data->mount_server.addrlen); - server->mountd_addrlen = data->mount_server.addrlen; + 
if (cfg->mount_server.addrlen) { + memcpy(&server->mountd_address, &cfg->mount_server.address, + cfg->mount_server.addrlen); + server->mountd_addrlen = cfg->mount_server.addrlen; } - server->mountd_version = data->mount_server.version; - server->mountd_port = data->mount_server.port; - server->mountd_protocol = data->mount_server.protocol; + server->mountd_version = cfg->mount_server.version; + server->mountd_port = cfg->mount_server.port; + server->mountd_protocol = cfg->mount_server.protocol; - server->namelen = data->namlen; + server->namelen = cfg->namlen; return 0; error: @@ -970,7 +970,7 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info) goto error; /* Get a client representation */ - error = nfs_init_server(server, mount_info->parsed, nfs_mod); + error = nfs_init_server(server, mount_info->ctx, nfs_mod); if (error < 0) goto error; @@ -981,7 +981,7 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info) if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(mount_info->ctx->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 8fbfd526d6b8..52aa2b8522f0 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -242,40 +242,40 @@ static const match_table_t nfs_vers_tokens = { { Opt_vers_err, NULL } }; -struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) +struct nfs_fs_context *nfs_alloc_parsed_mount_data(void) { - struct nfs_parsed_mount_data *data; + struct nfs_fs_context *ctx; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data) { - data->timeo = NFS_UNSPEC_TIMEO; - data->retrans = NFS_UNSPEC_RETRANS; - data->acregmin = NFS_DEF_ACREGMIN; - data->acregmax = NFS_DEF_ACREGMAX; - data->acdirmin = NFS_DEF_ACDIRMIN; - data->acdirmax = NFS_DEF_ACDIRMAX; - data->mount_server.port = NFS_UNSPEC_PORT; - data->nfs_server.port = NFS_UNSPEC_PORT; - data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->selected_flavor = RPC_AUTH_MAXFLAVOR; - data->minorversion = 0; - data->need_mount = true; - data->net = current->nsproxy->net_ns; - data->lsm_opts = NULL; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx) { + ctx->timeo = NFS_UNSPEC_TIMEO; + ctx->retrans = NFS_UNSPEC_RETRANS; + ctx->acregmin = NFS_DEF_ACREGMIN; + ctx->acregmax = NFS_DEF_ACREGMAX; + ctx->acdirmin = NFS_DEF_ACDIRMIN; + ctx->acdirmax = NFS_DEF_ACDIRMAX; + ctx->mount_server.port = NFS_UNSPEC_PORT; + ctx->nfs_server.port = NFS_UNSPEC_PORT; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; + ctx->minorversion = 0; + ctx->need_mount = true; + ctx->net = current->nsproxy->net_ns; + ctx->lsm_opts = NULL; } - return data; + return ctx; } -void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) +void nfs_free_parsed_mount_data(struct nfs_fs_context *ctx) { - if (data) { - kfree(data->client_address); - kfree(data->mount_server.hostname); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); - security_free_mnt_opts(&data->lsm_opts); - kfree(data); + if (ctx) { + kfree(ctx->client_address); + kfree(ctx->mount_server.hostname); + kfree(ctx->nfs_server.export_path); + kfree(ctx->nfs_server.hostname); + kfree(ctx->fscache_uniq); + security_free_mnt_opts(&ctx->lsm_opts); + 
kfree(ctx); } } @@ -306,15 +306,15 @@ static int nfs_verify_server_address(struct sockaddr *addr) * Sanity check the NFS transport protocol. * */ -static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) +static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx) { - switch (mnt->nfs_server.protocol) { + switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: break; default: - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; } } @@ -322,20 +322,20 @@ static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) * For text based NFSv2/v3 mounts, the mount protocol transport default * settings should depend upon the specified NFS transport. */ -static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) +static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) { - nfs_validate_transport_protocol(mnt); + nfs_validate_transport_protocol(ctx); - if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || - mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) + if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP || + ctx->mount_server.protocol == XPRT_TRANSPORT_TCP) return; - switch (mnt->nfs_server.protocol) { + switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; } } @@ -367,8 +367,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, /* * Parse the value of the 'sec=' option. */ -static int nfs_parse_security_flavors(char *value, - struct nfs_parsed_mount_data *mnt) +static int nfs_parse_security_flavors(char *value, struct nfs_fs_context *ctx) { substring_t args[MAX_OPT_ARGS]; rpc_authflavor_t pseudoflavor; @@ -417,7 +416,7 @@ static int nfs_parse_security_flavors(char *value, return 0; } - if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) + if (!nfs_auth_info_add(&ctx->auth_info, pseudoflavor)) return 0; } @@ -425,36 +424,36 @@ static int nfs_parse_security_flavors(char *value, } static int nfs_parse_version_string(char *string, - struct nfs_parsed_mount_data *mnt, + struct nfs_fs_context *ctx, substring_t *args) { - mnt->flags &= ~NFS_MOUNT_VER3; + ctx->flags &= ~NFS_MOUNT_VER3; switch (match_token(string, nfs_vers_tokens, args)) { case Opt_vers_2: - mnt->version = 2; + ctx->version = 2; break; case Opt_vers_3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; + ctx->flags |= NFS_MOUNT_VER3; + ctx->version = 3; break; case Opt_vers_4: /* Backward compatibility option. In future, * the mount program should always supply * a NFSv4 minor version number. */ - mnt->version = 4; + ctx->version = 4; break; case Opt_vers_4_0: - mnt->version = 4; - mnt->minorversion = 0; + ctx->version = 4; + ctx->minorversion = 0; break; case Opt_vers_4_1: - mnt->version = 4; - mnt->minorversion = 1; + ctx->version = 4; + ctx->minorversion = 1; break; case Opt_vers_4_2: - mnt->version = 4; - mnt->minorversion = 2; + ctx->version = 4; + ctx->minorversion = 2; break; default: return 0; @@ -502,7 +501,7 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, * skipped as they are encountered. If there were no errors, return 1; * otherwise return 0 (zero). 
*/ -int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) +int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) { char *p, *string; int rc, sloppy = 0, invalid_option = 0; @@ -515,7 +514,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) } dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts); + rc = security_sb_eat_lsm_opts(raw, &ctx->lsm_opts); if (rc) goto out_security_failure; @@ -536,96 +535,96 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) * boolean options: foo/nofoo */ case Opt_soft: - mnt->flags |= NFS_MOUNT_SOFT; - mnt->flags &= ~NFS_MOUNT_SOFTERR; + ctx->flags |= NFS_MOUNT_SOFT; + ctx->flags &= ~NFS_MOUNT_SOFTERR; break; case Opt_softerr: - mnt->flags |= NFS_MOUNT_SOFTERR; - mnt->flags &= ~NFS_MOUNT_SOFT; + ctx->flags |= NFS_MOUNT_SOFTERR; + ctx->flags &= ~NFS_MOUNT_SOFT; break; case Opt_hard: - mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); + ctx->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); break; case Opt_posix: - mnt->flags |= NFS_MOUNT_POSIX; + ctx->flags |= NFS_MOUNT_POSIX; break; case Opt_noposix: - mnt->flags &= ~NFS_MOUNT_POSIX; + ctx->flags &= ~NFS_MOUNT_POSIX; break; case Opt_cto: - mnt->flags &= ~NFS_MOUNT_NOCTO; + ctx->flags &= ~NFS_MOUNT_NOCTO; break; case Opt_nocto: - mnt->flags |= NFS_MOUNT_NOCTO; + ctx->flags |= NFS_MOUNT_NOCTO; break; case Opt_ac: - mnt->flags &= ~NFS_MOUNT_NOAC; + ctx->flags &= ~NFS_MOUNT_NOAC; break; case Opt_noac: - mnt->flags |= NFS_MOUNT_NOAC; + ctx->flags |= NFS_MOUNT_NOAC; break; case Opt_lock: - mnt->flags &= ~NFS_MOUNT_NONLM; - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_nolock: - mnt->flags |= NFS_MOUNT_NONLM; - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_rdma: - mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; xprt_load_transport(p); break; case Opt_acl: - mnt->flags &= ~NFS_MOUNT_NOACL; + ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_noacl: - mnt->flags |= NFS_MOUNT_NOACL; + ctx->flags |= NFS_MOUNT_NOACL; break; case Opt_rdirplus: - mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; break; case Opt_nordirplus: - mnt->flags |= NFS_MOUNT_NORDIRPLUS; + ctx->flags |= NFS_MOUNT_NORDIRPLUS; break; case Opt_sharecache: - mnt->flags &= ~NFS_MOUNT_UNSHARED; + ctx->flags &= ~NFS_MOUNT_UNSHARED; break; case Opt_nosharecache: - mnt->flags |= NFS_MOUNT_UNSHARED; + ctx->flags |= NFS_MOUNT_UNSHARED; break; case Opt_resvport: - mnt->flags &= ~NFS_MOUNT_NORESVPORT; + ctx->flags &= ~NFS_MOUNT_NORESVPORT; break; case Opt_noresvport: - mnt->flags |= NFS_MOUNT_NORESVPORT; + ctx->flags |= NFS_MOUNT_NORESVPORT; break; case Opt_fscache: - mnt->options |= NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; + ctx->options |= 
NFS_OPTION_FSCACHE; + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = NULL; break; case Opt_nofscache: - mnt->options &= ~NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; + ctx->options &= ~NFS_OPTION_FSCACHE; + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = NULL; break; case Opt_migration: - mnt->options |= NFS_OPTION_MIGRATION; + ctx->options |= NFS_OPTION_MIGRATION; break; case Opt_nomigration: - mnt->options &= ~NFS_OPTION_MIGRATION; + ctx->options &= ~NFS_OPTION_MIGRATION; break; /* @@ -635,83 +634,83 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) if (nfs_get_option_ul(args, &option) || option > USHRT_MAX) goto out_invalid_value; - mnt->nfs_server.port = option; + ctx->nfs_server.port = option; break; case Opt_rsize: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->rsize = option; + ctx->rsize = option; break; case Opt_wsize: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->wsize = option; + ctx->wsize = option; break; case Opt_bsize: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->bsize = option; + ctx->bsize = option; break; case Opt_timeo: if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) goto out_invalid_value; - mnt->timeo = option; + ctx->timeo = option; break; case Opt_retrans: if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) goto out_invalid_value; - mnt->retrans = option; + ctx->retrans = option; break; case Opt_acregmin: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->acregmin = option; + ctx->acregmin = option; break; case Opt_acregmax: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->acregmax = option; + ctx->acregmax = option; break; case Opt_acdirmin: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->acdirmin = option; + ctx->acdirmin = option; break; case Opt_acdirmax: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->acdirmax = option; + ctx->acdirmax = option; break; case Opt_actimeo: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->acregmin = mnt->acregmax = - mnt->acdirmin = mnt->acdirmax = option; + ctx->acregmin = ctx->acregmax = + ctx->acdirmin = ctx->acdirmax = option; break; case Opt_namelen: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; - mnt->namlen = option; + ctx->namlen = option; break; case Opt_mountport: if (nfs_get_option_ul(args, &option) || option > USHRT_MAX) goto out_invalid_value; - mnt->mount_server.port = option; + ctx->mount_server.port = option; break; case Opt_mountvers: if (nfs_get_option_ul(args, &option) || option < NFS_MNT_VERSION || option > NFS_MNT3_VERSION) goto out_invalid_value; - mnt->mount_server.version = option; + ctx->mount_server.version = option; break; case Opt_minorversion: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; if (option > NFS4_MAX_MINOR_VERSION) goto out_invalid_value; - mnt->minorversion = option; + ctx->minorversion = option; break; /* @@ -721,7 +720,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) string = match_strdup(args); if (string == NULL) goto out_nomem; - rc = nfs_parse_version_string(string, mnt, args); + rc = nfs_parse_version_string(string, ctx, args); kfree(string); if (!rc) goto out_invalid_value; @@ -730,7 +729,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) string = match_strdup(args); if (string == NULL) goto out_nomem; - rc = nfs_parse_security_flavors(string, mnt); + rc = 
nfs_parse_security_flavors(string, ctx); kfree(string); if (!rc) { dfprintk(MOUNT, "NFS: unrecognized " @@ -751,23 +750,23 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) protofamily = AF_INET6; /* fall through */ case Opt_xprt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; /* fall through */ case Opt_xprt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; /* fall through */ case Opt_xprt_rdma: /* vector side protocols to TCP */ - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; xprt_load_transport(string); break; default: @@ -792,13 +791,13 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) mountfamily = AF_INET6; /* fall through */ case Opt_xprt_udp: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; /* fall through */ case Opt_xprt_tcp: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma: /* not used for side protocols */ default: @@ -811,41 +810,41 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) string = match_strdup(args); if (string == NULL) goto out_nomem; - mnt->nfs_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), + ctx->nfs_server.addrlen = + rpc_pton(ctx->net, string, strlen(string), (struct sockaddr *) - &mnt->nfs_server.address, - sizeof(mnt->nfs_server.address)); + &ctx->nfs_server.address, + sizeof(ctx->nfs_server.address)); kfree(string); - if (mnt->nfs_server.addrlen == 0) + if (ctx->nfs_server.addrlen == 0) goto out_invalid_address; break; case Opt_clientaddr: - if (nfs_get_option_str(args, &mnt->client_address)) + if (nfs_get_option_str(args, &ctx->client_address)) goto out_nomem; break; case Opt_mounthost: if (nfs_get_option_str(args, - &mnt->mount_server.hostname)) + &ctx->mount_server.hostname)) goto out_nomem; break; case Opt_mountaddr: string = match_strdup(args); if (string == NULL) goto out_nomem; - mnt->mount_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), + ctx->mount_server.addrlen = + rpc_pton(ctx->net, string, strlen(string), (struct sockaddr *) - &mnt->mount_server.address, - sizeof(mnt->mount_server.address)); + &ctx->mount_server.address, + sizeof(ctx->mount_server.address)); kfree(string); - if (mnt->mount_server.addrlen == 0) + if (ctx->mount_server.addrlen == 0) goto out_invalid_address; break; case Opt_nconnect: if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) goto out_invalid_value; - mnt->nfs_server.nconnect = option; + ctx->nfs_server.nconnect = option; break; case Opt_lookupcache: string = match_strdup(args); @@ -856,14 +855,14 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) kfree(string); switch (token) { case Opt_lookupcache_all: - mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); + ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); break; case Opt_lookupcache_positive: - mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; - mnt->flags 
|= NFS_MOUNT_LOOKUP_CACHE_NONEG; + ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; break; case Opt_lookupcache_none: - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: dfprintk(MOUNT, "NFS: invalid " @@ -872,9 +871,9 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) } break; case Opt_fscache_uniq: - if (nfs_get_option_str(args, &mnt->fscache_uniq)) + if (nfs_get_option_str(args, &ctx->fscache_uniq)) goto out_nomem; - mnt->options |= NFS_OPTION_FSCACHE; + ctx->options |= NFS_OPTION_FSCACHE; break; case Opt_local_lock: string = match_strdup(args); @@ -885,17 +884,17 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) kfree(string); switch (token) { case Opt_local_lock_all: - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_local_lock_flock: - mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; + ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; break; case Opt_local_lock_posix: - mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; + ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; break; case Opt_local_lock_none: - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; default: @@ -928,11 +927,11 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) if (!sloppy && invalid_option) return 0; - if (mnt->minorversion && mnt->version != 4) + if (ctx->minorversion && ctx->version != 4) goto out_minorversion_mismatch; - if (mnt->options & NFS_OPTION_MIGRATION && - (mnt->version != 4 || mnt->minorversion != 0)) + if (ctx->options & NFS_OPTION_MIGRATION && + (ctx->version != 4 || ctx->minorversion != 0)) goto out_migration_misuse; /* @@ -940,15 +939,15 @@ int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) * families in the addr=/mountaddr= options. 
*/ if (protofamily != AF_UNSPEC && - protofamily != mnt->nfs_server.address.ss_family) + protofamily != ctx->nfs_server.address.ss_family) goto out_proto_mismatch; if (mountfamily != AF_UNSPEC) { - if (mnt->mount_server.addrlen) { - if (mountfamily != mnt->mount_server.address.ss_family) + if (ctx->mount_server.addrlen) { + if (mountfamily != ctx->mount_server.address.ss_family) goto out_mountproto_mismatch; } else { - if (mountfamily != mnt->nfs_server.address.ss_family) + if (mountfamily != ctx->nfs_server.address.ss_family) goto out_mountproto_mismatch; } } @@ -970,7 +969,7 @@ out_invalid_value: return 0; out_minorversion_mismatch: printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", mnt->version, mnt->minorversion); + "minorversion=%u\n", ctx->version, ctx->minorversion); return 0; out_migration_misuse: printk(KERN_INFO @@ -1078,18 +1077,18 @@ out_path: * mountproto=tcp after mountproto=udp, and so on */ static int nfs23_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, struct nfs_fh *mntfh, const char *dev_name) { struct nfs_mount_data *data = (struct nfs_mount_data *)options; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; if (data == NULL) goto out_no_data; - args->version = NFS_DEFAULT_VERSION; + ctx->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: data->namlen = 0; /* fall through */ @@ -1115,10 +1114,10 @@ static int nfs23_validate_mount_data(void *options, if (data->root.size > NFS3_FHSIZE || data->root.size == 0) goto out_invalid_fh; mntfh->size = data->root.size; - args->version = 3; + ctx->version = 3; } else { mntfh->size = NFS2_FHSIZE; - args->version = 2; + ctx->version = 2; } @@ -1128,47 +1127,47 @@ static int nfs23_validate_mount_data(void *options, sizeof(mntfh->data) - mntfh->size); /* - * Translate to nfs_parsed_mount_data, which nfs_fill_super + * Translate to nfs_fs_context, which nfs_fill_super * can deal with. */ - args->flags = data->flags & NFS_MOUNT_FLAGMASK; - args->flags |= extra_flags; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->need_mount = false; + ctx->flags = data->flags & NFS_MOUNT_FLAGMASK; + ctx->flags |= extra_flags; + ctx->rsize = data->rsize; + ctx->wsize = data->wsize; + ctx->timeo = data->timeo; + ctx->retrans = data->retrans; + ctx->acregmin = data->acregmin; + ctx->acregmax = data->acregmax; + ctx->acdirmin = data->acdirmin; + ctx->acdirmax = data->acdirmax; + ctx->need_mount = false; memcpy(sap, &data->addr, sizeof(data->addr)); - args->nfs_server.addrlen = sizeof(data->addr); - args->nfs_server.port = ntohs(data->addr.sin_port); + ctx->nfs_server.addrlen = sizeof(data->addr); + ctx->nfs_server.port = ntohs(data->addr.sin_port); if (sap->sa_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) - args->nfs_server.protocol = XPRT_TRANSPORT_UDP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; /* N.B. 
caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); - args->namlen = data->namlen; - args->bsize = data->bsize; + ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + ctx->namlen = data->namlen; + ctx->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->selected_flavor = data->pseudoflavor; + ctx->selected_flavor = data->pseudoflavor; else - args->selected_flavor = RPC_AUTH_UNIX; - if (!args->nfs_server.hostname) + ctx->selected_flavor = RPC_AUTH_UNIX; + if (!ctx->nfs_server.hostname) goto out_nomem; if (!(data->flags & NFS_MOUNT_NONLM)) - args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); else - args->flags |= (NFS_MOUNT_LOCAL_FLOCK| + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); /* * The legacy version 6 binary mount data from userspace has a @@ -1183,7 +1182,7 @@ static int nfs23_validate_mount_data(void *options, int rc; data->context[NFS_MAX_CONTEXT_LEN] = '\0'; rc = security_add_mnt_opt("context", data->context, - strlen(data->context), &args->lsm_opts); + strlen(data->context), ctx->lsm_opts); if (rc) return rc; #else @@ -1225,10 +1224,9 @@ out_invalid_fh: } #if IS_ENABLED(CONFIG_NFS_V4) - -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) +static void nfs4_validate_mount_flags(struct nfs_fs_context *ctx) { - args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| + ctx->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); } @@ -1236,30 +1234,30 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) * Validate NFSv4 mount options */ static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, const char *dev_name) { - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; if (data == NULL) goto out_no_data; - args->version = 4; + ctx->version = 4; switch (data->version) { case 1: - if (data->host_addrlen > sizeof(args->nfs_server.address)) + if (data->host_addrlen > sizeof(ctx->nfs_server.address)) goto out_no_address; if (data->host_addrlen == 0) goto out_no_address; - args->nfs_server.addrlen = data->host_addrlen; + ctx->nfs_server.addrlen = data->host_addrlen; if (copy_from_user(sap, data->host_addr, data->host_addrlen)) return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; - args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); + ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { rpc_authflavor_t pseudoflavor; @@ -1269,43 +1267,43 @@ static int nfs4_validate_mount_data(void *options, data->auth_flavours, sizeof(pseudoflavor))) return -EFAULT; - args->selected_flavor = pseudoflavor; + ctx->selected_flavor = pseudoflavor; } else - args->selected_flavor = RPC_AUTH_UNIX; + ctx->selected_flavor = RPC_AUTH_UNIX; c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) return PTR_ERR(c); - args->nfs_server.hostname = c; + ctx->nfs_server.hostname = c; c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(c)) return PTR_ERR(c); - args->nfs_server.export_path = c; + ctx->nfs_server.export_path = c; dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); c = strndup_user(data->client_addr.data, 16); if 
(IS_ERR(c)) return PTR_ERR(c); - args->client_address = c; + ctx->client_address = c; /* - * Translate to nfs_parsed_mount_data, which nfs4_fill_super + * Translate to nfs_fs_context, which nfs4_fill_super * can deal with. */ - args->flags = data->flags & NFS4_MOUNT_FLAGMASK; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->nfs_server.protocol = data->proto; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK; + ctx->rsize = data->rsize; + ctx->wsize = data->wsize; + ctx->timeo = data->timeo; + ctx->retrans = data->retrans; + ctx->acregmin = data->acregmin; + ctx->acregmax = data->acregmax; + ctx->acdirmin = data->acdirmin; + ctx->acdirmax = data->acdirmax; + ctx->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) goto out_invalid_transport_udp; break; @@ -1335,67 +1333,67 @@ out_invalid_transport_udp: int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, struct nfs_fh *mntfh, const char *dev_name) { if (fs_type == &nfs_fs_type) - return nfs23_validate_mount_data(options, args, mntfh, dev_name); - return nfs4_validate_mount_data(options, args, dev_name); + return nfs23_validate_mount_data(options, ctx, mntfh, dev_name); + return nfs4_validate_mount_data(options, ctx, dev_name); } #else int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, struct nfs_fh *mntfh, const char *dev_name) { - return nfs23_validate_mount_data(options, args, mntfh, dev_name); + return nfs23_validate_mount_data(options, ctx, mntfh, dev_name); } #endif int nfs_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, const char *dev_name) { int port = 0; int max_namelen = PAGE_SIZE; int max_pathlen = NFS_MAXPATHLEN; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; - if (nfs_parse_mount_options((char *)options, args) == 0) + if (nfs_parse_mount_options((char *)options, ctx) == 0) return -EINVAL; if (!nfs_verify_server_address(sap)) goto out_no_address; - if (args->version == 4) { + if (ctx->version == 4) { #if IS_ENABLED(CONFIG_NFS_V4) - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; else port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) goto out_invalid_transport_udp; - nfs4_validate_mount_flags(args); + nfs4_validate_mount_flags(ctx); #else goto out_v4_not_compiled; #endif /* CONFIG_NFS_V4 */ } else { - nfs_set_mount_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + nfs_set_mount_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; } - nfs_set_port(sap, &args->nfs_server.port, port); + nfs_set_port(sap, &ctx->nfs_server.port, port); 
return nfs_parse_devname(dev_name, - &args->nfs_server.hostname, + &ctx->nfs_server.hostname, max_namelen, - &args->nfs_server.export_path, + &ctx->nfs_server.export_path, max_pathlen); #if !IS_ENABLED(CONFIG_NFS_V4) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 28ab31fc5aa6..7131fa150d1b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -81,7 +81,7 @@ struct nfs_client_initdata { /* * In-kernel mount arguments */ -struct nfs_parsed_mount_data { +struct nfs_fs_context { int flags; unsigned int rsize, wsize; unsigned int timeo, retrans; @@ -138,7 +138,7 @@ struct nfs_mount_request { struct nfs_mount_info { unsigned int inherited_bsize; - struct nfs_parsed_mount_data *parsed; + struct nfs_fs_context *ctx; struct nfs_clone_mount *cloned; struct nfs_server *server; struct nfs_fh *mntfh; @@ -229,16 +229,16 @@ struct nfs_pageio_descriptor; /* mount.c */ #define NFS_TEXT_DATA 1 -extern struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void); -extern void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data); -extern int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt); +extern struct nfs_fs_context *nfs_alloc_parsed_mount_data(void); +extern void nfs_free_parsed_mount_data(struct nfs_fs_context *ctx); +extern int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx); extern int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, struct nfs_fh *mntfh, const char *dev_name); extern int nfs_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, + struct nfs_fs_context *ctx, const char *dev_name); /* pagelist.c */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 538fd036b69d..2216d166768b 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1056,61 +1056,61 @@ out: * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, - struct nfs_parsed_mount_data *data) + struct nfs_fs_context *ctx) { struct rpc_timeout timeparms; int error; - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); + nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, + ctx->timeo, ctx->retrans); /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->options = data->options; - server->auth_info = data->auth_info; + server->flags = ctx->flags; + server->options = ctx->options; + server->auth_info = ctx->auth_info; /* Use the first specified auth flavor. 
If this flavor isn't * allowed by the server, use the SECINFO path to try the * other specified flavors */ - if (data->auth_info.flavor_len >= 1) - data->selected_flavor = data->auth_info.flavors[0]; + if (ctx->auth_info.flavor_len >= 1) + ctx->selected_flavor = ctx->auth_info.flavors[0]; else - data->selected_flavor = RPC_AUTH_UNIX; + ctx->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ error = nfs4_set_client(server, - data->nfs_server.hostname, - (const struct sockaddr *)&data->nfs_server.address, - data->nfs_server.addrlen, - data->client_address, - data->nfs_server.protocol, + ctx->nfs_server.hostname, + (const struct sockaddr *)&ctx->nfs_server.address, + ctx->nfs_server.addrlen, + ctx->client_address, + ctx->nfs_server.protocol, &timeparms, - data->minorversion, - data->nfs_server.nconnect, - data->net); + ctx->minorversion, + ctx->nfs_server.nconnect, + ctx->net); if (error < 0) return error; - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); + if (ctx->rsize) + server->rsize = nfs_block_size(ctx->rsize, NULL); + if (ctx->wsize) + server->wsize = nfs_block_size(ctx->wsize, NULL); - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; - server->port = data->nfs_server.port; + server->acregmin = ctx->acregmin * HZ; + server->acregmax = ctx->acregmax * HZ; + server->acdirmin = ctx->acdirmin * HZ; + server->acdirmax = ctx->acdirmax * HZ; + server->port = ctx->nfs_server.port; return nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + ctx->selected_flavor); } /* * Create a version 4 volume record * - keyed on server and FSID */ -/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, +/*struct nfs_server *nfs4_create_server(const struct nfs_fs_context *data, struct nfs_fh *mntfh)*/ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info) { @@ -1124,10 +1124,10 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info) server->cred = get_cred(current_cred()); - auth_probe = mount_info->parsed->auth_info.flavor_len < 1; + auth_probe = mount_info->ctx->auth_info.flavor_len < 1; /* set up the general RPC client */ - error = nfs4_init_server(server, mount_info->parsed); + error = nfs4_init_server(server, mount_info->ctx); if (error < 0) goto error; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index f1c2d294073a..f931e8f49b05 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -198,15 +198,15 @@ static struct dentry *do_nfs4_mount(struct nfs_server *server, int flags, struct dentry *nfs4_try_mount(int flags, const char *dev_name, struct nfs_mount_info *mount_info) { - struct nfs_parsed_mount_data *data = mount_info->parsed; + struct nfs_fs_context *ctx = mount_info->ctx; struct dentry *res; dfprintk(MOUNT, "--> nfs4_try_mount()\n"); res = do_nfs4_mount(nfs4_create_server(mount_info), flags, mount_info, - data->nfs_server.hostname, - data->nfs_server.export_path); + ctx->nfs_server.hostname, + ctx->nfs_server.export_path); dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", PTR_ERR_OR_ZERO(res), diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b07585f62c65..83527515590e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -726,12 +726,13 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, EXPORT_SYMBOL_GPL(nfs_auth_info_match); /* - * Ensure that a specified authtype in 
args->auth_info is supported by - * the server. Returns 0 and sets args->selected_flavor if it's ok, and + * Ensure that a specified authtype in cfg->auth_info is supported by + * the server. Returns 0 and sets cfg->selected_flavor if it's ok, and * -EACCES if not. */ -static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, - rpc_authflavor_t *server_authlist, unsigned int count) +static int nfs_verify_authflavors(struct nfs_fs_context *cfg, + rpc_authflavor_t *server_authlist, + unsigned int count) { rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; bool found_auth_null = false; @@ -752,7 +753,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, for (i = 0; i < count; i++) { flavor = server_authlist[i]; - if (nfs_auth_info_match(&args->auth_info, flavor)) + if (nfs_auth_info_match(&cfg->auth_info, flavor)) goto out; if (flavor == RPC_AUTH_NULL) @@ -760,7 +761,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, } if (found_auth_null) { - flavor = args->auth_info.flavors[0]; + flavor = cfg->auth_info.flavors[0]; goto out; } @@ -769,8 +770,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, return -EACCES; out: - args->selected_flavor = flavor; - dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor); + cfg->selected_flavor = flavor; + dfprintk(MOUNT, "NFS: using auth flavor %u\n", cfg->selected_flavor); return 0; } @@ -778,50 +779,50 @@ out: * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. */ -static int nfs_request_mount(struct nfs_parsed_mount_data *args, +static int nfs_request_mount(struct nfs_fs_context *cfg, struct nfs_fh *root_fh, rpc_authflavor_t *server_authlist, unsigned int *server_authlist_len) { struct nfs_mount_request request = { .sap = (struct sockaddr *) - &args->mount_server.address, - .dirpath = args->nfs_server.export_path, - .protocol = args->mount_server.protocol, + &cfg->mount_server.address, + .dirpath = cfg->nfs_server.export_path, + .protocol = cfg->mount_server.protocol, .fh = root_fh, - .noresvport = args->flags & NFS_MOUNT_NORESVPORT, + .noresvport = cfg->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = server_authlist_len, .auth_flavs = server_authlist, - .net = args->net, + .net = cfg->net, }; int status; - if (args->mount_server.version == 0) { - switch (args->version) { + if (cfg->mount_server.version == 0) { + switch (cfg->version) { default: - args->mount_server.version = NFS_MNT3_VERSION; + cfg->mount_server.version = NFS_MNT3_VERSION; break; case 2: - args->mount_server.version = NFS_MNT_VERSION; + cfg->mount_server.version = NFS_MNT_VERSION; } } - request.version = args->mount_server.version; + request.version = cfg->mount_server.version; - if (args->mount_server.hostname) - request.hostname = args->mount_server.hostname; + if (cfg->mount_server.hostname) + request.hostname = cfg->mount_server.hostname; else - request.hostname = args->nfs_server.hostname; + request.hostname = cfg->nfs_server.hostname; /* * Construct the mount server's address. 
*/ - if (args->mount_server.address.ss_family == AF_UNSPEC) { - memcpy(request.sap, &args->nfs_server.address, - args->nfs_server.addrlen); - args->mount_server.addrlen = args->nfs_server.addrlen; + if (cfg->mount_server.address.ss_family == AF_UNSPEC) { + memcpy(request.sap, &cfg->nfs_server.address, + cfg->nfs_server.addrlen); + cfg->mount_server.addrlen = cfg->nfs_server.addrlen; } - request.salen = args->mount_server.addrlen; - nfs_set_port(request.sap, &args->mount_server.port, 0); + request.salen = cfg->mount_server.addrlen; + nfs_set_port(request.sap, &cfg->mount_server.port, 0); /* * Now ask the mount server to map our export path @@ -844,12 +845,12 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf bool tried_auth_unix = false; bool auth_null_in_list = false; struct nfs_server *server = ERR_PTR(-EACCES); - struct nfs_parsed_mount_data *args = mount_info->parsed; + struct nfs_fs_context *ctx = mount_info->ctx; rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); struct nfs_subversion *nfs_mod = mount_info->nfs_mod; - status = nfs_request_mount(args, mount_info->mntfh, authlist, + status = nfs_request_mount(ctx, mount_info->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); @@ -858,10 +859,10 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf * Was a sec= authflavor specified in the options? First, verify * whether the server supports it, and then just try to use it if so. */ - if (args->auth_info.flavor_len > 0) { - status = nfs_verify_authflavors(args, authlist, authlist_len); + if (ctx->auth_info.flavor_len > 0) { + status = nfs_verify_authflavors(ctx, authlist, authlist_len); dfprintk(MOUNT, "NFS: using auth flavor %u\n", - args->selected_flavor); + ctx->selected_flavor); if (status) return ERR_PTR(status); return nfs_mod->rpc_ops->create_server(mount_info); @@ -890,7 +891,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Fallthrough */ } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); - args->selected_flavor = flavor; + ctx->selected_flavor = flavor; server = nfs_mod->rpc_ops->create_server(mount_info); if (!IS_ERR(server)) return server; @@ -906,7 +907,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! 
Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); - args->selected_flavor = RPC_AUTH_UNIX; + ctx->selected_flavor = RPC_AUTH_UNIX; return nfs_mod->rpc_ops->create_server(mount_info); } @@ -916,7 +917,7 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, struct nfs_mount_info *mount_info) { struct nfs_subversion *nfs_mod = mount_info->nfs_mod; - if (mount_info->parsed->need_mount) + if (mount_info->ctx->need_mount) mount_info->server = nfs_try_mount_request(mount_info); else mount_info->server = nfs_mod->rpc_ops->create_server(mount_info); @@ -940,24 +941,24 @@ EXPORT_SYMBOL_GPL(nfs_try_mount); static int nfs_compare_remount_data(struct nfs_server *nfss, - struct nfs_parsed_mount_data *data) + struct nfs_fs_context *ctx) { - if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || - data->rsize != nfss->rsize || - data->wsize != nfss->wsize || - data->version != nfss->nfs_client->rpc_ops->version || - data->minorversion != nfss->nfs_client->cl_minorversion || - data->retrans != nfss->client->cl_timeout->to_retries || - !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || - data->acregmin != nfss->acregmin / HZ || - data->acregmax != nfss->acregmax / HZ || - data->acdirmin != nfss->acdirmin / HZ || - data->acdirmax != nfss->acdirmax / HZ || - data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || - (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || - data->nfs_server.port != nfss->port || - data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || - !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, + if ((ctx->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || + ctx->rsize != nfss->rsize || + ctx->wsize != nfss->wsize || + ctx->version != nfss->nfs_client->rpc_ops->version || + ctx->minorversion != nfss->nfs_client->cl_minorversion || + ctx->retrans != nfss->client->cl_timeout->to_retries || + !nfs_auth_info_match(&ctx->auth_info, nfss->client->cl_auth->au_flavor) || + ctx->acregmin != nfss->acregmin / HZ || + ctx->acregmax != nfss->acregmax / HZ || + ctx->acdirmin != nfss->acdirmin / HZ || + ctx->acdirmax != nfss->acdirmax / HZ || + ctx->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + (ctx->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || + ctx->nfs_server.port != nfss->port || + ctx->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || + !rpc_cmp_addr((struct sockaddr *)&ctx->nfs_server.address, (struct sockaddr *)&nfss->nfs_client->cl_addr)) return -EINVAL; @@ -969,7 +970,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) { int error; struct nfs_server *nfss = sb->s_fs_info; - struct nfs_parsed_mount_data *data; + struct nfs_fs_context *ctx; struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; u32 nfsvers = nfss->nfs_client->rpc_ops->version; @@ -987,32 +988,32 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) options->version <= 6)))) return 0; - data = nfs_alloc_parsed_mount_data(); - if (data == NULL) + ctx = nfs_alloc_parsed_mount_data(); + if (ctx == NULL) return -ENOMEM; /* fill out struct with values from existing mount */ - data->flags = nfss->flags; - data->rsize = nfss->rsize; - data->wsize = nfss->wsize; - data->retrans = nfss->client->cl_timeout->to_retries; - data->selected_flavor = nfss->client->cl_auth->au_flavor; - data->acregmin = nfss->acregmin / HZ; - data->acregmax = 
nfss->acregmax / HZ; - data->acdirmin = nfss->acdirmin / HZ; - data->acdirmax = nfss->acdirmax / HZ; - data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; - data->nfs_server.port = nfss->port; - data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; - data->version = nfsvers; - data->minorversion = nfss->nfs_client->cl_minorversion; - data->net = current->nsproxy->net_ns; - memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, - data->nfs_server.addrlen); + ctx->flags = nfss->flags; + ctx->rsize = nfss->rsize; + ctx->wsize = nfss->wsize; + ctx->retrans = nfss->client->cl_timeout->to_retries; + ctx->selected_flavor = nfss->client->cl_auth->au_flavor; + ctx->acregmin = nfss->acregmin / HZ; + ctx->acregmax = nfss->acregmax / HZ; + ctx->acdirmin = nfss->acdirmin / HZ; + ctx->acdirmax = nfss->acdirmax / HZ; + ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + ctx->nfs_server.port = nfss->port; + ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + ctx->version = nfsvers; + ctx->minorversion = nfss->nfs_client->cl_minorversion; + ctx->net = current->nsproxy->net_ns; + memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr, + ctx->nfs_server.addrlen); /* overwrite those values with any that were specified */ error = -EINVAL; - if (!nfs_parse_mount_options((char *)options, data)) + if (!nfs_parse_mount_options((char *)options, ctx)) goto out; /* @@ -1021,15 +1022,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the * remount options, so we have to explicitly reset it. */ - if (data->flags & NFS_MOUNT_NOAC) + if (ctx->flags & NFS_MOUNT_NOAC) *flags |= SB_SYNCHRONOUS; /* compare new mount options with old ones */ - error = nfs_compare_remount_data(nfss, data); + error = nfs_compare_remount_data(nfss, ctx); if (!error) - error = security_sb_remount(sb, data->lsm_opts); + error = security_sb_remount(sb, ctx->lsm_opts); out: - nfs_free_parsed_mount_data(data); + nfs_free_parsed_mount_data(ctx); return error; } EXPORT_SYMBOL_GPL(nfs_remount); @@ -1039,15 +1040,15 @@ EXPORT_SYMBOL_GPL(nfs_remount); */ static void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { - struct nfs_parsed_mount_data *data = mount_info->parsed; + struct nfs_fs_context *ctx = mount_info->ctx; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = 0; sb->s_blocksize = 0; sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; sb->s_op = server->nfs_client->cl_nfs_mod->sops; - if (data && data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + if (ctx && ctx->bsize) + sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. 
We will do @@ -1208,7 +1209,7 @@ static int nfs_compare_super(struct super_block *sb, void *data) #ifdef CONFIG_NFS_FSCACHE static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *parsed, + struct nfs_fs_context *ctx, struct nfs_clone_mount *cloned) { struct nfs_server *nfss = NFS_SB(sb); @@ -1218,12 +1219,12 @@ static void nfs_get_cache_cookie(struct super_block *sb, nfss->fscache_key = NULL; nfss->fscache = NULL; - if (parsed) { - if (!(parsed->options & NFS_OPTION_FSCACHE)) + if (ctx) { + if (!(ctx->options & NFS_OPTION_FSCACHE)) return; - if (parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); + if (ctx->fscache_uniq) { + uniq = ctx->fscache_uniq; + ulen = strlen(ctx->fscache_uniq); } } else if (cloned) { struct nfs_server *mnt_s = NFS_SB(cloned->sb); @@ -1240,7 +1241,7 @@ static void nfs_get_cache_cookie(struct super_block *sb, } #else static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *parsed, + struct nfs_fs_context *parsed, struct nfs_clone_mount *cloned) { } @@ -1312,7 +1313,7 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, s->s_blocksize_bits = bsize; s->s_blocksize = 1U << bsize; } - nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); + nfs_get_cache_cookie(s, mount_info->ctx, mount_info->cloned); if (!(server->flags & NFS_MOUNT_UNSHARED)) s->s_iflags |= SB_I_MULTIROOT; } @@ -1333,7 +1334,7 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags, &kflags_out); } else { - error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts, + error = security_sb_set_mnt_opts(s, mount_info->ctx->lsm_opts, kflags, &kflags_out); } if (error) @@ -1370,21 +1371,22 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, struct nfs_subversion *nfs_mod; int error; - mount_info.parsed = nfs_alloc_parsed_mount_data(); + mount_info.ctx = nfs_alloc_parsed_mount_data(); mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.parsed == NULL || mount_info.mntfh == NULL) + if (mount_info.ctx == NULL || mount_info.mntfh == NULL) goto out; /* Validate the mount data */ - error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name); + error = nfs_validate_mount_data(fs_type, raw_data, mount_info.ctx, mount_info.mntfh, dev_name); if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); + error = nfs_validate_text_mount_data(raw_data, + mount_info.ctx, dev_name); if (error < 0) { mntroot = ERR_PTR(error); goto out; } - nfs_mod = get_nfs_version(mount_info.parsed->version); + nfs_mod = get_nfs_version(mount_info.ctx->version); if (IS_ERR(nfs_mod)) { mntroot = ERR_CAST(nfs_mod); goto out; @@ -1395,7 +1397,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, put_nfs_version(nfs_mod); out: - nfs_free_parsed_mount_data(mount_info.parsed); + nfs_free_parsed_mount_data(mount_info.ctx); nfs_free_fhandle(mount_info.mntfh); return mntroot; } From f8ee01e3e2c8845e812497f0d4925c18cafaad87 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:07 -0500 Subject: [PATCH 101/658] NFS: Split nfs_parse_mount_options() Split nfs_parse_mount_options() to move the prologue, list-splitting and epilogue into one function and the per-option processing into another. 
Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 126 ++++++++++++++++++++++++-------------------- fs/nfs/internal.h | 3 ++ 2 files changed, 73 insertions(+), 56 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 52aa2b8522f0..a386825c3b0f 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -496,36 +496,18 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, } /* - * Error-check and convert a string of mount options from user space into - * a data structure. The whole mount string is processed; bad options are - * skipped as they are encountered. If there were no errors, return 1; - * otherwise return 0 (zero). + * Parse a single mount option in "key[=val]" form. */ -int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) +static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) { - char *p, *string; - int rc, sloppy = 0, invalid_option = 0; - unsigned short protofamily = AF_UNSPEC; - unsigned short mountfamily = AF_UNSPEC; + char *string; + int rc; - if (!raw) { - dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); - return 1; - } - dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - - rc = security_sb_eat_lsm_opts(raw, &ctx->lsm_opts); - if (rc) - goto out_security_failure; - - while ((p = strsep(&raw, ",")) != NULL) { + { substring_t args[MAX_OPT_ARGS]; unsigned long option; int token; - if (!*p) - continue; - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); token = match_token(p, nfs_mount_option_tokens, args); @@ -734,7 +716,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) if (!rc) { dfprintk(MOUNT, "NFS: unrecognized " "security flavor\n"); - return 0; + return -EINVAL; } break; case Opt_proto: @@ -744,24 +726,24 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) token = match_token(string, nfs_xprt_protocol_tokens, args); - protofamily = AF_INET; + ctx->protofamily = AF_INET; switch (token) { case Opt_xprt_udp6: - protofamily = AF_INET6; + ctx->protofamily = AF_INET6; /* fall through */ case Opt_xprt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: - protofamily = AF_INET6; + ctx->protofamily = AF_INET6; /* fall through */ case Opt_xprt_tcp: ctx->flags |= NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: - protofamily = AF_INET6; + ctx->protofamily = AF_INET6; /* fall through */ case Opt_xprt_rdma: /* vector side protocols to TCP */ @@ -773,7 +755,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); kfree(string); - return 0; + return -EINVAL; } kfree(string); break; @@ -785,16 +767,16 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) nfs_xprt_protocol_tokens, args); kfree(string); - mountfamily = AF_INET; + ctx->mountfamily = AF_INET; switch (token) { case Opt_xprt_udp6: - mountfamily = AF_INET6; + ctx->mountfamily = AF_INET6; /* fall through */ case Opt_xprt_udp: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: - mountfamily = AF_INET6; + ctx->mountfamily = AF_INET6; /* fall through */ case Opt_xprt_tcp: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; @@ -803,7 +785,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) default: dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); - return 0; + return -EINVAL; } break; case 
Opt_addr: @@ -867,7 +849,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) default: dfprintk(MOUNT, "NFS: invalid " "lookupcache argument\n"); - return 0; + return -EINVAL; } break; case Opt_fscache_uniq: @@ -900,7 +882,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) default: dfprintk(MOUNT, "NFS: invalid " "local_lock argument\n"); - return 0; + return -EINVAL; } break; @@ -908,7 +890,7 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) * Special options */ case Opt_sloppy: - sloppy = 1; + ctx->sloppy = 1; dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); break; case Opt_userspace: @@ -918,12 +900,53 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) break; default: - invalid_option = 1; dfprintk(MOUNT, "NFS: unrecognized mount option " "'%s'\n", p); + return -EINVAL; } } + return 0; + +out_invalid_address: + printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); + return -EINVAL; +out_invalid_value: + printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); + return -EINVAL; +out_nomem: + printk(KERN_INFO "NFS: not enough memory to parse option\n"); + return -ENOMEM; +} + +/* + * Error-check and convert a string of mount options from user space into + * a data structure. The whole mount string is processed; bad options are + * skipped as they are encountered. If there were no errors, return 1; + * otherwise return 0 (zero). + */ +int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) +{ + char *p; + int rc, sloppy = 0, invalid_option = 0; + + if (!raw) { + dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); + return 1; + } + dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); + + rc = security_sb_eat_lsm_opts(raw, &ctx->lsm_opts); + if (rc) + goto out_security_failure; + + while ((p = strsep(&raw, ",")) != NULL) { + if (!*p) + continue; + if (nfs_fs_context_parse_option(ctx, p) < 0) + invalid_option = true; + } + if (!sloppy && invalid_option) return 0; @@ -938,22 +961,26 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) * verify that any proto=/mountproto= options match the address * families in the addr=/mountaddr= options. 
*/ - if (protofamily != AF_UNSPEC && - protofamily != ctx->nfs_server.address.ss_family) + if (ctx->protofamily != AF_UNSPEC && + ctx->protofamily != ctx->nfs_server.address.ss_family) goto out_proto_mismatch; - if (mountfamily != AF_UNSPEC) { + if (ctx->mountfamily != AF_UNSPEC) { if (ctx->mount_server.addrlen) { - if (mountfamily != ctx->mount_server.address.ss_family) + if (ctx->mountfamily != ctx->mount_server.address.ss_family) goto out_mountproto_mismatch; } else { - if (mountfamily != ctx->nfs_server.address.ss_family) + if (ctx->mountfamily != ctx->nfs_server.address.ss_family) goto out_mountproto_mismatch; } } return 1; +out_minorversion_mismatch: + printk(KERN_INFO "NFS: mount option vers=%u does not support " + "minorversion=%u\n", ctx->version, ctx->minorversion); + return 0; out_mountproto_mismatch: printk(KERN_INFO "NFS: mount server address does not match mountproto= " "option\n"); @@ -961,23 +988,10 @@ out_mountproto_mismatch: out_proto_mismatch: printk(KERN_INFO "NFS: server address does not match proto= option\n"); return 0; -out_invalid_address: - printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); - return 0; -out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); - return 0; -out_minorversion_mismatch: - printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", ctx->version, ctx->minorversion); - return 0; out_migration_misuse: printk(KERN_INFO "NFS: 'migration' not supported for this NFS version\n"); - return 0; -out_nomem: - printk(KERN_INFO "NFS: not enough memory to parse option\n"); - return 0; + return -EINVAL; out_security_failure: printk(KERN_INFO "NFS: security options invalid: %d\n", rc); return 0; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7131fa150d1b..d0abc7b65cd2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -96,7 +96,10 @@ struct nfs_fs_context { unsigned int version; unsigned int minorversion; char *fscache_uniq; + unsigned short protofamily; + unsigned short mountfamily; bool need_mount; + bool sloppy; struct { struct sockaddr_storage address; From cbd071b5daa070976ada8601188fcefc986747d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:08 -0500 Subject: [PATCH 102/658] NFS: Deindent nfs_fs_context_parse_option() Deindent nfs_fs_context_parse_option(). 
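
With the per-option code no longer living inside the caller's while-loop, the block braces it kept from the previous patch are redundant, so the body simply loses one tab stop. A self-contained toy (the parse_option_* names are hypothetical and unrelated to the NFS code) showing that the change is purely mechanical:

    #include <stdio.h>

    /* Shape before: the body keeps the braces it had when it sat inside
     * the caller's loop, costing one level of indentation. */
    static int parse_option_indented(const char *p)
    {
            {
                    int token = p[0];

                    switch (token) {
                    case 's':
                            return 1;
                    default:
                            return 0;
                    }
            }
    }

    /* Shape after: the redundant block is dropped and the switch moves to
     * function scope; behaviour is identical. */
    static int parse_option_deindented(const char *p)
    {
            int token = p[0];

            switch (token) {
            case 's':
                    return 1;
            default:
                    return 0;
            }
    }

    int main(void)
    {
            printf("%d %d\n", parse_option_indented("soft"),
                   parse_option_deindented("soft"));
            return 0;
    }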
Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 737 ++++++++++++++++++++++---------------------- 1 file changed, 366 insertions(+), 371 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index a386825c3b0f..92a1e4bd9133 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -500,410 +500,405 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, */ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) { + substring_t args[MAX_OPT_ARGS]; + unsigned long option; char *string; - int rc; + int token, rc; - { - substring_t args[MAX_OPT_ARGS]; - unsigned long option; - int token; - - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); - - token = match_token(p, nfs_mount_option_tokens, args); - switch (token) { + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + token = match_token(p, nfs_mount_option_tokens, args); + switch (token) { /* * boolean options: foo/nofoo */ - case Opt_soft: - ctx->flags |= NFS_MOUNT_SOFT; - ctx->flags &= ~NFS_MOUNT_SOFTERR; - break; - case Opt_softerr: - ctx->flags |= NFS_MOUNT_SOFTERR; - ctx->flags &= ~NFS_MOUNT_SOFT; - break; - case Opt_hard: - ctx->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); - break; - case Opt_posix: - ctx->flags |= NFS_MOUNT_POSIX; - break; - case Opt_noposix: - ctx->flags &= ~NFS_MOUNT_POSIX; - break; - case Opt_cto: - ctx->flags &= ~NFS_MOUNT_NOCTO; - break; - case Opt_nocto: - ctx->flags |= NFS_MOUNT_NOCTO; - break; - case Opt_ac: - ctx->flags &= ~NFS_MOUNT_NOAC; - break; - case Opt_noac: - ctx->flags |= NFS_MOUNT_NOAC; - break; - case Opt_lock: - ctx->flags &= ~NFS_MOUNT_NONLM; - ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_nolock: - ctx->flags |= NFS_MOUNT_NONLM; - ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_udp: - ctx->flags &= ~NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_tcp: - ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_rdma: - ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ - ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(p); - break; - case Opt_acl: - ctx->flags &= ~NFS_MOUNT_NOACL; - break; - case Opt_noacl: - ctx->flags |= NFS_MOUNT_NOACL; - break; - case Opt_rdirplus: - ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; - break; - case Opt_nordirplus: - ctx->flags |= NFS_MOUNT_NORDIRPLUS; - break; - case Opt_sharecache: - ctx->flags &= ~NFS_MOUNT_UNSHARED; - break; - case Opt_nosharecache: - ctx->flags |= NFS_MOUNT_UNSHARED; - break; - case Opt_resvport: - ctx->flags &= ~NFS_MOUNT_NORESVPORT; - break; - case Opt_noresvport: - ctx->flags |= NFS_MOUNT_NORESVPORT; - break; - case Opt_fscache: - ctx->options |= NFS_OPTION_FSCACHE; - kfree(ctx->fscache_uniq); - ctx->fscache_uniq = NULL; - break; - case Opt_nofscache: - ctx->options &= ~NFS_OPTION_FSCACHE; - kfree(ctx->fscache_uniq); - ctx->fscache_uniq = NULL; - break; - case Opt_migration: - ctx->options |= NFS_OPTION_MIGRATION; - break; - case Opt_nomigration: - ctx->options &= ~NFS_OPTION_MIGRATION; - break; + case Opt_soft: + ctx->flags |= NFS_MOUNT_SOFT; + ctx->flags &= ~NFS_MOUNT_SOFTERR; + break; + case Opt_softerr: + ctx->flags |= NFS_MOUNT_SOFTERR; + ctx->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_hard: + ctx->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); + break; + case Opt_posix: + ctx->flags |= NFS_MOUNT_POSIX; + break; + case 
Opt_noposix: + ctx->flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + ctx->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + ctx->flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + ctx->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + ctx->flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_nolock: + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_udp: + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_tcp: + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_rdma: + ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(p); + break; + case Opt_acl: + ctx->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + ctx->flags |= NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_nordirplus: + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + ctx->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_nosharecache: + ctx->flags |= NFS_MOUNT_UNSHARED; + break; + case Opt_resvport: + ctx->flags &= ~NFS_MOUNT_NORESVPORT; + break; + case Opt_noresvport: + ctx->flags |= NFS_MOUNT_NORESVPORT; + break; + case Opt_fscache: + ctx->options |= NFS_OPTION_FSCACHE; + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = NULL; + break; + case Opt_nofscache: + ctx->options &= ~NFS_OPTION_FSCACHE; + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = NULL; + break; + case Opt_migration: + ctx->options |= NFS_OPTION_MIGRATION; + break; + case Opt_nomigration: + ctx->options &= ~NFS_OPTION_MIGRATION; + break; /* * options that take numeric values */ - case Opt_port: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - ctx->nfs_server.port = option; - break; - case Opt_rsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->rsize = option; - break; - case Opt_wsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->wsize = option; - break; - case Opt_bsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->bsize = option; - break; - case Opt_timeo: - if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) - goto out_invalid_value; - ctx->timeo = option; - break; - case Opt_retrans: - if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) - goto out_invalid_value; - ctx->retrans = option; - break; - case Opt_acregmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->acregmin = option; - break; - case Opt_acregmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->acregmax = option; - break; - case Opt_acdirmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->acdirmin = option; - break; - case Opt_acdirmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->acdirmax = option; - break; - case Opt_actimeo: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->acregmin = ctx->acregmax = + case Opt_port: + if (nfs_get_option_ul(args, &option) || + option > USHRT_MAX) + goto out_invalid_value; + ctx->nfs_server.port = option; + break; + case Opt_rsize: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->rsize = option; + break; + case Opt_wsize: + if 
(nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->wsize = option; + break; + case Opt_bsize: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->bsize = option; + break; + case Opt_timeo: + if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) + goto out_invalid_value; + ctx->timeo = option; + break; + case Opt_retrans: + if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) + goto out_invalid_value; + ctx->retrans = option; + break; + case Opt_acregmin: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->acregmin = option; + break; + case Opt_acregmax: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->acregmax = option; + break; + case Opt_acdirmin: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->acdirmin = option; + break; + case Opt_acdirmax: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->acdirmax = option; + break; + case Opt_actimeo: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->acregmin = ctx->acregmax = ctx->acdirmin = ctx->acdirmax = option; - break; - case Opt_namelen: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - ctx->namlen = option; - break; - case Opt_mountport: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - ctx->mount_server.port = option; - break; - case Opt_mountvers: - if (nfs_get_option_ul(args, &option) || - option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) - goto out_invalid_value; - ctx->mount_server.version = option; - break; - case Opt_minorversion: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - if (option > NFS4_MAX_MINOR_VERSION) - goto out_invalid_value; - ctx->minorversion = option; - break; + break; + case Opt_namelen: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + ctx->namlen = option; + break; + case Opt_mountport: + if (nfs_get_option_ul(args, &option) || + option > USHRT_MAX) + goto out_invalid_value; + ctx->mount_server.port = option; + break; + case Opt_mountvers: + if (nfs_get_option_ul(args, &option) || + option < NFS_MNT_VERSION || + option > NFS_MNT3_VERSION) + goto out_invalid_value; + ctx->mount_server.version = option; + break; + case Opt_minorversion: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + if (option > NFS4_MAX_MINOR_VERSION) + goto out_invalid_value; + ctx->minorversion = option; + break; /* * options that take text values */ - case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_version_string(string, ctx, args); - kfree(string); - if (!rc) - goto out_invalid_value; - break; - case Opt_sec: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_security_flavors(string, ctx); - kfree(string); - if (!rc) { - dfprintk(MOUNT, "NFS: unrecognized " - "security flavor\n"); - return -EINVAL; - } - break; - case Opt_proto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); + case Opt_nfsvers: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_version_string(string, ctx, args); + kfree(string); + if (!rc) + goto out_invalid_value; + break; + case Opt_sec: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_security_flavors(string, ctx); + kfree(string); + if (!rc) { + dfprintk(MOUNT, "NFS: unrecognized " + 
"security flavor\n"); + return -EINVAL; + } + break; + case Opt_proto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); - ctx->protofamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - ctx->protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - ctx->flags &= ~NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - ctx->protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma6: - ctx->protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_rdma: - /* vector side protocols to TCP */ - ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(string); - break; - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - kfree(string); - return -EINVAL; - } - kfree(string); + ctx->protofamily = AF_INET; + switch (token) { + case Opt_xprt_udp6: + ctx->protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; - case Opt_mountproto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); + case Opt_xprt_tcp6: + ctx->protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma6: + ctx->protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_rdma: + /* vector side protocols to TCP */ + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(string); + break; + default: + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); kfree(string); + return -EINVAL; + } + kfree(string); + break; + case Opt_mountproto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); - ctx->mountfamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - ctx->mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - ctx->mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma: /* not used for side protocols */ - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - return -EINVAL; - } + ctx->mountfamily = AF_INET; + switch (token) { + case Opt_xprt_udp6: + ctx->mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; - case Opt_addr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - ctx->nfs_server.addrlen = - rpc_pton(ctx->net, string, strlen(string), - (struct sockaddr *) - &ctx->nfs_server.address, - sizeof(ctx->nfs_server.address)); - kfree(string); - if (ctx->nfs_server.addrlen == 0) - goto out_invalid_address; + case Opt_xprt_tcp6: + ctx->mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; break; - case Opt_clientaddr: - if (nfs_get_option_str(args, &ctx->client_address)) - goto out_nomem; + case Opt_xprt_rdma: /* not used for side protocols */ + default: + dfprintk(MOUNT, "NFS: unrecognized 
" + "transport protocol\n"); + return -EINVAL; + } + break; + case Opt_addr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + ctx->nfs_server.addrlen = + rpc_pton(ctx->net, string, strlen(string), + (struct sockaddr *) + &ctx->nfs_server.address, + sizeof(ctx->nfs_server.address)); + kfree(string); + if (ctx->nfs_server.addrlen == 0) + goto out_invalid_address; + break; + case Opt_clientaddr: + if (nfs_get_option_str(args, &ctx->client_address)) + goto out_nomem; + break; + case Opt_mounthost: + if (nfs_get_option_str(args, + &ctx->mount_server.hostname)) + goto out_nomem; + break; + case Opt_mountaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + ctx->mount_server.addrlen = + rpc_pton(ctx->net, string, strlen(string), + (struct sockaddr *) + &ctx->mount_server.address, + sizeof(ctx->mount_server.address)); + kfree(string); + if (ctx->mount_server.addrlen == 0) + goto out_invalid_address; + break; + case Opt_nconnect: + if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) + goto out_invalid_value; + ctx->nfs_server.nconnect = option; + break; + case Opt_lookupcache: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_lookupcache_tokens, args); + kfree(string); + switch (token) { + case Opt_lookupcache_all: + ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); break; - case Opt_mounthost: - if (nfs_get_option_str(args, - &ctx->mount_server.hostname)) - goto out_nomem; + case Opt_lookupcache_positive: + ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; break; - case Opt_mountaddr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - ctx->mount_server.addrlen = - rpc_pton(ctx->net, string, strlen(string), - (struct sockaddr *) - &ctx->mount_server.address, - sizeof(ctx->mount_server.address)); - kfree(string); - if (ctx->mount_server.addrlen == 0) - goto out_invalid_address; + case Opt_lookupcache_none: + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; - case Opt_nconnect: - if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) - goto out_invalid_value; - ctx->nfs_server.nconnect = option; + default: + dfprintk(MOUNT, "NFS: invalid " + "lookupcache argument\n"); + return -EINVAL; + } + break; + case Opt_fscache_uniq: + if (nfs_get_option_str(args, &ctx->fscache_uniq)) + goto out_nomem; + ctx->options |= NFS_OPTION_FSCACHE; + break; + case Opt_local_lock: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_local_lock_tokens, + args); + kfree(string); + switch (token) { + case Opt_local_lock_all: + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); break; - case Opt_lookupcache: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_lookupcache_tokens, args); - kfree(string); - switch (token) { - case Opt_lookupcache_all: - ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); - break; - case Opt_lookupcache_positive: - ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; - ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; - break; - case Opt_lookupcache_none: - ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "lookupcache argument\n"); - return -EINVAL; - } + case Opt_local_lock_flock: + ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; break; - case 
Opt_fscache_uniq: - if (nfs_get_option_str(args, &ctx->fscache_uniq)) - goto out_nomem; - ctx->options |= NFS_OPTION_FSCACHE; + case Opt_local_lock_posix: + ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; break; - case Opt_local_lock: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_local_lock_tokens, - args); - kfree(string); - switch (token) { - case Opt_local_lock_all: - ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_local_lock_flock: - ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; - break; - case Opt_local_lock_posix: - ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; - break; - case Opt_local_lock_none: - ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "local_lock argument\n"); - return -EINVAL; - } + case Opt_local_lock_none: + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); break; + default: + dfprintk(MOUNT, "NFS: invalid " + "local_lock argument\n"); + return -EINVAL; + } + break; /* * Special options */ - case Opt_sloppy: - ctx->sloppy = 1; - dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); - break; - case Opt_userspace: - case Opt_deprecated: - dfprintk(MOUNT, "NFS: ignoring mount option " - "'%s'\n", p); - break; + case Opt_sloppy: + ctx->sloppy = 1; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; + case Opt_userspace: + case Opt_deprecated: + dfprintk(MOUNT, "NFS: ignoring mount option " + "'%s'\n", p); + break; - default: - dfprintk(MOUNT, "NFS: unrecognized mount option " - "'%s'\n", p); - return -EINVAL; - } + default: + dfprintk(MOUNT, "NFS: unrecognized mount option " + "'%s'\n", p); + return -EINVAL; } return 0; From 48be8a66cf98accca033c42d214698dd64ac4f79 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:09 -0500 Subject: [PATCH 103/658] NFS: Add a small buffer in nfs_fs_context to avoid string dup Add a small buffer in nfs_fs_context to avoid string duplication when parsing numbers. Also make the parsing function wrapper place the parsed integer directly in the appropriate nfs_fs_context struct member. 
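
To make the shape of this change concrete, here is a minimal userspace C sketch of the same pattern: copy the option value into a small per-context buffer, convert it in place, and range-check the result. The struct and helper names below are illustrative stand-ins (strtoul() fills in for the kernel's kstrtouint()); this is not the kernel code itself.

/*
 * Illustrative userspace sketch of bounded numeric option parsing with a
 * small fixed buffer instead of a strdup()/kfree() round trip.
 * All names here are hypothetical.
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct parse_ctx {
        char buf[32];           /* small parse buffer, analogous to a per-context buffer */
};

/* Copy @val into ctx->buf, convert it, and require lo <= result <= hi. */
static int get_option_ui_bound(struct parse_ctx *ctx, const char *val,
                               unsigned int *option,
                               unsigned int lo, unsigned int hi)
{
        unsigned long v;
        char *end;

        if (strlen(val) >= sizeof(ctx->buf))
                return -EINVAL;
        strcpy(ctx->buf, val);

        errno = 0;
        v = strtoul(ctx->buf, &end, 10);
        if (errno || end == ctx->buf || *end != '\0' || v > UINT_MAX)
                return -EINVAL;
        if (v < lo || v > hi)
                return -ERANGE;
        *option = (unsigned int)v;
        return 0;
}

int main(void)
{
        struct parse_ctx ctx;
        unsigned int port;

        /* e.g. a "port=2049" option: the value must fit in 0..USHRT_MAX */
        if (get_option_ui_bound(&ctx, "2049", &port, 0, USHRT_MAX) == 0)
                printf("port=%u\n", port);
        return 0;
}

Because the option values being parsed here are short numeric strings, a small per-context buffer suffices, which is what lets the patch drop the string duplication and free on every numeric option.
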
Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 102 +++++++++++++++++++++----------------------- fs/nfs/internal.h | 2 + 2 files changed, 50 insertions(+), 54 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 92a1e4bd9133..2921c5820c31 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -468,27 +468,38 @@ static int nfs_get_option_str(substring_t args[], char **option) return !*option; } -static int nfs_get_option_ul(substring_t args[], unsigned long *option) +static int nfs_get_option_ui(struct nfs_fs_context *ctx, + substring_t args[], unsigned int *option) { - int rc; - char *string; - - string = match_strdup(args); - if (string == NULL) - return -ENOMEM; - rc = kstrtoul(string, 10, option); - kfree(string); - - return rc; + match_strlcpy(ctx->buf, args, sizeof(ctx->buf)); + return kstrtouint(ctx->buf, 10, option); } -static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, - unsigned long l_bound, unsigned long u_bound) +static int nfs_get_option_ui_bound(struct nfs_fs_context *ctx, + substring_t args[], unsigned int *option, + unsigned int l_bound, unsigned u_bound) { int ret; - ret = nfs_get_option_ul(args, option); - if (ret != 0) + match_strlcpy(ctx->buf, args, sizeof(ctx->buf)); + ret = kstrtouint(ctx->buf, 10, option); + if (ret < 0) + return ret; + if (*option < l_bound || *option > u_bound) + return -ERANGE; + return 0; +} + +static int nfs_get_option_us_bound(struct nfs_fs_context *ctx, + substring_t args[], unsigned short *option, + unsigned short l_bound, + unsigned short u_bound) +{ + int ret; + + match_strlcpy(ctx->buf, args, sizeof(ctx->buf)); + ret = kstrtou16(ctx->buf, 10, option); + if (ret < 0) return ret; if (*option < l_bound || *option > u_bound) return -ERANGE; @@ -501,7 +512,6 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) { substring_t args[MAX_OPT_ARGS]; - unsigned long option; char *string; int token, rc; @@ -609,86 +619,70 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) * options that take numeric values */ case Opt_port: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) + if (nfs_get_option_ui_bound(ctx, args, &ctx->nfs_server.port, + 0, USHRT_MAX)) goto out_invalid_value; - ctx->nfs_server.port = option; break; case Opt_rsize: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->rsize)) goto out_invalid_value; - ctx->rsize = option; break; case Opt_wsize: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->wsize)) goto out_invalid_value; - ctx->wsize = option; break; case Opt_bsize: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->bsize)) goto out_invalid_value; - ctx->bsize = option; break; case Opt_timeo: - if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) + if (nfs_get_option_ui_bound(ctx, args, &ctx->timeo, 1, INT_MAX)) goto out_invalid_value; - ctx->timeo = option; break; case Opt_retrans: - if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) + if (nfs_get_option_ui_bound(ctx, args, &ctx->retrans, 0, INT_MAX)) goto out_invalid_value; - ctx->retrans = option; break; case Opt_acregmin: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->acregmin)) goto out_invalid_value; - ctx->acregmin = option; break; case Opt_acregmax: - if (nfs_get_option_ul(args, &option)) + 
if (nfs_get_option_ui(ctx, args, &ctx->acregmax)) goto out_invalid_value; - ctx->acregmax = option; break; case Opt_acdirmin: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->acdirmin)) goto out_invalid_value; - ctx->acdirmin = option; break; case Opt_acdirmax: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->acdirmax)) goto out_invalid_value; - ctx->acdirmax = option; break; case Opt_actimeo: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->acdirmax)) goto out_invalid_value; ctx->acregmin = ctx->acregmax = - ctx->acdirmin = ctx->acdirmax = option; + ctx->acdirmin = ctx->acdirmax; break; case Opt_namelen: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui(ctx, args, &ctx->namlen)) goto out_invalid_value; - ctx->namlen = option; break; case Opt_mountport: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) + if (nfs_get_option_ui_bound(ctx, args, &ctx->mount_server.port, + 0, USHRT_MAX)) goto out_invalid_value; - ctx->mount_server.port = option; break; case Opt_mountvers: - if (nfs_get_option_ul(args, &option) || - option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) + if (nfs_get_option_ui_bound(ctx, args, &ctx->mount_server.version, + NFS_MNT_VERSION, NFS_MNT3_VERSION)) goto out_invalid_value; - ctx->mount_server.version = option; break; case Opt_minorversion: - if (nfs_get_option_ul(args, &option)) + if (nfs_get_option_ui_bound(ctx, args, &ctx->minorversion, + 0, NFS4_MAX_MINOR_VERSION)) goto out_invalid_value; - if (option > NFS4_MAX_MINOR_VERSION) - goto out_invalid_value; - ctx->minorversion = option; break; /* @@ -820,9 +814,9 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) goto out_invalid_address; break; case Opt_nconnect: - if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) + if (nfs_get_option_us_bound(ctx, args, &ctx->nfs_server.nconnect, + 1, NFS_MAX_CONNECTIONS)) goto out_invalid_value; - ctx->nfs_server.nconnect = option; break; case Opt_lookupcache: string = match_strdup(args); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d0abc7b65cd2..5342f3e4d565 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -122,6 +122,8 @@ struct nfs_fs_context { void *lsm_opts; struct net *net; + + char buf[32]; /* Parse buffer */ }; /* mount_clnt.c */ From e558100fda7e8c7888f523920214bcb35ed9382b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:10 -0500 Subject: [PATCH 104/658] NFS: Do some tidying of the parsing code Do some tidying of the parsing code, including: (*) Returning 0/error rather than true/false. (*) Putting the nfs_fs_context pointer first in some arg lists. (*) Unwrap some lines that will now fit on one line. (*) Provide unioned sockaddr/sockaddr_storage fields to avoid casts. (*) nfs_parse_devname() can paste its return values directly into the nfs_fs_context struct as that's where the caller puts them. Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 126 +++++++++++++++++++------------------------- fs/nfs/internal.h | 16 ++++-- fs/nfs/super.c | 2 +- 3 files changed, 66 insertions(+), 78 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 2921c5820c31..44531443a92b 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -343,8 +343,9 @@ static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) * Add 'flavor' to 'auth_info' if not already present. 
* Returns true if 'flavor' ends up in the list, false otherwise */ -static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, - rpc_authflavor_t flavor) +static int nfs_auth_info_add(struct nfs_fs_context *ctx, + struct nfs_auth_info *auth_info, + rpc_authflavor_t flavor) { unsigned int i; unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); @@ -352,26 +353,27 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, /* make sure this flavor isn't already in the list */ for (i = 0; i < auth_info->flavor_len; i++) { if (flavor == auth_info->flavors[i]) - return true; + return 0; } if (auth_info->flavor_len + 1 >= max_flavor_len) { dfprintk(MOUNT, "NFS: too many sec= flavors\n"); - return false; + return -EINVAL; } auth_info->flavors[auth_info->flavor_len++] = flavor; - return true; + return 0; } /* * Parse the value of the 'sec=' option. */ -static int nfs_parse_security_flavors(char *value, struct nfs_fs_context *ctx) +static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, char *value) { substring_t args[MAX_OPT_ARGS]; rpc_authflavor_t pseudoflavor; char *p; + int ret; dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); @@ -413,19 +415,20 @@ static int nfs_parse_security_flavors(char *value, struct nfs_fs_context *ctx) default: dfprintk(MOUNT, "NFS: sec= option '%s' not recognized\n", p); - return 0; + return -EINVAL; } - if (!nfs_auth_info_add(&ctx->auth_info, pseudoflavor)) - return 0; + ret = nfs_auth_info_add(ctx, &ctx->auth_info, pseudoflavor); + if (ret < 0) + return ret; } - return 1; + return 0; } -static int nfs_parse_version_string(char *string, - struct nfs_fs_context *ctx, - substring_t *args) +static int nfs_parse_version_string(struct nfs_fs_context *ctx, + char *string, + substring_t *args) { ctx->flags &= ~NFS_MOUNT_VER3; switch (match_token(string, nfs_vers_tokens, args)) { @@ -456,9 +459,10 @@ static int nfs_parse_version_string(char *string, ctx->minorversion = 2; break; default: - return 0; + dfprintk(MOUNT, "NFS: Unsupported NFS version\n"); + return -EINVAL; } - return 1; + return 0; } static int nfs_get_option_str(substring_t args[], char **option) @@ -513,7 +517,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) { substring_t args[MAX_OPT_ARGS]; char *string; - int token, rc; + int token, ret; dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); @@ -553,13 +557,11 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) break; case Opt_lock: ctx->flags &= ~NFS_MOUNT_NONLM; - ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_nolock: ctx->flags |= NFS_MOUNT_NONLM; - ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_udp: ctx->flags &= ~NFS_MOUNT_TCP; @@ -692,29 +694,25 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) string = match_strdup(args); if (string == NULL) goto out_nomem; - rc = nfs_parse_version_string(string, ctx, args); + ret = nfs_parse_version_string(ctx, string, args); kfree(string); - if (!rc) - goto out_invalid_value; + if (ret < 0) + return ret; break; case Opt_sec: string = match_strdup(args); if (string == NULL) goto out_nomem; - rc = nfs_parse_security_flavors(string, ctx); + ret = nfs_parse_security_flavors(ctx, string); kfree(string); - if (!rc) { - dfprintk(MOUNT, "NFS: unrecognized " - "security flavor\n"); - return -EINVAL; - } + 
if (ret < 0) + return ret; break; case Opt_proto: string = match_strdup(args); if (string == NULL) goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); + token = match_token(string, nfs_xprt_protocol_tokens, args); ctx->protofamily = AF_INET; switch (token) { @@ -742,9 +740,8 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) xprt_load_transport(string); break; default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); kfree(string); + dfprintk(MOUNT, "NFS: unrecognized transport protocol\n"); return -EINVAL; } kfree(string); @@ -753,8 +750,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) string = match_strdup(args); if (string == NULL) goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); + token = match_token(string, nfs_xprt_protocol_tokens, args); kfree(string); ctx->mountfamily = AF_INET; @@ -773,8 +769,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) break; case Opt_xprt_rdma: /* not used for side protocols */ default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); + dfprintk(MOUNT, "NFS: unrecognized transport protocol\n"); return -EINVAL; } break; @@ -784,9 +779,8 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) goto out_nomem; ctx->nfs_server.addrlen = rpc_pton(ctx->net, string, strlen(string), - (struct sockaddr *) &ctx->nfs_server.address, - sizeof(ctx->nfs_server.address)); + sizeof(ctx->nfs_server._address)); kfree(string); if (ctx->nfs_server.addrlen == 0) goto out_invalid_address; @@ -796,8 +790,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) goto out_nomem; break; case Opt_mounthost: - if (nfs_get_option_str(args, - &ctx->mount_server.hostname)) + if (nfs_get_option_str(args, &ctx->mount_server.hostname)) goto out_nomem; break; case Opt_mountaddr: @@ -806,9 +799,8 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) goto out_nomem; ctx->mount_server.addrlen = rpc_pton(ctx->net, string, strlen(string), - (struct sockaddr *) &ctx->mount_server.address, - sizeof(ctx->mount_server.address)); + sizeof(ctx->mount_server._address)); kfree(string); if (ctx->mount_server.addrlen == 0) goto out_invalid_address; @@ -822,8 +814,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) string = match_strdup(args); if (string == NULL) goto out_nomem; - token = match_token(string, - nfs_lookupcache_tokens, args); + token = match_token(string, nfs_lookupcache_tokens, args); kfree(string); switch (token) { case Opt_lookupcache_all: @@ -837,8 +828,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: - dfprintk(MOUNT, "NFS: invalid " - "lookupcache argument\n"); + dfprintk(MOUNT, "NFS: invalid lookupcache argument\n"); return -EINVAL; } break; @@ -851,8 +841,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) string = match_strdup(args); if (string == NULL) goto out_nomem; - token = match_token(string, nfs_local_lock_tokens, - args); + token = match_token(string, nfs_local_lock_tokens, args); kfree(string); switch (token) { case Opt_local_lock_all: @@ -870,8 +859,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) NFS_MOUNT_LOCAL_FCNTL); break; default: - dfprintk(MOUNT, "NFS: invalid " - "local_lock argument\n"); + dfprintk(MOUNT, 
"NFS: invalid local_lock argument\n"); return -EINVAL; } break; @@ -885,13 +873,11 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) break; case Opt_userspace: case Opt_deprecated: - dfprintk(MOUNT, "NFS: ignoring mount option " - "'%s'\n", p); + dfprintk(MOUNT, "NFS: ignoring mount option '%s'\n", p); break; default: - dfprintk(MOUNT, "NFS: unrecognized mount option " - "'%s'\n", p); + dfprintk(MOUNT, "NFS: unrecognized mount option '%s'\n", p); return -EINVAL; } @@ -951,15 +937,15 @@ int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) * families in the addr=/mountaddr= options. */ if (ctx->protofamily != AF_UNSPEC && - ctx->protofamily != ctx->nfs_server.address.ss_family) + ctx->protofamily != ctx->nfs_server.address.sa_family) goto out_proto_mismatch; if (ctx->mountfamily != AF_UNSPEC) { if (ctx->mount_server.addrlen) { - if (ctx->mountfamily != ctx->mount_server.address.ss_family) + if (ctx->mountfamily != ctx->mount_server.address.sa_family) goto out_mountproto_mismatch; } else { - if (ctx->mountfamily != ctx->nfs_server.address.ss_family) + if (ctx->mountfamily != ctx->nfs_server.address.sa_family) goto out_mountproto_mismatch; } } @@ -995,9 +981,9 @@ out_security_failure: * * Note: caller frees hostname and export path, even on error. */ -static int nfs_parse_devname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) +static int nfs_parse_devname(struct nfs_fs_context *ctx, + const char *dev_name, + size_t maxnamlen, size_t maxpathlen) { size_t len; char *end; @@ -1033,17 +1019,17 @@ static int nfs_parse_devname(const char *dev_name, goto out_hostname; /* N.B. caller will free nfs_server.hostname in all cases */ - *hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (*hostname == NULL) + ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); + if (!ctx->nfs_server.hostname) goto out_nomem; len = strlen(++end); if (len > maxpathlen) goto out_path; - *export_path = kstrndup(end, len, GFP_KERNEL); - if (!*export_path) + ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL); + if (!ctx->nfs_server.export_path) goto out_nomem; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path); return 0; out_bad_devname: @@ -1064,7 +1050,7 @@ out_path: } /* - * Validate the NFS2/NFS3 mount data + * Parse monolithic NFS2/NFS3 mount data * - fills in the mount root filehandle * * For option strings, user space handles the following behaviors: @@ -1393,11 +1379,7 @@ int nfs_validate_text_mount_data(void *options, nfs_set_port(sap, &ctx->nfs_server.port, port); - return nfs_parse_devname(dev_name, - &ctx->nfs_server.hostname, - max_namelen, - &ctx->nfs_server.export_path, - max_pathlen); + return nfs_parse_devname(ctx, dev_name, max_namelen, max_pathlen); #if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5342f3e4d565..003c2b8eb1e6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -82,11 +82,11 @@ struct nfs_client_initdata { * In-kernel mount arguments */ struct nfs_fs_context { - int flags; + unsigned int flags; /* NFS{,4}_MOUNT_* flags */ unsigned int rsize, wsize; unsigned int timeo, retrans; - unsigned int acregmin, acregmax, - acdirmin, acdirmax; + unsigned int acregmin, acregmax; + unsigned int acdirmin, acdirmax; unsigned int namlen; unsigned int options; unsigned int bsize; @@ -102,7 +102,10 @@ struct nfs_fs_context { bool sloppy; struct { - 
struct sockaddr_storage address; + union { + struct sockaddr address; + struct sockaddr_storage _address; + }; size_t addrlen; char *hostname; u32 version; @@ -111,7 +114,10 @@ struct nfs_fs_context { } mount_server; struct { - struct sockaddr_storage address; + union { + struct sockaddr address; + struct sockaddr_storage _address; + }; size_t addrlen; char *hostname; char *export_path; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 83527515590e..59962bc0118f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -816,7 +816,7 @@ static int nfs_request_mount(struct nfs_fs_context *cfg, /* * Construct the mount server's address. */ - if (cfg->mount_server.address.ss_family == AF_UNSPEC) { + if (cfg->mount_server.address.sa_family == AF_UNSPEC) { memcpy(request.sap, &cfg->nfs_server.address, cfg->nfs_server.addrlen); cfg->mount_server.addrlen = cfg->nfs_server.addrlen; From 38465f5d1af932494d66b52d26bb3a02b837cdf8 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 10 Dec 2019 07:31:11 -0500 Subject: [PATCH 105/658] NFS: rename nfs_fs_context pointer arg in a few functions Split out from commit "NFS: Add fs_context support." Rename cfg to ctx in nfs_init_server(), nfs_verify_authflavors(), and nfs_request_mount(). No functional changes. Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 62 ++++++++++++++++++++++++------------------------- fs/nfs/super.c | 54 +++++++++++++++++++++--------------------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0a00df8e71bb..69c0708b2acc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -658,28 +658,28 @@ EXPORT_SYMBOL_GPL(nfs_init_client); * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_fs_context *cfg, + const struct nfs_fs_context *ctx, struct nfs_subversion *nfs_mod) { struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { - .hostname = cfg->nfs_server.hostname, - .addr = (const struct sockaddr *)&cfg->nfs_server.address, - .addrlen = cfg->nfs_server.addrlen, + .hostname = ctx->nfs_server.hostname, + .addr = (const struct sockaddr *)&ctx->nfs_server.address, + .addrlen = ctx->nfs_server.addrlen, .nfs_mod = nfs_mod, - .proto = cfg->nfs_server.protocol, - .net = cfg->net, + .proto = ctx->nfs_server.protocol, + .net = ctx->net, .timeparms = &timeparms, .cred = server->cred, - .nconnect = cfg->nfs_server.nconnect, + .nconnect = ctx->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; int error; - nfs_init_timeout_values(&timeparms, cfg->nfs_server.protocol, - cfg->timeo, cfg->retrans); - if (cfg->flags & NFS_MOUNT_NORESVPORT) + nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, + ctx->timeo, ctx->retrans); + if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ @@ -690,46 +690,46 @@ static int nfs_init_server(struct nfs_server *server, server->nfs_client = clp; /* Initialise the client representation from the mount data */ - server->flags = cfg->flags; - server->options = cfg->options; + server->flags = ctx->flags; + server->options = ctx->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; - if (cfg->rsize) - server->rsize = nfs_block_size(cfg->rsize, NULL); - if (cfg->wsize) - server->wsize = nfs_block_size(cfg->wsize, NULL); + 
if (ctx->rsize) + server->rsize = nfs_block_size(ctx->rsize, NULL); + if (ctx->wsize) + server->wsize = nfs_block_size(ctx->wsize, NULL); - server->acregmin = cfg->acregmin * HZ; - server->acregmax = cfg->acregmax * HZ; - server->acdirmin = cfg->acdirmin * HZ; - server->acdirmax = cfg->acdirmax * HZ; + server->acregmin = ctx->acregmin * HZ; + server->acregmax = ctx->acregmax * HZ; + server->acdirmin = ctx->acdirmin * HZ; + server->acdirmax = ctx->acdirmax * HZ; /* Start lockd here, before we might error out */ error = nfs_start_lockd(server); if (error < 0) goto error; - server->port = cfg->nfs_server.port; - server->auth_info = cfg->auth_info; + server->port = ctx->nfs_server.port; + server->auth_info = ctx->auth_info; error = nfs_init_server_rpcclient(server, &timeparms, - cfg->selected_flavor); + ctx->selected_flavor); if (error < 0) goto error; /* Preserve the values of mount_server-related mount options */ - if (cfg->mount_server.addrlen) { - memcpy(&server->mountd_address, &cfg->mount_server.address, - cfg->mount_server.addrlen); - server->mountd_addrlen = cfg->mount_server.addrlen; + if (ctx->mount_server.addrlen) { + memcpy(&server->mountd_address, &ctx->mount_server.address, + ctx->mount_server.addrlen); + server->mountd_addrlen = ctx->mount_server.addrlen; } - server->mountd_version = cfg->mount_server.version; - server->mountd_port = cfg->mount_server.port; - server->mountd_protocol = cfg->mount_server.protocol; + server->mountd_version = ctx->mount_server.version; + server->mountd_port = ctx->mount_server.port; + server->mountd_protocol = ctx->mount_server.protocol; - server->namelen = cfg->namlen; + server->namelen = ctx->namlen; return 0; error: diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 59962bc0118f..6c9573a32a69 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -726,11 +726,11 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, EXPORT_SYMBOL_GPL(nfs_auth_info_match); /* - * Ensure that a specified authtype in cfg->auth_info is supported by - * the server. Returns 0 and sets cfg->selected_flavor if it's ok, and + * Ensure that a specified authtype in ctx->auth_info is supported by + * the server. Returns 0 and sets ctx->selected_flavor if it's ok, and * -EACCES if not. */ -static int nfs_verify_authflavors(struct nfs_fs_context *cfg, +static int nfs_verify_authflavors(struct nfs_fs_context *ctx, rpc_authflavor_t *server_authlist, unsigned int count) { @@ -753,7 +753,7 @@ static int nfs_verify_authflavors(struct nfs_fs_context *cfg, for (i = 0; i < count; i++) { flavor = server_authlist[i]; - if (nfs_auth_info_match(&cfg->auth_info, flavor)) + if (nfs_auth_info_match(&ctx->auth_info, flavor)) goto out; if (flavor == RPC_AUTH_NULL) @@ -761,7 +761,7 @@ static int nfs_verify_authflavors(struct nfs_fs_context *cfg, } if (found_auth_null) { - flavor = cfg->auth_info.flavors[0]; + flavor = ctx->auth_info.flavors[0]; goto out; } @@ -770,8 +770,8 @@ static int nfs_verify_authflavors(struct nfs_fs_context *cfg, return -EACCES; out: - cfg->selected_flavor = flavor; - dfprintk(MOUNT, "NFS: using auth flavor %u\n", cfg->selected_flavor); + ctx->selected_flavor = flavor; + dfprintk(MOUNT, "NFS: using auth flavor %u\n", ctx->selected_flavor); return 0; } @@ -779,50 +779,50 @@ out: * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. 
*/ -static int nfs_request_mount(struct nfs_fs_context *cfg, +static int nfs_request_mount(struct nfs_fs_context *ctx, struct nfs_fh *root_fh, rpc_authflavor_t *server_authlist, unsigned int *server_authlist_len) { struct nfs_mount_request request = { .sap = (struct sockaddr *) - &cfg->mount_server.address, - .dirpath = cfg->nfs_server.export_path, - .protocol = cfg->mount_server.protocol, + &ctx->mount_server.address, + .dirpath = ctx->nfs_server.export_path, + .protocol = ctx->mount_server.protocol, .fh = root_fh, - .noresvport = cfg->flags & NFS_MOUNT_NORESVPORT, + .noresvport = ctx->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = server_authlist_len, .auth_flavs = server_authlist, - .net = cfg->net, + .net = ctx->net, }; int status; - if (cfg->mount_server.version == 0) { - switch (cfg->version) { + if (ctx->mount_server.version == 0) { + switch (ctx->version) { default: - cfg->mount_server.version = NFS_MNT3_VERSION; + ctx->mount_server.version = NFS_MNT3_VERSION; break; case 2: - cfg->mount_server.version = NFS_MNT_VERSION; + ctx->mount_server.version = NFS_MNT_VERSION; } } - request.version = cfg->mount_server.version; + request.version = ctx->mount_server.version; - if (cfg->mount_server.hostname) - request.hostname = cfg->mount_server.hostname; + if (ctx->mount_server.hostname) + request.hostname = ctx->mount_server.hostname; else - request.hostname = cfg->nfs_server.hostname; + request.hostname = ctx->nfs_server.hostname; /* * Construct the mount server's address. */ - if (cfg->mount_server.address.sa_family == AF_UNSPEC) { - memcpy(request.sap, &cfg->nfs_server.address, - cfg->nfs_server.addrlen); - cfg->mount_server.addrlen = cfg->nfs_server.addrlen; + if (ctx->mount_server.address.sa_family == AF_UNSPEC) { + memcpy(request.sap, &ctx->nfs_server.address, + ctx->nfs_server.addrlen); + ctx->mount_server.addrlen = ctx->nfs_server.addrlen; } - request.salen = cfg->mount_server.addrlen; - nfs_set_port(request.sap, &cfg->mount_server.port, 0); + request.salen = ctx->mount_server.addrlen; + nfs_set_port(request.sap, &ctx->mount_server.port, 0); /* * Now ask the mount server to map our export path From e38bb238ed8ce280a217629294ba51dc217c5a2c Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 10 Dec 2019 07:31:12 -0500 Subject: [PATCH 106/658] NFS: Convert mount option parsing to use functionality from fs_parser.h Split out from commit "NFS: Add fs_context support." Convert existing mount option definitions to fs_parameter_enum's and fs_parameter_spec's. Parse mount options using fs_parse() and lookup_constant(). Notes: 1) Fixed a typo in the udp6 definition in nfs_xprt_protocol_tokens from the original commit. 2) fs_parse() expects an fs_context as the first arg so that any errors can be logged to the fs_context. We're passing NULL for the fs_context (this will change in commit "NFS: Add fs_context support.") which is okay as it will cause logfc() to do a printk() instead. 3) fs_parse() expects an fs_paramter as the third arg. We're building an fs_parameter manually in nfs_fs_context_parse_option(), which will go away in commit "NFS: Add fs_context support.". 
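
As background for this conversion, the core of lookup_constant()-style parsing is a plain name-to-constant table search; the userspace C sketch below illustrates that idea for the vers= values. The table, enum, and helper here are stand-ins for illustration only, not the fs_parser.h API itself.

/*
 * Illustrative userspace sketch of table-driven keyword lookup, the style
 * of parsing this patch moves the NFS option code toward.  The names below
 * are hypothetical, not the kernel's fs_parser/lookup_constant code.
 */
#include <stdio.h>
#include <string.h>

struct constant_entry {
        const char *name;
        int value;
};

enum { VERS_2, VERS_3, VERS_4, VERS_4_0, VERS_4_1, VERS_4_2, VERS_ERR = -1 };

static const struct constant_entry vers_table[] = {
        { "2",   VERS_2 },
        { "3",   VERS_3 },
        { "4",   VERS_4 },
        { "4.0", VERS_4_0 },
        { "4.1", VERS_4_1 },
        { "4.2", VERS_4_2 },
};

/* Return the constant mapped to @name, or @not_found if it is absent. */
static int lookup_in_table(const struct constant_entry *tbl, size_t n,
                           const char *name, int not_found)
{
        size_t i;

        for (i = 0; i < n; i++)
                if (strcmp(tbl[i].name, name) == 0)
                        return tbl[i].value;
        return not_found;
}

int main(void)
{
        const char *arg = "4.1";        /* e.g. the value of a "vers=4.1" mount option */
        int token = lookup_in_table(vers_table,
                                    sizeof(vers_table) / sizeof(vers_table[0]),
                                    arg, VERS_ERR);

        printf("vers=%s -> token %d\n", arg, token);
        return 0;
}

The real conversion gains more than this sketch shows: fs_parse() also handles the flag/no-flag negation and the integer conversion that the old match_table code open-coded, which is why the per-option cases in the diff below shrink so much.
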
Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 821 ++++++++++++++++++++------------------------ 1 file changed, 381 insertions(+), 440 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 44531443a92b..9a3162055d5d 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -11,7 +11,8 @@ #include #include -#include +#include +#include #include #include #include @@ -28,218 +29,215 @@ #define NFS_MAX_CONNECTIONS 16 -enum { - /* Mount options that take no arguments */ - Opt_soft, Opt_softerr, Opt_hard, - Opt_posix, Opt_noposix, - Opt_cto, Opt_nocto, - Opt_ac, Opt_noac, - Opt_lock, Opt_nolock, - Opt_udp, Opt_tcp, Opt_rdma, - Opt_acl, Opt_noacl, - Opt_rdirplus, Opt_nordirplus, - Opt_sharecache, Opt_nosharecache, - Opt_resvport, Opt_noresvport, - Opt_fscache, Opt_nofscache, - Opt_migration, Opt_nomigration, - - /* Mount options that take integer arguments */ - Opt_port, - Opt_rsize, Opt_wsize, Opt_bsize, - Opt_timeo, Opt_retrans, - Opt_acregmin, Opt_acregmax, - Opt_acdirmin, Opt_acdirmax, +enum nfs_param { + Opt_ac, + Opt_acdirmax, + Opt_acdirmin, + Opt_acl, + Opt_acregmax, + Opt_acregmin, Opt_actimeo, - Opt_namelen, - Opt_mountport, - Opt_mountvers, - Opt_minorversion, - - /* Mount options that take string arguments */ - Opt_nfsvers, - Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, - Opt_addr, Opt_mountaddr, Opt_clientaddr, - Opt_nconnect, - Opt_lookupcache, - Opt_fscache_uniq, + Opt_addr, + Opt_bg, + Opt_bsize, + Opt_clientaddr, + Opt_cto, + Opt_fg, + Opt_fscache, + Opt_hard, + Opt_intr, Opt_local_lock, - - /* Special mount options */ - Opt_userspace, Opt_deprecated, Opt_sloppy, - - Opt_err + Opt_lock, + Opt_lookupcache, + Opt_migration, + Opt_minorversion, + Opt_mountaddr, + Opt_mounthost, + Opt_mountport, + Opt_mountproto, + Opt_mountvers, + Opt_namelen, + Opt_nconnect, + Opt_port, + Opt_posix, + Opt_proto, + Opt_rdirplus, + Opt_rdma, + Opt_resvport, + Opt_retrans, + Opt_retry, + Opt_rsize, + Opt_sec, + Opt_sharecache, + Opt_sloppy, + Opt_soft, + Opt_softerr, + Opt_source, + Opt_tcp, + Opt_timeo, + Opt_udp, + Opt_v, + Opt_vers, + Opt_wsize, }; -static const match_table_t nfs_mount_option_tokens = { - { Opt_userspace, "bg" }, - { Opt_userspace, "fg" }, - { Opt_userspace, "retry=%s" }, - - { Opt_sloppy, "sloppy" }, - - { Opt_soft, "soft" }, - { Opt_softerr, "softerr" }, - { Opt_hard, "hard" }, - { Opt_deprecated, "intr" }, - { Opt_deprecated, "nointr" }, - { Opt_posix, "posix" }, - { Opt_noposix, "noposix" }, - { Opt_cto, "cto" }, - { Opt_nocto, "nocto" }, - { Opt_ac, "ac" }, - { Opt_noac, "noac" }, - { Opt_lock, "lock" }, - { Opt_nolock, "nolock" }, - { Opt_udp, "udp" }, - { Opt_tcp, "tcp" }, - { Opt_rdma, "rdma" }, - { Opt_acl, "acl" }, - { Opt_noacl, "noacl" }, - { Opt_rdirplus, "rdirplus" }, - { Opt_nordirplus, "nordirplus" }, - { Opt_sharecache, "sharecache" }, - { Opt_nosharecache, "nosharecache" }, - { Opt_resvport, "resvport" }, - { Opt_noresvport, "noresvport" }, - { Opt_fscache, "fsc" }, - { Opt_nofscache, "nofsc" }, - { Opt_migration, "migration" }, - { Opt_nomigration, "nomigration" }, - - { Opt_port, "port=%s" }, - { Opt_rsize, "rsize=%s" }, - { Opt_wsize, "wsize=%s" }, - { Opt_bsize, "bsize=%s" }, - { Opt_timeo, "timeo=%s" }, - { Opt_retrans, "retrans=%s" }, - { Opt_acregmin, "acregmin=%s" }, - { Opt_acregmax, "acregmax=%s" }, - { Opt_acdirmin, "acdirmin=%s" }, - { Opt_acdirmax, "acdirmax=%s" }, - { Opt_actimeo, "actimeo=%s" }, - { Opt_namelen, "namlen=%s" }, - { Opt_mountport, "mountport=%s" }, - { Opt_mountvers, 
"mountvers=%s" }, - { Opt_minorversion, "minorversion=%s" }, - - { Opt_nfsvers, "nfsvers=%s" }, - { Opt_nfsvers, "vers=%s" }, - - { Opt_sec, "sec=%s" }, - { Opt_proto, "proto=%s" }, - { Opt_mountproto, "mountproto=%s" }, - { Opt_addr, "addr=%s" }, - { Opt_clientaddr, "clientaddr=%s" }, - { Opt_mounthost, "mounthost=%s" }, - { Opt_mountaddr, "mountaddr=%s" }, - - { Opt_nconnect, "nconnect=%s" }, - - { Opt_lookupcache, "lookupcache=%s" }, - { Opt_fscache_uniq, "fsc=%s" }, - { Opt_local_lock, "local_lock=%s" }, - - /* The following needs to be listed after all other options */ - { Opt_nfsvers, "v%s" }, - - { Opt_err, NULL } +static const struct fs_parameter_spec nfs_param_specs[] = { + fsparam_flag_no("ac", Opt_ac), + fsparam_u32 ("acdirmax", Opt_acdirmax), + fsparam_u32 ("acdirmin", Opt_acdirmin), + fsparam_flag_no("acl", Opt_acl), + fsparam_u32 ("acregmax", Opt_acregmax), + fsparam_u32 ("acregmin", Opt_acregmin), + fsparam_u32 ("actimeo", Opt_actimeo), + fsparam_string("addr", Opt_addr), + fsparam_flag ("bg", Opt_bg), + fsparam_u32 ("bsize", Opt_bsize), + fsparam_string("clientaddr", Opt_clientaddr), + fsparam_flag_no("cto", Opt_cto), + fsparam_flag ("fg", Opt_fg), + __fsparam(fs_param_is_string, "fsc", Opt_fscache, + fs_param_neg_with_no|fs_param_v_optional), + fsparam_flag ("hard", Opt_hard), + __fsparam(fs_param_is_flag, "intr", Opt_intr, + fs_param_neg_with_no|fs_param_deprecated), + fsparam_enum ("local_lock", Opt_local_lock), + fsparam_flag_no("lock", Opt_lock), + fsparam_enum ("lookupcache", Opt_lookupcache), + fsparam_flag_no("migration", Opt_migration), + fsparam_u32 ("minorversion", Opt_minorversion), + fsparam_string("mountaddr", Opt_mountaddr), + fsparam_string("mounthost", Opt_mounthost), + fsparam_u32 ("mountport", Opt_mountport), + fsparam_string("mountproto", Opt_mountproto), + fsparam_u32 ("mountvers", Opt_mountvers), + fsparam_u32 ("namlen", Opt_namelen), + fsparam_u32 ("nconnect", Opt_nconnect), + fsparam_string("nfsvers", Opt_vers), + fsparam_u32 ("port", Opt_port), + fsparam_flag_no("posix", Opt_posix), + fsparam_string("proto", Opt_proto), + fsparam_flag_no("rdirplus", Opt_rdirplus), + fsparam_flag ("rdma", Opt_rdma), + fsparam_flag_no("resvport", Opt_resvport), + fsparam_u32 ("retrans", Opt_retrans), + fsparam_string("retry", Opt_retry), + fsparam_u32 ("rsize", Opt_rsize), + fsparam_string("sec", Opt_sec), + fsparam_flag_no("sharecache", Opt_sharecache), + fsparam_flag ("sloppy", Opt_sloppy), + fsparam_flag ("soft", Opt_soft), + fsparam_flag ("softerr", Opt_softerr), + fsparam_string("source", Opt_source), + fsparam_flag ("tcp", Opt_tcp), + fsparam_u32 ("timeo", Opt_timeo), + fsparam_flag ("udp", Opt_udp), + fsparam_flag ("v2", Opt_v), + fsparam_flag ("v3", Opt_v), + fsparam_flag ("v4", Opt_v), + fsparam_flag ("v4.0", Opt_v), + fsparam_flag ("v4.1", Opt_v), + fsparam_flag ("v4.2", Opt_v), + fsparam_string("vers", Opt_vers), + fsparam_u32 ("wsize", Opt_wsize), + {} }; enum { - Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, - Opt_xprt_rdma6, - - Opt_xprt_err -}; - -static const match_table_t nfs_xprt_protocol_tokens = { - { Opt_xprt_udp, "udp" }, - { Opt_xprt_udp6, "udp6" }, - { Opt_xprt_tcp, "tcp" }, - { Opt_xprt_tcp6, "tcp6" }, - { Opt_xprt_rdma, "rdma" }, - { Opt_xprt_rdma6, "rdma6" }, - - { Opt_xprt_err, NULL } -}; - -enum { - Opt_sec_none, Opt_sec_sys, - Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, - Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, - Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, - - Opt_sec_err -}; - -static const match_table_t 
nfs_secflavor_tokens = { - { Opt_sec_none, "none" }, - { Opt_sec_none, "null" }, - { Opt_sec_sys, "sys" }, - - { Opt_sec_krb5, "krb5" }, - { Opt_sec_krb5i, "krb5i" }, - { Opt_sec_krb5p, "krb5p" }, - - { Opt_sec_lkey, "lkey" }, - { Opt_sec_lkeyi, "lkeyi" }, - { Opt_sec_lkeyp, "lkeyp" }, - - { Opt_sec_spkm, "spkm3" }, - { Opt_sec_spkmi, "spkm3i" }, - { Opt_sec_spkmp, "spkm3p" }, - - { Opt_sec_err, NULL } -}; - -enum { - Opt_lookupcache_all, Opt_lookupcache_positive, - Opt_lookupcache_none, - - Opt_lookupcache_err -}; - -static const match_table_t nfs_lookupcache_tokens = { - { Opt_lookupcache_all, "all" }, - { Opt_lookupcache_positive, "pos" }, - { Opt_lookupcache_positive, "positive" }, - { Opt_lookupcache_none, "none" }, - - { Opt_lookupcache_err, NULL } -}; - -enum { - Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, + Opt_local_lock_all, + Opt_local_lock_flock, Opt_local_lock_none, - - Opt_local_lock_err -}; - -static const match_table_t nfs_local_lock_tokens = { - { Opt_local_lock_all, "all" }, - { Opt_local_lock_flock, "flock" }, - { Opt_local_lock_posix, "posix" }, - { Opt_local_lock_none, "none" }, - - { Opt_local_lock_err, NULL } + Opt_local_lock_posix, }; enum { - Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, - Opt_vers_4_1, Opt_vers_4_2, - - Opt_vers_err + Opt_lookupcache_all, + Opt_lookupcache_none, + Opt_lookupcache_positive, }; -static const match_table_t nfs_vers_tokens = { - { Opt_vers_2, "2" }, - { Opt_vers_3, "3" }, - { Opt_vers_4, "4" }, - { Opt_vers_4_0, "4.0" }, - { Opt_vers_4_1, "4.1" }, - { Opt_vers_4_2, "4.2" }, +static const struct fs_parameter_enum nfs_param_enums[] = { + { Opt_local_lock, "all", Opt_local_lock_all }, + { Opt_local_lock, "flock", Opt_local_lock_flock }, + { Opt_local_lock, "none", Opt_local_lock_none }, + { Opt_local_lock, "posix", Opt_local_lock_posix }, + { Opt_lookupcache, "all", Opt_lookupcache_all }, + { Opt_lookupcache, "none", Opt_lookupcache_none }, + { Opt_lookupcache, "pos", Opt_lookupcache_positive }, + { Opt_lookupcache, "positive", Opt_lookupcache_positive }, + {} +}; - { Opt_vers_err, NULL } +static const struct fs_parameter_description nfs_fs_parameters = { + .name = "nfs", + .specs = nfs_param_specs, + .enums = nfs_param_enums, +}; + +enum { + Opt_vers_2, + Opt_vers_3, + Opt_vers_4, + Opt_vers_4_0, + Opt_vers_4_1, + Opt_vers_4_2, +}; + +static const struct constant_table nfs_vers_tokens[] = { + { "2", Opt_vers_2 }, + { "3", Opt_vers_3 }, + { "4", Opt_vers_4 }, + { "4.0", Opt_vers_4_0 }, + { "4.1", Opt_vers_4_1 }, + { "4.2", Opt_vers_4_2 }, +}; + +enum { + Opt_xprt_rdma, + Opt_xprt_rdma6, + Opt_xprt_tcp, + Opt_xprt_tcp6, + Opt_xprt_udp, + Opt_xprt_udp6, + nr__Opt_xprt +}; + +static const struct constant_table nfs_xprt_protocol_tokens[nr__Opt_xprt] = { + { "rdma", Opt_xprt_rdma }, + { "rdma6", Opt_xprt_rdma6 }, + { "tcp", Opt_xprt_tcp }, + { "tcp6", Opt_xprt_tcp6 }, + { "udp", Opt_xprt_udp }, + { "udp6", Opt_xprt_udp6 }, +}; + +enum { + Opt_sec_krb5, + Opt_sec_krb5i, + Opt_sec_krb5p, + Opt_sec_lkey, + Opt_sec_lkeyi, + Opt_sec_lkeyp, + Opt_sec_none, + Opt_sec_spkm, + Opt_sec_spkmi, + Opt_sec_spkmp, + Opt_sec_sys, + nr__Opt_sec +}; + +static const struct constant_table nfs_secflavor_tokens[] = { + { "krb5", Opt_sec_krb5 }, + { "krb5i", Opt_sec_krb5i }, + { "krb5p", Opt_sec_krb5p }, + { "lkey", Opt_sec_lkey }, + { "lkeyi", Opt_sec_lkeyi }, + { "lkeyp", Opt_sec_lkeyp }, + { "none", Opt_sec_none }, + { "null", Opt_sec_none }, + { "spkm3", Opt_sec_spkm }, + { "spkm3i", Opt_sec_spkmi }, + { "spkm3p", Opt_sec_spkmp }, + { 
"sys", Opt_sec_sys }, }; struct nfs_fs_context *nfs_alloc_parsed_mount_data(void) @@ -368,17 +366,19 @@ static int nfs_auth_info_add(struct nfs_fs_context *ctx, /* * Parse the value of the 'sec=' option. */ -static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, char *value) +static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, + struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; rpc_authflavor_t pseudoflavor; - char *p; + char *string = param->string, *p; int ret; - dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); + dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string); - while ((p = strsep(&value, ":")) != NULL) { - switch (match_token(p, nfs_secflavor_tokens, args)) { + while ((p = strsep(&string, ":")) != NULL) { + if (!*p) + continue; + switch (lookup_constant(nfs_secflavor_tokens, p, -1)) { case Opt_sec_none: pseudoflavor = RPC_AUTH_NULL; break; @@ -427,11 +427,10 @@ static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, char *value) } static int nfs_parse_version_string(struct nfs_fs_context *ctx, - char *string, - substring_t *args) + const char *string) { ctx->flags &= ~NFS_MOUNT_VER3; - switch (match_token(string, nfs_vers_tokens, args)) { + switch (lookup_constant(nfs_vers_tokens, string, -1)) { case Opt_vers_2: ctx->version = 2; break; @@ -465,64 +464,24 @@ static int nfs_parse_version_string(struct nfs_fs_context *ctx, return 0; } -static int nfs_get_option_str(substring_t args[], char **option) -{ - kfree(*option); - *option = match_strdup(args); - return !*option; -} - -static int nfs_get_option_ui(struct nfs_fs_context *ctx, - substring_t args[], unsigned int *option) -{ - match_strlcpy(ctx->buf, args, sizeof(ctx->buf)); - return kstrtouint(ctx->buf, 10, option); -} - -static int nfs_get_option_ui_bound(struct nfs_fs_context *ctx, - substring_t args[], unsigned int *option, - unsigned int l_bound, unsigned u_bound) -{ - int ret; - - match_strlcpy(ctx->buf, args, sizeof(ctx->buf)); - ret = kstrtouint(ctx->buf, 10, option); - if (ret < 0) - return ret; - if (*option < l_bound || *option > u_bound) - return -ERANGE; - return 0; -} - -static int nfs_get_option_us_bound(struct nfs_fs_context *ctx, - substring_t args[], unsigned short *option, - unsigned short l_bound, - unsigned short u_bound) -{ - int ret; - - match_strlcpy(ctx->buf, args, sizeof(ctx->buf)); - ret = kstrtou16(ctx->buf, 10, option); - if (ret < 0) - return ret; - if (*option < l_bound || *option > u_bound) - return -ERANGE; - return 0; -} - /* - * Parse a single mount option in "key[=val]" form. + * Parse a single mount parameter. */ -static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) +static int nfs_fs_context_parse_param(struct nfs_fs_context *ctx, + struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; - char *string; - int token, ret; + struct fs_parse_result result; + unsigned short protofamily, mountfamily; + unsigned int len; + int ret, opt; - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key); - token = match_token(p, nfs_mount_option_tokens, args); - switch (token) { + opt = fs_parse(NULL, &nfs_fs_parameters, param, &result); + if (opt < 0) + return ctx->sloppy ? 
1 : opt; + + switch (opt) { /* * boolean options: foo/nofoo */ @@ -538,30 +497,31 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) ctx->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); break; case Opt_posix: - ctx->flags |= NFS_MOUNT_POSIX; - break; - case Opt_noposix: - ctx->flags &= ~NFS_MOUNT_POSIX; + if (result.negated) + ctx->flags &= ~NFS_MOUNT_POSIX; + else + ctx->flags |= NFS_MOUNT_POSIX; break; case Opt_cto: - ctx->flags &= ~NFS_MOUNT_NOCTO; - break; - case Opt_nocto: - ctx->flags |= NFS_MOUNT_NOCTO; + if (result.negated) + ctx->flags |= NFS_MOUNT_NOCTO; + else + ctx->flags &= ~NFS_MOUNT_NOCTO; break; case Opt_ac: - ctx->flags &= ~NFS_MOUNT_NOAC; - break; - case Opt_noac: - ctx->flags |= NFS_MOUNT_NOAC; + if (result.negated) + ctx->flags |= NFS_MOUNT_NOAC; + else + ctx->flags &= ~NFS_MOUNT_NOAC; break; case Opt_lock: - ctx->flags &= ~NFS_MOUNT_NONLM; - ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_nolock: - ctx->flags |= NFS_MOUNT_NONLM; - ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + if (result.negated) { + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } else { + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } break; case Opt_udp: ctx->flags &= ~NFS_MOUNT_TCP; @@ -574,195 +534,177 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) case Opt_rdma: ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(p); + xprt_load_transport(param->key); break; case Opt_acl: - ctx->flags &= ~NFS_MOUNT_NOACL; - break; - case Opt_noacl: - ctx->flags |= NFS_MOUNT_NOACL; + if (result.negated) + ctx->flags |= NFS_MOUNT_NOACL; + else + ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: - ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; - break; - case Opt_nordirplus: - ctx->flags |= NFS_MOUNT_NORDIRPLUS; + if (result.negated) + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + else + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; break; case Opt_sharecache: - ctx->flags &= ~NFS_MOUNT_UNSHARED; - break; - case Opt_nosharecache: - ctx->flags |= NFS_MOUNT_UNSHARED; + if (result.negated) + ctx->flags |= NFS_MOUNT_UNSHARED; + else + ctx->flags &= ~NFS_MOUNT_UNSHARED; break; case Opt_resvport: - ctx->flags &= ~NFS_MOUNT_NORESVPORT; - break; - case Opt_noresvport: - ctx->flags |= NFS_MOUNT_NORESVPORT; + if (result.negated) + ctx->flags |= NFS_MOUNT_NORESVPORT; + else + ctx->flags &= ~NFS_MOUNT_NORESVPORT; break; case Opt_fscache: - ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); - ctx->fscache_uniq = NULL; - break; - case Opt_nofscache: - ctx->options &= ~NFS_OPTION_FSCACHE; - kfree(ctx->fscache_uniq); - ctx->fscache_uniq = NULL; + ctx->fscache_uniq = param->string; + param->string = NULL; + if (result.negated) + ctx->options &= ~NFS_OPTION_FSCACHE; + else + ctx->options |= NFS_OPTION_FSCACHE; break; case Opt_migration: - ctx->options |= NFS_OPTION_MIGRATION; - break; - case Opt_nomigration: - ctx->options &= ~NFS_OPTION_MIGRATION; + if (result.negated) + ctx->options &= ~NFS_OPTION_MIGRATION; + else + ctx->options |= NFS_OPTION_MIGRATION; break; /* * options that take numeric values */ case Opt_port: - if (nfs_get_option_ui_bound(ctx, args, &ctx->nfs_server.port, - 0, USHRT_MAX)) - goto out_invalid_value; + if (result.uint_32 > USHRT_MAX) + goto out_of_bounds; + ctx->nfs_server.port = result.uint_32; break; case Opt_rsize: - if 
(nfs_get_option_ui(ctx, args, &ctx->rsize)) - goto out_invalid_value; + ctx->rsize = result.uint_32; break; case Opt_wsize: - if (nfs_get_option_ui(ctx, args, &ctx->wsize)) - goto out_invalid_value; + ctx->wsize = result.uint_32; break; case Opt_bsize: - if (nfs_get_option_ui(ctx, args, &ctx->bsize)) - goto out_invalid_value; + ctx->bsize = result.uint_32; break; case Opt_timeo: - if (nfs_get_option_ui_bound(ctx, args, &ctx->timeo, 1, INT_MAX)) - goto out_invalid_value; + if (result.uint_32 < 1 || result.uint_32 > INT_MAX) + goto out_of_bounds; + ctx->timeo = result.uint_32; break; case Opt_retrans: - if (nfs_get_option_ui_bound(ctx, args, &ctx->retrans, 0, INT_MAX)) - goto out_invalid_value; + if (result.uint_32 > INT_MAX) + goto out_of_bounds; + ctx->retrans = result.uint_32; break; case Opt_acregmin: - if (nfs_get_option_ui(ctx, args, &ctx->acregmin)) - goto out_invalid_value; + ctx->acregmin = result.uint_32; break; case Opt_acregmax: - if (nfs_get_option_ui(ctx, args, &ctx->acregmax)) - goto out_invalid_value; + ctx->acregmax = result.uint_32; break; case Opt_acdirmin: - if (nfs_get_option_ui(ctx, args, &ctx->acdirmin)) - goto out_invalid_value; + ctx->acdirmin = result.uint_32; break; case Opt_acdirmax: - if (nfs_get_option_ui(ctx, args, &ctx->acdirmax)) - goto out_invalid_value; + ctx->acdirmax = result.uint_32; break; case Opt_actimeo: - if (nfs_get_option_ui(ctx, args, &ctx->acdirmax)) - goto out_invalid_value; - ctx->acregmin = ctx->acregmax = - ctx->acdirmin = ctx->acdirmax; + ctx->acregmin = result.uint_32; + ctx->acregmax = result.uint_32; + ctx->acdirmin = result.uint_32; + ctx->acdirmax = result.uint_32; break; case Opt_namelen: - if (nfs_get_option_ui(ctx, args, &ctx->namlen)) - goto out_invalid_value; + ctx->namlen = result.uint_32; break; case Opt_mountport: - if (nfs_get_option_ui_bound(ctx, args, &ctx->mount_server.port, - 0, USHRT_MAX)) - goto out_invalid_value; + if (result.uint_32 > USHRT_MAX) + goto out_of_bounds; + ctx->mount_server.port = result.uint_32; break; case Opt_mountvers: - if (nfs_get_option_ui_bound(ctx, args, &ctx->mount_server.version, - NFS_MNT_VERSION, NFS_MNT3_VERSION)) - goto out_invalid_value; + if (result.uint_32 < NFS_MNT_VERSION || + result.uint_32 > NFS_MNT3_VERSION) + goto out_of_bounds; + ctx->mount_server.version = result.uint_32; break; case Opt_minorversion: - if (nfs_get_option_ui_bound(ctx, args, &ctx->minorversion, - 0, NFS4_MAX_MINOR_VERSION)) - goto out_invalid_value; + if (result.uint_32 > NFS4_MAX_MINOR_VERSION) + goto out_of_bounds; + ctx->minorversion = result.uint_32; break; /* * options that take text values */ - case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - ret = nfs_parse_version_string(ctx, string, args); - kfree(string); + case Opt_v: + ret = nfs_parse_version_string(ctx, param->key + 1); + if (ret < 0) + return ret; + break; + case Opt_vers: + ret = nfs_parse_version_string(ctx, param->string); if (ret < 0) return ret; break; case Opt_sec: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - ret = nfs_parse_security_flavors(ctx, string); - kfree(string); + ret = nfs_parse_security_flavors(ctx, param); if (ret < 0) return ret; break; - case Opt_proto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_xprt_protocol_tokens, args); - ctx->protofamily = AF_INET; - switch (token) { + case Opt_proto: + protofamily = AF_INET; + switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case 
Opt_xprt_udp6: - ctx->protofamily = AF_INET6; + protofamily = AF_INET6; /* fall through */ case Opt_xprt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: - ctx->protofamily = AF_INET6; + protofamily = AF_INET6; /* fall through */ case Opt_xprt_tcp: ctx->flags |= NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: - ctx->protofamily = AF_INET6; + protofamily = AF_INET6; /* fall through */ case Opt_xprt_rdma: /* vector side protocols to TCP */ ctx->flags |= NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(string); + xprt_load_transport(param->string); break; default: - kfree(string); dfprintk(MOUNT, "NFS: unrecognized transport protocol\n"); return -EINVAL; } - kfree(string); - break; - case Opt_mountproto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_xprt_protocol_tokens, args); - kfree(string); - ctx->mountfamily = AF_INET; - switch (token) { + ctx->protofamily = protofamily; + break; + + case Opt_mountproto: + mountfamily = AF_INET; + switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: - ctx->mountfamily = AF_INET6; + mountfamily = AF_INET6; /* fall through */ case Opt_xprt_udp: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: - ctx->mountfamily = AF_INET6; + mountfamily = AF_INET6; /* fall through */ case Opt_xprt_tcp: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; @@ -772,51 +714,42 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) dfprintk(MOUNT, "NFS: unrecognized transport protocol\n"); return -EINVAL; } + ctx->mountfamily = mountfamily; break; + case Opt_addr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - ctx->nfs_server.addrlen = - rpc_pton(ctx->net, string, strlen(string), - &ctx->nfs_server.address, - sizeof(ctx->nfs_server._address)); - kfree(string); - if (ctx->nfs_server.addrlen == 0) + len = rpc_pton(ctx->net, param->string, param->size, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address)); + if (len == 0) goto out_invalid_address; + ctx->nfs_server.addrlen = len; break; case Opt_clientaddr: - if (nfs_get_option_str(args, &ctx->client_address)) - goto out_nomem; + kfree(ctx->client_address); + ctx->client_address = param->string; + param->string = NULL; break; case Opt_mounthost: - if (nfs_get_option_str(args, &ctx->mount_server.hostname)) - goto out_nomem; + kfree(ctx->mount_server.hostname); + ctx->mount_server.hostname = param->string; + param->string = NULL; break; case Opt_mountaddr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - ctx->mount_server.addrlen = - rpc_pton(ctx->net, string, strlen(string), - &ctx->mount_server.address, - sizeof(ctx->mount_server._address)); - kfree(string); - if (ctx->mount_server.addrlen == 0) + len = rpc_pton(ctx->net, param->string, param->size, + &ctx->mount_server.address, + sizeof(ctx->mount_server._address)); + if (len == 0) goto out_invalid_address; + ctx->mount_server.addrlen = len; break; case Opt_nconnect: - if (nfs_get_option_us_bound(ctx, args, &ctx->nfs_server.nconnect, - 1, NFS_MAX_CONNECTIONS)) - goto out_invalid_value; + if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) + goto out_of_bounds; + ctx->nfs_server.nconnect = result.uint_32; break; case Opt_lookupcache: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, 
nfs_lookupcache_tokens, args); - kfree(string); - switch (token) { + switch (result.uint_32) { case Opt_lookupcache_all: ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); break; @@ -828,22 +761,11 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: - dfprintk(MOUNT, "NFS: invalid lookupcache argument\n"); - return -EINVAL; + goto out_invalid_value; } break; - case Opt_fscache_uniq: - if (nfs_get_option_str(args, &ctx->fscache_uniq)) - goto out_nomem; - ctx->options |= NFS_OPTION_FSCACHE; - break; case Opt_local_lock: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_local_lock_tokens, args); - kfree(string); - switch (token) { + switch (result.uint_32) { case Opt_local_lock_all: ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); @@ -859,8 +781,7 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) NFS_MOUNT_LOCAL_FCNTL); break; default: - dfprintk(MOUNT, "NFS: invalid local_lock argument\n"); - return -EINVAL; + goto out_invalid_value; } break; @@ -868,30 +789,50 @@ static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) * Special options */ case Opt_sloppy: - ctx->sloppy = 1; + ctx->sloppy = true; dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); break; - case Opt_userspace: - case Opt_deprecated: - dfprintk(MOUNT, "NFS: ignoring mount option '%s'\n", p); - break; - - default: - dfprintk(MOUNT, "NFS: unrecognized mount option '%s'\n", p); - return -EINVAL; } return 0; -out_invalid_address: - printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); - return -EINVAL; out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); + printk(KERN_INFO "NFS: Bad mount option value specified\n"); return -EINVAL; -out_nomem: - printk(KERN_INFO "NFS: not enough memory to parse option\n"); - return -ENOMEM; +out_invalid_address: + printk(KERN_INFO "NFS: Bad IP address specified\n"); + return -EINVAL; +out_of_bounds: + printk(KERN_INFO "NFS: Value for '%s' out of range\n", param->key); + return -ERANGE; +} + +/* cribbed from generic_parse_monolithic and vfs_parse_fs_string */ +static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) +{ + int ret; + char *key = p, *value; + size_t v_size = 0; + struct fs_parameter param; + + memset(¶m, 0, sizeof(param)); + value = strchr(key, '='); + if (value && value != key) { + *value++ = 0; + v_size = strlen(value); + } + param.key = key; + param.type = fs_value_is_flag; + param.size = v_size; + if (v_size > 0) { + param.type = fs_value_is_string; + param.string = kmemdup_nul(value, v_size, GFP_KERNEL); + if (!param.string) + return -ENOMEM; + } + ret = nfs_fs_context_parse_param(ctx, ¶m); + kfree(param.string); + return ret; } /* From f2aedb713c284429987dc66c7aaf38decfc8da2a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 10 Dec 2019 07:31:13 -0500 Subject: [PATCH 107/658] NFS: Add fs_context support. Add filesystem context support to NFS, parsing the options in advance and attaching the information to struct nfs_fs_context. The highlights are: (*) Merge nfs_mount_info and nfs_clone_mount into nfs_fs_context. This structure represents NFS's superblock config. (*) Make use of the VFS's parsing support to split comma-separated lists (*) Pin the NFS protocol module in the nfs_fs_context. (*) Attach supplementary error information to fs_context. 
This has the downside that these strings must be static and can't be formatted. (*) Remove the auxiliary file_system_type structs since the information necessary can be conveyed in the nfs_fs_context struct instead. (*) Root mounts are made by duplicating the config for the requested mount so as to have the same parameters. Submounts pick up their parameters from the parent superblock. [AV -- retrans is u32, not string] [SM -- Renamed cfg to ctx in a few functions in an earlier patch] [SM -- Moved fs_context mount option parsing to an earlier patch] [SM -- Moved fs_context error logging to a later patch] [SM -- Fixed printks in nfs4_try_get_tree() and nfs4_get_referral_tree()] [SM -- Added is_remount_fc() helper] [SM -- Deferred some refactoring to a later patch] [SM -- Fixed referral mounts, which were broken in the original patch] [SM -- Fixed leak of nfs_fattr when fs_context is freed] Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 474 +++++++++++++++++++++++++++------------- fs/nfs/internal.h | 74 +++---- fs/nfs/namespace.c | 136 +++++++----- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4_fs.h | 9 +- fs/nfs/nfs4file.c | 1 + fs/nfs/nfs4namespace.c | 297 ++++++++++++++----------- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4super.c | 151 +++++++------ fs/nfs/proc.c | 2 +- fs/nfs/super.c | 288 +++++++----------------- include/linux/nfs_xdr.h | 6 +- 12 files changed, 778 insertions(+), 664 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 9a3162055d5d..ac1a8d7d7393 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -3,6 +3,7 @@ * linux/fs/nfs/fs_context.c * * Copyright (C) 1992 Rick Sladkey + * Conversion to new mount api Copyright (C) David Howells * * NFS mount handling. * @@ -467,21 +468,31 @@ static int nfs_parse_version_string(struct nfs_fs_context *ctx, /* * Parse a single mount parameter. */ -static int nfs_fs_context_parse_param(struct nfs_fs_context *ctx, +static int nfs_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; + struct nfs_fs_context *ctx = nfs_fc2context(fc); unsigned short protofamily, mountfamily; unsigned int len; int ret, opt; dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key); - opt = fs_parse(NULL, &nfs_fs_parameters, param, &result); + opt = fs_parse(fc, &nfs_fs_parameters, param, &result); if (opt < 0) return ctx->sloppy ? 
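/* (Aside: the "VFS parsing support" mentioned in the changelog is the
 * fs_parameter machinery -- generic_parse_monolithic() splits the old
 * comma-separated option string and feeds each "key[=value]" through
 * vfs_parse_fs_string() to the ->parse_param hook shown here, replacing
 * NFS's old strsep()-based parser that this patch deletes below.) */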
1 : opt; switch (opt) { + case Opt_source: + if (fc->source) { + dfprintk(MOUNT, "NFS: Multiple sources not supported\n"); + return -EINVAL; + } + fc->source = param->string; + param->string = NULL; + break; + /* * boolean options: foo/nofoo */ @@ -807,112 +818,6 @@ out_of_bounds: return -ERANGE; } -/* cribbed from generic_parse_monolithic and vfs_parse_fs_string */ -static int nfs_fs_context_parse_option(struct nfs_fs_context *ctx, char *p) -{ - int ret; - char *key = p, *value; - size_t v_size = 0; - struct fs_parameter param; - - memset(¶m, 0, sizeof(param)); - value = strchr(key, '='); - if (value && value != key) { - *value++ = 0; - v_size = strlen(value); - } - param.key = key; - param.type = fs_value_is_flag; - param.size = v_size; - if (v_size > 0) { - param.type = fs_value_is_string; - param.string = kmemdup_nul(value, v_size, GFP_KERNEL); - if (!param.string) - return -ENOMEM; - } - ret = nfs_fs_context_parse_param(ctx, ¶m); - kfree(param.string); - return ret; -} - -/* - * Error-check and convert a string of mount options from user space into - * a data structure. The whole mount string is processed; bad options are - * skipped as they are encountered. If there were no errors, return 1; - * otherwise return 0 (zero). - */ -int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx) -{ - char *p; - int rc, sloppy = 0, invalid_option = 0; - - if (!raw) { - dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); - return 1; - } - dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - - rc = security_sb_eat_lsm_opts(raw, &ctx->lsm_opts); - if (rc) - goto out_security_failure; - - while ((p = strsep(&raw, ",")) != NULL) { - if (!*p) - continue; - if (nfs_fs_context_parse_option(ctx, p) < 0) - invalid_option = true; - } - - if (!sloppy && invalid_option) - return 0; - - if (ctx->minorversion && ctx->version != 4) - goto out_minorversion_mismatch; - - if (ctx->options & NFS_OPTION_MIGRATION && - (ctx->version != 4 || ctx->minorversion != 0)) - goto out_migration_misuse; - - /* - * verify that any proto=/mountproto= options match the address - * families in the addr=/mountaddr= options. - */ - if (ctx->protofamily != AF_UNSPEC && - ctx->protofamily != ctx->nfs_server.address.sa_family) - goto out_proto_mismatch; - - if (ctx->mountfamily != AF_UNSPEC) { - if (ctx->mount_server.addrlen) { - if (ctx->mountfamily != ctx->mount_server.address.sa_family) - goto out_mountproto_mismatch; - } else { - if (ctx->mountfamily != ctx->nfs_server.address.sa_family) - goto out_mountproto_mismatch; - } - } - - return 1; - -out_minorversion_mismatch: - printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", ctx->version, ctx->minorversion); - return 0; -out_mountproto_mismatch: - printk(KERN_INFO "NFS: mount server address does not match mountproto= " - "option\n"); - return 0; -out_proto_mismatch: - printk(KERN_INFO "NFS: server address does not match proto= option\n"); - return 0; -out_migration_misuse: - printk(KERN_INFO - "NFS: 'migration' not supported for this NFS version\n"); - return -EINVAL; -out_security_failure: - printk(KERN_INFO "NFS: security options invalid: %d\n", rc); - return 0; -} - /* * Split "dev_name" into "hostname:export_path". 
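 * (The hostname part may be a bracketed IPv6 literal, e.g. "[::1]:/export";
 *  in that case the separating colon is the one after the closing bracket.)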
* @@ -990,6 +895,11 @@ out_path: return -ENAMETOOLONG; } +static inline bool is_remount_fc(struct fs_context *fc) +{ + return fc->root != NULL; +} + /* * Parse monolithic NFS2/NFS3 mount data * - fills in the mount root filehandle @@ -1006,12 +916,11 @@ out_path: * + breaking back: trying proto=udp after proto=tcp, v2 after v3, * mountproto=tcp after mountproto=udp, and so on */ -static int nfs23_validate_mount_data(void *options, - struct nfs_fs_context *ctx, - struct nfs_fh *mntfh, - const char *dev_name) +static int nfs23_parse_monolithic(struct fs_context *fc, + struct nfs_mount_data *data) { - struct nfs_mount_data *data = (struct nfs_mount_data *)options; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_fh *mntfh = ctx->mount_info.mntfh; struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; @@ -1083,6 +992,9 @@ static int nfs23_validate_mount_data(void *options, ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + goto out_nomem; + ctx->namlen = data->namlen; ctx->bsize = data->bsize; @@ -1090,8 +1002,6 @@ static int nfs23_validate_mount_data(void *options, ctx->selected_flavor = data->pseudoflavor; else ctx->selected_flavor = RPC_AUTH_UNIX; - if (!ctx->nfs_server.hostname) - goto out_nomem; if (!(data->flags & NFS_MOUNT_NONLM)) ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| @@ -1109,12 +1019,13 @@ static int nfs23_validate_mount_data(void *options, */ if (data->context[0]){ #ifdef CONFIG_SECURITY_SELINUX - int rc; + int ret; + data->context[NFS_MAX_CONTEXT_LEN] = '\0'; - rc = security_add_mnt_opt("context", data->context, - strlen(data->context), ctx->lsm_opts); - if (rc) - return rc; + ret = vfs_parse_fs_string(fc, "context", + data->context, strlen(data->context)); + if (ret < 0) + return ret; #else return -EINVAL; #endif @@ -1122,12 +1033,20 @@ static int nfs23_validate_mount_data(void *options, break; default: - return NFS_TEXT_DATA; + goto generic; } + ctx->skip_reconfig_option_check = true; return 0; +generic: + return generic_parse_monolithic(fc, data); + out_no_data: + if (is_remount_fc(fc)) { + ctx->skip_reconfig_option_check = true; + return 0; + } dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); return -EINVAL; @@ -1163,12 +1082,11 @@ static void nfs4_validate_mount_flags(struct nfs_fs_context *ctx) /* * Validate NFSv4 mount options */ -static int nfs4_validate_mount_data(void *options, - struct nfs_fs_context *ctx, - const char *dev_name) +static int nfs4_parse_monolithic(struct fs_context *fc, + struct nfs4_mount_data *data) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; - struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; if (data == NULL) @@ -1218,7 +1136,7 @@ static int nfs4_validate_mount_data(void *options, ctx->client_address = c; /* - * Translate to nfs_fs_context, which nfs4_fill_super + * Translate to nfs_fs_context, which nfs_fill_super * can deal with. 
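 * (i.e. the fields of the binary nfs4_mount_data blob end up in the same
 *  context structure that the text-based option parser fills in, so the
 *  rest of the mount path does not care which interface was used.)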
*/ @@ -1238,12 +1156,20 @@ static int nfs4_validate_mount_data(void *options, break; default: - return NFS_TEXT_DATA; + goto generic; } + ctx->skip_reconfig_option_check = true; return 0; +generic: + return generic_parse_monolithic(fc, data); + out_no_data: + if (is_remount_fc(fc)) { + ctx->skip_reconfig_option_check = true; + return 0; + } dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); return -EINVAL; @@ -1260,39 +1186,66 @@ out_invalid_transport_udp: dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); return -EINVAL; } - -int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_fs_context *ctx, - struct nfs_fh *mntfh, - const char *dev_name) -{ - if (fs_type == &nfs_fs_type) - return nfs23_validate_mount_data(options, ctx, mntfh, dev_name); - return nfs4_validate_mount_data(options, ctx, dev_name); -} -#else -int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_fs_context *ctx, - struct nfs_fh *mntfh, - const char *dev_name) -{ - return nfs23_validate_mount_data(options, ctx, mntfh, dev_name); -} #endif -int nfs_validate_text_mount_data(void *options, - struct nfs_fs_context *ctx, - const char *dev_name) +/* + * Parse a monolithic block of data from sys_mount(). + */ +static int nfs_fs_context_parse_monolithic(struct fs_context *fc, + void *data) { - int port = 0; + if (fc->fs_type == &nfs_fs_type) + return nfs23_parse_monolithic(fc, data); + +#if IS_ENABLED(CONFIG_NFS_V4) + if (fc->fs_type == &nfs4_fs_type) + return nfs4_parse_monolithic(fc, data); +#endif + + dfprintk(MOUNT, "NFS: Unsupported monolithic data version\n"); + return -EINVAL; +} + +/* + * Validate the preparsed information in the config. + */ +static int nfs_fs_context_validate(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_subversion *nfs_mod; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; int max_namelen = PAGE_SIZE; int max_pathlen = NFS_MAXPATHLEN; - struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + int port = 0; + int ret; - if (nfs_parse_mount_options((char *)options, ctx) == 0) - return -EINVAL; + if (!fc->source) + goto out_no_device_name; + + /* Check for sanity first. */ + if (ctx->minorversion && ctx->version != 4) + goto out_minorversion_mismatch; + + if (ctx->options & NFS_OPTION_MIGRATION && + (ctx->version != 4 || ctx->minorversion != 0)) + goto out_migration_misuse; + + /* Verify that any proto=/mountproto= options match the address + * families in the addr=/mountaddr= options. 
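+ * (For example, proto=tcp6 together with an IPv4 addr= is caught here
+ *  and rejected with -EINVAL.)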
+ */ + if (ctx->protofamily != AF_UNSPEC && + ctx->protofamily != ctx->nfs_server.address.sa_family) + goto out_proto_mismatch; + + if (ctx->mountfamily != AF_UNSPEC) { + if (ctx->mount_server.addrlen) { + if (ctx->mountfamily != ctx->mount_server.address.sa_family) + goto out_mountproto_mismatch; + } else { + if (ctx->mountfamily != ctx->nfs_server.address.sa_family) + goto out_mountproto_mismatch; + } + } if (!nfs_verify_server_address(sap)) goto out_no_address; @@ -1320,8 +1273,24 @@ int nfs_validate_text_mount_data(void *options, nfs_set_port(sap, &ctx->nfs_server.port, port); - return nfs_parse_devname(ctx, dev_name, max_namelen, max_pathlen); + ret = nfs_parse_devname(ctx, fc->source, max_namelen, max_pathlen); + if (ret < 0) + return ret; + /* Load the NFS protocol module if we haven't done so yet */ + if (!ctx->mount_info.nfs_mod) { + nfs_mod = get_nfs_version(ctx->version); + if (IS_ERR(nfs_mod)) { + ret = PTR_ERR(nfs_mod); + goto out_version_unavailable; + } + ctx->mount_info.nfs_mod = nfs_mod; + } + return 0; + +out_no_device_name: + dfprintk(MOUNT, "NFS: Device name not specified\n"); + return -EINVAL; #if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); @@ -1331,8 +1300,201 @@ out_invalid_transport_udp: dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); return -EINVAL; #endif /* !CONFIG_NFS_V4 */ - out_no_address: dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); return -EINVAL; +out_mountproto_mismatch: + dfprintk(MOUNT, "NFS: Mount server address does not match mountproto= option\n"); + return -EINVAL; +out_proto_mismatch: + dfprintk(MOUNT, "NFS: Server address does not match proto= option\n"); + return -EINVAL; +out_minorversion_mismatch: + dfprintk(MOUNT, "NFS: Mount option vers=%u does not support minorversion=%u\n", + ctx->version, ctx->minorversion); + return -EINVAL; +out_migration_misuse: + dfprintk(MOUNT, "NFS: 'Migration' not supported for this NFS version\n"); + return -EINVAL; +out_version_unavailable: + dfprintk(MOUNT, "NFS: Version unavailable\n"); + return ret; } + +/* + * Create an NFS superblock by the appropriate method. + */ +static int nfs_get_tree(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err = nfs_fs_context_validate(fc); + + if (err) + return err; + if (!ctx->internal) + return ctx->mount_info.nfs_mod->rpc_ops->try_get_tree(fc); + else + return nfs_get_tree_common(fc); +} + +/* + * Handle duplication of a configuration. The caller copied *src into *sc, but + * it can't deal with resource pointers in the filesystem context, so we have + * to do that. We need to clear pointers, copy data or get extra refs as + * appropriate. 
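+ * For nfs that means: duplicate the context itself, allocate and copy a
+ * fresh mount file handle, take an extra reference on the NFS version
+ * module, and clear the string and clone-data pointers so the copy does
+ * not free memory still owned by the original.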
+ */ +static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) +{ + struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx; + + ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->mount_info.mntfh = nfs_alloc_fhandle(); + if (!ctx->mount_info.mntfh) { + kfree(ctx); + return -ENOMEM; + } + nfs_copy_fh(ctx->mount_info.mntfh, src->mount_info.mntfh); + + __module_get(ctx->mount_info.nfs_mod->owner); + ctx->client_address = NULL; + ctx->mount_server.hostname = NULL; + ctx->nfs_server.export_path = NULL; + ctx->nfs_server.hostname = NULL; + ctx->fscache_uniq = NULL; + ctx->clone_data.addr = NULL; + ctx->clone_data.fattr = NULL; + fc->fs_private = ctx; + return 0; +} + +static void nfs_fs_context_free(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + if (ctx) { + if (ctx->mount_info.server) + nfs_free_server(ctx->mount_info.server); + if (ctx->mount_info.nfs_mod) + put_nfs_version(ctx->mount_info.nfs_mod); + kfree(ctx->client_address); + kfree(ctx->mount_server.hostname); + kfree(ctx->nfs_server.export_path); + kfree(ctx->nfs_server.hostname); + kfree(ctx->fscache_uniq); + nfs_free_fhandle(ctx->mount_info.mntfh); + kfree(ctx->clone_data.addr); + nfs_free_fattr(ctx->clone_data.fattr); + kfree(ctx); + } +} + +static const struct fs_context_operations nfs_fs_context_ops = { + .free = nfs_fs_context_free, + .dup = nfs_fs_context_dup, + .parse_param = nfs_fs_context_parse_param, + .parse_monolithic = nfs_fs_context_parse_monolithic, + .get_tree = nfs_get_tree, + .reconfigure = nfs_reconfigure, +}; + +/* + * Prepare superblock configuration. We use the namespaces attached to the + * context. This may be the current process's namespaces, or it may be a + * container's namespaces. 
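+ * For illustration, a new-API mount reaches this hook via roughly:
+ *	fd = fsopen("nfs4", 0);
+ *	fsconfig(fd, FSCONFIG_SET_STRING, "source", "server:/export", 0);
+ *	fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ *	mfd = fsmount(fd, 0, 0);
+ * followed by move_mount() to attach it. Legacy mount(2) and remount
+ * arrive here as well; in the remount case fc->root is already set and
+ * the current superblock's parameters seed the context instead of the
+ * defaults.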
+ */ +static int nfs_init_fs_context(struct fs_context *fc) +{ + struct nfs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL); + if (unlikely(!ctx)) + return -ENOMEM; + + ctx->mount_info.ctx = ctx; + ctx->mount_info.mntfh = nfs_alloc_fhandle(); + if (unlikely(!ctx->mount_info.mntfh)) { + kfree(ctx); + return -ENOMEM; + } + + ctx->protofamily = AF_UNSPEC; + ctx->mountfamily = AF_UNSPEC; + ctx->mount_server.port = NFS_UNSPEC_PORT; + + if (fc->root) { + /* reconfigure, start with the current config */ + struct nfs_server *nfss = fc->root->d_sb->s_fs_info; + struct net *net = nfss->nfs_client->cl_net; + + ctx->flags = nfss->flags; + ctx->rsize = nfss->rsize; + ctx->wsize = nfss->wsize; + ctx->retrans = nfss->client->cl_timeout->to_retries; + ctx->selected_flavor = nfss->client->cl_auth->au_flavor; + ctx->acregmin = nfss->acregmin / HZ; + ctx->acregmax = nfss->acregmax / HZ; + ctx->acdirmin = nfss->acdirmin / HZ; + ctx->acdirmax = nfss->acdirmax / HZ; + ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + ctx->nfs_server.port = nfss->port; + ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + ctx->version = nfss->nfs_client->rpc_ops->version; + ctx->minorversion = nfss->nfs_client->cl_minorversion; + + memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr, + ctx->nfs_server.addrlen); + + if (fc->net_ns != net) { + put_net(fc->net_ns); + fc->net_ns = get_net(net); + } + + ctx->mount_info.nfs_mod = nfss->nfs_client->cl_nfs_mod; + __module_get(ctx->mount_info.nfs_mod->owner); + } else { + /* defaults */ + ctx->timeo = NFS_UNSPEC_TIMEO; + ctx->retrans = NFS_UNSPEC_RETRANS; + ctx->acregmin = NFS_DEF_ACREGMIN; + ctx->acregmax = NFS_DEF_ACREGMAX; + ctx->acdirmin = NFS_DEF_ACDIRMIN; + ctx->acdirmax = NFS_DEF_ACDIRMAX; + ctx->nfs_server.port = NFS_UNSPEC_PORT; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; + ctx->minorversion = 0; + ctx->need_mount = true; + } + ctx->net = fc->net_ns; + fc->fs_private = ctx; + fc->ops = &nfs_fs_context_ops; + return 0; +} + +struct file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .init_fs_context = nfs_init_fs_context, + .parameters = &nfs_fs_parameters, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, +}; +MODULE_ALIAS_FS("nfs"); +EXPORT_SYMBOL_GPL(nfs_fs_type); + +#if IS_ENABLED(CONFIG_NFS_V4) +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .init_fs_context = nfs_init_fs_context, + .parameters = &nfs_fs_parameters, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, +}; +MODULE_ALIAS_FS("nfs4"); +MODULE_ALIAS("nfs4"); +EXPORT_SYMBOL_GPL(nfs4_fs_type); +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 003c2b8eb1e6..1cd09df9e0b5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,7 +4,7 @@ */ #include "nfs4_fs.h" -#include +#include #include #include #include @@ -16,6 +16,7 @@ extern const struct export_operations nfs_export_ops; struct nfs_string; +struct nfs_pageio_descriptor; static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { @@ -34,12 +35,13 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) struct nfs_clone_mount { const struct super_block *sb; - const struct dentry *dentry; + struct dentry *dentry; char *hostname; char *mnt_path; struct sockaddr *addr; size_t addrlen; rpc_authflavor_t authflavor; + struct nfs_fattr 
*fattr; }; /* @@ -78,10 +80,23 @@ struct nfs_client_initdata { const struct cred *cred; }; +struct nfs_mount_info { + unsigned int inherited_bsize; + struct nfs_fs_context *ctx; + struct nfs_clone_mount *cloned; + struct nfs_server *server; + struct nfs_fh *mntfh; + struct nfs_subversion *nfs_mod; +}; + /* * In-kernel mount arguments */ struct nfs_fs_context { + bool internal; + bool skip_reconfig_option_check; + bool need_mount; + bool sloppy; unsigned int flags; /* NFS{,4}_MOUNT_* flags */ unsigned int rsize, wsize; unsigned int timeo, retrans; @@ -98,8 +113,6 @@ struct nfs_fs_context { char *fscache_uniq; unsigned short protofamily; unsigned short mountfamily; - bool need_mount; - bool sloppy; struct { union { @@ -124,14 +137,23 @@ struct nfs_fs_context { int port; unsigned short protocol; unsigned short nconnect; + unsigned short export_path_len; } nfs_server; void *lsm_opts; struct net *net; char buf[32]; /* Parse buffer */ + + struct nfs_mount_info mount_info; + struct nfs_clone_mount clone_data; }; +static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) +{ + return fc->fs_private; +} + /* mount_clnt.c */ struct nfs_mount_request { struct sockaddr *sap; @@ -147,15 +169,6 @@ struct nfs_mount_request { struct net *net; }; -struct nfs_mount_info { - unsigned int inherited_bsize; - struct nfs_fs_context *ctx; - struct nfs_clone_mount *cloned; - struct nfs_server *server; - struct nfs_fh *mntfh; - struct nfs_subversion *nfs_mod; -}; - extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); @@ -235,22 +248,8 @@ static inline void nfs_fs_proc_exit(void) extern const struct svc_version nfs4_callback_version1; extern const struct svc_version nfs4_callback_version4; -struct nfs_pageio_descriptor; - -/* mount.c */ -#define NFS_TEXT_DATA 1 - -extern struct nfs_fs_context *nfs_alloc_parsed_mount_data(void); -extern void nfs_free_parsed_mount_data(struct nfs_fs_context *ctx); -extern int nfs_parse_mount_options(char *raw, struct nfs_fs_context *ctx); -extern int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_fs_context *ctx, - struct nfs_fh *mntfh, - const char *dev_name); -extern int nfs_validate_text_mount_data(void *options, - struct nfs_fs_context *ctx, - const char *dev_name); +/* fs_context.c */ +extern struct file_system_type nfs_fs_type; /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); @@ -411,14 +410,9 @@ extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; -extern struct file_system_type nfs_fs_type; -extern struct file_system_type nfs_prepared_fs_type; -#if IS_ENABLED(CONFIG_NFS_V4) -extern struct file_system_type nfs4_referral_fs_type; -#endif bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); -struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); +int nfs_try_get_tree(struct fs_context *); +int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); extern struct rpc_stat nfs_rpcstat; @@ -446,10 +440,8 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); -struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, - struct nfs_fh *, 
struct nfs_fattr *); -struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, - struct nfs_fattr *, rpc_authflavor_t); +int nfs_submount(struct fs_context *, struct nfs_server *); +int nfs_do_submount(struct fs_context *); /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, @@ -476,7 +468,7 @@ int nfs_show_options(struct seq_file *, struct dentry *); int nfs_show_devname(struct seq_file *, struct dentry *); int nfs_show_path(struct seq_file *, struct dentry *); int nfs_show_stats(struct seq_file *, struct dentry *); -int nfs_remount(struct super_block *sb, int *flags, char *raw_data); +int nfs_reconfigure(struct fs_context *); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bfe607374feb..9b344fcd23b0 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -140,34 +140,65 @@ EXPORT_SYMBOL_GPL(nfs_path); */ struct vfsmount *nfs_d_automount(struct path *path) { - struct vfsmount *mnt; + struct nfs_fs_context *ctx; + struct fs_context *fc; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct nfs_server *server = NFS_SERVER(d_inode(path->dentry)); - struct nfs_fh *fh = NULL; - struct nfs_fattr *fattr = NULL; + struct nfs_client *client = server->nfs_client; + int ret; if (IS_ROOT(path->dentry)) return ERR_PTR(-ESTALE); - mnt = ERR_PTR(-ENOMEM); - fh = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - if (fh == NULL || fattr == NULL) - goto out; + /* Open a new filesystem context, transferring parameters from the + * parent superblock, including the network namespace. + */ + fc = fs_context_for_submount(&nfs_fs_type, path->dentry); + if (IS_ERR(fc)) + return ERR_CAST(fc); - mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); + ctx = nfs_fc2context(fc); + ctx->clone_data.dentry = path->dentry; + ctx->clone_data.sb = path->dentry->d_sb; + ctx->clone_data.fattr = nfs_alloc_fattr(); + if (!ctx->clone_data.fattr) + goto out_fc; + + if (fc->net_ns != client->cl_net) { + put_net(fc->net_ns); + fc->net_ns = get_net(client->cl_net); + } + + /* for submounts we want the same server; referrals will reassign */ + memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen); + ctx->nfs_server.addrlen = client->cl_addrlen; + ctx->nfs_server.port = server->port; + + ctx->version = client->rpc_ops->version; + ctx->minorversion = client->cl_minorversion; + ctx->mount_info.nfs_mod = client->cl_nfs_mod; + __module_get(ctx->mount_info.nfs_mod->owner); + + ret = client->rpc_ops->submount(fc, server); + if (ret < 0) { + mnt = ERR_PTR(ret); + goto out_fc; + } + + up_write(&fc->root->d_sb->s_umount); + mnt = vfs_create_mount(fc); if (IS_ERR(mnt)) - goto out; + goto out_fc; if (nfs_mountpoint_expiry_timeout < 0) - goto out; + goto out_fc; mntget(mnt); /* prevent immediate expiration */ mnt_set_expiry(mnt, &nfs_automount_list); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); -out: - nfs_free_fattr(fattr); - nfs_free_fhandle(fh); +out_fc: + put_fs_context(fc); return mnt; } @@ -222,61 +253,62 @@ void nfs_release_automount_timer(void) * @authflavor: security flavor to use when performing the mount * */ -struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr, rpc_authflavor_t authflavor) +int nfs_do_submount(struct fs_context *fc) { - struct super_block *sb = dentry->d_sb; - struct nfs_clone_mount mountdata = { - .sb = sb, - .dentry = dentry, - .authflavor = authflavor, - 
}; - struct nfs_mount_info mount_info = { - .inherited_bsize = sb->s_blocksize_bits, - .cloned = &mountdata, - .mntfh = fh, - .nfs_mod = NFS_SB(sb)->nfs_client->cl_nfs_mod, - }; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; struct nfs_server *server; - struct vfsmount *mnt; - char *page = (char *) __get_free_page(GFP_USER); - char *devname; + char *buffer, *p; + int ret; - if (page == NULL) - return ERR_PTR(-ENOMEM); + /* create a new volume representation */ + server = ctx->mount_info.nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb), + ctx->mount_info.mntfh, + ctx->clone_data.fattr, + ctx->selected_flavor); - server = mount_info.nfs_mod->rpc_ops->clone_server(NFS_SB(sb), fh, - fattr, authflavor); if (IS_ERR(server)) - return ERR_CAST(server); + return PTR_ERR(server); - mount_info.server = server; + ctx->mount_info.server = server; - devname = nfs_devname(dentry, page, PAGE_SIZE); - if (IS_ERR(devname)) - mnt = ERR_CAST(devname); - else - mnt = vfs_submount(dentry, &nfs_prepared_fs_type, devname, &mount_info); + buffer = kmalloc(4096, GFP_USER); + if (!buffer) + return -ENOMEM; - if (mount_info.server) - nfs_free_server(mount_info.server); - free_page((unsigned long)page); - return mnt; + ctx->internal = true; + ctx->mount_info.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits; + + p = nfs_devname(dentry, buffer, 4096); + if (IS_ERR(p)) { + dprintk("NFS: Couldn't determine submount pathname\n"); + ret = PTR_ERR(p); + } else { + ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p); + if (!ret) + ret = vfs_get_tree(fc); + } + kfree(buffer); + return ret; } EXPORT_SYMBOL_GPL(nfs_do_submount); -struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) +int nfs_submount(struct fs_context *fc, struct nfs_server *server) { - int err; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; struct dentry *parent = dget_parent(dentry); + int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL); + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, + ctx->mount_info.mntfh, ctx->clone_data.fattr, + NULL); dput(parent); if (err != 0) - return ERR_PTR(err); + return err; - return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); + ctx->selected_flavor = server->client->cl_auth->au_flavor; + return nfs_do_submount(fc); } EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9eb2f1a503ab..657041c3a03f 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -990,7 +990,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .nlmclnt_ops = &nlmclnt_fl_close_lock_ops, .getroot = nfs3_proc_get_root, .submount = nfs_submount, - .try_mount = nfs_try_mount, + .try_get_tree = nfs_try_get_tree, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5d539dce9cef..9c136d53987d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -268,14 +268,13 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t); -/* super.c */ +/* fs_context.c */ extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, 
const struct qstr *); -struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); +int nfs4_submount(struct fs_context *, struct nfs_server *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); @@ -526,7 +525,6 @@ extern const nfs4_stateid invalid_stateid; /* nfs4super.c */ struct nfs_mount_info; extern struct nfs_subversion nfs_v4; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; @@ -536,6 +534,9 @@ extern bool recover_lost_locks; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; +extern int nfs4_try_get_tree(struct fs_context *); +extern int nfs4_get_referral_tree(struct fs_context *); + /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 620de905cba9..be4eb720d5b6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "delegation.h" #include "internal.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2e460c33ae48..a1a0c4c53ce1 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -8,6 +8,7 @@ * NFSv4 namespace */ +#include #include #include #include @@ -21,37 +22,64 @@ #include #include "internal.h" #include "nfs4_fs.h" +#include "nfs.h" #include "dns_resolve.h" #define NFSDBG_FACILITY NFSDBG_VFS /* - * Convert the NFSv4 pathname components into a standard posix path. - * - * Note that the resulting string will be placed at the end of the buffer + * Work out the length that an NFSv4 path would render to as a standard posix + * path, with a leading slash but no terminating slash. */ -static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, - char *buffer, ssize_t buflen) +static ssize_t nfs4_pathname_len(const struct nfs4_pathname *pathname) { - char *end = buffer + buflen; - int n; + ssize_t len = 0; + int i; - *--end = '\0'; - buflen--; + for (i = 0; i < pathname->ncomponents; i++) { + const struct nfs4_string *component = &pathname->components[i]; - n = pathname->ncomponents; - while (--n >= 0) { - const struct nfs4_string *component = &pathname->components[n]; - buflen -= component->len + 1; - if (buflen < 0) - goto Elong; - end -= component->len; - memcpy(end, component->data, component->len); - *--end = '/'; + if (component->len > NAME_MAX) + goto too_long; + len += 1 + component->len; /* Adding "/foo" */ + if (len > PATH_MAX) + goto too_long; } - return end; -Elong: - return ERR_PTR(-ENAMETOOLONG); + return len; + +too_long: + return -ENAMETOOLONG; +} + +/* + * Convert the NFSv4 pathname components into a standard posix path. 
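+ * Returns a kmalloc'd string that the caller must kfree(); *_len is set
+ * to the rendered length, excluding the terminating NUL.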
+ */ +static char *nfs4_pathname_string(const struct nfs4_pathname *pathname, + unsigned short *_len) +{ + ssize_t len; + char *buf, *p; + int i; + + len = nfs4_pathname_len(pathname); + if (len < 0) + return ERR_PTR(len); + *_len = len; + + p = buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < pathname->ncomponents; i++) { + const struct nfs4_string *component = &pathname->components[i]; + + *p++ = '/'; + memcpy(p, component->data, component->len); + p += component->len; + } + + *p = 0; + return buf; } /* @@ -100,21 +128,32 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) */ static int nfs4_validate_fspath(struct dentry *dentry, const struct nfs4_fs_locations *locations, - char *page, char *page2) + struct nfs_fs_context *ctx) { const char *path, *fs_path; + char *buf; + unsigned short len; + int n; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) + buf = kmalloc(4096, GFP_KERNEL); + path = nfs4_path(dentry, buf, 4096); + if (IS_ERR(path)) { + kfree(buf); return PTR_ERR(path); + } - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) + fs_path = nfs4_pathname_string(&locations->fs_path, &len); + if (IS_ERR(fs_path)) { + kfree(buf); return PTR_ERR(fs_path); + } - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + n = strncmp(path, fs_path, len); + kfree(buf); + kfree(fs_path); + if (n != 0) { dprintk("%s: path %s does not begin with fsroot %s\n", - __func__, path, fs_path); + __func__, path, ctx->nfs_server.export_path); return -ENOENT; } @@ -236,55 +275,83 @@ out: return new; } -static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, - char *page, char *page2, - const struct nfs4_fs_location *location) +static int try_location(struct fs_context *fc, + const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); - struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); - struct vfsmount *mnt = ERR_PTR(-ENOENT); - char *mnt_path; - unsigned int maxbuflen; - unsigned int s; - - mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); - if (IS_ERR(mnt_path)) - return ERR_CAST(mnt_path); - mountdata->mnt_path = mnt_path; - maxbuflen = mnt_path - 1 - page2; - - mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL); - if (mountdata->addr == NULL) - return ERR_PTR(-ENOMEM); + struct nfs_fs_context *ctx = nfs_fc2context(fc); + unsigned int len, s; + char *export_path, *source, *p; + int ret = -ENOENT; + /* Allocate a buffer big enough to hold any of the hostnames plus a + * terminating char and also a buffer big enough to hold the hostname + * plus a colon plus the path. 
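+ * The second buffer becomes fc->source, i.e. the "host:/export/path"
+ * device name under which the referral is mounted.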
+ */ + len = 0; for (s = 0; s < location->nservers; s++) { const struct nfs4_string *buf = &location->servers[s]; + if (buf->len > len) + len = buf->len; + } - if (buf->len <= 0 || buf->len >= maxbuflen) - continue; + kfree(ctx->nfs_server.hostname); + ctx->nfs_server.hostname = kmalloc(len + 1, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + return -ENOMEM; + + export_path = nfs4_pathname_string(&location->rootpath, + &ctx->nfs_server.export_path_len); + if (IS_ERR(export_path)) + return PTR_ERR(export_path); + + ctx->nfs_server.export_path = export_path; + + source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1, + GFP_KERNEL); + if (!source) + return -ENOMEM; + + kfree(fc->source); + fc->source = source; + + ctx->clone_data.addr = kmalloc(addr_bufsize, GFP_KERNEL); + if (ctx->clone_data.addr == NULL) + return -ENOMEM; + for (s = 0; s < location->nservers; s++) { + const struct nfs4_string *buf = &location->servers[s]; if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) continue; - mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, net); - if (mountdata->addrlen == 0) + ctx->clone_data.addrlen = + nfs_parse_server_name(buf->data, buf->len, + ctx->clone_data.addr, + addr_bufsize, + fc->net_ns); + if (ctx->clone_data.addrlen == 0) continue; - memcpy(page2, buf->data, buf->len); - page2[buf->len] = '\0'; - mountdata->hostname = page2; + rpc_set_port(ctx->clone_data.addr, NFS_PORT); - snprintf(page, PAGE_SIZE, "%s:%s", - mountdata->hostname, - mountdata->mnt_path); + memcpy(ctx->nfs_server.hostname, buf->data, buf->len); + ctx->nfs_server.hostname[buf->len] = '\0'; + ctx->clone_data.hostname = ctx->nfs_server.hostname; - mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); - if (!IS_ERR(mnt)) - break; + p = source; + memcpy(p, buf->data, buf->len); + p += buf->len; + *p++ = ':'; + memcpy(p, ctx->nfs_server.export_path, ctx->nfs_server.export_path_len); + p += ctx->nfs_server.export_path_len; + *p = 0; + + ret = nfs4_get_referral_tree(fc); + if (ret == 0) + return 0; } - kfree(mountdata->addr); - return mnt; + + return ret; } /** @@ -293,38 +360,23 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, * @locations: array of NFSv4 server location information * */ -static struct vfsmount *nfs_follow_referral(struct dentry *dentry, - const struct nfs4_fs_locations *locations) +static int nfs_follow_referral(struct fs_context *fc, + const struct nfs4_fs_locations *locations) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); - struct nfs_clone_mount mountdata = { - .sb = dentry->d_sb, - .dentry = dentry, - .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor, - }; - char *page = NULL, *page2 = NULL; + struct nfs_fs_context *ctx = nfs_fc2context(fc); int loc, error; if (locations == NULL || locations->nlocations <= 0) - goto out; + return -ENOENT; - dprintk("%s: referral at %pd2\n", __func__, dentry); - - page = (char *) __get_free_page(GFP_USER); - if (!page) - goto out; - - page2 = (char *) __get_free_page(GFP_USER); - if (!page2) - goto out; + dprintk("%s: referral at %pd2\n", __func__, ctx->clone_data.dentry); /* Ensure fs path is a prefix of current dentry path */ - error = nfs4_validate_fspath(dentry, locations, page, page2); - if (error < 0) { - mnt = ERR_PTR(error); - goto out; - } + error = nfs4_validate_fspath(ctx->clone_data.dentry, locations, ctx); + if (error < 0) + return error; + error = -ENOENT; for (loc = 0; loc < locations->nlocations; loc++) { const struct 
nfs4_fs_location *location = &locations->locations[loc]; @@ -332,15 +384,12 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, location->rootpath.ncomponents == 0) continue; - mnt = try_location(&mountdata, page, page2, location); - if (!IS_ERR(mnt)) - break; + error = try_location(fc, location); + if (error == 0) + return 0; } -out: - free_page((unsigned long) page); - free_page((unsigned long) page2); - return mnt; + return error; } /* @@ -348,71 +397,73 @@ out: * @dentry - dentry of referral * */ -static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) +static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) { - struct vfsmount *mnt = ERR_PTR(-ENOMEM); - struct dentry *parent; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry, *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; - int err; + int err = -ENOMEM; /* BUG_ON(IS_ROOT(dentry)); */ page = alloc_page(GFP_KERNEL); - if (page == NULL) - return mnt; + if (!page) + return -ENOMEM; fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (fs_locations == NULL) + if (!fs_locations) goto out_free; /* Get locations */ - mnt = ERR_PTR(-ENOENT); - + dentry = ctx->clone_data.dentry; parent = dget_parent(dentry); dprintk("%s: getting locations for %pd2\n", __func__, dentry); err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page); dput(parent); - if (err != 0 || - fs_locations->nlocations <= 0 || - fs_locations->fs_path.ncomponents <= 0) - goto out_free; + if (err != 0) + goto out_free_2; - mnt = nfs_follow_referral(dentry, fs_locations); + err = -ENOENT; + if (fs_locations->nlocations <= 0 || + fs_locations->fs_path.ncomponents <= 0) + goto out_free_2; + + err = nfs_follow_referral(fc, fs_locations); +out_free_2: + kfree(fs_locations); out_free: __free_page(page); - kfree(fs_locations); - return mnt; + return err; } -struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) +int nfs4_submount(struct fs_context *fc, struct nfs_server *server) { - rpc_authflavor_t flavor = server->client->cl_auth->au_flavor; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); const struct qstr *name = &dentry->d_name; struct rpc_clnt *client; - struct vfsmount *mnt; + int ret; /* Look it up again to get its attributes and sec flavor */ - client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); + client = nfs4_proc_lookup_mountpoint(dir, name, ctx->mount_info.mntfh, + ctx->clone_data.fattr); dput(parent); if (IS_ERR(client)) - return ERR_CAST(client); + return PTR_ERR(client); - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - mnt = nfs_do_refmount(client, dentry); - goto out; + ctx->selected_flavor = client->cl_auth->au_flavor; + if (ctx->clone_data.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + ret = nfs_do_refmount(fc, client); + } else { + ret = nfs_do_submount(fc); } - if (client->cl_auth->au_flavor != flavor) - flavor = client->cl_auth->au_flavor; - mnt = nfs_do_submount(dentry, fh, fattr, flavor); -out: rpc_shutdown_client(client); - return mnt; + return ret; } /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 76d37161409a..5f51f62b0652 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10001,7 +10001,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = 
&nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, - .try_mount = nfs4_try_mount, + .try_get_tree = nfs4_try_get_tree, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index f931e8f49b05..469726410c5c 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -4,6 +4,7 @@ */ #include #include +#include #include #include #include "delegation.h" @@ -18,16 +19,6 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); static void nfs4_evict_inode(struct inode *inode); -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -struct file_system_type nfs4_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, @@ -41,7 +32,6 @@ static const struct super_operations nfs4_sops = { .show_devname = nfs_show_devname, .show_path = nfs_show_path, .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, }; struct nfs_subversion nfs_v4 = { @@ -147,102 +137,121 @@ static void nfs_referral_loop_unprotect(void) kfree(p); } -static struct dentry *do_nfs4_mount(struct nfs_server *server, int flags, - struct nfs_mount_info *info, - const char *hostname, - const char *export_path) +static int do_nfs4_mount(struct nfs_server *server, + struct fs_context *fc, + const char *hostname, + const char *export_path) { + struct nfs_fs_context *root_ctx; + struct fs_context *root_fc; struct vfsmount *root_mnt; struct dentry *dentry; - char *root_devname; - int err; size_t len; + int ret; + + struct fs_parameter param = { + .key = "source", + .type = fs_value_is_string, + .dirfd = -1, + }; if (IS_ERR(server)) - return ERR_CAST(server); + return PTR_ERR(server); + + root_fc = vfs_dup_fs_context(fc); + if (IS_ERR(root_fc)) { + nfs_free_server(server); + return PTR_ERR(root_fc); + } + kfree(root_fc->source); + root_fc->source = NULL; + + root_ctx = nfs_fc2context(root_fc); + root_ctx->internal = true; + root_ctx->mount_info.server = server; + /* We leave export_path unset as it's not used to find the root. */ len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) { - nfs_free_server(server); - return ERR_PTR(-ENOMEM); + param.string = kmalloc(len, GFP_KERNEL); + if (param.string == NULL) { + put_fs_context(root_fc); + return -ENOMEM; } /* Does hostname needs to be enclosed in brackets? 
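 * (IPv6 literals contain ':' and so must appear as "[addr]:/" in the
 *  device name used for the temporary root mount.)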
*/ if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); + param.size = snprintf(param.string, len, "[%s]:/", hostname); else - snprintf(root_devname, len, "%s:/", hostname); - info->server = server; - root_mnt = vfs_kern_mount(&nfs_prepared_fs_type, flags, root_devname, info); - if (info->server) - nfs_free_server(info->server); - info->server = NULL; - kfree(root_devname); + param.size = snprintf(param.string, len, "%s:/", hostname); + ret = vfs_parse_fs_param(root_fc, ¶m); + kfree(param.string); + if (ret < 0) { + put_fs_context(root_fc); + return ret; + } + root_mnt = fc_mount(root_fc); + put_fs_context(root_fc); if (IS_ERR(root_mnt)) - return ERR_CAST(root_mnt); + return PTR_ERR(root_mnt); - err = nfs_referral_loop_protect(); - if (err) { + ret = nfs_referral_loop_protect(); + if (ret) { mntput(root_mnt); - return ERR_PTR(err); + return ret; } dentry = mount_subtree(root_mnt, export_path); nfs_referral_loop_unprotect(); - return dentry; + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + fc->root = dentry; + return 0; } -struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +int nfs4_try_get_tree(struct fs_context *fc) { - struct nfs_fs_context *ctx = mount_info->ctx; - struct dentry *res; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err; - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + dfprintk(MOUNT, "--> nfs4_try_get_tree()\n"); - res = do_nfs4_mount(nfs4_create_server(mount_info), - flags, mount_info, - ctx->nfs_server.hostname, - ctx->nfs_server.export_path); - - dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? " [error]" : ""); - return res; + /* We create a mount for the server's root, walk to the requested + * location and then create another mount for that. + */ + err= do_nfs4_mount(nfs4_create_server(&ctx->mount_info), + fc, ctx->nfs_server.hostname, + ctx->nfs_server.export_path); + if (err) { + dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); + } else { + dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); + } + return err; } /* * Create an NFS4 server record on referral traversal */ -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +int nfs4_get_referral_tree(struct fs_context *fc) { - struct nfs_clone_mount *data = raw_data; - struct nfs_mount_info mount_info = { - .cloned = data, - .nfs_mod = &nfs_v4, - }; - struct dentry *res; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err; dprintk("--> nfs4_referral_mount()\n"); - mount_info.mntfh = nfs_alloc_fhandle(); - if (!mount_info.mntfh) - return ERR_PTR(-ENOMEM); - - res = do_nfs4_mount(nfs4_create_referral_server(mount_info.cloned, - mount_info.mntfh), - flags, &mount_info, data->hostname, data->mnt_path); - - dprintk("<-- nfs4_referral_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? 
" [error]" : ""); - - nfs_free_fhandle(mount_info.mntfh); - return res; + /* create a new volume representation */ + err = do_nfs4_mount(nfs4_create_referral_server(&ctx->clone_data, ctx->mount_info.mntfh), + fc, ctx->nfs_server.hostname, + ctx->nfs_server.export_path); + if (err) { + dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); + } else { + dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); + } + return err; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0f7288b94633..44a15523bf40 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -710,7 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, - .try_mount = nfs_try_mount, + .try_get_tree = nfs_try_get_tree, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6c9573a32a69..6ff99da978a8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -70,28 +70,6 @@ #define NFSDBG_FACILITY NFSDBG_VFS -static struct dentry *nfs_prepared_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -struct file_system_type nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("nfs"); -EXPORT_SYMBOL_GPL(nfs_fs_type); - -struct file_system_type nfs_prepared_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .mount = nfs_prepared_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -EXPORT_SYMBOL_GPL(nfs_prepared_fs_type); - const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .free_inode = nfs_free_inode, @@ -104,22 +82,10 @@ const struct super_operations nfs_sops = { .show_devname = nfs_show_devname, .show_path = nfs_show_path, .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, }; EXPORT_SYMBOL_GPL(nfs_sops); #if IS_ENABLED(CONFIG_NFS_V4) -struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("nfs4"); -MODULE_ALIAS("nfs4"); -EXPORT_SYMBOL_GPL(nfs4_fs_type); - static int __init register_nfs4_fs(void) { return register_filesystem(&nfs4_fs_type); @@ -911,20 +877,19 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf return nfs_mod->rpc_ops->create_server(mount_info); } -static struct dentry *nfs_fs_mount_common(int, const char *, struct nfs_mount_info *); - -struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +int nfs_try_get_tree(struct fs_context *fc) { - struct nfs_subversion *nfs_mod = mount_info->nfs_mod; - if (mount_info->ctx->need_mount) - mount_info->server = nfs_try_mount_request(mount_info); - else - mount_info->server = nfs_mod->rpc_ops->create_server(mount_info); + struct nfs_fs_context *ctx = nfs_fc2context(fc); - return nfs_fs_mount_common(flags, dev_name, mount_info); + if (ctx->need_mount) + ctx->mount_info.server = nfs_try_mount_request(&ctx->mount_info); + else + ctx->mount_info.server = ctx->mount_info.nfs_mod->rpc_ops->create_server(&ctx->mount_info); + + return nfs_get_tree_common(fc); } -EXPORT_SYMBOL_GPL(nfs_try_mount); +EXPORT_SYMBOL_GPL(nfs_try_get_tree); + #define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | NFS_MOUNT_SECURE \ @@ -965,15 +930,11 @@ 
nfs_compare_remount_data(struct nfs_server *nfss, return 0; } -int -nfs_remount(struct super_block *sb, int *flags, char *raw_data) +int nfs_reconfigure(struct fs_context *fc) { - int error; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct super_block *sb = fc->root->d_sb; struct nfs_server *nfss = sb->s_fs_info; - struct nfs_fs_context *ctx; - struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; - struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; - u32 nfsvers = nfss->nfs_client->rpc_ops->version; sync_filesystem(sb); @@ -983,57 +944,24 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) * ones were explicitly specified. Fall back to legacy behavior and * just return success. */ - if ((nfsvers == 4 && (!options4 || options4->version == 1)) || - (nfsvers <= 3 && (!options || (options->version >= 1 && - options->version <= 6)))) + if (ctx->skip_reconfig_option_check) return 0; - ctx = nfs_alloc_parsed_mount_data(); - if (ctx == NULL) - return -ENOMEM; - - /* fill out struct with values from existing mount */ - ctx->flags = nfss->flags; - ctx->rsize = nfss->rsize; - ctx->wsize = nfss->wsize; - ctx->retrans = nfss->client->cl_timeout->to_retries; - ctx->selected_flavor = nfss->client->cl_auth->au_flavor; - ctx->acregmin = nfss->acregmin / HZ; - ctx->acregmax = nfss->acregmax / HZ; - ctx->acdirmin = nfss->acdirmin / HZ; - ctx->acdirmax = nfss->acdirmax / HZ; - ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; - ctx->nfs_server.port = nfss->port; - ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; - ctx->version = nfsvers; - ctx->minorversion = nfss->nfs_client->cl_minorversion; - ctx->net = current->nsproxy->net_ns; - memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr, - ctx->nfs_server.addrlen); - - /* overwrite those values with any that were specified */ - error = -EINVAL; - if (!nfs_parse_mount_options((char *)options, ctx)) - goto out; - /* * noac is a special case. It implies -o sync, but that's not - * necessarily reflected in the mtab options. do_remount_sb + * necessarily reflected in the mtab options. reconfigure_super * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the * remount options, so we have to explicitly reset it. 
*/ - if (ctx->flags & NFS_MOUNT_NOAC) - *flags |= SB_SYNCHRONOUS; + if (ctx->flags & NFS_MOUNT_NOAC) { + fc->sb_flags |= SB_SYNCHRONOUS; + fc->sb_flags_mask |= SB_SYNCHRONOUS; + } /* compare new mount options with old ones */ - error = nfs_compare_remount_data(nfss, ctx); - if (!error) - error = security_sb_remount(sb, ctx->lsm_opts); -out: - nfs_free_parsed_mount_data(ctx); - return error; + return nfs_compare_remount_data(nfss, ctx); } -EXPORT_SYMBOL_GPL(nfs_remount); +EXPORT_SYMBOL_GPL(nfs_reconfigure); /* * Finish setting up an NFS superblock @@ -1112,19 +1040,11 @@ Ebusy: return 0; } -struct nfs_sb_mountdata { - struct nfs_server *server; - int mntflags; -}; - -static int nfs_set_super(struct super_block *s, void *data) +static int nfs_set_super(struct super_block *s, struct fs_context *fc) { - struct nfs_sb_mountdata *sb_mntdata = data; - struct nfs_server *server = sb_mntdata->server; + struct nfs_server *server = fc->s_fs_info; int ret; - s->s_flags = sb_mntdata->mntflags; - s->s_fs_info = server; s->s_d_op = server->nfs_client->rpc_ops->dentry_ops; ret = set_anon_super(s, server); if (ret == 0) @@ -1189,11 +1109,9 @@ static int nfs_compare_userns(const struct nfs_server *old, return 1; } -static int nfs_compare_super(struct super_block *sb, void *data) +static int nfs_compare_super(struct super_block *sb, struct fs_context *fc) { - struct nfs_sb_mountdata *sb_mntdata = data; - struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); - int mntflags = sb_mntdata->mntflags; + struct nfs_server *server = fc->s_fs_info, *old = NFS_SB(sb); if (!nfs_compare_super_address(old, server)) return 0; @@ -1204,13 +1122,12 @@ static int nfs_compare_super(struct super_block *sb, void *data) return 0; if (!nfs_compare_userns(old, server)) return 0; - return nfs_compare_mount_options(sb, server, mntflags); + return nfs_compare_mount_options(sb, server, fc->sb_flags); } #ifdef CONFIG_NFS_FSCACHE static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_fs_context *ctx, - struct nfs_clone_mount *cloned) + struct nfs_fs_context *ctx) { struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; @@ -1219,30 +1136,32 @@ static void nfs_get_cache_cookie(struct super_block *sb, nfss->fscache_key = NULL; nfss->fscache = NULL; - if (ctx) { - if (!(ctx->options & NFS_OPTION_FSCACHE)) - return; - if (ctx->fscache_uniq) { - uniq = ctx->fscache_uniq; - ulen = strlen(ctx->fscache_uniq); - } - } else if (cloned) { - struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!ctx) + return; + + if (ctx->clone_data.sb) { + struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb); if (!(mnt_s->options & NFS_OPTION_FSCACHE)) return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; } - } else + } else { + if (!(ctx->options & NFS_OPTION_FSCACHE)) + return; + if (ctx->fscache_uniq) { + uniq = ctx->fscache_uniq; + ulen = strlen(ctx->fscache_uniq); + } return; + } nfs_fscache_get_super_cookie(sb, uniq, ulen); } #else static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_fs_context *parsed, - struct nfs_clone_mount *cloned) + struct nfs_fs_context *ctx) { } #endif @@ -1254,40 +1173,41 @@ static void nfs_set_readahead(struct backing_dev_info *bdi, bdi->io_pages = iomax_pages; } -static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +int nfs_get_tree_common(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct super_block *s; struct dentry *mntroot = 
ERR_PTR(-ENOMEM); - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_server *server = mount_info->server; + int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super; + struct nfs_server *server = ctx->mount_info.server; unsigned long kflags = 0, kflags_out = 0; - struct nfs_sb_mountdata sb_mntdata = { - .mntflags = flags, - .server = server, - }; int error; - mount_info->server = NULL; + ctx->mount_info.server = NULL; if (IS_ERR(server)) - return ERR_CAST(server); + return PTR_ERR(server); if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= SB_SYNCHRONOUS; + fc->sb_flags |= SB_SYNCHRONOUS; - if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) - if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS) - sb_mntdata.mntflags |= SB_SYNCHRONOUS; + if (ctx->clone_data.sb) + if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS) + fc->sb_flags |= SB_SYNCHRONOUS; + + if (server->caps & NFS_CAP_SECURITY_LABEL) + fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(mount_info->nfs_mod->nfs_fs, compare_super, nfs_set_super, - flags, &sb_mntdata); + fc->s_fs_info = server; + s = sget_fc(fc, compare_super, nfs_set_super); + fc->s_fs_info = NULL; if (IS_ERR(s)) { - mntroot = ERR_CAST(s); + error = PTR_ERR(s); + dfprintk(MOUNT, "NFS: Couldn't get superblock\n"); goto out_err_nosb; } @@ -1297,44 +1217,43 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, } else { error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - if (error) { - mntroot = ERR_PTR(error); + if (error) goto error_splat_super; - } nfs_set_readahead(s->s_bdi, server->rpages); server->super = s; } if (!s->s_root) { - unsigned bsize = mount_info->inherited_bsize; + unsigned bsize = ctx->mount_info.inherited_bsize; /* initial superblock/root creation */ - nfs_fill_super(s, mount_info); + nfs_fill_super(s, &ctx->mount_info); if (bsize) { s->s_blocksize_bits = bsize; s->s_blocksize = 1U << bsize; } - nfs_get_cache_cookie(s, mount_info->ctx, mount_info->cloned); - if (!(server->flags & NFS_MOUNT_UNSHARED)) - s->s_iflags |= SB_I_MULTIROOT; + nfs_get_cache_cookie(s, ctx); } - mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); - if (IS_ERR(mntroot)) + mntroot = nfs_get_root(s, ctx->mount_info.mntfh, fc->source); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + dfprintk(MOUNT, "NFS: Couldn't get root dentry\n"); goto error_splat_super; - + } + fc->root = mntroot; if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) kflags |= SECURITY_LSM_NATIVE_LABELS; - if (mount_info->cloned) { - if (d_inode(mntroot)->i_fop != &nfs_dir_operations) { + if (ctx->clone_data.sb) { + if (d_inode(fc->root)->i_fop != &nfs_dir_operations) { error = -ESTALE; goto error_splat_root; } /* clone any lsm security options from the parent to the new sb */ - error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags, + error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags, &kflags_out); } else { - error = security_sb_set_mnt_opts(s, mount_info->ctx->lsm_opts, + error = security_sb_set_mnt_opts(s, fc->security, kflags, &kflags_out); } if (error) @@ -1342,67 +1261,25 @@ static struct dentry *nfs_fs_mount_common(int flags, const char *dev_name, if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) NFS_SB(s)->caps 
&= ~NFS_CAP_SECURITY_LABEL; - if (error) - goto error_splat_root; s->s_flags |= SB_ACTIVE; + error = 0; out: - return mntroot; + return error; out_err_nosb: nfs_free_server(server); goto out; error_splat_root: - dput(mntroot); - mntroot = ERR_PTR(error); + dput(fc->root); + fc->root = NULL; error_splat_super: deactivate_locked_super(s); goto out; } -struct dentry *nfs_fs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - }; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_subversion *nfs_mod; - int error; - - mount_info.ctx = nfs_alloc_parsed_mount_data(); - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.ctx == NULL || mount_info.mntfh == NULL) - goto out; - - /* Validate the mount data */ - error = nfs_validate_mount_data(fs_type, raw_data, mount_info.ctx, mount_info.mntfh, dev_name); - if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, - mount_info.ctx, dev_name); - if (error < 0) { - mntroot = ERR_PTR(error); - goto out; - } - - nfs_mod = get_nfs_version(mount_info.ctx->version); - if (IS_ERR(nfs_mod)) { - mntroot = ERR_CAST(nfs_mod); - goto out; - } - mount_info.nfs_mod = nfs_mod; - - mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info); - - put_nfs_version(nfs_mod); -out: - nfs_free_parsed_mount_data(mount_info.ctx); - nfs_free_fhandle(mount_info.mntfh); - return mntroot; -} -EXPORT_SYMBOL_GPL(nfs_fs_mount); - /* * Destroy an NFS2/3 superblock */ @@ -1420,17 +1297,6 @@ void nfs_kill_super(struct super_block *s) } EXPORT_SYMBOL_GPL(nfs_kill_super); -/* - * Internal use only: mount_info is already set up by caller. - * Used for mountpoint crossings and for nfs4 root. - */ -static struct dentry * -nfs_prepared_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - return nfs_fs_mount_common(flags, dev_name, raw_data); -} - #if IS_ENABLED(CONFIG_NFS_V4) /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 17527f6e6360..574741d5418d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1639,6 +1639,7 @@ struct nfs_subversion; struct nfs_mount_info; struct nfs_client_initdata; struct nfs_pageio_descriptor; +struct fs_context; /* * RPC procedure vector for NFSv2/NFSv3 demuxing @@ -1653,9 +1654,8 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); - struct vfsmount *(*submount) (struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); - struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *); + int (*submount) (struct fs_context *, struct nfs_server *); + int (*try_get_tree) (struct fs_context *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *, struct inode *); From 62a55d088cd87d480a6fd67b0d63b14ccae80838 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 10 Dec 2019 07:31:14 -0500 Subject: [PATCH 108/658] NFS: Additional refactoring for fs_context conversion Split out from commit "NFS: Add fs_context support." This patch adds additional refactoring for the conversion of NFS to use fs_context, namely: (*) Merge nfs_mount_info and nfs_clone_mount into nfs_fs_context. nfs_clone_mount has had several fields removed, and nfs_mount_info has been removed altogether. (*) Various functions now take an fs_context as an argument instead of nfs_mount_info, nfs_fs_context, etc. 
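As a rough illustration of the resulting calling convention (a sketch only, not
code taken verbatim from this series; nfs_example() and do_probe() are
hypothetical names), a function that previously received a struct
nfs_mount_info now pulls the same state out of the fs_context:

	static int nfs_example(struct fs_context *fc)
	{
		/* fc->fs_private points at the parsed nfs_fs_context */
		struct nfs_fs_context *ctx = nfs_fc2context(fc);

		/* fields that formerly lived in nfs_mount_info are now
		 * reached directly through the parsed context */
		return do_probe(ctx->server, ctx->mntfh, ctx->nfs_mod);
	}
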
Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 22 +++--- fs/nfs/fs_context.c | 155 +++++++++++++++------------------------- fs/nfs/fscache.c | 2 +- fs/nfs/getroot.c | 70 +++++++++--------- fs/nfs/internal.h | 51 ++++--------- fs/nfs/namespace.c | 14 ++-- fs/nfs/nfs3_fs.h | 2 +- fs/nfs/nfs3client.c | 5 +- fs/nfs/nfs4client.c | 62 ++++++++-------- fs/nfs/nfs4namespace.c | 23 +++--- fs/nfs/nfs4super.c | 19 +++-- fs/nfs/super.c | 48 ++++++------- include/linux/nfs_xdr.h | 2 +- 13 files changed, 203 insertions(+), 272 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 69c0708b2acc..8f760f23748c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -658,17 +658,17 @@ EXPORT_SYMBOL_GPL(nfs_init_client); * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_fs_context *ctx, - struct nfs_subversion *nfs_mod) + const struct fs_context *fc) { + const struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { .hostname = ctx->nfs_server.hostname, .addr = (const struct sockaddr *)&ctx->nfs_server.address, .addrlen = ctx->nfs_server.addrlen, - .nfs_mod = nfs_mod, + .nfs_mod = ctx->nfs_mod, .proto = ctx->nfs_server.protocol, - .net = ctx->net, + .net = fc->net_ns, .timeparms = &timeparms, .cred = server->cred, .nconnect = ctx->nfs_server.nconnect, @@ -951,10 +951,10 @@ EXPORT_SYMBOL_GPL(nfs_free_server); * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info) +struct nfs_server *nfs_create_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; - struct nfs_subversion *nfs_mod = mount_info->nfs_mod; struct nfs_fattr *fattr; int error; @@ -970,18 +970,18 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info) goto error; /* Get a client representation */ - error = nfs_init_server(server, mount_info->ctx, nfs_mod); + error = nfs_init_server(server, fc); if (error < 0) goto error; /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); + error = nfs_probe_fsinfo(server, ctx->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(mount_info->ctx->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ -989,8 +989,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info) } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, - fattr, NULL, NULL); + error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, + fattr, NULL, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index ac1a8d7d7393..e472334b978d 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -241,43 +241,6 @@ static const struct constant_table nfs_secflavor_tokens[] = { { "sys", Opt_sec_sys }, }; -struct nfs_fs_context *nfs_alloc_parsed_mount_data(void) -{ - struct nfs_fs_context *ctx; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (ctx) { - ctx->timeo = NFS_UNSPEC_TIMEO; - ctx->retrans = NFS_UNSPEC_RETRANS; - ctx->acregmin = 
NFS_DEF_ACREGMIN; - ctx->acregmax = NFS_DEF_ACREGMAX; - ctx->acdirmin = NFS_DEF_ACDIRMIN; - ctx->acdirmax = NFS_DEF_ACDIRMAX; - ctx->mount_server.port = NFS_UNSPEC_PORT; - ctx->nfs_server.port = NFS_UNSPEC_PORT; - ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; - ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; - ctx->minorversion = 0; - ctx->need_mount = true; - ctx->net = current->nsproxy->net_ns; - ctx->lsm_opts = NULL; - } - return ctx; -} - -void nfs_free_parsed_mount_data(struct nfs_fs_context *ctx) -{ - if (ctx) { - kfree(ctx->client_address); - kfree(ctx->mount_server.hostname); - kfree(ctx->nfs_server.export_path); - kfree(ctx->nfs_server.hostname); - kfree(ctx->fscache_uniq); - security_free_mnt_opts(&ctx->lsm_opts); - kfree(ctx); - } -} - /* * Sanity-check a server address provided by the mount command. * @@ -342,7 +305,7 @@ static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) * Add 'flavor' to 'auth_info' if not already present. * Returns true if 'flavor' ends up in the list, false otherwise */ -static int nfs_auth_info_add(struct nfs_fs_context *ctx, +static int nfs_auth_info_add(struct fs_context *fc, struct nfs_auth_info *auth_info, rpc_authflavor_t flavor) { @@ -367,9 +330,10 @@ static int nfs_auth_info_add(struct nfs_fs_context *ctx, /* * Parse the value of the 'sec=' option. */ -static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, +static int nfs_parse_security_flavors(struct fs_context *fc, struct fs_parameter *param) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); rpc_authflavor_t pseudoflavor; char *string = param->string, *p; int ret; @@ -419,7 +383,7 @@ static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, return -EINVAL; } - ret = nfs_auth_info_add(ctx, &ctx->auth_info, pseudoflavor); + ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); if (ret < 0) return ret; } @@ -427,9 +391,11 @@ static int nfs_parse_security_flavors(struct nfs_fs_context *ctx, return 0; } -static int nfs_parse_version_string(struct nfs_fs_context *ctx, +static int nfs_parse_version_string(struct fs_context *fc, const char *string) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); + ctx->flags &= ~NFS_MOUNT_VER3; switch (lookup_constant(nfs_vers_tokens, string, -1)) { case Opt_vers_2: @@ -656,17 +622,17 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, * options that take text values */ case Opt_v: - ret = nfs_parse_version_string(ctx, param->key + 1); + ret = nfs_parse_version_string(fc, param->key + 1); if (ret < 0) return ret; break; case Opt_vers: - ret = nfs_parse_version_string(ctx, param->string); + ret = nfs_parse_version_string(fc, param->string); if (ret < 0) return ret; break; case Opt_sec: - ret = nfs_parse_security_flavors(ctx, param); + ret = nfs_parse_security_flavors(fc, param); if (ret < 0) return ret; break; @@ -729,7 +695,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_addr: - len = rpc_pton(ctx->net, param->string, param->size, + len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address)); if (len == 0) @@ -747,7 +713,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, param->string = NULL; break; case Opt_mountaddr: - len = rpc_pton(ctx->net, param->string, param->size, + len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->mount_server.address, sizeof(ctx->mount_server._address)); if (len == 0) @@ -819,7 +785,7 @@ out_of_bounds: } /* - * Split "dev_name" into "hostname:export_path". 
+ * Split fc->source into "hostname:export_path". * * The leftmost colon demarks the split between the server's hostname * and the export path. If the hostname starts with a left square @@ -827,12 +793,13 @@ out_of_bounds: * * Note: caller frees hostname and export path, even on error. */ -static int nfs_parse_devname(struct nfs_fs_context *ctx, - const char *dev_name, - size_t maxnamlen, size_t maxpathlen) +static int nfs_parse_source(struct fs_context *fc, + size_t maxnamlen, size_t maxpathlen) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); + const char *dev_name = fc->source; size_t len; - char *end; + const char *end; if (unlikely(!dev_name || !*dev_name)) { dfprintk(MOUNT, "NFS: device name not specified\n"); @@ -848,7 +815,7 @@ static int nfs_parse_devname(struct nfs_fs_context *ctx, len = end - dev_name; end++; } else { - char *comma; + const char *comma; end = strchr(dev_name, ':'); if (end == NULL) @@ -856,8 +823,8 @@ static int nfs_parse_devname(struct nfs_fs_context *ctx, len = end - dev_name; /* kill possible hostname list: not supported */ - comma = strchr(dev_name, ','); - if (comma != NULL && comma < end) + comma = memchr(dev_name, ',', len); + if (comma) len = comma - dev_name; } @@ -920,7 +887,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc, struct nfs_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); - struct nfs_fh *mntfh = ctx->mount_info.mntfh; + struct nfs_fh *mntfh = ctx->mntfh; struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; @@ -1009,6 +976,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc, else ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); + /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the @@ -1073,12 +1041,6 @@ out_invalid_fh: } #if IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_fs_context *ctx) -{ - ctx->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| - NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); -} - /* * Validate NFSv4 mount options */ @@ -1251,20 +1213,22 @@ static int nfs_fs_context_validate(struct fs_context *fc) goto out_no_address; if (ctx->version == 4) { -#if IS_ENABLED(CONFIG_NFS_V4) - if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - else - port = NFS_PORT; - max_namelen = NFS4_MAXNAMLEN; - max_pathlen = NFS4_MAXPATHLEN; - nfs_validate_transport_protocol(ctx); - if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - nfs4_validate_mount_flags(ctx); -#else - goto out_v4_not_compiled; -#endif /* CONFIG_NFS_V4 */ + if (IS_ENABLED(CONFIG_NFS_V4)) { + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + else + port = NFS_PORT; + max_namelen = NFS4_MAXNAMLEN; + max_pathlen = NFS4_MAXPATHLEN; + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL | + NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + } else { + goto out_v4_not_compiled; + } } else { nfs_set_mount_transport_protocol(ctx); if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) @@ -1273,33 +1237,30 @@ static int nfs_fs_context_validate(struct fs_context *fc) nfs_set_port(sap, &ctx->nfs_server.port, port); - ret = nfs_parse_devname(ctx, fc->source, max_namelen, max_pathlen); + ret = nfs_parse_source(fc, max_namelen, 
max_pathlen); if (ret < 0) return ret; /* Load the NFS protocol module if we haven't done so yet */ - if (!ctx->mount_info.nfs_mod) { + if (!ctx->nfs_mod) { nfs_mod = get_nfs_version(ctx->version); if (IS_ERR(nfs_mod)) { ret = PTR_ERR(nfs_mod); goto out_version_unavailable; } - ctx->mount_info.nfs_mod = nfs_mod; + ctx->nfs_mod = nfs_mod; } return 0; out_no_device_name: dfprintk(MOUNT, "NFS: Device name not specified\n"); return -EINVAL; -#if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; -#else out_invalid_transport_udp: dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); return -EINVAL; -#endif /* !CONFIG_NFS_V4 */ out_no_address: dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); return -EINVAL; @@ -1332,7 +1293,7 @@ static int nfs_get_tree(struct fs_context *fc) if (err) return err; if (!ctx->internal) - return ctx->mount_info.nfs_mod->rpc_ops->try_get_tree(fc); + return ctx->nfs_mod->rpc_ops->try_get_tree(fc); else return nfs_get_tree_common(fc); } @@ -1351,20 +1312,19 @@ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) if (!ctx) return -ENOMEM; - ctx->mount_info.mntfh = nfs_alloc_fhandle(); - if (!ctx->mount_info.mntfh) { + ctx->mntfh = nfs_alloc_fhandle(); + if (!ctx->mntfh) { kfree(ctx); return -ENOMEM; } - nfs_copy_fh(ctx->mount_info.mntfh, src->mount_info.mntfh); + nfs_copy_fh(ctx->mntfh, src->mntfh); - __module_get(ctx->mount_info.nfs_mod->owner); + __module_get(ctx->nfs_mod->owner); ctx->client_address = NULL; ctx->mount_server.hostname = NULL; ctx->nfs_server.export_path = NULL; ctx->nfs_server.hostname = NULL; ctx->fscache_uniq = NULL; - ctx->clone_data.addr = NULL; ctx->clone_data.fattr = NULL; fc->fs_private = ctx; return 0; @@ -1375,17 +1335,16 @@ static void nfs_fs_context_free(struct fs_context *fc) struct nfs_fs_context *ctx = nfs_fc2context(fc); if (ctx) { - if (ctx->mount_info.server) - nfs_free_server(ctx->mount_info.server); - if (ctx->mount_info.nfs_mod) - put_nfs_version(ctx->mount_info.nfs_mod); + if (ctx->server) + nfs_free_server(ctx->server); + if (ctx->nfs_mod) + put_nfs_version(ctx->nfs_mod); kfree(ctx->client_address); kfree(ctx->mount_server.hostname); kfree(ctx->nfs_server.export_path); kfree(ctx->nfs_server.hostname); kfree(ctx->fscache_uniq); - nfs_free_fhandle(ctx->mount_info.mntfh); - kfree(ctx->clone_data.addr); + nfs_free_fhandle(ctx->mntfh); nfs_free_fattr(ctx->clone_data.fattr); kfree(ctx); } @@ -1413,9 +1372,8 @@ static int nfs_init_fs_context(struct fs_context *fc) if (unlikely(!ctx)) return -ENOMEM; - ctx->mount_info.ctx = ctx; - ctx->mount_info.mntfh = nfs_alloc_fhandle(); - if (unlikely(!ctx->mount_info.mntfh)) { + ctx->mntfh = nfs_alloc_fhandle(); + if (unlikely(!ctx->mntfh)) { kfree(ctx); return -ENOMEM; } @@ -1452,8 +1410,8 @@ static int nfs_init_fs_context(struct fs_context *fc) fc->net_ns = get_net(net); } - ctx->mount_info.nfs_mod = nfss->nfs_client->cl_nfs_mod; - __module_get(ctx->mount_info.nfs_mod->owner); + ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; + __module_get(ctx->nfs_mod->owner); } else { /* defaults */ ctx->timeo = NFS_UNSPEC_TIMEO; @@ -1468,7 +1426,6 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->minorversion = 0; ctx->need_mount = true; } - ctx->net = fc->net_ns; fc->fs_private = ctx; fc->ops = &nfs_fs_context_ops; return 0; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3800ab6f08fa..4a8df8c30a03 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ 
-128,7 +128,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int return; key->nfs_client = nfss->nfs_client; - key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; + key->key.super.s_flags = sb->s_flags & NFS_SB_MASK; key->key.nfs_server.flags = nfss->flags; key->key.nfs_server.rsize = nfss->rsize; key->key.nfs_server.wsize = nfss->wsize; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 878c4c5982d9..ab45496d23a6 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -64,66 +64,68 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i /* * get an NFS2/NFS3 root dentry from the root filehandle */ -struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, - const char *devname) +int nfs_get_root(struct super_block *s, struct fs_context *fc) { - struct nfs_server *server = NFS_SB(sb); + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_server *server = NFS_SB(s); struct nfs_fsinfo fsinfo; - struct dentry *ret; + struct dentry *root; struct inode *inode; - void *name = kstrdup(devname, GFP_KERNEL); - int error; + char *name; + int error = -ENOMEM; + name = kstrdup(fc->source, GFP_KERNEL); if (!name) - return ERR_PTR(-ENOMEM); + goto out; /* get the actual root for this mount */ fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) { - kfree(name); - return ERR_PTR(-ENOMEM); - } + if (fsinfo.fattr == NULL) + goto out_name; - error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); - ret = ERR_PTR(error); - goto out; + goto out_fattr; } - inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL); + inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - ret = ERR_CAST(inode); - goto out; + error = PTR_ERR(inode); + goto out_fattr; } - error = nfs_superblock_set_dummy_root(sb, inode); - if (error != 0) { - ret = ERR_PTR(error); - goto out; - } + error = nfs_superblock_set_dummy_root(s, inode); + if (error != 0) + goto out_fattr; /* root dentries normally start off anonymous and get spliced in later * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_root(inode); - if (IS_ERR(ret)) { + root = d_obtain_root(inode); + if (IS_ERR(root)) { dprintk("nfs_get_root: get root dentry failed\n"); - goto out; + error = PTR_ERR(root); + goto out_fattr; } - security_d_instantiate(ret, inode); - spin_lock(&ret->d_lock); - if (IS_ROOT(ret) && !ret->d_fsdata && - !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { - ret->d_fsdata = name; + security_d_instantiate(root, inode); + spin_lock(&root->d_lock); + if (IS_ROOT(root) && !root->d_fsdata && + !(root->d_flags & DCACHE_NFSFS_RENAMED)) { + root->d_fsdata = name; name = NULL; } - spin_unlock(&ret->d_lock); -out: - kfree(name); + spin_unlock(&root->d_lock); + fc->root = root; + error = 0; + +out_fattr: nfs_free_fattr(fsinfo.fattr); - return ret; +out_name: + kfree(name); +out: + return error; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1cd09df9e0b5..a1fd4c3ebc4e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -11,7 +11,7 @@ #include #include -#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) +#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations 
nfs_export_ops; @@ -33,17 +33,6 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) return 1; } -struct nfs_clone_mount { - const struct super_block *sb; - struct dentry *dentry; - char *hostname; - char *mnt_path; - struct sockaddr *addr; - size_t addrlen; - rpc_authflavor_t authflavor; - struct nfs_fattr *fattr; -}; - /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. @@ -80,15 +69,6 @@ struct nfs_client_initdata { const struct cred *cred; }; -struct nfs_mount_info { - unsigned int inherited_bsize; - struct nfs_fs_context *ctx; - struct nfs_clone_mount *cloned; - struct nfs_server *server; - struct nfs_fh *mntfh; - struct nfs_subversion *nfs_mod; -}; - /* * In-kernel mount arguments */ @@ -140,13 +120,17 @@ struct nfs_fs_context { unsigned short export_path_len; } nfs_server; - void *lsm_opts; - struct net *net; + struct nfs_fh *mntfh; + struct nfs_server *server; + struct nfs_subversion *nfs_mod; - char buf[32]; /* Parse buffer */ - - struct nfs_mount_info mount_info; - struct nfs_clone_mount clone_data; + /* Information for a cloned mount. */ + struct nfs_clone_mount { + struct super_block *sb; + struct dentry *dentry; + struct nfs_fattr *fattr; + unsigned int inherited_bsize; + } clone_data; }; static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) @@ -194,10 +178,9 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); -extern struct nfs_server *nfs_create_server(struct nfs_mount_info *); -extern struct nfs_server *nfs4_create_server(struct nfs_mount_info *); -extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server(struct fs_context *); +extern struct nfs_server *nfs4_create_server(struct fs_context *); +extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, struct sockaddr *sap, size_t salen, struct net *net); @@ -444,12 +427,8 @@ int nfs_submount(struct fs_context *, struct nfs_server *); int nfs_do_submount(struct fs_context *); /* getroot.c */ -extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, - const char *); +extern int nfs_get_root(struct super_block *s, struct fs_context *fc); #if IS_ENABLED(CONFIG_NFS_V4) -extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, - const char *); - extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 9b344fcd23b0..d537350c1fb7 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -176,8 +176,8 @@ struct vfsmount *nfs_d_automount(struct path *path) ctx->version = client->rpc_ops->version; ctx->minorversion = client->cl_minorversion; - ctx->mount_info.nfs_mod = client->cl_nfs_mod; - __module_get(ctx->mount_info.nfs_mod->owner); + ctx->nfs_mod = client->cl_nfs_mod; + __module_get(ctx->nfs_mod->owner); ret = client->rpc_ops->submount(fc, server); if (ret < 0) { @@ -262,22 +262,22 @@ int nfs_do_submount(struct fs_context *fc) int ret; /* create a new volume representation */ - server = ctx->mount_info.nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb), - ctx->mount_info.mntfh, + server = ctx->nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb), + ctx->mntfh, 
ctx->clone_data.fattr, ctx->selected_flavor); if (IS_ERR(server)) return PTR_ERR(server); - ctx->mount_info.server = server; + ctx->server = server; buffer = kmalloc(4096, GFP_USER); if (!buffer) return -ENOMEM; ctx->internal = true; - ctx->mount_info.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits; + ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits; p = nfs_devname(dentry, buffer, 4096); if (IS_ERR(p)) { @@ -302,7 +302,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) /* Look it up again to get its attributes */ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, - ctx->mount_info.mntfh, ctx->clone_data.fattr, + ctx->mntfh, ctx->clone_data.fattr, NULL); dput(parent); if (err != 0) diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 09602dc1889f..1b950b66b3bb 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -27,7 +27,7 @@ static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, #endif /* CONFIG_NFS_V3_ACL */ /* nfs3client.c */ -struct nfs_server *nfs3_create_server(struct nfs_mount_info *); +struct nfs_server *nfs3_create_server(struct fs_context *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 54727d3d3042..5601e47360c2 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -46,9 +46,10 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) } #endif -struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info) +struct nfs_server *nfs3_create_server(struct fs_context *fc) { - struct nfs_server *server = nfs_create_server(mount_info); + struct nfs_server *server = nfs_create_server(fc); + /* Create a client RPC handle for the NFS v3 ACL management interface */ if (!IS_ERR(server)) nfs_init_server_aclclient(server); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2216d166768b..0cd767e5c977 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1055,9 +1055,9 @@ out: /* * Create a version 4 volume record */ -static int nfs4_init_server(struct nfs_server *server, - struct nfs_fs_context *ctx) +static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; int error; @@ -1079,15 +1079,15 @@ static int nfs4_init_server(struct nfs_server *server, /* Get a client record */ error = nfs4_set_client(server, - ctx->nfs_server.hostname, - (const struct sockaddr *)&ctx->nfs_server.address, - ctx->nfs_server.addrlen, - ctx->client_address, - ctx->nfs_server.protocol, - &timeparms, - ctx->minorversion, - ctx->nfs_server.nconnect, - ctx->net); + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, + ctx->client_address, + ctx->nfs_server.protocol, + &timeparms, + ctx->minorversion, + ctx->nfs_server.nconnect, + fc->net_ns); if (error < 0) return error; @@ -1110,10 +1110,9 @@ static int nfs4_init_server(struct nfs_server *server, * Create a version 4 volume record * - keyed on server and FSID */ -/*struct nfs_server *nfs4_create_server(const struct nfs_fs_context *data, - struct nfs_fh *mntfh)*/ -struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info) +struct nfs_server *nfs4_create_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; bool auth_probe; int error; @@ -1124,14 +1123,14 @@ struct nfs_server 
*nfs4_create_server(struct nfs_mount_info *mount_info) server->cred = get_cred(current_cred()); - auth_probe = mount_info->ctx->auth_info.flavor_len < 1; + auth_probe = ctx->auth_info.flavor_len < 1; /* set up the general RPC client */ - error = nfs4_init_server(server, mount_info->ctx); + error = nfs4_init_server(server, fc); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe); + error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe); if (error < 0) goto error; @@ -1145,9 +1144,9 @@ error: /* * Create an NFS4 referral server record */ -struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) +struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_client *parent_client; struct nfs_server *server, *parent_server; bool auth_probe; @@ -1157,7 +1156,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (!server) return ERR_PTR(-ENOMEM); - parent_server = NFS_SB(data->sb); + parent_server = NFS_SB(ctx->clone_data.sb); parent_client = parent_server->nfs_client; server->cred = get_cred(parent_server->cred); @@ -1167,10 +1166,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Get a client representation */ #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) - rpc_set_port(data->addr, NFS_RDMA_PORT); - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, + rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT); + error = nfs4_set_client(server, + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_RDMA, parent_server->client->cl_timeout, @@ -1181,10 +1181,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ - rpc_set_port(data->addr, NFS_PORT); - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, + rpc_set_port(&ctx->nfs_server.address, NFS_PORT); + error = nfs4_set_client(server, + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_TCP, parent_server->client->cl_timeout, @@ -1197,13 +1198,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) init_server: #endif - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, + ctx->selected_flavor); if (error < 0) goto error; auth_probe = parent_server->auth_info.flavor_len < 1; - error = nfs4_server_common_setup(server, mntfh, auth_probe); + error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe); if (error < 0) goto error; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a1a0c4c53ce1..10e9e1887841 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -130,9 +130,10 @@ static int nfs4_validate_fspath(struct dentry *dentry, const struct nfs4_fs_locations *locations, struct nfs_fs_context *ctx) { - const char *path, *fs_path; - char *buf; + const char *path; + char *fs_path; unsigned short len; + char *buf; int n; buf = kmalloc(4096, GFP_KERNEL); @@ -278,7 +279,6 @@ out: static int try_location(struct fs_context *fc, const struct nfs4_fs_location *location) { - const size_t addr_bufsize = sizeof(struct 
sockaddr_storage); struct nfs_fs_context *ctx = nfs_fc2context(fc); unsigned int len, s; char *export_path, *source, *p; @@ -314,29 +314,24 @@ static int try_location(struct fs_context *fc, kfree(fc->source); fc->source = source; - - ctx->clone_data.addr = kmalloc(addr_bufsize, GFP_KERNEL); - if (ctx->clone_data.addr == NULL) - return -ENOMEM; for (s = 0; s < location->nservers; s++) { const struct nfs4_string *buf = &location->servers[s]; if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) continue; - ctx->clone_data.addrlen = + ctx->nfs_server.addrlen = nfs_parse_server_name(buf->data, buf->len, - ctx->clone_data.addr, - addr_bufsize, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address), fc->net_ns); - if (ctx->clone_data.addrlen == 0) + if (ctx->nfs_server.addrlen == 0) continue; - rpc_set_port(ctx->clone_data.addr, NFS_PORT); + rpc_set_port(&ctx->nfs_server.address, NFS_PORT); memcpy(ctx->nfs_server.hostname, buf->data, buf->len); ctx->nfs_server.hostname[buf->len] = '\0'; - ctx->clone_data.hostname = ctx->nfs_server.hostname; p = source; memcpy(p, buf->data, buf->len); @@ -449,7 +444,7 @@ int nfs4_submount(struct fs_context *fc, struct nfs_server *server) int ret; /* Look it up again to get its attributes and sec flavor */ - client = nfs4_proc_lookup_mountpoint(dir, name, ctx->mount_info.mntfh, + client = nfs4_proc_lookup_mountpoint(dir, name, ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (IS_ERR(client)) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 469726410c5c..7d5ed37633d8 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -35,12 +35,12 @@ static const struct super_operations nfs4_sops = { }; struct nfs_subversion nfs_v4 = { - .owner = THIS_MODULE, - .nfs_fs = &nfs4_fs_type, - .rpc_vers = &nfs_version4, - .rpc_ops = &nfs_v4_clientops, - .sops = &nfs4_sops, - .xattr = nfs4_xattr_handlers, + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr = nfs4_xattr_handlers, }; static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -168,7 +168,7 @@ static int do_nfs4_mount(struct nfs_server *server, root_ctx = nfs_fc2context(root_fc); root_ctx->internal = true; - root_ctx->mount_info.server = server; + root_ctx->server = server; /* We leave export_path unset as it's not used to find the root. */ len = strlen(hostname) + 5; @@ -221,7 +221,7 @@ int nfs4_try_get_tree(struct fs_context *fc) /* We create a mount for the server's root, walk to the requested * location and then create another mount for that. */ - err= do_nfs4_mount(nfs4_create_server(&ctx->mount_info), + err= do_nfs4_mount(nfs4_create_server(fc), fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { @@ -243,7 +243,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) dprintk("--> nfs4_referral_mount()\n"); /* create a new volume representation */ - err = do_nfs4_mount(nfs4_create_referral_server(&ctx->clone_data, ctx->mount_info.mntfh), + err = do_nfs4_mount(nfs4_create_referral_server(fc), fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { @@ -254,7 +254,6 @@ int nfs4_get_referral_tree(struct fs_context *fc) return err; } - static int __init init_nfs_v4(void) { int err; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6ff99da978a8..ed0290d5ebf3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -745,11 +745,12 @@ out: * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. 
*/ -static int nfs_request_mount(struct nfs_fs_context *ctx, +static int nfs_request_mount(struct fs_context *fc, struct nfs_fh *root_fh, rpc_authflavor_t *server_authlist, unsigned int *server_authlist_len) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_mount_request request = { .sap = (struct sockaddr *) &ctx->mount_server.address, @@ -759,7 +760,7 @@ static int nfs_request_mount(struct nfs_fs_context *ctx, .noresvport = ctx->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = server_authlist_len, .auth_flavs = server_authlist, - .net = ctx->net, + .net = fc->net_ns, }; int status; @@ -804,20 +805,18 @@ static int nfs_request_mount(struct nfs_fs_context *ctx, return 0; } -static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info) +static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); int status; unsigned int i; bool tried_auth_unix = false; bool auth_null_in_list = false; struct nfs_server *server = ERR_PTR(-EACCES); - struct nfs_fs_context *ctx = mount_info->ctx; rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); - struct nfs_subversion *nfs_mod = mount_info->nfs_mod; - status = nfs_request_mount(ctx, mount_info->mntfh, authlist, - &authlist_len); + status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); @@ -831,7 +830,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf ctx->selected_flavor); if (status) return ERR_PTR(status); - return nfs_mod->rpc_ops->create_server(mount_info); + return ctx->nfs_mod->rpc_ops->create_server(fc); } /* @@ -858,7 +857,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); ctx->selected_flavor = flavor; - server = nfs_mod->rpc_ops->create_server(mount_info); + server = ctx->nfs_mod->rpc_ops->create_server(fc); if (!IS_ERR(server)) return server; } @@ -874,7 +873,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! 
Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); ctx->selected_flavor = RPC_AUTH_UNIX; - return nfs_mod->rpc_ops->create_server(mount_info); + return ctx->nfs_mod->rpc_ops->create_server(fc); } int nfs_try_get_tree(struct fs_context *fc) @@ -882,9 +881,9 @@ int nfs_try_get_tree(struct fs_context *fc) struct nfs_fs_context *ctx = nfs_fc2context(fc); if (ctx->need_mount) - ctx->mount_info.server = nfs_try_mount_request(&ctx->mount_info); + ctx->server = nfs_try_mount_request(fc); else - ctx->mount_info.server = ctx->mount_info.nfs_mod->rpc_ops->create_server(&ctx->mount_info); + ctx->server = ctx->nfs_mod->rpc_ops->create_server(fc); return nfs_get_tree_common(fc); } @@ -966,9 +965,8 @@ EXPORT_SYMBOL_GPL(nfs_reconfigure); /* * Finish setting up an NFS superblock */ -static void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) { - struct nfs_fs_context *ctx = mount_info->ctx; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = 0; @@ -1009,13 +1007,14 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_ nfs_super_set_maxbytes(sb, server->maxfilesize); } -static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, + const struct fs_context *fc) { const struct nfs_server *a = s->s_fs_info; const struct rpc_clnt *clnt_a = a->client; const struct rpc_clnt *clnt_b = b->client; - if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + if ((s->s_flags & NFS_SB_MASK) != (fc->sb_flags & NFS_SB_MASK)) goto Ebusy; if (a->nfs_client != b->nfs_client) goto Ebusy; @@ -1122,7 +1121,7 @@ static int nfs_compare_super(struct super_block *sb, struct fs_context *fc) return 0; if (!nfs_compare_userns(old, server)) return 0; - return nfs_compare_mount_options(sb, server, fc->sb_flags); + return nfs_compare_mount_options(sb, server, fc); } #ifdef CONFIG_NFS_FSCACHE @@ -1177,13 +1176,12 @@ int nfs_get_tree_common(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct super_block *s; - struct dentry *mntroot = ERR_PTR(-ENOMEM); int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super; - struct nfs_server *server = ctx->mount_info.server; + struct nfs_server *server = ctx->server; unsigned long kflags = 0, kflags_out = 0; int error; - ctx->mount_info.server = NULL; + ctx->server = NULL; if (IS_ERR(server)) return PTR_ERR(server); @@ -1224,9 +1222,9 @@ int nfs_get_tree_common(struct fs_context *fc) } if (!s->s_root) { - unsigned bsize = ctx->mount_info.inherited_bsize; + unsigned bsize = ctx->clone_data.inherited_bsize; /* initial superblock/root creation */ - nfs_fill_super(s, &ctx->mount_info); + nfs_fill_super(s, ctx); if (bsize) { s->s_blocksize_bits = bsize; s->s_blocksize = 1U << bsize; @@ -1234,13 +1232,11 @@ int nfs_get_tree_common(struct fs_context *fc) nfs_get_cache_cookie(s, ctx); } - mntroot = nfs_get_root(s, ctx->mount_info.mntfh, fc->source); - if (IS_ERR(mntroot)) { - error = PTR_ERR(mntroot); + error = nfs_get_root(s, fc); + if (error < 0) { dfprintk(MOUNT, "NFS: Couldn't get root dentry\n"); goto error_splat_super; } - fc->root = mntroot; if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) kflags |= SECURITY_LSM_NATIVE_LABELS; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 574741d5418d..0a36c6f62b58 100644 
--- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1722,7 +1722,7 @@ struct nfs_rpc_ops { struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); void (*free_client) (struct nfs_client *); - struct nfs_server *(*create_server)(struct nfs_mount_info *); + struct nfs_server *(*create_server)(struct fs_context *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); }; From ce8866f0913ff157edc098f06bad07763ad317e7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 10 Dec 2019 07:31:15 -0500 Subject: [PATCH 109/658] NFS: Attach supplementary error information to fs_context. Split out from commit "NFS: Add fs_context support." Add wrappers nfs_errorf(), nfs_invalf(), and nfs_warnf() which log error information to the fs_context. Convert some printk's to use these new wrappers instead. Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 105 +++++++++++++++----------------------------- fs/nfs/getroot.c | 3 ++ fs/nfs/internal.h | 4 ++ fs/nfs/namespace.c | 2 +- fs/nfs/nfs4super.c | 2 + fs/nfs/super.c | 4 +- 6 files changed, 48 insertions(+), 72 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index e472334b978d..429315c011ae 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -318,10 +318,8 @@ static int nfs_auth_info_add(struct fs_context *fc, return 0; } - if (auth_info->flavor_len + 1 >= max_flavor_len) { - dfprintk(MOUNT, "NFS: too many sec= flavors\n"); - return -EINVAL; - } + if (auth_info->flavor_len + 1 >= max_flavor_len) + return nfs_invalf(fc, "NFS: too many sec= flavors"); auth_info->flavors[auth_info->flavor_len++] = flavor; return 0; @@ -378,9 +376,7 @@ static int nfs_parse_security_flavors(struct fs_context *fc, pseudoflavor = RPC_AUTH_GSS_SPKMP; break; default: - dfprintk(MOUNT, - "NFS: sec= option '%s' not recognized\n", p); - return -EINVAL; + return nfs_invalf(fc, "NFS: sec=%s option not recognized", p); } ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); @@ -425,8 +421,7 @@ static int nfs_parse_version_string(struct fs_context *fc, ctx->minorversion = 2; break; default: - dfprintk(MOUNT, "NFS: Unsupported NFS version\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Unsupported NFS version"); } return 0; } @@ -451,10 +446,8 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, switch (opt) { case Opt_source: - if (fc->source) { - dfprintk(MOUNT, "NFS: Multiple sources not supported\n"); - return -EINVAL; - } + if (fc->source) + return nfs_invalf(fc, "NFS: Multiple sources not supported"); fc->source = param->string; param->string = NULL; break; @@ -664,8 +657,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, xprt_load_transport(param->string); break; default: - dfprintk(MOUNT, "NFS: unrecognized transport protocol\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } ctx->protofamily = protofamily; @@ -688,8 +680,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - dfprintk(MOUNT, "NFS: unrecognized transport protocol\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } ctx->mountfamily = mountfamily; break; @@ -774,13 +765,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, return 0; out_invalid_value: - printk(KERN_INFO "NFS: Bad mount option value specified\n"); - return -EINVAL; + return 
nfs_invalf(fc, "NFS: Bad mount option value specified"); out_invalid_address: - printk(KERN_INFO "NFS: Bad IP address specified\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: - printk(KERN_INFO "NFS: Value for '%s' out of range\n", param->key); + nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); return -ERANGE; } @@ -846,19 +835,15 @@ static int nfs_parse_source(struct fs_context *fc, return 0; out_bad_devname: - dfprintk(MOUNT, "NFS: device name not in host:path format\n"); - return -EINVAL; - + return nfs_invalf(fc, "NFS: device name not in host:path format"); out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + nfs_errorf(fc, "NFS: not enough memory to parse device name"); return -ENOMEM; - out_hostname: - dfprintk(MOUNT, "NFS: server hostname too long\n"); + nfs_errorf(fc, "NFS: server hostname too long"); return -ENAMETOOLONG; - out_path: - dfprintk(MOUNT, "NFS: export pathname too long\n"); + nfs_errorf(fc, "NFS: export pathname too long"); return -ENAMETOOLONG; } @@ -1015,29 +1000,23 @@ out_no_data: ctx->skip_reconfig_option_check = true; return 0; } - dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: mount program didn't pass any mount data"); out_no_v3: - dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", - data->version); - return -EINVAL; + return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3"); out_no_sec: - dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); + dfprintk(MOUNT, "NFS: not enough memory to handle mount options"); return -ENOMEM; out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_invalid_fh: - dfprintk(MOUNT, "NFS: invalid root filehandle\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: invalid root filehandle"); } #if IS_ENABLED(CONFIG_NFS_V4) @@ -1132,21 +1111,17 @@ out_no_data: ctx->skip_reconfig_option_check = true; return 0; } - dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data"); out_inval_auth: - dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", - data->auth_flavourlen); - return -EINVAL; + return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d", + data->auth_flavourlen); out_no_address: - dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS4: mount program didn't pass remote address"); out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; + return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp"); } #endif @@ -1164,8 +1139,7 @@ static int nfs_fs_context_parse_monolithic(struct fs_context *fc, return nfs4_parse_monolithic(fc, data); #endif - dfprintk(MOUNT, "NFS: Unsupported monolithic data version\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Unsupported monolithic data version"); } /* @@ -1253,32 +1227,25 @@ static int nfs_fs_context_validate(struct fs_context *fc) return 0; out_no_device_name: - dfprintk(MOUNT, "NFS: Device name not specified\n"); - return -EINVAL; 
+ return nfs_invalf(fc, "NFS: Device name not specified"); out_v4_not_compiled: - dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); + nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel"); return -EPROTONOSUPPORT; out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; + return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp"); out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_mountproto_mismatch: - dfprintk(MOUNT, "NFS: Mount server address does not match mountproto= option\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option"); out_proto_mismatch: - dfprintk(MOUNT, "NFS: Server address does not match proto= option\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: Server address does not match proto= option"); out_minorversion_mismatch: - dfprintk(MOUNT, "NFS: Mount option vers=%u does not support minorversion=%u\n", + return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u", ctx->version, ctx->minorversion); - return -EINVAL; out_migration_misuse: - dfprintk(MOUNT, "NFS: 'Migration' not supported for this NFS version\n"); - return -EINVAL; + return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version"); out_version_unavailable: - dfprintk(MOUNT, "NFS: Version unavailable\n"); + nfs_errorf(fc, "NFS: Version unavailable"); return ret; } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index ab45496d23a6..b012c2668a1f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -86,6 +86,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); + nfs_errorf(fc, "NFS: Couldn't getattr on root"); goto out_fattr; } @@ -93,6 +94,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); error = PTR_ERR(inode); + nfs_errorf(fc, "NFS: Couldn't get root inode"); goto out_fattr; } @@ -108,6 +110,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) if (IS_ERR(root)) { dprintk("nfs_get_root: get root dentry failed\n"); error = PTR_ERR(root); + nfs_errorf(fc, "NFS: Couldn't get root dentry"); goto out_fattr; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a1fd4c3ebc4e..c0257411e158 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -133,6 +133,10 @@ struct nfs_fs_context { } clone_data; }; +#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) +#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) +#define nfs_warnf(fc, fmt, ...) 
warnf(fc, fmt, ## __VA_ARGS__) + static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { return fc->fs_private; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d537350c1fb7..4fd22c0d730c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -281,7 +281,7 @@ int nfs_do_submount(struct fs_context *fc) p = nfs_devname(dentry, buffer, 4096); if (IS_ERR(p)) { - dprintk("NFS: Couldn't determine submount pathname\n"); + nfs_errorf(fc, "NFS: Couldn't determine submount pathname"); ret = PTR_ERR(p); } else { ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 7d5ed37633d8..1475f932d7da 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -225,6 +225,7 @@ int nfs4_try_get_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { + nfs_errorf(fc, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); @@ -247,6 +248,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { + nfs_errorf(fc, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ed0290d5ebf3..76e0198adcfa 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1205,7 +1205,7 @@ int nfs_get_tree_common(struct fs_context *fc) fc->s_fs_info = NULL; if (IS_ERR(s)) { error = PTR_ERR(s); - dfprintk(MOUNT, "NFS: Couldn't get superblock\n"); + nfs_errorf(fc, "NFS: Couldn't get superblock"); goto out_err_nosb; } @@ -1234,7 +1234,7 @@ int nfs_get_tree_common(struct fs_context *fc) error = nfs_get_root(s, fc); if (error < 0) { - dfprintk(MOUNT, "NFS: Couldn't get root dentry\n"); + nfs_errorf(fc, "NFS: Couldn't get root dentry"); goto error_splat_super; } From 52879b464a6a85ff4070786e8a5c876233ac6f67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Nov 2019 21:16:21 +0100 Subject: [PATCH 110/658] sunrpc: convert to time64_t for expiry Using signed 32-bit types for UTC time leads to the y2038 overflow, which is what happens in the sunrpc code at the moment. This changes the sunrpc code over to use time64_t where possible. The one exception is the gss_import_v{1,2}_context() function for kerberos5, which uses 32-bit timestamps in the protocol. Here, we can at least treat the numbers as 'unsigned', which extends the range from 2038 to 2106. 
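As a sketch of the conversion (illustrative only, not part of the patch), the widening amounts to reading the on-the-wire value into an unsigned 32-bit variable and zero-extending it into time64_t, so post-2038 values stay positive instead of wrapping to pre-1970 times:

/* Illustrative helper; the patch open-codes this as
 * "ctx->endtime = (time64_t)time32" in gss_import_v{1,2}_context(). */
static inline time64_t krb5_widen_endtime(u32 time32)
{
	/* zero-extended: values up to U32_MAX map to dates through 2106 */
	return (time64_t)time32;
}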
Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- include/linux/sunrpc/gss_api.h | 4 ++-- include/linux/sunrpc/gss_krb5.h | 2 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 12 +++++++++--- net/sunrpc/auth_gss/gss_krb5_seal.c | 8 ++++---- net/sunrpc/auth_gss/gss_krb5_unseal.c | 6 +++--- net/sunrpc/auth_gss/gss_krb5_wrap.c | 16 ++++++++-------- net/sunrpc/auth_gss/gss_mech_switch.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- 8 files changed, 30 insertions(+), 24 deletions(-) diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index bd691e08be3b..1cc6cefb1220 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -48,7 +48,7 @@ int gss_import_sec_context( size_t bufsize, struct gss_api_mech *mech, struct gss_ctx **ctx_id, - time_t *endtime, + time64_t *endtime, gfp_t gfp_mask); u32 gss_get_mic( struct gss_ctx *ctx_id, @@ -108,7 +108,7 @@ struct gss_api_ops { const void *input_token, size_t bufsize, struct gss_ctx *ctx_id, - time_t *endtime, + time64_t *endtime, gfp_t gfp_mask); u32 (*gss_get_mic)( struct gss_ctx *ctx_id, diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 02c0412e368c..c1d77dd8ed41 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -106,9 +106,9 @@ struct krb5_ctx { struct crypto_sync_skcipher *initiator_enc_aux; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; - s32 endtime; atomic_t seq_send; atomic64_t seq_send64; + time64_t endtime; struct xdr_netobj mech_used; u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 6e5d6d240215..75b3c2e9e8f8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -253,6 +253,7 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) { u32 seq_send; int tmp; + u32 time32; p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) @@ -290,9 +291,11 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) p = ERR_PTR(-ENOSYS); goto out_err; } - p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); + p = simple_get_bytes(p, end, &time32, sizeof(time32)); if (IS_ERR(p)) goto out_err; + /* unsigned 32-bit time overflows in year 2106 */ + ctx->endtime = (time64_t)time32; p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send)); if (IS_ERR(p)) goto out_err; @@ -587,15 +590,18 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, { u64 seq_send64; int keylen; + u32 time32; p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags)); if (IS_ERR(p)) goto out_err; ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR; - p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); + p = simple_get_bytes(p, end, &time32, sizeof(time32)); if (IS_ERR(p)) goto out_err; + /* unsigned 32-bit time overflows in year 2106 */ + ctx->endtime = (time64_t)time32; p = simple_get_bytes(p, end, &seq_send64, sizeof(seq_send64)); if (IS_ERR(p)) goto out_err; @@ -659,7 +665,7 @@ out_err: static int gss_import_sec_context_kerberos(const void *p, size_t len, struct gss_ctx *ctx_id, - time_t *endtime, + time64_t *endtime, gfp_t gfp_mask) { const void *end = (const void *)((const char *)p + len); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 48fe4a591b54..f1d280accf43 100644 --- 
a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -131,14 +131,14 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), .data = cksumdata}; void *ptr; - s32 now; + time64_t now; u32 seq_send; u8 *cksumkey; dprintk("RPC: %s\n", __func__); BUG_ON(ctx == NULL); - now = get_seconds(); + now = ktime_get_real_seconds(); ptr = setup_token(ctx, token); @@ -170,7 +170,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj cksumobj = { .len = sizeof(cksumdata), .data = cksumdata}; void *krb5_hdr; - s32 now; + time64_t now; u8 *cksumkey; unsigned int cksum_usage; __be64 seq_send_be64; @@ -198,7 +198,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len); - now = get_seconds(); + now = ktime_get_real_seconds(); return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index ef2b25b86d2f..aaab91cf24c8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -124,7 +124,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx, /* it got through unscathed. Make sure the context is unexpired */ - now = get_seconds(); + now = ktime_get_real_seconds(); if (now > ctx->endtime) return GSS_S_CONTEXT_EXPIRED; @@ -149,7 +149,7 @@ gss_verify_mic_v2(struct krb5_ctx *ctx, char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; struct xdr_netobj cksumobj = {.len = sizeof(cksumdata), .data = cksumdata}; - s32 now; + time64_t now; u8 *ptr = read_token->data; u8 *cksumkey; u8 flags; @@ -194,7 +194,7 @@ gss_verify_mic_v2(struct krb5_ctx *ctx, return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ - now = get_seconds(); + now = ktime_get_real_seconds(); if (now > ctx->endtime) return GSS_S_CONTEXT_EXPIRED; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 14a0aff0cd84..6c1920eed771 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -163,7 +163,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, .data = cksumdata}; int blocksize = 0, plainlen; unsigned char *ptr, *msg_start; - s32 now; + time64_t now; int headlen; struct page **tmp_pages; u32 seq_send; @@ -172,7 +172,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, dprintk("RPC: %s\n", __func__); - now = get_seconds(); + now = ktime_get_real_seconds(); blocksize = crypto_sync_skcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); @@ -268,7 +268,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), .data = cksumdata}; - s32 now; + time64_t now; int direction; s32 seqnum; unsigned char *ptr; @@ -359,7 +359,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* it got through unscathed. 
Make sure the context is unexpired */ - now = get_seconds(); + now = ktime_get_real_seconds(); if (now > kctx->endtime) return GSS_S_CONTEXT_EXPIRED; @@ -439,7 +439,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages) { u8 *ptr, *plainhdr; - s32 now; + time64_t now; u8 flags = 0x00; __be16 *be16ptr; __be64 *be64ptr; @@ -481,14 +481,14 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, if (err) return err; - now = get_seconds(); + now = ktime_get_real_seconds(); return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } static u32 gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) { - s32 now; + time64_t now; u8 *ptr; u8 flags = 0x00; u16 ec, rrc; @@ -557,7 +557,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* do sequencing checks */ /* it got through unscathed. Make sure the context is unexpired */ - now = get_seconds(); + now = ktime_get_real_seconds(); if (now > kctx->endtime) return GSS_S_CONTEXT_EXPIRED; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 30b7de6f3d76..d3685d4ed9e0 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -376,7 +376,7 @@ int gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_api_mech *mech, struct gss_ctx **ctx_id, - time_t *endtime, + time64_t *endtime, gfp_t gfp_mask) { if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask))) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c62d1f10978b..0c3e22838ddf 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -436,7 +436,7 @@ static int rsc_parse(struct cache_detail *cd, int id; int len, rv; struct rsc rsci, *rscp = NULL; - time_t expiry; + time64_t expiry; int status = -EINVAL; struct gss_api_mech *gm = NULL; @@ -1221,7 +1221,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, static atomic64_t ctxhctr; long long ctxh; struct gss_api_mech *gm = NULL; - time_t expiry; + time64_t expiry; int status = -EINVAL; memset(&rsci, 0, sizeof(rsci)); From ae08483cdd666045b60ceb502dcbf30038334d58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Nov 2019 21:16:23 +0100 Subject: [PATCH 111/658] nfs: use timespec64 in nfs_fattr Push down the use of timespec64 into NFS nfs_fattr, to avoid needless conversions, and get closer to having 64-bit time_t support on 32-bit NFSv4 and removing some old interfaces from the kernel. Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 4 ++-- fs/nfs/nfs4xdr.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c0257411e158..17f082442804 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -697,9 +697,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) } /* - * Convert a struct timespec into a 64-bit change attribute + * Convert a struct timespec64 into a 64-bit change attribute * - * This does approximately the same thing as timespec_to_ns(), + * This does approximately the same thing as timespec64_to_ns(), * but for calculation efficiency, we multiply the seconds by * 1024*1024*1024. 
*/ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 936c57779ff4..728d88b6a698 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4097,7 +4097,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str status = NFS_ATTR_FATTR_ATIME; bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; } - dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); + dprintk("%s: atime=%lld\n", __func__, time->tv_sec); return status; } @@ -4115,7 +4115,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s status = NFS_ATTR_FATTR_CTIME; bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; } - dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); + dprintk("%s: ctime=%lld\n", __func__, time->tv_sec); return status; } @@ -4132,8 +4132,8 @@ static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, status = decode_attr_time(xdr, time); bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA; } - dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec, - (long)time->tv_nsec); + dprintk("%s: time_delta=%lld %ld\n", __func__, time->tv_sec, + time->tv_nsec); return status; } @@ -4197,7 +4197,7 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str status = NFS_ATTR_FATTR_MTIME; bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; } - dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); + dprintk("%s: mtime=%lld\n", __func__, time->tv_sec); return status; } From bc35b6b0cf4a83da5015ae8f1aa072a05586ee84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Nov 2019 21:16:25 +0100 Subject: [PATCH 112/658] nfs: fscache: use timespec64 in inode auxdata nfs currently behaves differently on 32-bit and 64-bit kernels regarding the on-disk format of nfs_fscache_inode_auxdata. That format should really be the same on any kernel, and we should avoid the 'timespec' type in order to remove that from the kernel later on. Using plain 'timespec64' would not be good here, since that includes implied padding and would possibly leak kernel stack data to the on-disk format on 32-bit architectures. struct __kernel_timespec would work as a replacement, but open-coding the two struct members in nfs_fscache_inode_auxdata makes it more obvious what's going on here, and keeps the current format for 64-bit architectures. 
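A condensed view of the resulting layout (assuming the fs/nfs/fscache.h hunk below applies as shown): each timestamp is carried as explicit 64-bit fields, so the structure has the same size and no implied padding on every architecture:

struct nfs_fscache_inode_auxdata {
	s64 mtime_sec;	/* was: struct timespec mtime */
	s64 mtime_nsec;
	s64 ctime_sec;	/* was: struct timespec ctime */
	s64 ctime_nsec;
	u64 change_attr;
};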
Cc: David Howells Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- fs/nfs/fscache-index.c | 6 ++++-- fs/nfs/fscache.c | 18 ++++++++++++------ fs/nfs/fscache.h | 8 +++++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 15f271401dcc..573b1da9342c 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c @@ -84,8 +84,10 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, return FSCACHE_CHECKAUX_OBSOLETE; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); - auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); + auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; + auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; + auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; + auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 4a8df8c30a03..52270bfac120 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -238,8 +238,10 @@ void nfs_fscache_init_inode(struct inode *inode) return; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); - auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); + auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; + auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; + auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; + auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); @@ -263,8 +265,10 @@ void nfs_fscache_clear_inode(struct inode *inode) dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); - auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); + auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; + auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; + auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; + auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; fscache_relinquish_cookie(cookie, &auxdata, false); nfsi->fscache = NULL; } @@ -305,8 +309,10 @@ void nfs_fscache_open_file(struct inode *inode, struct file *filp) return; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); - auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); + auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; + auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; + auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; + auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; if (inode_is_open_for_write(inode)) { dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index ad041cfbf9ec..6754c8607230 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -62,9 +62,11 @@ struct nfs_fscache_key { * cache object. 
*/ struct nfs_fscache_inode_auxdata { - struct timespec mtime; - struct timespec ctime; - u64 change_attr; + s64 mtime_sec; + s64 mtime_nsec; + s64 ctime_sec; + s64 ctime_nsec; + u64 change_attr; }; /* From e5189e9a51172de650b263f0f8bc87a02d728c58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Nov 2019 21:16:26 +0100 Subject: [PATCH 113/658] nfs: remove timespec from xdr_encode_nfstime For NFSv2 and NFSv3, timestamps are stored using 32-bit entities and overflow in y2038. For historic reasons we truncate the 64-bit timestamps by converting from a timespec64 to a timespec first. Remove this unnecessary conversion step and do the truncation in the final functions that take a timestamp. This is transparent to users, but avoids one of the last uses of 'timespec' and lets us remove it later. Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- fs/nfs/nfs2xdr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index d94c7abdf25a..f6676af37d5d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -360,17 +360,17 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr, else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); - if (attr->ia_valid & ATTR_ATIME_SET) { + if (attr->ia_valid & ATTR_ATIME_SET) p = xdr_encode_time(p, &attr->ia_atime); - } else if (attr->ia_valid & ATTR_ATIME) { + else if (attr->ia_valid & ATTR_ATIME) p = xdr_encode_current_server_time(p, &attr->ia_atime); - } else + else p = xdr_time_not_set(p); - if (attr->ia_valid & ATTR_MTIME_SET) { + if (attr->ia_valid & ATTR_MTIME_SET) xdr_encode_time(p, &attr->ia_mtime); - } else if (attr->ia_valid & ATTR_MTIME) { + else if (attr->ia_valid & ATTR_MTIME) xdr_encode_current_server_time(p, &attr->ia_mtime); - } else + else xdr_time_not_set(p); } From a3167dacbad3a42dffb78cca8684fafcd94575f9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Nov 2019 21:16:27 +0100 Subject: [PATCH 114/658] nfs: encode nfsv4 timestamps as 64-bit On 32-bit architectures, xdr_encode_nfstime4() needlessly truncates timestamps to a 32-bit value in the range between year 1902 and 2038. Change it to use 'struct timespec64' to allow the entire range of values supported by the server. 
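For context, the nfstime4 wire format already carries the full range; sketched here as a C struct for illustration only (the real code encodes the two fields with xdr_encode_hyper() and cpu_to_be32(), as in the hunk below):

/* nfstime4 (RFC 7530): signed 64-bit seconds, 32-bit nanoseconds */
struct nfstime4_wire {
	__be64 seconds;		/* seconds since the epoch, signed on the wire */
	__be32 nseconds;	/* 0..999999999 */
};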
Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 728d88b6a698..8de4d250924b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1061,7 +1061,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve static __be32 * xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t) { - p = xdr_encode_hyper(p, (__s64)t->tv_sec); + p = xdr_encode_hyper(p, t->tv_sec); *p++ = cpu_to_be32(t->tv_nsec); return p; } From 8b98a532481d6f2075b460dbc157f0502f1803a8 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Thu, 19 Dec 2019 18:34:47 +0800 Subject: [PATCH 115/658] NFS4: Remove unneeded semicolon Fixes coccicheck warning: fs/nfs/nfs4state.c:1138:2-3: Unneeded semicolon fs/nfs/nfs4proc.c:6862:2-3: Unneeded semicolon fs/nfs/nfs4proc.c:8629:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/nfs4state.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5f51f62b0652..904335b91b6a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6859,7 +6859,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ case -NFS4ERR_STALE_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; nfs4_schedule_lease_recovery(server->nfs_client); - }; + } } static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) @@ -8626,7 +8626,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, case -EACCES: case -EAGAIN: goto out; - }; + } clp->cl_seqid++; if (!status) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 34552329233d..fe1b908eecc8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1135,7 +1135,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; - }; + } /* * Note: no locking needed as we are guaranteed to be first * on the sequence list From 6ed2144a80ebd9e40fa15a369f5528bafbc9b50d Mon Sep 17 00:00:00 2001 From: zhengbin Date: Thu, 19 Dec 2019 14:14:18 +0800 Subject: [PATCH 116/658] NFS: move dprintk after nfs_alloc_fattr in nfs3_proc_lookup In nfs3_proc_lookup, if nfs_alloc_fattr fails, will only print "NFS call lookup". This may be confusing, move dprintk after nfs_alloc_fattr. 
Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 657041c3a03f..67a05f35bb89 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -169,11 +169,11 @@ nfs3_proc_lookup(struct inode *dir, const struct qstr *name, }; int status; - dprintk("NFS call lookup %s\n", name->name); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; + dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_refresh_inode(dir, res.dir_attr); From abf8af78a61523c15d366228b4a598141208a264 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Dec 2019 10:28:28 -0500 Subject: [PATCH 117/658] SUNRPC: Capture signalled RPC tasks Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/sched.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 8c73ffb5f7fd..ee993575d2fa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -185,6 +185,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); DECLARE_EVENT_CLASS(rpc_task_queued, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9c79548c6847..55e900255b0c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -846,6 +846,8 @@ void rpc_signal_task(struct rpc_task *task) if (!RPC_IS_ACTIVATED(task)) return; + + trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); @@ -949,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task) * clean up after sleeping on some queue, we don't * break the loop here, but go around once more. */ - dprintk("RPC: %5u got signal\n", task->tk_pid); + trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); task->tk_rpc_status = -ERESTARTSYS; rpc_exit(task, -ERESTARTSYS); From 861e1671bc2eed063aa624cdb2be8cfff16331c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Dec 2019 10:28:33 -0500 Subject: [PATCH 118/658] NFS: Introduce trace events triggered by page writeback errors Try to capture the reason for the writeback path tagging an error on a page. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 3 +++ 2 files changed, 48 insertions(+) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index f64a33d2a1d1..4d6eb1703943 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -989,6 +989,51 @@ TRACE_EVENT(nfs_writeback_done, ) ); +DECLARE_EVENT_CLASS(nfs_page_error_class, + TP_PROTO( + const struct nfs_page *req, + int error + ), + + TP_ARGS(req, error), + + TP_STRUCT__entry( + __field(const void *, req) + __field(pgoff_t, index) + __field(unsigned int, offset) + __field(unsigned int, pgbase) + __field(unsigned int, bytes) + __field(int, error) + ), + + TP_fast_assign( + __entry->req = req; + __entry->index = req->wb_index; + __entry->offset = req->wb_offset; + __entry->pgbase = req->wb_pgbase; + __entry->bytes = req->wb_bytes; + __entry->error = error; + ), + + TP_printk( + "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d", + __entry->req, __entry->index, __entry->offset, + __entry->pgbase, __entry->bytes, __entry->error + ) +); + +#define DEFINE_NFS_PAGEERR_EVENT(name) \ + DEFINE_EVENT(nfs_page_error_class, name, \ + TP_PROTO( \ + const struct nfs_page *req, \ + int error \ + ), \ + TP_ARGS(req, error)) + +DEFINE_NFS_PAGEERR_EVENT(nfs_write_error); +DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error); +DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error); + TRACE_EVENT(nfs_initiate_commit, TP_PROTO( const struct nfs_commit_data *data diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 52cab65f91cf..21787711e352 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -593,6 +593,7 @@ release_request: static void nfs_write_error(struct nfs_page *req, int error) { nfs_set_pageerror(page_file_mapping(req->wb_page)); + trace_nfs_write_error(req, error); nfs_mapping_set_error(req->wb_page, error); nfs_inode_remove_request(req); nfs_end_page_writeback(req); @@ -999,6 +1000,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { nfs_set_pageerror(page_file_mapping(req->wb_page)); + trace_nfs_comp_error(req, hdr->error); nfs_mapping_set_error(req->wb_page, hdr->error); goto remove_req; } @@ -1847,6 +1849,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { if (req->wb_page) { + trace_nfs_commit_error(req, status); nfs_mapping_set_error(req->wb_page, status); nfs_inode_remove_request(req); } From 2bb50aabb6f32c11dc9b542b4bb214b14e031e6c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Dec 2019 10:28:38 -0500 Subject: [PATCH 119/658] NFS4: Report callback authentication errors This seems to be a somewhat common issue with Kerberos NFSv4.0 set-ups. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 11 ++++++++--- fs/nfs/nfs4trace.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 03a20f5716c7..79ff172eb1c8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,6 +18,7 @@ #include "callback.h" #include "internal.h" #include "nfs4session.h" +#include "nfs4trace.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status @@ -946,9 +947,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) if (hdr_arg.minorversion == 0) { cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); - if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) { - if (cps.clp) - nfs_put_client(cps.clp); + if (!cps.clp) { + trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident); + goto out_invalidcred; + } + if (!check_gss_callback_principal(cps.clp, rqstp)) { + trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident); + nfs_put_client(cps.clp); goto out_invalidcred; } } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index e60b6fbd5ada..e3586c16ef59 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -691,6 +691,41 @@ TRACE_EVENT(nfs4_xdr_status, ) ); +DECLARE_EVENT_CLASS(nfs4_cb_error_class, + TP_PROTO( + __be32 xid, + u32 cb_ident + ), + + TP_ARGS(xid, cb_ident), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, cbident) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(xid); + __entry->cbident = cb_ident; + ), + + TP_printk( + "xid=0x%08x cb_ident=0x%08x", + __entry->xid, __entry->cbident + ) +); + +#define DEFINE_CB_ERROR_EVENT(name) \ + DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \ + TP_PROTO( \ + __be32 xid, \ + u32 cb_ident \ + ), \ + TP_ARGS(xid, cb_ident)) + +DEFINE_CB_ERROR_EVENT(no_clp); +DEFINE_CB_ERROR_EVENT(badprinc); + DECLARE_EVENT_CLASS(nfs4_open_event, TP_PROTO( const struct nfs_open_context *ctx, From b8457606d95f219052cbcf07bd54c24c4290cd49 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Dec 2019 10:28:44 -0500 Subject: [PATCH 120/658] SUNRPC: call_connect_status should handle -EPROTO The xprtrdma connect logic can return -EPROTO if the underlying device or network path does not support RDMA. This can happen after a device removal/insertion. - When SOFTCONN is set, EPROTO is a permanent error. - When SOFTCONN is not set, EPROTO is treated as a temporary error. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a3379765605d..7324b21f923e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2130,6 +2130,7 @@ call_connect_status(struct rpc_task *task) case -ENETUNREACH: case -EHOSTUNREACH: case -EPIPE: + case -EPROTO: xprt_conditional_disconnect(task->tk_rqstp->rq_xprt, task->tk_rqstp->rq_connect_cookie); if (RPC_IS_SOFTCONN(task)) From bd89bc67f64fa23846645ce8b1c3cb51efebab32 Mon Sep 17 00:00:00 2001 From: Murphy Zhou Date: Thu, 2 Jan 2020 16:04:26 +0800 Subject: [PATCH 121/658] fs/nfs, swapon: check holes in swapfile swapon over NFS does not go through generic_swapfile_activate code path when setting up extents. This makes holes in NFS swapfiles possible which is not expected for swapon. 
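The test added below boils down to: a file without holes must have at least enough 512-byte blocks allocated to back its size. A simplified sketch (illustrative name, locking elided; the patch performs the check inline in nfs_swap_activate() under i_lock):

static bool nfs_swapfile_has_holes(struct inode *inode)
{
	/* i_blocks counts 512-byte units; fewer than i_size / 512
	 * of them means part of the file is unallocated. */
	return (long long)inode->i_blocks * 512 < i_size_read(inode);
}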
Signed-off-by: Murphy Zhou Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8eb731d9be3e..ccd9bc098806 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -489,7 +489,19 @@ static int nfs_launder_page(struct page *page) static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { + unsigned long blocks; + long long isize; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); + struct inode *inode = file->f_mapping->host; + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } *span = sis->pages; From c2bd2c0a55dd36c16b25a8fd93aa1053b576b72c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 1 Jan 2020 08:43:30 +0100 Subject: [PATCH 122/658] SUNRPC: constify copied structure The empty_iov structure is only copied into another structure, so make it const. The opportunity for this change was found using Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f3104be8ff5d..e5497dc2475b 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1079,7 +1079,7 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_enter_page); -static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; +static const struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) From 474c4f306eefbb21b67ebd1de802d005c7d7ecdc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 30 Dec 2019 16:32:38 +0100 Subject: [PATCH 123/658] nfs: NFS_SWAP should depend on SWAP If CONFIG_SWAP=n, it does not make much sense to offer the user the option to enable support for swapping over NFS, as that will still fail at run time: # swapon /swap swapon: /swap: swapon failed: Function not implemented Fix this by adding a dependency on CONFIG_SWAP. Fixes: a564b8f0398636ba ("nfs: enable swap on NFS") Signed-off-by: Geert Uytterhoeven Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 295a7a21b774..e7dd07f47825 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -90,7 +90,7 @@ config NFS_V4 config NFS_SWAP bool "Provide swap over NFS support" default n - depends on NFS_FS + depends on NFS_FS && SWAP select SUNRPC_SWAP help This option enables swapon to work on files located on NFS mounts. From e0b27d98bfed9cd9a22f96996bab24858646883d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jan 2020 13:17:34 +0000 Subject: [PATCH 124/658] NFS: Add missing null check for failed allocation Currently the allocation of buf is not being null checked and a null pointer dereference can occur when the memory allocation fails. Fix this by adding a check and returning -ENOMEM. 
Addresses-Coverity: ("Dereference null return") Fixes: 6d972518b821 ("NFS: Add fs_context support.") Signed-off-by: Colin Ian King Signed-off-by: Anna Schumaker --- fs/nfs/nfs4namespace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 10e9e1887841..de6875a9b391 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -137,6 +137,9 @@ static int nfs4_validate_fspath(struct dentry *dentry, int n; buf = kmalloc(4096, GFP_KERNEL); + if (!buf) + return -ENOMEM; + path = nfs4_path(dentry, buf, 4096); if (IS_ERR(path)) { kfree(buf); From 2e87036814290887a188652a893ab968bad9fad7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:27 -0500 Subject: [PATCH 125/658] xprtrdma: Eliminate ri_max_send_sges Clean-up. The max_send_sge value also happens to be stored in ep->rep_attr. Let's keep just a single copy. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 10 ++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 17 +++-------------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 523722be6a16..859c301d9d30 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -190,6 +190,16 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) { struct ib_device_attr *attrs = &ia->ri_id->device->attrs; int max_qp_wr, depth, delta; + unsigned int max_sge; + + max_sge = min_t(unsigned int, attrs->max_send_sge, + RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge); + return -ENOMEM; + } + ep->rep_attr.cap.max_send_sge = max_sge; + ep->rep_attr.cap.max_recv_sge = 1; ia->ri_mrtype = IB_MR_TYPE_MEM_REG; if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index aec3beb93b25..af917228d245 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -145,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, remaining -= min_t(unsigned int, PAGE_SIZE - offset, remaining); offset = 0; - if (++count > r_xprt->rx_ia.ri_max_send_sges) + if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge) return false; } } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fda3889993cb..945603662419 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -470,21 +470,12 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; struct ib_cq *sendcq, *recvcq; - unsigned int max_sge; int rc; ep->rep_max_requests = xprt_rdma_slot_table_entries; ep->rep_inline_send = xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; - max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge, - RPCRDMA_MAX_SEND_SGES); - if (max_sge < RPCRDMA_MIN_SEND_SGES) { - pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); - return -ENOMEM; - } - ia->ri_max_send_sges = max_sge; - rc = frwr_open(ia, ep); if (rc) return rc; @@ -492,8 +483,6 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->rep_attr.event_handler = rpcrdma_qp_event_handler; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; - ep->rep_attr.cap.max_send_sge = max_sge; - ep->rep_attr.cap.max_recv_sge = 1; 
ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; @@ -796,11 +785,11 @@ static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_sc_ctxs); } -static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) +static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) { struct rpcrdma_sendctx *sc; - sc = kzalloc(struct_size(sc, sc_sges, ia->ri_max_send_sges), + sc = kzalloc(struct_size(sc, sc_sges, ep->rep_attr.cap.max_send_sge), GFP_KERNEL); if (!sc) return NULL; @@ -828,7 +817,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) buf->rb_sc_last = i - 1; for (i = 0; i <= buf->rb_sc_last; i++) { - sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); + sc = rpcrdma_sendctx_create(&r_xprt->rx_ep); if (!sc) return -ENOMEM; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d796d68609ed..7655a99fd559 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -73,7 +73,6 @@ struct rpcrdma_ia { int ri_async_rc; unsigned int ri_max_segs; unsigned int ri_max_frwr_depth; - unsigned int ri_max_send_sges; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; unsigned long ri_flags; From cb586decbb88fcd068116af2d4e1e3e2e86978d6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:32 -0500 Subject: [PATCH 126/658] xprtrdma: Make sendctx queue lifetime the same as connection lifetime The size of the sendctx queue depends on the value stored in ia->ri_max_send_sges. This value is determined by querying the underlying device. Eventually, rpcrdma_ia_open() and rpcrdma_ep_create() will be called in the connect worker rather than at transport set-up time. The underlying device will not have been chosen device set-up time. The sendctx queue will thus have to be created after the underlying device has been chosen via address and route resolution; in other words, in the connect worker. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 12 ++++++++---- net/sunrpc/xprtrdma/verbs.c | 22 +++++++++++++++------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 18790582d2a5..c0e4c93324f5 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -729,6 +729,7 @@ TRACE_EVENT(xprtrdma_post_send, TP_STRUCT__entry( __field(const void *, req) + __field(const void *, sc) __field(unsigned int, task_id) __field(unsigned int, client_id) __field(int, num_sge) @@ -743,14 +744,15 @@ TRACE_EVENT(xprtrdma_post_send, __entry->client_id = rqst->rq_task->tk_client ? rqst->rq_task->tk_client->cl_clid : -1; __entry->req = req; + __entry->sc = req->rl_sendctx; __entry->num_sge = req->rl_wr.num_sge; __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; __entry->status = status; ), - TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d", + TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %sstatus=%d", __entry->task_id, __entry->client_id, - __entry->req, __entry->num_sge, + __entry->req, __entry->sc, __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"), (__entry->signaled ? 
"signaled " : ""), __entry->status @@ -849,6 +851,7 @@ TRACE_EVENT(xprtrdma_wc_send, TP_STRUCT__entry( __field(const void *, req) + __field(const void *, sc) __field(unsigned int, unmap_count) __field(unsigned int, status) __field(unsigned int, vendor_err) @@ -856,13 +859,14 @@ TRACE_EVENT(xprtrdma_wc_send, TP_fast_assign( __entry->req = sc->sc_req; + __entry->sc = sc; __entry->unmap_count = sc->sc_unmap_count; __entry->status = wc->status; __entry->vendor_err = __entry->status ? wc->vendor_err : 0; ), - TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)", - __entry->req, __entry->unmap_count, + TP_printk("req=%p sc=%p unmapped=%u: %s (%u/0x%x)", + __entry->req, __entry->sc, __entry->unmap_count, rdma_show_wc_status(__entry->status), __entry->status, __entry->vendor_err ) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 945603662419..b6aba0c85998 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -74,6 +74,8 @@ /* * internal functions */ +static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, struct rpcrdma_sendctx *sc); static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); @@ -428,6 +430,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); } rpcrdma_mrs_destroy(r_xprt); + rpcrdma_sendctxs_destroy(r_xprt); ib_dealloc_pd(ia->ri_pd); ia->ri_pd = NULL; @@ -705,6 +708,10 @@ retry: rpcrdma_reset_cwnd(r_xprt); rpcrdma_post_recvs(r_xprt, true); + rc = rpcrdma_sendctxs_create(r_xprt); + if (rc) + goto out; + rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); if (rc) goto out; @@ -757,6 +764,7 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) rpcrdma_xprt_drain(r_xprt); rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); + rpcrdma_sendctxs_destroy(r_xprt); } /* Fixed-size circular FIFO queue. This implementation is wait-free and @@ -776,13 +784,17 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) * queue activity, and rpcrdma_xprt_drain has flushed all remaining * Send requests. */ -static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; unsigned long i; + if (!buf->rb_sc_ctxs) + return; for (i = 0; i <= buf->rb_sc_last; i++) kfree(buf->rb_sc_ctxs[i]); kfree(buf->rb_sc_ctxs); + buf->rb_sc_ctxs = NULL; } static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) @@ -810,7 +822,6 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * Sends are posted. 
*/ i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; - dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i); buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); if (!buf->rb_sc_ctxs) return -ENOMEM; @@ -824,6 +835,8 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) buf->rb_sc_ctxs[i] = sc; } + buf->rb_sc_head = 0; + buf->rb_sc_tail = 0; return 0; } @@ -1166,10 +1179,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) init_llist_head(&buf->rb_free_reps); - rc = rpcrdma_sendctxs_create(r_xprt); - if (rc) - goto out; - return 0; out: rpcrdma_buffer_destroy(buf); @@ -1245,7 +1254,6 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { - rpcrdma_sendctxs_destroy(buf); rpcrdma_reps_destroy(buf); while (!list_empty(&buf->rb_send_bufs)) { From 7581d90109cad7d7322fd90cea023c706912f4bd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:37 -0500 Subject: [PATCH 127/658] xprtrdma: Refactor initialization of ep->rep_max_requests Clean up: there is no need to keep two copies of the same value. Also, in subsequent patches, rpcrdma_ep_create() will be called in the connect worker rather than at set-up time. Minor fix: Initialize the transport's sendctx to the value based on the capabilities of the underlying device, not the maximum setting. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 6 +++--- net/sunrpc/xprtrdma/transport.c | 3 ++- net/sunrpc/xprtrdma/verbs.c | 8 ++++---- net/sunrpc/xprtrdma/xprt_rdma.h | 5 ++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index af917228d245..520323ddc930 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -909,7 +909,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) goto out_err; *p++ = rqst->rq_xid; *p++ = rpcrdma_version; - *p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); + *p++ = r_xprt->rx_buf.rb_max_requests; /* When the ULP employs a GSS flavor that guarantees integrity * or privacy, direct data placement of individual data items @@ -1480,8 +1480,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) if (credits == 0) credits = 1; /* don't deadlock */ - else if (credits > buf->rb_max_requests) - credits = buf->rb_max_requests; + else if (credits > r_xprt->rx_ep.rep_max_requests) + credits = r_xprt->rx_ep.rep_max_requests; if (buf->rb_credits != credits) rpcrdma_update_cwnd(r_xprt, credits); rpcrdma_post_recvs(r_xprt, false); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7395eb2cfdeb..f868a75057ad 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -316,7 +316,8 @@ xprt_setup_rdma(struct xprt_create *args) if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); - xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, + xprt_rdma_slot_table_entries); if (!xprt) return ERR_PTR(-ENOMEM); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b6aba0c85998..766e77592cfd 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -475,13 +475,14 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct ib_cq *sendcq, *recvcq; int rc; - ep->rep_max_requests = xprt_rdma_slot_table_entries; + ep->rep_max_requests = r_xprt->rx_xprt.max_reqs; ep->rep_inline_send = 
xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; rc = frwr_open(ia, ep); if (rc) return rc; + r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests); ep->rep_attr.event_handler = rpcrdma_qp_event_handler; ep->rep_attr.qp_context = ep; @@ -821,7 +822,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * the ->send_request call to fail temporarily before too many * Sends are posted. */ - i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; + i = r_xprt->rx_ep.rep_max_requests + RPCRDMA_MAX_BC_REQUESTS; buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); if (!buf->rb_sc_ctxs) return -ENOMEM; @@ -1155,7 +1156,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; int i, rc; - buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests; buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_lock); INIT_LIST_HEAD(&buf->rb_mrs); @@ -1167,7 +1167,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_all_reps); rc = -ENOMEM; - for (i = 0; i < buf->rb_max_requests; i++) { + for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { struct rpcrdma_req *req; req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 7655a99fd559..0fde694144f5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -98,7 +98,7 @@ struct rpcrdma_ep { wait_queue_head_t rep_connect_wait; struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; - unsigned int rep_max_requests; /* set by /proc */ + unsigned int rep_max_requests; /* depends on device */ unsigned int rep_inline_send; /* negotiated */ unsigned int rep_inline_recv; /* negotiated */ int rep_receive_count; @@ -372,7 +372,7 @@ struct rpcrdma_buffer { struct llist_head rb_free_reps; - u32 rb_max_requests; + __be32 rb_max_requests; u32 rb_credits; /* most recent credit grant */ u32 rb_bc_srv_max_requests; @@ -582,7 +582,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) /* RPC/RDMA module init - xprtrdma/transport.c */ -extern unsigned int xprt_rdma_slot_table_entries; extern unsigned int xprt_rdma_max_inline_read; extern unsigned int xprt_rdma_max_inline_write; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); From 18d065a5d4f16eeefb690c298671c3f9131121fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:43 -0500 Subject: [PATCH 128/658] xprtrdma: Eliminate per-transport "max pages" To support device hotplug and migrating a connection between devices of different capabilities, we have to guarantee that all in-kernel devices can support the same max NFS payload size (1 megabyte). This means that possibly one or two in-tree devices are no longer supported for NFS/RDMA because they cannot support 1MB rsize/wsize. The only one I confirmed was cxgb3, but it has already been removed from the kernel. 
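In rough form (restating the checks added in the hunks below), the guarantee is enforced by rejecting a device that cannot express the fixed maximum payload as segments times pages-per-FRWR, and by always advertising that fixed maximum:

	/* in frwr_query_device(): */
	if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS)
		return -ENOMEM;

	/* in xprt_setup_rdma(): */
	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;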
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 40 +++++++++++++-------------------- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/transport.c | 14 ++++-------- net/sunrpc/xprtrdma/verbs.c | 4 ++-- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +-- 5 files changed, 23 insertions(+), 40 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 859c301d9d30..032a89656f75 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -178,7 +178,7 @@ out_list_err: * ep->rep_attr.cap.max_send_wr * ep->rep_attr.cap.max_recv_wr * ep->rep_max_requests - * ia->ri_max_segs + * ia->ri_max_rdma_segs * * And these FRWR-related fields: * ia->ri_max_frwr_depth @@ -209,14 +209,12 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) * capability, but perform optimally when the MRs are not larger * than a page. */ - if (attrs->max_sge_rd > 1) + if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS) ia->ri_max_frwr_depth = attrs->max_sge_rd; else ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len; if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS) ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS; - dprintk("RPC: %s: max FR page list depth = %u\n", - __func__, ia->ri_max_frwr_depth); /* Add room for frwr register and invalidate WRs. * 1. FRWR reg WR for head @@ -260,32 +258,24 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ - ia->ri_max_segs = + ia->ri_max_rdma_segs = DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth); /* Reply chunks require segments for head and tail buffers */ - ia->ri_max_segs += 2; - if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) - ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS; + ia->ri_max_rdma_segs += 2; + if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS) + ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS; + + /* Ensure the underlying device is capable of conveying the + * largest r/wsize NFS will ask for. This guarantees that + * failing over from one RDMA device to another will not + * break NFS I/O. + */ + if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS) + return -ENOMEM; + return 0; } -/** - * frwr_maxpages - Compute size of largest payload - * @r_xprt: transport - * - * Returns maximum size of an RPC message, in pages. - * - * FRWR mode conveys a list of pages per chunk segment. The - * maximum length of that list is the FRWR page list depth. 
- */ -size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - - return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth); -} - /** * frwr_map - Register a memory region * @r_xprt: controlling transport diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 520323ddc930..c6dcea06c754 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -111,7 +111,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) */ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) { - unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs; + unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs; struct rpcrdma_ep *ep = &r_xprt->rx_ep; ep->rep_max_inline_send = diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index f868a75057ad..3cfeba68ee9a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -359,19 +359,13 @@ xprt_setup_rdma(struct xprt_create *args) if (rc) goto out3; - INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, - xprt_rdma_connect_worker); - - xprt->max_payload = frwr_maxpages(new_xprt); - if (xprt->max_payload == 0) - goto out4; - xprt->max_payload <<= PAGE_SHIFT; - dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", - __func__, xprt->max_payload); - if (!try_module_get(THIS_MODULE)) goto out4; + INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, + xprt_rdma_connect_worker); + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; + dprintk("RPC: %s: %s:%s\n", __func__, xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_PORT]); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 766e77592cfd..21fc5766dcde 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -936,7 +936,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; unsigned int count; - for (count = 0; count < ia->ri_max_segs; count++) { + for (count = 0; count < ia->ri_max_rdma_segs; count++) { struct rpcrdma_mr *mr; int rc; @@ -1018,7 +1018,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, /* Compute maximum header buffer size in bytes */ maxhdrsize = rpcrdma_fixed_maxsz + 3 + - r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz; + r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz; maxhdrsize *= sizeof(__be32); rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), DMA_TO_DEVICE, flags); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0fde694144f5..aac4cf959c3a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -71,7 +71,7 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; int ri_async_rc; - unsigned int ri_max_segs; + unsigned int ri_max_rdma_segs; unsigned int ri_max_frwr_depth; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; @@ -539,7 +539,6 @@ void frwr_reset(struct rpcrdma_req *req); int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); void frwr_release_mr(struct rpcrdma_mr *mr); -size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, __be32 xid, From 25868e610aed20e06f6ff10a562a04e8aaea5a5e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:48 -0500 
Subject: [PATCH 129/658] xprtrdma: Refactor frwr_is_supported Refactor: Perform the "is supported" check in rpcrdma_ep_create() instead of in rpcrdma_ia_open(). frwr_open() is where most of the logic to query device attributes is already located. The current code displays a redundant error message when the device does not support FRWR. As an additional clean-up, this patch removes the extra message. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 54 ++++++++++++++------------------- net/sunrpc/xprtrdma/verbs.c | 14 +-------- net/sunrpc/xprtrdma/xprt_rdma.h | 4 +-- 3 files changed, 25 insertions(+), 47 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 032a89656f75..095be887753e 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -50,28 +50,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -/** - * frwr_is_supported - Check if device supports FRWR - * @device: interface adapter to check - * - * Returns true if device supports FRWR, otherwise false - */ -bool frwr_is_supported(struct ib_device *device) -{ - struct ib_device_attr *attrs = &device->attrs; - - if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) - goto out_not_supported; - if (attrs->max_fast_reg_page_list_len == 0) - goto out_not_supported; - return true; - -out_not_supported: - pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n", - device->name); - return false; -} - /** * frwr_release_mr - Destroy one MR * @mr: MR allocated by frwr_init_mr @@ -170,13 +148,12 @@ out_list_err: } /** - * frwr_open - Prepare an endpoint for use with FRWR - * @ia: interface adapter this endpoint will use - * @ep: endpoint to prepare + * frwr_query_device - Prepare a transport for use with FRWR + * @r_xprt: controlling transport instance + * @device: RDMA device to query * * On success, sets: - * ep->rep_attr.cap.max_send_wr - * ep->rep_attr.cap.max_recv_wr + * ep->rep_attr * ep->rep_max_requests * ia->ri_max_rdma_segs * @@ -184,14 +161,27 @@ out_list_err: * ia->ri_max_frwr_depth * ia->ri_mrtype * - * On failure, a negative errno is returned. + * Return values: + * On success, returns zero. 
+ * %-EINVAL - the device does not support FRWR memory registration + * %-ENOMEM - the device is not sufficiently capable for NFS/RDMA */ -int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) +int frwr_query_device(struct rpcrdma_xprt *r_xprt, + const struct ib_device *device) { - struct ib_device_attr *attrs = &ia->ri_id->device->attrs; + const struct ib_device_attr *attrs = &device->attrs; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; int max_qp_wr, depth, delta; unsigned int max_sge; + if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) || + attrs->max_fast_reg_page_list_len == 0) { + pr_err("rpcrdma: 'frwr' mode is not supported by device %s\n", + device->name); + return -EINVAL; + } + max_sge = min_t(unsigned int, attrs->max_send_sge, RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { @@ -238,7 +228,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) } while (delta > 0); } - max_qp_wr = ia->ri_id->device->attrs.max_qp_wr; + max_qp_wr = attrs->max_qp_wr; max_qp_wr -= RPCRDMA_BACKWARD_WRS; max_qp_wr -= 1; if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) @@ -249,7 +239,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { ep->rep_max_requests = max_qp_wr / depth; if (!ep->rep_max_requests) - return -EINVAL; + return -ENOMEM; ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth; } ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 21fc5766dcde..dcb2073ec5bd 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -368,18 +368,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) goto out_err; } - switch (xprt_rdma_memreg_strategy) { - case RPCRDMA_FRWR: - if (frwr_is_supported(ia->ri_id->device)) - break; - /*FALLTHROUGH*/ - default: - pr_err("rpcrdma: Device %s does not support memreg mode %d\n", - ia->ri_id->device->name, xprt_rdma_memreg_strategy); - rc = -EINVAL; - goto out_err; - } - return 0; out_err: @@ -479,7 +467,7 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->rep_inline_send = xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; - rc = frwr_open(ia, ep); + rc = frwr_query_device(r_xprt, ia->ri_id->device); if (rc) return rc; r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index aac4cf959c3a..0aed1e98f2bf 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -534,9 +534,9 @@ rpcrdma_data_dir(bool writing) /* Memory registration calls xprtrdma/frwr_ops.c */ -bool frwr_is_supported(struct ib_device *device); void frwr_reset(struct rpcrdma_req *req); -int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); +int frwr_query_device(struct rpcrdma_xprt *r_xprt, + const struct ib_device *device); int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); void frwr_release_mr(struct rpcrdma_mr *mr); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, From b78de1dca00376aaba7a58bb5fe21c1606524abe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:53 -0500 Subject: [PATCH 130/658] xprtrdma: Allocate and map transport header buffers at connect time Currently the underlying RDMA device is chosen at transport set-up time. But it will soon be at connect time instead. The maximum size of a transport header is based on device capabilities. 
Thus transport header buffers have to be allocated _after_ the underlying device has been chosen (via address and route resolution); ie, in the connect worker. Thus, move the allocation of transport header buffers to the connect worker, after the point at which the underlying RDMA device has been chosen. This also means the RDMA device is available to do a DMA mapping of these buffers at connect time, instead of in the hot I/O path. Make that optimization as well. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 4 ++ net/sunrpc/xprtrdma/rpc_rdma.c | 10 +-- net/sunrpc/xprtrdma/verbs.c | 108 +++++++++++++++++++++--------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 4 files changed, 85 insertions(+), 38 deletions(-) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 9d02eae353c6..1a0ae0c61353 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -194,6 +194,10 @@ create_req: req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); if (!req) return NULL; + if (rpcrdma_req_setup(r_xprt, req)) { + rpcrdma_req_destroy(req); + return NULL; + } xprt->bc_alloc_count++; rqst = &req->rl_slot; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c6dcea06c754..28020ec104d4 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -580,22 +580,19 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc) /* Prepare an SGE for the RPC-over-RDMA transport header. */ -static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, +static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, u32 len) { struct rpcrdma_sendctx *sc = req->rl_sendctx; struct rpcrdma_regbuf *rb = req->rl_rdmabuf; struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; - if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) - return false; sge->addr = rdmab_addr(rb); sge->length = len; sge->lkey = rdmab_lkey(rb); ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, DMA_TO_DEVICE); - return true; } /* The head iovec is straightforward, as it is usually already @@ -836,10 +833,9 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, req->rl_wr.num_sge = 0; req->rl_wr.opcode = IB_WR_SEND; - ret = -EIO; - if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) - goto out_unmap; + rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen); + ret = -EIO; switch (rtype) { case rpcrdma_noch_pullup: if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr)) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index dcb2073ec5bd..90c215beef06 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -78,6 +78,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, struct rpcrdma_sendctx *sc); +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); @@ -381,6 +382,8 @@ out_err: * * Divest transport H/W resources associated with this adapter, * but allow it to be restored later. + * + * Caller must hold the transport send lock. 
*/ void rpcrdma_ia_remove(struct rpcrdma_ia *ia) @@ -388,8 +391,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_req *req; /* This is similar to rpcrdma_ep_destroy, but: * - Don't cancel the connect worker. @@ -412,11 +413,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) * mappings and MRs are gone. */ rpcrdma_reps_unmap(r_xprt); - list_for_each_entry(req, &buf->rb_allreqs, rl_all) { - rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf); - rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); - rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); - } + rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); ib_dealloc_pd(ia->ri_pd); @@ -715,6 +712,11 @@ retry: goto out; } + rc = rpcrdma_reqs_setup(r_xprt); + if (rc) { + rpcrdma_ep_disconnect(ep, ia); + goto out; + } rpcrdma_mrs_create(r_xprt); out: @@ -996,32 +998,19 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, gfp_t flags) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; - struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; - size_t maxhdrsize; req = kzalloc(sizeof(*req), flags); if (req == NULL) goto out1; - /* Compute maximum header buffer size in bytes */ - maxhdrsize = rpcrdma_fixed_maxsz + 3 + - r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz; - maxhdrsize *= sizeof(__be32); - rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), - DMA_TO_DEVICE, flags); - if (!rb) - goto out2; - req->rl_rdmabuf = rb; - xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); - req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); if (!req->rl_sendbuf) - goto out3; + goto out2; req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); if (!req->rl_recvbuf) - goto out4; + goto out3; INIT_LIST_HEAD(&req->rl_free_mrs); INIT_LIST_HEAD(&req->rl_registered); @@ -1030,10 +1019,8 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, spin_unlock(&buffer->rb_lock); return req; -out4: - kfree(req->rl_sendbuf); out3: - kfree(req->rl_rdmabuf); + kfree(req->rl_sendbuf); out2: kfree(req); out1: @@ -1041,10 +1028,71 @@ out1: } /** - * rpcrdma_reqs_reset - Reset all reqs owned by a transport + * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object * @r_xprt: controlling transport instance + * @req: rpcrdma_req object to set up * - * ASSUMPTION: the rb_allreqs list is stable for the duration, + * Returns zero on success, and a negative errno on failure. + */ +int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) +{ + struct rpcrdma_regbuf *rb; + size_t maxhdrsize; + + /* Compute maximum header buffer size in bytes */ + maxhdrsize = rpcrdma_fixed_maxsz + 3 + + r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz; + maxhdrsize *= sizeof(__be32); + rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), + DMA_TO_DEVICE, GFP_KERNEL); + if (!rb) + goto out; + + if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) + goto out_free; + + req->rl_rdmabuf = rb; + xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); + return 0; + +out_free: + rpcrdma_regbuf_free(rb); +out: + return -ENOMEM; +} + +/* ASSUMPTION: the rb_allreqs list is stable for the duration, + * and thus can be walked without holding rb_lock. Eg. the + * caller is holding the transport send lock to exclude + * device removal or disconnection. 
+ */ +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_req *req; + int rc; + + list_for_each_entry(req, &buf->rb_allreqs, rl_all) { + rc = rpcrdma_req_setup(r_xprt, req); + if (rc) + return rc; + } + return 0; +} + +static void rpcrdma_req_reset(struct rpcrdma_req *req) +{ + /* Credits are valid for only one connection */ + req->rl_slot.rq_cong = 0; + + rpcrdma_regbuf_free(req->rl_rdmabuf); + req->rl_rdmabuf = NULL; + + rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); + rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); +} + +/* ASSUMPTION: the rb_allreqs list is stable for the duration, * and thus can be walked without holding rb_lock. Eg. the * caller is holding the transport send lock to exclude * device removal or disconnection. @@ -1054,10 +1102,8 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_req *req; - list_for_each_entry(req, &buf->rb_allreqs, rl_all) { - /* Credits are valid only for one connection */ - req->rl_slot.rq_cong = 0; - } + list_for_each_entry(req, &buf->rb_allreqs, rl_all) + rpcrdma_req_reset(req); } static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0aed1e98f2bf..37d5080c250b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -478,6 +478,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); */ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, gfp_t flags); +int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); From 85810388a9ddcc8e82738a3df6d3d7b32a79e0ea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:56:58 -0500 Subject: [PATCH 131/658] xprtrdma: Destroy rpcrdma_rep when Receive is flushed This reduces the hardware and memory footprint of an unconnected transport. At some point in the future, transport reconnect will allow resolving the destination IP address through a different device. The current change enables reps for the new connection to be allocated on whichever NUMA node the new device affines to after a reconnect. Note that this does not destroy _all_ the transport's reps... there will be a few that are still part of a running RPC completion. 
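
For readers following the series, a condensed sketch of the Receive
completion path after this change. The normal completion path, tracing
and statistics are elided; only the helpers visible in the hunk below
are assumed, so treat this as an illustration rather than the final
function body:

	static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
	{
		struct rpcrdma_rep *rep = container_of(wc->wr_cqe,
						struct rpcrdma_rep, rr_cqe);

		if (wc->status != IB_WC_SUCCESS)
			goto out_flushed;

		/* ... normal Receive processing and reply matching ... */
		return;

	out_flushed:
		/* A flushed Receive now frees its rep outright instead of
		 * recycling it, so replacement reps are allocated against
		 * whichever device backs the next connection.
		 */
		rpcrdma_rep_destroy(rep);
	}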
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 90c215beef06..52481e70891a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -80,6 +80,7 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, struct rpcrdma_sendctx *sc); static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); @@ -177,7 +178,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: - rpcrdma_recv_buffer_put(rep); + rpcrdma_rep_destroy(rep); } static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, @@ -1106,6 +1107,9 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) rpcrdma_req_reset(req); } +/* No locking needed here. This function is called only by the + * Receive completion handler. + */ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) { @@ -1138,6 +1142,9 @@ out: return NULL; } +/* No locking needed here. This function is invoked only by the + * Receive completion handler, or during transport shutdown. + */ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) { list_del(&rep->rr_all); From b7ff0185e92a68e77da7edff38e124dfb25b079c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:57:04 -0500 Subject: [PATCH 132/658] xprtrdma: Destroy reps from previous connection instance To safely get rid of all rpcrdma_reps from a particular connection instance, xprtrdma has to wait until each of those reps is finished being used. A rep may be backing the rq_rcv_buf of an RPC that has just completed, for example. Since it is safe to invoke rpcrdma_rep_destroy() only in the Receive completion handler, simply mark reps remaining in the rb_all_reps list after the transport is drained. These will then be deleted as rpcrdma_post_recvs pulls them off the rep free list. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 52481e70891a..ec557e434de0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1174,8 +1174,10 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - list_for_each_entry(rep, &buf->rb_all_reps, rr_all) + list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); + rep->rr_temp = true; + } } static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) From e515dd9d76d22446b67f1568e3fc39ec84635360 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:57:09 -0500 Subject: [PATCH 133/658] xprtrdma: DMA map rr_rdma_buf as each rpcrdma_rep is created Clean up: This simplifies the logic in rpcrdma_post_recvs. 
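
As an illustration of the simplification: once every rep is created
with its rr_rdmabuf already DMA-mapped, the posting loop only has to
chain Receive WRs. The sketch below is not the final code; the
rep-lookup helper name is a stand-in, and locking, tracing and the
error path are omitted:

	/* Sketch: no second pass over the WR chain is needed to DMA-map
	 * buffers, and no release path is needed for mapping failures.
	 */
	while (needed) {
		rep = get_or_create_rep(r_xprt, temp);	/* stand-in helper */
		if (!rep)
			break;

		rep->rr_recv_wr.next = wr;	/* chain this Receive WR */
		wr = &rep->rr_recv_wr;
		--needed;
		++count;
	}
	if (wr)
		rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
				  (const struct ib_recv_wr **)&bad_wr);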
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ec557e434de0..353f61ac8d51 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1110,8 +1110,9 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) /* No locking needed here. This function is called only by the * Receive completion handler. */ -static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, - bool temp) +static noinline +struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, + bool temp) { struct rpcrdma_rep *rep; @@ -1124,6 +1125,9 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, if (!rep->rr_rdmabuf) goto out_free; + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) + goto out_free_regbuf; + xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf)); rep->rr_cqe.done = rpcrdma_wc_receive; @@ -1136,6 +1140,8 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps); return rep; +out_free_regbuf: + rpcrdma_regbuf_free(rep->rr_rdmabuf); out_free: kfree(rep); out: @@ -1537,7 +1543,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct ib_recv_wr *i, *wr, *bad_wr; + struct ib_recv_wr *wr, *bad_wr; struct rpcrdma_rep *rep; int needed, count, rc; @@ -1564,23 +1570,15 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (!rep) break; + trace_xprtrdma_post_recv(rep); rep->rr_recv_wr.next = wr; wr = &rep->rr_recv_wr; --needed; + ++count; } if (!wr) goto out; - for (i = wr; i; i = i->next) { - rep = container_of(i, struct rpcrdma_rep, rr_recv_wr); - - if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) - goto release_wrs; - - trace_xprtrdma_post_recv(rep); - ++count; - } - rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); out: @@ -1597,11 +1595,4 @@ out: } ep->rep_receive_count += count; return; - -release_wrs: - for (i = wr; i;) { - rep = container_of(i, struct rpcrdma_rep, rr_recv_wr); - i = i->next; - rpcrdma_recv_buffer_put(rep); - } } From 0df68ced55443243951d02cc497be31fadf28173 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:00 -0500 Subject: [PATCH 134/658] NFS: Revalidate the file size on a fatal write error If we suffer a fatal error upon writing a file, which causes us to need to revalidate the entire mapping, then we should also revalidate the file size. 
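
The core of the fix, pulled out here for readers skimming the series;
this restates the hunk below rather than adding anything new:

	/* On a fatal writeback error, invalidate the mapping and force
	 * the next revalidation to refetch the file size from the server.
	 */
	spin_lock(&inode->i_lock);
	NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED |
					NFS_INO_REVAL_PAGECACHE |
					NFS_INO_INVALID_SIZE;
	spin_unlock(&inode->i_lock);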
Fixes: d2ceb7e57086 ("NFS: Don't use page_file_mapping after removing the page") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 21787711e352..adedc16780c5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -243,7 +243,15 @@ out: /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct address_space *mapping) { + struct inode *inode = mapping->host; + nfs_zap_mapping(mapping->host, mapping); + /* Force file size revalidation */ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_SIZE; + spin_unlock(&inode->i_lock); } static void nfs_mapping_set_error(struct page *page, int error) From b8946d7bfb9417ec171693d4478a831420aead5f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:01 -0500 Subject: [PATCH 135/658] NFS: Revalidate the file mapping on all fatal writeback errors If a write or commit failed, and the mapping sees a fatal error, we need to revalidate the contents of that mapping. Fixes: 06c9fdf3b9f1 ("NFS: On fatal writeback errors, we need to call nfs_inode_remove_request()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index adedc16780c5..6dd8d6e6d847 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -256,8 +256,11 @@ static void nfs_set_pageerror(struct address_space *mapping) static void nfs_mapping_set_error(struct page *page, int error) { + struct address_space *mapping = page_file_mapping(page); + SetPageError(page); - mapping_set_error(page_file_mapping(page), error); + mapping_set_error(mapping, error); + nfs_set_pageerror(mapping); } /* @@ -600,7 +603,6 @@ release_request: static void nfs_write_error(struct nfs_page *req, int error) { - nfs_set_pageerror(page_file_mapping(req->wb_page)); trace_nfs_write_error(req, error); nfs_mapping_set_error(req->wb_page, error); nfs_inode_remove_request(req); @@ -1007,7 +1009,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - nfs_set_pageerror(page_file_mapping(req->wb_page)); trace_nfs_comp_error(req, hdr->error); nfs_mapping_set_error(req->wb_page, hdr->error); goto remove_req; From b32d285539e061dc3961e86f825d4ded5ba6de14 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:02 -0500 Subject: [PATCH 136/658] SUNRPC: Remove broken gss_mech_list_pseudoflavors() Remove gss_mech_list_pseudoflavors() and its callers. This is part of an unused API, and could leak an RCU reference if it were ever called. 
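
To make the leak concrete: the overflow path of the removed helper (see
the gss_mech_switch.c hunk below) returns while still inside
rcu_read_lock(), and drops a spinlock it never acquired. Had the
function been worth keeping, a balanced early exit would look roughly
like the sketch below; since the API has no callers, removing it
outright is the simpler fix:

	rcu_read_lock();
	list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
		for (j = 0; j < pos->gm_pf_num; j++) {
			if (i >= size) {
				rcu_read_unlock();	/* balance the read lock */
				return -ENOMEM;
			}
			array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
		}
	}
	rcu_read_unlock();
	return i;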
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 2 -- include/linux/sunrpc/gss_api.h | 3 -- net/sunrpc/auth.c | 49 --------------------------- net/sunrpc/auth_gss/auth_gss.c | 1 - net/sunrpc/auth_gss/gss_mech_switch.c | 29 ---------------- 5 files changed, 84 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index e9ec742796e7..4f6b28487f28 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -113,7 +113,6 @@ struct rpc_authops { int (*hash_cred)(struct auth_cred *, unsigned int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); - int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, struct rpcsec_gss_info *); @@ -158,7 +157,6 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, struct rpcsec_gss_info *); int rpcauth_get_gssinfo(rpc_authflavor_t, struct rpcsec_gss_info *); -int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 1cc6cefb1220..48c1b1674cbf 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -150,9 +150,6 @@ struct gss_api_mech *gss_mech_get_by_name(const char *); /* Similar, but get by pseudoflavor. */ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); -/* Fill in an array with a list of supported pseudoflavors */ -int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); - struct gss_api_mech * gss_mech_get(struct gss_api_mech *); /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index cdb05b48de44..5748ad0ba1bd 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -221,55 +221,6 @@ rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info) } EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo); -/** - * rpcauth_list_flavors - discover registered flavors and pseudoflavors - * @array: array to fill in - * @size: size of "array" - * - * Returns the number of array items filled in, or a negative errno. - * - * The returned array is not sorted by any policy. Callers should not - * rely on the order of the items in the returned array. 
- */ -int -rpcauth_list_flavors(rpc_authflavor_t *array, int size) -{ - const struct rpc_authops *ops; - rpc_authflavor_t flavor, pseudos[4]; - int i, len, result = 0; - - rcu_read_lock(); - for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) { - ops = rcu_dereference(auth_flavors[flavor]); - if (result >= size) { - result = -ENOMEM; - break; - } - - if (ops == NULL) - continue; - if (ops->list_pseudoflavors == NULL) { - array[result++] = ops->au_flavor; - continue; - } - len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos)); - if (len < 0) { - result = len; - break; - } - for (i = 0; i < len; i++) { - if (result >= size) { - result = -ENOMEM; - break; - } - array[result++] = pseudos[i]; - } - } - rcu_read_unlock(); - return result; -} -EXPORT_SYMBOL_GPL(rpcauth_list_flavors); - struct rpc_auth * rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d75fddca44c9..24ca861815b1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2118,7 +2118,6 @@ static const struct rpc_authops authgss_ops = { .hash_cred = gss_hash_cred, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, - .list_pseudoflavors = gss_mech_list_pseudoflavors, .info2flavor = gss_mech_info2flavor, .flavor2info = gss_mech_flavor2info, }; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index d3685d4ed9e0..db550bfc2642 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -219,35 +219,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) return gm; } -/** - * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors - * @array_ptr: array to fill in - * @size: size of "array" - * - * Returns the number of array items filled in, or a negative errno. - * - * The returned array is not sorted by any policy. Callers should not - * rely on the order of the items in the returned array. - */ -int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) -{ - struct gss_api_mech *pos = NULL; - int j, i = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(pos, ®istered_mechs, gm_list) { - for (j = 0; j < pos->gm_pf_num; j++) { - if (i >= size) { - spin_unlock(®istered_mechs_lock); - return -ENOMEM; - } - array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; - } - } - rcu_read_unlock(); - return i; -} - /** * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor * @gm: GSS mechanism handle From 2197e9b06c228b65a2cef98ef34d6bf42fa1af3d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:03 -0500 Subject: [PATCH 137/658] NFS: Fix up fsync() when the server rebooted Don't clear the NFS_CONTEXT_RESEND_WRITES flag until after calling nfs_commit_inode(). Otherwise, if nfs_commit_inode() returns an error, we end up with dirty pages in the page cache, but no tag to tell us that those pages need resending. 
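
A reduced before/after view of the ordering change; the surrounding
fsync retry loop is elided here, so see the diff below for the real
control flow:

	/* before: the resend hint is cleared even when the commit fails */
	do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
	status = nfs_commit_inode(inode, FLUSH_SYNC);

	/* after: the flag is cleared only once the commit (and the pNFS
	 * sync) have succeeded, so a failed commit leaves the hint in
	 * place for the next attempt
	 */
	ret = nfs_commit_inode(inode, FLUSH_SYNC);
	if (ret < 0)
		return ret;
	/* ... */
	if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
		break;	/* nothing left to resend */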
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ccd9bc098806..f96367a2463e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -204,44 +204,39 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); static int nfs_file_fsync_commit(struct file *file, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); - int do_resend, status; - int ret = 0; + int ret; dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - status = nfs_commit_inode(inode, FLUSH_SYNC); - if (status == 0) - status = file_check_and_advance_wb_err(file); - if (status < 0) { - ret = status; - goto out; - } - do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - if (do_resend) - ret = -EAGAIN; -out: - return ret; + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + return ret; + return file_check_and_advance_wb_err(file); } int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int ret; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + int ret; trace_nfs_fsync_enter(inode); - do { + for (;;) { ret = file_write_and_wait_range(file, start, end); if (ret != 0) break; ret = nfs_file_fsync_commit(file, datasync); - if (!ret) - ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; + ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; + if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + break; /* * If nfs_file_fsync_commit detected a server reboot, then * resend all dirty pages that might have been covered by @@ -249,7 +244,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ start = 0; end = LLONG_MAX; - } while (ret == -EAGAIN); + } trace_nfs_fsync_exit(inode, ret); return ret; From 221203ce6406273cf00e5c6397257d986c003ee6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:04 -0500 Subject: [PATCH 138/658] NFS/pnfs: Fix pnfs_generic_prepare_to_resend_writes() Instead of making assumptions about the commit verifier contents, change the commit code to ensure we always check that the verifier was set by the XDR code. 
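
The convention introduced here, sketched from the hunks below: a write
verifier is only meaningful once the XDR decoder has marked it by
setting ->committed, and "prepare to resend" now produces a verifier
that can never match. Condensed for illustration:

	/* decode paths: mark the verifier only when it was actually
	 * decoded from the reply
	 */
	error = decode_writeverf3(xdr, &verf->verifier);
	if (!error)
		verf->committed = NFS_FILE_SYNC;

	/* pnfs_generic_prepare_to_resend_writes(): an unset verifier */
	memset(&verf->verifier, 0, sizeof(verf->verifier));
	verf->committed = NFS_UNSTABLE;

	/* consumers check the marker before trusting the verifier bytes */
	if (verf->committed > NFS_UNSTABLE &&
	    !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier))
		nfs_inode_remove_request(req);	/* COMMIT covered this request */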
Fixes: f54bcf2ecee9 ("pnfs: Prepare for flexfiles by pulling out common code") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 4 ++-- fs/nfs/nfs3xdr.c | 5 ++++- fs/nfs/nfs4xdr.c | 5 ++++- fs/nfs/pnfs_nfs.c | 7 +++---- fs/nfs/write.c | 4 +++- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 040a50fd9bf3..29f00da8a0b7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -245,10 +245,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index); /* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1; - return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); } /** diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 927eb680f161..69971f6c840d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2334,6 +2334,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error; @@ -2346,7 +2347,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8de4d250924b..47817ef0aadb 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4313,11 +4313,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 82af4809b869..8b37e7f8e789 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -31,12 +31,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf; data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6dd8d6e6d847..c86fc9efd99b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1840,6 +1840,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; @@ -1868,7 +1869,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. 
*/ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req); From 25925b00a972e4ec12702483686b48b609e24703 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:05 -0500 Subject: [PATCH 139/658] NFSv4: Improve read/write/commit tracing Ensure we always return the number of bytes read/written. Also display the pnfs filehandle if it is in use. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4trace.h | 52 ++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index e3586c16ef59..497740f89f00 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1729,7 +1729,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(size_t, count) + __field(u32, arg_count) + __field(u32, res_count) __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) @@ -1737,13 +1738,18 @@ DECLARE_EVENT_CLASS(nfs4_read_event, TP_fast_assign( const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = hdr->args.offset; - __entry->count = hdr->args.count; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); @@ -1753,14 +1759,14 @@ DECLARE_EVENT_CLASS(nfs4_read_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count, + __entry->arg_count, __entry->res_count, __entry->stateid_seq, __entry->stateid_hash ) ); @@ -1789,7 +1795,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(size_t, count) + __field(u32, arg_count) + __field(u32, res_count) __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) @@ -1797,13 +1804,18 @@ DECLARE_EVENT_CLASS(nfs4_write_event, TP_fast_assign( const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = hdr->args.offset; - __entry->count = hdr->args.count; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; __entry->error = error < 0 ? 
-error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); @@ -1813,14 +1825,14 @@ DECLARE_EVENT_CLASS(nfs4_write_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count, + __entry->arg_count, __entry->res_count, __entry->stateid_seq, __entry->stateid_hash ) ); @@ -1849,24 +1861,28 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(loff_t, offset) - __field(size_t, count) __field(unsigned long, error) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = data->args.offset; __entry->count = data->args.count; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu", + "offset=%lld count=%u", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), From 118b6292195cfb86a9f43cb65610fc6d980c65f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:06 -0500 Subject: [PATCH 140/658] NFS: Fix fix of show_nfs_errors Casting a negative value to an unsigned long is not the same as converting it to its absolute value. Fixes: 96650e2effa2 ("NFS: Fix show_nfs_errors macros again") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4trace.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 497740f89f00..99d3c705c00d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -352,7 +352,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(dstaddr, clp->cl_hostname); ), @@ -432,7 +432,8 @@ TRACE_EVENT(nfs4_sequence_done, __entry->target_highest_slotid = res->sr_target_highest_slotid; __entry->status_flags = res->sr_status_flags; - __entry->error = res->sr_status; + __entry->error = res->sr_status < 0 ? + -res->sr_status : 0; ), TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " @@ -640,7 +641,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, ), TP_fast_assign( - __entry->error = status; + __entry->error = status < 0 ? -status : 0; __entry->state = clp->cl_state; __assign_str(hostname, clp->cl_hostname); __assign_str(section, section); @@ -659,7 +660,7 @@ TRACE_EVENT(nfs4_xdr_status, TP_PROTO( const struct xdr_stream *xdr, u32 op, - int error + u32 error ), TP_ARGS(xdr, op, error), @@ -884,7 +885,7 @@ TRACE_EVENT(nfs4_close, __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->fmode = (__force unsigned int)state->state; - __entry->error = error; + __entry->error = error < 0 ? 
-error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid.seqid); __entry->stateid_hash = @@ -949,7 +950,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -1021,7 +1022,7 @@ TRACE_EVENT(nfs4_set_lock, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -1199,7 +1200,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, TP_fast_assign( __entry->dev = res->server->s_dev; __entry->fhandle = nfs_fhandle_hash(args->fhandle); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid->seqid); __entry->stateid_hash = @@ -1239,7 +1240,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); @@ -1341,7 +1342,7 @@ TRACE_EVENT(nfs4_lookupp, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = NFS_FILEID(inode); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1377,7 +1378,7 @@ TRACE_EVENT(nfs4_rename, __entry->dev = olddir->i_sb->s_dev; __entry->olddir = NFS_FILEID(olddir); __entry->newdir = NFS_FILEID(newdir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(oldname, oldname->name); __assign_str(newname, newname->name); ), @@ -1468,7 +1469,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = @@ -1524,7 +1525,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, __entry->valid = fattr->valid; __entry->fhandle = nfs_fhandle_hash(fhandle); __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1571,7 +1572,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1628,7 +1629,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1947,7 +1948,7 @@ TRACE_EVENT(nfs4_layoutget, __entry->iomode = args->iomode; __entry->offset = args->offset; __entry->count = args->length; - __entry->error = error; + __entry->error = error < 0 ? 
-error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = From 0722dc9fea58e8199577963bb7601ff9f3e00b0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:07 -0500 Subject: [PATCH 141/658] pNFS/flexfiles: Record resend attempts on I/O failure If the attempt to do pNFS fails, then record what action we take to recover (resend, reset to pnfs or reset to mds). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 +++--- fs/nfs/nfs4trace.h | 8 +++++++- fs/nfs/pnfs.h | 8 ++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5657b7f2611f..3163b78b1d2c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1321,7 +1321,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task, int new_idx = hdr->pgio_mirror_idx; int err; - trace_nfs4_pnfs_read(hdr, task->tk_status); if (task->tk_status < 0) ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, @@ -1331,6 +1330,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + trace_nfs4_pnfs_read(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { @@ -1494,7 +1494,6 @@ static int ff_layout_write_done_cb(struct rpc_task *task, loff_t end_offs = 0; int err; - trace_nfs4_pnfs_write(hdr, task->tk_status); if (task->tk_status < 0) ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, @@ -1504,6 +1503,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + trace_nfs4_pnfs_write(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { @@ -1537,7 +1537,6 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, { int err; - trace_nfs4_pnfs_commit_ds(data, task->tk_status); if (task->tk_status < 0) ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, data->args.offset, data->args.count, @@ -1546,6 +1545,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, err = ff_layout_async_handle_error(task, NULL, data->ds_clp, data->lseg, data->ds_commit_index); + trace_nfs4_pnfs_commit_ds(data, err); switch (err) { case -NFS4ERR_RESET_TO_PNFS: pnfs_generic_prepare_to_resend_writes(data); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 99d3c705c00d..c83a7db91e49 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -155,6 +155,9 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); TRACE_DEFINE_ENUM(NFS4ERR_XDEV); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); + #define show_nfsv4_errors(error) \ __print_symbolic(error, \ { NFS4_OK, "OK" }, \ @@ -305,7 +308,10 @@ TRACE_DEFINE_ENUM(NFS4ERR_XDEV); { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { NFS4ERR_XDEV, "XDEV" }) + { NFS4ERR_XDEV, "XDEV" }, \ + /* ***** Internal to Linux NFS client ***** */ \ + { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ + { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) #define show_open_flags(flags) \ __print_flags(flags, "|", \ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f8a38065c7e4..0fafdadc9c8d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ 
-79,6 +79,10 @@ enum pnfs_try_status { PNFS_TRY_AGAIN = 2, }; +/* error codes for internal use */ +#define NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" @@ -91,10 +95,6 @@ enum pnfs_try_status { #define NFS4_DEF_DS_RETRANS 5 #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 -#define NFS4ERR_RESET_TO_PNFS 12002 - enum { NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ From 2343172d34c6296f79b404a0eb291e15ab19e5ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:08 -0500 Subject: [PATCH 142/658] NFS: Clean up generic file read tracepoints Clean up the generic file read tracepoints so they do pass the full structures as arguments. Also ensure we report the number of bytes actually read. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 54 ++++++++++++++++++++++++++++------------------- fs/nfs/read.c | 5 ++--- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 4d6eb1703943..0710b91f82d3 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -820,75 +820,85 @@ TRACE_EVENT(nfs_sillyrename_unlink, TRACE_EVENT(nfs_initiate_read, TP_PROTO( - const struct inode *inode, - loff_t offset, unsigned long count + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, offset, count), + TP_ARGS(hdr), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; - __entry->offset = offset; - __entry->count = count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu", + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count + (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_readpage_done, TP_PROTO( - const struct inode *inode, - int status, loff_t offset, bool eof + const struct rpc_task *task, + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, status, offset, eof), + TP_ARGS(task, hdr), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(bool, eof) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(bool, eof) + __field(int, status) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; - __entry->status = status; - __entry->offset = offset; - __entry->eof = eof; + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d%s", + "offset=%lld count=%u res=%u status=%d%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, __entry->eof ? " eof" : "" ) ); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfe0b586eadd..12deb3bdb2a0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -214,7 +214,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, task_setup_data->flags |= swap_flags; rpc_ops->read_setup(hdr, msg); - trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes); + trace_nfs_initiate_read(hdr); } static void @@ -247,8 +247,7 @@ static int nfs_readpage_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); - trace_nfs_readpage_done(inode, task->tk_status, - hdr->args.offset, hdr->res.eof); + trace_nfs_readpage_done(task, hdr); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); From 5bb2a7cb9fe58d2b1efedd6058d442c7871c45ec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:09 -0500 Subject: [PATCH 143/658] NFS: Clean up generic writeback tracepoints Clean up the generic writeback tracepoints so they do pass the full structures as arguments. Also ensure we report the number of bytes actually written. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 73 ++++++++++++++++++++++++++--------------------- fs/nfs/write.c | 6 ++-- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 0710b91f82d3..7ed75b3b7aac 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -915,87 +915,96 @@ TRACE_DEFINE_ENUM(NFS_FILE_SYNC); TRACE_EVENT(nfs_initiate_write, TP_PROTO( - const struct inode *inode, - loff_t offset, unsigned long count, - enum nfs3_stable_how stable + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, offset, count, stable), + TP_ARGS(hdr), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) - __field(enum nfs3_stable_how, stable) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __field(enum nfs3_stable_how, stable) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; - __entry->offset = offset; - __entry->count = count; - __entry->stable = stable; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; + __entry->stable = hdr->args.stable; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu stable=%s", + "offset=%lld count=%u stable=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count, + (long long)__entry->offset, __entry->count, nfs_show_stable(__entry->stable) ) ); TRACE_EVENT(nfs_writeback_done, TP_PROTO( - const struct inode *inode, - int status, - loff_t offset, - struct nfs_writeverf *writeverf + const struct rpc_task *task, + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, status, offset, writeverf), + TP_ARGS(task, hdr), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(enum nfs3_stable_how, stable) - __field(unsigned long long, verifier) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(int, status) + __field(enum nfs3_stable_how, stable) + __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + const struct nfs_writeverf *verf = hdr->res.verf; - __entry->status = status; - __entry->offset = offset; - __entry->stable = writeverf->committed; - memcpy(&__entry->verifier, &writeverf->verifier, - sizeof(__entry->verifier)); + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->stable = verf->committed; + memcpy(__entry->verifier, + &verf->verifier, + NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d stable=%s " - "verifier 0x%016llx", + "offset=%lld count=%u res=%u status=%d stable=%s " + "verifier=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, nfs_show_stable(__entry->stable), - __entry->verifier + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) ) ); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c86fc9efd99b..cd837744a732 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1414,8 +1414,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, task_setup_data->priority = priority; rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); - trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, - hdr->args.stable); + trace_nfs_initiate_write(hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1579,8 +1578,7 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); - trace_nfs_writeback_done(inode, task->tk_status, - hdr->args.offset, hdr->res.verf); + trace_nfs_writeback_done(task, hdr); if 
(hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { From 7bdd297ea6e695f27be67cb99b1fea1ff83e38e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:10 -0500 Subject: [PATCH 144/658] NFS: Clean up generic file commit tracepoint Clean up the generic file commit tracepoints to use a 64-bit value for the verifier, and to display the pNFS filehandle, if it exists. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 42 ++++++++++++++++++++++++++---------------- fs/nfs/write.c | 2 +- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 7ed75b3b7aac..a543573e038f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1061,71 +1061,81 @@ TRACE_EVENT(nfs_initiate_commit, TP_ARGS(data), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; __entry->offset = data->args.offset; __entry->count = data->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu", + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count + (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_commit_done, TP_PROTO( + const struct rpc_task *task, const struct nfs_commit_data *data ), - TP_ARGS(data), + TP_ARGS(task, data), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(unsigned long long, verifier) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(int, status) + __field(enum nfs3_stable_how, stable) + __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? 
+ data->args.fh : &nfsi->fh; + const struct nfs_writeverf *verf = data->res.verf; - __entry->status = data->res.op_status; + __entry->status = task->tk_status; __entry->offset = data->args.offset; - memcpy(&__entry->verifier, &data->verf.verifier, - sizeof(__entry->verifier)); + __entry->stable = verf->committed; + memcpy(__entry->verifier, + &verf->verifier, + NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d verifier 0x%016llx", + "offset=%lld status=%d stable=%s verifier=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, - __entry->verifier + (long long)__entry->offset, __entry->status, + nfs_show_stable(__entry->stable), + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) ) ); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cd837744a732..83f92a4d65dc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1833,7 +1833,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); - trace_nfs_commit_done(data); + trace_nfs_commit_done(task, data); } static void nfs_commit_release_pages(struct nfs_commit_data *data) From 088f3e68d899eb0b1fc184b0a2f947a5c91031fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:11 -0500 Subject: [PATCH 145/658] pNFS/flexfiles: Add tracing for layout errors Trace layout errors for pNFS/flexfiles on read/write/commit operations. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 28 +++++-- fs/nfs/nfs4trace.c | 4 + fs/nfs/nfs4trace.h | 109 +++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 9 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3163b78b1d2c..bb9148b83166 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1266,9 +1266,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task, static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, int idx, u64 offset, u64 length, - u32 status, int opnum, int error) + u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; + u32 status = *op_status; int err; if (status == 0) { @@ -1286,10 +1287,10 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ENOBUFS: case -EPIPE: case -EPERM: - status = NFS4ERR_NXIO; + *op_status = status = NFS4ERR_NXIO; break; case -EACCES: - status = NFS4ERR_ACCESS; + *op_status = status = NFS4ERR_ACCESS; break; default: return; @@ -1321,11 +1322,14 @@ static int ff_layout_read_done_cb(struct rpc_task *task, int new_idx = hdr->pgio_mirror_idx; int err; - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, - hdr->res.op_status, OP_READ, + &hdr->res.op_status, OP_READ, task->tk_status); + trace_ff_layout_read_error(hdr); + } + err = ff_layout_async_handle_error(task, hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1494,11 +1498,14 @@ static int ff_layout_write_done_cb(struct rpc_task *task, loff_t end_offs = 0; int err; - if (task->tk_status < 0) + if (task->tk_status < 0) { 
ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, - hdr->res.op_status, OP_WRITE, + &hdr->res.op_status, OP_WRITE, task->tk_status); + trace_ff_layout_write_error(hdr); + } + err = ff_layout_async_handle_error(task, hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1537,11 +1544,14 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, { int err; - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, data->args.offset, data->args.count, - data->res.op_status, OP_COMMIT, + &data->res.op_status, OP_COMMIT, task->tk_status); + trace_ff_layout_commit_error(data); + } + err = ff_layout_async_handle_error(task, NULL, data->ds_clp, data->lseg, data->ds_commit_index); diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 1a8f376b3f73..d9ac556bebcf 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -24,4 +24,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); + +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index c83a7db91e49..1e97e5e04cb4 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2152,6 +2152,115 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); +DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, + TP_PROTO( + const struct nfs_pgio_header *hdr + ), + + TP_ARGS(hdr), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __string(dstaddr, hdr->ds_clp ? + rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + + __entry->error = hdr->res.op_status; + __entry->fhandle = nfs_fhandle_hash(hdr->args.fh); + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; + __entry->stateid_seq = + be32_to_cpu(hdr->args.stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&hdr->args.stateid); + __assign_str(dstaddr, hdr->ds_clp ? 
+ rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown"); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", + -__entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __entry->stateid_seq, __entry->stateid_hash, + __get_str(dstaddr) + ) +); + +#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \ + DEFINE_EVENT(nfs4_flexfiles_io_event, name, \ + TP_PROTO( \ + const struct nfs_pgio_header *hdr \ + ), \ + TP_ARGS(hdr)) +DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error); +DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error); + +TRACE_EVENT(ff_layout_commit_error, + TP_PROTO( + const struct nfs_commit_data *data + ), + + TP_ARGS(data), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __string(dstaddr, data->ds_clp ? + rpc_peeraddr2str(data->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + + __entry->error = data->res.op_status; + __entry->fhandle = nfs_fhandle_hash(data->args.fh); + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __assign_str(dstaddr, data->ds_clp ? + rpc_peeraddr2str(data->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown"); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%llu count=%u dstaddr=%s", + -__entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __get_str(dstaddr) + ) +); + + #endif /* CONFIG_NFS_V4_1 */ #endif /* _TRACE_NFS4_H */ From e8194b7dd39ec5423c32a43542f8348a9bd6956f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:12 -0500 Subject: [PATCH 146/658] NFS: Improve tracing of permission calls On exit from nfs_do_access(), record the mask representing the requested permissions, as well as the server-supplied set of access rights for this user. 
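To make the new trace fields concrete, here is a minimal userspace sketch of the check whose inputs they record (the MAY_* values mirror include/linux/fs.h; check_access() and the sample values are invented for illustration and are not part of this patch):

/*
 * Userspace model of the check in nfs_do_access() whose inputs the new
 * tracepoint records: "mask" is what the caller asked for, "permitted"
 * is the cached, server-supplied set of access rights.
 */
#include <errno.h>
#include <stdio.h>

#define MAY_EXEC  0x01	/* values mirror include/linux/fs.h */
#define MAY_WRITE 0x02
#define MAY_READ  0x04

static int check_access(unsigned int mask, unsigned int permitted)
{
	/* Any requested right missing from the cached set => -EACCES */
	if (mask & ~permitted & (MAY_READ | MAY_WRITE | MAY_EXEC))
		return -EACCES;
	return 0;
}

int main(void)
{
	/* e.g. a write attempt on a file the server only lets us read */
	unsigned int mask = MAY_READ | MAY_WRITE;
	unsigned int permitted = MAY_READ;

	printf("mask=0x%x permitted=0x%x status=%d\n",
	       mask, permitted, check_access(mask, permitted));
	return 0;
}

With both values in the trace output, a denied access can be attributed at a glance either to the rights the server actually granted or to the state of the access cache.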
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 4 ++-- fs/nfs/nfstrace.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e180033e35cf..372c16b3042c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2476,7 +2476,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; - int cache_mask; + int cache_mask = -1; int status; trace_nfs_access_enter(inode); @@ -2515,7 +2515,7 @@ out_cached: if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: - trace_nfs_access_exit(inode, status); + trace_nfs_access_exit(inode, mask, cache_mask, status); return status; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a543573e038f..f8d677dd6705 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -198,7 +198,66 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); -DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); + +TRACE_EVENT(nfs_access_exit, + TP_PROTO( + const struct inode *inode, + unsigned int mask, + unsigned int permitted, + int error + ), + + TP_ARGS(inode, mask, permitted, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(unsigned char, type) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, size) + __field(unsigned long, nfsi_flags) + __field(unsigned long, cache_validity) + __field(unsigned int, mask) + __field(unsigned int, permitted) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->error = error < 0 ? -error : 0; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->type = nfs_umode_to_dtype(inode->i_mode); + __entry->version = inode_peek_iversion_raw(inode); + __entry->size = i_size_read(inode); + __entry->nfsi_flags = nfsi->flags; + __entry->cache_validity = nfsi->cache_validity; + __entry->mask = mask; + __entry->permitted = permitted; + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "type=%u (%s) version=%llu size=%lld " + "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " + "mask=0x%x permitted=0x%x", + -__entry->error, nfs_show_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->type, + nfs_show_file_type(__entry->type), + (unsigned long long)__entry->version, + (long long)__entry->size, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity), + __entry->nfsi_flags, + nfs_show_nfsi_flags(__entry->nfsi_flags), + __entry->mask, __entry->permitted + ) +); TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); From 8c9cb71491e7fd3ebee90d9799c9ca5b769bd0f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:13 -0500 Subject: [PATCH 147/658] NFS: When resending after a short write, reset the reply count to zero If we're resending a write due to a short read or write, ensure we reset the reply count to zero. 
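As a rough sketch of the accounting involved (the structure and field names below are simplified stand-ins for nfs_pgio_args/nfs_pgio_res, not the kernel types), the resend path advances the arguments by the number of bytes the server handled and then clears the old reply, so nothing from the short round can leak into the retried RPC:

/*
 * Simplified model of the short-read/short-write resend accounting.
 * Field names loosely follow nfs_pgio_args / nfs_pgio_res, but this is
 * only an illustration, not the kernel structures.
 */
#include <stdio.h>

struct args { unsigned long long offset; unsigned int pgbase, count; };
struct res  { unsigned int count; int eof; };

static void prepare_resend(struct args *argp, struct res *resp)
{
	/* Consume the bytes the server did handle ... */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count  -= resp->count;
	/*
	 * ... and forget the old reply so the retried RPC starts from a
	 * clean slate instead of reusing stale count/eof values.
	 */
	resp->count = 0;
	resp->eof = 0;
}

int main(void)
{
	struct args a = { .offset = 0, .pgbase = 0, .count = 16384 };
	struct res  r = { .count = 4096, .eof = 0 };	/* short reply */

	prepare_resend(&a, &r);
	printf("resend: offset=%llu count=%u (res.count=%u)\n",
	       a.offset, a.count, r.count);
	return 0;
}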
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 2 ++ fs/nfs/write.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 12deb3bdb2a0..34bb9add2302 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -281,6 +281,8 @@ static void nfs_readpage_retry(struct rpc_task *task, argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; + resp->count = 0; + resp->eof = 0; rpc_restart_call_prepare(task); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 83f92a4d65dc..c478b772cc49 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1658,6 +1658,8 @@ static void nfs_writeback_result(struct rpc_task *task, */ argp->stable = NFS_FILE_SYNC; } + resp->count = 0; + resp->verf->committed = 0; rpc_restart_call_prepare(task); } } From 4daaeba938228f7c97e80817ccda1e091c7d3fb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:14 -0500 Subject: [PATCH 148/658] NFS: Fix nfs_direct_write_reschedule_io() The 'hdr->good_bytes' is defined as the number of bytes we expect to read or write starting at offset hdr->io_start. In the case of a partial read/write we may end up adjusting hdr->args.offset and hdr->args.count to skip I/O for data that was already read/written, and so we must ensure the calculation takes that into account. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 29f00da8a0b7..b768a0b42e82 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -824,7 +824,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.count; + hdr->good_bytes = hdr->args.offset + hdr->args.count - + hdr->io_start; } spin_unlock(&dreq->lock); } From 5c965db86e6b0e90a3112228820598e824920278 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:39:36 -0500 Subject: [PATCH 149/658] NFS: Trust cached access if we've already revalidated the inode once If we've already revalidated the inode once then don't distrust the access cache unless the NFS_INO_INVALID_ACCESS flag is actually set. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 372c16b3042c..9405eeadc3f3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2312,11 +2312,11 @@ static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, s /* Found an entry, is our attribute cache valid? */ if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; + if (!retry) + break; err = -ECHILD; if (!may_block) goto out; - if (!retry) - goto out_zap; spin_unlock(&inode->i_lock); err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (err) From c74dfe97c104bda5144bfa8193d8e5ea67d5da7f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:39:37 -0500 Subject: [PATCH 150/658] NFS: Add mount option 'softreval' Add a mount option 'softreval' that allows attribute revalidation 'getattr' calls to time out, and causes them to fall back to using the cached attributes. The use case for this option is for ensuring that we can still (slowly) traverse paths and use cached information even when the server is down. 
Once the server comes back up again, the getattr calls start succeeding, and the caches will revalidate as usual. The 'softreval' mount option is automatically enabled if you have specified 'softerr'. It can be turned off using the options 'nosoftreval', or 'hard'. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 14 ++++++++++++-- fs/nfs/inode.c | 8 +++++++- fs/nfs/nfs3proc.c | 7 ++++++- fs/nfs/nfs4proc.c | 33 ++++++++++++++++++++++++++------- fs/nfs/proc.c | 7 ++++++- fs/nfs/super.c | 1 + include/linux/nfs_fs_sb.h | 1 + 7 files changed, 59 insertions(+), 12 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 429315c011ae..0247dcb7b316 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -73,6 +73,7 @@ enum nfs_param { Opt_sloppy, Opt_soft, Opt_softerr, + Opt_softreval, Opt_source, Opt_tcp, Opt_timeo, @@ -128,6 +129,7 @@ static const struct fs_parameter_spec nfs_param_specs[] = { fsparam_flag ("sloppy", Opt_sloppy), fsparam_flag ("soft", Opt_soft), fsparam_flag ("softerr", Opt_softerr), + fsparam_flag ("softreval", Opt_softreval), fsparam_string("source", Opt_source), fsparam_flag ("tcp", Opt_tcp), fsparam_u32 ("timeo", Opt_timeo), @@ -460,11 +462,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->flags &= ~NFS_MOUNT_SOFTERR; break; case Opt_softerr: - ctx->flags |= NFS_MOUNT_SOFTERR; + ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL; ctx->flags &= ~NFS_MOUNT_SOFT; break; case Opt_hard: - ctx->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); + ctx->flags &= ~(NFS_MOUNT_SOFT | + NFS_MOUNT_SOFTERR | + NFS_MOUNT_SOFTREVAL); + break; + case Opt_softreval: + if (result.negated) + ctx->flags &= ~NFS_MOUNT_SOFTREVAL; + else + ctx->flags &= NFS_MOUNT_SOFTREVAL; break; case Opt_posix: if (result.negated) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b0b4b9f303fd..71dfc9d2fc3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1156,7 +1156,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - if (status == -ESTALE) { + switch (status) { + case -ETIMEDOUT: + /* A soft timeout occurred. Use cached information? */ + if (server->flags & NFS_MOUNT_SOFTREVAL) + status = 0; + break; + case -ESTALE: nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 67a05f35bb89..19f3d1b2807e 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -110,10 +110,15 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fattr, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? 
*/ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, task_flags); dprintk("NFS reply getattr: %d\n", status); return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 904335b91b6a..294d27be3868 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1097,11 +1097,12 @@ static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup) return ret; } -static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) +static int nfs4_do_call_sync(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + unsigned short task_flags) { struct nfs_client *clp = server->nfs_client; struct nfs4_call_sync_data data = { @@ -1113,12 +1114,23 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .rpc_client = clnt, .rpc_message = msg, .callback_ops = clp->cl_mvops->call_sync_ops, - .callback_data = &data + .callback_data = &data, + .flags = task_flags, }; return nfs4_call_sync_custom(&task_setup); } +static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res) +{ + return nfs4_do_call_sync(clnt, server, msg, args, res, 0); +} + + int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -4064,11 +4076,18 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); nfs_fattr_init(fattr); - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + return nfs4_do_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, task_flags); } int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 44a15523bf40..0451a094e89e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -108,10 +108,15 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fattr, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? 
*/ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, task_flags); dprintk("NFS reply getattr: %d\n", status); return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 76e0198adcfa..dada09b391c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -375,6 +375,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", "" }, { NFS_MOUNT_SOFTERR, ",softerr", "" }, + { NFS_MOUNT_SOFTREVAL, ",softreval", "" }, { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c176f705bf98..465fa98258a3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -152,6 +152,7 @@ struct nfs_server { #define NFS_MOUNT_LOCAL_FLOCK 0x100000 #define NFS_MOUNT_LOCAL_FCNTL 0x200000 #define NFS_MOUNT_SOFTERR 0x400000 +#define NFS_MOUNT_SOFTREVAL 0x800000 unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ From 62a1573fcf844a559a79bec2eafc4309f2be5c5b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 2 Jan 2020 17:09:54 -0500 Subject: [PATCH 151/658] NFSv4 fix acl retrieval over krb5i/krb5p mounts For the krb5i and krb5p mount, it was problematic to truncate the received ACL to the provided buffer because an integrity check could not be preformed. Instead, provide enough pages to accommodate the largest buffer bounded by the largest RPC receive buffer size. Note: I don't think it's possible for the ACL to be truncated now. Thus NFS4_ACL_TRUNC flag and related code could be possibly removed but since I'm unsure, I'm leaving it. v2: needs +1 page. 
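To illustrate the sizing, a tiny standalone sketch of the page-count arithmetic used below (the 64 KiB rsize and 4 KiB page size are example values only, not anything mandated by this patch):

/*
 * Standalone illustration of the buffer sizing in
 * __nfs4_get_acl_uncached(): when the caller passes buflen == 0 the
 * request is sized to the mount's rsize, plus one extra page.
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL			/* example page size */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long rsize = 65536;		/* example rsize */
	unsigned long buflen = 0;		/* probe call: size unknown */

	if (buflen == 0)
		buflen = rsize;

	/* 64 KiB / 4 KiB = 16, +1 page -> 17 page pointers allocated */
	printf("npages = %lu\n", DIV_ROUND_UP(buflen, PAGE_SIZE) + 1);
	return 0;
}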
Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 294d27be3868..22175b85b586 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5601,10 +5601,9 @@ out: */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; + struct page **pages; struct nfs_getaclargs args = { .fh = NFS_FH(inode), - .acl_pages = pages, .acl_len = buflen, }; struct nfs_getaclres res = { @@ -5615,11 +5614,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + unsigned int npages; int ret = -ENOMEM, i; + struct nfs_server *server = NFS_SERVER(inode); - if (npages > ARRAY_SIZE(pages)) - return -ERANGE; + if (buflen == 0) + buflen = server->rsize; + + npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS); + if (!pages) + return -ENOMEM; + + args.acl_pages = pages; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -5665,6 +5672,7 @@ out_free: __free_page(pages[i]); if (res.acl_scratch) __free_page(res.acl_scratch); + kfree(pages); return ret; } From d826e5b827641ae1bebb33d23a774f4e9bb8e94f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 18 Dec 2019 16:50:42 -0500 Subject: [PATCH 152/658] NFSv4.x recover from pre-mature loss of openstateid Ever since the commit 0e0cb35b417f, it's possible to lose an open stateid while retrying a CLOSE due to ERR_OLD_STATEID. Once that happens, operations that require openstateid fail with EAGAIN which is propagated to the application then tests like generic/446 and generic/168 fail with "Resource temporarily unavailable". Instead of returning this error, initiate state recovery when possible to recover the open stateid and then try calling nfs4_select_rw_stateid() again. 
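The same three-line mapping recurs at every nfs4_set_rw_stateid() call site in the hunks below; condensed into a sketch (the wrapper name, the stub, and the userspace framing are invented for illustration), the idea is simply that a transient -EAGAIN from stateid selection is translated back into NFS4ERR_BAD_STATEID so the caller's existing error handling takes over:

/*
 * Illustrative wrapper for the repeated pattern in the hunks below.
 * select_rw_stateid_stub() and the userspace framing are stand-ins;
 * the real code open-codes this mapping at each call site.
 */
#include <errno.h>
#include <stdio.h>

#define NFS4ERR_BAD_STATEID 10025	/* protocol error value from the NFSv4 RFCs */

/* Pretend stateid selection failed because recovery is in progress. */
static int select_rw_stateid_stub(void)
{
	return -EAGAIN;
}

static int nfs42_select_stateid(void)
{
	int status = select_rw_stateid_stub();

	/*
	 * Map the transient -EAGAIN to a protocol error the caller's
	 * existing handling already knows how to recover from.
	 */
	if (status == -EAGAIN)
		status = -NFS4ERR_BAD_STATEID;
	return status;
}

int main(void)
{
	printf("status = %d\n", nfs42_select_stateid());
	return 0;
}

Presumably the point of picking NFS4ERR_BAD_STATEID in particular is that the established handling for that error already triggers state recovery and a retry, which is exactly the behaviour the commit message asks for.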
Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 36 ++++++++++++++++++++++++++++-------- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/pnfs.c | 2 -- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1fe83e0f663e..9637aad36bdc 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context, lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.falloc_fattr = nfs_alloc_fattr(); if (!res.falloc_fattr) @@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, } else { status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } } status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, pos_src, pos_src + (loff_t)count - 1); @@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_sync_inode(dst_inode); if (status) @@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, FMODE_READ); nfs_put_lock_context(l_ctx); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs4_call_sync(src_server->client, src_server, &msg, &args->cna_seq_args, &res->cnr_seq_res, 0); @@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep, status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context, lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; - + } status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.dst_fattr = nfs_alloc_fattr(); if (!res.dst_fattr) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 22175b85b586..e178e2e7ad80 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3251,6 +3251,8 @@ static int _nfs4_do_setattr(struct inode *inode, nfs_put_lock_context(l_ctx); if (status == -EIO) return -EBADF; + else if (status == -EAGAIN) + goto zero_stateid; } else { zero_stateid: nfs4_stateid_copy(&arg->stateid, &zero_stateid); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cec3070ab577..3ac6b4dea72d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1998,8 +1998,6 @@ lookup_again: trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - if (status != -EAGAIN) - goto out_unlock; 
spin_unlock(&ino->i_lock); nfs4_schedule_stateid_recovery(server, ctx->state); pnfs_clear_first_layoutget(lo); From fe1e8dbec11fcad3ae7a34e95fe483d4a2b018fc Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Wed, 25 Dec 2019 11:37:57 +0800 Subject: [PATCH 153/658] NFSv3: FIx bug when using chacl and chmod to change acl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We find a bug when running test under nfsv3  as below. 1) chacl u::r--,g::rwx,o:rw- file1 2) chmod u+w file1 3) chacl -l file1 We expect u::rw-, but it shows u::r--, more likely it returns the cached acl in inode. We dig the code find that the code path is different. chacl->..->__nfs3_proc_setacls->nfs_zap_acl_cache Then nfs_zap_acl_cache clears the NFS_INO_INVALID_ACL in NFS_I(inode)->cache_validity. chmod->..->nfs3_proc_setattr Because NFS_INO_INVALID_ACL has been cleared by chacl path, nfs_zap_acl_cache wont be called. nfs_setattr_update_inode will set NFS_INO_INVALID_ACL so let it before nfs_zap_acl_cache call. Signed-off-by: Su Yanjun Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 19f3d1b2807e..4c93a8bca7dc 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -145,9 +145,9 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) { + nfs_setattr_update_inode(inode, sattr, fattr); if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setattr_update_inode(inode, sattr, fattr); } dprintk("NFS reply setattr: %d\n", status); return status; From 710b65335c19eea696741eb2d5e45a39aa23d0bb Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 13 Jan 2020 22:06:41 +0100 Subject: [PATCH 154/658] i2c: parport-light: remove driver The justification of a light version of the parport driver was less overhead for embedded systems. Well, today, even if an embedded system still has a parport, it surely can handle the fully-fledged parport driver. Remove it to reduce the maintenance burden. Signed-off-by: Wolfram Sang Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- .../i2c/busses/i2c-parport-light.rst | 24 -- Documentation/i2c/busses/index.rst | 1 - MAINTAINERS | 2 - drivers/i2c/busses/Kconfig | 28 -- drivers/i2c/busses/Makefile | 1 - drivers/i2c/busses/i2c-parport-light.c | 267 ------------------ 6 files changed, 323 deletions(-) delete mode 100644 Documentation/i2c/busses/i2c-parport-light.rst delete mode 100644 drivers/i2c/busses/i2c-parport-light.c diff --git a/Documentation/i2c/busses/i2c-parport-light.rst b/Documentation/i2c/busses/i2c-parport-light.rst deleted file mode 100644 index e73af975d2c8..000000000000 --- a/Documentation/i2c/busses/i2c-parport-light.rst +++ /dev/null @@ -1,24 +0,0 @@ -=============================== -Kernel driver i2c-parport-light -=============================== - -Author: Jean Delvare - -This driver is a light version of i2c-parport. It doesn't depend -on the parport driver, and uses direct I/O access instead. This might be -preferred on embedded systems where wasting memory for the clean but heavy -parport handling is not an option. The drawback is a reduced portability -and the impossibility to daisy-chain other parallel port devices. - -Please see i2c-parport for documentation. 
- -Module parameters: - -* type: type of adapter (see i2c-parport or modinfo) - -* base: base I/O address - Default is 0x378 which is fairly common for parallel ports, at least on PC. - -* irq: optional IRQ - This must be passed if you want SMBus alert support, assuming your adapter - actually supports this. diff --git a/Documentation/i2c/busses/index.rst b/Documentation/i2c/busses/index.rst index 2a26e251a335..5e4077b08d86 100644 --- a/Documentation/i2c/busses/index.rst +++ b/Documentation/i2c/busses/index.rst @@ -20,7 +20,6 @@ I2C Bus Drivers i2c-nforce2 i2c-nvidia-gpu i2c-ocores - i2c-parport-light i2c-parport i2c-pca-isa i2c-piix4 diff --git a/MAINTAINERS b/MAINTAINERS index cc0a4a8ae06a..0a84db6d9653 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7720,9 +7720,7 @@ M: Jean Delvare L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/i2c/busses/i2c-parport.rst -F: Documentation/i2c/busses/i2c-parport-light.rst F: drivers/i2c/busses/i2c-parport.c -F: drivers/i2c/busses/i2c-parport-light.c I2C SUBSYSTEM M: Wolfram Sang diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 6a0aa76859f3..933f15f0ad67 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1211,37 +1211,9 @@ config I2C_PARPORT An adapter type parameter is now mandatory. Please read the file Documentation/i2c/busses/i2c-parport.rst for details. - Another driver exists, named i2c-parport-light, which doesn't depend - on the parport driver. This is meant for embedded systems. Don't say - Y here if you intend to say Y or M there. - This support is also available as a module. If so, the module will be called i2c-parport. -config I2C_PARPORT_LIGHT - tristate "Parallel port adapter (light)" - select I2C_ALGOBIT - select I2C_SMBUS - help - This supports parallel port I2C adapters such as the ones made by - Philips or Velleman, Analog Devices evaluation boards, and more. - Basically any adapter using the parallel port as an I2C bus with - no extra chipset is supported by this driver, or could be. - - This driver is a light version of i2c-parport. It doesn't depend - on the parport driver, and uses direct I/O access instead. This - might be preferred on embedded systems where wasting memory for - the clean but heavy parport handling is not an option. The - drawback is a reduced portability and the impossibility to - daisy-chain other parallel port devices. - - Don't say Y here if you said Y or M to i2c-parport. Saying M to - both is possible but both modules should not be loaded at the same - time. - - This support is also available as a module. If so, the module - will be called i2c-parport-light. 
- config I2C_ROBOTFUZZ_OSIF tristate "RobotFuzz Open Source InterFace USB adapter" depends on USB diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 3ab8aebc39c9..25d60889713c 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -128,7 +128,6 @@ obj-$(CONFIG_I2C_ZX2967) += i2c-zx2967.o obj-$(CONFIG_I2C_DIOLAN_U2C) += i2c-diolan-u2c.o obj-$(CONFIG_I2C_DLN2) += i2c-dln2.o obj-$(CONFIG_I2C_PARPORT) += i2c-parport.o -obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o obj-$(CONFIG_I2C_ROBOTFUZZ_OSIF) += i2c-robotfuzz-osif.o obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o obj-$(CONFIG_I2C_TINY_USB) += i2c-tiny-usb.o diff --git a/drivers/i2c/busses/i2c-parport-light.c b/drivers/i2c/busses/i2c-parport-light.c deleted file mode 100644 index 00f6aaf22cfc..000000000000 --- a/drivers/i2c/busses/i2c-parport-light.c +++ /dev/null @@ -1,267 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* ------------------------------------------------------------------------ * - * i2c-parport-light.c I2C bus over parallel port * - * ------------------------------------------------------------------------ * - Copyright (C) 2003-2010 Jean Delvare - - Based on older i2c-velleman.c driver - Copyright (C) 1995-2000 Simon G. Vogl - With some changes from: - Frodo Looijaard - Kyösti Mälkki - - * ------------------------------------------------------------------------ */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "i2c-parport.h" - -#define DEFAULT_BASE 0x378 -#define DRVNAME "i2c-parport-light" - -static struct platform_device *pdev; - -static u16 base; -module_param_hw(base, ushort, ioport, 0); -MODULE_PARM_DESC(base, "Base I/O address"); - -static int irq; -module_param_hw(irq, int, irq, 0); -MODULE_PARM_DESC(irq, "IRQ (optional)"); - -/* ----- Low-level parallel port access ----------------------------------- */ - -static inline void port_write(unsigned char p, unsigned char d) -{ - outb(d, base+p); -} - -static inline unsigned char port_read(unsigned char p) -{ - return inb(base+p); -} - -/* ----- Unified line operation functions --------------------------------- */ - -static inline void line_set(int state, const struct lineop *op) -{ - u8 oldval = port_read(op->port); - - /* Touch only the bit(s) needed */ - if ((op->inverted && !state) || (!op->inverted && state)) - port_write(op->port, oldval | op->val); - else - port_write(op->port, oldval & ~op->val); -} - -static inline int line_get(const struct lineop *op) -{ - u8 oldval = port_read(op->port); - - return ((op->inverted && (oldval & op->val) != op->val) - || (!op->inverted && (oldval & op->val) == op->val)); -} - -/* ----- I2C algorithm call-back functions and structures ----------------- */ - -static void parport_setscl(void *data, int state) -{ - line_set(state, &adapter_parm[type].setscl); -} - -static void parport_setsda(void *data, int state) -{ - line_set(state, &adapter_parm[type].setsda); -} - -static int parport_getscl(void *data) -{ - return line_get(&adapter_parm[type].getscl); -} - -static int parport_getsda(void *data) -{ - return line_get(&adapter_parm[type].getsda); -} - -/* Encapsulate the functions above in the correct structure - Note that getscl will be set to NULL by the attaching code for adapters - that cannot read SCL back */ -static struct i2c_algo_bit_data parport_algo_data = { - .setsda = parport_setsda, - .setscl = parport_setscl, - .getsda = parport_getsda, - .getscl = parport_getscl, - .udelay = 50, - .timeout 
= HZ, -}; - -/* ----- Driver registration ---------------------------------------------- */ - -static struct i2c_adapter parport_adapter = { - .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON, - .algo_data = &parport_algo_data, - .name = "Parallel port adapter (light)", -}; - -/* SMBus alert support */ -static struct i2c_smbus_alert_setup alert_data = { -}; -static struct i2c_client *ara; -static struct lineop parport_ctrl_irq = { - .val = (1 << 4), - .port = PORT_CTRL, -}; - -static int i2c_parport_probe(struct platform_device *pdev) -{ - int err; - - /* Reset hardware to a sane state (SCL and SDA high) */ - parport_setsda(NULL, 1); - parport_setscl(NULL, 1); - /* Other init if needed (power on...) */ - if (adapter_parm[type].init.val) { - line_set(1, &adapter_parm[type].init); - /* Give powered devices some time to settle */ - msleep(100); - } - - parport_adapter.dev.parent = &pdev->dev; - err = i2c_bit_add_bus(&parport_adapter); - if (err) { - dev_err(&pdev->dev, "Unable to register with I2C\n"); - return err; - } - - /* Setup SMBus alert if supported */ - if (adapter_parm[type].smbus_alert && irq) { - alert_data.irq = irq; - ara = i2c_setup_smbus_alert(&parport_adapter, &alert_data); - if (ara) - line_set(1, &parport_ctrl_irq); - else - dev_warn(&pdev->dev, "Failed to register ARA client\n"); - } - - return 0; -} - -static int i2c_parport_remove(struct platform_device *pdev) -{ - if (ara) { - line_set(0, &parport_ctrl_irq); - i2c_unregister_device(ara); - ara = NULL; - } - i2c_del_adapter(&parport_adapter); - - /* Un-init if needed (power off...) */ - if (adapter_parm[type].init.val) - line_set(0, &adapter_parm[type].init); - - return 0; -} - -static struct platform_driver i2c_parport_driver = { - .driver = { - .name = DRVNAME, - }, - .probe = i2c_parport_probe, - .remove = i2c_parport_remove, -}; - -static int __init i2c_parport_device_add(u16 address) -{ - int err; - - pdev = platform_device_alloc(DRVNAME, -1); - if (!pdev) { - err = -ENOMEM; - printk(KERN_ERR DRVNAME ": Device allocation failed\n"); - goto exit; - } - - err = platform_device_add(pdev); - if (err) { - printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n", - err); - goto exit_device_put; - } - - return 0; - -exit_device_put: - platform_device_put(pdev); -exit: - return err; -} - -static int __init i2c_parport_init(void) -{ - int err; - - if (type < 0) { - printk(KERN_ERR DRVNAME ": adapter type unspecified\n"); - return -ENODEV; - } - - if (type >= ARRAY_SIZE(adapter_parm)) { - printk(KERN_ERR DRVNAME ": invalid type (%d)\n", type); - return -ENODEV; - } - - if (base == 0) { - pr_info(DRVNAME ": using default base 0x%x\n", DEFAULT_BASE); - base = DEFAULT_BASE; - } - - if (!request_region(base, 3, DRVNAME)) - return -EBUSY; - - if (irq != 0) - pr_info(DRVNAME ": using irq %d\n", irq); - - if (!adapter_parm[type].getscl.val) - parport_algo_data.getscl = NULL; - - /* Sets global pdev as a side effect */ - err = i2c_parport_device_add(base); - if (err) - goto exit_release; - - err = platform_driver_register(&i2c_parport_driver); - if (err) - goto exit_device; - - return 0; - -exit_device: - platform_device_unregister(pdev); -exit_release: - release_region(base, 3); - return err; -} - -static void __exit i2c_parport_exit(void) -{ - platform_driver_unregister(&i2c_parport_driver); - platform_device_unregister(pdev); - release_region(base, 3); -} - -MODULE_AUTHOR("Jean Delvare "); -MODULE_DESCRIPTION("I2C bus over parallel port (light)"); -MODULE_LICENSE("GPL"); - -module_init(i2c_parport_init); 
-module_exit(i2c_parport_exit); From 511f7d54842fd12eff5687f51c0ee4f812066399 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 13 Jan 2020 22:06:42 +0100 Subject: [PATCH 155/658] i2c: parport: simplify Kconfig description The driver is not 'new' anymore, so remove details from the driver it surpassed. Signed-off-by: Wolfram Sang Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 933f15f0ad67..b2840c5ebf24 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1202,14 +1202,8 @@ config I2C_PARPORT This supports parallel port I2C adapters such as the ones made by Philips or Velleman, Analog Devices evaluation boards, and more. Basically any adapter using the parallel port as an I2C bus with - no extra chipset is supported by this driver, or could be. - - This driver is a replacement for (and was inspired by) an older - driver named i2c-philips-par. The new driver supports more devices, - and makes it easier to add support for new devices. - - An adapter type parameter is now mandatory. Please read the file - Documentation/i2c/busses/i2c-parport.rst for details. + no extra chipset is supported by this driver, or could be. Please + read the file Documentation/i2c/busses/i2c-parport.rst for details. This support is also available as a module. If so, the module will be called i2c-parport. From 9f7a03642e0e25dc0561be124bddae2b8ed787ab Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 13 Jan 2020 22:06:43 +0100 Subject: [PATCH 156/658] i2c: parport: move include file into main source After removal of the parport-light driver, this include is used by the parport driver exclusively and can be included in the main source. Move module parameter declarations to its variable declaration while here. 
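For readers unfamiliar with the layout being adopted, a minimal, self-contained module skeleton with a parameter declared directly next to its variable might look like this (module name, parameter, and message are invented; this only illustrates the convention, not any part of the i2c-parport driver):

// SPDX-License-Identifier: GPL-2.0
/*
 * Minimal sketch of the "parameter next to its variable" layout this
 * patch moves i2c-parport to.  Names here are invented for the demo.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/printk.h>

static int type = -1;
module_param(type, int, 0);
MODULE_PARM_DESC(type, "Adapter type (see driver documentation)");

static int __init param_demo_init(void)
{
	pr_info("param_demo: type=%d\n", type);
	return 0;
}

static void __exit param_demo_exit(void)
{
}

module_init(param_demo_init);
module_exit(param_demo_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("module_param placement demo");

Keeping module_param()/MODULE_PARM_DESC() beside the variable they describe means a reader never has to hunt through a separate header to find out how a knob is exposed.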
Signed-off-by: Wolfram Sang Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-parport.c | 113 ++++++++++++++++++++++++++++--- drivers/i2c/busses/i2c-parport.h | 106 ----------------------------- 2 files changed, 105 insertions(+), 114 deletions(-) delete mode 100644 drivers/i2c/busses/i2c-parport.h diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c index e8ed882de402..f8f94a25af26 100644 --- a/drivers/i2c/busses/i2c-parport.c +++ b/drivers/i2c/busses/i2c-parport.c @@ -25,7 +25,90 @@ #include #include #include -#include "i2c-parport.h" + +#define PORT_DATA 0 +#define PORT_STAT 1 +#define PORT_CTRL 2 + +struct lineop { + u8 val; + u8 port; + u8 inverted; +}; + +struct adapter_parm { + struct lineop setsda; + struct lineop setscl; + struct lineop getsda; + struct lineop getscl; + struct lineop init; + unsigned int smbus_alert:1; +}; + +static const struct adapter_parm adapter_parm[] = { + /* type 0: Philips adapter */ + { + .setsda = { 0x80, PORT_DATA, 1 }, + .setscl = { 0x08, PORT_CTRL, 0 }, + .getsda = { 0x80, PORT_STAT, 0 }, + .getscl = { 0x08, PORT_STAT, 0 }, + }, + /* type 1: home brew teletext adapter */ + { + .setsda = { 0x02, PORT_DATA, 0 }, + .setscl = { 0x01, PORT_DATA, 0 }, + .getsda = { 0x80, PORT_STAT, 1 }, + }, + /* type 2: Velleman K8000 adapter */ + { + .setsda = { 0x02, PORT_CTRL, 1 }, + .setscl = { 0x08, PORT_CTRL, 1 }, + .getsda = { 0x10, PORT_STAT, 0 }, + }, + /* type 3: ELV adapter */ + { + .setsda = { 0x02, PORT_DATA, 1 }, + .setscl = { 0x01, PORT_DATA, 1 }, + .getsda = { 0x40, PORT_STAT, 1 }, + .getscl = { 0x08, PORT_STAT, 1 }, + }, + /* type 4: ADM1032 evaluation board */ + { + .setsda = { 0x02, PORT_DATA, 1 }, + .setscl = { 0x01, PORT_DATA, 1 }, + .getsda = { 0x10, PORT_STAT, 1 }, + .init = { 0xf0, PORT_DATA, 0 }, + .smbus_alert = 1, + }, + /* type 5: ADM1025, ADM1030 and ADM1031 evaluation boards */ + { + .setsda = { 0x02, PORT_DATA, 1 }, + .setscl = { 0x01, PORT_DATA, 1 }, + .getsda = { 0x10, PORT_STAT, 1 }, + }, + /* type 6: Barco LPT->DVI (K5800236) adapter */ + { + .setsda = { 0x02, PORT_DATA, 1 }, + .setscl = { 0x01, PORT_DATA, 1 }, + .getsda = { 0x20, PORT_STAT, 0 }, + .getscl = { 0x40, PORT_STAT, 0 }, + .init = { 0xfc, PORT_DATA, 0 }, + }, + /* type 7: One For All JP1 parallel port adapter */ + { + .setsda = { 0x01, PORT_DATA, 0 }, + .setscl = { 0x02, PORT_DATA, 0 }, + .getsda = { 0x80, PORT_STAT, 1 }, + .init = { 0x04, PORT_DATA, 1 }, + }, + /* type 8: VCT-jig */ + { + .setsda = { 0x04, PORT_DATA, 1 }, + .setscl = { 0x01, PORT_DATA, 1 }, + .getsda = { 0x40, PORT_STAT, 0 }, + .getscl = { 0x80, PORT_STAT, 1 }, + }, +}; /* ----- Device list ------------------------------------------------------ */ @@ -40,9 +123,30 @@ struct i2c_par { static LIST_HEAD(adapter_list); static DEFINE_MUTEX(adapter_list_lock); + #define MAX_DEVICE 4 static int parport[MAX_DEVICE] = {0, -1, -1, -1}; +module_param_array(parport, int, NULL, 0); +MODULE_PARM_DESC(parport, + "List of parallel ports to bind to, by index.\n" + " Atmost " __stringify(MAX_DEVICE) " devices are supported.\n" + " Default is one device connected to parport0.\n" +); +static int type = -1; +module_param(type, int, 0); +MODULE_PARM_DESC(type, + "Type of adapter:\n" + " 0 = Philips adapter\n" + " 1 = home brew teletext adapter\n" + " 2 = Velleman K8000 adapter\n" + " 3 = ELV adapter\n" + " 4 = ADM1032 evaluation board\n" + " 5 = ADM1025, ADM1030 and ADM1031 evaluation boards\n" + " 6 = Barco LPT->DVI (K5800236) adapter\n" + " 7 = One For All JP1 parallel 
port adapter\n" + " 8 = VCT-jig\n" +); /* ----- Low-level parallel port access ----------------------------------- */ @@ -311,12 +415,5 @@ MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("I2C bus over parallel port"); MODULE_LICENSE("GPL"); -module_param_array(parport, int, NULL, 0); -MODULE_PARM_DESC(parport, - "List of parallel ports to bind to, by index.\n" - " Atmost " __stringify(MAX_DEVICE) " devices are supported.\n" - " Default is one device connected to parport0.\n" -); - module_init(i2c_parport_init); module_exit(i2c_parport_exit); diff --git a/drivers/i2c/busses/i2c-parport.h b/drivers/i2c/busses/i2c-parport.h deleted file mode 100644 index 3b32d92b1264..000000000000 --- a/drivers/i2c/busses/i2c-parport.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* ------------------------------------------------------------------------ * - * i2c-parport.h I2C bus over parallel port * - * ------------------------------------------------------------------------ * - Copyright (C) 2003-2010 Jean Delvare - - * ------------------------------------------------------------------------ */ - -#define PORT_DATA 0 -#define PORT_STAT 1 -#define PORT_CTRL 2 - -struct lineop { - u8 val; - u8 port; - u8 inverted; -}; - -struct adapter_parm { - struct lineop setsda; - struct lineop setscl; - struct lineop getsda; - struct lineop getscl; - struct lineop init; - unsigned int smbus_alert:1; -}; - -static const struct adapter_parm adapter_parm[] = { - /* type 0: Philips adapter */ - { - .setsda = { 0x80, PORT_DATA, 1 }, - .setscl = { 0x08, PORT_CTRL, 0 }, - .getsda = { 0x80, PORT_STAT, 0 }, - .getscl = { 0x08, PORT_STAT, 0 }, - }, - /* type 1: home brew teletext adapter */ - { - .setsda = { 0x02, PORT_DATA, 0 }, - .setscl = { 0x01, PORT_DATA, 0 }, - .getsda = { 0x80, PORT_STAT, 1 }, - }, - /* type 2: Velleman K8000 adapter */ - { - .setsda = { 0x02, PORT_CTRL, 1 }, - .setscl = { 0x08, PORT_CTRL, 1 }, - .getsda = { 0x10, PORT_STAT, 0 }, - }, - /* type 3: ELV adapter */ - { - .setsda = { 0x02, PORT_DATA, 1 }, - .setscl = { 0x01, PORT_DATA, 1 }, - .getsda = { 0x40, PORT_STAT, 1 }, - .getscl = { 0x08, PORT_STAT, 1 }, - }, - /* type 4: ADM1032 evaluation board */ - { - .setsda = { 0x02, PORT_DATA, 1 }, - .setscl = { 0x01, PORT_DATA, 1 }, - .getsda = { 0x10, PORT_STAT, 1 }, - .init = { 0xf0, PORT_DATA, 0 }, - .smbus_alert = 1, - }, - /* type 5: ADM1025, ADM1030 and ADM1031 evaluation boards */ - { - .setsda = { 0x02, PORT_DATA, 1 }, - .setscl = { 0x01, PORT_DATA, 1 }, - .getsda = { 0x10, PORT_STAT, 1 }, - }, - /* type 6: Barco LPT->DVI (K5800236) adapter */ - { - .setsda = { 0x02, PORT_DATA, 1 }, - .setscl = { 0x01, PORT_DATA, 1 }, - .getsda = { 0x20, PORT_STAT, 0 }, - .getscl = { 0x40, PORT_STAT, 0 }, - .init = { 0xfc, PORT_DATA, 0 }, - }, - /* type 7: One For All JP1 parallel port adapter */ - { - .setsda = { 0x01, PORT_DATA, 0 }, - .setscl = { 0x02, PORT_DATA, 0 }, - .getsda = { 0x80, PORT_STAT, 1 }, - .init = { 0x04, PORT_DATA, 1 }, - }, - /* type 8: VCT-jig */ - { - .setsda = { 0x04, PORT_DATA, 1 }, - .setscl = { 0x01, PORT_DATA, 1 }, - .getsda = { 0x40, PORT_STAT, 0 }, - .getscl = { 0x80, PORT_STAT, 1 }, - }, -}; - -static int type = -1; -module_param(type, int, 0); -MODULE_PARM_DESC(type, - "Type of adapter:\n" - " 0 = Philips adapter\n" - " 1 = home brew teletext adapter\n" - " 2 = Velleman K8000 adapter\n" - " 3 = ELV adapter\n" - " 4 = ADM1032 evaluation board\n" - " 5 = ADM1025, ADM1030 and ADM1031 evaluation boards\n" - " 6 = Barco LPT->DVI (K5800236) adapter\n" - " 
7 = One For All JP1 parallel port adapter\n" - " 8 = VCT-jig\n" -); From b5d5605ca3cebb9b16c4f251635ef171ad18b80d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 14 Jan 2020 04:34:37 +0300 Subject: [PATCH 157/658] i2c: tegra: Prevent interrupt triggering after transfer timeout Potentially it is possible that interrupt may fire after transfer timeout. That may not end up well for the next transfer because interrupt handling may race with hardware resetting. This is very unlikely to happen in practice, but anyway let's prevent the potential problem by enabling interrupt only at the moments when it is actually necessary to get some interrupt event. Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 70 +++++++++++++++++----------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 61339c665ebd..882b283e0ed7 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,6 @@ struct tegra_i2c_hw_feature { * @base_phys: physical base address of the I2C controller * @cont_id: I2C controller ID, used for packet header * @irq: IRQ number of transfer complete interrupt - * @irq_disabled: used to track whether or not the interrupt is enabled * @is_dvc: identifies the DVC I2C controller, has a different register layout * @msg_complete: transfer completion notifier * @msg_err: error code for completed message @@ -240,7 +240,6 @@ struct tegra_i2c_hw_feature { * @bus_clk_rate: current I2C bus clock rate * @clk_divisor_non_hs_mode: clock divider for non-high-speed modes * @is_multimaster_mode: track if I2C controller is in multi-master mode - * @xfer_lock: lock to serialize transfer submission and processing * @tx_dma_chan: DMA transmit channel * @rx_dma_chan: DMA receive channel * @dma_phys: handle to DMA resources @@ -260,7 +259,6 @@ struct tegra_i2c_dev { phys_addr_t base_phys; int cont_id; int irq; - bool irq_disabled; int is_dvc; struct completion msg_complete; int msg_err; @@ -270,8 +268,6 @@ struct tegra_i2c_dev { u32 bus_clk_rate; u16 clk_divisor_non_hs_mode; bool is_multimaster_mode; - /* xfer_lock: lock to serialize transfer submission and processing */ - spinlock_t xfer_lock; struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; dma_addr_t dma_phys; @@ -790,11 +786,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) if (err) return err; - if (i2c_dev->irq_disabled) { - i2c_dev->irq_disabled = false; - enable_irq(i2c_dev->irq); - } - return 0; } @@ -825,18 +816,12 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) status = i2c_readl(i2c_dev, I2C_INT_STATUS); - spin_lock(&i2c_dev->xfer_lock); if (status == 0) { dev_warn(i2c_dev->dev, "irq status 0 %08x %08x %08x\n", i2c_readl(i2c_dev, I2C_PACKET_TRANSFER_STATUS), i2c_readl(i2c_dev, I2C_STATUS), i2c_readl(i2c_dev, I2C_CNFG)); i2c_dev->msg_err |= I2C_ERR_UNKNOWN_INTERRUPT; - - if (!i2c_dev->irq_disabled) { - disable_irq_nosync(i2c_dev->irq); - i2c_dev->irq_disabled = true; - } goto err; } @@ -925,7 +910,6 @@ err: complete(&i2c_dev->msg_complete); done: - spin_unlock(&i2c_dev->xfer_lock); return IRQ_HANDLED; } @@ -999,6 +983,30 @@ out: i2c_writel(i2c_dev, val, reg); } +static unsigned long +tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, + struct completion *complete, + unsigned int timeout_ms) +{ + unsigned long 
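Condensed to its essentials, the wait helper introduced below has this shape (driver-specific details dropped; this is a restatement of the hunk, not additional functionality). The important subtlety is the completion_done() re-check after disable_irq(), which catches a handler that fired just as the timeout expired:

/*
 * Condensed form of tegra_i2c_wait_completion_timeout() from the hunk
 * below: the interrupt is enabled only for the duration of the wait,
 * and a completion racing with the timeout still counts as success.
 */
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>

static unsigned long wait_completion(int irq, struct completion *done,
				     unsigned int timeout_ms)
{
	unsigned long ret;

	enable_irq(irq);
	ret = wait_for_completion_timeout(done,
					  msecs_to_jiffies(timeout_ms));
	disable_irq(irq);	/* also waits for a running handler */

	/*
	 * The handler may have completed "done" after the timeout fired
	 * but before disable_irq() returned; treat that as success.
	 */
	if (ret == 0 && completion_done(done))
		ret = 1;

	return ret;
}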
ret; + + enable_irq(i2c_dev->irq); + ret = wait_for_completion_timeout(complete, + msecs_to_jiffies(timeout_ms)); + disable_irq(i2c_dev->irq); + + /* + * There is a chance that completion may happen after IRQ + * synchronization, which is done by disable_irq(). + */ + if (ret == 0 && completion_done(complete)) { + dev_warn(i2c_dev->dev, "completion done after timeout\n"); + ret = 1; + } + + return ret; +} + static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); @@ -1020,8 +1028,8 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); tegra_i2c_unmask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); - time_left = wait_for_completion_timeout(&i2c_dev->msg_complete, - msecs_to_jiffies(50)); + time_left = tegra_i2c_wait_completion_timeout( + i2c_dev, &i2c_dev->msg_complete, 50); if (time_left == 0) { dev_err(i2c_dev->dev, "timed out for bus clear\n"); return -ETIMEDOUT; @@ -1044,7 +1052,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, u32 packet_header; u32 int_mask; unsigned long time_left; - unsigned long flags; size_t xfer_size; u32 *buffer = NULL; int err = 0; @@ -1075,7 +1082,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, */ xfer_time += DIV_ROUND_CLOSEST(((xfer_size * 9) + 2) * MSEC_PER_SEC, i2c_dev->bus_clk_rate); - spin_lock_irqsave(&i2c_dev->xfer_lock, flags); int_mask = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; tegra_i2c_unmask_irq(i2c_dev, int_mask); @@ -1090,7 +1096,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, dev_err(i2c_dev->dev, "starting RX DMA failed, err %d\n", err); - goto unlock; + return err; } } else { @@ -1149,7 +1155,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, dev_err(i2c_dev->dev, "starting TX DMA failed, err %d\n", err); - goto unlock; + return err; } } else { tegra_i2c_fill_tx_fifo(i2c_dev); @@ -1169,15 +1175,10 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, dev_dbg(i2c_dev->dev, "unmasked irq: %02x\n", i2c_readl(i2c_dev, I2C_INT_MASK)); -unlock: - spin_unlock_irqrestore(&i2c_dev->xfer_lock, flags); - if (dma) { - if (err) - return err; + time_left = tegra_i2c_wait_completion_timeout( + i2c_dev, &i2c_dev->dma_complete, xfer_time); - time_left = wait_for_completion_timeout(&i2c_dev->dma_complete, - msecs_to_jiffies(xfer_time)); if (time_left == 0) { dev_err(i2c_dev->dev, "DMA transfer timeout\n"); dmaengine_terminate_sync(i2c_dev->msg_read ? 
@@ -1202,13 +1203,13 @@ unlock: i2c_dev->tx_dma_chan); } - time_left = wait_for_completion_timeout(&i2c_dev->msg_complete, - msecs_to_jiffies(xfer_time)); + time_left = tegra_i2c_wait_completion_timeout( + i2c_dev, &i2c_dev->msg_complete, xfer_time); + tegra_i2c_mask_irq(i2c_dev, int_mask); if (time_left == 0) { dev_err(i2c_dev->dev, "i2c transfer timed out\n"); - tegra_i2c_init(i2c_dev, true); return -ETIMEDOUT; } @@ -1568,7 +1569,6 @@ static int tegra_i2c_probe(struct platform_device *pdev) I2C_PACKET_HEADER_SIZE; init_completion(&i2c_dev->msg_complete); init_completion(&i2c_dev->dma_complete); - spin_lock_init(&i2c_dev->xfer_lock); if (!i2c_dev->hw->has_single_clk_source) { fast_clk = devm_clk_get(&pdev->dev, "fast-clk"); @@ -1644,6 +1644,8 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto release_dma; } + irq_set_status_flags(i2c_dev->irq, IRQ_NOAUTOEN); + ret = devm_request_irq(&pdev->dev, i2c_dev->irq, tegra_i2c_isr, 0, dev_name(&pdev->dev), i2c_dev); if (ret) { From ede2299f7101a79fe8610ca0000734c9887ad4b2 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 14 Jan 2020 04:34:38 +0300 Subject: [PATCH 158/658] i2c: tegra: Support atomic transfers System shutdown may happen with interrupts being disabled and in this case kernel may hang if atomic transfer isn't supported by driver. There were several occurrences where I found my Nexus 7 completely discharged despite of being turned off and then one day I spotted this in the log: reboot: Power down ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/i2c/i2c-core.h:40 i2c_transfer+0x95/0x9c No atomic I2C transfer handler for 'i2c-1' Modules linked in: tegra30_devfreq CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 5.4.0-next-20191202-00120-gf7ecd80fb803-dirty #3195 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x85/0x94) [] (dump_stack) from [] (__warn+0xc1/0xc4) [] (__warn) from [] (warn_slowpath_fmt+0x61/0x78) [] (warn_slowpath_fmt) from [] (i2c_transfer+0x95/0x9c) [] (i2c_transfer) from [] (regmap_i2c_read+0x4d/0x6c) [] (regmap_i2c_read) from [] (_regmap_raw_read+0x99/0x1cc) [] (_regmap_raw_read) from [] (_regmap_bus_read+0x23/0x38) [] (_regmap_bus_read) from [] (_regmap_read+0x3d/0xfc) [] (_regmap_read) from [] (_regmap_update_bits+0x87/0xc4) [] (_regmap_update_bits) from [] (regmap_update_bits_base+0x39/0x50) [] (regmap_update_bits_base) from [] (max77620_pm_power_off+0x29/0x2c) [] (max77620_pm_power_off) from [] (__do_sys_reboot+0xe9/0x170) [] (__do_sys_reboot) from [] (ret_fast_syscall+0x1/0x28) Exception stack(0xde907fa8 to 0xde907ff0) 7fa0: 00000000 00000000 fee1dead 28121969 4321fedc 00000000 7fc0: 00000000 00000000 00000000 00000058 00000000 00000000 00000000 00000000 7fe0: 0045adf0 bed9abb8 004444a0 b6c666d0 ---[ end trace bdd18f87595b1a5e ]--- The atomic transferring is implemented by enforcing PIO mode for the transfer and by polling interrupt status until transfer is completed or failed. Now system shuts down properly every time. 
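In rough terms the new atomic path boils down to the sketch below; the my_* names are placeholders for illustration only, not the actual Tegra symbols, and the real wiring is in the diff that follows. The idea is to reuse the normal transfer routine while a flag tells it to poll the hardware instead of sleeping on an interrupt-driven completion:

	/* Illustrative sketch only (my_* names are hypothetical). */
	static int my_i2c_xfer_atomic(struct i2c_adapter *adap,
				      struct i2c_msg msgs[], int num)
	{
		struct my_i2c_dev *i2c_dev = i2c_get_adapdata(adap);
		int ret;

		i2c_dev->atomic_xfer = true;	/* force PIO + polling */
		ret = my_i2c_xfer(adap, msgs, num);
		i2c_dev->atomic_xfer = false;

		return ret;
	}

	static const struct i2c_algorithm my_i2c_algo = {
		.master_xfer		= my_i2c_xfer,
		.master_xfer_atomic	= my_i2c_xfer_atomic,
		.functionality		= my_i2c_func,
	};

The i2c core picks .master_xfer_atomic automatically when a transfer is requested in a context that cannot sleep, such as the power-off path shown in the backtrace above.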
Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 84 ++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 882b283e0ed7..0245fc2b5684 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -247,6 +248,7 @@ struct tegra_i2c_hw_feature { * @dma_buf_size: DMA buffer size * @is_curr_dma_xfer: indicates active DMA transfer * @dma_complete: DMA completion notifier + * @is_curr_atomic_xfer: indicates active atomic transfer */ struct tegra_i2c_dev { struct device *dev; @@ -275,6 +277,7 @@ struct tegra_i2c_dev { unsigned int dma_buf_size; bool is_curr_dma_xfer; struct completion dma_complete; + bool is_curr_atomic_xfer; }; static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, @@ -683,7 +686,8 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_CONFIG_LOAD); addr = i2c_dev->base + reg_offset; i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); - if (in_interrupt()) + + if (i2c_dev->is_curr_atomic_xfer) err = readl_poll_timeout_atomic(addr, val, val == 0, 1000, I2C_CONFIG_LOAD_TIMEOUT); @@ -983,6 +987,34 @@ out: i2c_writel(i2c_dev, val, reg); } +static unsigned long +tegra_i2c_poll_completion_timeout(struct tegra_i2c_dev *i2c_dev, + struct completion *complete, + unsigned int timeout_ms) +{ + ktime_t ktime = ktime_get(); + ktime_t ktimeout = ktime_add_ms(ktime, timeout_ms); + + do { + u32 status = i2c_readl(i2c_dev, I2C_INT_STATUS); + + if (status) { + tegra_i2c_isr(i2c_dev->irq, i2c_dev); + + if (completion_done(complete)) { + s64 delta = ktime_ms_delta(ktimeout, ktime); + + return msecs_to_jiffies(delta) ?: 1; + } + } + + ktime = ktime_get(); + + } while (ktime_before(ktime, ktimeout)); + + return 0; +} + static unsigned long tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, struct completion *complete, @@ -990,18 +1022,24 @@ tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, { unsigned long ret; - enable_irq(i2c_dev->irq); - ret = wait_for_completion_timeout(complete, - msecs_to_jiffies(timeout_ms)); - disable_irq(i2c_dev->irq); + if (i2c_dev->is_curr_atomic_xfer) { + ret = tegra_i2c_poll_completion_timeout(i2c_dev, complete, + timeout_ms); + } else { + enable_irq(i2c_dev->irq); + ret = wait_for_completion_timeout(complete, + msecs_to_jiffies(timeout_ms)); + disable_irq(i2c_dev->irq); - /* - * There is a chance that completion may happen after IRQ - * synchronization, which is done by disable_irq(). - */ - if (ret == 0 && completion_done(complete)) { - dev_warn(i2c_dev->dev, "completion done after timeout\n"); - ret = 1; + /* + * There is a chance that completion may happen after IRQ + * synchronization, which is done by disable_irq(). 
+ */ + if (ret == 0 && completion_done(complete)) { + dev_warn(i2c_dev->dev, + "completion done after timeout\n"); + ret = 1; + } } return ret; @@ -1073,7 +1111,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size = ALIGN(xfer_size, BYTES_PER_FIFO_WORD); i2c_dev->is_curr_dma_xfer = (xfer_size > I2C_PIO_MODE_MAX_LEN) && - i2c_dev->dma_buf; + i2c_dev->dma_buf && + !i2c_dev->is_curr_atomic_xfer; tegra_i2c_config_fifo_trig(i2c_dev, xfer_size); dma = i2c_dev->is_curr_dma_xfer; /* @@ -1271,6 +1310,19 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], return ret ?: i; } +static int tegra_i2c_xfer_atomic(struct i2c_adapter *adap, + struct i2c_msg msgs[], int num) +{ + struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); + int ret; + + i2c_dev->is_curr_atomic_xfer = true; + ret = tegra_i2c_xfer(adap, msgs, num); + i2c_dev->is_curr_atomic_xfer = false; + + return ret; +} + static u32 tegra_i2c_func(struct i2c_adapter *adap) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); @@ -1298,8 +1350,9 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) } static const struct i2c_algorithm tegra_i2c_algo = { - .master_xfer = tegra_i2c_xfer, - .functionality = tegra_i2c_func, + .master_xfer = tegra_i2c_xfer, + .master_xfer_atomic = tegra_i2c_xfer_atomic, + .functionality = tegra_i2c_func, }; /* payload size is only 12 bit */ @@ -1607,6 +1660,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto unprepare_fast_clk; } + pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { ret = tegra_i2c_runtime_resume(&pdev->dev); From ae6028a65582118d8e8f5fa3c72524b44a878d1d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 14 Jan 2020 04:34:39 +0300 Subject: [PATCH 159/658] i2c: tegra: Rename I2C_PIO_MODE_MAX_LEN to I2C_PIO_MODE_PREFERRED_LEN DMA is preferred for a larger transfers, while PIO is preferred for a smaller transfers to avoid unnecessary DMA overhead. There is no strict size limitations for the PIO-mode transfers, so let's rename the constant for clarity. Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 0245fc2b5684..e0eb8f5dcd6b 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -131,11 +131,12 @@ #define I2C_PACKET_HEADER_SIZE 12 /* - * Upto I2C_PIO_MODE_MAX_LEN bytes, controller will use PIO mode, - * above this, controller will use DMA to fill FIFO. - * MAX PIO len is 20 bytes excluding packet header. + * I2C Controller will use PIO mode for transfers up to 32 bytes in order to + * avoid DMA overhead, otherwise external APB DMA controller will be used. + * Note that the actual MAX PIO length is 20 bytes because 32 bytes include + * I2C_PACKET_HEADER_SIZE. */ -#define I2C_PIO_MODE_MAX_LEN 32 +#define I2C_PIO_MODE_PREFERRED_LEN 32 /* * msg_end_type: The bus control which need to be send at end of transfer. 
@@ -1110,7 +1111,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size = msg->len + I2C_PACKET_HEADER_SIZE; xfer_size = ALIGN(xfer_size, BYTES_PER_FIFO_WORD); - i2c_dev->is_curr_dma_xfer = (xfer_size > I2C_PIO_MODE_MAX_LEN) && + i2c_dev->is_curr_dma_xfer = (xfer_size > I2C_PIO_MODE_PREFERRED_LEN) && i2c_dev->dma_buf && !i2c_dev->is_curr_atomic_xfer; tegra_i2c_config_fifo_trig(i2c_dev, xfer_size); From 4211ffc3ad15a091966cd7be7889fbe4562cd215 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 14 Jan 2020 04:34:40 +0300 Subject: [PATCH 160/658] i2c: tegra: Use relaxed versions of readl/writel There is nothing to synchronize in regards to memory accesses for PIO transfers and for DMA transfers the DMA API takes care of the syncing. Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index e0eb8f5dcd6b..1a390e1bff72 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -284,12 +284,12 @@ struct tegra_i2c_dev { static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) { - writel(val, i2c_dev->base + reg); + writel_relaxed(val, i2c_dev->base + reg); } static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) { - return readl(i2c_dev->base + reg); + return readl_relaxed(i2c_dev->base + reg); } /* @@ -307,16 +307,16 @@ static unsigned long tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) { - writel(val, i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + writel_relaxed(val, i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); /* Read back register to make sure that register writes completed */ if (reg != I2C_TX_FIFO) - readl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) { - return readl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + return readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, @@ -689,12 +689,13 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); if (i2c_dev->is_curr_atomic_xfer) - err = readl_poll_timeout_atomic(addr, val, val == 0, - 1000, - I2C_CONFIG_LOAD_TIMEOUT); + err = readl_relaxed_poll_timeout_atomic( + addr, val, val == 0, 1000, + I2C_CONFIG_LOAD_TIMEOUT); else - err = readl_poll_timeout(addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); + err = readl_relaxed_poll_timeout( + addr, val, val == 0, 1000, + I2C_CONFIG_LOAD_TIMEOUT); if (err) { dev_warn(i2c_dev->dev, From 28d98666dbc033e8834edcba75dc1984efde0279 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 14 Jan 2020 04:34:41 +0300 Subject: [PATCH 161/658] i2c: tegra: Always terminate DMA transfer It is possible that I2C could error out in the middle of DMA transfer and in this case DMA channel needs to be reset, otherwise a follow up transfer will fail because DMA channel stays blocked. 
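For clarity, this is a condensed view of how the error path reads after the change (simplified from the hunk below, not a verbatim copy):

	time_left = tegra_i2c_wait_completion_timeout(i2c_dev,
						      &i2c_dev->dma_complete,
						      xfer_time);

	/* Tear the channel down whether or not the transfer completed,
	 * so an aborted DMA cannot leave it blocked for the next one.
	 */
	dmaengine_terminate_sync(i2c_dev->msg_read ? i2c_dev->rx_dma_chan
						   : i2c_dev->tx_dma_chan);

	if (time_left == 0) {
		dev_err(i2c_dev->dev, "DMA transfer timeout\n");
		tegra_i2c_init(i2c_dev, true);
		return -ETIMEDOUT;
	}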
Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1a390e1bff72..3c7c86d4b0e4 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1220,11 +1220,12 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, time_left = tegra_i2c_wait_completion_timeout( i2c_dev, &i2c_dev->dma_complete, xfer_time); + dmaengine_terminate_sync(i2c_dev->msg_read ? + i2c_dev->rx_dma_chan : + i2c_dev->tx_dma_chan); + if (time_left == 0) { dev_err(i2c_dev->dev, "DMA transfer timeout\n"); - dmaengine_terminate_sync(i2c_dev->msg_read ? - i2c_dev->rx_dma_chan : - i2c_dev->tx_dma_chan); tegra_i2c_init(i2c_dev, true); return -ETIMEDOUT; } @@ -1237,11 +1238,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, msg->len); } - - if (i2c_dev->msg_err != I2C_ERR_NONE) - dmaengine_synchronize(i2c_dev->msg_read ? - i2c_dev->rx_dma_chan : - i2c_dev->tx_dma_chan); } time_left = tegra_i2c_wait_completion_timeout( From b3ec946975737b949137fbb1a2db9e7cc5b9ae82 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 14 Jan 2020 04:34:42 +0300 Subject: [PATCH 162/658] i2c: tegra: Check DMA completion status in addition to left time It is more robust to check completion status in addition to the left time in a case of DMA transfer because transfer's completion happens in two phases [one is ISR, other is tasklet] and thus it is possible that DMA is completed while I2C completion awaiting times out because of the deferred notification done by the DMA driver. The DMA completion status becomes 100% actual after DMA synchronization. This fixes spurious DMA timeouts when system is under load. Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 3c7c86d4b0e4..cbc2ad49043e 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1224,7 +1224,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->rx_dma_chan : i2c_dev->tx_dma_chan); - if (time_left == 0) { + if (!time_left && !completion_done(&i2c_dev->dma_complete)) { dev_err(i2c_dev->dev, "DMA transfer timeout\n"); tegra_i2c_init(i2c_dev, true); return -ETIMEDOUT; From 49945ef0f90b5becef67db68338e8ce34ef70975 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2020 10:06:05 +0100 Subject: [PATCH 163/658] i2c: stu300: Use proper printk format for iomem pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit iomem pointers should be printed with pointer format to hide the actual value and fix warnings when compiling on 64-bit platform (e.g. 
with COMPILE_TEST): drivers/i2c/busses/i2c-stu300.c: In function ‘stu300_wait_while_busy’: drivers/i2c/busses/i2c-stu300.c:446:76: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stu300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c index 8c3e2d409d63..42e0a53e7fa4 100644 --- a/drivers/i2c/busses/i2c-stu300.c +++ b/drivers/i2c/busses/i2c-stu300.c @@ -444,7 +444,7 @@ static int stu300_wait_while_busy(struct stu300_dev *dev) "Attempt: %d\n", i+1); dev_err(&dev->pdev->dev, "base address = " - "0x%08x, reinit hardware\n", (u32) dev->virtbase); + "0x%p, reinit hardware\n", dev->virtbase); (void) stu300_init_hw(dev); } From 6b5794abdcddfd3d1584aff1a13f440d5999ac84 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:35 +0100 Subject: [PATCH 164/658] i2c: cht-wc: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Tested-by: Hans de Goede Reviewed-by: Hans de Goede Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cht-wc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c index b8fde61bb5d8..35e55feda763 100644 --- a/drivers/i2c/busses/i2c-cht-wc.c +++ b/drivers/i2c/busses/i2c-cht-wc.c @@ -388,9 +388,9 @@ static int cht_wc_i2c_adap_i2c_probe(struct platform_device *pdev) */ if (acpi_dev_present("INT33FE", NULL, -1)) { board_info.irq = adap->client_irq; - adap->client = i2c_new_device(&adap->adapter, &board_info); - if (!adap->client) { - ret = -ENOMEM; + adap->client = i2c_new_client_device(&adap->adapter, &board_info); + if (IS_ERR(adap->client)) { + ret = PTR_ERR(adap->client); goto del_adapter; } } From 41d06630b494b7ff27c07f4475571996fba6bb6d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:36 +0100 Subject: [PATCH 165/658] i2c: i801: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. 
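The mechanical change is the same across this series of conversions; in sketch form (generic illustration, not copied from this particular driver):

	/* Before: deprecated API, NULL on failure */
	client = i2c_new_device(adap, &info);
	if (!client)
		return -ENODEV;

	/* After: i2c_new_client_device() returns an ERR_PTR on failure,
	 * so callers can propagate the encoded error instead of
	 * inventing one.
	 */
	client = i2c_new_client_device(adap, &info);
	if (IS_ERR(client))
		return PTR_ERR(client);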
Signed-off-by: Wolfram Sang Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index f5e69fe56532..44db3a91d32d 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1142,7 +1142,7 @@ static void dmi_check_onboard_device(u8 type, const char *name, memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = dmi_devices[i].i2c_addr; strlcpy(info.type, dmi_devices[i].i2c_type, I2C_NAME_SIZE); - i2c_new_device(adap, &info); + i2c_new_client_device(adap, &info); break; } } @@ -1296,7 +1296,7 @@ static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv) memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = dell_lis3lv02d_devices[i].i2c_addr; strlcpy(info.type, "lis3lv02d", I2C_NAME_SIZE); - i2c_new_device(&priv->adapter, &info); + i2c_new_client_device(&priv->adapter, &info); } /* Register optional slaves */ @@ -1312,7 +1312,7 @@ static void i801_probe_optional_slaves(struct i801_priv *priv) memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = apanel_addr; strlcpy(info.type, "fujitsu_apanel", I2C_NAME_SIZE); - i2c_new_device(&priv->adapter, &info); + i2c_new_client_device(&priv->adapter, &info); } if (dmi_name_in_vendors("FUJITSU")) From 7fd0379f8696378128c73c29f7b69a9d3ee18fdc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:37 +0100 Subject: [PATCH 166/658] i2c: nvidia-gpu: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-nvidia-gpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index 5a1235fd86bb..62e18b4db0ed 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -280,9 +280,9 @@ static int gpu_populate_client(struct gpu_i2c_dev *i2cd, int irq) i2cd->gpu_ccgx_ucsi->addr = 0x8; i2cd->gpu_ccgx_ucsi->irq = irq; i2cd->gpu_ccgx_ucsi->properties = ccgx_props; - i2cd->ccgx_client = i2c_new_device(&i2cd->adapter, i2cd->gpu_ccgx_ucsi); - if (!i2cd->ccgx_client) - return -ENODEV; + i2cd->ccgx_client = i2c_new_client_device(&i2cd->adapter, i2cd->gpu_ccgx_ucsi); + if (IS_ERR(i2cd->ccgx_client)) + return PTR_ERR(i2cd->ccgx_client); return 0; } From 7de69dbf0d7f2a6abc02dc190fc260c0d11626b4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:38 +0100 Subject: [PATCH 167/658] i2c: ocores: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. 
Signed-off-by: Wolfram Sang Reviewed-by: Andrew Lunn Reviewed-by: Peter Korsgaard Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ocores.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index ca8b3ecfa93d..f5fc75b65a19 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -731,7 +731,7 @@ static int ocores_i2c_probe(struct platform_device *pdev) /* add in known devices to the bus */ if (pdata) { for (i = 0; i < pdata->num_devices; i++) - i2c_new_device(&i2c->adap, pdata->devices + i); + i2c_new_client_device(&i2c->adap, pdata->devices + i); } return 0; From f12c529005dc64babe23c1fc2fb404ab8daa6ba2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:39 +0100 Subject: [PATCH 168/658] i2c: powermac: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-powermac.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c index 504f5bf0e625..973e5339033c 100644 --- a/drivers/i2c/busses/i2c-powermac.c +++ b/drivers/i2c/busses/i2c-powermac.c @@ -240,8 +240,8 @@ static void i2c_powermac_create_one(struct i2c_adapter *adap, strncpy(info.type, type, sizeof(info.type)); info.addr = addr; - newdev = i2c_new_device(adap, &info); - if (!newdev) + newdev = i2c_new_client_device(adap, &info); + if (IS_ERR(newdev)) dev_err(&adap->dev, "i2c-powermac: Failure to register missing %s\n", type); @@ -359,8 +359,8 @@ static void i2c_powermac_register_devices(struct i2c_adapter *adap, info.irq = irq_of_parse_and_map(node, 0); info.of_node = of_node_get(node); - newdev = i2c_new_device(adap, &info); - if (!newdev) { + newdev = i2c_new_client_device(adap, &info); + if (IS_ERR(newdev)) { dev_err(&adap->dev, "i2c-powermac: Failure to register" " %pOF\n", node); of_node_put(node); From 36056290ee0cec6b7b239dd68b9b874488226ada Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:40 +0100 Subject: [PATCH 169/658] i2c: taos-evm: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Reviewed-by: Jean Delvare Tested-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-taos-evm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c index 0bff3f3a8779..b4050f5b6746 100644 --- a/drivers/i2c/busses/i2c-taos-evm.c +++ b/drivers/i2c/busses/i2c-taos-evm.c @@ -49,10 +49,10 @@ static struct i2c_client *taos_instantiate_device(struct i2c_adapter *adapter) if (!strncmp(adapter->name, "TAOS TSL2550 EVM", 16)) { dev_info(&adapter->dev, "Instantiating device %s at 0x%02x\n", tsl2550_info.type, tsl2550_info.addr); - return i2c_new_device(adapter, &tsl2550_info); + return i2c_new_client_device(adapter, &tsl2550_info); } - return NULL; + return ERR_PTR(-ENODEV); } static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr, From bf255befe7adc14ba42bf71fbee4e84f54249e07 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:41 +0100 Subject: [PATCH 170/658] i2c: xiic: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. 
Signed-off-by: Wolfram Sang Acked-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index d8d49f1814c7..61e081b186cc 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -806,7 +806,7 @@ static int xiic_i2c_probe(struct platform_device *pdev) if (pdata) { /* add in known devices to the bus */ for (i = 0; i < pdata->num_devices; i++) - i2c_new_device(&i2c->adap, pdata->devices + i); + i2c_new_client_device(&i2c->adap, pdata->devices + i); } return 0; From 90a3be9b194505165ecbbd9fe600818f5618dfbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:42 +0100 Subject: [PATCH 171/658] i2c: i2c-core-acpi: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Acked-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 62a1c92ab803..8f3dbc97a057 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -225,7 +225,7 @@ static void i2c_acpi_register_device(struct i2c_adapter *adapter, adev->power.flags.ignore_parent = true; acpi_device_set_enumerated(adev); - if (!i2c_new_device(adapter, info)) { + if (IS_ERR(i2c_new_client_device(adapter, info))) { adev->power.flags.ignore_parent = false; dev_err(&adapter->dev, "failed to add I2C device %s from ACPI\n", @@ -451,7 +451,8 @@ struct notifier_block i2c_acpi_notifier = { * resources, in that case this function can be used to create an i2c-client * for other I2cSerialBus resources in the Current Resource Settings table. * - * Also see i2c_new_device, which this function calls to create the i2c-client. + * Also see i2c_new_client_device, which this function calls to create the + * i2c-client. * * Returns a pointer to the new i2c-client, or error pointer in case of failure. * Specifically, -EPROBE_DEFER is returned if the adapter is not found. @@ -461,7 +462,6 @@ struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, { struct i2c_acpi_lookup lookup; struct i2c_adapter *adapter; - struct i2c_client *client; struct acpi_device *adev; LIST_HEAD(resource_list); int ret; @@ -489,11 +489,7 @@ struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, if (!adapter) return ERR_PTR(-EPROBE_DEFER); - client = i2c_new_device(adapter, info); - if (!client) - return ERR_PTR(-ENODEV); - - return client; + return i2c_new_client_device(adapter, info); } EXPORT_SYMBOL_GPL(i2c_acpi_new_device); From 87e07437df936b5ebdccd5a847fd1801f36bef02 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:43 +0100 Subject: [PATCH 172/658] i2c: i2c-core-base: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. 
Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 19d929948cd2..cefad0881942 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -831,8 +831,8 @@ EXPORT_SYMBOL_GPL(i2c_new_device); /** - * i2c_unregister_device - reverse effect of i2c_new_device() - * @client: value returned from i2c_new_device() + * i2c_unregister_device - reverse effect of i2c_new_*_device() + * @client: value returned from i2c_new_*_device() * Context: can sleep */ void i2c_unregister_device(struct i2c_client *client) @@ -1178,9 +1178,8 @@ static void i2c_scan_static_board_info(struct i2c_adapter *adapter) down_read(&__i2c_board_lock); list_for_each_entry(devinfo, &__i2c_board_list, list) { - if (devinfo->busnum == adapter->nr - && !i2c_new_device(adapter, - &devinfo->board_info)) + if (devinfo->busnum == adapter->nr && + IS_ERR(i2c_new_client_device(adapter, &devinfo->board_info))) dev_err(&adapter->dev, "Can't create device at 0x%02x\n", devinfo->board_info.addr); @@ -2167,8 +2166,8 @@ static int i2c_detect_address(struct i2c_client *temp_client, dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n", info.type, info.addr); - client = i2c_new_device(adapter, &info); - if (client) + client = i2c_new_client_device(adapter, &info); + if (!IS_ERR(client)) list_add_tail(&client->detected, &driver->clients); else dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n", From 5f0155b44e1e053e4fd420c48af020524cea299e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:44 +0100 Subject: [PATCH 173/658] i2c: i2c-core-of: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-of.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index e4d296b40baa..6787c1f71483 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -75,11 +75,10 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, if (ret) return ERR_PTR(ret); - client = i2c_new_device(adap, &info); - if (!client) { + client = i2c_new_client_device(adap, &info); + if (IS_ERR(client)) dev_err(&adap->dev, "of_i2c: Failure registering %pOF\n", node); - return ERR_PTR(-EINVAL); - } + return client; } From e634a50c9cd1896b09f33dcbe4b5de5f29e4bbb7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:45 +0100 Subject: [PATCH 174/658] docs: i2c: use the new API in 'instantiating-devices.rst' i2c_new_device is deprecated, use i2c_new_client_device. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/instantiating-devices.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst index 875ebe9e78e3..b7b90b1b82f9 100644 --- a/Documentation/i2c/instantiating-devices.rst +++ b/Documentation/i2c/instantiating-devices.rst @@ -98,7 +98,7 @@ tuner, a video decoder, an audio decoder, etc. usually connected to the main chip by the means of an I2C bus. You won't know the number of the I2C bus in advance, so the method 1 described above can't be used. Instead, you can instantiate your I2C devices explicitly. 
This is done by filling -a struct i2c_board_info and calling i2c_new_device(). +a struct i2c_board_info and calling i2c_new_client_device(). Example (from the sfe4001 network driver):: @@ -110,7 +110,7 @@ Example (from the sfe4001 network driver):: { (...) efx->board_info.hwmon_client = - i2c_new_device(&efx->i2c_adap, &sfe4001_hwmon_info); + i2c_new_client_device(&efx->i2c_adap, &sfe4001_hwmon_info); (...) } @@ -123,7 +123,7 @@ present or not (for example for an optional feature which is not present on cheap variants of a board but you have no way to tell them apart), or it may have different addresses from one board to the next (manufacturer changing its design without notice). In this case, you can call -i2c_new_scanned_device() instead of i2c_new_device(). +i2c_new_scanned_device() instead of i2c_new_client_device(). Example (from the nxp OHCI driver):: @@ -152,7 +152,7 @@ simply gives up. The driver which instantiated the I2C device is responsible for destroying it on cleanup. This is done by calling i2c_unregister_device() on the -pointer that was earlier returned by i2c_new_device() or +pointer that was earlier returned by i2c_new_client_device() or i2c_new_scanned_device(). From e8d51e962936bf4527f41db318d53a80006f2bf7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Jan 2020 18:47:46 +0100 Subject: [PATCH 175/658] docs: i2c: use the new API in 'writing-clients' i2c_new_device is deprecated, use i2c_new_client_device. Also, align a paragraph while here. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/writing-clients.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst index ced309b5e0cc..0336909ca01b 100644 --- a/Documentation/i2c/writing-clients.rst +++ b/Documentation/i2c/writing-clients.rst @@ -175,8 +175,8 @@ Device Creation If you know for a fact that an I2C device is connected to a given I2C bus, you can instantiate that device by simply filling an i2c_board_info structure with the device address and driver name, and calling -i2c_new_device(). This will create the device, then the driver core will -take care of finding the right driver and will call its probe() method. +i2c_new_client_device(). This will create the device, then the driver core +will take care of finding the right driver and will call its probe() method. If a driver supports different device types, you can specify the type you want using the type field. You can also specify an IRQ and platform data if needed. @@ -186,14 +186,14 @@ don't know the exact address it uses. This happens on TV adapters for example, where the same driver supports dozens of slightly different models, and I2C device addresses change from one model to the next. In that case, you can use the i2c_new_scanned_device() variant, which is -similar to i2c_new_device(), except that it takes an additional list of -possible I2C addresses to probe. A device is created for the first +similar to i2c_new_client_device(), except that it takes an additional list +of possible I2C addresses to probe. A device is created for the first responsive address in the list. If you expect more than one device to be present in the address range, simply call i2c_new_scanned_device() that many times. -The call to i2c_new_device() or i2c_new_scanned_device() typically happens -in the I2C bus driver. 
You may want to save the returned i2c_client +The call to i2c_new_client_device() or i2c_new_scanned_device() typically +happens in the I2C bus driver. You may want to save the returned i2c_client reference for later use. @@ -236,11 +236,11 @@ possible. Device Deletion --------------- -Each I2C device which has been created using i2c_new_device() or -i2c_new_scanned_device() can be unregistered by calling +Each I2C device which has been created using i2c_new_client_device() +or i2c_new_scanned_device() can be unregistered by calling i2c_unregister_device(). If you don't call it explicitly, it will be -called automatically before the underlying I2C bus itself is removed, as a -device can't survive its parent in the device driver model. +called automatically before the underlying I2C bus itself is removed, +as a device can't survive its parent in the device driver model. Initializing the driver From 066e6e805d4af26d24776f4628683ddcef6297f7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jan 2020 21:02:48 +0100 Subject: [PATCH 176/658] i2c: pmcmsp: Use proper printk format for resource_size_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resource_size_t should be printed with its own size-independent format to fix warnings when compiling on 64-bit platform (e.g. with COMPILE_TEST): drivers/i2c/busses/i2c-pmcmsp.c: In function ‘pmcmsptwi_probe’: drivers/i2c/busses/i2c-pmcmsp.c:276:25: warning: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘resource_size_t {aka long long unsigned int}’ [-Wformat=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pmcmsp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c index 0829cb696d9d..aa638f6cee66 100644 --- a/drivers/i2c/busses/i2c-pmcmsp.c +++ b/drivers/i2c/busses/i2c-pmcmsp.c @@ -274,8 +274,8 @@ static int pmcmsptwi_probe(struct platform_device *pldev) if (!request_mem_region(res->start, resource_size(res), pldev->name)) { dev_err(&pldev->dev, - "Unable to get memory/io address region 0x%08x\n", - res->start); + "Unable to get memory/io address region %pap\n", + &res->start); rc = -EBUSY; goto ret_err; } @@ -285,7 +285,7 @@ static int pmcmsptwi_probe(struct platform_device *pldev) resource_size(res)); if (!pmcmsptwi_data.iobase) { dev_err(&pldev->dev, - "Unable to ioremap address 0x%08x\n", res->start); + "Unable to ioremap address %pap\n", &res->start); rc = -EIO; goto ret_unreserve; } From 17b3fe8b04ec92161638378744d4d915fd8cfa2e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jan 2020 21:02:49 +0100 Subject: [PATCH 177/658] i2c: pnx: Use proper printk format for resource_size_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resource_size_t should be printed with its own size-independent format to fix warnings when compiling on 64-bit platform (e.g. 
with COMPILE_TEST): drivers/i2c/busses/i2c-pnx.c: In function ‘i2c_pnx_probe’: drivers/i2c/busses/i2c-pnx.c:737:47: warning: format ‘%x’ expects argument of type ‘unsigned int’, but argument 5 has type ‘resource_size_t {aka long long unsigned int}’ [-Wformat=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pnx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 6e0e546ef83f..686c06f31625 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -734,8 +734,8 @@ static int i2c_pnx_probe(struct platform_device *pdev) if (ret < 0) goto out_clock; - dev_dbg(&pdev->dev, "%s: Master at %#8x, irq %d.\n", - alg_data->adapter.name, res->start, alg_data->irq); + dev_dbg(&pdev->dev, "%s: Master at %pap, irq %d.\n", + alg_data->adapter.name, &res->start, alg_data->irq); return 0; From 40b2ec1251c370bc5557568e259d4058c651d405 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jan 2020 21:02:50 +0100 Subject: [PATCH 178/658] i2c: highlander: Use proper printk format for size_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit size_t should be printed with its own format to be 64-bit friendly and fix warning when compiling on 64-bit platform (e.g. with COMPILE_TEST): drivers/i2c/busses/i2c-highlander.c: In function ‘highlander_i2c_smbus_xfer’: drivers/i2c/busses/i2c-highlander.c:325:22: warning: format ‘%d’ expects argument of type ‘int’, but argument 3 has type ‘size_t {aka long unsigned int}’ [-Wformat=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-highlander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c index ff340d7ae2e5..33dc9a557607 100644 --- a/drivers/i2c/busses/i2c-highlander.c +++ b/drivers/i2c/busses/i2c-highlander.c @@ -322,7 +322,7 @@ static int highlander_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, tmp |= (SMMR_MODE0 | SMMR_MODE1); break; default: - dev_err(dev->dev, "unsupported xfer size %d\n", dev->buf_len); + dev_err(dev->dev, "unsupported xfer size %zu\n", dev->buf_len); return -EINVAL; } From b4c119dbc300c7a6ee2da70d5c7ba14747b35142 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 9 Jan 2020 17:07:58 +0530 Subject: [PATCH 179/658] i2c: xiic: Add timeout to the rx fifo wait loop Add timeout to the rx fifo empty wait loop. Also check for the return value for xiic_reinit and pass it to xiic_start_xfer. 
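The bounded wait uses the usual jiffies idiom; condensed, the new xiic_clear_rx_fifo() body is roughly:

	unsigned long timeout = jiffies + XIIC_I2C_TIMEOUT;

	/* Drain the RX FIFO, but give up if the controller never
	 * reports it empty within the timeout window.
	 */
	while (!(xiic_getreg8(i2c, XIIC_SR_REG_OFFSET) &
		 XIIC_SR_RX_FIFO_EMPTY_MASK)) {
		xiic_getreg8(i2c, XIIC_DRR_REG_OFFSET);
		if (time_after(jiffies, timeout))
			return -ETIMEDOUT;
	}
	return 0;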
Signed-off-by: Shubhrajyoti Datta Signed-off-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 52 ++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 61e081b186cc..3a403202bd28 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -156,6 +156,8 @@ struct xiic_i2c { #define XIIC_RESET_MASK 0xAUL #define XIIC_PM_TIMEOUT 1000 /* ms */ +/* timeout waiting for the controller to respond */ +#define XIIC_I2C_TIMEOUT (msecs_to_jiffies(1000)) /* * The following constant is used for the device global interrupt enable * register, to enable all interrupts for the device, this is the only bit @@ -166,7 +168,7 @@ struct xiic_i2c { #define xiic_tx_space(i2c) ((i2c)->tx_msg->len - (i2c)->tx_pos) #define xiic_rx_space(i2c) ((i2c)->rx_msg->len - (i2c)->rx_pos) -static void xiic_start_xfer(struct xiic_i2c *i2c); +static int xiic_start_xfer(struct xiic_i2c *i2c); static void __xiic_start_xfer(struct xiic_i2c *i2c); /* @@ -247,17 +249,29 @@ static inline void xiic_irq_clr_en(struct xiic_i2c *i2c, u32 mask) xiic_irq_en(i2c, mask); } -static void xiic_clear_rx_fifo(struct xiic_i2c *i2c) +static int xiic_clear_rx_fifo(struct xiic_i2c *i2c) { u8 sr; + unsigned long timeout; + + timeout = jiffies + XIIC_I2C_TIMEOUT; for (sr = xiic_getreg8(i2c, XIIC_SR_REG_OFFSET); !(sr & XIIC_SR_RX_FIFO_EMPTY_MASK); - sr = xiic_getreg8(i2c, XIIC_SR_REG_OFFSET)) + sr = xiic_getreg8(i2c, XIIC_SR_REG_OFFSET)) { xiic_getreg8(i2c, XIIC_DRR_REG_OFFSET); + if (time_after(jiffies, timeout)) { + dev_err(i2c->dev, "Failed to clear rx fifo\n"); + return -ETIMEDOUT; + } + } + + return 0; } -static void xiic_reinit(struct xiic_i2c *i2c) +static int xiic_reinit(struct xiic_i2c *i2c) { + int ret; + xiic_setreg32(i2c, XIIC_RESETR_OFFSET, XIIC_RESET_MASK); /* Set receive Fifo depth to maximum (zero based). 
*/ @@ -270,12 +284,16 @@ static void xiic_reinit(struct xiic_i2c *i2c) xiic_setreg8(i2c, XIIC_CR_REG_OFFSET, XIIC_CR_ENABLE_DEVICE_MASK); /* make sure RX fifo is empty */ - xiic_clear_rx_fifo(i2c); + ret = xiic_clear_rx_fifo(i2c); + if (ret) + return ret; /* Enable interrupts */ xiic_setreg32(i2c, XIIC_DGIER_OFFSET, XIIC_GINTR_ENABLE_MASK); xiic_irq_clr_en(i2c, XIIC_INTR_ARB_LOST_MASK); + + return 0; } static void xiic_deinit(struct xiic_i2c *i2c) @@ -655,12 +673,18 @@ static void __xiic_start_xfer(struct xiic_i2c *i2c) } -static void xiic_start_xfer(struct xiic_i2c *i2c) +static int xiic_start_xfer(struct xiic_i2c *i2c) { + int ret; mutex_lock(&i2c->lock); - xiic_reinit(i2c); - __xiic_start_xfer(i2c); + + ret = xiic_reinit(i2c); + if (!ret) + __xiic_start_xfer(i2c); + mutex_unlock(&i2c->lock); + + return ret; } static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) @@ -682,7 +706,11 @@ static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) i2c->tx_msg = msgs; i2c->nmsgs = num; - xiic_start_xfer(i2c); + err = xiic_start_xfer(i2c); + if (err < 0) { + dev_err(adap->dev.parent, "Error xiic_start_xfer\n"); + goto out; + } if (wait_event_timeout(i2c->wait, (i2c->state == STATE_ERROR) || (i2c->state == STATE_DONE), HZ)) { @@ -794,7 +822,11 @@ static int xiic_i2c_probe(struct platform_device *pdev) if (!(sr & XIIC_SR_TX_FIFO_EMPTY_MASK)) i2c->endianness = BIG; - xiic_reinit(i2c); + ret = xiic_reinit(i2c); + if (ret < 0) { + dev_err(&pdev->dev, "Cannot xiic_reinit\n"); + goto err_clk_dis; + } /* add i2c adapter to i2c tree */ ret = i2c_add_adapter(&i2c->adap); From c9d059681b846f5d0a280950ea4dc58495b9b7a9 Mon Sep 17 00:00:00 2001 From: Venkatesh Yadav Abbarapu Date: Thu, 9 Jan 2020 17:07:59 +0530 Subject: [PATCH 180/658] i2c: xiic: defer the probe if clock is not found It's not always the case that clock is already available when i2c driver get probed at the first time, e.g. the clock is provided by clock wizard which may be probed after i2c driver. So let's defer the probe when devm_clk_get() call fails and give it chance to try later. Signed-off-by: Venkatesh Yadav Abbarapu Signed-off-by: Michal Simek Signed-off-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 3a403202bd28..d719bf5f9d76 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -788,7 +788,8 @@ static int xiic_i2c_probe(struct platform_device *pdev) i2c->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(i2c->clk)) { - dev_err(&pdev->dev, "input clock not found.\n"); + if (PTR_ERR(i2c->clk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "input clock not found.\n"); return PTR_ERR(i2c->clk); } ret = clk_prepare_enable(i2c->clk); From 10b17004a74c384c6f410af355b0d6d7a168f613 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 9 Jan 2020 17:08:00 +0530 Subject: [PATCH 181/658] i2c: xiic: Fix the clocking across bind unbind The recommendation is that the set_active should be done with pm runtime disabled. Also fix the remove path for clocking. 
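In short, probe now marks the device active before enabling runtime PM, and remove resumes the device through runtime PM before tearing it down (condensed from the hunks below):

	/* probe(): state must be set while runtime PM is still disabled */
	pm_runtime_set_autosuspend_delay(i2c->dev, XIIC_PM_TIMEOUT);
	pm_runtime_use_autosuspend(i2c->dev);
	pm_runtime_set_active(i2c->dev);
	pm_runtime_enable(i2c->dev);

	/* remove(): resume via runtime PM rather than re-enabling the
	 * clock by hand, then disable and reset the runtime PM state.
	 */
	ret = pm_runtime_get_sync(i2c->dev);
	if (ret < 0)
		return ret;
	xiic_deinit(i2c);
	pm_runtime_put_sync(i2c->dev);
	clk_disable_unprepare(i2c->clk);
	pm_runtime_disable(&pdev->dev);
	pm_runtime_set_suspended(&pdev->dev);
	pm_runtime_dont_use_autosuspend(&pdev->dev);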
Signed-off-by: Shubhrajyoti Datta Signed-off-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index d719bf5f9d76..b17d30c9ab40 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -798,10 +798,10 @@ static int xiic_i2c_probe(struct platform_device *pdev) return ret; } i2c->dev = &pdev->dev; - pm_runtime_enable(i2c->dev); pm_runtime_set_autosuspend_delay(i2c->dev, XIIC_PM_TIMEOUT); pm_runtime_use_autosuspend(i2c->dev); pm_runtime_set_active(i2c->dev); + pm_runtime_enable(i2c->dev); ret = devm_request_threaded_irq(&pdev->dev, irq, xiic_isr, xiic_process, IRQF_ONESHOT, pdev->name, i2c); @@ -859,14 +859,16 @@ static int xiic_i2c_remove(struct platform_device *pdev) /* remove adapter & data */ i2c_del_adapter(&i2c->adap); - ret = clk_prepare_enable(i2c->clk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable clock.\n"); + ret = pm_runtime_get_sync(i2c->dev); + if (ret < 0) return ret; - } + xiic_deinit(i2c); + pm_runtime_put_sync(i2c->dev); clk_disable_unprepare(i2c->clk); pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); return 0; } From 82d1b8158c9a77c2c9b04c4af22fd62f3686cd9d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 16 Jan 2020 08:20:18 -0500 Subject: [PATCH 182/658] tracing: Allow trace_printk() to nest in other tracing code trace_printk() is used to debug the kernel which includes the tracing infrastructure. But because it writes to the ring buffer, and so does much of the tracing infrastructure, the ring buffer's recursive detection will drop writes to the ring buffer that is in the same context as the current write is happening (it allows interrupts to write when normal context is writing, but wont let normal context write while normal context is writing). This can cause confusion and think that the code is where the trace_printk() exists is not hit. To solve this, up the recursive nesting of the ring buffer when trace_printk() is called before it writes to the buffer itself. Note, this does make it dangerous to use trace_printk() in the ring buffer code itself, because this basically disables the recursion protection of trace_printk() buffer writes. But as trace_printk() is only used for debugging, and if this does occur, the developer will see the cause real quick (recursive blowing up of the stack). Thus the developer can deal with that. But having trace_printk() silently ignored is a much bigger problem, and disabling recursive protection is a small price to pay to fix it. 
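Concretely, every trace_printk() style write is now bracketed by a nesting level bump, so the ring buffer's recursion check does not drop it (condensed from the hunks below):

	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    irq_flags, pc);
	if (!event)
		goto out;	/* reservation failed, still unnest */

	/* ... fill in and commit the event as before ... */
	__buffer_unlock_commit(buffer, event);
 out:
	ring_buffer_nest_end(buffer);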
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 106bbc0988fe..2e1db19dce97 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -866,10 +866,13 @@ int __trace_puts(unsigned long ip, const char *str, int size) local_save_flags(irq_flags); buffer = global_trace.array_buffer.buffer; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, irq_flags, pc); - if (!event) - return 0; + if (!event) { + size = 0; + goto out; + } entry = ring_buffer_event_data(event); entry->ip = ip; @@ -885,7 +888,8 @@ int __trace_puts(unsigned long ip, const char *str, int size) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); - + out: + ring_buffer_nest_end(buffer); return size; } EXPORT_SYMBOL_GPL(__trace_puts); @@ -902,6 +906,7 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int ret = 0; int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) @@ -914,10 +919,12 @@ int __trace_bputs(unsigned long ip, const char *str) local_save_flags(irq_flags); buffer = global_trace.array_buffer.buffer; + + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, irq_flags, pc); if (!event) - return 0; + goto out; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -926,7 +933,10 @@ int __trace_bputs(unsigned long ip, const char *str) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); - return 1; + ret = 1; + out: + ring_buffer_nest_end(buffer); + return ret; } EXPORT_SYMBOL_GPL(__trace_bputs); @@ -3225,6 +3235,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, flags, pc); if (!event) @@ -3240,6 +3251,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } out: + ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: @@ -3282,6 +3294,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, local_save_flags(flags); size = sizeof(*entry) + len + 1; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, flags, pc); if (!event) @@ -3296,6 +3309,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, } out: + ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: From 9a09cd74e7dc6c2ac34b39ea6e74440ceb4c501e Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 21 Jan 2020 13:50:07 +0800 Subject: [PATCH 183/658] ftrace: Remove abandoned macros These 2 macros aren't used from commit eee8ded131f1 ("ftrace: Have the function probes call their own function"), so remove them. 
Link: http://lkml.kernel.org/r/1579585807-43316-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f0ae07e72ef..7fe87c7ab1a8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,8 +62,6 @@ }) /* hash bits for specific function selection */ -#define FTRACE_HASH_BITS 7 -#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 From aff4866db56e5cc5601dcd896e056160e07ca361 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 21 Jan 2020 13:54:23 +0800 Subject: [PATCH 184/658] ftrace: Remove NR_TO_INIT macro This macro isn't used from commit cb7be3b2fc2c ("ftrace: remove daemon"). So no needs to keep it. Link: http://lkml.kernel.org/r/1579586063-44984-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7fe87c7ab1a8..5c701765da5b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1101,9 +1101,6 @@ struct ftrace_page { #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; From 0947db01d9b9fd77a627e501fe563bdaf4c724a1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 20 Jan 2020 12:23:00 +0900 Subject: [PATCH 185/658] bootconfig: Fix Kconfig help message for BOOT_CONFIG Fix Kconfig help message since the bootconfig file is only available to be appended to initramfs. And also add a reference to the documentation. Link: http://lkml.kernel.org/r/157949058031.25888.18399447161895787505.stgit@devnote2 Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index ffd240fb88c3..9506299a53e3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1223,7 +1223,9 @@ config BOOT_CONFIG help Extra boot config allows system admin to pass a config file as complemental extension of kernel cmdline when booting. - The boot config file is usually attached at the end of initramfs. + The boot config file must be attached at the end of initramfs + with checksum and size. + See for details. If unsure, say Y. From a4798eb42a261ff39d991b198a09b840c11010d5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 20 Jan 2020 12:23:12 +0900 Subject: [PATCH 186/658] Documentation: bootconfig: Fix typos in bootconfig documentation Fix typos in bootconfig.rst according to Randy's suggestions. 
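For orientation, the format being documented uses simple dot-delimited keys and values; a small made-up fragment following the syntax rules described in the documentation below (the key names are illustrative only):

	# Comments run from '#' to the end of the line.
	feature.option.name = "some value"
	feature.option.list = 1, 2, 3
	# A key may also appear with no value, as a boolean-style flag.
	feature.option.flag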
Link: http://lkml.kernel.org/r/157949059219.25888.16939971423610233631.stgit@devnote2 Suggested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 32 +++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index c8f7cd4cf44e..4d617693c0c8 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -11,20 +11,22 @@ Boot Configuration Overview ======== -The boot configuration is expanding current kernel cmdline to support -additional key-value data when boot the kernel in an efficient way. -This allows adoministrators to pass a structured-Key config file. +The boot configuration expands the current kernel command line to support +additional key-value data when booting the kernel in an efficient way. +This allows administrators to pass a structured-Key config file. Config File Syntax ================== The boot config syntax is a simple structured key-value. Each key consists -of dot-connected-words, and key and value are connected by "=". The value +of dot-connected-words, and key and value are connected by ``=``. The value has to be terminated by semi-colon (``;``) or newline (``\n``). For array value, array entries are separated by comma (``,``). :: KEY[.WORD[...]] = VALUE[, VALUE2[...]][;] +Unlike the kernel command line syntax, spaces are OK around the comma and ``=``. + Each key word must contain only alphabets, numbers, dash (``-``) or underscore (``_``). And each value only contains printable characters or spaces except for delimiters such as semi-colon (``;``), new-line (``\n``), comma (``,``), @@ -35,7 +37,7 @@ quotes (``"VALUE"``) or single-quotes (``'VALUE'``) to quote it. Note that you can not escape these quotes. There can be a key which doesn't have value or has an empty value. Those keys -are used for checking the key exists or not (like a boolean). +are used for checking if the key exists or not (like a boolean). Key-Value Syntax ---------------- @@ -63,7 +65,7 @@ at boot time. So you can append similar trees or key-values. Comments -------- -The config syntax accepts shell-script style comments. The comments start +The config syntax accepts shell-script style comments. The comments starting with hash ("#") until newline ("\n") will be ignored. :: @@ -108,7 +110,7 @@ update the boot loader and the kernel image itself. To do this operation, Linux kernel provides "bootconfig" command under tools/bootconfig, which allows admin to apply or delete the config file -to/from initrd image. You can build it by follwoing command:: +to/from initrd image. You can build it by the following command:: # make -C tools/bootconfig @@ -122,7 +124,7 @@ To remove the config from the image, you can use -d option as below:: # tools/bootconfig/bootconfig -d /boot/initrd.img-X.Y.Z -C onfig File Limitation +Config File Limitation ====================== Currently the maximum config size size is 32KB and the total key-words (not @@ -145,10 +147,10 @@ User can query or loop on key-value pairs, also it is possible to find a root (prefix) key node and find key-values under that node. If you have a key string, you can query the value directly with the key -using xbc_find_value(). If you want to know what keys exist in the SKC -tree, you can use xbc_for_each_key_value() to iterate key-value pairs. +using xbc_find_value(). 
If you want to know what keys exist in the boot +config, you can use xbc_for_each_key_value() to iterate key-value pairs. Note that you need to use xbc_array_for_each_value() for accessing -each arraies value, e.g.:: +each array's value, e.g.:: vnode = NULL; xbc_find_value("key.word", &vnode); @@ -157,8 +159,8 @@ each arraies value, e.g.:: printk("%s ", value); } -If you want to focus on keys which has a prefix string, you can use -xbc_find_node() to find a node which prefix key words, and iterate +If you want to focus on keys which have a prefix string, you can use +xbc_find_node() to find a node by the prefix string, and iterate keys under the prefix node with xbc_node_for_each_key_value(). But the most typical usage is to get the named value under prefix @@ -174,8 +176,8 @@ or get the named array under prefix as below:: This accesses a value of "key.prefix.option" and an array of "key.prefix.array-option". -Locking is not needed, since after initialized, the config becomes readonly. -All data and keys must be copied if you need to modify it. +Locking is not needed, since after initialization, the config becomes +read-only. All data and keys must be copied if you need to modify it. Functions and structures From b8381ce7aa8ef1ab5a79bf710508e504c494acf7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 20 Jan 2020 12:23:23 +0900 Subject: [PATCH 187/658] Documentation: tracing: Fix typos in boot-time tracing documentation Fix typos in boottime-trace.rst according to Randy's suggestions. Link: http://lkml.kernel.org/r/157949060335.25888.13153184562531693684.stgit@devnote2 Suggested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/boottime-trace.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/trace/boottime-trace.rst b/Documentation/trace/boottime-trace.rst index 1d10fdebf1b2..dcb390075ca1 100644 --- a/Documentation/trace/boottime-trace.rst +++ b/Documentation/trace/boottime-trace.rst @@ -13,7 +13,7 @@ Boot-time tracing allows users to trace boot-time process including device initialization with full features of ftrace including per-event filter and actions, histograms, kprobe-events and synthetic-events, and trace instances. -Since kernel cmdline is not enough to control these complex features, +Since kernel command line is not enough to control these complex features, this uses bootconfig file to describe tracing feature programming. Options in the Boot Config @@ -21,7 +21,7 @@ Options in the Boot Config Here is the list of available options list for boot time tracing in boot config file [1]_. All options are under "ftrace." or "kernel." -refix. See kernel parameters for the options which starts +prefix. See kernel parameters for the options which starts with "kernel." prefix [2]_. .. [1] See :ref:`Documentation/admin-guide/bootconfig.rst ` @@ -50,7 +50,7 @@ kernel.fgraph_filters = FILTER[, FILTER2...] Add fgraph tracing function filters. kernel.fgraph_notraces = FILTER[, FILTER2...] - Add fgraph non tracing function filters. + Add fgraph non-tracing function filters. Ftrace Per-instance Options @@ -81,10 +81,10 @@ ftrace.[instance.INSTANCE.]tracer = TRACER Set TRACER to current tracer on boot. (e.g. function) ftrace.[instance.INSTANCE.]ftrace.filters - This will take an array of tracing function filter rules + This will take an array of tracing function filter rules. 
ftrace.[instance.INSTANCE.]ftrace.notraces - This will take an array of NON-tracing function filter rules + This will take an array of NON-tracing function filter rules. Ftrace Per-Event Options @@ -93,7 +93,7 @@ Ftrace Per-Event Options These options are setting per-event options. ftrace.[instance.INSTANCE.]event.GROUP.EVENT.enable - Enables GROUP:EVENT tracing. + Enable GROUP:EVENT tracing. ftrace.[instance.INSTANCE.]event.GROUP.EVENT.filter = FILTER Set FILTER rule to the GROUP:EVENT. @@ -145,10 +145,10 @@ below:: } } -Also, boottime tracing supports "instance" node, which allows us to run +Also, boot-time tracing supports "instance" node, which allows us to run several tracers for different purpose at once. For example, one tracer -is for tracing functions start with "user\_", and others tracing "kernel\_" -functions, you can write boot config as below:: +is for tracing functions starting with "user\_", and others tracing +"kernel\_" functions, you can write boot config as below:: ftrace.instance { foo { From bcc717ed95dea6c6b87013e97f39ee393aed0487 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 16 Jan 2020 09:22:06 +0000 Subject: [PATCH 188/658] tools: bootconfig: Fix spelling mistake "faile" -> "failed" There are two spelling mistakes in printf statements, fix these. Link: http://lkml.kernel.org/r/20200116092206.52192-1-colin.king@canonical.com Acked-by: Masami Hiramatsu Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index b8f174fd2a0a..91c9a5c0c499 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -140,7 +140,7 @@ int load_xbc_from_initrd(int fd, char **buf) return 0; if (lseek(fd, -8, SEEK_END) < 0) { - printf("Faile to lseek: %d\n", -errno); + printf("Failed to lseek: %d\n", -errno); return -errno; } @@ -155,7 +155,7 @@ int load_xbc_from_initrd(int fd, char **buf) return 0; if (lseek(fd, stat.st_size - 8 - size, SEEK_SET) < 0) { - printf("Faile to lseek: %d\n", -errno); + printf("Failed to lseek: %d\n", -errno); return -errno; } From b83479482ff6f856b1308a17768f228be779543a Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 21 Jan 2020 13:54:40 +0800 Subject: [PATCH 189/658] ring-buffer: Remove abandoned macro RB_MISSED_FLAGS This macro isn't used since commit d325c402964e ("ring-buffer: Remove unused function ring_buffer_page_len()"), so better to remove it. Link: http://lkml.kernel.org/r/1579586080-45300-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3bab9b0a90b6..61f0e92ace99 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -300,8 +300,6 @@ u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) -#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) - struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ From 141597204ea2dc173668c8cd7202c4bac2b0c476 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 21 Jan 2020 13:54:46 +0800 Subject: [PATCH 190/658] tracing: Remove unused TRACE_SEQ_BUF_USED This macro isn't used from commit 3a161d99c43c ("tracing: Create seq_buf layer in trace_seq"). so no needs to keep it. 
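The tools/bootconfig fix above touches load_xbc_from_initrd(), which finds a config appended to an initrd by walking an 8-byte footer (a size word followed by a checksum word) at the very end of the file and then seeking back over the config text itself. Below is a minimal user-space sketch of that footer walk; the trailer layout is assumed from the hunks above rather than from a formal spec, the words are read native-endian as the tool does, and checksum verification is omitted.

/*
 * Sketch: locate a bootconfig appended to an initrd image.
 * Layout assumed from load_xbc_from_initrd() above:
 *   [initrd][config text (size bytes)][size: u32][checksum: u32]
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

static char *read_appended_bootconfig(const char *initrd)
{
	uint32_t size, csum;
	struct stat st;
	char *buf = NULL;
	int fd;

	fd = open(initrd, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0)
		goto out;
	if (lseek(fd, -8, SEEK_END) < 0)	/* footer: size + csum */
		goto out;
	if (read(fd, &size, 4) != 4 || read(fd, &csum, 4) != 4)
		goto out;
	if (lseek(fd, st.st_size - 8 - (off_t)size, SEEK_SET) < 0)
		goto out;			/* start of config text */
	buf = malloc(size + 1);
	if (!buf)
		goto out;
	if (read(fd, buf, size) != (ssize_t)size) {
		free(buf);
		buf = NULL;
		goto out;
	}
	buf[size] = '\0';			/* checksum check omitted */
out:
	if (fd >= 0)
		close(fd);
	return buf;
}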
Link: http://lkml.kernel.org/r/1579586086-45543-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_seq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 87de6edafd14..1d84fcc78e3e 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -30,9 +30,6 @@ /* How much buffer is left on the trace_seq? */ #define TRACE_SEQ_BUF_LEFT(s) seq_buf_buffer_left(&(s)->seq) -/* How much buffer is written? */ -#define TRACE_SEQ_BUF_USED(s) seq_buf_used(&(s)->seq) - /* * trace_seq should work with being initialized with 0s. */ From 532f49a6f19a153e202b5a174f8556fd50c36dd4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Jan 2020 08:30:07 +0300 Subject: [PATCH 191/658] tracing/boot: Fix an IS_ERR() vs NULL bug The trace_array_get_by_name() function doesn't return error pointers, it returns NULL on error. Link: http://lkml.kernel.org/r/20200117053007.5h2juv272pokqhtq@kili.mountain Fixes: 4f712a4d04a4 ("tracing/boot: Add instance node support") Acked-by: Masami Hiramatsu Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index fa9603dc6469..cd541ac1cbc1 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -322,7 +322,7 @@ trace_boot_init_instances(struct xbc_node *node) continue; tr = trace_array_get_by_name(p); - if (IS_ERR(tr)) { + if (!tr) { pr_err("Failed to get trace instance %s\n", p); continue; } From 34423f250a372d71346922edf2b84a19d811a311 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 22 Jan 2020 06:44:50 -0500 Subject: [PATCH 192/658] tracing: Fix uninitialized buffer var on early exit to trace_vbprintk() If we exit due to a bad input to trace_printk() (highly unlikely), then the buffer variable will not be initialized when we unnest the ring buffer. Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2e1db19dce97..d1410b4462ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3230,7 +3230,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) - goto out; + goto out_put; local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; @@ -3252,6 +3252,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) out: ring_buffer_nest_end(buffer); +out_put: put_trace_buf(); out_nobuffer: From 659ded30272d67a04b3692f0bfa12263be20d790 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 21 Jan 2020 13:54:35 +0800 Subject: [PATCH 193/658] trace/kprobe: Remove unused MAX_KPROBE_CMDLINE_SIZE This limitation are never lunched from introduce commit 970988e19eb0 ("tracing/kprobe: Add kprobe_event= boot parameter") Could we remove it if no intention to implement it? 
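The IS_ERR() vs NULL fix a little further up is a reminder that kernel APIs signal lookup or allocation failure in two incompatible ways, and the check has to match the callee's convention: trace_array_get_by_name() is a NULL-on-error function, so the IS_ERR() test could never fire. A short sketch of the two styles, using hypothetical helpers purely for illustration:

/*
 * Two error-return conventions.  widget_create() and widget_lookup()
 * are hypothetical; trace_array_get_by_name() follows the second style.
 */
#include <linux/err.h>
#include <linux/errno.h>

struct widget;
struct widget *widget_create(void);		/* returns ERR_PTR(-E...) on failure */
struct widget *widget_lookup(const char *name);	/* returns NULL on failure */

static int widget_example(void)
{
	struct widget *a, *b;

	a = widget_create();
	if (IS_ERR(a))			/* right check for ERR_PTR()-style APIs */
		return PTR_ERR(a);

	b = widget_lookup("boot");
	if (!b)				/* right check for NULL-on-error APIs */
		return -ENOENT;

	return 0;
}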
Link: http://lkml.kernel.org/r/1579586075-45132-1-git-send-email-alex.shi@linux.alibaba.com Acked-by: Masami Hiramatsu Signed-off-by: Alex Shi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 283b7c437440..bf20cd7f2666 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -22,7 +22,6 @@ #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 -#define MAX_KPROBE_CMDLINE_SIZE 1024 /* Kprobe early definition from command line */ static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata; From 28c7d86bb6172ffbb1a1237c6388e77f9fe5f181 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:03 -0500 Subject: [PATCH 194/658] nfsd: fix filecache lookup If the lookup keeps finding a nfsd_file with an unhashed open file, then retry once only. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Fixes: 65294c1f2c5e "nfsd: add a new struct file caching facility to nfsd" Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 32a9bf22ac08..0a3e5c2aac4b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -789,6 +789,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, *new; struct inode *inode; unsigned int hashval; + bool retry = true; /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, @@ -824,6 +825,11 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + if (!retry) { + status = nfserr_jukebox; + goto out; + } + retry = false; nfsd_file_put_noref(nf); goto retry; } From 36ebbdb96b694dd9c6b25ad98f2bbd263d022b63 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:04 -0500 Subject: [PATCH 195/658] nfsd: cleanup nfsd_file_lru_dispose() Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/filecache.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0a3e5c2aac4b..c048e3071db7 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -256,8 +256,6 @@ nfsd_file_do_unhash(struct nfsd_file *nf) nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; hlist_del_rcu(&nf->nf_node); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); atomic_long_dec(&nfsd_filecache_count); } @@ -266,6 +264,8 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); + if (!list_empty(&nf->nf_lru)) + list_lru_del(&nfsd_file_lru, &nf->nf_lru); return true; } return false; @@ -402,15 +402,14 @@ out_skip: static void nfsd_file_lru_dispose(struct list_head *head) { - while(!list_empty(head)) { - struct nfsd_file *nf = list_first_entry(head, - struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + struct nfsd_file *nf; + + list_for_each_entry(nf, head, nf_lru) { spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); nfsd_file_do_unhash(nf); spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_put_noref(nf); } + nfsd_file_dispose_list(head); } static unsigned long From 9542e6a643fc69d528dfb3303f145719c61d3050 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:05 -0500 Subject: [PATCH 196/658] nfsd: Containerise filecache laundrette Ensure that if the filecache laundrette gets stuck, it only affects the knfsd instances of one container. The notifier callbacks can be called from various contexts so avoid using synchonous filesystem operations that might deadlock. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/filecache.c | 238 ++++++++++++++++++++++++++++++++++++-------- fs/nfsd/filecache.h | 2 + fs/nfsd/nfssvc.c | 9 +- 3 files changed, 207 insertions(+), 42 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c048e3071db7..e71af553c2ed 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -44,6 +44,17 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +struct nfsd_fcache_disposal { + struct list_head list; + struct work_struct work; + struct net *net; + spinlock_t lock; + struct list_head freeme; + struct rcu_head rcu; +}; + +struct workqueue_struct *nfsd_filecache_wq __read_mostly; + static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct nfsd_fcache_bucket *nfsd_file_hashtbl; @@ -52,32 +63,21 @@ static long nfsd_file_lru_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; +static DEFINE_SPINLOCK(laundrette_lock); +static LIST_HEAD(laundrettes); -enum nfsd_file_laundrette_ctl { - NFSD_FILE_LAUNDRETTE_NOFLUSH = 0, - NFSD_FILE_LAUNDRETTE_MAY_FLUSH -}; +static void nfsd_file_gc(void); static void -nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) +nfsd_file_schedule_laundrette(void) { long count = atomic_long_read(&nfsd_filecache_count); if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) return; - /* Be more aggressive about scanning if over the threshold */ - if (count > NFSD_FILE_LRU_THRESHOLD) - mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); - else - schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); - - if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) - return; - - /* ...and don't delay flushing if we're out of control */ - if (count >= NFSD_FILE_LRU_LIMIT) - flush_delayed_work(&nfsd_filecache_laundrette); + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -312,7 +312,9 @@ nfsd_file_put(struct nfsd_file *nf) set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); + nfsd_file_schedule_laundrette(); + if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) + nfsd_file_gc(); } struct nfsd_file * @@ -353,6 +355,58 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) flush_delayed_fput(); } +static void +nfsd_file_list_remove_disposal(struct list_head *dst, + struct nfsd_fcache_disposal *l) +{ + spin_lock(&l->lock); + list_splice_init(&l->freeme, dst); + spin_unlock(&l->lock); +} + +static void +nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net == net) { + spin_lock(&l->lock); + list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); + break; + } + } + rcu_read_unlock(); +} + +static void +nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, + struct net *net) +{ + struct nfsd_file *nf, *tmp; + + list_for_each_entry_safe(nf, tmp, src, nf_lru) { + if (nf->nf_net == net) + list_move_tail(&nf->nf_lru, dst); + } +} + +static void +nfsd_file_dispose_list_delayed(struct list_head *dispose) +{ + LIST_HEAD(list); + struct nfsd_file *nf; + + while(!list_empty(dispose)) { + nf = list_first_entry(dispose, struct 
nfsd_file, nf_lru); + nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); + nfsd_file_list_add_disposal(&list, nf->nf_net); + } +} + /* * Note this can deadlock with nfsd_file_cache_purge. */ @@ -399,17 +453,40 @@ out_skip: return LRU_SKIP; } -static void -nfsd_file_lru_dispose(struct list_head *head) +static unsigned long +nfsd_file_lru_walk_list(struct shrink_control *sc) { + LIST_HEAD(head); struct nfsd_file *nf; + unsigned long ret; - list_for_each_entry(nf, head, nf_lru) { + if (sc) + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &head); + else + ret = list_lru_walk(&nfsd_file_lru, + nfsd_file_lru_cb, + &head, LONG_MAX); + list_for_each_entry(nf, &head, nf_lru) { spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); nfsd_file_do_unhash(nf); spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); } - nfsd_file_dispose_list(head); + nfsd_file_dispose_list_delayed(&head); + return ret; +} + +static void +nfsd_file_gc(void) +{ + nfsd_file_lru_walk_list(NULL); +} + +static void +nfsd_file_gc_worker(struct work_struct *work) +{ + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -421,12 +498,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - LIST_HEAD(head); - unsigned long ret; - - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); - nfsd_file_lru_dispose(&head); - return ret; + return nfsd_file_lru_walk_list(sc); } static struct shrinker nfsd_file_shrinker = { @@ -488,7 +560,7 @@ nfsd_file_close_inode(struct inode *inode) __nfsd_file_close_inode(inode, hashval, &dispose); trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); - nfsd_file_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** @@ -504,16 +576,11 @@ static void nfsd_file_delayed_close(struct work_struct *work) { LIST_HEAD(head); + struct nfsd_fcache_disposal *l = container_of(work, + struct nfsd_fcache_disposal, work); - list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); - - if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); - - if (!list_empty(&head)) { - nfsd_file_lru_dispose(&head); - flush_delayed_fput(); - } + nfsd_file_list_remove_disposal(&head, l); + nfsd_file_dispose_list(&head); } static int @@ -574,6 +641,10 @@ nfsd_file_cache_init(void) if (nfsd_file_hashtbl) return 0; + nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); + if (!nfsd_filecache_wq) + goto out; + nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { @@ -627,7 +698,7 @@ nfsd_file_cache_init(void) spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; out_notifier: @@ -643,6 +714,8 @@ out_err: nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; goto out; } @@ -681,6 +754,88 @@ nfsd_file_cache_purge(struct net *net) } } +static struct nfsd_fcache_disposal * +nfsd_alloc_fcache_disposal(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = kmalloc(sizeof(*l), GFP_KERNEL); + if (!l) + return NULL; + INIT_WORK(&l->work, nfsd_file_delayed_close); + l->net = net; + spin_lock_init(&l->lock); + INIT_LIST_HEAD(&l->freeme); + 
return l; +} + +static void +nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + rcu_assign_pointer(l->net, NULL); + cancel_work_sync(&l->work); + nfsd_file_dispose_list(&l->freeme); + kfree_rcu(l, rcu); +} + +static void +nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_add_tail_rcu(&l->list, &laundrettes); + spin_unlock(&laundrette_lock); +} + +static void +nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_del_rcu(&l->list); + spin_unlock(&laundrette_lock); +} + +static int +nfsd_alloc_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = nfsd_alloc_fcache_disposal(net); + if (!l) + return -ENOMEM; + nfsd_add_fcache_disposal(l); + return 0; +} + +static void +nfsd_free_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net != net) + continue; + nfsd_del_fcache_disposal(l); + rcu_read_unlock(); + nfsd_free_fcache_disposal(l); + return; + } + rcu_read_unlock(); +} + +int +nfsd_file_cache_start_net(struct net *net) +{ + return nfsd_alloc_fcache_disposal_net(net); +} + +void +nfsd_file_cache_shutdown_net(struct net *net) +{ + nfsd_file_cache_purge(net); + nfsd_free_fcache_disposal_net(net); +} + void nfsd_file_cache_shutdown(void) { @@ -705,6 +860,8 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; } static bool @@ -872,7 +1029,8 @@ open_file: nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, nfsd_file_hashtbl[hashval].nfb_count); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - atomic_long_inc(&nfsd_filecache_count); + if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) + nfsd_file_gc(); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 851d9abf54c2..79a7d6808d97 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -51,6 +51,8 @@ struct nfsd_file { int nfsd_file_cache_init(void); void nfsd_file_cache_purge(struct net *); void nfsd_file_cache_shutdown(void); +int nfsd_file_cache_start_net(struct net *net); +void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7900f3494ecb..3b77b904212d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -400,13 +400,18 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre nn->lockd_up = true; } - ret = nfs4_state_start_net(net); + ret = nfsd_file_cache_start_net(net); if (ret) goto out_lockd; + ret = nfs4_state_start_net(net); + if (ret) + goto out_filecache; nn->nfsd_net_up = true; return 0; +out_filecache: + nfsd_file_cache_shutdown_net(net); out_lockd: if (nn->lockd_up) { lockd_down(net); @@ -421,7 +426,7 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_purge(net); + nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); From bd6e1cece834f1b1322c85269b93379f8092077f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:06 -0500 Subject: [PATCH 197/658] nfsd: Remove 
unused constant NFSD_FILE_LRU_RESCAN Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e71af553c2ed..6b0ab43b0618 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -27,7 +27,6 @@ #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_LRU_RESCAN (0) #define NFSD_FILE_SHUTDOWN (1) #define NFSD_FILE_LRU_THRESHOLD (4096UL) #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) @@ -440,15 +439,13 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, goto out_skip; if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_rescan; + goto out_skip; if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) goto out_skip; list_lru_isolate_move(lru, &nf->nf_lru, head); return LRU_REMOVED; -out_rescan: - set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags); out_skip: return LRU_SKIP; } From 55f84cc47f73bdc84ef73b702c23051b426505a4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:07 -0500 Subject: [PATCH 198/658] nfsd: Schedule the laundrette regularly irrespective of file errors Emsure we schedule the laundrette even if the struct file is carrying file errors. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6b0ab43b0618..4cef03a7726c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -237,13 +237,6 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } -static bool -nfsd_file_in_use(struct nfsd_file *nf) -{ - return nfsd_file_check_writeback(nf) || - nfsd_file_check_write_error(nf); -} - static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -307,10 +300,9 @@ void nfsd_file_put(struct nfsd_file *nf) { bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - bool unused = !nfsd_file_in_use(nf); set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) + if (nfsd_file_put_noref(nf) == 1 && is_hashed) nfsd_file_schedule_laundrette(); if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From b6669305d35a3459877afef6e9f68e81bef92972 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:08 -0500 Subject: [PATCH 199/658] nfsd: Reduce the number of calls to nfsd_file_gc() Don't call nfsd_file_gc() on every put of the reference in nfsd_file_put(). Instead, do it only when we're expecting the refcount to go to 1. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/filecache.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 4cef03a7726c..9c2b29e07975 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -282,27 +282,32 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp return true; } -static int +static void nfsd_file_put_noref(struct nfsd_file *nf) { - int count; trace_nfsd_file_put(nf); - count = atomic_dec_return(&nf->nf_ref); - if (!count) { + if (atomic_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); nfsd_file_free(nf); } - return count; } void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; + bool is_hashed; set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (nfsd_file_put_noref(nf) == 1 && is_hashed) + if (atomic_read(&nf->nf_ref) > 2 || !nf->nf_file) { + nfsd_file_put_noref(nf); + return; + } + + filemap_flush(nf->nf_file->f_mapping); + is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; + nfsd_file_put_noref(nf); + if (is_hashed) nfsd_file_schedule_laundrette(); if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From 90d2f1da832fd23290ef0c0d964d97501e5e8553 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:20:47 -0500 Subject: [PATCH 200/658] nfsd: Fix a soft lockup race in nfsd_file_mark_find_or_create() If nfsd_file_mark_find_or_create() keeps winning the race for the nfsd_file_fsnotify_group->mark_mutex against nfsd_file_mark_put() then it can soft lock up, since fsnotify_add_inode_mark() ends up always finding an existing entry. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9c2b29e07975..f275c11c4e28 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -132,9 +132,13 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) struct nfsd_file_mark, nfm_mark)); mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); - fsnotify_put_mark(mark); - if (likely(nfm)) + if (nfm) { + fsnotify_put_mark(mark); break; + } + /* Avoid soft lockup race with nfsd_file_mark_put() */ + fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); + fsnotify_put_mark(mark); } else mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); From 16f8f894108270fd99667f06bae6ebcdd0f4e4bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:29 -0500 Subject: [PATCH 201/658] nfsd: Allow nfsd_vfs_write() to take the nfsd_file as an argument Needed in order to fix stable writes. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/vfs.c | 5 +++-- fs/nfsd/vfs.h | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 1e14b3ed5674..a2baf538473c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1021,7 +1021,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &write->wr_head, write->wr_buflen); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file, + status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, write->wr_how_written); nfsd_file_put(nf); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3aeffb26fca5..e1ffefab2552 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -947,10 +947,11 @@ static int wait_for_concurrent_writes(struct file *file) } __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int stable) { + struct file *file = nf->nf_file; struct svc_export *exp; struct iov_iter iter; __be32 nfserr; @@ -1057,7 +1058,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec, + err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, vlen, cnt, stable); nfsd_file_put(nf); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index bbb485177b25..0174e957b27e 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -34,6 +34,8 @@ #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) +struct nfsd_file; + /* * Callback function for readdir */ @@ -93,7 +95,7 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, int); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, + struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int stable); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, From 5011af4c698a95b4dc4e374b1ce3acd122a9170f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:30 -0500 Subject: [PATCH 202/658] nfsd: Fix stable writes Strictly speaking, a stable write error needs to reflect the write + the commit of that write (and only that write). To ensure that we don't pick up the write errors from other writebacks, add a rw_semaphore to provide exclusion. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/filecache.c | 1 + fs/nfsd/filecache.h | 1 + fs/nfsd/vfs.c | 18 ++++++++++++++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f275c11c4e28..2fadf080ac42 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -195,6 +195,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); } nf->nf_mark = NULL; + init_rwsem(&nf->nf_rwsem); trace_nfsd_file_alloc(nf); } return nf; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 79a7d6808d97..986c325a54bd 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -46,6 +46,7 @@ struct nfsd_file { atomic_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; + struct rw_semaphore nf_rwsem; }; int nfsd_file_cache_init(void); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e1ffefab2552..4652854f3dd5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -982,7 +982,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, flags |= RWF_SYNC; iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); - host_err = vfs_iter_write(file, &iter, &pos, flags); + if (flags & RWF_SYNC) { + down_write(&nf->nf_rwsem); + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); + up_write(&nf->nf_rwsem); + } else { + down_read(&nf->nf_rwsem); + host_err = vfs_iter_write(file, &iter, &pos, flags); + up_read(&nf->nf_rwsem); + } if (host_err < 0) goto out_nfserr; *cnt = host_err; @@ -1097,8 +1108,10 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { - int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + int err2; + down_write(&nf->nf_rwsem); + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: break; @@ -1110,6 +1123,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); } + up_write(&nf->nf_rwsem); } nfsd_file_put(nf); From 7bf94c6ba9fb291bd28fac3228553cb305bfc92a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:31 -0500 Subject: [PATCH 203/658] nfsd: Update the boot verifier on stable writes too. We don't know if the error returned by the fsync() call is exclusive to the data written by the stable write, so play it safe. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4652854f3dd5..0a048dfb68ec 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -994,8 +994,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, host_err = vfs_iter_write(file, &iter, &pos, flags); up_read(&nf->nf_rwsem); } - if (host_err < 0) + if (host_err < 0) { + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); goto out_nfserr; + } *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); From b66ae6dd0c30c750cbc5c633dea08712203abc03 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:32 -0500 Subject: [PATCH 204/658] nfsd: Pass the nfsd_file as arguments to nfsd4_clone_file_range() Needed in order to fix exclusion w.r.t. writes. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/vfs.c | 6 ++++-- fs/nfsd/vfs.h | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a2baf538473c..634f32d847e0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1085,8 +1085,8 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos, - dst->nf_file, clone->cl_dst_pos, clone->cl_count, + status = nfsd4_clone_file_range(src, clone->cl_src_pos, + dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); nfsd_file_put(dst); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0a048dfb68ec..7950f2ea1d95 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -530,9 +530,11 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif -__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, - u64 dst_pos, u64 count, bool sync) +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) { + struct file *src = nf_src->nf_file; + struct file *dst = nf_dst->nf_file; loff_t cloned; cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 0174e957b27e..a91cd7982885 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -57,8 +57,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct file *, u64, struct file *, - u64, u64, bool); +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, + u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, From 1b28d756b2eaee03e838390465754e431389e483 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:33 -0500 Subject: [PATCH 205/658] nfsd: Ensure exclusion between CLONE and WRITE errors Ensure that we can distinguish between synchronous CLONE and WRITE errors, and that we can assign them correctly. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7950f2ea1d95..126149b06463 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -536,22 +536,33 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, struct file *src = nf_src->nf_file; struct file *dst = nf_dst->nf_file; loff_t cloned; + __be32 ret = 0; + down_write(&nf_dst->nf_rwsem); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); - if (cloned < 0) - return nfserrno(cloned); - if (count && cloned != count) - return nfserrno(-EINVAL); + if (cloned < 0) { + ret = nfserrno(cloned); + goto out_err; + } + if (count && cloned != count) { + ret = nfserrno(-EINVAL); + goto out_err; + } if (sync) { loff_t dst_end = count ? 
dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); if (!status) status = commit_inode_metadata(file_inode(src)); - if (status < 0) - return nfserrno(status); + if (status < 0) { + nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, + nfsd_net_id)); + ret = nfserrno(status); + } } - return 0; +out_err: + up_write(&nf_dst->nf_rwsem); + return ret; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, From 1d82163714c16ebe09c7a8c9cd3cef7abcc16208 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:34 -0500 Subject: [PATCH 206/658] sunrpc: Fix potential leaks in sunrpc_cache_unhash() When we unhash the cache entry, we need to handle any pending upcalls by calling cache_fresh_unlocked(). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f740cb51802a..7ede1e52fd81 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1888,7 +1888,9 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) if (!hlist_unhashed(&h->cache_list)){ hlist_del_init_rcu(&h->cache_list); cd->entries--; + set_bit(CACHE_CLEANED, &h->flags); spin_unlock(&cd->hash_lock); + cache_fresh_unlocked(h, cd); cache_put(h, cd); } else spin_unlock(&cd->hash_lock); From 809fe3c533789ebb51c1873b80c7dafbad9dbf19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:35 -0500 Subject: [PATCH 207/658] sunrpc: clean up cache entry add/remove from hashtable Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 50 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7ede1e52fd81..52d927210d32 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -77,6 +77,22 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, return NULL; } +static void sunrpc_begin_cache_remove_entry(struct cache_head *ch, + struct cache_detail *cd) +{ + /* Must be called under cd->hash_lock */ + hlist_del_init_rcu(&ch->cache_list); + set_bit(CACHE_CLEANED, &ch->flags); + cd->entries --; +} + +static void sunrpc_end_cache_remove_entry(struct cache_head *ch, + struct cache_detail *cd) +{ + cache_fresh_unlocked(ch, cd); + cache_put(ch, cd); +} + static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, struct cache_head *key, int hash) @@ -100,8 +116,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, hlist_for_each_entry_rcu(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) { - hlist_del_init_rcu(&tmp->cache_list); - detail->entries --; + sunrpc_begin_cache_remove_entry(tmp, detail); freeme = tmp; break; } @@ -117,10 +132,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, cache_get(new); spin_unlock(&detail->hash_lock); - if (freeme) { - cache_fresh_unlocked(freeme, detail); - cache_put(freeme, detail); - } + if (freeme) + sunrpc_end_cache_remove_entry(freeme, detail); return new; } @@ -454,8 +467,7 @@ static int cache_clean(void) if (!cache_is_expired(current_detail, ch)) continue; - hlist_del_init_rcu(&ch->cache_list); - current_detail->entries--; + sunrpc_begin_cache_remove_entry(ch, current_detail); rv = 1; break; } @@ -465,11 +477,8 @@ static int cache_clean(void) if (!ch) current_index ++; spin_unlock(&cache_list_lock); - 
if (ch) { - set_bit(CACHE_CLEANED, &ch->flags); - cache_fresh_unlocked(ch, d); - cache_put(ch, d); - } + if (ch) + sunrpc_end_cache_remove_entry(ch, d); } else spin_unlock(&cache_list_lock); @@ -525,13 +534,9 @@ void cache_purge(struct cache_detail *detail) for (i = 0; i < detail->hash_size; i++) { head = &detail->hash_table[i]; hlist_for_each_entry_safe(ch, tmp, head, cache_list) { - hlist_del_init_rcu(&ch->cache_list); - detail->entries--; - - set_bit(CACHE_CLEANED, &ch->flags); + sunrpc_begin_cache_remove_entry(ch, detail); spin_unlock(&detail->hash_lock); - cache_fresh_unlocked(ch, detail); - cache_put(ch, detail); + sunrpc_end_cache_remove_entry(ch, detail); spin_lock(&detail->hash_lock); } } @@ -1886,12 +1891,9 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { spin_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ - hlist_del_init_rcu(&h->cache_list); - cd->entries--; - set_bit(CACHE_CLEANED, &h->flags); + sunrpc_begin_cache_remove_entry(h, cd); spin_unlock(&cd->hash_lock); - cache_fresh_unlocked(h, cd); - cache_put(h, cd); + sunrpc_end_cache_remove_entry(h, cd); } else spin_unlock(&cd->hash_lock); } From 524ff1af226ce0f6348d48b413849b2bd16c5b60 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:36 -0500 Subject: [PATCH 208/658] nfsd: Ensure sampling of the commit verifier is atomic with the commit When we have a successful commit, ensure we sample the commit verifier before releasing the lock. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 3 ++- fs/nfsd/nfs3xdr.c | 8 ++------ fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/vfs.c | 8 ++++++-- fs/nfsd/vfs.h | 2 +- fs/nfsd/xdr3.h | 1 + 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index cea68d8411ac..ffdc592868a6 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -683,7 +683,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_inval); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count); + nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, + resp->verf); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index c997b710af27..1ae4fc2eec00 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1121,16 +1121,12 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_commitres *resp = rqstp->rq_resp; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - __be32 verf[2]; p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - /* unique identifier, y2038 overflow can be ignored */ - nfsd_copy_boot_verifier(verf, nn); - *p++ = verf[0]; - *p++ = verf[1]; + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 634f32d847e0..c3455e1b0d50 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -590,9 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_commit *commit = &u->commit; - gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, - commit->co_count); + commit->co_count, + (__be32 *)commit->co_verf.data); } static __be32 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 126149b06463..dc07d9797f11 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1105,7 +1105,7 @@ out: */ __be32 nfsd_commit(struct svc_rqst *rqstp, 
struct svc_fh *fhp, - loff_t offset, unsigned long count) + loff_t offset, unsigned long count, __be32 *verf) { struct nfsd_file *nf; loff_t end = LLONG_MAX; @@ -1130,6 +1130,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); break; case -EINVAL: err = nfserr_notsupp; @@ -1140,7 +1142,9 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_net_id)); } up_write(&nf->nf_rwsem); - } + } else + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); nfsd_file_put(nf); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a91cd7982885..d23d9daacc3d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -74,7 +74,7 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, struct svc_fh *res, int createmode, u32 *verifier, bool *truncp, bool *created); __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, - loff_t, unsigned long); + loff_t, unsigned long, __be32 *verf); #endif /* CONFIG_NFSD_V3 */ int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 0fa12988fb6a..fe6fd4556c2e 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -223,6 +223,7 @@ struct nfsd3_pathconfres { struct nfsd3_commitres { __be32 status; struct svc_fh fh; + __be32 verf[2]; }; struct nfsd3_getaclres { From 19e0663ff9bce2efb87be8b30f9e46b7843600f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:37 -0500 Subject: [PATCH 209/658] nfsd: Ensure sampling of the write verifier is atomic with the write When doing an unstable write, we need to ensure that we sample the write verifier before releasing the lock, and allowing a commit to the same file to proceed. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs3xdr.c | 8 ++------ fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfsproc.c | 2 +- fs/nfsd/vfs.c | 12 +++++++++--- fs/nfsd/vfs.h | 5 +++-- fs/nfsd/xdr3.h | 1 + 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index ffdc592868a6..288bc76b4574 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -203,7 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, nvecs, &cnt, - resp->committed); + resp->committed, resp->verf); resp->count = cnt; RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1ae4fc2eec00..aae514d40b64 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -747,17 +747,13 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_writeres *resp = rqstp->rq_resp; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - __be32 verf[2]; p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - /* unique identifier, y2038 overflow can be ignored */ - nfsd_copy_boot_verifier(verf, nn); - *p++ = verf[0]; - *p++ = verf[1]; + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c3455e1b0d50..e4ddaf87493a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1015,7 +1015,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } write->wr_how_written = write->wr_stable_how; - gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, &write->wr_head, write->wr_buflen); @@ -1023,7 +1022,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - write->wr_how_written); + write->wr_how_written, + (__be32 *)write->wr_verifier.data); nfsd_file_put(nf); write->wr_bytes_written = cnt; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b25c90be29fb..543bbe0a556e 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -226,7 +226,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) return nfserr_io; nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC); + &cnt, NFS_DATA_SYNC, NULL); return nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index dc07d9797f11..0aa02eb18bd3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -962,7 +962,8 @@ static int wait_for_concurrent_writes(struct file *file) __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable) + unsigned long *cnt, int stable, + __be32 *verf) { struct file *file = nf->nf_file; struct svc_export *exp; @@ -1004,6 +1005,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, up_write(&nf->nf_rwsem); } else { down_read(&nf->nf_rwsem); + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); host_err = vfs_iter_write(file, &iter, &pos, flags); up_read(&nf->nf_rwsem); } @@ -1074,7 +1079,8 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long 
*cnt, int stable) + struct kvec *vec, int vlen, unsigned long *cnt, int stable, + __be32 *verf) { struct nfsd_file *nf; __be32 err; @@ -1086,7 +1092,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, goto out; err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, - vlen, cnt, stable); + vlen, cnt, stable, verf); nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index d23d9daacc3d..3eb660ad80d1 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -94,11 +94,12 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, u32 *eof); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, - struct kvec *, int, unsigned long *, int); + struct kvec *, int, unsigned long *, + int stable, __be32 *verf); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int stable); + int stable, __be32 *verf); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index fe6fd4556c2e..4155fd71714c 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -159,6 +159,7 @@ struct nfsd3_writeres { struct svc_fh fh; unsigned long count; int committed; + __be32 verf[2]; }; struct nfsd3_renameres { From 36a1707afda9abc704543d6b419a998c64df41ca Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 13 Jan 2020 21:27:56 -0600 Subject: [PATCH 210/658] drm/amdgpu: modify packet size for pm4 flush tlbs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] PM4 packet size for flush message was oversized. [How] Packet size adjusted to allocate flush + fence packets. 
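The arithmetic behind the [How] note: the KIQ invalidate-TLBs PM4 packet itself is 2 dwords and the fence emitted right after it needs another 8, so the callers now reserve the two pieces explicitly instead of one oversized 12-dword block. A small sketch of that accounting, with the constants taken from the hunks below and the helper itself purely illustrative:

/* Ring-space accounting assumed from this patch: flush packet + fence. */
#define KIQ_INVALIDATE_TLBS_DW	2	/* PM4 TLB-flush packet, per kiq_pm4_funcs */
#define KIQ_FENCE_DW		8	/* fence emitted after the flush */

static inline unsigned int kiq_tlb_flush_ring_dwords(void)
{
	/* previously a flat 12-dword reservation; the split form reserves 10 */
	return KIQ_INVALIDATE_TLBS_DW + KIQ_FENCE_DW;
}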
Signed-off-by: Alex Sierra Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 874f641de281..a0d4e79c4821 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -368,7 +368,7 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, - .invalidate_tlbs_size = 12, + .invalidate_tlbs_size = 2, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46ab46757b25..6c02fe62b497 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -859,7 +859,7 @@ static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, - .invalidate_tlbs_size = 12, + .invalidate_tlbs_size = 2, }; static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index bbede09983e1..86f4ffe408e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -418,7 +418,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (amdgpu_emu_mode == 0 && ring->sched.ready) { spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + /* 2 dwords flush + 8 dwords fence */ + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); amdgpu_fence_emit_polling(ring, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 40a496804356..54bdc1786ab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -578,7 +578,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (ring->sched.ready) { spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + /* 2 dwords flush + 8 dwords fence */ + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); amdgpu_fence_emit_polling(ring, &seq); From a6c44d2538c469f412c3fded0de2290494d762d7 Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 17 Jan 2020 12:18:00 +0800 Subject: [PATCH 211/658] drm/amdgpu: added support to get mGPU DRAM base resolves issue with RAS error injection in mGPU configuration Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 +++++++ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 59 ++++++++++++++++++- .../amd/include/asic_reg/df/df_3_6_offset.h | 3 + .../amd/include/asic_reg/df/df_3_6_sh_mask.h | 8 +++ 5 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 61a26c15c8dd..057f6ea645d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -52,6 +52,9 @@ struct amdgpu_df_funcs { 
uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, uint32_t ficadl_val, uint32_t ficadh_val); + uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev, + uint32_t df_inst); + uint32_t (*get_df_inst_id)(struct amdgpu_device *adev); }; struct amdgpu_df { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 766be7f18282..cef94e2169fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -742,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, return 0; } +uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) +{ + uint32_t df_inst_id; + + if ((!adev->df.funcs) || + (!adev->df.funcs->get_df_inst_id) || + (!adev->df.funcs->get_dram_base_addr)) + return addr; + + df_inst_id = adev->df.funcs->get_df_inst_id(adev); + + return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id); +} + /* wrapper of psp_ras_trigger_error */ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info) @@ -759,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, if (!obj) return -EINVAL; + /* Calculate XGMI relative offset */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + block_info.address = get_xgmi_relative_phy_addr(adev, + block_info.address); + } + switch (info->head.block) { case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->ras_error_inject) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index f51326598a8c..5a1bd8ed1a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -27,6 +27,9 @@ #include "df/df_3_6_offset.h" #include "df/df_3_6_sh_mask.h" +#define DF_3_6_SMN_REG_INST_DIST 0x8 +#define DF_3_6_INST_CNT 8 + static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; @@ -683,6 +686,58 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, } } +static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev, + uint32_t df_inst) +{ + uint32_t base_addr_reg_val = 0; + uint64_t base_addr = 0; + + base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 + + df_inst * DF_3_6_SMN_REG_INST_DIST); + + if (REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + AddrRngVal) == 0) { + DRM_WARN("address range not valid"); + return 0; + } + + base_addr = REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + DramBaseAddr); + + return base_addr << 28; +} + +static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev) +{ + uint32_t xgmi_node_id = 0; + uint32_t df_inst_id = 0; + + /* Walk through DF dst nodes to find current XGMI node */ + for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) { + + xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 + + df_inst_id * DF_3_6_SMN_REG_INST_DIST); + xgmi_node_id = REG_GET_FIELD(xgmi_node_id, + DF_CS_UMC_AON0_DramLimitAddress0, + DstFabricID); + + /* TODO: establish reason dest fabric id is offset by 7 */ + xgmi_node_id = xgmi_node_id >> 7; + + if (adev->gmc.xgmi.physical_node_id == xgmi_node_id) + break; + } + + if (df_inst_id == DF_3_6_INST_CNT) { + DRM_WARN("cant match df dst id with gpu node"); + return 0; + } + + return df_inst_id; +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, .sw_fini = df_v3_6_sw_fini, @@ -696,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .pmc_stop = 
df_v3_6_pmc_stop, .pmc_get_count = df_v3_6_pmc_get_count, .get_fica = df_v3_6_get_fica, - .set_fica = df_v3_6_set_fica + .set_fica = df_v3_6_set_fica, + .get_dram_base_addr = df_v3_6_get_dram_base_addr, + .get_df_inst_id = df_v3_6_get_df_inst_id }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index 87c84691b5be..bb2c9c7a18df 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -71,4 +71,7 @@ #define smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3 0x1d098UL #define smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3 0x1d09cUL +#define smnDF_CS_UMC_AON0_DramBaseAddress0 0x1c110UL +#define smnDF_CS_UMC_AON0_DramLimitAddress0 0x1c114UL + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h index 65e9f756e86e..7afa87c7ff54 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h @@ -53,4 +53,12 @@ #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000E00L #define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L +//DF_CS_UMC_AON0_DramLimitAddress0 +#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID__SHIFT 0x0 +#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO__SHIFT 0xa +#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr__SHIFT 0xc +#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID_MASK 0x000003FFL +#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO_MASK 0x00000400L +#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr_MASK 0xFFFFF000L + #endif From d33a99c4b63643d3d7a85d4dfb2ac5afb01eebca Mon Sep 17 00:00:00 2001 From: chen gong Date: Wed, 15 Jan 2020 14:32:00 +0800 Subject: [PATCH 212/658] drm/amdgpu: provide a generic function interface for reading/writing register by KIQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move amdgpu_virt_kiq_rreg/amdgpu_virt_kiq_wreg function to amdgpu_gfx.c, and rename them to amdgpu_kiq_rreg/amdgpu_kiq_wreg.Make it generic and flexible. 
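For illustration, a minimal sketch of a caller built on the renamed accessors; amdgpu_kiq_set_bits() is a hypothetical helper and not part of this patch, while amdgpu_kiq_rreg()/amdgpu_kiq_wreg() are the functions introduced below:

/* Hypothetical read-modify-write helper using the moved accessors.
 * Both accesses travel over the KIQ ring rather than plain MMIO.
 */
static void amdgpu_kiq_set_bits(struct amdgpu_device *adev,
				uint32_t reg, uint32_t bits)
{
	uint32_t val;

	val = amdgpu_kiq_rreg(adev, reg);		/* read via KIQ packet */
	amdgpu_kiq_wreg(adev, reg, val | bits);		/* write back via KIQ packet */
}

With the helpers living in amdgpu_gfx.c, callers outside the virtualization code, such as amdgpu_mm_rreg()/amdgpu_mm_wreg() below, can use the same path without depending on amdgpu_virt.c.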
Signed-off-by: chen gong Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 96 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 92 --------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 - drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +- 8 files changed, 108 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 53d882000101..afdea8f594ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -217,7 +217,7 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t ret; if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - return amdgpu_virt_kiq_rreg(adev, reg); + return amdgpu_kiq_rreg(adev, reg); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -295,7 +295,7 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, } if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - return amdgpu_virt_kiq_wreg(adev, reg, v); + return amdgpu_kiq_wreg(adev, reg, v); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b88b8b82bb64..0f960b498792 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -296,7 +296,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); + r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); if (r) return r; @@ -321,7 +321,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -658,3 +658,95 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } + +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_rreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_rreg(ring, reg); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. 
+ * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_read; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_read; + + return adev->wb.wb[kiq->reg_val_offs]; + +failed_kiq_read: + pr_err("failed to read reg:%x\n", reg); + return ~0; +} + +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_wreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_wreg(ring, reg, v); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_write; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_write; + + return; + +failed_kiq_write: + pr_err("failed to write reg:%x\n", reg); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index af4bd279f42f..ca17ffb01301 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -94,6 +94,7 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; + uint32_t reg_val_offs; }; /* @@ -375,4 +376,6 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 103033f96f13..adc813cde8e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -45,98 +45,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->pg_flags = 0; } -uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - BUG_ON(!ring->funcs->emit_rreg); - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for gpu reset case because this way may - * block gpu_recover() routine forever, e.g. 
this virt_kiq_rreg - * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will - * never return if we keep waiting in virt_kiq_rreg, which cause - * gpu_recover() hang there. - * - * also don't wait anymore for IRQ context - * */ - if (r < 1 && (adev->in_gpu_reset || in_interrupt())) - goto failed_kiq_read; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq_read; - - return adev->wb.wb[adev->virt.reg_val_offs]; - -failed_kiq_read: - pr_err("failed to read reg:%x\n", reg); - return ~0; -} - -void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - BUG_ON(!ring->funcs->emit_wreg); - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for gpu reset case because this way may - * block gpu_recover() routine forever, e.g. this virt_kiq_rreg - * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will - * never return if we keep waiting in virt_kiq_rreg, which cause - * gpu_recover() hang there. - * - * also don't wait anymore for IRQ context - * */ - if (r < 1 && (adev->in_gpu_reset || in_interrupt())) - goto failed_kiq_write; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq_write; - - return; - -failed_kiq_write: - pr_err("failed to write reg:%x\n", reg); -} - void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 4d1ac7612967..daaf909d009a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -287,8 +287,6 @@ static inline bool is_virtual_machine(void) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); -uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, uint32_t ref, uint32_t mask); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a0d4e79c4821..65885ba92a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4737,6 +4737,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -4745,9 +4746,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); 
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 46f0533ba43f..fa245973de12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6449,6 +6449,7 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -6457,9 +6458,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6c02fe62b497..d700f1fa387c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5213,6 +5213,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -5221,9 +5222,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, From c68dbcd8f9b0aa01e0459a5841716cb6a2f447b9 Mon Sep 17 00:00:00 2001 From: chen gong Date: Wed, 15 Jan 2020 14:44:43 +0800 Subject: [PATCH 213/658] drm/amdgpu: add kiq version interface for RREG32/WREG32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading some registers by mmio will result in hang when GPU is in "gfxoff" state.This problem can be solved by GPU in "ring command packages" way. 
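A simplified sketch of the register-read dispatch in amdgpu_mm_rreg() after this change (the helper name here is illustrative; the IDX path and bounds handling are omitted):

/* An explicit AMDGPU_REGS_KIQ request, or SR-IOV runtime without
 * AMDGPU_REGS_NO_KIQ, takes the KIQ ring path instead of a direct
 * MMIO read.
 */
static uint32_t mm_rreg_sketch(struct amdgpu_device *adev,
			       uint32_t reg, uint32_t acc_flags)
{
	if ((acc_flags & AMDGPU_REGS_KIQ) ||
	    (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_rreg(adev, reg);

	return readl(((void __iomem *)adev->rmmio) + (reg * 4));
}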
Signed-off-by: chen gong Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b1bb10625cd9..da3bcff61b97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1009,10 +1009,14 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define AMDGPU_REGS_IDX (1<<0) #define AMDGPU_REGS_NO_KIQ (1<<1) +#define AMDGPU_REGS_KIQ (1<<2) #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ) +#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ) + #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index afdea8f594ae..64b1b2d2d19b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -216,7 +216,7 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) + if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) return amdgpu_kiq_rreg(adev, reg); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) @@ -294,7 +294,7 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, adev->last_mm_index = v; } - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) + if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) return amdgpu_kiq_wreg(adev, reg, v); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) From e3cd03603dbe0e972196f12db75b6917b4db049c Mon Sep 17 00:00:00 2001 From: chen gong Date: Tue, 14 Jan 2020 14:28:48 +0800 Subject: [PATCH 214/658] drm/amdgpu: read gfx register using RREG32_KIQ macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading CP_MEM_SLP_CNTL register with RREG32_SOC15 macro will lead to hang when GPU is in "gfxoff" state. I do a uniform substitution here. 
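The reason the RREG32_KIQ() path avoids the gfxoff hang is that the CPU never performs the MMIO read itself: it submits a COPY_DATA packet and fetches the value from the write-back buffer. A condensed view of one read, based on amdgpu_kiq_rreg() and gfx_v9_0_ring_emit_rreg() earlier in this series (retries and error handling omitted):

/* Condensed flow of a single KIQ register read. */
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg);	/* PACKET3_COPY_DATA: reg -> wb slot */
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);

amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
val = adev->wb.wb[kiq->reg_val_offs];	/* result fetched from system memory */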
Signed-off-by: chen gong Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d700f1fa387c..fbf4ea3e0cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4717,12 +4717,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags = 0; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -4731,18 +4731,18 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; if (adev->asic_type != CHIP_ARCTURUS) { /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; From f519cd13c23e8c884011295b205c1463babbf416 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Mon, 13 Jan 2020 16:15:38 +0800 Subject: [PATCH 215/658] drm/amdgpu: update mmhub 9.4.1 header files for Acrturus Add mask & shift definition of MAM_D(0~3)MEM for all mmhub ranges. 
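The new shift/mask pairs are consumed through the usual REG_GET_FIELD() pattern; a small example with an illustrative raw value (the driver-side table entries that actually read these counters are added in the following patch):

/* MAM_D0MEM_SED_COUNT occupies bits 17:16 of MMEA0_EDC_CNT2, so a
 * raw register value of 0x00020000 decodes to a count of 2.
 */
uint32_t edc_cnt2 = 0x00020000;	/* illustrative raw register value */
uint32_t sed_cnt = REG_GET_FIELD(edc_cnt2, MMEA0_EDC_CNT2,
				 MAM_D0MEM_SED_COUNT);
/* sed_cnt == 2 */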
Signed-off-by: Dennis Li Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../asic_reg/mmhub/mmhub_9_4_1_sh_mask.h | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h index 40dfbf16bd34..111a71b434e2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h @@ -11185,6 +11185,14 @@ #define MMEA0_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA0_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA0_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA0_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA0_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA0_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA0_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA0_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA0_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA0_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA0_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA0_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA0_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA0_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -11193,6 +11201,14 @@ #define MMEA0_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA0_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA0_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA0_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA0_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA0_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA0_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA0_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA0_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA0_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA0_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA0_DSM_CNTL #define MMEA0_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA0_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -14197,6 +14213,14 @@ #define MMEA1_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA1_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA1_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA1_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA1_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA1_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA1_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA1_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA1_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA1_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA1_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA1_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA1_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA1_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -14205,6 +14229,14 @@ #define MMEA1_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA1_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA1_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA1_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA1_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA1_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define 
MMEA1_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA1_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA1_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA1_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA1_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA1_DSM_CNTL #define MMEA1_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA1_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -17209,6 +17241,14 @@ #define MMEA2_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA2_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA2_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA2_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA2_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA2_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA2_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA2_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA2_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA2_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA2_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA2_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA2_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA2_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -17217,6 +17257,14 @@ #define MMEA2_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA2_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA2_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA2_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA2_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA2_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA2_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA2_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA2_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA2_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA2_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA2_DSM_CNTL #define MMEA2_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA2_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -20221,6 +20269,14 @@ #define MMEA3_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA3_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA3_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA3_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA3_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA3_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA3_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA3_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA3_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA3_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA3_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA3_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA3_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA3_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -20229,6 +20285,14 @@ #define MMEA3_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA3_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA3_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA3_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA3_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA3_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA3_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA3_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L 
+#define MMEA3_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA3_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA3_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA3_DSM_CNTL #define MMEA3_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA3_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -23233,6 +23297,14 @@ #define MMEA4_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA4_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA4_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA4_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA4_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA4_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA4_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA4_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA4_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA4_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA4_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA4_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA4_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA4_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -23241,6 +23313,14 @@ #define MMEA4_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA4_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA4_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA4_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA4_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA4_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA4_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA4_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA4_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA4_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA4_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA4_DSM_CNTL #define MMEA4_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA4_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -34952,6 +35032,14 @@ #define MMEA5_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA5_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA5_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA5_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA5_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA5_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA5_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA5_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA5_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA5_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA5_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA5_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA5_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA5_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -34960,6 +35048,14 @@ #define MMEA5_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA5_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA5_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA5_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA5_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA5_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA5_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA5_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA5_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA5_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 
0x30000000L +#define MMEA5_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA5_DSM_CNTL #define MMEA5_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA5_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -37964,6 +38060,14 @@ #define MMEA6_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA6_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA6_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA6_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA6_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA6_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA6_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA6_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA6_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA6_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA6_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA6_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA6_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA6_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -37972,6 +38076,14 @@ #define MMEA6_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA6_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA6_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA6_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA6_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA6_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA6_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA6_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA6_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA6_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA6_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA6_DSM_CNTL #define MMEA6_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA6_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 @@ -40976,6 +41088,14 @@ #define MMEA7_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa #define MMEA7_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc #define MMEA7_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define MMEA7_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define MMEA7_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define MMEA7_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define MMEA7_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define MMEA7_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define MMEA7_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define MMEA7_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define MMEA7_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e #define MMEA7_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L #define MMEA7_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL #define MMEA7_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L @@ -40984,6 +41104,14 @@ #define MMEA7_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L #define MMEA7_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L #define MMEA7_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define MMEA7_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define MMEA7_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define MMEA7_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define MMEA7_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define MMEA7_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define MMEA7_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define MMEA7_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define MMEA7_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L //MMEA7_DSM_CNTL #define 
MMEA7_DSM_CNTL__DRAMRD_CMDMEM_DSM_IRRITATOR_DATA__SHIFT 0x0 #define MMEA7_DSM_CNTL__DRAMRD_CMDMEM_ENABLE_SINGLE_WRITE__SHIFT 0x2 From 39aa0ef1638ad7722e8182331c61deae8da97105 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Mon, 13 Jan 2020 17:03:53 +0800 Subject: [PATCH 216/658] drm/amdgpu: enable RAS feature for more mmhub sub-blocks of Acrturus Compared with Vg20, the size of mmhub range is changed from 2 to 8. Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 701 +++++++++++++++++++++++- 1 file changed, 695 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 5c42387c9274..a32c9757711f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -663,6 +663,7 @@ void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) } static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = { + /* MMHUB Range 0 */ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), @@ -751,6 +752,24 @@ static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = { 0, 0, SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), }, + { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 1 */ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), @@ -838,16 +857,686 @@ static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = { { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHAB Range 2*/ + { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { 
"MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { 
"MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Rang 3 */ + { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, 
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 4 */ + { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + 
SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUAB Range 5 */ + { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IOWR_CMDMEM", 
SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 6 */ + { "MMEA6_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, 
mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA6_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA6_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA6_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA6_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA6_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + 
SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 7*/ + { "MMEA7_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA7_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { 
"MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA7_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA7_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA7_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA7_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_DED_COUNT), } }; static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = { - { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0}, - { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0}, - { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0}, - { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0}, - { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0}, - { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0}, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 }, }; static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg, From 5e66403e4d709decacf2b53e55deb55d8f6e6982 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Thu, 16 Jan 2020 11:07:55 +0800 Subject: [PATCH 217/658] drm/amdgpu: refine the security check for RAS functions To avoid calling RAS related functions when RAS feature isn't supported in hardware. Change to check supported features, instead of checking asic type. v2: reuse amdgpu_ras_is_supported function, instead of introducing a new flag for hardware ras feature. 
Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fbf4ea3e0cfb..2ff8b9e312fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5994,7 +5994,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, int ret; struct ta_ras_trigger_error_input block_info = { 0 }; - if (adev->asic_type != CHIP_VEGA20) + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return -EINVAL; if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) @@ -6245,7 +6245,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, uint32_t i, j, k; uint32_t reg_value; - if (adev->asic_type != CHIP_VEGA20) + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return -EINVAL; err_data->ue_count = 0; From 504c5e72d781e48946c32bf427c665ed15d532d0 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Thu, 16 Jan 2020 11:25:08 +0800 Subject: [PATCH 218/658] drm/amdgpu: abstract EDC counter clear to a separated function 1. Add IP prefix for the IP related codes. 2. Refactor the code to clear EDC counter. Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 112 ++++++++++++++++++-------- 1 file changed, 77 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2ff8b9e312fe..583800ab5fe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -736,6 +736,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); +static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev); static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); @@ -4029,7 +4030,7 @@ static const struct soc15_reg_entry sgpr2_init_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, }; -static const struct soc15_reg_entry sec_ded_counter_registers[] = { +static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, @@ -4118,7 +4119,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; struct amdgpu_ib ib; struct dma_fence *f = NULL; - int r, i, j, k; + int r, i; unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; @@ -4264,18 +4265,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) goto fail; } - /* read back registers to clear the counters */ - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { - for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { - for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0x0, k); - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - } - } - } - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); - 
mutex_unlock(&adev->grbm_idx_mutex); + gfx_v9_0_clear_ras_edc_counter(adev); fail: amdgpu_ib_free(adev, &ib, NULL); @@ -5546,7 +5536,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, } -static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = { +static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) @@ -6119,7 +6109,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); - for (i = 0; i < 16; i++) { + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); @@ -6138,7 +6128,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, } } - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); @@ -6159,7 +6149,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, } } - for (i = 0; i < 4; i++) { + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); @@ -6171,7 +6161,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, } } - for (i = 0; i < 32; i++) { + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); @@ -6198,36 +6188,36 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, return 0; } -static int __get_ras_error_count(const struct soc15_reg_entry *reg, +static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg, uint32_t se_id, uint32_t inst_id, uint32_t value, uint32_t *sec_count, uint32_t *ded_count) { uint32_t i; uint32_t sec_cnt, ded_cnt; - for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) { - if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset || - gc_ras_fields_vg20[i].seg != reg->seg || - gc_ras_fields_vg20[i].inst != reg->inst) + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { + if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || + gfx_v9_0_ras_fields[i].seg != reg->seg || + gfx_v9_0_ras_fields[i].inst != reg->inst) continue; sec_cnt = (value & - gc_ras_fields_vg20[i].sec_count_mask) >> - gc_ras_fields_vg20[i].sec_count_shift; + gfx_v9_0_ras_fields[i].sec_count_mask) >> + gfx_v9_0_ras_fields[i].sec_count_shift; if (sec_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", - gc_ras_fields_vg20[i].name, + gfx_v9_0_ras_fields[i].name, se_id, inst_id, sec_cnt); *sec_count += sec_cnt; } ded_cnt = (value & - gc_ras_fields_vg20[i].ded_count_mask) >> - gc_ras_fields_vg20[i].ded_count_shift; + gfx_v9_0_ras_fields[i].ded_count_mask) >> + gfx_v9_0_ras_fields[i].ded_count_shift; if (ded_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", - gc_ras_fields_vg20[i].name, + gfx_v9_0_ras_fields[i].name, se_id, inst_id, ded_cnt); *ded_count += ded_cnt; @@ -6237,6 +6227,58 @@ static int __get_ras_error_count(const struct soc15_reg_entry *reg, return 0; } +static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev) +{ + int i, j, k; + + /* read back registers to clear the counters */ + 
mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); + } + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); +} + static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { @@ -6253,14 +6295,14 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { - for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { - for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0, k); reg_value = - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) - __get_ras_error_count(&sec_ded_counter_registers[i], + gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i], j, k, reg_value, &sec_count, &ded_count); } From 19cf0dd4b9d2771015fc9d74ec1b0b9203cf8c5a Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Thu, 16 Jan 2020 12:30:33 +0800 Subject: [PATCH 219/658] drm/amdgpu: add EDC counter registers of gc for Arcturus add reg headers to gc includes v2: remove unused registers and fields in this patch set Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Reviewed-by: Tao Zhou Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../amd/include/asic_reg/gc/gc_9_4_1_offset.h | 264 +++++++ .../include/asic_reg/gc/gc_9_4_1_sh_mask.h | 748 ++++++++++++++++++ 2 files changed, 1012 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h new file mode 100644 index 000000000000..f41556abfbbc --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _gc_9_4_1_OFFSET_HEADER +#define _gc_9_4_1_OFFSET_HEADER + +// addressBlock: gc_grbmdec +// base address: 0x8000 +#define mmGRBM_CNTL 0x0000 +#define mmGRBM_CNTL_BASE_IDX 0 +#define mmGRBM_SKEW_CNTL 0x0001 +#define mmGRBM_SKEW_CNTL_BASE_IDX 0 +#define mmGRBM_STATUS2 0x0002 +#define mmGRBM_STATUS2_BASE_IDX 0 +#define mmGRBM_PWR_CNTL 0x0003 +#define mmGRBM_PWR_CNTL_BASE_IDX 0 +#define mmGRBM_STATUS 0x0004 +#define mmGRBM_STATUS_BASE_IDX 0 +#define mmGRBM_STATUS_SE0 0x0005 +#define mmGRBM_STATUS_SE0_BASE_IDX 0 +#define mmGRBM_STATUS_SE1 0x0006 +#define mmGRBM_STATUS_SE1_BASE_IDX 0 +#define mmGRBM_SOFT_RESET 0x0008 +#define mmGRBM_SOFT_RESET_BASE_IDX 0 +#define mmGRBM_GFX_CLKEN_CNTL 0x000c +#define mmGRBM_GFX_CLKEN_CNTL_BASE_IDX 0 +#define mmGRBM_WAIT_IDLE_CLOCKS 0x000d +#define mmGRBM_WAIT_IDLE_CLOCKS_BASE_IDX 0 +#define mmGRBM_STATUS_SE2 0x000e +#define mmGRBM_STATUS_SE2_BASE_IDX 0 +#define mmGRBM_STATUS_SE3 0x000f +#define mmGRBM_STATUS_SE3_BASE_IDX 0 +#define mmGRBM_READ_ERROR 0x0016 +#define mmGRBM_READ_ERROR_BASE_IDX 0 +#define mmGRBM_READ_ERROR2 0x0017 +#define mmGRBM_READ_ERROR2_BASE_IDX 0 +#define mmGRBM_INT_CNTL 0x0018 +#define mmGRBM_INT_CNTL_BASE_IDX 0 +#define mmGRBM_TRAP_OP 0x0019 +#define mmGRBM_TRAP_OP_BASE_IDX 0 +#define mmGRBM_TRAP_ADDR 0x001a +#define mmGRBM_TRAP_ADDR_BASE_IDX 0 +#define mmGRBM_TRAP_ADDR_MSK 0x001b +#define mmGRBM_TRAP_ADDR_MSK_BASE_IDX 0 +#define mmGRBM_TRAP_WD 0x001c +#define mmGRBM_TRAP_WD_BASE_IDX 0 +#define mmGRBM_TRAP_WD_MSK 0x001d +#define mmGRBM_TRAP_WD_MSK_BASE_IDX 0 +#define mmGRBM_DSM_BYPASS 0x001e +#define mmGRBM_DSM_BYPASS_BASE_IDX 0 +#define mmGRBM_WRITE_ERROR 0x001f +#define mmGRBM_WRITE_ERROR_BASE_IDX 0 +#define mmGRBM_IOV_ERROR 0x0020 +#define mmGRBM_IOV_ERROR_BASE_IDX 0 +#define mmGRBM_CHIP_REVISION 0x0021 +#define mmGRBM_CHIP_REVISION_BASE_IDX 0 +#define mmGRBM_GFX_CNTL 0x0022 +#define mmGRBM_GFX_CNTL_BASE_IDX 0 +#define mmGRBM_RSMU_CFG 0x0023 +#define mmGRBM_RSMU_CFG_BASE_IDX 0 +#define mmGRBM_IH_CREDIT 0x0024 +#define mmGRBM_IH_CREDIT_BASE_IDX 0 +#define mmGRBM_PWR_CNTL2 0x0025 +#define mmGRBM_PWR_CNTL2_BASE_IDX 0 +#define mmGRBM_UTCL2_INVAL_RANGE_START 0x0026 +#define 
mmGRBM_UTCL2_INVAL_RANGE_START_BASE_IDX 0 +#define mmGRBM_UTCL2_INVAL_RANGE_END 0x0027 +#define mmGRBM_UTCL2_INVAL_RANGE_END_BASE_IDX 0 +#define mmGRBM_RSMU_READ_ERROR 0x0028 +#define mmGRBM_RSMU_READ_ERROR_BASE_IDX 0 +#define mmGRBM_CHICKEN_BITS 0x0029 +#define mmGRBM_CHICKEN_BITS_BASE_IDX 0 +#define mmGRBM_FENCE_RANGE0 0x002a +#define mmGRBM_FENCE_RANGE0_BASE_IDX 0 +#define mmGRBM_FENCE_RANGE1 0x002b +#define mmGRBM_FENCE_RANGE1_BASE_IDX 0 +#define mmGRBM_NOWHERE 0x003f +#define mmGRBM_NOWHERE_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG0 0x0040 +#define mmGRBM_SCRATCH_REG0_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG1 0x0041 +#define mmGRBM_SCRATCH_REG1_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG2 0x0042 +#define mmGRBM_SCRATCH_REG2_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG3 0x0043 +#define mmGRBM_SCRATCH_REG3_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG4 0x0044 +#define mmGRBM_SCRATCH_REG4_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG5 0x0045 +#define mmGRBM_SCRATCH_REG5_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG6 0x0046 +#define mmGRBM_SCRATCH_REG6_BASE_IDX 0 +#define mmGRBM_SCRATCH_REG7 0x0047 +#define mmGRBM_SCRATCH_REG7_BASE_IDX 0 + +// addressBlock: gc_cppdec2 +// base address: 0xc600 +#define mmCPF_EDC_TAG_CNT 0x1189 +#define mmCPF_EDC_TAG_CNT_BASE_IDX 0 +#define mmCPF_EDC_ROQ_CNT 0x118a +#define mmCPF_EDC_ROQ_CNT_BASE_IDX 0 +#define mmCPG_EDC_TAG_CNT 0x118b +#define mmCPG_EDC_TAG_CNT_BASE_IDX 0 +#define mmCPG_EDC_DMA_CNT 0x118d +#define mmCPG_EDC_DMA_CNT_BASE_IDX 0 +#define mmCPC_EDC_SCRATCH_CNT 0x118e +#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX 0 +#define mmCPC_EDC_UCODE_CNT 0x118f +#define mmCPC_EDC_UCODE_CNT_BASE_IDX 0 +#define mmDC_EDC_STATE_CNT 0x1191 +#define mmDC_EDC_STATE_CNT_BASE_IDX 0 +#define mmDC_EDC_CSINVOC_CNT 0x1192 +#define mmDC_EDC_CSINVOC_CNT_BASE_IDX 0 +#define mmDC_EDC_RESTORE_CNT 0x1193 +#define mmDC_EDC_RESTORE_CNT_BASE_IDX 0 + +// addressBlock: gc_gdsdec +// base address: 0x9700 +#define mmGDS_EDC_CNT 0x05c5 +#define mmGDS_EDC_CNT_BASE_IDX 0 +#define mmGDS_EDC_GRBM_CNT 0x05c6 +#define mmGDS_EDC_GRBM_CNT_BASE_IDX 0 +#define mmGDS_EDC_OA_DED 0x05c7 +#define mmGDS_EDC_OA_DED_BASE_IDX 0 +#define mmGDS_EDC_OA_PHY_CNT 0x05cb +#define mmGDS_EDC_OA_PHY_CNT_BASE_IDX 0 +#define mmGDS_EDC_OA_PIPE_CNT 0x05cc +#define mmGDS_EDC_OA_PIPE_CNT_BASE_IDX 0 + +// addressBlock: gc_shsdec +// base address: 0x9000 +#define mmSPI_EDC_CNT 0x0445 +#define mmSPI_EDC_CNT_BASE_IDX 0 + +// addressBlock: gc_sqdec +// base address: 0x8c00 +#define mmSQC_EDC_CNT2 0x032c +#define mmSQC_EDC_CNT2_BASE_IDX 0 +#define mmSQC_EDC_CNT3 0x032d +#define mmSQC_EDC_CNT3_BASE_IDX 0 +#define mmSQC_EDC_PARITY_CNT3 0x032e +#define mmSQC_EDC_PARITY_CNT3_BASE_IDX 0 +#define mmSQC_EDC_CNT 0x03a2 +#define mmSQC_EDC_CNT_BASE_IDX 0 +#define mmSQ_EDC_SEC_CNT 0x03a3 +#define mmSQ_EDC_SEC_CNT_BASE_IDX 0 +#define mmSQ_EDC_DED_CNT 0x03a4 +#define mmSQ_EDC_DED_CNT_BASE_IDX 0 +#define mmSQ_EDC_INFO 0x03a5 +#define mmSQ_EDC_INFO_BASE_IDX 0 +#define mmSQ_EDC_CNT 0x03a6 +#define mmSQ_EDC_CNT_BASE_IDX 0 + +// addressBlock: gc_tpdec +// base address: 0x9400 +#define mmTA_EDC_CNT 0x0586 +#define mmTA_EDC_CNT_BASE_IDX 0 + +// addressBlock: gc_tcdec +// base address: 0xac00 +#define mmTCP_EDC_CNT 0x0b17 +#define mmTCP_EDC_CNT_BASE_IDX 0 +#define mmTCP_EDC_CNT_NEW 0x0b18 +#define mmTCP_EDC_CNT_NEW_BASE_IDX 0 +#define mmTCP_ATC_EDC_GATCL1_CNT 0x12b1 +#define mmTCP_ATC_EDC_GATCL1_CNT_BASE_IDX 0 +#define mmTCI_EDC_CNT 0x0b60 +#define mmTCI_EDC_CNT_BASE_IDX 0 +#define mmTCC_EDC_CNT 0x0b82 +#define mmTCC_EDC_CNT_BASE_IDX 0 +#define mmTCC_EDC_CNT2 0x0b83 +#define 
mmTCC_EDC_CNT2_BASE_IDX 0 +#define mmTCA_EDC_CNT 0x0bc5 +#define mmTCA_EDC_CNT_BASE_IDX 0 + +// addressBlock: gc_tpdec +// base address: 0x9400 +#define mmTD_EDC_CNT 0x052e +#define mmTD_EDC_CNT_BASE_IDX 0 +#define mmTA_EDC_CNT 0x0586 +#define mmTA_EDC_CNT_BASE_IDX 0 + +// addressBlock: gc_ea_gceadec2 +// base address: 0x9c00 +#define mmGCEA_EDC_CNT 0x0706 +#define mmGCEA_EDC_CNT_BASE_IDX 0 +#define mmGCEA_EDC_CNT2 0x0707 +#define mmGCEA_EDC_CNT2_BASE_IDX 0 +#define mmGCEA_EDC_CNT3 0x071b +#define mmGCEA_EDC_CNT3_BASE_IDX 0 + +// addressBlock: gc_gfxudec +// base address: 0x30000 +#define mmSCRATCH_REG0 0x2040 +#define mmSCRATCH_REG0_BASE_IDX 1 +#define mmSCRATCH_REG1 0x2041 +#define mmSCRATCH_REG1_BASE_IDX 1 +#define mmSCRATCH_REG2 0x2042 +#define mmSCRATCH_REG2_BASE_IDX 1 +#define mmSCRATCH_REG3 0x2043 +#define mmSCRATCH_REG3_BASE_IDX 1 +#define mmSCRATCH_REG4 0x2044 +#define mmSCRATCH_REG4_BASE_IDX 1 +#define mmSCRATCH_REG5 0x2045 +#define mmSCRATCH_REG5_BASE_IDX 1 +#define mmSCRATCH_REG6 0x2046 +#define mmSCRATCH_REG6_BASE_IDX 1 +#define mmSCRATCH_REG7 0x2047 +#define mmSCRATCH_REG7_BASE_IDX 1 +#define mmGRBM_GFX_INDEX 0x2200 +#define mmGRBM_GFX_INDEX_BASE_IDX 1 + +// addressBlock: gc_utcl2_atcl2dec +// base address: 0xa000 +#define mmATC_L2_CACHE_4K_DSM_INDEX 0x080e +#define mmATC_L2_CACHE_4K_DSM_INDEX_BASE_IDX 0 +#define mmATC_L2_CACHE_2M_DSM_INDEX 0x080f +#define mmATC_L2_CACHE_2M_DSM_INDEX_BASE_IDX 0 +#define mmATC_L2_CACHE_4K_DSM_CNTL 0x0810 +#define mmATC_L2_CACHE_4K_DSM_CNTL_BASE_IDX 0 +#define mmATC_L2_CACHE_2M_DSM_CNTL 0x0811 +#define mmATC_L2_CACHE_2M_DSM_CNTL_BASE_IDX 0 + +// addressBlock: gc_utcl2_vml2pfdec +// base address: 0xa100 +#define mmVML2_MEM_ECC_INDEX 0x0860 +#define mmVML2_MEM_ECC_INDEX_BASE_IDX 0 +#define mmVML2_WALKER_MEM_ECC_INDEX 0x0861 +#define mmVML2_WALKER_MEM_ECC_INDEX_BASE_IDX 0 +#define mmUTCL2_MEM_ECC_INDEX 0x0862 +#define mmUTCL2_MEM_ECC_INDEX_BASE_IDX 0 + +#define mmVML2_MEM_ECC_CNTL 0x0863 +#define mmVML2_MEM_ECC_CNTL_BASE_IDX 0 +#define mmVML2_WALKER_MEM_ECC_CNTL 0x0864 +#define mmVML2_WALKER_MEM_ECC_CNTL_BASE_IDX 0 +#define mmUTCL2_MEM_ECC_CNTL 0x0865 +#define mmUTCL2_MEM_ECC_CNTL_BASE_IDX 0 + +// addressBlock: gc_rlcpdec +// base address: 0x3b000 +#define mmRLC_EDC_CNT 0x4d40 +#define mmRLC_EDC_CNT_BASE_IDX 1 +#define mmRLC_EDC_CNT2 0x4d41 +#define mmRLC_EDC_CNT2_BASE_IDX 1 + +#endif \ No newline at end of file diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h new file mode 100644 index 000000000000..f26246a600c6 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h @@ -0,0 +1,748 @@ +/* + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _gc_9_4_1_SH_MASK_HEADER +#define _gc_9_4_1_SH_MASK_HEADER + +// addressBlock: gc_cppdec2 +//CPF_EDC_TAG_CNT +#define CPF_EDC_TAG_CNT__DED_COUNT__SHIFT 0x0 +#define CPF_EDC_TAG_CNT__SEC_COUNT__SHIFT 0x2 +#define CPF_EDC_TAG_CNT__DED_COUNT_MASK 0x00000003L +#define CPF_EDC_TAG_CNT__SEC_COUNT_MASK 0x0000000CL +//CPF_EDC_ROQ_CNT +#define CPF_EDC_ROQ_CNT__DED_COUNT_ME1__SHIFT 0x0 +#define CPF_EDC_ROQ_CNT__SEC_COUNT_ME1__SHIFT 0x2 +#define CPF_EDC_ROQ_CNT__DED_COUNT_ME2__SHIFT 0x4 +#define CPF_EDC_ROQ_CNT__SEC_COUNT_ME2__SHIFT 0x6 +#define CPF_EDC_ROQ_CNT__DED_COUNT_ME1_MASK 0x00000003L +#define CPF_EDC_ROQ_CNT__SEC_COUNT_ME1_MASK 0x0000000CL +#define CPF_EDC_ROQ_CNT__DED_COUNT_ME2_MASK 0x00000030L +#define CPF_EDC_ROQ_CNT__SEC_COUNT_ME2_MASK 0x000000C0L +//CPG_EDC_TAG_CNT +#define CPG_EDC_TAG_CNT__DED_COUNT__SHIFT 0x0 +#define CPG_EDC_TAG_CNT__SEC_COUNT__SHIFT 0x2 +#define CPG_EDC_TAG_CNT__DED_COUNT_MASK 0x00000003L +#define CPG_EDC_TAG_CNT__SEC_COUNT_MASK 0x0000000CL +//CPG_EDC_DMA_CNT +#define CPG_EDC_DMA_CNT__ROQ_DED_COUNT__SHIFT 0x0 +#define CPG_EDC_DMA_CNT__ROQ_SEC_COUNT__SHIFT 0x2 +#define CPG_EDC_DMA_CNT__TAG_DED_COUNT__SHIFT 0x4 +#define CPG_EDC_DMA_CNT__TAG_SEC_COUNT__SHIFT 0x6 +#define CPG_EDC_DMA_CNT__ROQ_DED_COUNT_MASK 0x00000003L +#define CPG_EDC_DMA_CNT__ROQ_SEC_COUNT_MASK 0x0000000CL +#define CPG_EDC_DMA_CNT__TAG_DED_COUNT_MASK 0x00000030L +#define CPG_EDC_DMA_CNT__TAG_SEC_COUNT_MASK 0x000000C0L +//CPC_EDC_SCRATCH_CNT +#define CPC_EDC_SCRATCH_CNT__DED_COUNT__SHIFT 0x0 +#define CPC_EDC_SCRATCH_CNT__SEC_COUNT__SHIFT 0x2 +#define CPC_EDC_SCRATCH_CNT__DED_COUNT_MASK 0x00000003L +#define CPC_EDC_SCRATCH_CNT__SEC_COUNT_MASK 0x0000000CL +//CPC_EDC_UCODE_CNT +#define CPC_EDC_UCODE_CNT__DED_COUNT__SHIFT 0x0 +#define CPC_EDC_UCODE_CNT__SEC_COUNT__SHIFT 0x2 +#define CPC_EDC_UCODE_CNT__DED_COUNT_MASK 0x00000003L +#define CPC_EDC_UCODE_CNT__SEC_COUNT_MASK 0x0000000CL +//DC_EDC_STATE_CNT +#define DC_EDC_STATE_CNT__DED_COUNT_ME1__SHIFT 0x0 +#define DC_EDC_STATE_CNT__SEC_COUNT_ME1__SHIFT 0x2 +#define DC_EDC_STATE_CNT__DED_COUNT_ME1_MASK 0x00000003L +#define DC_EDC_STATE_CNT__SEC_COUNT_ME1_MASK 0x0000000CL +//DC_EDC_CSINVOC_CNT +#define DC_EDC_CSINVOC_CNT__DED_COUNT_ME1__SHIFT 0x0 +#define DC_EDC_CSINVOC_CNT__SEC_COUNT_ME1__SHIFT 0x2 +#define DC_EDC_CSINVOC_CNT__DED_COUNT1_ME1__SHIFT 0x4 +#define DC_EDC_CSINVOC_CNT__SEC_COUNT1_ME1__SHIFT 0x6 +#define DC_EDC_CSINVOC_CNT__DED_COUNT_ME1_MASK 0x00000003L +#define DC_EDC_CSINVOC_CNT__SEC_COUNT_ME1_MASK 0x0000000CL +#define DC_EDC_CSINVOC_CNT__DED_COUNT1_ME1_MASK 0x00000030L +#define DC_EDC_CSINVOC_CNT__SEC_COUNT1_ME1_MASK 0x000000C0L +//DC_EDC_RESTORE_CNT +#define DC_EDC_RESTORE_CNT__DED_COUNT_ME1__SHIFT 0x0 +#define DC_EDC_RESTORE_CNT__SEC_COUNT_ME1__SHIFT 0x2 +#define DC_EDC_RESTORE_CNT__DED_COUNT1_ME1__SHIFT 0x4 +#define DC_EDC_RESTORE_CNT__SEC_COUNT1_ME1__SHIFT 0x6 +#define DC_EDC_RESTORE_CNT__DED_COUNT_ME1_MASK 0x00000003L +#define DC_EDC_RESTORE_CNT__SEC_COUNT_ME1_MASK 0x0000000CL +#define DC_EDC_RESTORE_CNT__DED_COUNT1_ME1_MASK 0x00000030L +#define DC_EDC_RESTORE_CNT__SEC_COUNT1_ME1_MASK 0x000000C0L + +// addressBlock: gc_gdsdec +//GDS_EDC_CNT +#define GDS_EDC_CNT__GDS_MEM_DED__SHIFT 0x0 +#define GDS_EDC_CNT__GDS_MEM_SEC__SHIFT 0x4 +#define GDS_EDC_CNT__UNUSED__SHIFT 0x6 
+#define GDS_EDC_CNT__GDS_MEM_DED_MASK 0x00000003L +#define GDS_EDC_CNT__GDS_MEM_SEC_MASK 0x00000030L +#define GDS_EDC_CNT__UNUSED_MASK 0xFFFFFFC0L +//GDS_EDC_GRBM_CNT +#define GDS_EDC_GRBM_CNT__DED__SHIFT 0x0 +#define GDS_EDC_GRBM_CNT__SEC__SHIFT 0x2 +#define GDS_EDC_GRBM_CNT__UNUSED__SHIFT 0x4 +#define GDS_EDC_GRBM_CNT__DED_MASK 0x00000003L +#define GDS_EDC_GRBM_CNT__SEC_MASK 0x0000000CL +#define GDS_EDC_GRBM_CNT__UNUSED_MASK 0xFFFFFFF0L +//GDS_EDC_OA_DED +#define GDS_EDC_OA_DED__ME0_GFXHP3D_PIX_DED__SHIFT 0x0 +#define GDS_EDC_OA_DED__ME0_GFXHP3D_VTX_DED__SHIFT 0x1 +#define GDS_EDC_OA_DED__ME0_CS_DED__SHIFT 0x2 +#define GDS_EDC_OA_DED__ME0_GFXHP3D_GS_DED__SHIFT 0x3 +#define GDS_EDC_OA_DED__ME1_PIPE0_DED__SHIFT 0x4 +#define GDS_EDC_OA_DED__ME1_PIPE1_DED__SHIFT 0x5 +#define GDS_EDC_OA_DED__ME1_PIPE2_DED__SHIFT 0x6 +#define GDS_EDC_OA_DED__ME1_PIPE3_DED__SHIFT 0x7 +#define GDS_EDC_OA_DED__ME2_PIPE0_DED__SHIFT 0x8 +#define GDS_EDC_OA_DED__ME2_PIPE1_DED__SHIFT 0x9 +#define GDS_EDC_OA_DED__ME2_PIPE2_DED__SHIFT 0xa +#define GDS_EDC_OA_DED__ME2_PIPE3_DED__SHIFT 0xb +#define GDS_EDC_OA_DED__UNUSED1__SHIFT 0xc +#define GDS_EDC_OA_DED__ME0_GFXHP3D_PIX_DED_MASK 0x00000001L +#define GDS_EDC_OA_DED__ME0_GFXHP3D_VTX_DED_MASK 0x00000002L +#define GDS_EDC_OA_DED__ME0_CS_DED_MASK 0x00000004L +#define GDS_EDC_OA_DED__ME0_GFXHP3D_GS_DED_MASK 0x00000008L +#define GDS_EDC_OA_DED__ME1_PIPE0_DED_MASK 0x00000010L +#define GDS_EDC_OA_DED__ME1_PIPE1_DED_MASK 0x00000020L +#define GDS_EDC_OA_DED__ME1_PIPE2_DED_MASK 0x00000040L +#define GDS_EDC_OA_DED__ME1_PIPE3_DED_MASK 0x00000080L +#define GDS_EDC_OA_DED__ME2_PIPE0_DED_MASK 0x00000100L +#define GDS_EDC_OA_DED__ME2_PIPE1_DED_MASK 0x00000200L +#define GDS_EDC_OA_DED__ME2_PIPE2_DED_MASK 0x00000400L +#define GDS_EDC_OA_DED__ME2_PIPE3_DED_MASK 0x00000800L +#define GDS_EDC_OA_DED__UNUSED1_MASK 0xFFFFF000L +//GDS_EDC_OA_PHY_CNT +#define GDS_EDC_OA_PHY_CNT__ME0_CS_PIPE_MEM_SEC__SHIFT 0x0 +#define GDS_EDC_OA_PHY_CNT__ME0_CS_PIPE_MEM_DED__SHIFT 0x2 +#define GDS_EDC_OA_PHY_CNT__PHY_CMD_RAM_MEM_SEC__SHIFT 0x4 +#define GDS_EDC_OA_PHY_CNT__PHY_CMD_RAM_MEM_DED__SHIFT 0x6 +#define GDS_EDC_OA_PHY_CNT__PHY_DATA_RAM_MEM_SEC__SHIFT 0x8 +#define GDS_EDC_OA_PHY_CNT__PHY_DATA_RAM_MEM_DED__SHIFT 0xa +#define GDS_EDC_OA_PHY_CNT__UNUSED1__SHIFT 0xc +#define GDS_EDC_OA_PHY_CNT__ME0_CS_PIPE_MEM_SEC_MASK 0x00000003L +#define GDS_EDC_OA_PHY_CNT__ME0_CS_PIPE_MEM_DED_MASK 0x0000000CL +#define GDS_EDC_OA_PHY_CNT__PHY_CMD_RAM_MEM_SEC_MASK 0x00000030L +#define GDS_EDC_OA_PHY_CNT__PHY_CMD_RAM_MEM_DED_MASK 0x000000C0L +#define GDS_EDC_OA_PHY_CNT__PHY_DATA_RAM_MEM_SEC_MASK 0x00000300L +#define GDS_EDC_OA_PHY_CNT__PHY_DATA_RAM_MEM_DED_MASK 0x00000C00L +#define GDS_EDC_OA_PHY_CNT__UNUSED1_MASK 0xFFFFF000L +//GDS_EDC_OA_PIPE_CNT +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE0_PIPE_MEM_SEC__SHIFT 0x0 +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE0_PIPE_MEM_DED__SHIFT 0x2 +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE1_PIPE_MEM_SEC__SHIFT 0x4 +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE1_PIPE_MEM_DED__SHIFT 0x6 +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE2_PIPE_MEM_SEC__SHIFT 0x8 +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE2_PIPE_MEM_DED__SHIFT 0xa +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE3_PIPE_MEM_SEC__SHIFT 0xc +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE3_PIPE_MEM_DED__SHIFT 0xe +#define GDS_EDC_OA_PIPE_CNT__UNUSED__SHIFT 0x10 +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE0_PIPE_MEM_SEC_MASK 0x00000003L +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE0_PIPE_MEM_DED_MASK 0x0000000CL +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE1_PIPE_MEM_SEC_MASK 0x00000030L +#define 
GDS_EDC_OA_PIPE_CNT__ME1_PIPE1_PIPE_MEM_DED_MASK 0x000000C0L +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE2_PIPE_MEM_SEC_MASK 0x00000300L +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE2_PIPE_MEM_DED_MASK 0x00000C00L +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE3_PIPE_MEM_SEC_MASK 0x00003000L +#define GDS_EDC_OA_PIPE_CNT__ME1_PIPE3_PIPE_MEM_DED_MASK 0x0000C000L +#define GDS_EDC_OA_PIPE_CNT__UNUSED_MASK 0xFFFF0000L + +// addressBlock: gc_shsdec +//SPI_EDC_CNT +#define SPI_EDC_CNT__SPI_SR_MEM_SEC_COUNT__SHIFT 0x0 +#define SPI_EDC_CNT__SPI_SR_MEM_DED_COUNT__SHIFT 0x2 +#define SPI_EDC_CNT__SPI_GDS_EXPREQ_SEC_COUNT__SHIFT 0x4 +#define SPI_EDC_CNT__SPI_GDS_EXPREQ_DED_COUNT__SHIFT 0x6 +#define SPI_EDC_CNT__SPI_WB_GRANT_30_SEC_COUNT__SHIFT 0x8 +#define SPI_EDC_CNT__SPI_WB_GRANT_30_DED_COUNT__SHIFT 0xa +#define SPI_EDC_CNT__SPI_WB_GRANT_61_SEC_COUNT__SHIFT 0xc +#define SPI_EDC_CNT__SPI_WB_GRANT_61_DED_COUNT__SHIFT 0xe +#define SPI_EDC_CNT__SPI_LIFE_CNT_SEC_COUNT__SHIFT 0x10 +#define SPI_EDC_CNT__SPI_LIFE_CNT_DED_COUNT__SHIFT 0x12 +#define SPI_EDC_CNT__SPI_SR_MEM_SEC_COUNT_MASK 0x00000003L +#define SPI_EDC_CNT__SPI_SR_MEM_DED_COUNT_MASK 0x0000000CL +#define SPI_EDC_CNT__SPI_GDS_EXPREQ_SEC_COUNT_MASK 0x00000030L +#define SPI_EDC_CNT__SPI_GDS_EXPREQ_DED_COUNT_MASK 0x000000C0L +#define SPI_EDC_CNT__SPI_WB_GRANT_30_SEC_COUNT_MASK 0x00000300L +#define SPI_EDC_CNT__SPI_WB_GRANT_30_DED_COUNT_MASK 0x00000C00L +#define SPI_EDC_CNT__SPI_WB_GRANT_61_SEC_COUNT_MASK 0x00003000L +#define SPI_EDC_CNT__SPI_WB_GRANT_61_DED_COUNT_MASK 0x0000C000L +#define SPI_EDC_CNT__SPI_LIFE_CNT_SEC_COUNT_MASK 0x00030000L +#define SPI_EDC_CNT__SPI_LIFE_CNT_DED_COUNT_MASK 0x000C0000L + +// addressBlock: gc_sqdec +//SQC_EDC_CNT2 +#define SQC_EDC_CNT2__INST_BANKA_TAG_RAM_SEC_COUNT__SHIFT 0x0 +#define SQC_EDC_CNT2__INST_BANKA_TAG_RAM_DED_COUNT__SHIFT 0x2 +#define SQC_EDC_CNT2__INST_BANKA_BANK_RAM_SEC_COUNT__SHIFT 0x4 +#define SQC_EDC_CNT2__INST_BANKA_BANK_RAM_DED_COUNT__SHIFT 0x6 +#define SQC_EDC_CNT2__DATA_BANKA_TAG_RAM_SEC_COUNT__SHIFT 0x8 +#define SQC_EDC_CNT2__DATA_BANKA_TAG_RAM_DED_COUNT__SHIFT 0xa +#define SQC_EDC_CNT2__DATA_BANKA_BANK_RAM_SEC_COUNT__SHIFT 0xc +#define SQC_EDC_CNT2__DATA_BANKA_BANK_RAM_DED_COUNT__SHIFT 0xe +#define SQC_EDC_CNT2__INST_UTCL1_LFIFO_SEC_COUNT__SHIFT 0x10 +#define SQC_EDC_CNT2__INST_UTCL1_LFIFO_DED_COUNT__SHIFT 0x12 +#define SQC_EDC_CNT2__INST_BANKA_TAG_RAM_SEC_COUNT_MASK 0x00000003L +#define SQC_EDC_CNT2__INST_BANKA_TAG_RAM_DED_COUNT_MASK 0x0000000CL +#define SQC_EDC_CNT2__INST_BANKA_BANK_RAM_SEC_COUNT_MASK 0x00000030L +#define SQC_EDC_CNT2__INST_BANKA_BANK_RAM_DED_COUNT_MASK 0x000000C0L +#define SQC_EDC_CNT2__DATA_BANKA_TAG_RAM_SEC_COUNT_MASK 0x00000300L +#define SQC_EDC_CNT2__DATA_BANKA_TAG_RAM_DED_COUNT_MASK 0x00000C00L +#define SQC_EDC_CNT2__DATA_BANKA_BANK_RAM_SEC_COUNT_MASK 0x00003000L +#define SQC_EDC_CNT2__DATA_BANKA_BANK_RAM_DED_COUNT_MASK 0x0000C000L +#define SQC_EDC_CNT2__INST_UTCL1_LFIFO_SEC_COUNT_MASK 0x00030000L +#define SQC_EDC_CNT2__INST_UTCL1_LFIFO_DED_COUNT_MASK 0x000C0000L +//SQC_EDC_CNT3 +#define SQC_EDC_CNT3__INST_BANKB_TAG_RAM_SEC_COUNT__SHIFT 0x0 +#define SQC_EDC_CNT3__INST_BANKB_TAG_RAM_DED_COUNT__SHIFT 0x2 +#define SQC_EDC_CNT3__INST_BANKB_BANK_RAM_SEC_COUNT__SHIFT 0x4 +#define SQC_EDC_CNT3__INST_BANKB_BANK_RAM_DED_COUNT__SHIFT 0x6 +#define SQC_EDC_CNT3__DATA_BANKB_TAG_RAM_SEC_COUNT__SHIFT 0x8 +#define SQC_EDC_CNT3__DATA_BANKB_TAG_RAM_DED_COUNT__SHIFT 0xa +#define SQC_EDC_CNT3__DATA_BANKB_BANK_RAM_SEC_COUNT__SHIFT 0xc +#define SQC_EDC_CNT3__DATA_BANKB_BANK_RAM_DED_COUNT__SHIFT 0xe +#define 
SQC_EDC_CNT3__INST_BANKB_TAG_RAM_SEC_COUNT_MASK 0x00000003L +#define SQC_EDC_CNT3__INST_BANKB_TAG_RAM_DED_COUNT_MASK 0x0000000CL +#define SQC_EDC_CNT3__INST_BANKB_BANK_RAM_SEC_COUNT_MASK 0x00000030L +#define SQC_EDC_CNT3__INST_BANKB_BANK_RAM_DED_COUNT_MASK 0x000000C0L +#define SQC_EDC_CNT3__DATA_BANKB_TAG_RAM_SEC_COUNT_MASK 0x00000300L +#define SQC_EDC_CNT3__DATA_BANKB_TAG_RAM_DED_COUNT_MASK 0x00000C00L +#define SQC_EDC_CNT3__DATA_BANKB_BANK_RAM_SEC_COUNT_MASK 0x00003000L +#define SQC_EDC_CNT3__DATA_BANKB_BANK_RAM_DED_COUNT_MASK 0x0000C000L +//SQC_EDC_PARITY_CNT3 +#define SQC_EDC_PARITY_CNT3__INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT__SHIFT 0x0 +#define SQC_EDC_PARITY_CNT3__INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT__SHIFT 0x2 +#define SQC_EDC_PARITY_CNT3__INST_BANKA_MISS_FIFO_SEC_COUNT__SHIFT 0x4 +#define SQC_EDC_PARITY_CNT3__INST_BANKA_MISS_FIFO_DED_COUNT__SHIFT 0x6 +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_HIT_FIFO_SEC_COUNT__SHIFT 0x8 +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_HIT_FIFO_DED_COUNT__SHIFT 0xa +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_MISS_FIFO_SEC_COUNT__SHIFT 0xc +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_MISS_FIFO_DED_COUNT__SHIFT 0xe +#define SQC_EDC_PARITY_CNT3__INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT__SHIFT 0x10 +#define SQC_EDC_PARITY_CNT3__INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT__SHIFT 0x12 +#define SQC_EDC_PARITY_CNT3__INST_BANKB_MISS_FIFO_SEC_COUNT__SHIFT 0x14 +#define SQC_EDC_PARITY_CNT3__INST_BANKB_MISS_FIFO_DED_COUNT__SHIFT 0x16 +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_HIT_FIFO_SEC_COUNT__SHIFT 0x18 +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_HIT_FIFO_DED_COUNT__SHIFT 0x1a +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_MISS_FIFO_SEC_COUNT__SHIFT 0x1c +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_MISS_FIFO_DED_COUNT__SHIFT 0x1e +#define SQC_EDC_PARITY_CNT3__INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT_MASK 0x00000003L +#define SQC_EDC_PARITY_CNT3__INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT_MASK 0x0000000CL +#define SQC_EDC_PARITY_CNT3__INST_BANKA_MISS_FIFO_SEC_COUNT_MASK 0x00000030L +#define SQC_EDC_PARITY_CNT3__INST_BANKA_MISS_FIFO_DED_COUNT_MASK 0x000000C0L +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_HIT_FIFO_SEC_COUNT_MASK 0x00000300L +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_HIT_FIFO_DED_COUNT_MASK 0x00000C00L +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_MISS_FIFO_SEC_COUNT_MASK 0x00003000L +#define SQC_EDC_PARITY_CNT3__DATA_BANKA_MISS_FIFO_DED_COUNT_MASK 0x0000C000L +#define SQC_EDC_PARITY_CNT3__INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT_MASK 0x00030000L +#define SQC_EDC_PARITY_CNT3__INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT_MASK 0x000C0000L +#define SQC_EDC_PARITY_CNT3__INST_BANKB_MISS_FIFO_SEC_COUNT_MASK 0x00300000L +#define SQC_EDC_PARITY_CNT3__INST_BANKB_MISS_FIFO_DED_COUNT_MASK 0x00C00000L +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_HIT_FIFO_SEC_COUNT_MASK 0x03000000L +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_HIT_FIFO_DED_COUNT_MASK 0x0C000000L +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_MISS_FIFO_SEC_COUNT_MASK 0x30000000L +#define SQC_EDC_PARITY_CNT3__DATA_BANKB_MISS_FIFO_DED_COUNT_MASK 0xC0000000L +//SQC_EDC_CNT +#define SQC_EDC_CNT__DATA_CU0_WRITE_DATA_BUF_SEC_COUNT__SHIFT 0x0 +#define SQC_EDC_CNT__DATA_CU0_WRITE_DATA_BUF_DED_COUNT__SHIFT 0x2 +#define SQC_EDC_CNT__DATA_CU0_UTCL1_LFIFO_SEC_COUNT__SHIFT 0x4 +#define SQC_EDC_CNT__DATA_CU0_UTCL1_LFIFO_DED_COUNT__SHIFT 0x6 +#define SQC_EDC_CNT__DATA_CU1_WRITE_DATA_BUF_SEC_COUNT__SHIFT 0x8 +#define SQC_EDC_CNT__DATA_CU1_WRITE_DATA_BUF_DED_COUNT__SHIFT 0xa +#define SQC_EDC_CNT__DATA_CU1_UTCL1_LFIFO_SEC_COUNT__SHIFT 0xc +#define SQC_EDC_CNT__DATA_CU1_UTCL1_LFIFO_DED_COUNT__SHIFT 
0xe +#define SQC_EDC_CNT__DATA_CU2_WRITE_DATA_BUF_SEC_COUNT__SHIFT 0x10 +#define SQC_EDC_CNT__DATA_CU2_WRITE_DATA_BUF_DED_COUNT__SHIFT 0x12 +#define SQC_EDC_CNT__DATA_CU2_UTCL1_LFIFO_SEC_COUNT__SHIFT 0x14 +#define SQC_EDC_CNT__DATA_CU2_UTCL1_LFIFO_DED_COUNT__SHIFT 0x16 +#define SQC_EDC_CNT__DATA_CU3_WRITE_DATA_BUF_SEC_COUNT__SHIFT 0x18 +#define SQC_EDC_CNT__DATA_CU3_WRITE_DATA_BUF_DED_COUNT__SHIFT 0x1a +#define SQC_EDC_CNT__DATA_CU3_UTCL1_LFIFO_SEC_COUNT__SHIFT 0x1c +#define SQC_EDC_CNT__DATA_CU3_UTCL1_LFIFO_DED_COUNT__SHIFT 0x1e +#define SQC_EDC_CNT__DATA_CU0_WRITE_DATA_BUF_SEC_COUNT_MASK 0x00000003L +#define SQC_EDC_CNT__DATA_CU0_WRITE_DATA_BUF_DED_COUNT_MASK 0x0000000CL +#define SQC_EDC_CNT__DATA_CU0_UTCL1_LFIFO_SEC_COUNT_MASK 0x00000030L +#define SQC_EDC_CNT__DATA_CU0_UTCL1_LFIFO_DED_COUNT_MASK 0x000000C0L +#define SQC_EDC_CNT__DATA_CU1_WRITE_DATA_BUF_SEC_COUNT_MASK 0x00000300L +#define SQC_EDC_CNT__DATA_CU1_WRITE_DATA_BUF_DED_COUNT_MASK 0x00000C00L +#define SQC_EDC_CNT__DATA_CU1_UTCL1_LFIFO_SEC_COUNT_MASK 0x00003000L +#define SQC_EDC_CNT__DATA_CU1_UTCL1_LFIFO_DED_COUNT_MASK 0x0000C000L +#define SQC_EDC_CNT__DATA_CU2_WRITE_DATA_BUF_SEC_COUNT_MASK 0x00030000L +#define SQC_EDC_CNT__DATA_CU2_WRITE_DATA_BUF_DED_COUNT_MASK 0x000C0000L +#define SQC_EDC_CNT__DATA_CU2_UTCL1_LFIFO_SEC_COUNT_MASK 0x00300000L +#define SQC_EDC_CNT__DATA_CU2_UTCL1_LFIFO_DED_COUNT_MASK 0x00C00000L +#define SQC_EDC_CNT__DATA_CU3_WRITE_DATA_BUF_SEC_COUNT_MASK 0x03000000L +#define SQC_EDC_CNT__DATA_CU3_WRITE_DATA_BUF_DED_COUNT_MASK 0x0C000000L +#define SQC_EDC_CNT__DATA_CU3_UTCL1_LFIFO_SEC_COUNT_MASK 0x30000000L +#define SQC_EDC_CNT__DATA_CU3_UTCL1_LFIFO_DED_COUNT_MASK 0xC0000000L +//SQ_EDC_SEC_CNT +#define SQ_EDC_SEC_CNT__LDS_SEC__SHIFT 0x0 +#define SQ_EDC_SEC_CNT__SGPR_SEC__SHIFT 0x8 +#define SQ_EDC_SEC_CNT__VGPR_SEC__SHIFT 0x10 +#define SQ_EDC_SEC_CNT__LDS_SEC_MASK 0x000000FFL +#define SQ_EDC_SEC_CNT__SGPR_SEC_MASK 0x0000FF00L +#define SQ_EDC_SEC_CNT__VGPR_SEC_MASK 0x00FF0000L +//SQ_EDC_DED_CNT +#define SQ_EDC_DED_CNT__LDS_DED__SHIFT 0x0 +#define SQ_EDC_DED_CNT__SGPR_DED__SHIFT 0x8 +#define SQ_EDC_DED_CNT__VGPR_DED__SHIFT 0x10 +#define SQ_EDC_DED_CNT__LDS_DED_MASK 0x000000FFL +#define SQ_EDC_DED_CNT__SGPR_DED_MASK 0x0000FF00L +#define SQ_EDC_DED_CNT__VGPR_DED_MASK 0x00FF0000L +//SQ_EDC_INFO +#define SQ_EDC_INFO__WAVE_ID__SHIFT 0x0 +#define SQ_EDC_INFO__SIMD_ID__SHIFT 0x4 +#define SQ_EDC_INFO__SOURCE__SHIFT 0x6 +#define SQ_EDC_INFO__VM_ID__SHIFT 0x9 +#define SQ_EDC_INFO__WAVE_ID_MASK 0x0000000FL +#define SQ_EDC_INFO__SIMD_ID_MASK 0x00000030L +#define SQ_EDC_INFO__SOURCE_MASK 0x000001C0L +#define SQ_EDC_INFO__VM_ID_MASK 0x00001E00L +//SQ_EDC_CNT +#define SQ_EDC_CNT__LDS_D_SEC_COUNT__SHIFT 0x0 +#define SQ_EDC_CNT__LDS_D_DED_COUNT__SHIFT 0x2 +#define SQ_EDC_CNT__LDS_I_SEC_COUNT__SHIFT 0x4 +#define SQ_EDC_CNT__LDS_I_DED_COUNT__SHIFT 0x6 +#define SQ_EDC_CNT__SGPR_SEC_COUNT__SHIFT 0x8 +#define SQ_EDC_CNT__SGPR_DED_COUNT__SHIFT 0xa +#define SQ_EDC_CNT__VGPR0_SEC_COUNT__SHIFT 0xc +#define SQ_EDC_CNT__VGPR0_DED_COUNT__SHIFT 0xe +#define SQ_EDC_CNT__VGPR1_SEC_COUNT__SHIFT 0x10 +#define SQ_EDC_CNT__VGPR1_DED_COUNT__SHIFT 0x12 +#define SQ_EDC_CNT__VGPR2_SEC_COUNT__SHIFT 0x14 +#define SQ_EDC_CNT__VGPR2_DED_COUNT__SHIFT 0x16 +#define SQ_EDC_CNT__VGPR3_SEC_COUNT__SHIFT 0x18 +#define SQ_EDC_CNT__VGPR3_DED_COUNT__SHIFT 0x1a +#define SQ_EDC_CNT__LDS_D_SEC_COUNT_MASK 0x00000003L +#define SQ_EDC_CNT__LDS_D_DED_COUNT_MASK 0x0000000CL +#define SQ_EDC_CNT__LDS_I_SEC_COUNT_MASK 0x00000030L +#define SQ_EDC_CNT__LDS_I_DED_COUNT_MASK 
0x000000C0L +#define SQ_EDC_CNT__SGPR_SEC_COUNT_MASK 0x00000300L +#define SQ_EDC_CNT__SGPR_DED_COUNT_MASK 0x00000C00L +#define SQ_EDC_CNT__VGPR0_SEC_COUNT_MASK 0x00003000L +#define SQ_EDC_CNT__VGPR0_DED_COUNT_MASK 0x0000C000L +#define SQ_EDC_CNT__VGPR1_SEC_COUNT_MASK 0x00030000L +#define SQ_EDC_CNT__VGPR1_DED_COUNT_MASK 0x000C0000L +#define SQ_EDC_CNT__VGPR2_SEC_COUNT_MASK 0x00300000L +#define SQ_EDC_CNT__VGPR2_DED_COUNT_MASK 0x00C00000L +#define SQ_EDC_CNT__VGPR3_SEC_COUNT_MASK 0x03000000L +#define SQ_EDC_CNT__VGPR3_DED_COUNT_MASK 0x0C000000L + +// addressBlock: gc_tpdec +//TA_EDC_CNT +#define TA_EDC_CNT__TA_FS_DFIFO_SEC_COUNT__SHIFT 0x0 +#define TA_EDC_CNT__TA_FS_DFIFO_DED_COUNT__SHIFT 0x2 +#define TA_EDC_CNT__TA_FS_AFIFO_SEC_COUNT__SHIFT 0x4 +#define TA_EDC_CNT__TA_FS_AFIFO_DED_COUNT__SHIFT 0x6 +#define TA_EDC_CNT__TA_FL_LFIFO_SEC_COUNT__SHIFT 0x8 +#define TA_EDC_CNT__TA_FL_LFIFO_DED_COUNT__SHIFT 0xa +#define TA_EDC_CNT__TA_FX_LFIFO_SEC_COUNT__SHIFT 0xc +#define TA_EDC_CNT__TA_FX_LFIFO_DED_COUNT__SHIFT 0xe +#define TA_EDC_CNT__TA_FS_CFIFO_SEC_COUNT__SHIFT 0x10 +#define TA_EDC_CNT__TA_FS_CFIFO_DED_COUNT__SHIFT 0x12 +#define TA_EDC_CNT__TA_FS_DFIFO_SEC_COUNT_MASK 0x00000003L +#define TA_EDC_CNT__TA_FS_DFIFO_DED_COUNT_MASK 0x0000000CL +#define TA_EDC_CNT__TA_FS_AFIFO_SEC_COUNT_MASK 0x00000030L +#define TA_EDC_CNT__TA_FS_AFIFO_DED_COUNT_MASK 0x000000C0L +#define TA_EDC_CNT__TA_FL_LFIFO_SEC_COUNT_MASK 0x00000300L +#define TA_EDC_CNT__TA_FL_LFIFO_DED_COUNT_MASK 0x00000C00L +#define TA_EDC_CNT__TA_FX_LFIFO_SEC_COUNT_MASK 0x00003000L +#define TA_EDC_CNT__TA_FX_LFIFO_DED_COUNT_MASK 0x0000C000L +#define TA_EDC_CNT__TA_FS_CFIFO_SEC_COUNT_MASK 0x00030000L +#define TA_EDC_CNT__TA_FS_CFIFO_DED_COUNT_MASK 0x000C0000L + +// addressBlock: gc_tcdec +//TCP_EDC_CNT +#define TCP_EDC_CNT__SEC_COUNT__SHIFT 0x0 +#define TCP_EDC_CNT__LFIFO_SED_COUNT__SHIFT 0x8 +#define TCP_EDC_CNT__DED_COUNT__SHIFT 0x10 +#define TCP_EDC_CNT__SEC_COUNT_MASK 0x000000FFL +#define TCP_EDC_CNT__LFIFO_SED_COUNT_MASK 0x0000FF00L +#define TCP_EDC_CNT__DED_COUNT_MASK 0x00FF0000L +//TCP_EDC_CNT_NEW +#define TCP_EDC_CNT_NEW__CACHE_RAM_SEC_COUNT__SHIFT 0x0 +#define TCP_EDC_CNT_NEW__CACHE_RAM_DED_COUNT__SHIFT 0x2 +#define TCP_EDC_CNT_NEW__LFIFO_RAM_SEC_COUNT__SHIFT 0x4 +#define TCP_EDC_CNT_NEW__LFIFO_RAM_DED_COUNT__SHIFT 0x6 +#define TCP_EDC_CNT_NEW__CMD_FIFO_SEC_COUNT__SHIFT 0x8 +#define TCP_EDC_CNT_NEW__CMD_FIFO_DED_COUNT__SHIFT 0xa +#define TCP_EDC_CNT_NEW__VM_FIFO_SEC_COUNT__SHIFT 0xc +#define TCP_EDC_CNT_NEW__VM_FIFO_DED_COUNT__SHIFT 0xe +#define TCP_EDC_CNT_NEW__DB_RAM_SED_COUNT__SHIFT 0x10 +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_SEC_COUNT__SHIFT 0x12 +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_DED_COUNT__SHIFT 0x14 +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_SEC_COUNT__SHIFT 0x16 +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_DED_COUNT__SHIFT 0x18 +#define TCP_EDC_CNT_NEW__CACHE_RAM_SEC_COUNT_MASK 0x00000003L +#define TCP_EDC_CNT_NEW__CACHE_RAM_DED_COUNT_MASK 0x0000000CL +#define TCP_EDC_CNT_NEW__LFIFO_RAM_SEC_COUNT_MASK 0x00000030L +#define TCP_EDC_CNT_NEW__LFIFO_RAM_DED_COUNT_MASK 0x000000C0L +#define TCP_EDC_CNT_NEW__CMD_FIFO_SEC_COUNT_MASK 0x00000300L +#define TCP_EDC_CNT_NEW__CMD_FIFO_DED_COUNT_MASK 0x00000C00L +#define TCP_EDC_CNT_NEW__VM_FIFO_SEC_COUNT_MASK 0x00003000L +#define TCP_EDC_CNT_NEW__VM_FIFO_DED_COUNT_MASK 0x0000C000L +#define TCP_EDC_CNT_NEW__DB_RAM_SED_COUNT_MASK 0x00030000L +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_SEC_COUNT_MASK 0x000C0000L +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_DED_COUNT_MASK 0x00300000L +#define 
TCP_EDC_CNT_NEW__UTCL1_LFIFO1_SEC_COUNT_MASK 0x00C00000L +#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_DED_COUNT_MASK 0x03000000L +//TCP_ATC_EDC_GATCL1_CNT +#define TCP_ATC_EDC_GATCL1_CNT__DATA_SEC__SHIFT 0x0 +#define TCP_ATC_EDC_GATCL1_CNT__DATA_SEC_MASK 0x000000FFL +//TCI_EDC_CNT +#define TCI_EDC_CNT__WRITE_RAM_SEC_COUNT__SHIFT 0x0 +#define TCI_EDC_CNT__WRITE_RAM_DED_COUNT__SHIFT 0x2 +#define TCI_EDC_CNT__WRITE_RAM_SEC_COUNT_MASK 0x00000003L +#define TCI_EDC_CNT__WRITE_RAM_DED_COUNT_MASK 0x0000000CL +//TCA_EDC_CNT +#define TCA_EDC_CNT__HOLE_FIFO_SEC_COUNT__SHIFT 0x0 +#define TCA_EDC_CNT__HOLE_FIFO_DED_COUNT__SHIFT 0x2 +#define TCA_EDC_CNT__REQ_FIFO_SEC_COUNT__SHIFT 0x4 +#define TCA_EDC_CNT__REQ_FIFO_DED_COUNT__SHIFT 0x6 +#define TCA_EDC_CNT__HOLE_FIFO_SEC_COUNT_MASK 0x00000003L +#define TCA_EDC_CNT__HOLE_FIFO_DED_COUNT_MASK 0x0000000CL +#define TCA_EDC_CNT__REQ_FIFO_SEC_COUNT_MASK 0x00000030L +#define TCA_EDC_CNT__REQ_FIFO_DED_COUNT_MASK 0x000000C0L +//TCC_EDC_CNT +#define TCC_EDC_CNT__CACHE_DATA_SEC_COUNT__SHIFT 0x0 +#define TCC_EDC_CNT__CACHE_DATA_DED_COUNT__SHIFT 0x2 +#define TCC_EDC_CNT__CACHE_DIRTY_SEC_COUNT__SHIFT 0x4 +#define TCC_EDC_CNT__CACHE_DIRTY_DED_COUNT__SHIFT 0x6 +#define TCC_EDC_CNT__HIGH_RATE_TAG_SEC_COUNT__SHIFT 0x8 +#define TCC_EDC_CNT__HIGH_RATE_TAG_DED_COUNT__SHIFT 0xa +#define TCC_EDC_CNT__LOW_RATE_TAG_SEC_COUNT__SHIFT 0xc +#define TCC_EDC_CNT__LOW_RATE_TAG_DED_COUNT__SHIFT 0xe +#define TCC_EDC_CNT__SRC_FIFO_SEC_COUNT__SHIFT 0x10 +#define TCC_EDC_CNT__SRC_FIFO_DED_COUNT__SHIFT 0x12 +#define TCC_EDC_CNT__LATENCY_FIFO_SEC_COUNT__SHIFT 0x14 +#define TCC_EDC_CNT__LATENCY_FIFO_DED_COUNT__SHIFT 0x16 +#define TCC_EDC_CNT__LATENCY_FIFO_NEXT_RAM_SEC_COUNT__SHIFT 0x18 +#define TCC_EDC_CNT__LATENCY_FIFO_NEXT_RAM_DED_COUNT__SHIFT 0x1a +#define TCC_EDC_CNT__CACHE_DATA_SEC_COUNT_MASK 0x00000003L +#define TCC_EDC_CNT__CACHE_DATA_DED_COUNT_MASK 0x0000000CL +#define TCC_EDC_CNT__CACHE_DIRTY_SEC_COUNT_MASK 0x00000030L +#define TCC_EDC_CNT__CACHE_DIRTY_DED_COUNT_MASK 0x000000C0L +#define TCC_EDC_CNT__HIGH_RATE_TAG_SEC_COUNT_MASK 0x00000300L +#define TCC_EDC_CNT__HIGH_RATE_TAG_DED_COUNT_MASK 0x00000C00L +#define TCC_EDC_CNT__LOW_RATE_TAG_SEC_COUNT_MASK 0x00003000L +#define TCC_EDC_CNT__LOW_RATE_TAG_DED_COUNT_MASK 0x0000C000L +#define TCC_EDC_CNT__SRC_FIFO_SEC_COUNT_MASK 0x00030000L +#define TCC_EDC_CNT__SRC_FIFO_DED_COUNT_MASK 0x000C0000L +#define TCC_EDC_CNT__LATENCY_FIFO_SEC_COUNT_MASK 0x00300000L +#define TCC_EDC_CNT__LATENCY_FIFO_DED_COUNT_MASK 0x00C00000L +#define TCC_EDC_CNT__LATENCY_FIFO_NEXT_RAM_SEC_COUNT_MASK 0x03000000L +#define TCC_EDC_CNT__LATENCY_FIFO_NEXT_RAM_DED_COUNT_MASK 0x0C000000L +//TCC_EDC_CNT2 +#define TCC_EDC_CNT2__CACHE_TAG_PROBE_FIFO_SEC_COUNT__SHIFT 0x0 +#define TCC_EDC_CNT2__CACHE_TAG_PROBE_FIFO_DED_COUNT__SHIFT 0x2 +#define TCC_EDC_CNT2__UC_ATOMIC_FIFO_SEC_COUNT__SHIFT 0x4 +#define TCC_EDC_CNT2__UC_ATOMIC_FIFO_DED_COUNT__SHIFT 0x6 +#define TCC_EDC_CNT2__WRITE_CACHE_READ_SEC_COUNT__SHIFT 0x8 +#define TCC_EDC_CNT2__WRITE_CACHE_READ_DED_COUNT__SHIFT 0xa +#define TCC_EDC_CNT2__RETURN_CONTROL_SEC_COUNT__SHIFT 0xc +#define TCC_EDC_CNT2__RETURN_CONTROL_DED_COUNT__SHIFT 0xe +#define TCC_EDC_CNT2__IN_USE_TRANSFER_SEC_COUNT__SHIFT 0x10 +#define TCC_EDC_CNT2__IN_USE_TRANSFER_DED_COUNT__SHIFT 0x12 +#define TCC_EDC_CNT2__IN_USE_DEC_SEC_COUNT__SHIFT 0x14 +#define TCC_EDC_CNT2__IN_USE_DEC_DED_COUNT__SHIFT 0x16 +#define TCC_EDC_CNT2__WRITE_RETURN_SEC_COUNT__SHIFT 0x18 +#define TCC_EDC_CNT2__WRITE_RETURN_DED_COUNT__SHIFT 0x1a +#define TCC_EDC_CNT2__RETURN_DATA_SEC_COUNT__SHIFT 0x1c 
+#define TCC_EDC_CNT2__RETURN_DATA_DED_COUNT__SHIFT 0x1e +#define TCC_EDC_CNT2__CACHE_TAG_PROBE_FIFO_SEC_COUNT_MASK 0x00000003L +#define TCC_EDC_CNT2__CACHE_TAG_PROBE_FIFO_DED_COUNT_MASK 0x0000000CL +#define TCC_EDC_CNT2__UC_ATOMIC_FIFO_SEC_COUNT_MASK 0x00000030L +#define TCC_EDC_CNT2__UC_ATOMIC_FIFO_DED_COUNT_MASK 0x000000C0L +#define TCC_EDC_CNT2__WRITE_CACHE_READ_SEC_COUNT_MASK 0x00000300L +#define TCC_EDC_CNT2__WRITE_CACHE_READ_DED_COUNT_MASK 0x00000C00L +#define TCC_EDC_CNT2__RETURN_CONTROL_SEC_COUNT_MASK 0x00003000L +#define TCC_EDC_CNT2__RETURN_CONTROL_DED_COUNT_MASK 0x0000C000L +#define TCC_EDC_CNT2__IN_USE_TRANSFER_SEC_COUNT_MASK 0x00030000L +#define TCC_EDC_CNT2__IN_USE_TRANSFER_DED_COUNT_MASK 0x000C0000L +#define TCC_EDC_CNT2__IN_USE_DEC_SEC_COUNT_MASK 0x00300000L +#define TCC_EDC_CNT2__IN_USE_DEC_DED_COUNT_MASK 0x00C00000L +#define TCC_EDC_CNT2__WRITE_RETURN_SEC_COUNT_MASK 0x03000000L +#define TCC_EDC_CNT2__WRITE_RETURN_DED_COUNT_MASK 0x0C000000L +#define TCC_EDC_CNT2__RETURN_DATA_SEC_COUNT_MASK 0x30000000L +#define TCC_EDC_CNT2__RETURN_DATA_DED_COUNT_MASK 0xC0000000L + +// addressBlock: gc_tpdec +//TD_EDC_CNT +#define TD_EDC_CNT__SS_FIFO_LO_SEC_COUNT__SHIFT 0x0 +#define TD_EDC_CNT__SS_FIFO_LO_DED_COUNT__SHIFT 0x2 +#define TD_EDC_CNT__SS_FIFO_HI_SEC_COUNT__SHIFT 0x4 +#define TD_EDC_CNT__SS_FIFO_HI_DED_COUNT__SHIFT 0x6 +#define TD_EDC_CNT__CS_FIFO_SEC_COUNT__SHIFT 0x8 +#define TD_EDC_CNT__CS_FIFO_DED_COUNT__SHIFT 0xa +#define TD_EDC_CNT__SS_FIFO_LO_SEC_COUNT_MASK 0x00000003L +#define TD_EDC_CNT__SS_FIFO_LO_DED_COUNT_MASK 0x0000000CL +#define TD_EDC_CNT__SS_FIFO_HI_SEC_COUNT_MASK 0x00000030L +#define TD_EDC_CNT__SS_FIFO_HI_DED_COUNT_MASK 0x000000C0L +#define TD_EDC_CNT__CS_FIFO_SEC_COUNT_MASK 0x00000300L +#define TD_EDC_CNT__CS_FIFO_DED_COUNT_MASK 0x00000C00L +//TA_EDC_CNT +#define TA_EDC_CNT__TA_FS_DFIFO_SEC_COUNT__SHIFT 0x0 +#define TA_EDC_CNT__TA_FS_DFIFO_DED_COUNT__SHIFT 0x2 +#define TA_EDC_CNT__TA_FS_AFIFO_SEC_COUNT__SHIFT 0x4 +#define TA_EDC_CNT__TA_FS_AFIFO_DED_COUNT__SHIFT 0x6 +#define TA_EDC_CNT__TA_FL_LFIFO_SEC_COUNT__SHIFT 0x8 +#define TA_EDC_CNT__TA_FL_LFIFO_DED_COUNT__SHIFT 0xa +#define TA_EDC_CNT__TA_FX_LFIFO_SEC_COUNT__SHIFT 0xc +#define TA_EDC_CNT__TA_FX_LFIFO_DED_COUNT__SHIFT 0xe +#define TA_EDC_CNT__TA_FS_CFIFO_SEC_COUNT__SHIFT 0x10 +#define TA_EDC_CNT__TA_FS_CFIFO_DED_COUNT__SHIFT 0x12 +#define TA_EDC_CNT__TA_FS_DFIFO_SEC_COUNT_MASK 0x00000003L +#define TA_EDC_CNT__TA_FS_DFIFO_DED_COUNT_MASK 0x0000000CL +#define TA_EDC_CNT__TA_FS_AFIFO_SEC_COUNT_MASK 0x00000030L +#define TA_EDC_CNT__TA_FS_AFIFO_DED_COUNT_MASK 0x000000C0L +#define TA_EDC_CNT__TA_FL_LFIFO_SEC_COUNT_MASK 0x00000300L +#define TA_EDC_CNT__TA_FL_LFIFO_DED_COUNT_MASK 0x00000C00L +#define TA_EDC_CNT__TA_FX_LFIFO_SEC_COUNT_MASK 0x00003000L +#define TA_EDC_CNT__TA_FX_LFIFO_DED_COUNT_MASK 0x0000C000L +#define TA_EDC_CNT__TA_FS_CFIFO_SEC_COUNT_MASK 0x00030000L +#define TA_EDC_CNT__TA_FS_CFIFO_DED_COUNT_MASK 0x000C0000L + +// addressBlock: gc_ea_gceadec2 +//GCEA_EDC_CNT +#define GCEA_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT__SHIFT 0x0 +#define GCEA_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT__SHIFT 0x2 +#define GCEA_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT__SHIFT 0x4 +#define GCEA_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT__SHIFT 0x6 +#define GCEA_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT__SHIFT 0x8 +#define GCEA_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT__SHIFT 0xa +#define GCEA_EDC_CNT__RRET_TAGMEM_SEC_COUNT__SHIFT 0xc +#define GCEA_EDC_CNT__RRET_TAGMEM_DED_COUNT__SHIFT 0xe +#define GCEA_EDC_CNT__WRET_TAGMEM_SEC_COUNT__SHIFT 0x10 +#define 
GCEA_EDC_CNT__WRET_TAGMEM_DED_COUNT__SHIFT 0x12 +#define GCEA_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT__SHIFT 0x14 +#define GCEA_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT__SHIFT 0x16 +#define GCEA_EDC_CNT__IORD_CMDMEM_SED_COUNT__SHIFT 0x18 +#define GCEA_EDC_CNT__IOWR_CMDMEM_SED_COUNT__SHIFT 0x1a +#define GCEA_EDC_CNT__IOWR_DATAMEM_SED_COUNT__SHIFT 0x1c +#define GCEA_EDC_CNT__MAM_AFMEM_SEC_COUNT__SHIFT 0x1e +#define GCEA_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT_MASK 0x00000003L +#define GCEA_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT_MASK 0x0000000CL +#define GCEA_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT_MASK 0x00000030L +#define GCEA_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT_MASK 0x000000C0L +#define GCEA_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT_MASK 0x00000300L +#define GCEA_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT_MASK 0x00000C00L +#define GCEA_EDC_CNT__RRET_TAGMEM_SEC_COUNT_MASK 0x00003000L +#define GCEA_EDC_CNT__RRET_TAGMEM_DED_COUNT_MASK 0x0000C000L +#define GCEA_EDC_CNT__WRET_TAGMEM_SEC_COUNT_MASK 0x00030000L +#define GCEA_EDC_CNT__WRET_TAGMEM_DED_COUNT_MASK 0x000C0000L +#define GCEA_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT_MASK 0x00300000L +#define GCEA_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT_MASK 0x00C00000L +#define GCEA_EDC_CNT__IORD_CMDMEM_SED_COUNT_MASK 0x03000000L +#define GCEA_EDC_CNT__IOWR_CMDMEM_SED_COUNT_MASK 0x0C000000L +#define GCEA_EDC_CNT__IOWR_DATAMEM_SED_COUNT_MASK 0x30000000L +#define GCEA_EDC_CNT__MAM_AFMEM_SEC_COUNT_MASK 0xC0000000L +//GCEA_EDC_CNT2 +#define GCEA_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT__SHIFT 0x0 +#define GCEA_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT__SHIFT 0x2 +#define GCEA_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT__SHIFT 0x4 +#define GCEA_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT__SHIFT 0x6 +#define GCEA_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT__SHIFT 0x8 +#define GCEA_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa +#define GCEA_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc +#define GCEA_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe +#define GCEA_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10 +#define GCEA_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12 +#define GCEA_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14 +#define GCEA_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16 +#define GCEA_EDC_CNT2__MAM_D0MEM_DED_COUNT__SHIFT 0x18 +#define GCEA_EDC_CNT2__MAM_D1MEM_DED_COUNT__SHIFT 0x1a +#define GCEA_EDC_CNT2__MAM_D2MEM_DED_COUNT__SHIFT 0x1c +#define GCEA_EDC_CNT2__MAM_D3MEM_DED_COUNT__SHIFT 0x1e +#define GCEA_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L +#define GCEA_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL +#define GCEA_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L +#define GCEA_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT_MASK 0x000000C0L +#define GCEA_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT_MASK 0x00000300L +#define GCEA_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L +#define GCEA_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L +#define GCEA_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L +#define GCEA_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L +#define GCEA_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L +#define GCEA_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L +#define GCEA_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L +#define GCEA_EDC_CNT2__MAM_D0MEM_DED_COUNT_MASK 0x03000000L +#define GCEA_EDC_CNT2__MAM_D1MEM_DED_COUNT_MASK 0x0C000000L +#define GCEA_EDC_CNT2__MAM_D2MEM_DED_COUNT_MASK 0x30000000L +#define GCEA_EDC_CNT2__MAM_D3MEM_DED_COUNT_MASK 0xC0000000L +//GCEA_EDC_CNT3 +#define GCEA_EDC_CNT3__DRAMRD_PAGEMEM_DED_COUNT__SHIFT 0x0 +#define GCEA_EDC_CNT3__DRAMWR_PAGEMEM_DED_COUNT__SHIFT 0x2 +#define GCEA_EDC_CNT3__IORD_CMDMEM_DED_COUNT__SHIFT 0x4 +#define 
GCEA_EDC_CNT3__IOWR_CMDMEM_DED_COUNT__SHIFT 0x6 +#define GCEA_EDC_CNT3__IOWR_DATAMEM_DED_COUNT__SHIFT 0x8 +#define GCEA_EDC_CNT3__GMIRD_PAGEMEM_DED_COUNT__SHIFT 0xa +#define GCEA_EDC_CNT3__GMIWR_PAGEMEM_DED_COUNT__SHIFT 0xc +#define GCEA_EDC_CNT3__MAM_AFMEM_DED_COUNT__SHIFT 0xe +#define GCEA_EDC_CNT3__MAM_A0MEM_SEC_COUNT__SHIFT 0x10 +#define GCEA_EDC_CNT3__MAM_A0MEM_DED_COUNT__SHIFT 0x12 +#define GCEA_EDC_CNT3__MAM_A1MEM_SEC_COUNT__SHIFT 0x14 +#define GCEA_EDC_CNT3__MAM_A1MEM_DED_COUNT__SHIFT 0x16 +#define GCEA_EDC_CNT3__MAM_A2MEM_SEC_COUNT__SHIFT 0x18 +#define GCEA_EDC_CNT3__MAM_A2MEM_DED_COUNT__SHIFT 0x1a +#define GCEA_EDC_CNT3__MAM_A3MEM_SEC_COUNT__SHIFT 0x1c +#define GCEA_EDC_CNT3__MAM_A3MEM_DED_COUNT__SHIFT 0x1e +#define GCEA_EDC_CNT3__DRAMRD_PAGEMEM_DED_COUNT_MASK 0x00000003L +#define GCEA_EDC_CNT3__DRAMWR_PAGEMEM_DED_COUNT_MASK 0x0000000CL +#define GCEA_EDC_CNT3__IORD_CMDMEM_DED_COUNT_MASK 0x00000030L +#define GCEA_EDC_CNT3__IOWR_CMDMEM_DED_COUNT_MASK 0x000000C0L +#define GCEA_EDC_CNT3__IOWR_DATAMEM_DED_COUNT_MASK 0x00000300L +#define GCEA_EDC_CNT3__GMIRD_PAGEMEM_DED_COUNT_MASK 0x00000C00L +#define GCEA_EDC_CNT3__GMIWR_PAGEMEM_DED_COUNT_MASK 0x00003000L +#define GCEA_EDC_CNT3__MAM_AFMEM_DED_COUNT_MASK 0x0000C000L +#define GCEA_EDC_CNT3__MAM_A0MEM_SEC_COUNT_MASK 0x00030000L +#define GCEA_EDC_CNT3__MAM_A0MEM_DED_COUNT_MASK 0x000C0000L +#define GCEA_EDC_CNT3__MAM_A1MEM_SEC_COUNT_MASK 0x00300000L +#define GCEA_EDC_CNT3__MAM_A1MEM_DED_COUNT_MASK 0x00C00000L +#define GCEA_EDC_CNT3__MAM_A2MEM_SEC_COUNT_MASK 0x03000000L +#define GCEA_EDC_CNT3__MAM_A2MEM_DED_COUNT_MASK 0x0C000000L +#define GCEA_EDC_CNT3__MAM_A3MEM_SEC_COUNT_MASK 0x30000000L +#define GCEA_EDC_CNT3__MAM_A3MEM_DED_COUNT_MASK 0xC0000000L + +// addressBlock: gc_gfxudec +//GRBM_GFX_INDEX +#define GRBM_GFX_INDEX__INSTANCE_INDEX__SHIFT 0x0 +#define GRBM_GFX_INDEX__SH_INDEX__SHIFT 0x8 +#define GRBM_GFX_INDEX__SE_INDEX__SHIFT 0x10 +#define GRBM_GFX_INDEX__SH_BROADCAST_WRITES__SHIFT 0x1d +#define GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT 0x1e +#define GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT 0x1f +#define GRBM_GFX_INDEX__INSTANCE_INDEX_MASK 0x000000FFL +#define GRBM_GFX_INDEX__SH_INDEX_MASK 0x0000FF00L +#define GRBM_GFX_INDEX__SE_INDEX_MASK 0x00FF0000L +#define GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK 0x20000000L +#define GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK 0x40000000L +#define GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK 0x80000000L + +// addressBlock: gc_utcl2_atcl2dec +//ATC_L2_CNTL +//ATC_L2_CACHE_4K_DSM_INDEX +#define ATC_L2_CACHE_4K_DSM_INDEX__INDEX__SHIFT 0x0 +#define ATC_L2_CACHE_4K_DSM_INDEX__INDEX_MASK 0x000000FFL +//ATC_L2_CACHE_2M_DSM_INDEX +#define ATC_L2_CACHE_2M_DSM_INDEX__INDEX__SHIFT 0x0 +#define ATC_L2_CACHE_2M_DSM_INDEX__INDEX_MASK 0x000000FFL +//ATC_L2_CACHE_4K_DSM_CNTL +#define ATC_L2_CACHE_4K_DSM_CNTL__SEC_COUNT__SHIFT 0xd +#define ATC_L2_CACHE_4K_DSM_CNTL__DED_COUNT__SHIFT 0xf +#define ATC_L2_CACHE_4K_DSM_CNTL__SEC_COUNT_MASK 0x00006000L +#define ATC_L2_CACHE_4K_DSM_CNTL__DED_COUNT_MASK 0x00018000L +//ATC_L2_CACHE_2M_DSM_CNTL +#define ATC_L2_CACHE_2M_DSM_CNTL__SEC_COUNT__SHIFT 0xd +#define ATC_L2_CACHE_2M_DSM_CNTL__DED_COUNT__SHIFT 0xf +#define ATC_L2_CACHE_2M_DSM_CNTL__SEC_COUNT_MASK 0x00006000L +#define ATC_L2_CACHE_2M_DSM_CNTL__DED_COUNT_MASK 0x00018000L + +// addressBlock: gc_utcl2_vml2pfdec +//VML2_MEM_ECC_INDEX +#define VML2_MEM_ECC_INDEX__INDEX__SHIFT 0x0 +#define VML2_MEM_ECC_INDEX__INDEX_MASK 0x000000FFL +//VML2_WALKER_MEM_ECC_INDEX +#define VML2_WALKER_MEM_ECC_INDEX__INDEX__SHIFT 0x0 
+#define VML2_WALKER_MEM_ECC_INDEX__INDEX_MASK 0x000000FFL +//UTCL2_MEM_ECC_INDEX +#define UTCL2_MEM_ECC_INDEX__INDEX__SHIFT 0x0 +#define UTCL2_MEM_ECC_INDEX__INDEX_MASK 0x000000FFL +//VML2_MEM_ECC_CNTL +#define VML2_MEM_ECC_CNTL__SEC_COUNT__SHIFT 0xc +#define VML2_MEM_ECC_CNTL__DED_COUNT__SHIFT 0xe +#define VML2_MEM_ECC_CNTL__SEC_COUNT_MASK 0x00003000L +#define VML2_MEM_ECC_CNTL__DED_COUNT_MASK 0x0000C000L +//VML2_WALKER_MEM_ECC_CNTL +#define VML2_WALKER_MEM_ECC_CNTL__SEC_COUNT__SHIFT 0xc +#define VML2_WALKER_MEM_ECC_CNTL__DED_COUNT__SHIFT 0xe +#define VML2_WALKER_MEM_ECC_CNTL__SEC_COUNT_MASK 0x00003000L +#define VML2_WALKER_MEM_ECC_CNTL__DED_COUNT_MASK 0x0000C000L +//UTCL2_MEM_ECC_CNTL +#define UTCL2_MEM_ECC_CNTL__SEC_COUNT__SHIFT 0xc +#define UTCL2_MEM_ECC_CNTL__DED_COUNT__SHIFT 0xe +#define UTCL2_MEM_ECC_CNTL__SEC_COUNT_MASK 0x00003000L +#define UTCL2_MEM_ECC_CNTL__DED_COUNT_MASK 0x0000C000L + +// addressBlock: gc_rlcpdec +//RLC_EDC_CNT +#define RLC_EDC_CNT__RLCG_INSTR_RAM_SEC_COUNT__SHIFT 0x0 +#define RLC_EDC_CNT__RLCG_INSTR_RAM_DED_COUNT__SHIFT 0x2 +#define RLC_EDC_CNT__RLCG_SCRATCH_RAM_SEC_COUNT__SHIFT 0x4 +#define RLC_EDC_CNT__RLCG_SCRATCH_RAM_DED_COUNT__SHIFT 0x6 +#define RLC_EDC_CNT__RLCV_INSTR_RAM_SEC_COUNT__SHIFT 0x8 +#define RLC_EDC_CNT__RLCV_INSTR_RAM_DED_COUNT__SHIFT 0xa +#define RLC_EDC_CNT__RLCV_SCRATCH_RAM_SEC_COUNT__SHIFT 0xc +#define RLC_EDC_CNT__RLCV_SCRATCH_RAM_DED_COUNT__SHIFT 0xe +#define RLC_EDC_CNT__RLC_TCTAG_RAM_SEC_COUNT__SHIFT 0x10 +#define RLC_EDC_CNT__RLC_TCTAG_RAM_DED_COUNT__SHIFT 0x12 +#define RLC_EDC_CNT__RLC_SPM_SCRATCH_RAM_SEC_COUNT__SHIFT 0x14 +#define RLC_EDC_CNT__RLC_SPM_SCRATCH_RAM_DED_COUNT__SHIFT 0x16 +#define RLC_EDC_CNT__RLC_SRM_DATA_RAM_SEC_COUNT__SHIFT 0x18 +#define RLC_EDC_CNT__RLC_SRM_DATA_RAM_DED_COUNT__SHIFT 0x1a +#define RLC_EDC_CNT__RLC_SRM_ADDR_RAM_SEC_COUNT__SHIFT 0x1c +#define RLC_EDC_CNT__RLC_SRM_ADDR_RAM_DED_COUNT__SHIFT 0x1e +#define RLC_EDC_CNT__RLCG_INSTR_RAM_SEC_COUNT_MASK 0x00000003L +#define RLC_EDC_CNT__RLCG_INSTR_RAM_DED_COUNT_MASK 0x0000000CL +#define RLC_EDC_CNT__RLCG_SCRATCH_RAM_SEC_COUNT_MASK 0x00000030L +#define RLC_EDC_CNT__RLCG_SCRATCH_RAM_DED_COUNT_MASK 0x000000C0L +#define RLC_EDC_CNT__RLCV_INSTR_RAM_SEC_COUNT_MASK 0x00000300L +#define RLC_EDC_CNT__RLCV_INSTR_RAM_DED_COUNT_MASK 0x00000C00L +#define RLC_EDC_CNT__RLCV_SCRATCH_RAM_SEC_COUNT_MASK 0x00003000L +#define RLC_EDC_CNT__RLCV_SCRATCH_RAM_DED_COUNT_MASK 0x0000C000L +#define RLC_EDC_CNT__RLC_TCTAG_RAM_SEC_COUNT_MASK 0x00030000L +#define RLC_EDC_CNT__RLC_TCTAG_RAM_DED_COUNT_MASK 0x000C0000L +#define RLC_EDC_CNT__RLC_SPM_SCRATCH_RAM_SEC_COUNT_MASK 0x00300000L +#define RLC_EDC_CNT__RLC_SPM_SCRATCH_RAM_DED_COUNT_MASK 0x00C00000L +#define RLC_EDC_CNT__RLC_SRM_DATA_RAM_SEC_COUNT_MASK 0x03000000L +#define RLC_EDC_CNT__RLC_SRM_DATA_RAM_DED_COUNT_MASK 0x0C000000L +#define RLC_EDC_CNT__RLC_SRM_ADDR_RAM_SEC_COUNT_MASK 0x30000000L +#define RLC_EDC_CNT__RLC_SRM_ADDR_RAM_DED_COUNT_MASK 0xC0000000L +//RLC_EDC_CNT2 +#define RLC_EDC_CNT2__RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT__SHIFT 0x0 +#define RLC_EDC_CNT2__RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT__SHIFT 0x2 +#define RLC_EDC_CNT2__RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT__SHIFT 0x4 +#define RLC_EDC_CNT2__RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT__SHIFT 0x6 +#define RLC_EDC_CNT2__RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT__SHIFT 0x8 +#define RLC_EDC_CNT2__RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT__SHIFT 0xa +#define RLC_EDC_CNT2__RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT__SHIFT 0xc +#define RLC_EDC_CNT2__RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT__SHIFT 0xe +#define 
RLC_EDC_CNT2__RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT__SHIFT 0x10 +#define RLC_EDC_CNT2__RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT__SHIFT 0x12 +#define RLC_EDC_CNT2__RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT__SHIFT 0x14 +#define RLC_EDC_CNT2__RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT__SHIFT 0x16 +#define RLC_EDC_CNT2__RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT__SHIFT 0x18 +#define RLC_EDC_CNT2__RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT__SHIFT 0x1a +#define RLC_EDC_CNT2__RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT__SHIFT 0x1c +#define RLC_EDC_CNT2__RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT__SHIFT 0x1e +#define RLC_EDC_CNT2__RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT_MASK 0x00000003L +#define RLC_EDC_CNT2__RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT_MASK 0x0000000CL +#define RLC_EDC_CNT2__RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT_MASK 0x00000030L +#define RLC_EDC_CNT2__RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT_MASK 0x000000C0L +#define RLC_EDC_CNT2__RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT_MASK 0x00000300L +#define RLC_EDC_CNT2__RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT_MASK 0x00000C00L +#define RLC_EDC_CNT2__RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT_MASK 0x00003000L +#define RLC_EDC_CNT2__RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT_MASK 0x0000C000L +#define RLC_EDC_CNT2__RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT_MASK 0x00030000L +#define RLC_EDC_CNT2__RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT_MASK 0x000C0000L +#define RLC_EDC_CNT2__RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT_MASK 0x00300000L +#define RLC_EDC_CNT2__RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT_MASK 0x00C00000L +#define RLC_EDC_CNT2__RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT_MASK 0x03000000L +#define RLC_EDC_CNT2__RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT_MASK 0x0C000000L +#define RLC_EDC_CNT2__RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT_MASK 0x30000000L +#define RLC_EDC_CNT2__RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT_MASK 0xC0000000L + +#endif \ No newline at end of file From 4c461d89db4f8f40509b044b0daf3ac6edf4fbd7 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Thu, 16 Jan 2020 13:40:52 +0800 Subject: [PATCH 220/658] drm/amdgpu: add RAS support for the gfx block of Arcturus Implement functions to do the RAS error injection and query EDC counter. 
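The EDC counter registers pack each sub-block's SEC (correctable) and
DED (uncorrectable) counts as 2-bit fields, so a count is recovered with
the mask/shift pairs defined in gc_9_4_1_sh_mask.h above, which is what
the query path below does via SOC15_REG_FIELD()/REG_GET_FIELD(). A minimal
stand-alone C sketch of that decode (illustrative only, not part of this
change; it copies the TCC_EDC_CNT CACHE_DATA masks by value and feeds in
a made-up raw register value):

  #include <stdint.h>
  #include <stdio.h>

  /* Bit-field layout copied from the TCC_EDC_CNT definitions above. */
  #define TCC_EDC_CNT__CACHE_DATA_SEC_COUNT__SHIFT 0x0
  #define TCC_EDC_CNT__CACHE_DATA_DED_COUNT__SHIFT 0x2
  #define TCC_EDC_CNT__CACHE_DATA_SEC_COUNT_MASK   0x00000003L
  #define TCC_EDC_CNT__CACHE_DATA_DED_COUNT_MASK   0x0000000CL

  /* Decode one sub-block's SEC/DED counts from a raw TCC_EDC_CNT value. */
  static void decode_cache_data(uint32_t reg, uint32_t *sec, uint32_t *ded)
  {
          *sec = (reg & TCC_EDC_CNT__CACHE_DATA_SEC_COUNT_MASK) >>
                 TCC_EDC_CNT__CACHE_DATA_SEC_COUNT__SHIFT;
          *ded = (reg & TCC_EDC_CNT__CACHE_DATA_DED_COUNT_MASK) >>
                 TCC_EDC_CNT__CACHE_DATA_DED_COUNT__SHIFT;
  }

  int main(void)
  {
          uint32_t sec, ded;

          decode_cache_data(0x00000007, &sec, &ded); /* sample raw value */
          printf("TCC CACHE_DATA: SEC=%u DED=%u\n", sec, ded); /* SEC=3 DED=1 */
          return 0;
  }

The counts reported to RAS are the sums of these per-field values across
all selected instances, which matches how gfx_v9_4_ras_error_count()
accumulates sec_count/ded_count in the diff below.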
Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 26 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 978 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h | 35 + 4 files changed, 1039 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 7ae3b22c5628..c2bbcdd9c875 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -120,6 +120,7 @@ amdgpu-y += \ amdgpu_rlc.o \ gfx_v8_0.o \ gfx_v9_0.o \ + gfx_v9_4.o \ gfx_v10_0.o # add async DMA block diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 583800ab5fe5..f8b1fd6ae6f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,6 +48,8 @@ #include "amdgpu_ras.h" +#include "gfx_v9_4.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -1950,6 +1952,17 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .query_ras_error_count = &gfx_v9_0_query_ras_error_count }; +static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_0_select_se_sh, + .read_wave_data = &gfx_v9_0_read_wave_data, + .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_4_ras_error_inject, + .query_ras_error_count = &gfx_v9_4_query_ras_error_count +}; + static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -2001,6 +2014,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_ARCTURUS: + adev->gfx.funcs = &gfx_v9_4_gfx_funcs; adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4265,7 +4279,17 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) goto fail; } - gfx_v9_0_clear_ras_edc_counter(adev); + switch (adev->asic_type) + { + case CHIP_VEGA20: + gfx_v9_0_clear_ras_edc_counter(adev); + break; + case CHIP_ARCTURUS: + gfx_v9_4_clear_ras_edc_counter(adev); + break; + default: + break; + } fail: amdgpu_ib_free(adev, &ib, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c new file mode 100644 index 000000000000..e19d275f3f7d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -0,0 +1,978 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include + +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_atomfirmware.h" +#include "amdgpu_pm.h" + +#include "gc/gc_9_4_1_offset.h" +#include "gc/gc_9_4_1_sh_mask.h" +#include "soc15_common.h" + +#include "gfx_v9_4.h" +#include "amdgpu_ras.h" + +static const struct soc15_reg_entry gfx_v9_4_edc_counter_regs[] = { + /* CPC */ + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1 }, + /* DC */ + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1 }, + /* CPF */ + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1 }, + /* GDS */ + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1 }, + /* SPI */ + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1 }, + /* SQ */ + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16 }, + /* SQC */ + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), 0, 4, 6 }, + /* TA */ + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16 }, + /* TCA */ + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2 }, + /* TCC */ + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16 }, + /* TCI */ + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72 }, + /* TCP */ + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16 }, + /* TD */ + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16 }, + /* GCEA */ + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32 }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32 }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 1, 32 }, + /* RLC */ + { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), 0, 1, 1 }, +}; + +static void gfx_v9_4_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if 
(sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); +} + +static const struct soc15_ras_field_entry gfx_v9_4_ras_fields[] = { + /* CPC */ + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) }, + { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) }, + { "CPC_DC_CSINVOC_RAM_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) }, + { "CPC_DC_RESTORE_RAM_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) }, + { "CPC_DC_CSINVOC_RAM1_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) }, + { "CPC_DC_RESTORE_RAM1_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) }, + + /* CPF */ + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) }, + { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) }, + + /* GDS */ + { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), + SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC), + SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) }, + { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, + { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + 
SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, + + /* SPI */ + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) }, + { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) }, + { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) }, + { "SPI_WB_GRANT_61", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_DED_COUNT) }, + { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) }, + + /* SQ */ + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) }, + + /* SQC */ + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 
SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKA_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_HIT_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKA_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + DATA_BANKA_MISS_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKB_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_HIT_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKB_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + DATA_BANKB_MISS_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, 
DATA_BANKB_BANK_RAM_DED_COUNT) }, + + /* TA */ + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_DED_COUNT) }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) }, + + /* TCA */ + { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) }, + + /* TCC */ + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT), + 
SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) }, + + /* TCI */ + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) }, + + /* TCP */ + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0, 0 }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, + + /* TD */ + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) }, + + /* EA */ + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, + { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, + { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, 
GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, + { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0, 0 }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_DATAMEM_DED_COUNT) }, + { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) }, + { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) }, + { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) }, + { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) }, + { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) }, + { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) }, + { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) }, + { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) }, + { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) }, + { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) }, + { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + 
SOC15_REG_FIELD(GCEA_EDC_CNT, MAM_AFMEM_SEC_COUNT), 0, 0 }, + { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) }, + + /* RLC */ + { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) }, + { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) }, + { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) }, + { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) }, + { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) }, + { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) }, + { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) }, + { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) }, +}; + +static const char * const vml2_mems[] = { + "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_0_4K_MEM0", + "UTC_VML2_BANK_CACHE_0_4K_MEM1", + 
"UTC_VML2_BANK_CACHE_1_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_1_4K_MEM0", + "UTC_VML2_BANK_CACHE_1_4K_MEM1", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_2_4K_MEM0", + "UTC_VML2_BANK_CACHE_2_4K_MEM1", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_3_4K_MEM0", + "UTC_VML2_BANK_CACHE_3_4K_MEM1", + "UTC_VML2_IFIFO_GROUP0", + "UTC_VML2_IFIFO_GROUP1", + "UTC_VML2_IFIFO_GROUP2", + "UTC_VML2_IFIFO_GROUP3", + "UTC_VML2_IFIFO_GROUP4", + "UTC_VML2_IFIFO_GROUP5", + "UTC_VML2_IFIFO_GROUP6", + "UTC_VML2_IFIFO_GROUP7", + "UTC_VML2_IFIFO_GROUP8", + "UTC_VML2_IFIFO_GROUP9", + "UTC_VML2_IFIFO_GROUP10", + "UTC_VML2_IFIFO_GROUP11", + "UTC_VML2_IFIFO_GROUP12", + "UTC_VML2_IFIFO_GROUP13", + "UTC_VML2_IFIFO_GROUP14", + "UTC_VML2_IFIFO_GROUP15", + "UTC_VML2_IFIFO_GROUP16", + "UTC_VML2_IFIFO_GROUP17", + "UTC_VML2_IFIFO_GROUP18", + "UTC_VML2_IFIFO_GROUP19", + "UTC_VML2_IFIFO_GROUP20", + "UTC_VML2_IFIFO_GROUP21", + "UTC_VML2_IFIFO_GROUP22", + "UTC_VML2_IFIFO_GROUP23", + "UTC_VML2_IFIFO_GROUP24", +}; + +static const char * const vml2_walker_mems[] = { + "UTC_VML2_CACHE_PDE0_MEM0", + "UTC_VML2_CACHE_PDE0_MEM1", + "UTC_VML2_CACHE_PDE1_MEM0", + "UTC_VML2_CACHE_PDE1_MEM1", + "UTC_VML2_CACHE_PDE2_MEM0", + "UTC_VML2_CACHE_PDE2_MEM1", + "UTC_VML2_RDIF_ARADDRS", + "UTC_VML2_RDIF_LOG_FIFO", + "UTC_VML2_QUEUE_REQ", + "UTC_VML2_QUEUE_RET", +}; + +static const char * const utcl2_router_mems[] = { + "UTCL2_ROUTER_GROUP0_VML2_REQ_FIFO0", + "UTCL2_ROUTER_GROUP1_VML2_REQ_FIFO1", + "UTCL2_ROUTER_GROUP2_VML2_REQ_FIFO2", + "UTCL2_ROUTER_GROUP3_VML2_REQ_FIFO3", + "UTCL2_ROUTER_GROUP4_VML2_REQ_FIFO4", + "UTCL2_ROUTER_GROUP5_VML2_REQ_FIFO5", + "UTCL2_ROUTER_GROUP6_VML2_REQ_FIFO6", + "UTCL2_ROUTER_GROUP7_VML2_REQ_FIFO7", + "UTCL2_ROUTER_GROUP8_VML2_REQ_FIFO8", + "UTCL2_ROUTER_GROUP9_VML2_REQ_FIFO9", + "UTCL2_ROUTER_GROUP10_VML2_REQ_FIFO10", + "UTCL2_ROUTER_GROUP11_VML2_REQ_FIFO11", + "UTCL2_ROUTER_GROUP12_VML2_REQ_FIFO12", + "UTCL2_ROUTER_GROUP13_VML2_REQ_FIFO13", + "UTCL2_ROUTER_GROUP14_VML2_REQ_FIFO14", + "UTCL2_ROUTER_GROUP15_VML2_REQ_FIFO15", + "UTCL2_ROUTER_GROUP16_VML2_REQ_FIFO16", + "UTCL2_ROUTER_GROUP17_VML2_REQ_FIFO17", + "UTCL2_ROUTER_GROUP18_VML2_REQ_FIFO18", + "UTCL2_ROUTER_GROUP19_VML2_REQ_FIFO19", + "UTCL2_ROUTER_GROUP20_VML2_REQ_FIFO20", + "UTCL2_ROUTER_GROUP21_VML2_REQ_FIFO21", + "UTCL2_ROUTER_GROUP22_VML2_REQ_FIFO22", + "UTCL2_ROUTER_GROUP23_VML2_REQ_FIFO23", + "UTCL2_ROUTER_GROUP24_VML2_REQ_FIFO24", +}; + +static const char * const atc_l2_cache_2m_mems[] = { + "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", +}; + +static const char * const atc_l2_cache_4k_mems[] = { + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", + 
"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", +}; + +static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev, + struct ras_err_data *err_data) +{ + uint32_t i, data; + uint32_t sec_count, ded_count; + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0); + + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_walker_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_walker_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) { + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + utcl2_router_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + utcl2_router_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL); + + sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_2m_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: 
SubBlock %s, DED %d\n", i, + atc_l2_cache_2m_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL); + + sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_4k_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_4k_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + + return 0; +} + +static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t se_id, uint32_t inst_id, + uint32_t value, uint32_t *sec_count, + uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_ras_fields); i++) { + if (gfx_v9_4_ras_fields[i].reg_offset != reg->reg_offset || + gfx_v9_4_ras_fields[i].seg != reg->seg || + gfx_v9_4_ras_fields[i].inst != reg->inst) + continue; + + sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >> + gfx_v9_4_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", + gfx_v9_4_ras_fields[i].name, se_id, inst_id, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >> + gfx_v9_4_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", + gfx_v9_4_ras_fields[i].name, se_id, inst_id, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i, j, k; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance; + k++) { + gfx_v9_4_select_se_sh(adev, j, 0, k); + reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_edc_counter_regs[i])); + if (reg_value) + gfx_v9_4_ras_error_count( + &gfx_v9_4_edc_counter_regs[i], + j, k, reg_value, &sec_count, + &ded_count); + } + } + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; + + gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_4_query_utc_edc_status(adev, err_data); + + return 0; +} + +void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev) +{ + int i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance; + k++) { + gfx_v9_4_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET( + 
gfx_v9_4_edc_counter_regs[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0); + + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) { + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL); + } + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); +} + +int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if) +{ + struct ras_inject_if *info = (struct ras_inject_if *)inject_if; + int ret; + struct ta_ras_trigger_error_input block_info = { 0 }; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + block_info.block_id = info->head.block; + block_info.sub_block_index = info->head.sub_block_index; + block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); + block_info.address = info->address; + block_info.value = info->value; + + mutex_lock(&adev->grbm_idx_mutex); + ret = psp_ras_trigger_error(&adev->psp, &block_info); + mutex_unlock(&adev->grbm_idx_mutex); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h new file mode 100644 index 000000000000..2e3f6f755ad4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h @@ -0,0 +1,35 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V9_4_H__ +#define __GFX_V9_4_H__ + +void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev); + +int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); + +int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, + void *inject_if); + +#endif /* __GFX_V9_4_H__ */ From a9d4fe2fd6529c3129ec6883b8649ea5c7eee4d3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 20 Jan 2020 13:54:30 +0100 Subject: [PATCH 221/658] drm/amdgpu: remove unnecessary conversion to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Better clean that up before some automation starts to complain about it Signed-off-by: Nirmoy Das Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/athub_v1_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/athub_v2_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 2 +- drivers/gpu/drm/amd/amdgpu/nv.c | 8 +++---- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/si_dma.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 28 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 +- 25 files changed, 50 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 82155ac3288a..12247a32f9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -527,7 +527,7 @@ static int acp_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = state == AMD_PG_STATE_GATE ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 64b1b2d2d19b..990f8e64a4d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -985,7 +985,7 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) { struct sysinfo si; - bool is_os_64 = (sizeof(void *) == 8) ? 
true : false; + bool is_os_64 = (sizeof(void *) == 8); uint64_t total_memory; uint64_t dram_size_seven_GB = 0x1B8000000; uint64_t dram_size_three_GB = 0xB8000000; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index d9cc746af5e6..847ca9b3ce4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -74,9 +74,9 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, case CHIP_VEGA20: case CHIP_RAVEN: athub_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); athub_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index ceb9aa4df0e7..921a69abda55 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -77,9 +77,9 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI14: case CHIP_NAVI12: athub_v2_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); athub_v2_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 65885ba92a5b..1785fdad6ecb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4229,7 +4229,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: @@ -4255,7 +4255,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: gfx_v10_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f8b1fd6ae6f4..6558d70fdc58 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4652,7 +4652,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); switch (adev->asic_type) { case CHIP_RAVEN: @@ -4714,7 +4714,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, case CHIP_ARCTURUS: case CHIP_RENOIR: gfx_v9_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index a78292d84854..ff2e6e1ccde7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -690,7 +690,7 @@ static int jpeg_v2_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { if (jpeg_v2_0_is_idle(handle)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 2c58939e6ad0..c6d046df4b70 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -469,7 +469,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index adfd8a6171eb..49a3a56ec017 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -523,9 +523,9 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, case CHIP_RAVEN: case CHIP_RENOIR: mmhub_v1_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v1_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index a7cb185d639a..bde189680521 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -427,9 +427,9 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI14: case CHIP_NAVI12: mmhub_v2_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index a32c9757711f..a5281df8d84f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -625,9 +625,9 @@ int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_ARCTURUS: mmhub_v9_4_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v9_4_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index f737ce459c28..cf557a428298 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -426,7 +426,7 @@ static int navi10_ih_set_clockgating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; navi10_ih_update_clockgating_state(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 2e0f8933410e..2d1bebdf1603 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -950,13 +950,13 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: adev->nbio.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); nv_update_hdp_mem_power_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); nv_update_hdp_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 27c7001be1ee..e55884d204bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2176,9 +2176,9 @@ static int sdma_v4_0_set_clockgating_state(void *handle, case CHIP_ARCTURUS: case CHIP_RENOIR: sdma_v4_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v4_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 4c6bf1f8a528..67b9830b7c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1525,9 +1525,9 @@ static int sdma_v5_0_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: sdma_v5_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v5_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 9aac9f9c50bb..42d5601b6bf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -648,7 +648,7 @@ static int si_dma_set_clockgating_state(void *handle, bool enable; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - enable = (state == AMD_CG_STATE_GATE) ? true : false; + enable = (state == AMD_CG_STATE_GATE); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { for (i = 0; i < adev->sdma.num_instances; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 317803f6a561..af41ee4c9639 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1467,38 +1467,38 @@ static int soc15_common_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: adev->nbio.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->df.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; case CHIP_RAVEN: case CHIP_RENOIR: adev->nbio.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; case CHIP_ARCTURUS: soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 01e62fb8e6e0..0fa8aae2d78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -763,7 +763,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 217084d56ab8..e0aadcaf6c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1421,7 +1421,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 475ae68f38f5..217db187207c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -739,7 +739,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 683701cf7270..3fd102efb7af 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -887,7 +887,7 @@ static int vce_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; if ((adev->asic_type == CHIP_POLARIS10) || diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index e654938f6cca..1a24fadd30e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1346,7 +1346,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index f4db8af6536b..b8dc136d2a01 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1213,7 +1213,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index c8b63d57a541..6970d3a1ae6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1663,7 +1663,7 @@ static int vcn_v2_5_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d9e331084ea0..407c6093c2ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -717,7 +717,7 @@ static int vega10_ih_set_clockgating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; vega10_ih_update_clockgating_state(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); return 0; } From 240c811ccde49ad336b23c582018c6001690f19d Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Mon, 20 Jan 2020 15:08:35 +0800 Subject: [PATCH 222/658] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training(V2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [why] In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by UMC, that causes problems(like GTT corrupted and page fault observed). [how] Saving the content of this bottom VRAM to system memory before training, and restoring it after training to avoid VRAM corruption. 
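For reference, the save/restore flow described above reduces to the
following sketch. This is illustrative only; the real change lives in
psp_v11_0_memory_training() in the hunk below, which also validates
visible_vram_size and aper_base_kaddr before touching the aperture and
handles the error paths:

	uint32_t sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
	void *buf = vmalloc(sz);

	if (buf) {
		/* save the bottom of VRAM through the CPU-visible aperture */
		memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);

		/* long (BIST) training encroaches on that region */
		psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);

		/* put the saved content back and flush HDP before it is reused */
		memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
		adev->nbio.funcs->hdp_flush(adev, NULL);
		vfree(buf);
	}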
Acked-by: Christian König Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 ++ drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 36 ++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3265487b859f..611021514c52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -172,6 +172,8 @@ struct psp_dtm_context { #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 +/*Define the VRAM size that will be encroached by BIST training.*/ +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 685dd9754c67..ac173d2eb809 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -971,10 +971,13 @@ Err_out: */ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) { - int ret; - uint32_t p2c_header[4]; struct psp_memory_training_context *ctx = &psp->mem_train_ctx; uint32_t *pcache = (uint32_t*)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret; if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { DRM_DEBUG("Memory training is not supported.\n"); @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return 0; } - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", pcache[0], pcache[1], pcache[2], pcache[3], p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) DRM_DEBUG("Memory training ops:%x.\n", ops); if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long traing will encroach certain mount of bottom VRAM, + * saving the content of this bottom VRAM to system memory + * before training, and restoring it after training to avoid + * VRAM corruption. 
+ */ + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + DRM_ERROR("failed to allocate system memory.\n"); + return -ENOMEM; + } + + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); if (ret) { DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); return ret; } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->nbio.funcs->hdp_flush(adev, NULL); + vfree(buf); } if (ops & PSP_MEM_TRAIN_SAVE) { From 7db1d560a4fab1e4668b1964e77f52cefce9e9c7 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Mon, 20 Jan 2020 17:20:10 +0800 Subject: [PATCH 223/658] Revert "drm/amdgpu: fix modprobe failure of the secondary GPU when GDDR6 training enabled(V5)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9e441478623fd913d4340654682b19f0c24e629d. The patch will be replaced with a better solution, revert it. Reviewed-by: Christian König Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 5 ----- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 27 +------------------------ 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 86267baca07c..d3c27a3c43f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -60,11 +60,6 @@ */ #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL -/* - * Default stolen memory size, 1024 * 768 * 4 - */ -#define AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE 0x300000ULL - struct firmware; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 86f4ffe408e7..0c5bf3bd640f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -641,12 +641,7 @@ static int gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - /* - * Can't free the stolen VGA memory when it might be used for memory - * training again. - */ - if (!adev->fw_vram_usage.mem_train_support) - amdgpu_bo_late_init(adev); + amdgpu_bo_late_init(adev); r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) @@ -830,19 +825,6 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev); - /* - * In dual GPUs scenario, stolen_size is assigned to zero on the - * secondary GPU, since there is no pre-OS console using that memory. - * Then the bottom region of VRAM was allocated as GTT, unfortunately a - * small region of bottom VRAM was encroached by UMC firmware during - * GDDR6 BIST training, this cause page fault. - * The page fault can be fixed by forcing stolen_size to 3MB, then the - * bottom region of VRAM was allocated as stolen memory, GTT corruption - * avoid. 
- */ - adev->gmc.stolen_size = max(adev->gmc.stolen_size, - AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE); - /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -882,13 +864,6 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) static int gmc_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - void *stolen_vga_buf; - - /* - * Free the stolen memory if it wasn't already freed in late_init - * because of memory training. - */ - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_vm_manager_fini(adev); gmc_v10_0_gart_fini(adev); From 63e3ab9a8226c0271e823f32fc1c3de67d142c78 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 21 Jan 2020 13:29:20 +0100 Subject: [PATCH 224/658] drm/amdgpu: individualize fence allocation per entity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocate fences for each entity and remove ctx->fences reference as fences should be bound to amdgpu_ctx_entity instead amdgpu_ctx. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 46 +++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 - 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 64e2babbc36e..05c2af61e7de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -87,24 +87,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; - ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities, - sizeof(struct dma_fence*), GFP_KERNEL); - if (!ctx->fences) - return -ENOMEM; ctx->entities[0] = kcalloc(num_entities, sizeof(struct amdgpu_ctx_entity), GFP_KERNEL); - if (!ctx->entities[0]) { - r = -ENOMEM; - goto error_free_fences; - } + if (!ctx->entities[0]) + return -ENOMEM; + for (i = 0; i < num_entities; ++i) { struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; entity->sequence = 1; - entity->fences = &ctx->fences[amdgpu_sched_jobs * i]; + entity->fences = kcalloc(amdgpu_sched_jobs, + sizeof(struct dma_fence*), GFP_KERNEL); + if (!entity->fences) { + r = -ENOMEM; + goto error_cleanup_memory; + } } for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) ctx->entities[i] = ctx->entities[i - 1] + @@ -181,11 +181,17 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, error_cleanup_entities: for (i = 0; i < num_entities; ++i) drm_sched_entity_destroy(&ctx->entities[0][i].entity); - kfree(ctx->entities[0]); -error_free_fences: - kfree(ctx->fences); - ctx->fences = NULL; +error_cleanup_memory: + for (i = 0; i < num_entities; ++i) { + struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; + + kfree(entity->fences); + entity->fences = NULL; + } + + kfree(ctx->entities[0]); + ctx->entities[0] = NULL; return r; } @@ -199,12 +205,16 @@ static void amdgpu_ctx_fini(struct kref *ref) if (!adev) return; - for (i = 0; i < num_entities; ++i) - for (j = 0; j < amdgpu_sched_jobs; ++j) - dma_fence_put(ctx->entities[0][i].fences[j]); - kfree(ctx->fences); - kfree(ctx->entities[0]); + for (i = 0; i < num_entities; ++i) { + struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; + for (j = 0; j < amdgpu_sched_jobs; ++j) + dma_fence_put(entity->fences[j]); + + kfree(entity->fences); + } + + kfree(ctx->entities[0]); mutex_destroy(&ctx->lock); kfree(ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 4ad90a44dc3c..a6cd9d4b078c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -42,7 +42,6 @@ struct amdgpu_ctx { unsigned reset_counter_query; uint32_t vram_lost_counter; spinlock_t ring_lock; - struct dma_fence **fences; struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM]; bool preamble_presented; enum drm_sched_priority init_priority; From 43563bc2e6a769502d23f4ec9cd590e4636cf0ea Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 23 Dec 2019 16:02:13 -0500 Subject: [PATCH 225/658] drm/amd/display: update MSA and VSC SDP on video test pattern request [why] On video test pattern request we need to update MSA and VSC so it will match the requested test pattern dynamic range field. [how] Update dynamic range field in MSA and disable VSC as updating VSC info packet is complicated and not required for test pattern purpose. Signed-off-by: Wenjing Liu Reviewed-by: Nikola Cornij Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6ab298c65247..1bd0946829e3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3925,8 +3925,43 @@ bool dc_link_dp_set_test_pattern( sizeof(training_pattern)); } } else { - /* CRTC Patterns */ + enum dc_color_space color_space = COLOR_SPACE_UNKNOWN; + struct encoder_info_frame info_frame = pipe_ctx->stream_res.encoder_info_frame; + + switch (test_pattern_color_space) { + case DP_TEST_PATTERN_COLOR_SPACE_RGB: + color_space = COLOR_SPACE_SRGB; + if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA) + color_space = COLOR_SPACE_SRGB_LIMITED; + break; + + case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601: + color_space = COLOR_SPACE_YCBCR601; + if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA) + color_space = COLOR_SPACE_YCBCR601_LIMITED; + break; + case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709: + color_space = COLOR_SPACE_YCBCR709; + if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA) + color_space = COLOR_SPACE_YCBCR709_LIMITED; + break; + default: + break; + } + /* update MSA to requested color space */ + pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc, + &pipe_ctx->stream->timing, + color_space, false, link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); + + /* disable vsc so no need to update it based on request */ + info_frame.vsc.valid = false; + pipe_ctx->stream_res.stream_enc->funcs->update_dp_info_packets( + pipe_ctx->stream_res.stream_enc, + &info_frame); + + /* CRTC Patterns */ set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space); + /* Set Test Pattern state */ link->test_pattern_enabled = true; } From 50d2c6027ef4d09d7c2a9d12b2a7957576c0fa3c Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Fri, 3 Jan 2020 14:03:52 -0500 Subject: [PATCH 226/658] drm/amd/display: Do not send training pattern if VS Different [Why] The DP 1.4a Spec requires that training pattern only under certain specific conditions. Currently driver will re-send training pattern every time voltage swing value changes, but that should not be the case. [How] Do not re-send training pattern every time VS values are different. Only send it on the first iteration. 
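The one-line change below swaps the gating counter: retries_cr is reset
whenever the requested voltage swing changes, while retry_count only
ever increments, so keying the DPCD burst off retry_count sends the
training pattern exactly once. A simplified sketch of the intent (the
helper names here are placeholders, not the actual driver functions):

	uint32_t retry_count = 0;	/* total CR iterations */
	uint32_t retries_cr = 0;	/* iterations at the current settings */

	while (!clock_recovery_done(link) && retry_count < max_cr_retries) {
		program_phy_drive_settings(link, lt_settings);

		/* 5-byte pattern + lane settings burst: first pass only */
		if (!retry_count)
			write_dpcd_pattern_and_lane_settings(link, lt_settings);
		else
			write_dpcd_lane_settings(link, lt_settings);

		if (drive_settings_changed(link, lt_settings))
			retries_cr = 0;	/* reset, but do not resend the pattern */
		else
			retries_cr++;

		retry_count++;
	}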
Signed-off-by: Sung Lee Reviewed-by: Tony Cheng Acked-by: Abdoulaye Berthe Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 1bd0946829e3..3bb1b481451b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -983,7 +983,7 @@ static enum link_training_result perform_clock_recovery_sequence( offset); /* 2. update DPCD of the receiver*/ - if (!retries_cr) + if (!retry_count) /* EPR #361076 - write as a 5-byte burst, * but only for the 1-st iteration.*/ dpcd_set_lt_pattern_and_lane_settings( From 07d4f905329438b8e4daf7f58f5afea7d781a9b2 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 7 Jan 2020 16:33:32 -0500 Subject: [PATCH 227/658] drm/amd/display: Add debug option to disable DSC support [how] Empty dsc enc caps when debug option is set to disable DSC. Signed-off-by: Wenjing Liu Reviewed-by: Nikola Cornij Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3fa85a54360f..7d31dcb9e37f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -425,6 +425,7 @@ struct dc_debug_options { bool validate_dml_output; bool enable_dmcub_surface_flip; bool usbc_combo_phy_reset_wa; + bool disable_dsc; }; struct dc_debug_data { diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 8b78fcbfe746..87d682d25278 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -224,7 +224,8 @@ static void get_dsc_enc_caps( memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); if (dsc) { - dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); + if (!dsc->ctx->dc->debug.disable_dsc) + dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); if (dsc->ctx->dc->debug.native422_support) dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; } From 153ca760219277997e2c3146ae9a15c979c50520 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 6 Jan 2020 10:29:13 -0500 Subject: [PATCH 228/658] drm/amd/display: Get fb base and fb offset for DMUB from registers [Why] Under some hardware initialization sequences the fb base/fb offset provided can be zero or hardwareinit can happen too late. We want to ensure that we always have the correct fb_base/fb_offset when performing DMCUB hardware initialization so we can do DMCUB command table offloading during first dc hardware init. [How] Read from the DCN registers. VBIOS already filled these in for us. 
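Concretely, the lookup added below (dmub_dcn20_get_fb_base_offset()) is
just a pair of register reads; the shift by 24 rebuilds the byte address
from the register fields, and the backdoor_load/setup_windows paths call
this helper instead of trusting the cached dmub->fb_base / fb_offset:

	uint32_t tmp;

	/* VBIOS has already programmed these during asic init */
	REG_GET(DCN_VM_FB_LOCATION_BASE, FB_BASE, &tmp);
	*fb_base = (uint64_t)tmp << 24;

	REG_GET(DCN_VM_FB_OFFSET, FB_OFFSET, &tmp);
	*fb_offset = (uint64_t)tmp << 24;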
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Wesley Chalmers Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/inc/dmub_srv.h | 2 ++ .../gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 21 +++++++++++++++++-- .../gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 8 +++++-- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h index 8e23a7017588..287fb9a36a64 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h @@ -231,6 +231,8 @@ struct dmub_srv_base_funcs { struct dmub_srv_hw_funcs { /* private: internal use only */ + void (*init)(struct dmub_srv *dmub); + void (*reset)(struct dmub_srv *dmub); void (*reset_release)(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index cd51c6138894..9229012b93e2 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -54,6 +54,19 @@ const struct dmub_srv_common_regs dmub_srv_dcn20_regs = { /* Shared functions. */ +static void dmub_dcn20_get_fb_base_offset(struct dmub_srv *dmub, + uint64_t *fb_base, + uint64_t *fb_offset) +{ + uint32_t tmp; + + REG_GET(DCN_VM_FB_LOCATION_BASE, FB_BASE, &tmp); + *fb_base = (uint64_t)tmp << 24; + + REG_GET(DCN_VM_FB_OFFSET, FB_OFFSET, &tmp); + *fb_offset = (uint64_t)tmp << 24; +} + static inline void dmub_dcn20_translate_addr(const union dmub_addr *addr_in, uint64_t fb_base, uint64_t fb_offset, @@ -82,7 +95,9 @@ void dmub_dcn20_backdoor_load(struct dmub_srv *dmub, const struct dmub_window *cw1) { union dmub_addr offset; - uint64_t fb_base = dmub->fb_base, fb_offset = dmub->fb_offset; + uint64_t fb_base, fb_offset; + + dmub_dcn20_get_fb_base_offset(dmub, &fb_base, &fb_offset); REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); REG_UPDATE_2(DMCUB_MEM_CNTL, DMCUB_MEM_READ_SPACE, 0x3, @@ -118,7 +133,9 @@ void dmub_dcn20_setup_windows(struct dmub_srv *dmub, const struct dmub_window *cw6) { union dmub_addr offset; - uint64_t fb_base = dmub->fb_base, fb_offset = dmub->fb_offset; + uint64_t fb_base, fb_offset; + + dmub_dcn20_get_fb_base_offset(dmub, &fb_base, &fb_offset); dmub_dcn20_translate_addr(&cw2->offset, fb_base, fb_offset, &offset); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h index 53bfd4da69ad..04b0fa13153d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h @@ -92,7 +92,9 @@ struct dmub_srv; DMUB_SR(DMCUB_SCRATCH14) \ DMUB_SR(DMCUB_SCRATCH15) \ DMUB_SR(CC_DC_PIPE_DIS) \ - DMUB_SR(MMHUBBUB_SOFT_RESET) + DMUB_SR(MMHUBBUB_SOFT_RESET) \ + DMUB_SR(DCN_VM_FB_LOCATION_BASE) \ + DMUB_SR(DCN_VM_FB_OFFSET) #define DMUB_COMMON_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -121,7 +123,9 @@ struct dmub_srv; DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ - DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) + DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ + DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ + DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) struct dmub_srv_common_reg_offset { #define DMUB_SR(reg) uint32_t reg; From 830821a64db63026a8b7414b2b539f262821ad23 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 6 Jan 2020 13:21:48 
-0500 Subject: [PATCH 229/658] drm/amd/display: Fallback to DMCUB when command table is missing [Why] If the command table isn't available then we can fallback to DMCUB offloading if it's enabled and available. [How] Instead of assigning NULL for supported command table functions we can fallback to the DMCUB when it's available. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Wesley Chalmers Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/bios/command_table2.c | 74 ++++++++++++++++++- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 2cb7a4288cb7..c4ba6e84db65 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -89,6 +89,10 @@ static enum bp_result encoder_control_digx_v1_5( struct bios_parser *bp, struct bp_encoder_control *cntl); +static enum bp_result encoder_control_fallback( + struct bios_parser *bp, + struct bp_encoder_control *cntl); + static void init_dig_encoder_control(struct bios_parser *bp) { uint32_t version = @@ -100,7 +104,7 @@ static void init_dig_encoder_control(struct bios_parser *bp) break; default: dm_output_to_console("Don't have dig_encoder_control for v%d\n", version); - bp->cmd_tbl.dig_encoder_control = NULL; + bp->cmd_tbl.dig_encoder_control = encoder_control_fallback; break; } } @@ -184,6 +188,18 @@ static enum bp_result encoder_control_digx_v1_5( return result; } +static enum bp_result encoder_control_fallback( + struct bios_parser *bp, + struct bp_encoder_control *cntl) +{ + if (bp->base.ctx->dc->ctx->dmub_srv && + bp->base.ctx->dc->debug.dmub_command_table) { + return encoder_control_digx_v1_5(bp, cntl); + } + + return BP_RESULT_FAILURE; +} + /***************************************************************************** ****************************************************************************** ** @@ -196,6 +212,10 @@ static enum bp_result transmitter_control_v1_6( struct bios_parser *bp, struct bp_transmitter_control *cntl); +static enum bp_result transmitter_control_fallback( + struct bios_parser *bp, + struct bp_transmitter_control *cntl); + static void init_transmitter_control(struct bios_parser *bp) { uint8_t frev; @@ -209,7 +229,7 @@ static void init_transmitter_control(struct bios_parser *bp) break; default: dm_output_to_console("Don't have transmitter_control for v%d\n", crev); - bp->cmd_tbl.transmitter_control = NULL; + bp->cmd_tbl.transmitter_control = transmitter_control_fallback; break; } } @@ -273,6 +293,18 @@ static enum bp_result transmitter_control_v1_6( return result; } +static enum bp_result transmitter_control_fallback( + struct bios_parser *bp, + struct bp_transmitter_control *cntl) +{ + if (bp->base.ctx->dc->ctx->dmub_srv && + bp->base.ctx->dc->debug.dmub_command_table) { + return transmitter_control_v1_6(bp, cntl); + } + + return BP_RESULT_FAILURE; +} + /****************************************************************************** ****************************************************************************** ** @@ -285,6 +317,10 @@ static enum bp_result set_pixel_clock_v7( struct bios_parser *bp, struct bp_pixel_clock_parameters *bp_params); +static enum bp_result set_pixel_clock_fallback( + struct bios_parser *bp, + struct bp_pixel_clock_parameters *bp_params); + static void init_set_pixel_clock(struct bios_parser *bp) { switch (BIOS_CMD_TABLE_PARA_REVISION(setpixelclock)) { @@ -294,7 +330,7 @@ static 
void init_set_pixel_clock(struct bios_parser *bp) default: dm_output_to_console("Don't have set_pixel_clock for v%d\n", BIOS_CMD_TABLE_PARA_REVISION(setpixelclock)); - bp->cmd_tbl.set_pixel_clock = NULL; + bp->cmd_tbl.set_pixel_clock = set_pixel_clock_fallback; break; } } @@ -400,6 +436,18 @@ static enum bp_result set_pixel_clock_v7( return result; } +static enum bp_result set_pixel_clock_fallback( + struct bios_parser *bp, + struct bp_pixel_clock_parameters *bp_params) +{ + if (bp->base.ctx->dc->ctx->dmub_srv && + bp->base.ctx->dc->debug.dmub_command_table) { + return set_pixel_clock_v7(bp, bp_params); + } + + return BP_RESULT_FAILURE; +} + /****************************************************************************** ****************************************************************************** ** @@ -632,6 +680,11 @@ static enum bp_result enable_disp_power_gating_v2_1( enum controller_id crtc_id, enum bp_pipe_control_action action); +static enum bp_result enable_disp_power_gating_fallback( + struct bios_parser *bp, + enum controller_id crtc_id, + enum bp_pipe_control_action action); + static void init_enable_disp_power_gating( struct bios_parser *bp) { @@ -643,7 +696,7 @@ static void init_enable_disp_power_gating( default: dm_output_to_console("Don't enable_disp_power_gating enable_crtc for v%d\n", BIOS_CMD_TABLE_PARA_REVISION(enabledisppowergating)); - bp->cmd_tbl.enable_disp_power_gating = NULL; + bp->cmd_tbl.enable_disp_power_gating = enable_disp_power_gating_fallback; break; } } @@ -695,6 +748,19 @@ static enum bp_result enable_disp_power_gating_v2_1( return result; } +static enum bp_result enable_disp_power_gating_fallback( + struct bios_parser *bp, + enum controller_id crtc_id, + enum bp_pipe_control_action action) +{ + if (bp->base.ctx->dc->ctx->dmub_srv && + bp->base.ctx->dc->debug.dmub_command_table) { + return enable_disp_power_gating_v2_1(bp, crtc_id, action); + } + + return BP_RESULT_FAILURE; +} + /****************************************************************************** ******************************************************************************* ** From 14ae7665e70a982b3653f2aa024251d69d89f81c Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Thu, 9 Jan 2020 15:11:06 +0800 Subject: [PATCH 230/658] drm/amd/display: check pipe_ctx is split pipe or not [Why] Driver use pipe_ctx to reallocate payload may cause allocate payload twice on same sink with split pipe. [How] Drvier must to check pipe_ctx is split pipe or not to avoid reallocate payload twice on same sink. Signed-off-by: Paul Hsieh Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 260c0b62d37d..a50768a7ba68 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2882,7 +2882,16 @@ enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link) // Clear all of MST payload then reallocate for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + + /* driver enable split pipe for external monitors + * we have to check pipe_ctx is split pipe or not + * If it's split pipe, driver using top pipe to + * reaallocate. 
+ */ + if (!pipe_ctx || pipe_ctx->top_pipe) + continue; + + if (pipe_ctx->stream && pipe_ctx->stream->link == link && pipe_ctx->stream->dpms_off == false && pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { deallocate_mst_payload(pipe_ctx); @@ -2891,7 +2900,11 @@ enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link) for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + + if (!pipe_ctx || pipe_ctx->top_pipe) + continue; + + if (pipe_ctx->stream && pipe_ctx->stream->link == link && pipe_ctx->stream->dpms_off == false && pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { /* enable/disable PHY will clear connection between BE and FE From 5ea3985098048ba6850547c816208e5c2a40bef4 Mon Sep 17 00:00:00 2001 From: Haiyi Zhou Date: Thu, 9 Jan 2020 11:08:33 -0500 Subject: [PATCH 231/658] drm/amd/display: Fixed comment styling Switched to C-style comments for consistency Signed-off-by: Haiyi Zhou Reviewed-by: Reza Amini Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 6e5ecefe7d9d..b9992ebf77a6 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -381,7 +381,7 @@ static void apply_fixed_refresh(struct core_freesync *core_freesync, bool update = false; unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us; - //Compute the exit refresh rate and exit frame duration + /* Compute the exit refresh rate and exit frame duration */ unsigned int exit_refresh_rate_in_milli_hz = ((1000000000/max_render_time_in_us) + (1000*FIXED_REFRESH_EXIT_MARGIN_IN_HZ)); unsigned int exit_frame_duration_in_us = 1000000000/exit_refresh_rate_in_milli_hz; From bb6785c1212988c76c21cd1d4647c31545231e8b Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Jan 2020 09:17:58 -0500 Subject: [PATCH 232/658] drm/amd/display: Do DMCUB hw_init before DC [Why] For DMCUB enabled hardware DC has a dependency on DMCUB already being running. Command table offloading will fail on first modeset if DMCUB isn't initialized first. [How] Perform DMCUB hardware initialization before DC. 
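The resulting ordering in amdgpu_dm_init() amounts to the short sketch
below (taken from the hunk that follows, error handling abbreviated):

	r = dm_dmub_hw_init(adev);
	if (r) {
		DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
		goto error;
	}

	/* DC hw init may offload work to the command table, which needs a
	 * running DMCUB, so it comes second.
	 */
	dc_hardware_init(adev->dm.dc);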
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Hersen Wu Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9402374d2466..3af014fcdcc1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -940,14 +940,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - dc_hardware_init(adev->dm.dc); - r = dm_dmub_hw_init(adev); if (r) { DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); goto error; } + dc_hardware_init(adev->dm.dc); + adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { DRM_ERROR( From 819d4b3fbfaf40b4537e80b01aef612e4140a91b Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Thu, 12 Dec 2019 10:37:35 -0500 Subject: [PATCH 233/658] drm/amd/display: Fix DML dummyinteger types mismatch [Why] The types for dummyinteger1 and dummyinteger2 are unsigned as part of the DML spec. They should not be long. [How] Make them unsigned int instead of long. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index e7a44df676ca..2875efd85467 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -763,8 +763,8 @@ struct vba_vars_st { double SwathWidthC[DC__NUM_DPP__MAX]; unsigned int BytePerPixelY[DC__NUM_DPP__MAX]; unsigned int BytePerPixelC[DC__NUM_DPP__MAX]; - long dummyinteger1; - long dummyinteger2; + unsigned int dummyinteger1; + unsigned int dummyinteger2; double FinalDRAMClockChangeLatency; double Tdmdl_vm[DC__NUM_DPP__MAX]; double Tdmdl[DC__NUM_DPP__MAX]; From 887ff121987d1bf56ce7c126731a396d4e29a53c Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Fri, 10 Jan 2020 14:44:28 -0500 Subject: [PATCH 234/658] drm/amd/display: Refactor to remove diags specific rgam func [Why] It has duplicate code for building regamma curve [How] Remove the duplicate code and use the same function for building regamma Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Reviewed-by: Krunoslav Kovac Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../amd/display/modules/color/color_gamma.c | 307 ++++++++---------- .../amd/display/modules/color/color_gamma.h | 4 - 2 files changed, 129 insertions(+), 182 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 1b278c42809a..cac09d500fda 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1673,129 +1673,6 @@ static bool map_regamma_hw_to_x_user( #define _EXTRA_POINTS 3 -bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, - const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed, - const struct freesync_hdr_tf_params *fs_params) -{ - struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; - struct dividers dividers; - - struct pwl_float_data *rgb_user = NULL; - struct pwl_float_data_ex 
*rgb_regamma = NULL; - struct gamma_pixel *axis_x = NULL; - struct pixel_gamma_point *coeff = NULL; - enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; - bool ret = false; - - if (output_tf->type == TF_TYPE_BYPASS) - return false; - - /* we can use hardcoded curve for plain SRGB TF */ - if (output_tf->type == TF_TYPE_PREDEFINED && canRomBeUsed == true && - output_tf->tf == TRANSFER_FUNCTION_SRGB) { - if (ramp == NULL) - return true; - if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) || - (!mapUserRamp && ramp->type == GAMMA_RGB_256)) - return true; - } - - output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - - if (ramp && ramp->type != GAMMA_CS_TFM_1D && - (mapUserRamp || ramp->type != GAMMA_RGB_256)) { - rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, - sizeof(*rgb_user), - GFP_KERNEL); - if (!rgb_user) - goto rgb_user_alloc_fail; - - axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x), - GFP_KERNEL); - if (!axis_x) - goto axis_x_alloc_fail; - - dividers.divider1 = dc_fixpt_from_fraction(3, 2); - dividers.divider2 = dc_fixpt_from_int(2); - dividers.divider3 = dc_fixpt_from_fraction(5, 2); - - build_evenly_distributed_points( - axis_x, - ramp->num_entries, - dividers); - - if (ramp->type == GAMMA_RGB_256 && mapUserRamp) - scale_gamma(rgb_user, ramp, dividers); - else if (ramp->type == GAMMA_RGB_FLOAT_1024) - scale_gamma_dx(rgb_user, ramp, dividers); - } - - rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_regamma), - GFP_KERNEL); - if (!rgb_regamma) - goto rgb_regamma_alloc_fail; - - coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff), - GFP_KERNEL); - if (!coeff) - goto coeff_alloc_fail; - - tf = output_tf->tf; - if (tf == TRANSFER_FUNCTION_PQ) { - tf_pts->end_exponent = 7; - tf_pts->x_point_at_y1_red = 125; - tf_pts->x_point_at_y1_green = 125; - tf_pts->x_point_at_y1_blue = 125; - - build_pq(rgb_regamma, - MAX_HW_POINTS, - coordinates_x, - output_tf->sdr_ref_white_level); - } else if (tf == TRANSFER_FUNCTION_GAMMA22 && - fs_params != NULL && fs_params->skip_tm == 0) { - build_freesync_hdr(rgb_regamma, - MAX_HW_POINTS, - coordinates_x, - fs_params); - } else if (tf == TRANSFER_FUNCTION_HLG) { - build_freesync_hdr(rgb_regamma, - MAX_HW_POINTS, - coordinates_x, - fs_params); - - } else { - tf_pts->end_exponent = 0; - tf_pts->x_point_at_y1_red = 1; - tf_pts->x_point_at_y1_green = 1; - tf_pts->x_point_at_y1_blue = 1; - - build_regamma(rgb_regamma, - MAX_HW_POINTS, - coordinates_x, tf); - } - map_regamma_hw_to_x_user(ramp, coeff, rgb_user, - coordinates_x, axis_x, rgb_regamma, - MAX_HW_POINTS, tf_pts, - (mapUserRamp || (ramp && ramp->type != GAMMA_RGB_256)) && - (ramp && ramp->type != GAMMA_CS_TFM_1D)); - - if (ramp && ramp->type == GAMMA_CS_TFM_1D) - apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); - - ret = true; - - kvfree(coeff); -coeff_alloc_fail: - kvfree(rgb_regamma); -rgb_regamma_alloc_fail: - kvfree(axis_x); -axis_x_alloc_fail: - kvfree(rgb_user); -rgb_user_alloc_fail: - return ret; -} - bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, const struct regamma_lut *regamma) { @@ -2043,14 +1920,14 @@ rgb_user_alloc_fail: return ret; } - -bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, +static bool calculate_curve(enum dc_transfer_func_predefined trans, struct dc_transfer_func_distributed_points *points, + struct pwl_float_data_ex *rgb_regamma, + const struct freesync_hdr_tf_params *fs_params, uint32_t sdr_ref_white_level) { uint32_t i; bool ret = false; - struct pwl_float_data_ex *rgb_regamma = 
NULL; if (trans == TRANSFER_FUNCTION_UNITY || trans == TRANSFER_FUNCTION_LINEAR) { @@ -2060,68 +1937,33 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, points->x_point_at_y1_blue = 1; for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i] = coordinates_x[i].x; - points->green[i] = coordinates_x[i].x; - points->blue[i] = coordinates_x[i].x; + rgb_regamma[i].r = coordinates_x[i].x; + rgb_regamma[i].g = coordinates_x[i].x; + rgb_regamma[i].b = coordinates_x[i].x; } + ret = true; } else if (trans == TRANSFER_FUNCTION_PQ) { - rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_regamma), - GFP_KERNEL); - if (!rgb_regamma) - goto rgb_regamma_alloc_fail; points->end_exponent = 7; points->x_point_at_y1_red = 125; points->x_point_at_y1_green = 125; points->x_point_at_y1_blue = 125; - build_pq(rgb_regamma, MAX_HW_POINTS, coordinates_x, sdr_ref_white_level); - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i] = rgb_regamma[i].r; - points->green[i] = rgb_regamma[i].g; - points->blue[i] = rgb_regamma[i].b; - } + ret = true; - - kvfree(rgb_regamma); - } else if (trans == TRANSFER_FUNCTION_SRGB || - trans == TRANSFER_FUNCTION_BT709 || - trans == TRANSFER_FUNCTION_GAMMA22 || - trans == TRANSFER_FUNCTION_GAMMA24 || - trans == TRANSFER_FUNCTION_GAMMA26) { - rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_regamma), - GFP_KERNEL); - if (!rgb_regamma) - goto rgb_regamma_alloc_fail; - points->end_exponent = 0; - points->x_point_at_y1_red = 1; - points->x_point_at_y1_green = 1; - points->x_point_at_y1_blue = 1; - - build_regamma(rgb_regamma, + } else if (trans == TRANSFER_FUNCTION_GAMMA22 && + fs_params != NULL && fs_params->skip_tm == 0) { + build_freesync_hdr(rgb_regamma, MAX_HW_POINTS, coordinates_x, - trans); - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i] = rgb_regamma[i].r; - points->green[i] = rgb_regamma[i].g; - points->blue[i] = rgb_regamma[i].b; - } - ret = true; + fs_params); - kvfree(rgb_regamma); + ret = true; } else if (trans == TRANSFER_FUNCTION_HLG) { - rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_regamma), - GFP_KERNEL); - if (!rgb_regamma) - goto rgb_regamma_alloc_fail; points->end_exponent = 4; points->x_point_at_y1_red = 12; points->x_point_at_y1_green = 12; @@ -2131,18 +1973,127 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, MAX_HW_POINTS, coordinates_x, 80, 1000); - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i] = rgb_regamma[i].r; - points->green[i] = rgb_regamma[i].g; - points->blue[i] = rgb_regamma[i].b; - } + + ret = true; + } else { + // trans == TRANSFER_FUNCTION_SRGB + // trans == TRANSFER_FUNCTION_BT709 + // trans == TRANSFER_FUNCTION_GAMMA22 + // trans == TRANSFER_FUNCTION_GAMMA24 + // trans == TRANSFER_FUNCTION_GAMMA26 + points->end_exponent = 0; + points->x_point_at_y1_red = 1; + points->x_point_at_y1_green = 1; + points->x_point_at_y1_blue = 1; + + build_regamma(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, + trans); + ret = true; - kvfree(rgb_regamma); } -rgb_regamma_alloc_fail: + return ret; } +bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, + const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed, + const struct freesync_hdr_tf_params *fs_params) +{ + struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; + struct dividers dividers; + + struct pwl_float_data *rgb_user = NULL; + struct pwl_float_data_ex *rgb_regamma = NULL; + struct gamma_pixel *axis_x = NULL; + struct 
pixel_gamma_point *coeff = NULL; + enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; + bool ret = false; + + if (output_tf->type == TF_TYPE_BYPASS) + return false; + + /* we can use hardcoded curve for plain SRGB TF */ + if (output_tf->type == TF_TYPE_PREDEFINED && canRomBeUsed == true && + output_tf->tf == TRANSFER_FUNCTION_SRGB) { + if (ramp == NULL) + return true; + if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) || + (!mapUserRamp && ramp->type == GAMMA_RGB_256)) + return true; + } + + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + if (ramp && ramp->type != GAMMA_CS_TFM_1D && + (mapUserRamp || ramp->type != GAMMA_RGB_256)) { + rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, + sizeof(*rgb_user), + GFP_KERNEL); + if (!rgb_user) + goto rgb_user_alloc_fail; + + axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x), + GFP_KERNEL); + if (!axis_x) + goto axis_x_alloc_fail; + + dividers.divider1 = dc_fixpt_from_fraction(3, 2); + dividers.divider2 = dc_fixpt_from_int(2); + dividers.divider3 = dc_fixpt_from_fraction(5, 2); + + build_evenly_distributed_points( + axis_x, + ramp->num_entries, + dividers); + + if (ramp->type == GAMMA_RGB_256 && mapUserRamp) + scale_gamma(rgb_user, ramp, dividers); + else if (ramp->type == GAMMA_RGB_FLOAT_1024) + scale_gamma_dx(rgb_user, ramp, dividers); + } + + rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, + sizeof(*rgb_regamma), + GFP_KERNEL); + if (!rgb_regamma) + goto rgb_regamma_alloc_fail; + + coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff), + GFP_KERNEL); + if (!coeff) + goto coeff_alloc_fail; + + tf = output_tf->tf; + + ret = calculate_curve(tf, + tf_pts, + rgb_regamma, + fs_params, + output_tf->sdr_ref_white_level); + + if (ret) { + map_regamma_hw_to_x_user(ramp, coeff, rgb_user, + coordinates_x, axis_x, rgb_regamma, + MAX_HW_POINTS, tf_pts, + (mapUserRamp || (ramp && ramp->type != GAMMA_RGB_256)) && + (ramp && ramp->type != GAMMA_CS_TFM_1D)); + + if (ramp && ramp->type == GAMMA_CS_TFM_1D) + apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); + } + + kvfree(coeff); +coeff_alloc_fail: + kvfree(rgb_regamma); +rgb_regamma_alloc_fail: + kvfree(axis_x); +axis_x_alloc_fail: + kvfree(rgb_user); +rgb_user_alloc_fail: + return ret; +} bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, struct dc_transfer_func_distributed_points *points) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index 44ddea58523a..9994817a9a03 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -103,10 +103,6 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp); -bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, - struct dc_transfer_func_distributed_points *points, - uint32_t sdr_ref_white_level); - bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, struct dc_transfer_func_distributed_points *points); From 9eeec26a13392d6efb5215c26b5e9965ef7ac2af Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Sat, 11 Jan 2020 00:51:15 +0800 Subject: [PATCH 235/658] drm/amd/display: Refine i2c frequency calculating sequence [Why] In HG mode, vbios didn't call DispController_Init to program NV1x XTAL_REF_DIV value when ASIC_INIT, but driver read XTAL_REF_DIV to calculate i2c reference 
frequency. This causes the i2c frequency to change from 100kHz to 200kHz. [How] Remove the get_speed function and calculate the reference frequency in the set_speed function. Signed-off-by: Lewis Huang Reviewed-by: Jun Lei Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dce_i2c_hw.c | 68 ++++++------------- 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index 1cd4d8fc361f..49d490214060 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -100,20 +100,6 @@ static uint32_t get_hw_buffer_available_size( dce_i2c_hw->buffer_used_bytes; } -static uint32_t get_speed( - const struct dce_i2c_hw *dce_i2c_hw) -{ - uint32_t pre_scale = 0; - - REG_GET(SPEED, DC_I2C_DDC1_PRESCALE, &pre_scale); - - /* [anaumov] it seems following is unnecessary */ - /*ASSERT(value.bits.DC_I2C_DDC1_PRESCALE);*/ - return pre_scale ? - dce_i2c_hw->reference_frequency / pre_scale : - dce_i2c_hw->default_speed; -} - static void process_channel_reply( struct dce_i2c_hw *dce_i2c_hw, struct i2c_payload *reply) @@ -278,16 +264,25 @@ static void set_speed( struct dce_i2c_hw *dce_i2c_hw, uint32_t speed) { + uint32_t xtal_ref_div = 0; + uint32_t prescale = 0; + + REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div); + + if (xtal_ref_div == 0) + xtal_ref_div = 2; + + prescale = ((dce_i2c_hw->reference_frequency * 2) / xtal_ref_div) / speed; if (speed) { if (dce_i2c_hw->masks->DC_I2C_DDC1_START_STOP_TIMING_CNTL) REG_UPDATE_N(SPEED, 3, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE), dce_i2c_hw->reference_frequency / speed, + FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE), prescale, FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2, FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_START_STOP_TIMING_CNTL), speed > 50 ?
2:1); else REG_UPDATE_N(SPEED, 2, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE), dce_i2c_hw->reference_frequency / speed, + FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE), prescale, FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2); } } @@ -344,9 +339,7 @@ static void release_engine( bool safe_to_reset; /* Restore original HW engine speed */ - - set_speed(dce_i2c_hw, dce_i2c_hw->original_speed); - + REG_WRITE(SPEED, dce_i2c_hw->original_speed); /* Reset HW engine */ { @@ -416,7 +409,7 @@ struct dce_i2c_hw *acquire_i2c_hw_engine( dce_i2c_hw->ddc = ddc; - current_speed = get_speed(dce_i2c_hw); + current_speed = REG_READ(SPEED); if (current_speed) dce_i2c_hw->original_speed = current_speed; @@ -478,13 +471,9 @@ static void submit_channel_request_hw( static uint32_t get_transaction_timeout_hw( const struct dce_i2c_hw *dce_i2c_hw, - uint32_t length) + uint32_t length, + uint32_t speed) { - - uint32_t speed = get_speed(dce_i2c_hw); - - - uint32_t period_timeout; uint32_t num_of_clock_stretches; @@ -504,7 +493,8 @@ static uint32_t get_transaction_timeout_hw( bool dce_i2c_hw_engine_submit_payload( struct dce_i2c_hw *dce_i2c_hw, struct i2c_payload *payload, - bool middle_of_transaction) + bool middle_of_transaction, + uint32_t speed) { struct i2c_request_transaction_data request; @@ -542,7 +532,7 @@ bool dce_i2c_hw_engine_submit_payload( /* obtain timeout value before submitting request */ transaction_timeout = get_transaction_timeout_hw( - dce_i2c_hw, payload->length + 1); + dce_i2c_hw, payload->length + 1, speed); submit_channel_request_hw( dce_i2c_hw, &request); @@ -588,13 +578,11 @@ bool dce_i2c_submit_command_hw( struct i2c_payload *payload = cmd->payloads + index_of_payload; if (!dce_i2c_hw_engine_submit_payload( - dce_i2c_hw, payload, mot)) { + dce_i2c_hw, payload, mot, cmd->speed)) { result = false; break; } - - ++index_of_payload; } @@ -640,9 +628,6 @@ void dce100_i2c_hw_construct( const struct dce_i2c_shift *shifts, const struct dce_i2c_mask *masks) { - - uint32_t xtal_ref_div = 0; - dce_i2c_hw_construct(dce_i2c_hw, ctx, engine_id, @@ -650,21 +635,6 @@ void dce100_i2c_hw_construct( shifts, masks); dce_i2c_hw->buffer_size = I2C_HW_BUFFER_SIZE_DCE100; - - REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div); - - if (xtal_ref_div == 0) - xtal_ref_div = 2; - - /*Calculating Reference Clock by divding original frequency by - * XTAL_REF_DIV. - * At upper level, uint32_t reference_frequency = - * dal_dce_i2c_get_reference_clock(as) >> 1 - * which already divided by 2. 
So we need x2 to get original - * reference clock from ppll_info - */ - dce_i2c_hw->reference_frequency = - (dce_i2c_hw->reference_frequency * 2) / xtal_ref_div; } void dce112_i2c_hw_construct( From 2c61704bcaeed3b51dd48f97003533f40dbf3050 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 13 Jan 2020 08:40:00 -0500 Subject: [PATCH 236/658] drm/amd/display: 3.2.69 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7d31dcb9e37f..8ff25b5dd2f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.68" +#define DC_VER "3.2.69" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 022205ffbb8fc48bb12c7941f07c6e19bdf11155 Mon Sep 17 00:00:00 2001 From: Brandon Syu Date: Thu, 9 Jan 2020 14:19:20 +0800 Subject: [PATCH 237/658] drm/amd/display: fix rotation_angle to use enum values [Why] Hardcoded fixed values are not proper. [How] Use enum values instead of fixed numbers. Signed-off-by: Brandon Syu Reviewed-by: Eric Yang Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index da63fc53cc4a..cf09b9335728 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -261,28 +261,28 @@ static void hubp21_apply_PLAT_54186_wa( address->video_progressive.luma_addr.high_part == 0xf4) return; - if ((rotation_angle == 0 || rotation_angle == 180) + if ((rotation_angle == ROTATION_ANGLE_0 || rotation_angle == ROTATION_ANGLE_180) && viewport_c_height <= 512) return; - if ((rotation_angle == 90 || rotation_angle == 270) + if ((rotation_angle == ROTATION_ANGLE_90 || rotation_angle == ROTATION_ANGLE_270) && viewport_c_width <= 512) return; switch (rotation_angle) { - case 0: /* 0 degree rotation */ + case ROTATION_ANGLE_0: /* 0 degree rotation */ row_height = 128; patched_viewport_height = (viewport_c_height / row_height + 1) * row_height + 1; patched_viewport_width = viewport_c_width; hubp21->PLAT_54186_wa_chroma_addr_offset = 0; break; - case 2: /* 180 degree rotation */ + case ROTATION_ANGLE_180: /* 180 degree rotation */ row_height = 128; patched_viewport_height = viewport_c_height + row_height; patched_viewport_width = viewport_c_width; hubp21->PLAT_54186_wa_chroma_addr_offset = 0 - chroma_pitch * row_height * chroma_bpe; break; - case 1: /* 90 degree rotation */ + case ROTATION_ANGLE_90: /* 90 degree rotation */ row_height = 256; if (h_mirror_en) { patched_viewport_height = viewport_c_height; @@ -294,7 +294,7 @@ static void hubp21_apply_PLAT_54186_wa( hubp21->PLAT_54186_wa_chroma_addr_offset = 0 - tile_blk_size; } break; - case 3: /* 270 degree rotation */ + case ROTATION_ANGLE_270: /* 270 degree rotation */ row_height = 256; if (h_mirror_en) { patched_viewport_height = viewport_c_height; From 7527791e1fbd595a294cf6e6f41999d8acf6c43f Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 13 Jan 2020 10:26:19 -0500 Subject: [PATCH 238/658] drm/amd/display: Fix update type for multiple planes [Why] determine_update_type_for_commit() uses pointers to single 
instance of local variable to fill scaling/color info for all planes updates. This is a bug, that leads to incorrect update type for commit in case of multiple planes per crtc. Each plane should refer to separate scaling/color data. [How] Use arrays for plane properties. Bundle all properties into a single structure to simplify memory allocation. Signed-off-by: Roman Li Reviewed-by: Nicholas Kazlauskas Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 64 ++++++++++--------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3af014fcdcc1..2ac349849081 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7759,24 +7759,27 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, struct drm_crtc_state *new_crtc_state, *old_crtc_state; struct dm_crtc_state *new_dm_crtc_state, *old_dm_crtc_state; struct dc_stream_status *status = NULL; - - struct dc_surface_update *updates; enum surface_update_type update_type = UPDATE_TYPE_FAST; + struct surface_info_bundle { + struct dc_surface_update surface_updates[MAX_SURFACES]; + struct dc_plane_info plane_infos[MAX_SURFACES]; + struct dc_scaling_info scaling_infos[MAX_SURFACES]; + struct dc_flip_addrs flip_addrs[MAX_SURFACES]; + struct dc_stream_update stream_update; + } *bundle; - updates = kcalloc(MAX_SURFACES, sizeof(*updates), GFP_KERNEL); + bundle = kzalloc(sizeof(*bundle), GFP_KERNEL); - if (!updates) { - DRM_ERROR("Failed to allocate plane updates\n"); + if (!bundle) { + DRM_ERROR("Failed to allocate update bundle\n"); /* Set type to FULL to avoid crashing in DC*/ update_type = UPDATE_TYPE_FULL; goto cleanup; } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct dc_scaling_info scaling_info; - struct dc_stream_update stream_update; - memset(&stream_update, 0, sizeof(stream_update)); + memset(bundle, 0, sizeof(struct surface_info_bundle)); new_dm_crtc_state = to_dm_crtc_state(new_crtc_state); old_dm_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -7793,8 +7796,9 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, j) { const struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(new_plane_state->fb); - struct dc_plane_info plane_info; - struct dc_flip_addrs flip_addr; + struct dc_plane_info *plane_info = &bundle->plane_infos[num_plane]; + struct dc_flip_addrs *flip_addr = &bundle->flip_addrs[num_plane]; + struct dc_scaling_info *scaling_info = &bundle->scaling_infos[num_plane]; uint64_t tiling_flags; new_plane_crtc = new_plane_state->crtc; @@ -7812,49 +7816,48 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, if (crtc != new_plane_crtc) continue; - updates[num_plane].surface = new_dm_plane_state->dc_state; + bundle->surface_updates[num_plane].surface = + new_dm_plane_state->dc_state; if (new_crtc_state->mode_changed) { - stream_update.dst = new_dm_crtc_state->stream->dst; - stream_update.src = new_dm_crtc_state->stream->src; + bundle->stream_update.dst = new_dm_crtc_state->stream->dst; + bundle->stream_update.src = new_dm_crtc_state->stream->src; } if (new_crtc_state->color_mgmt_changed) { - updates[num_plane].gamma = + bundle->surface_updates[num_plane].gamma = new_dm_plane_state->dc_state->gamma_correction; - 
updates[num_plane].in_transfer_func = + bundle->surface_updates[num_plane].in_transfer_func = new_dm_plane_state->dc_state->in_transfer_func; - stream_update.gamut_remap = + bundle->stream_update.gamut_remap = &new_dm_crtc_state->stream->gamut_remap_matrix; - stream_update.output_csc_transform = + bundle->stream_update.output_csc_transform = &new_dm_crtc_state->stream->csc_color_matrix; - stream_update.out_transfer_func = + bundle->stream_update.out_transfer_func = new_dm_crtc_state->stream->out_transfer_func; } ret = fill_dc_scaling_info(new_plane_state, - &scaling_info); + scaling_info); if (ret) goto cleanup; - updates[num_plane].scaling_info = &scaling_info; + bundle->surface_updates[num_plane].scaling_info = scaling_info; if (amdgpu_fb) { ret = get_fb_info(amdgpu_fb, &tiling_flags); if (ret) goto cleanup; - memset(&flip_addr, 0, sizeof(flip_addr)); - ret = fill_dc_plane_info_and_addr( dm->adev, new_plane_state, tiling_flags, - &plane_info, - &flip_addr.address); + plane_info, + &flip_addr->address); if (ret) goto cleanup; - updates[num_plane].plane_info = &plane_info; - updates[num_plane].flip_addr = &flip_addr; + bundle->surface_updates[num_plane].plane_info = plane_info; + bundle->surface_updates[num_plane].flip_addr = flip_addr; } num_plane++; @@ -7875,14 +7878,15 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, status = dc_stream_get_status_from_state(old_dm_state->context, new_dm_crtc_state->stream); - stream_update.stream = new_dm_crtc_state->stream; + bundle->stream_update.stream = new_dm_crtc_state->stream; /* * TODO: DC modifies the surface during this call so we need * to lock here - find a way to do this without locking. */ mutex_lock(&dm->dc_lock); - update_type = dc_check_update_surfaces_for_stream(dc, updates, num_plane, - &stream_update, status); + update_type = dc_check_update_surfaces_for_stream( + dc, bundle->surface_updates, num_plane, + &bundle->stream_update, status); mutex_unlock(&dm->dc_lock); if (update_type > UPDATE_TYPE_MED) { @@ -7892,7 +7896,7 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, } cleanup: - kfree(updates); + kfree(bundle); *out_type = update_type; return ret; From 0167da498d57dddf31eb30516d1fae0f31ddecb6 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 9 Jan 2020 14:41:22 -0500 Subject: [PATCH 239/658] drm/amd/display: Add hardware reset interface for DMUB service [Why] We'll need this to perform a clean shutdown before unloading the driver. [How] It will call reset internally and set hw_init to false. It won't do anything if the hardware isn't initialized. 
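A minimal caller-side sketch of the intended teardown order follows; the wrapper name is hypothetical, while dmub_srv_hw_reset(), enum dmub_status and DMUB_STATUS_OK are taken from this patch:

	/* Hypothetical teardown helper: put the DMCUB into reset before the
	 * framebuffer memory backing it is released or the service is destroyed.
	 */
	static void example_dmub_teardown(struct dmub_srv *dmub)
	{
		enum dmub_status status;

		/* No-op if the hardware was never initialized. */
		status = dmub_srv_hw_reset(dmub);
		if (status != DMUB_STATUS_OK)
			DRM_ERROR("dmub_srv_hw_reset failed: %d\n", status);

		/* Only now is it safe to free the DMCUB framebuffer region. */
	}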
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h | 15 +++++++++++++++ drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h index 287fb9a36a64..f8917594036a 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h @@ -418,6 +418,21 @@ enum dmub_status dmub_srv_is_hw_init(struct dmub_srv *dmub, bool *is_hw_init); enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params); +/** + * dmub_srv_hw_reset() - puts the DMUB hardware in reset state if initialized + * @dmub: the dmub service + * + * Before destroying the DMUB service or releasing the backing framebuffer + * memory we'll need to put the DMCUB into reset first. + * + * A subsequent call to dmub_srv_hw_init() will re-enable the DMCUB. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); + /** * dmub_srv_cmd_queue() - queues a command to the DMUB * @dmub: the dmub service diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index dee676335d73..ea99d4cf34cc 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -415,6 +415,22 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_OK; } +enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) +{ + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (dmub->hw_init == false) + return DMUB_STATUS_OK; + + if (dmub->hw_funcs.reset) + dmub->hw_funcs.reset(dmub); + + dmub->hw_init = false; + + return DMUB_STATUS_OK; +} + enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, const struct dmub_cmd_header *cmd) { From cc934031dc2a0be9535477dbef4b0e94c9f06ecd Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sat, 11 Jan 2020 13:43:39 -0500 Subject: [PATCH 240/658] drm/amd/display: Call ATOM_INIT instead of ATOM_ENABLE for DMCUB [Why] DMCUB command table doesn't support ATOM_ENABLE/ATOM_DISABLE anymore so we never end up calling the DCN init path in DMCUB. [How] Map ATOM_ENABLE to ATOM_INIT only for DMCUB command table offloading. 
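The whole change is a small remap applied just before the command is queued, restated here from the hunk below:

	/* ATOM_ENABLE is old API in DMUB */
	if (power_gating.power_gating.pwr.enable == ATOM_ENABLE)
		power_gating.power_gating.pwr.enable = ATOM_INIT;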
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/command_table2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index c4ba6e84db65..629a07a2719b 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -711,6 +711,10 @@ static void enable_disp_power_gating_dmcub( power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING; power_gating.power_gating.pwr = *pwr; + /* ATOM_ENABLE is old API in DMUB */ + if (power_gating.power_gating.pwr.enable == ATOM_ENABLE) + power_gating.power_gating.pwr.enable = ATOM_INIT; + dc_dmub_srv_cmd_queue(dmcub, &power_gating.header); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); From a7da96fbccc682a67bcc15f3461cdd60a622ffaf Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 13 Jan 2020 08:58:00 -0500 Subject: [PATCH 241/658] drm/amd/display: Reset inbox rptr/wptr when resetting DMCUB [Why] These logically make sense more to be set after the DMCUB has been reset rather than when we setup the inbox. [How] Move them into the reset callback. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index 9229012b93e2..b2ca8e0dbac9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -80,6 +80,8 @@ void dmub_dcn20_reset(struct dmub_srv *dmub) REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + REG_WRITE(DMCUB_INBOX1_RPTR, 0); + REG_WRITE(DMCUB_INBOX1_WPTR, 0); } void dmub_dcn20_reset_release(struct dmub_srv *dmub) @@ -190,8 +192,6 @@ void dmub_dcn20_setup_mailbox(struct dmub_srv *dmub, REG_WRITE(DMCUB_INBOX1_BASE_ADDRESS, 0x80000000); REG_WRITE(DMCUB_INBOX1_SIZE, inbox1->top - inbox1->base); - REG_WRITE(DMCUB_INBOX1_RPTR, 0); - REG_WRITE(DMCUB_INBOX1_WPTR, 0); } uint32_t dmub_dcn20_get_inbox1_rptr(struct dmub_srv *dmub) From e5f0b5214beea71bd25584da451897300742795e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 13 Jan 2020 09:00:46 -0500 Subject: [PATCH 242/658] drm/amd/display: Check hw_init state when determining if DMCUB is initialized [Why] CW2 may already be programmed when coming back from S4. In this case we want to unconditionally replace whatever DMCUB version is currently enabled with the latest. [How] Check the hw_init flag to know whether or not we've previously executed the initliazed routine. 
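A hedged resume-path sketch of how a caller can pair this with dmub_srv_hw_init(); the wrapper below is illustrative only, while the two dmub_srv_* calls and their signatures come from this series:

	static int example_dmub_resume(struct dmub_srv *dmub,
				       const struct dmub_srv_hw_params *params)
	{
		bool is_hw_init = false;

		if (dmub_srv_is_hw_init(dmub, &is_hw_init) != DMUB_STATUS_OK)
			return -EINVAL;

		/* With this patch, a service that never ran dmub_srv_hw_init()
		 * reports false here even if CW2 was left programmed across S4,
		 * so the latest firmware is loaded unconditionally below.
		 */
		if (!is_hw_init &&
		    dmub_srv_hw_init(dmub, params) != DMUB_STATUS_OK)
			return -EINVAL;

		return 0;
	}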
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index ea99d4cf34cc..85a518bf8a76 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -312,6 +312,9 @@ enum dmub_status dmub_srv_is_hw_init(struct dmub_srv *dmub, bool *is_hw_init) if (!dmub->sw_init) return DMUB_STATUS_INVALID; + if (!dmub->hw_init) + return DMUB_STATUS_OK; + if (dmub->hw_funcs.is_hw_init) *is_hw_init = dmub->hw_funcs.is_hw_init(dmub); From 23bc5f3404de2e8b3adaedf33507409fda6f5528 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Wed, 8 Jan 2020 16:59:23 -0500 Subject: [PATCH 243/658] drm/amd/display: support VSC SDP update on video test pattern request [why] MSA will be deprecated in the future. Need to support VSC during DP test automation. [how] Do not disable VSC during DP test automation. TODO - need to add VSC update on DM side on test request. Signed-off-by: Wenjing Liu Reviewed-by: Nikola Cornij Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 3bb1b481451b..a53e8fed56f3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3926,7 +3926,6 @@ bool dc_link_dp_set_test_pattern( } } else { enum dc_color_space color_space = COLOR_SPACE_UNKNOWN; - struct encoder_info_frame info_frame = pipe_ctx->stream_res.encoder_info_frame; switch (test_pattern_color_space) { case DP_TEST_PATTERN_COLOR_SPACE_RGB: @@ -3951,13 +3950,9 @@ bool dc_link_dp_set_test_pattern( /* update MSA to requested color space */ pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc, &pipe_ctx->stream->timing, - color_space, false, link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); - - /* disable vsc so no need to update it based on request */ - info_frame.vsc.valid = false; - pipe_ctx->stream_res.stream_enc->funcs->update_dp_info_packets( - pipe_ctx->stream_res.stream_enc, - &info_frame); + color_space, + pipe_ctx->stream->use_vsc_sdp_for_colorimetry, + link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); /* CRTC Patterns */ set_crtc_test_pattern(link, pipe_ctx, test_pattern, test_pattern_color_space); From 947daab2a2d1813a8d973c59551545e44bdf1549 Mon Sep 17 00:00:00 2001 From: Isabel Zhang Date: Thu, 9 Jan 2020 12:05:21 -0500 Subject: [PATCH 244/658] drm/amd/display: changed max_downscale_src_width to 4096. [Why] MPO isn't enabled on some 4k videos due to video source width is 4096 and the current limit is 3840. [How] Changed the limit to 4096. 
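Schematically, the limit feeds a check of this shape (an illustrative pseudo-check, not the exact DC call site; only dc->debug.max_downscale_src_width comes from the patch):

	/* A 4096-wide 4:2:0 source now passes; with the old 3840 limit the
	 * plane was rejected for downscaling and MPO stayed disabled.
	 */
	if (plane_src_width > dc->debug.max_downscale_src_width)
		return false;	/* downscaling (and thus MPO) not allowed */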
Signed-off-by: Isabel Zhang Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 1d741bca2211..0d506d30d6b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -830,7 +830,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 3840, + .max_downscale_src_width = 4096, .disable_pplib_wm_range = false, .scl_reset_length10 = true, .sanity_checks = true, From 09f6dd6e6d4305cf199443bb7d099f4f64dc374b Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Tue, 14 Jan 2020 11:08:34 +0800 Subject: [PATCH 245/658] drm/amd/display: init hw i2c speed [Why] The driver didn't initialize the HW i2c speed, so the HDCP HW cannot send commands, because the default value of the speed register is 0x2. [How] Restore the default speed when releasing the i2c engine. Signed-off-by: Lewis Huang Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c | 9 +-------- drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h | 1 - 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index 49d490214060..066188ba7949 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -339,7 +339,7 @@ static void release_engine( bool safe_to_reset; /* Restore original HW engine speed */ - REG_WRITE(SPEED, dce_i2c_hw->original_speed); + set_speed(dce_i2c_hw, dce_i2c_hw->default_speed); /* Reset HW engine */ { @@ -371,7 +371,6 @@ struct dce_i2c_hw *acquire_i2c_hw_engine( { uint32_t counter = 0; enum gpio_result result; - uint32_t current_speed; struct dce_i2c_hw *dce_i2c_hw = NULL; if (!ddc) @@ -409,11 +408,6 @@ struct dce_i2c_hw *acquire_i2c_hw_engine( dce_i2c_hw->ddc = ddc; - current_speed = REG_READ(SPEED); - - if (current_speed) - dce_i2c_hw->original_speed = current_speed; - if (!setup_engine(dce_i2c_hw)) { release_engine(dce_i2c_hw); return NULL; @@ -613,7 +607,6 @@ void dce_i2c_hw_construct( dce_i2c_hw->buffer_used_bytes = 0; dce_i2c_hw->transaction_count = 0; dce_i2c_hw->engine_keep_power_up_count = 1; - dce_i2c_hw->original_speed = DEFAULT_I2C_HW_SPEED; dce_i2c_hw->default_speed = DEFAULT_I2C_HW_SPEED; dce_i2c_hw->send_reset_length = 0; dce_i2c_hw->setup_limit = I2C_SETUP_TIME_LIMIT_DCE; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h index d4b2037f7d74..fb055e6883c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h @@ -256,7 +256,6 @@ struct i2c_request_transaction_data { struct dce_i2c_hw { struct ddc *ddc; - uint32_t original_speed; uint32_t engine_keep_power_up_count; uint32_t transaction_count; uint32_t buffer_used_bytes; From d478667253f988dbd6215734a11e35f6953b540d Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 13 Jan 2020 17:05:42 -0500 Subject: [PATCH 246/658] drm/amd/display: use odm combine for YCbCr420 timing with h_active greater than 4096 [why] FMT has a limitation and cannot support YCbCr420 with h_active greater than 4096.
[how] Use odm combine to overcome the limitation. Signed-off-by: Wenjing Liu Reviewed-by: Dmytro Laktyushkin Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../dc/dml/dcn20/display_mode_vba_20.c | 19 ++++++++++----- .../dc/dml/dcn20/display_mode_vba_20v2.c | 24 ++++++++++++------- .../dc/dml/dcn21/display_mode_vba_21.c | 24 ++++++++++++------- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index e7a8ac7a1f22..45f028986a8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -38,6 +38,7 @@ #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff +#define DCN20_MAX_420_IMAGE_WIDTH 4096 static double adjust_ReturnBW( struct display_mode_lib *mode_lib, @@ -3894,13 +3895,19 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && i == mode_lib->vba.soc.num_states) mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { - locals->ODMCombineEnablePerState[i][k] = false; - mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; - } else { - locals->ODMCombineEnablePerState[i][k] = true; - mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + + locals->ODMCombineEnablePerState[i][k] = false; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + if (mode_lib->vba.ODMCapability) { + if (locals->PlaneRequiredDISPCLKWithoutODMCombine > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } } + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 22f3b5a4b3b9..485a9c62ec58 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -39,6 +39,7 @@ #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff #define DCN20_MAX_DSC_IMAGE_WIDTH 5184 +#define DCN20_MAX_420_IMAGE_WIDTH 4096 static double adjust_ReturnBW( struct display_mode_lib *mode_lib, @@ -3935,15 +3936,22 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode && i == mode_lib->vba.soc.num_states) mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - if (mode_lib->vba.ODMCapability == false || - 
(locals->PlaneRequiredDISPCLKWithoutODMCombine <= MaxMaxDispclkRoundedDown - && (!locals->DSCEnabled[k] || locals->HActive[k] <= DCN20_MAX_DSC_IMAGE_WIDTH))) { - locals->ODMCombineEnablePerState[i][k] = false; - mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; - } else { - locals->ODMCombineEnablePerState[i][k] = true; - mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + + locals->ODMCombineEnablePerState[i][k] = false; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + if (mode_lib->vba.ODMCapability) { + if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN20_MAX_DSC_IMAGE_WIDTH)) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } } + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index af35b3bea909..e6617c958bb8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -65,6 +65,7 @@ typedef struct { #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff #define DCN21_MAX_DSC_IMAGE_WIDTH 5184 +#define DCN21_MAX_420_IMAGE_WIDTH 4096 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( @@ -3971,15 +3972,22 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && i == mode_lib->vba.soc.num_states) mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - if (mode_lib->vba.ODMCapability == false || - (locals->PlaneRequiredDISPCLKWithoutODMCombine <= MaxMaxDispclkRoundedDown - && (!locals->DSCEnabled[k] || locals->HActive[k] <= DCN21_MAX_DSC_IMAGE_WIDTH))) { - locals->ODMCombineEnablePerState[i][k] = false; - mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; - } else { - locals->ODMCombineEnablePerState[i][k] = true; - mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + + locals->ODMCombineEnablePerState[i][k] = false; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + if (mode_lib->vba.ODMCapability) { + if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->DSCEnabled[k] && (locals->HActive[k] > 
DCN21_MAX_DSC_IMAGE_WIDTH)) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } } + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { From 4a2d5f663dab6614772d8e28ca190b127ba46d9d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Jan 2020 09:12:58 +0100 Subject: [PATCH 247/658] i2c: Enable compile testing for more drivers Some of the I2C bus drivers can be compile tested to increase build coverage. This requires also: 1. Adding dependencies on COMMON_CLK for BCM2835 and Meson I2C controllers, 2. Adding 'if' conditional to 'default y' so they will not get enabled by default on all other architectures, 3. Limiting few compile test options to supported architectures (which provide the readsX()/writesX() primitives). Signed-off-by: Krzysztof Kozlowski [wsa: revert chunk for ZX2967. it needs more preparation] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 61 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index b2840c5ebf24..e6777e7dfecc 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -367,7 +367,8 @@ comment "I2C system bus drivers (mostly embedded / system-on-chip)" config I2C_ALTERA tristate "Altera Soft IP I2C" - depends on (ARCH_SOCFPGA || NIOS2) && OF + depends on ARCH_SOCFPGA || NIOS2 || COMPILE_TEST + depends on OF help If you say yes to this option, support will be included for the Altera Soft IP I2C interfaces on SoCFPGA and Nios2 architectures. @@ -387,7 +388,7 @@ config I2C_ASPEED config I2C_AT91 tristate "Atmel AT91 I2C Two-Wire interface (TWI)" - depends on ARCH_AT91 + depends on ARCH_AT91 || COMPILE_TEST help This supports the use of the I2C interface on Atmel AT91 processors. @@ -440,7 +441,8 @@ config I2C_AXXIA config I2C_BCM2835 tristate "Broadcom BCM2835 I2C controller" - depends on ARCH_BCM2835 || ARCH_BRCMSTB + depends on ARCH_BCM2835 || ARCH_BRCMSTB || COMPILE_TEST + depends on COMMON_CLK help If you say yes to this option, support will be included for the BCM2835 I2C controller. @@ -463,8 +465,8 @@ config I2C_BCM_IPROC config I2C_BCM_KONA tristate "BCM Kona I2C adapter" - depends on ARCH_BCM_MOBILE - default y + depends on ARCH_BCM_MOBILE || COMPILE_TEST + default y if ARCH_BCM_MOBILE help If you say yes to this option, support will be included for the I2C interface on the Broadcom Kona family of processors. @@ -511,7 +513,7 @@ config I2C_CPM config I2C_DAVINCI tristate "DaVinci I2C driver" - depends on ARCH_DAVINCI || ARCH_KEYSTONE + depends on ARCH_DAVINCI || ARCH_KEYSTONE || COMPILE_TEST help Support for TI DaVinci I2C controller driver. @@ -572,7 +574,7 @@ config I2C_DESIGNWARE_BAYTRAIL config I2C_DIGICOLOR tristate "Conexant Digicolor I2C driver" - depends on ARCH_DIGICOLOR + depends on ARCH_DIGICOLOR || COMPILE_TEST help Support for Conexant Digicolor SoCs (CX92755) I2C controller driver. 
@@ -611,8 +613,9 @@ config I2C_EMEV2 config I2C_EXYNOS5 tristate "Exynos5 high-speed I2C driver" - depends on ARCH_EXYNOS && OF - default y + depends on OF + depends on ARCH_EXYNOS || COMPILE_TEST + default y if ARCH_EXYNOS help High-speed I2C controller on Exynos5 based Samsung SoCs. @@ -634,7 +637,7 @@ config I2C_GPIO_FAULT_INJECTOR config I2C_HIGHLANDER tristate "Highlander FPGA SMBus interface" - depends on SH_HIGHLANDER + depends on SH_HIGHLANDER || COMPILE_TEST help If you say yes to this option, support will be included for the SMBus interface located in the FPGA on various Highlander @@ -686,7 +689,7 @@ config I2C_IMX_LPI2C config I2C_IOP3XX tristate "Intel IOPx3xx and IXP4xx on-chip I2C interface" - depends on ARCH_IOP32X || ARCH_IXP4XX + depends on ARCH_IOP32X || ARCH_IXP4XX || COMPILE_TEST help Say Y here if you want to use the IIC bus controller on the Intel IOPx3xx I/O Processors or IXP4xx Network Processors. @@ -726,6 +729,7 @@ config I2C_LPC2K config I2C_MESON tristate "Amlogic Meson I2C controller" depends on ARCH_MESON || COMPILE_TEST + depends on COMMON_CLK help If you say yes to this option, support will be included for the I2C interface on the Amlogic Meson family of SoCs. @@ -759,7 +763,7 @@ config I2C_MT7621 config I2C_MV64XXX tristate "Marvell mv64xxx I2C Controller" - depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU + depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Marvell 64xxx line of host bridges. @@ -770,7 +774,7 @@ config I2C_MV64XXX config I2C_MXS tristate "Freescale i.MX28 I2C interface" - depends on SOC_IMX28 + depends on SOC_IMX28 || COMPILE_TEST select STMP_DEVICE help Say Y here if you want to use the I2C bus controller on @@ -799,7 +803,7 @@ config I2C_OCORES config I2C_OMAP tristate "OMAP I2C adapter" - depends on ARCH_OMAP || ARCH_K3 + depends on ARCH_OMAP || ARCH_K3 || COMPILE_TEST default y if MACH_OMAP_H3 || MACH_OMAP_OSK help If you say yes to this option, support will be included for the @@ -833,7 +837,7 @@ config I2C_PCA_PLATFORM config I2C_PMCMSP tristate "PMC MSP I2C TWI Controller" - depends on PMC_MSP + depends on PMC_MSP || COMPILE_TEST help This driver supports the PMC TWI controller on MSP devices. @@ -842,7 +846,7 @@ config I2C_PMCMSP config I2C_PNX tristate "I2C bus support for Philips PNX and NXP LPC targets" - depends on ARCH_LPC32XX + depends on ARCH_LPC32XX || COMPILE_TEST help This driver supports the Philips IP3204 I2C IP block master and/or slave controller @@ -863,7 +867,7 @@ config I2C_PUV3 config I2C_PXA tristate "Intel PXA2XX I2C adapter" - depends on ARCH_PXA || ARCH_MMP || ARCH_MVEBU || (X86_32 && PCI && OF) + depends on ARCH_PXA || ARCH_MMP || ARCH_MVEBU || (X86_32 && PCI && OF) || COMPILE_TEST help If you have devices in the PXA I2C bus, say yes to this option. This driver can also be built as a module. If so, the module @@ -933,7 +937,7 @@ config HAVE_S3C2410_I2C config I2C_S3C2410 tristate "S3C2410 I2C Driver" - depends on HAVE_S3C2410_I2C + depends on HAVE_S3C2410_I2C || COMPILE_TEST help Say Y here to include support for I2C controller in the Samsung SoCs. @@ -971,7 +975,7 @@ config I2C_SIMTEC config I2C_SIRF tristate "CSR SiRFprimaII I2C interface" - depends on ARCH_SIRF + depends on ARCH_SIRF || COMPILE_TEST help If you say yes to this option, support will be included for the CSR SiRFprimaII I2C interface. 
@@ -981,14 +985,14 @@ config I2C_SIRF config I2C_SPRD tristate "Spreadtrum I2C interface" - depends on I2C=y && ARCH_SPRD + depends on I2C=y && (ARCH_SPRD || COMPILE_TEST) help If you say yes to this option, support will be included for the Spreadtrum I2C interface. config I2C_ST tristate "STMicroelectronics SSC I2C support" - depends on ARCH_STI + depends on ARCH_STI || COMPILE_TEST help Enable this option to add support for STMicroelectronics SoCs hardware SSC (Synchronous Serial Controller) as an I2C controller. @@ -1019,7 +1023,7 @@ config I2C_STM32F7 config I2C_STU300 tristate "ST Microelectronics DDC I2C interface" - depends on MACH_U300 + depends on MACH_U300 || COMPILE_TEST default y if MACH_U300 help If you say yes to this option, support will be included for the @@ -1055,15 +1059,16 @@ config I2C_SYNQUACER config I2C_TEGRA tristate "NVIDIA Tegra internal I2C controller" - depends on ARCH_TEGRA + depends on ARCH_TEGRA || (COMPILE_TEST && (ARC || ARM || ARM64 || M68K || RISCV || SUPERH || SPARC)) + # COMPILE_TEST needs architectures with readsX()/writesX() primitives help If you say yes to this option, support will be included for the I2C controller embedded in NVIDIA Tegra SOCs config I2C_TEGRA_BPMP tristate "NVIDIA Tegra BPMP I2C controller" - depends on TEGRA_BPMP - default y + depends on TEGRA_BPMP || COMPILE_TEST + default y if TEGRA_BPMP help If you say yes to this option, support will be included for the I2C controller embedded in NVIDIA Tegra SoCs accessed via the BPMP. @@ -1101,7 +1106,7 @@ config I2C_VERSATILE config I2C_WMT tristate "Wondermedia WM8xxx SoC I2C bus support" - depends on ARCH_VT8500 + depends on ARCH_VT8500 || COMPILE_TEST help Say yes if you want to support the I2C bus on Wondermedia 8xxx-series SoCs. @@ -1142,7 +1147,7 @@ config I2C_XILINX config I2C_XLR tristate "Netlogic XLR and Sigma Designs I2C support" - depends on CPU_XLR || ARCH_TANGO + depends on CPU_XLR || ARCH_TANGO || COMPILE_TEST help This driver enables support for the on-chip I2C interface of the Netlogic XLR/XLS MIPS processors and Sigma Designs SOCs. @@ -1294,7 +1299,7 @@ config I2C_ICY config I2C_MLXCPLD tristate "Mellanox I2C driver" - depends on X86_64 + depends on X86_64 || COMPILE_TEST help This exposes the Mellanox platform I2C busses to the linux I2C layer for X86 based systems. From a009310fade9165483483b235360458f1286f32a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Jan 2020 09:12:59 +0100 Subject: [PATCH 248/658] i2c: exynos: Update Kconfig documentation Update the help text to reflect current support devices: 1. The Exynos high speed I2C driver supports Exynos5 (ARMv7), Exynos5433 and Exynos7 (both ARMv8) SoCs, 2. The S3C I2C driver supports S3C, S5Pv210 and Exynos{3,4,5} SoCs. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e6777e7dfecc..2ddca08f8a76 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -612,12 +612,12 @@ config I2C_EMEV2 I2C interface on the Renesas Electronics EM/EV family of processors. config I2C_EXYNOS5 - tristate "Exynos5 high-speed I2C driver" + tristate "Exynos high-speed I2C driver" depends on OF depends on ARCH_EXYNOS || COMPILE_TEST default y if ARCH_EXYNOS help - High-speed I2C controller on Exynos5 based Samsung SoCs. + High-speed I2C controller on Exynos5 and newer Samsung SoCs. 
config I2C_GPIO tristate "GPIO-based bitbanging I2C" @@ -936,11 +936,11 @@ config HAVE_S3C2410_I2C respective Kconfig file. config I2C_S3C2410 - tristate "S3C2410 I2C Driver" + tristate "S3C/Exynos I2C Driver" depends on HAVE_S3C2410_I2C || COMPILE_TEST help Say Y here to include support for I2C controller in the - Samsung SoCs. + Samsung SoCs (S3C, S5Pv210, Exynos). config I2C_SH7760 tristate "Renesas SH7760 I2C Controller" From c6cadc7538bb6df746f1613ee317b646230b2f5e Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 16 Jan 2020 10:56:34 +0800 Subject: [PATCH 249/658] dt-binding: eeprom: at24: add vcc-supply property On some platforms the EEPROM power supply is disabled to reduce power consumption. This patch adds a vcc-supply property. Signed-off-by: Bibby Hsieh Reviewed-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 767959941399..0f6d8db18d6c 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -164,6 +164,10 @@ properties: minimum: 1 maximum: 8 + vcc-supply: + description: + phandle of the regulator that provides the supply voltage. + required: - compatible - reg From cd5676db0574cc1c0b234bc3b17565b07290aa72 Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 23 Jan 2020 12:52:57 +0100 Subject: [PATCH 250/658] misc: eeprom: at24: support pm_runtime control Although on most platforms the EEPROM power is always on, some platforms disable it to meet low-power requirements. This patch adds pm_runtime ops to control the power supply so that all platforms are supported. Signed-off-by: Bibby Hsieh [Bartosz: rebased on top of current at24/for-next] Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 896c1fe3c44b..031eb64549af 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* Address pointer is 16 bit.
*/ @@ -87,6 +88,7 @@ struct at24_data { u8 flags; struct nvmem_device *nvmem; + struct regulator *vcc_reg; /* * Some chips tie up multiple I2C addresses; dummy devices reserve @@ -656,6 +658,9 @@ static int at24_probe(struct i2c_client *client) at24->client[0].client = client; at24->client[0].regmap = regmap; + at24->vcc_reg = devm_regulator_get(dev, "vcc"); + if (IS_ERR(at24->vcc_reg)) + return PTR_ERR(at24->vcc_reg); writable = !(flags & AT24_FLAG_READONLY); if (writable) { @@ -692,6 +697,12 @@ static int at24_probe(struct i2c_client *client) i2c_set_clientdata(client, at24); + err = regulator_enable(at24->vcc_reg); + if (err) { + dev_err(dev, "Failed to enable vcc regulator\n"); + return err; + } + /* enable runtime pm */ pm_runtime_set_active(dev); pm_runtime_enable(dev); @@ -704,6 +715,7 @@ static int at24_probe(struct i2c_client *client) pm_runtime_idle(dev); if (err) { pm_runtime_disable(dev); + regulator_disable(at24->vcc_reg); return -ENODEV; } @@ -719,15 +731,42 @@ static int at24_probe(struct i2c_client *client) static int at24_remove(struct i2c_client *client) { + struct at24_data *at24 = i2c_get_clientdata(client); + pm_runtime_disable(&client->dev); + if (!pm_runtime_status_suspended(&client->dev)) + regulator_disable(at24->vcc_reg); pm_runtime_set_suspended(&client->dev); return 0; } +static int __maybe_unused at24_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct at24_data *at24 = i2c_get_clientdata(client); + + return regulator_disable(at24->vcc_reg); +} + +static int __maybe_unused at24_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct at24_data *at24 = i2c_get_clientdata(client); + + return regulator_enable(at24->vcc_reg); +} + +static const struct dev_pm_ops at24_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(at24_suspend, at24_resume, NULL) +}; + static struct i2c_driver at24_driver = { .driver = { .name = "at24", + .pm = &at24_pm_ops, .of_match_table = at24_of_match, .acpi_match_table = ACPI_PTR(at24_acpi_ids), }, From 3a6adf3263f43593c0df66a62ffed9daea562bfd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Jan 2020 14:40:46 +0100 Subject: [PATCH 251/658] ASoC: max98090: silence lockdep warning Commit 08df0d9a00f7 ("ASoC: max98090: revert "ASoC: max98090: fix lockdep warning"") provided a good rationale for removing separate lock for the SHDN register access. However it restored the lockdep warning during the system boot. To silence the lockdep warning, mark the mutex taken in the max98090_shdn_save() function with the lockdep class dedicated for the runtime DAPM operations: SND_SOC_DAPM_CLASS_RUNTIME. This finally fixes the following lockdep warning observed on Exynos4412-based Odroid U3 board: ====================================================== WARNING: possible circular locking dependency detected 5.5.0-rc7-next-20200123 #7329 Not tainted ------------------------------------------------------ alsactl/1105 is trying to acquire lock: ed4f7cf4 (&card->dapm_mutex){+.+.}, at: max98090_shdn_save+0x1c/0x28 but task is already holding lock: edb8d49c (&card->controls_rwsem){++++}, at: snd_ctl_ioctl+0xcc/0xbb8 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&card->controls_rwsem){++++}: snd_ctl_add_replace+0x3c/0x84 dapm_create_or_share_kcontrol+0x24c/0x2e0 snd_soc_dapm_new_widgets+0x308/0x594 snd_soc_bind_card+0x834/0xa94 devm_snd_soc_register_card+0x34/0x6c odroid_audio_probe+0x288/0x34c platform_drv_probe+0x6c/0xa4 really_probe+0x200/0x48c driver_probe_device+0x78/0x1f8 bus_for_each_drv+0x74/0xb8 __device_attach+0xd4/0x16c bus_probe_device+0x88/0x90 deferred_probe_work_func+0x3c/0xd0 process_one_work+0x230/0x7bc worker_thread+0x44/0x524 kthread+0x130/0x164 ret_from_fork+0x14/0x20 0x0 -> #0 (&card->dapm_mutex){+.+.}: lock_acquire+0xe8/0x270 __mutex_lock+0x9c/0xb18 mutex_lock_nested+0x1c/0x24 max98090_shdn_save+0x1c/0x28 max98090_put_enum_double+0x20/0x40 snd_ctl_ioctl+0x190/0xbb8 ksys_ioctl+0x484/0xb10 ret_fast_syscall+0x0/0x28 0xbede0564 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&card->controls_rwsem); lock(&card->dapm_mutex); lock(&card->controls_rwsem); lock(&card->dapm_mutex); *** DEADLOCK *** 1 lock held by alsactl/1105: #0: edb8d49c (&card->controls_rwsem){++++}, at: snd_ctl_ioctl+0xcc/0xbb8 stack backtrace: CPU: 2 PID: 1105 Comm: alsactl Not tainted 5.5.0-rc7-next-20200123 #7329 Hardware name: Samsung Exynos (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb4/0xe0) [] (dump_stack) from [] (check_noncircular+0x1ec/0x208) [] (check_noncircular) from [] (__lock_acquire+0x1210/0x25ec) [] (__lock_acquire) from [] (lock_acquire+0xe8/0x270) [] (lock_acquire) from [] (__mutex_lock+0x9c/0xb18) [] (__mutex_lock) from [] (mutex_lock_nested+0x1c/0x24) [] (mutex_lock_nested) from [] (max98090_shdn_save+0x1c/0x28) [] (max98090_shdn_save) from [] (max98090_put_enum_double+0x20/0x40) [] (max98090_put_enum_double) from [] (snd_ctl_ioctl+0x190/0xbb8) [] (snd_ctl_ioctl) from [] (ksys_ioctl+0x484/0xb10) [] (ksys_ioctl) from [] (ret_fast_syscall+0x0/0x28) Exception stack(0xed331fa8 to 0xed331ff0) ... 
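The fix (see the max98090.c hunk below) keeps the single SHDN/DAPM mutex but annotates the runtime acquisition with its own lockdep class, so lockdep no longer conflates it with the init-time acquisition:

	mutex_lock_nested(&max98090->component->card->dapm_mutex,
			  SND_SOC_DAPM_CLASS_RUNTIME);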
Fixes: 08df0d9a00f7 ("ASoC: max98090: revert "ASoC: max98090: fix lockdep warning"") Signed-off-by: Marek Szyprowski Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20200123134046.9769-1-m.szyprowski@samsung.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 0313e1183167..5bc2c6411b33 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -52,7 +52,8 @@ static void max98090_shdn_restore_locked(struct max98090_priv *max98090) static void max98090_shdn_save(struct max98090_priv *max98090) { - mutex_lock(&max98090->component->card->dapm_mutex); + mutex_lock_nested(&max98090->component->card->dapm_mutex, + SND_SOC_DAPM_CLASS_RUNTIME); max98090_shdn_save_locked(max98090); } From e91440ddfbf4a5d7c541aed8c55c0ec76d9b26c7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 24 Jan 2020 08:57:50 -0700 Subject: [PATCH 252/658] ASoC: rt1015: Remove unnecessary const Clang warns: ../sound/soc/codecs/rt1015.c:392:14: warning: duplicate 'const' declaration specifier [-Wduplicate-decl-specifier] static const SOC_ENUM_SINGLE_DECL(rt1015_boost_mode_enum, 0, 0, ^ ../include/sound/soc.h:355:2: note: expanded from macro 'SOC_ENUM_SINGLE_DECL' SOC_ENUM_DOUBLE_DECL(name, xreg, xshift, xshift, xtexts) ^ ../include/sound/soc.h:352:2: note: expanded from macro 'SOC_ENUM_DOUBLE_DECL' const struct soc_enum name = SOC_ENUM_DOUBLE(xreg, xshift_l, xshift_r, \ ^ 1 warning generated. Remove the const after static to fix it. Fixes: df31007400c3 ("ASoC: rt1015: add rt1015 amplifier driver") Link: https://github.com/ClangBuiltLinux/linux/issues/845 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20200124155750.33753-1-natechancellor@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1015.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 4a9c5b54008f..6d490e2dbc25 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -389,7 +389,7 @@ static const char * const rt1015_boost_mode[] = { "Bypass", "Adaptive", "Fixed Adaptive" }; -static const SOC_ENUM_SINGLE_DECL(rt1015_boost_mode_enum, 0, 0, +static SOC_ENUM_SINGLE_DECL(rt1015_boost_mode_enum, 0, 0, rt1015_boost_mode); static int rt1015_boost_mode_get(struct snd_kcontrol *kcontrol, From f7b37b8b139860ed786bf8ce4358290c6053d94d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:06:34 -0500 Subject: [PATCH 253/658] NFS: Add softreval behaviour to nfs_lookup_revalidate() If the server is unavaliable, we want to allow the revalidating lookup to time out, and to default to validating the cached dentry if the 'softreval' mount option is set. 
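In the dentry revalidation path this shows up as a new -ETIMEDOUT case, excerpted from the dir.c hunk below: when softreval is set, a timed-out LOOKUP means "keep the cached dentry" rather than an error:

	case -ETIMEDOUT:
		if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
			ret = 1;	/* trust the cached dentry */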
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 15 +++++++++++---- fs/nfs/internal.h | 9 +++++++++ fs/nfs/namespace.c | 2 +- fs/nfs/nfs3proc.c | 17 +++++++++++------ fs/nfs/nfs4_fs.h | 6 ++++-- fs/nfs/nfs4namespace.c | 3 +-- fs/nfs/nfs4proc.c | 28 ++++++++++++++++++---------- fs/nfs/proc.c | 15 ++++++++++----- include/linux/nfs_xdr.h | 2 +- 9 files changed, 66 insertions(+), 31 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9405eeadc3f3..bfc66f3f00e1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1142,10 +1142,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, if (fhandle == NULL || fattr == NULL || IS_ERR(label)) goto out; - ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); if (ret < 0) { - if (ret == -ESTALE || ret == -ENOENT) + switch (ret) { + case -ESTALE: + case -ENOENT: ret = 0; + break; + case -ETIMEDOUT: + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + ret = 1; + } goto out; } ret = 0; @@ -1408,7 +1415,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in goto out; trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; if (error < 0) { @@ -1683,7 +1690,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL); if (error) goto out_error; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 17f082442804..f80c47d5ff27 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -33,6 +33,15 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) return 1; } +static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) +{ + if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL)) + return false; + if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size) + return false; + return true; +} + /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. 
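The nfs_lookup_is_soft_revalidate() helper above is what ties the mount option to the RPC layer: each protocol version's lookup, further down in this patch, folds it into the task flags so the request may fail with -ETIMEDOUT instead of retrying indefinitely. A condensed sketch of that recurring pattern; rpc_call_sync() and RPC_TASK_TIMEOUT are existing sunrpc interfaces, the wrapper function is illustrative and message setup is omitted:

/* Per-protocol pattern introduced below (NFSv2, v3 and v4 all do the
 * equivalent): a lookup that merely revalidates an existing, positive
 * dentry is sent with RPC_TASK_TIMEOUT. */
static unsigned short example_lookup_task_flags(const struct dentry *dentry)
{
	unsigned short task_flags = 0;

	if (nfs_lookup_is_soft_revalidate(dentry))
		task_flags |= RPC_TASK_TIMEOUT;
	return task_flags;
}

/* then: status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
 * and any -ETIMEDOUT reaches the dentry revalidation code above. */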
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 4fd22c0d730c..ad6077404947 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -301,7 +301,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, ctx->mntfh, ctx->clone_data.fattr, NULL); dput(parent); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4c93a8bca7dc..a46d1d5d16d8 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -154,14 +154,14 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs3_proc_lookup(struct inode *dir, const struct qstr *name, +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs3_diropargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len + .name = dentry->d_name.name, + .len = dentry->d_name.len }; struct nfs3_diropres res = { .fh = fhandle, @@ -173,20 +173,25 @@ nfs3_proc_lookup(struct inode *dir, const struct qstr *name, .rpc_resp = &res, }; int status; + unsigned short task_flags = 0; + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + dprintk("NFS call lookup %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; - dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_argp = fhandle; msg.rpc_resp = fattr; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); } nfs_free_fattr(res.dir_attr); dprintk("NFS reply lookup: %d\n", status); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9c136d53987d..c3e669dcee0e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -302,8 +302,10 @@ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struc extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, struct page *page, const struct cred *); extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); -extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *, - struct nfs_fh *, struct nfs_fattr *); +extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, + struct dentry *, + struct nfs_fh *, + struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern const struct xattr_handler *nfs4_xattr_handlers[]; extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index de6875a9b391..3ea1c1008b5b 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -442,12 +442,11 @@ int nfs4_submount(struct fs_context *fc, struct nfs_server *server) struct dentry *dentry = ctx->clone_data.dentry; struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); - const struct qstr *name = &dentry->d_name; struct rpc_clnt *client; int ret; /* Look it up again to get its attributes and sec flavor */ - client = 
nfs4_proc_lookup_mountpoint(dir, name, ctx->mntfh, + client = nfs4_proc_lookup_mountpoint(dir, dentry, ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (IS_ERR(client)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e178e2e7ad80..df38db2eee3b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4177,7 +4177,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, + struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_server *server = NFS_SERVER(dir); @@ -4185,7 +4185,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, .dir_fh = NFS_FH(dir), - .name = name, + .name = &dentry->d_name, }; struct nfs4_lookup_res res = { .server = server, @@ -4198,13 +4198,20 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; args.bitmask = nfs4_bitmask(server, label); nfs_fattr_init(fattr); - dprintk("NFS call lookup %s\n", name->name); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); + dprintk("NFS call lookup %pd2\n", dentry); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + status = nfs4_do_call_sync(clnt, server, &msg, + &args.seq_args, &res.seq_res, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -4218,16 +4225,17 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, + struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs4_exception exception = { .interruptible = true, }; struct rpc_clnt *client = *clnt; + const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); + err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4262,14 +4270,14 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name, +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4278,13 +4286,13 @@ static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name, } struct rpc_clnt * -nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name, +nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL); if (status < 0) return ERR_PTR(status); return 
(client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0451a094e89e..15c865cc837f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -152,14 +152,14 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct inode *dir, const struct qstr *name, +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len + .name = dentry->d_name.name, + .len = dentry->d_name.len }; struct nfs_diropok res = { .fh = fhandle, @@ -171,10 +171,15 @@ nfs_proc_lookup(struct inode *dir, const struct qstr *name, .rpc_resp = &res, }; int status; + unsigned short task_flags = 0; - dprintk("NFS call lookup %s\n", name->name); + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0a36c6f62b58..94c77ed55ce1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1661,7 +1661,7 @@ struct nfs_rpc_ops { struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct inode *, const struct qstr *, + int (*lookup) (struct inode *, struct dentry *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); int (*lookupp) (struct inode *, struct nfs_fh *, From b24ee6c64ca785739b3ef8d95fd6becaad1bde39 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 16 Dec 2019 16:34:02 -0500 Subject: [PATCH 254/658] NFS: allow deprecation of NFS UDP protocol Add a kernel config CONFIG_NFS_DISABLE_UDP_SUPPORT to disallow NFS UDP mounts and enable it by default. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 9 +++++++++ fs/nfs/client.c | 4 ++++ fs/nfs/fs_context.c | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index e7dd07f47825..40b6c5ac46c0 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -196,3 +196,12 @@ config NFS_DEBUG depends on NFS_FS && SUNRPC_DEBUG select CRC32 default y + +config NFS_DISABLE_UDP_SUPPORT + bool "NFS: Disable NFS UDP protocol support" + depends on NFS_FS + default y + help + Choose Y here to disable the use of NFS over UDP. NFS over UDP + on modern networks (1Gb+) can lead to data corruption caused by + fragmentation during high loads. 
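The Kconfig text above is enforced at two levels in the diffs that follow: the timeout and NLM transport switches lose their UDP cases at compile time, and mount option validation rejects an explicit UDP transport. A minimal sketch of the validation side; the function name is illustrative, XPRT_TRANSPORT_UDP is the existing sunrpc transport identifier, and the exact error returned by the real validation path may differ:

#include <linux/sunrpc/xprt.h>

static int example_validate_proto(int proto)
{
#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
	/* NFS over UDP was compiled out: refuse the mount up front */
	if (proto == XPRT_TRANSPORT_UDP)
		return -EINVAL;
#endif
	return 0;
}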
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8f760f23748c..989c30c98511 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -474,6 +474,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = to->to_initval; to->to_exponential = 0; break; +#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_UDP_RETRANS; @@ -484,6 +485,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; +#endif default: BUG(); } @@ -580,8 +582,10 @@ static int nfs_start_lockd(struct nfs_server *server) default: nlm_init.protocol = IPPROTO_TCP; break; +#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: nlm_init.protocol = IPPROTO_UDP; +#endif } host = nlmclnt_init(&nlm_init); diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 0247dcb7b316..dfd95847f005 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -1215,6 +1215,10 @@ static int nfs_fs_context_validate(struct fs_context *fc) } } else { nfs_set_mount_transport_protocol(ctx); +#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; +#endif if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; } From 3a21409a0b4bce3171001b168ca42285004c873c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jan 2020 15:55:09 +0000 Subject: [PATCH 255/658] nfs: Return EINVAL rather than ERANGE for mount parse errors Return EINVAL rather than ERANGE for mount parse errors as the userspace mount command doesn't necessarily understand what to do with anything other than EINVAL. The old code returned -ERANGE as an intermediate error that then get converted to -EINVAL, whereas the new code returns -ERANGE. This was induced by passing minorversion=1 to a v4 mount where CONFIG_NFS_V4_1 was disabled in the kernel build. Fixes: 68f65ef40e1e ("NFS: Convert mount option parsing to use functionality from fs_parser.h") Reported-by: Krzysztof Kozlowski Signed-off-by: David Howells Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index dfd95847f005..2c6dc1b6cc92 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -779,8 +779,7 @@ out_invalid_value: out_invalid_address: return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: - nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); - return -ERANGE; + return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); } /* From dfb6cd1e654315168e36d947471bd2a0ccd834ae Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 24 Jan 2020 17:47:49 -0500 Subject: [PATCH 256/658] tracing: Fix very unlikely race of registering two stat tracers Looking through old emails in my INBOX, I came across a patch from Luis Henriques that attempted to fix a race of two stat tracers registering the same stat trace (extremely unlikely, as this is done in the kernel, and probably doesn't even exist). The submitted patch wasn't quite right as it needed to deal with clean up a bit better (if two stat tracers were the same, it would have the same files). But to make the code cleaner, all we needed to do is to keep the all_stat_sessions_mutex held for most of the registering function. 
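The window being closed looks like this, and the fix is simply to widen the critical section; the sketch below is a condensed shape of the fixed function with error paths abridged, not the full diff:

/*
 * Race closed by the patch below (both CPUs register the same tracer_stat):
 *
 *   CPU0                                  CPU1
 *   lock; scan list; not found; unlock    lock; scan list; not found; unlock
 *   allocate session A                    allocate session B
 *   lock; list_add_tail(A); unlock        lock; list_add_tail(B); unlock
 *
 * With the duplicate check, allocation and insertion all under one hold of
 * all_stat_sessions_mutex, the second registration now hits the check:
 */
	mutex_lock(&all_stat_sessions_mutex);
	list_for_each_entry(node, &all_stat_sessions, session_list)
		if (node->ts == trace)
			goto out;	/* already registered */
	/* ... kzalloc() and init_stat_file() happen here, still locked ... */
	list_add_tail(&session->session_list, &all_stat_sessions);
 out:
	mutex_unlock(&all_stat_sessions_mutex);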
Link: http://lkml.kernel.org/r/1410299375-20068-1-git-send-email-luis.henriques@canonical.com Fixes: 002bb86d8d42f ("tracing/ftrace: separate events tracing and stats tracing engine") Reported-by: Luis Henriques Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stat.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 874f1274cf99..da8a38c3d5e4 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -304,7 +304,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -315,17 +315,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -334,15 +332,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) From afccc00f75bbbee4e4ae833a96c2d29a7259c693 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Tue, 9 Sep 2014 22:49:41 +0100 Subject: [PATCH 257/658] tracing: Fix tracing_stat return values in error handling paths tracing_stat_init() was always returning '0', even on the error paths. It now returns -ENODEV if tracing_init_dentry() fails or -ENOMEM if it fails to created the 'trace_stat' debugfs directory. 
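With this change the two failure modes become visible to callers such as init_stat_file(), where previously the function reported success even when tracefs was unavailable or the directory could not be created. A hypothetical caller-side check, purely for illustration; pr_warn() is the normal kernel logging helper and the branch bodies are invented:

	ret = tracing_stat_init();
	if (ret == -ENODEV)
		pr_warn("trace_stat: tracefs is not available\n");
	else if (ret == -ENOMEM)
		pr_warn("trace_stat: could not create the 'trace_stat' directory\n");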
Link: http://lkml.kernel.org/r/1410299381-20108-1-git-send-email-luis.henriques@canonical.com Fixes: ed6f1c996bfe4 ("tracing: Check return value of tracing_init_dentry()") Signed-off-by: Luis Henriques [ Pulled from the archeological digging of my INBOX ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index da8a38c3d5e4..d1fa19773cc8 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -280,18 +280,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, From cbc3b92ce037f5e7536f6db157d185cd8b8f615c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Sep 2014 16:14:12 -0400 Subject: [PATCH 258/658] tracing: Set kernel_stack's caller size properly I noticed when trying to use the trace-cmd python interface that reading the raw buffer wasn't working for kernel_stack events. This is because it uses a stubbed version of __dynamic_array that doesn't do the __data_loc trick and encode the length of the array into the field. Instead it just shows up as a size of 0. So change this to __array and set the len to FTRACE_STACK_ENTRIES since this is what we actually do in practice and matches how user_stack_trace works. Link: http://lkml.kernel.org/r/1411589652-1318-1-git-send-email-jbacik@fb.com Signed-off-by: Josef Bacik [ Pulled from the archeological digging of my INBOX ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_entries.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index fc8e97328e54..78c146efb862 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -174,7 +174,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry, F_STRUCT( __field( int, size ) - __dynamic_array(unsigned long, caller ) + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) ), F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n" From b3f7a6cd490112085eadb578b6f4a5a34d140726 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 Nov 2014 21:30:12 +0300 Subject: [PATCH 259/658] tracing: Remove unneeded NULL check We checked "iter->trace" earlier so there is no need to check here. Link: http://lkml.kernel.org/r/20141122183012.GB6994@mwanda Signed-off-by: Dan Carpenter [ Pulled from the archeological digging of my INBOX ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d1410b4462ac..6fed9b0a8d58 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4224,7 +4224,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) mutex_init(&iter->mutex); /* Notify the tracer early; before we stop tracing. 
*/ - if (iter->trace && iter->trace->open) + if (iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ From eba12ab71e71a2f82836c517596b6bd12664325c Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Tue, 3 Mar 2015 21:48:18 +0000 Subject: [PATCH 260/658] tracing: Fix comments about trace/ftrace.h commit f42c85e74faa422cf0bc747ed808681145448f88 moved tracepoint's ftrace creation into include/trace/ftrace.h and trace/define_trace.h was deleted as a result. However some comment info does not adapt to the change, which is such a misguiding when reading related code. This patch fix this by moving trace/trace_events.h to , since tracepoint headers have already been moved to tarce/events/. Link: http://lkml.kernel.org/r/1425419298-61941-1-git-send-email-houpengyang@huawei.com Signed-off-by: Hou Pengyang [ Pulled from the archeological digging of my INBOX ] Signed-off-by: Steven Rostedt (VMware) --- include/trace/trace_events.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 13a58d453992..831048507fef 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -2,7 +2,8 @@ /* * Stage 1 of the trace events. * - * Override the macros in to include the following: + * Override the macros in the event tracepoint header + * to include the following: * * struct trace_event_raw_ { * struct trace_entry ent; @@ -223,7 +224,8 @@ TRACE_MAKE_SYSTEM_STR(); /* * Stage 3 of the trace events. * - * Override the macros in to include the following: + * Override the macros in the event tracepoint header + * to include the following: * * enum print_line_t * trace_raw_output_(struct trace_iterator *iter, int flags) @@ -555,7 +557,8 @@ static inline notrace int trace_event_get_offsets_##call( \ /* * Stage 4 of the trace events. * - * Override the macros in to include the following: + * Override the macros in the event tracepoint header + * to include the following: * * For those macros defined with TRACE_EVENT: * From 28394da25888168df379c40910591b95e8e449f7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 24 Jan 2020 20:47:46 -0500 Subject: [PATCH 261/658] tracing: Decrement trace_array when bootconfig creates an instance The trace_array_get_by_name() creates a ftrace instance and trace_array_put() is used to remove the reference. Even though the trace_array_get_by_name() creates the instance, it also adds a reference count to it, that prevents user space from removing it. As the bootconfig just creates the instance on boot up, it should still be used where it can be deleted by user space after boot. A trace_array_put() is required to let that happen. Also, change the documentation on trace_array_get_by_name() to make this not be so confusing. Link: https://lore.kernel.org/r/20200124205927.76128804@rorschach.local.home Fixes: 4f712a4d04a4e ("tracing/boot: Add instance node support") Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 ++++ kernel/trace/trace_boot.c | 1 + 2 files changed, 5 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6fed9b0a8d58..0a5569b1cace 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8602,6 +8602,10 @@ out_unlock: * NOTE: This function increments the reference counter associated with the * trace array returned. This makes sure it cannot be freed while in use. * Use trace_array_put() once the trace array is no longer needed. 
+ * If the trace_array is to be freed, trace_array_destroy() needs to + * be called after the trace_array_put(), or simply let user space delete + * it from the tracefs instances directory. But until the + * trace_array_put() is called, user space can not delete it. * */ struct trace_array *trace_array_get_by_name(const char *name) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index cd541ac1cbc1..2f616cd926b0 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -327,6 +327,7 @@ trace_boot_init_instances(struct xbc_node *node) continue; } trace_boot_init_one_instance(tr, inode); + trace_array_put(tr); } } From 24589e3a20876dc07c62f45c8f8f8266dd39ba38 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sat, 25 Jan 2020 10:52:30 -0500 Subject: [PATCH 262/658] tracing: Use pr_err() instead of WARN() for memory failures As warnings can trigger panics, especially when "panic_on_warn" is set, memory failure warnings can cause panics and fail fuzz testers that are stressing memory. Create a MEM_FAIL() macro to use instead of WARN() in the tracing code (perhaps this should be a kernel wide macro?), and use that for memory failure issues. This should stop failing fuzz tests due to warnings. Link: https://lore.kernel.org/r/CACT4Y+ZP-7np20GVRu3p+eZys9GPtbu+JpfV+HtsufAzvTgJrg@mail.gmail.com Suggested-by: Dmitry Vyukov Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace.c | 18 +++++++++--------- kernel/trace/trace.h | 12 ++++++++++++ 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5c701765da5b..fdb1a9532420 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5459,7 +5459,7 @@ static void __init set_ftrace_early_graph(char *buf, int enable) struct ftrace_hash *hash; hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); - if (WARN_ON(!hash)) + if (MEM_FAIL(!hash, "Failed to allocate hash\n")) return; while (buf) { @@ -6591,7 +6591,7 @@ static void add_to_clear_hash_list(struct list_head *clear_list, func = kmalloc(sizeof(*func), GFP_KERNEL); if (!func) { - WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + MEM_FAIL(1, "alloc failure, ftrace filter could be stale\n"); return; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0a5569b1cace..6a28b1b9bf42 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3126,7 +3126,7 @@ static int alloc_percpu_trace_buffer(void) struct trace_buffer_struct *buffers; buffers = alloc_percpu(struct trace_buffer_struct); - if (WARN(!buffers, "Could not allocate percpu trace_printk buffer")) + if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) return -ENOMEM; trace_percpu_buffer = buffers; @@ -7932,7 +7932,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); - WARN_ONCE(!tr->percpu_dir, + MEM_FAIL(!tr->percpu_dir, "Could not create tracefs directory 'per_cpu/%d'\n", cpu); return tr->percpu_dir; @@ -8253,7 +8253,7 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer) for (cnt = 0; opts[cnt].name; cnt++) { create_trace_option_file(tr, &topts[cnt], flags, &opts[cnt]); - WARN_ONCE(topts[cnt].entry == NULL, + MEM_FAIL(topts[cnt].entry == NULL, "Failed to create trace option: %s", opts[cnt].name); } @@ -8437,7 +8437,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) #ifdef CONFIG_TRACER_MAX_TRACE ret = 
allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); - if (WARN_ON(ret)) { + if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { ring_buffer_free(tr->array_buffer.buffer); tr->array_buffer.buffer = NULL; free_percpu(tr->array_buffer.data); @@ -8726,7 +8726,7 @@ static __init void create_trace_instances(struct dentry *d_tracer) trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, instance_mkdir, instance_rmdir); - if (WARN_ON(!trace_instance_dir)) + if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) return; } @@ -8796,7 +8796,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) #endif if (ftrace_create_function_files(tr, d_tracer)) - WARN(1, "Could not allocate function filter files"); + MEM_FAIL(1, "Could not allocate function filter files"); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, @@ -9348,8 +9348,7 @@ __init static int tracer_alloc_buffers(void) /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { - printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); - WARN_ON(1); + MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); goto out_free_savedcmd; } @@ -9422,7 +9421,8 @@ void __init early_trace_init(void) if (tracepoint_printk) { tracepoint_print_iter = kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); - if (WARN_ON(!tracepoint_print_iter)) + if (MEM_FAIL(!tracepoint_print_iter, + "Failed to allocate trace iterator\n")) tracepoint_printk = 0; else static_key_enable(&tracepoint_printk_key.key); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4812a36affac..6bb64d89c321 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -94,6 +94,18 @@ enum trace_type { #include "trace_entries.h" +/* Use this for memory failure errors */ +#define MEM_FAIL(condition, fmt, ...) ({ \ + static bool __section(.data.once) __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + pr_err("ERROR: " fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_warn_once); \ +}) + /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h From 32dff5e5d1b5889b068e411d490453834c639701 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:15 -0800 Subject: [PATCH 263/658] xfs: make xfs_buf_alloc return an error code Convert _xfs_buf_alloc() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_buf.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a0229c368e78..f9a6cf71f4ab 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -198,20 +198,22 @@ xfs_buf_free_maps( } } -static struct xfs_buf * +static int _xfs_buf_alloc( struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags) + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { struct xfs_buf *bp; int error; int i; + *bpp = NULL; bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); if (unlikely(!bp)) - return NULL; + return -ENOMEM; /* * We don't want certain flags to appear in b_flags unless they are @@ -239,7 +241,7 @@ _xfs_buf_alloc( error = xfs_buf_get_maps(bp, nmaps); if (error) { kmem_cache_free(xfs_buf_zone, bp); - return NULL; + return error; } bp->b_bn = map[0].bm_bn; @@ -256,7 +258,8 @@ _xfs_buf_alloc( XFS_STATS_INC(bp->b_mount, xb_create); trace_xfs_buf_init(bp, _RET_IP_); - return bp; + *bpp = bp; + return 0; } /* @@ -715,8 +718,8 @@ xfs_buf_get_map( return NULL; } - new_bp = _xfs_buf_alloc(target, map, nmaps, flags); - if (unlikely(!new_bp)) + error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); + if (error) return NULL; error = xfs_buf_allocate_memory(new_bp, flags); @@ -917,8 +920,8 @@ xfs_buf_get_uncached( DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); /* flags might contain irrelevant bits, pass only what we care about */ - bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT); - if (unlikely(bp == NULL)) + error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); + if (error) goto fail; page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; From 3848b5f6709221b7ab52c9639baee50dc4b006bb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:15 -0800 Subject: [PATCH 264/658] xfs: make xfs_buf_get_map return an error code Convert xfs_buf_get_map() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_buf.c | 44 +++++++++++++++--------------------------- fs/xfs/xfs_buf.h | 13 +++++++++---- fs/xfs/xfs_trans_buf.c | 14 +++++++++----- 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f9a6cf71f4ab..5c07b4a70026 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -685,53 +685,39 @@ xfs_buf_incore( * cache hits, as metadata intensive workloads will see 3 orders of magnitude * more hits than misses. */ -struct xfs_buf * +int xfs_buf_get_map( struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags) + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { struct xfs_buf *bp; struct xfs_buf *new_bp; int error = 0; + *bpp = NULL; error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp); - - switch (error) { - case 0: - /* cache hit */ + if (!error) goto found; - case -EAGAIN: - /* cache hit, trylock failure, caller handles failure */ - ASSERT(flags & XBF_TRYLOCK); - return NULL; - case -ENOENT: - /* cache miss, go for insert */ - break; - case -EFSCORRUPTED: - default: - /* - * None of the higher layers understand failure types - * yet, so return NULL to signal a fatal lookup error. 
- */ - return NULL; - } + if (error != -ENOENT) + return error; error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); if (error) - return NULL; + return error; error = xfs_buf_allocate_memory(new_bp, flags); if (error) { xfs_buf_free(new_bp); - return NULL; + return error; } error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp); if (error) { xfs_buf_free(new_bp); - return NULL; + return error; } if (bp != new_bp) @@ -744,7 +730,7 @@ found: xfs_warn(target->bt_mount, "%s: failed to map pagesn", __func__); xfs_buf_relse(bp); - return NULL; + return error; } } @@ -757,7 +743,8 @@ found: XFS_STATS_INC(target->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); - return bp; + *bpp = bp; + return 0; } STATIC int @@ -818,11 +805,12 @@ xfs_buf_read_map( const struct xfs_buf_ops *ops) { struct xfs_buf *bp; + int error; flags |= XBF_READ; - bp = xfs_buf_get_map(target, map, nmaps, flags); - if (!bp) + error = xfs_buf_get_map(target, map, nmaps, flags, &bp); + if (error) return NULL; trace_xfs_buf_read(bp, flags, _RET_IP_); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 56e081dd1d96..25dd2aa4322b 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -192,9 +192,8 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags); -struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, - struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags); +int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp); struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, @@ -209,8 +208,14 @@ xfs_buf_get( xfs_daddr_t blkno, size_t numblks) { + struct xfs_buf *bp; + int error; DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return xfs_buf_get_map(target, &map, 1, 0); + + error = xfs_buf_get_map(target, &map, 1, 0, &bp); + if (error) + return NULL; + return bp; } static inline struct xfs_buf * diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index b5b3a78ef31c..288333fef13a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -122,9 +122,14 @@ xfs_trans_get_buf_map( { xfs_buf_t *bp; struct xfs_buf_log_item *bip; + int error; - if (!tp) - return xfs_buf_get_map(target, map, nmaps, flags); + if (!tp) { + error = xfs_buf_get_map(target, map, nmaps, flags, &bp); + if (error) + return NULL; + return bp; + } /* * If we find the buffer in the cache with this transaction @@ -149,10 +154,9 @@ xfs_trans_get_buf_map( return bp; } - bp = xfs_buf_get_map(target, map, nmaps, flags); - if (bp == NULL) { + error = xfs_buf_get_map(target, map, nmaps, flags, &bp); + if (error) return NULL; - } ASSERT(!bp->b_error); From 4ed8e27b4f755f50d78dc3d9f9760b60e891f97b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:16 -0800 Subject: [PATCH 265/658] xfs: make xfs_buf_read_map return an error code Convert xfs_buf_read_map() to return numeric error codes like most everywhere else in xfs. This involves moving the open-coded logic that reports metadata IO read / corruption errors and stales the buffer into xfs_buf_read_map so that the logic is all in one place. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 11 +++--- fs/xfs/libxfs/xfs_attr_remote.c | 10 ------ fs/xfs/xfs_buf.c | 63 ++++++++++++++++++++++++--------- fs/xfs/xfs_buf.h | 15 +++++--- fs/xfs/xfs_log_recover.c | 10 ------ fs/xfs/xfs_symlink.c | 10 ------ fs/xfs/xfs_trans_buf.c | 36 +++++-------------- 7 files changed, 72 insertions(+), 83 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index fc93fd88ec89..4cc10aa43edf 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2956,14 +2956,17 @@ xfs_read_agf( trace_xfs_read_agf(mp, agno); ASSERT(agno != NULLAGNUMBER); - error = xfs_trans_read_buf( - mp, tp, mp->m_ddev_targp, + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); + /* + * Callers of xfs_read_agf() currently interpret a NULL bpp as EAGAIN + * and need to be converted to check for EAGAIN specifically. + */ + if (error == -EAGAIN) + return 0; if (error) return error; - if (!*bpp) - return 0; ASSERT(!(*bpp)->b_error); xfs_buf_set_ref(*bpp, XFS_AGF_REF); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index a266d05df146..88e50e904436 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -422,16 +422,6 @@ xfs_attr_rmtval_get( &xfs_attr3_rmt_buf_ops); if (!bp) return -ENOMEM; - error = bp->b_error; - if (error) { - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_relse(bp); - - /* bad CRC means corrupted metadata */ - if (error == -EFSBADCRC) - error = -EFSCORRUPTED; - return error; - } error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, &offset, &valuelen, diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 5c07b4a70026..871abaabff3d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -796,47 +796,76 @@ xfs_buf_reverify( return bp->b_error; } -xfs_buf_t * +int xfs_buf_read_map( struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, + struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { struct xfs_buf *bp; int error; flags |= XBF_READ; + *bpp = NULL; error = xfs_buf_get_map(target, map, nmaps, flags, &bp); if (error) - return NULL; + return error; trace_xfs_buf_read(bp, flags, _RET_IP_); if (!(bp->b_flags & XBF_DONE)) { + /* Initiate the buffer read and wait. */ XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; - _xfs_buf_read(bp, flags); - return bp; + error = _xfs_buf_read(bp, flags); + + /* Readahead iodone already dropped the buffer, so exit. */ + if (flags & XBF_ASYNC) + return 0; + } else { + /* Buffer already read; all we need to do is check it. */ + error = xfs_buf_reverify(bp, ops); + + /* Readahead already finished; drop the buffer and exit. */ + if (flags & XBF_ASYNC) { + xfs_buf_relse(bp); + return 0; + } + + /* We do not want read in the flags */ + bp->b_flags &= ~XBF_READ; + ASSERT(bp->b_ops != NULL || ops == NULL); } - xfs_buf_reverify(bp, ops); + /* + * If we've had a read error, then the contents of the buffer are + * invalid and should not be used. To ensure that a followup read tries + * to pull the buffer from disk again, we clear the XBF_DONE flag and + * mark the buffer stale. This ensures that anyone who has a current + * reference to the buffer will interpret it's contents correctly and + * future cache lookups will also treat it as an empty, uninitialised + * buffer. 
+ */ + if (error) { + if (!XFS_FORCED_SHUTDOWN(target->bt_mount)) + xfs_buf_ioerror_alert(bp, __func__); - if (flags & XBF_ASYNC) { - /* - * Read ahead call which is already satisfied, - * drop the buffer - */ + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); xfs_buf_relse(bp); - return NULL; + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; + return error; } - /* We do not want read in the flags */ - bp->b_flags &= ~XBF_READ; - ASSERT(bp->b_ops != NULL || ops == NULL); - return bp; + *bpp = bp; + return 0; } /* @@ -850,11 +879,13 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { + struct xfs_buf *bp; + if (bdi_read_congested(target->bt_bdev->bd_bdi)) return; xfs_buf_read_map(target, map, nmaps, - XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops); + XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops); } /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 25dd2aa4322b..f58147354b02 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -194,10 +194,9 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp); -struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target, - struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags, - const struct xfs_buf_ops *ops); +int xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); void xfs_buf_readahead_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, const struct xfs_buf_ops *ops); @@ -226,8 +225,14 @@ xfs_buf_read( xfs_buf_flags_t flags, const struct xfs_buf_ops *ops) { + struct xfs_buf *bp; + int error; DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return xfs_buf_read_map(target, &map, 1, flags, ops); + + error = xfs_buf_read_map(target, &map, 1, flags, &bp, ops); + if (error) + return NULL; + return bp; } static inline void diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0d683fb96396..c805a02f0078 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2749,11 +2749,6 @@ xlog_recover_buffer_pass2( buf_flags, NULL); if (!bp) return -ENOMEM; - error = bp->b_error; - if (error) { - xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); - goto out_release; - } /* * Recover the buffer only if we get an LSN from it and it's less than @@ -2956,11 +2951,6 @@ xlog_recover_inode_pass2( error = -ENOMEM; goto error; } - error = bp->b_error; - if (error) { - xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); - goto out_release; - } ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); dip = xfs_buf_offset(bp, in_f->ilf_boffset); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index a25502bc2071..b255a393a73b 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -57,16 +57,6 @@ xfs_readlink_bmap_ilocked( &xfs_symlink_buf_ops); if (!bp) return -ENOMEM; - error = bp->b_error; - if (error) { - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_relse(bp); - - /* bad CRC means corrupted metadata */ - if (error == -EFSBADCRC) - error = -EFSCORRUPTED; - goto out; - } byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); if (pathlen < byte_cnt) byte_cnt = pathlen; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 288333fef13a..cdb66c661425 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -302,36 +302,16 @@ xfs_trans_read_buf_map( return 0; } - bp = 
xfs_buf_read_map(target, map, nmaps, flags, ops); - if (!bp) { - if (!(flags & XBF_TRYLOCK)) - return -ENOMEM; - return tp ? 0 : -EAGAIN; - } - - /* - * If we've had a read error, then the contents of the buffer are - * invalid and should not be used. To ensure that a followup read tries - * to pull the buffer from disk again, we clear the XBF_DONE flag and - * mark the buffer stale. This ensures that anyone who has a current - * reference to the buffer will interpret it's contents correctly and - * future cache lookups will also treat it as an empty, uninitialised - * buffer. - */ - if (bp->b_error) { - error = bp->b_error; - if (!XFS_FORCED_SHUTDOWN(mp)) - xfs_buf_ioerror_alert(bp, __func__); - bp->b_flags &= ~XBF_DONE; - xfs_buf_stale(bp); - + error = xfs_buf_read_map(target, map, nmaps, flags, &bp, ops); + switch (error) { + case 0: + break; + default: if (tp && (tp->t_flags & XFS_TRANS_DIRTY)) xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); - xfs_buf_relse(bp); - - /* bad CRC means corrupted metadata */ - if (error == -EFSBADCRC) - error = -EFSCORRUPTED; + /* fall through */ + case -ENOMEM: + case -EAGAIN: return error; } From 841263e93310595c30653a9f530b2d7bbeed5aae Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:16 -0800 Subject: [PATCH 266/658] xfs: make xfs_buf_get return an error code Convert xfs_buf_get() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr_remote.c | 6 +++--- fs/xfs/libxfs/xfs_sb.c | 8 ++++---- fs/xfs/xfs_buf.h | 12 ++++-------- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 88e50e904436..7266e280b3e8 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -545,9 +545,9 @@ xfs_attr_rmtval_set( dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); - if (!bp) - return -ENOMEM; + error = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, &bp); + if (error) + return error; bp->b_ops = &xfs_attr3_rmt_buf_ops; xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 0ac69751fe85..6fdd007f81ab 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -985,9 +985,9 @@ xfs_update_secondary_sbs( for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) { struct xfs_buf *bp; - bp = xfs_buf_get(mp->m_ddev_targp, + error = xfs_buf_get(mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), - XFS_FSS_TO_BB(mp, 1)); + XFS_FSS_TO_BB(mp, 1), &bp); /* * If we get an error reading or writing alternate superblocks, * continue. xfs_repair chooses the "best" superblock based @@ -995,12 +995,12 @@ xfs_update_secondary_sbs( * superblocks un-updated than updated, and xfs_repair may * pick them over the properly-updated primary. 
*/ - if (!bp) { + if (error) { xfs_warn(mp, "error allocating secondary superblock for ag %d", agno); if (!saved_error) - saved_error = -ENOMEM; + saved_error = error; continue; } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index f58147354b02..c3aa4e322243 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -201,20 +201,16 @@ void xfs_buf_readahead_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, const struct xfs_buf_ops *ops); -static inline struct xfs_buf * +static inline int xfs_buf_get( struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks) + size_t numblks, + struct xfs_buf **bpp) { - struct xfs_buf *bp; - int error; DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - error = xfs_buf_get_map(target, &map, 1, 0, &bp); - if (error) - return NULL; - return bp; + return xfs_buf_get_map(target, &map, 1, 0, bpp); } static inline struct xfs_buf * From 2842b6db3d539bec08d080b22635b6e8acaa30ec Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:17 -0800 Subject: [PATCH 267/658] xfs: make xfs_buf_get_uncached return an error code Convert xfs_buf_get_uncached() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.c | 21 ++++++++++++--------- fs/xfs/xfs_buf.c | 25 ++++++++++++++++--------- fs/xfs/xfs_buf.h | 4 ++-- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 14fbdf22b7e7..08d6beb54f8c 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -23,25 +23,28 @@ #include "xfs_ag_resv.h" #include "xfs_health.h" -static struct xfs_buf * +static int xfs_get_aghdr_buf( struct xfs_mount *mp, xfs_daddr_t blkno, size_t numblks, + struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { struct xfs_buf *bp; + int error; - bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0); - if (!bp) - return NULL; + error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp); + if (error) + return error; xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); bp->b_bn = blkno; bp->b_maps[0].bm_bn = blkno; bp->b_ops = ops; - return bp; + *bpp = bp; + return 0; } static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) @@ -340,13 +343,13 @@ xfs_ag_init_hdr( struct aghdr_init_data *id, aghdr_init_work_f work, const struct xfs_buf_ops *ops) - { struct xfs_buf *bp; + int error; - bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops); - if (!bp) - return -ENOMEM; + error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops); + if (error) + return error; (*work)(mp, bp, id); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 871abaabff3d..b420e865b32e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -902,12 +902,13 @@ xfs_buf_read_uncached( const struct xfs_buf_ops *ops) { struct xfs_buf *bp; + int error; *bpp = NULL; - bp = xfs_buf_get_uncached(target, numblks, flags); - if (!bp) - return -ENOMEM; + error = xfs_buf_get_uncached(target, numblks, flags, &bp); + if (error) + return error; /* set up the buffer for a read IO */ ASSERT(bp->b_map_count == 1); @@ -918,7 +919,7 @@ xfs_buf_read_uncached( xfs_buf_submit(bp); if (bp->b_error) { - int error = bp->b_error; + error = bp->b_error; xfs_buf_relse(bp); return error; } @@ -927,17 +928,20 @@ xfs_buf_read_uncached( return 0; } -xfs_buf_t * +int xfs_buf_get_uncached( struct xfs_buftarg *target, size_t numblks, - int flags) + int flags, + struct xfs_buf **bpp) { unsigned long page_count; int 
error, i; struct xfs_buf *bp; DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); + *bpp = NULL; + /* flags might contain irrelevant bits, pass only what we care about */ error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); if (error) @@ -950,8 +954,10 @@ xfs_buf_get_uncached( for (i = 0; i < page_count; i++) { bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); - if (!bp->b_pages[i]) + if (!bp->b_pages[i]) { + error = -ENOMEM; goto fail_free_mem; + } } bp->b_flags |= _XBF_PAGES; @@ -963,7 +969,8 @@ xfs_buf_get_uncached( } trace_xfs_buf_get_uncached(bp, _RET_IP_); - return bp; + *bpp = bp; + return 0; fail_free_mem: while (--i >= 0) @@ -973,7 +980,7 @@ xfs_buf_get_uncached( xfs_buf_free_maps(bp); kmem_cache_free(xfs_buf_zone, bp); fail: - return NULL; + return error; } /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c3aa4e322243..7f7bd1edd99e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -242,8 +242,8 @@ xfs_buf_readahead( return xfs_buf_readahead_map(target, &map, 1, ops); } -struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, - int flags); +int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags, + struct xfs_buf **bpp); int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, int flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); From 0e3eccce5e0e438bc1aa3c2913221d3d43a1bef4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:17 -0800 Subject: [PATCH 268/658] xfs: make xfs_buf_read return an error code Convert xfs_buf_read() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr_remote.c | 8 ++++---- fs/xfs/xfs_buf.h | 10 +++------- fs/xfs/xfs_log_recover.c | 16 +++++++--------- fs/xfs/xfs_symlink.c | 8 ++++---- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 7266e280b3e8..8b7f74b3bea2 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -418,10 +418,10 @@ xfs_attr_rmtval_get( (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, - &xfs_attr3_rmt_buf_ops); - if (!bp) - return -ENOMEM; + error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, + 0, &bp, &xfs_attr3_rmt_buf_ops); + if (error) + return error; error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, &offset, &valuelen, diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7f7bd1edd99e..aa145ad25e9a 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -213,22 +213,18 @@ xfs_buf_get( return xfs_buf_get_map(target, &map, 1, 0, bpp); } -static inline struct xfs_buf * +static inline int xfs_buf_read( struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags, + struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { - struct xfs_buf *bp; - int error; DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - error = xfs_buf_read_map(target, &map, 1, flags, &bp, ops); - if (error) - return NULL; - return bp; + return xfs_buf_read_map(target, &map, 1, flags, bpp, ops); } static inline void diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index c805a02f0078..ac79537d3275 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2745,10 +2745,10 
@@ xlog_recover_buffer_pass2( if (buf_f->blf_flags & XFS_BLF_INODE_BUF) buf_flags |= XBF_UNMAPPED; - bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, - buf_flags, NULL); - if (!bp) - return -ENOMEM; + error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, + buf_flags, &bp, NULL); + if (error) + return error; /* * Recover the buffer only if we get an LSN from it and it's less than @@ -2945,12 +2945,10 @@ xlog_recover_inode_pass2( } trace_xfs_log_recover_inode_recover(log, in_f); - bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, - &xfs_inode_buf_ops); - if (!bp) { - error = -ENOMEM; + error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, + 0, &bp, &xfs_inode_buf_ops); + if (error) goto error; - } ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); dip = xfs_buf_offset(bp, in_f->ilf_boffset); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index b255a393a73b..b94d7b9b55d0 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -53,10 +53,10 @@ xfs_readlink_bmap_ilocked( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, - &xfs_symlink_buf_ops); - if (!bp) - return -ENOMEM; + error = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, + &bp, &xfs_symlink_buf_ops); + if (error) + return error; byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); if (pathlen < byte_cnt) byte_cnt = pathlen; From 9676b54e6e28689af1b4247569f14466bdfc5390 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:18 -0800 Subject: [PATCH 269/658] xfs: make xfs_trans_get_buf_map return an error code Convert xfs_trans_get_buf_map() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_da_btree.c | 8 ++------ fs/xfs/xfs_trans.h | 15 ++++++++++----- fs/xfs/xfs_trans_buf.c | 22 +++++++++++----------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 8c3eafe280ed..875e04f82541 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2591,13 +2591,9 @@ xfs_da_get_buf( if (error || nmap == 0) goto out_free; - bp = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0); - error = bp ? 
bp->b_error : -EIO; - if (error) { - if (bp) - xfs_trans_brelse(tp, bp); + error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp); + if (error) goto out_free; - } *bpp = bp; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 64d7f171ebd3..a0be934ec811 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -169,10 +169,9 @@ int xfs_trans_alloc_empty(struct xfs_mount *mp, struct xfs_trans **tpp); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); -struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, - struct xfs_buftarg *target, - struct xfs_buf_map *map, int nmaps, - uint flags); +int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, + struct xfs_buf **bpp); static inline struct xfs_buf * xfs_trans_get_buf( @@ -182,8 +181,14 @@ xfs_trans_get_buf( int numblks, uint flags) { + struct xfs_buf *bp; + int error; + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return xfs_trans_get_buf_map(tp, target, &map, 1, flags); + error = xfs_trans_get_buf_map(tp, target, &map, 1, flags, &bp); + if (error) + return NULL; + return bp; } int xfs_trans_read_buf_map(struct xfs_mount *mp, diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index cdb66c661425..83470998f87b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -112,24 +112,22 @@ xfs_trans_bjoin( * If the transaction pointer is NULL, make this just a normal * get_buf() call. */ -struct xfs_buf * +int xfs_trans_get_buf_map( struct xfs_trans *tp, struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags) + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { xfs_buf_t *bp; struct xfs_buf_log_item *bip; int error; - if (!tp) { - error = xfs_buf_get_map(target, map, nmaps, flags, &bp); - if (error) - return NULL; - return bp; - } + *bpp = NULL; + if (!tp) + return xfs_buf_get_map(target, map, nmaps, flags, bpp); /* * If we find the buffer in the cache with this transaction @@ -151,18 +149,20 @@ xfs_trans_get_buf_map( ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_get_buf_recur(bip); - return bp; + *bpp = bp; + return 0; } error = xfs_buf_get_map(target, map, nmaps, flags, &bp); if (error) - return NULL; + return error; ASSERT(!bp->b_error); _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_get_buf(bp->b_log_item); - return bp; + *bpp = bp; + return 0; } /* From ce92464c180b60e79022bdf1175b7737a11f59b7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:18 -0800 Subject: [PATCH 270/658] xfs: make xfs_trans_get_buf return an error code Convert xfs_trans_get_buf() to return numeric error codes like most everywhere else in xfs. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 23 ++++++++++++++++------- fs/xfs/libxfs/xfs_ialloc.c | 12 ++++++------ fs/xfs/libxfs/xfs_sb.c | 9 +++++---- fs/xfs/scrub/repair.c | 8 ++++++-- fs/xfs/xfs_attr_inactive.c | 17 +++++++++-------- fs/xfs/xfs_dquot.c | 8 ++++---- fs/xfs/xfs_inode.c | 12 ++++++------ fs/xfs/xfs_rtalloc.c | 8 +++----- fs/xfs/xfs_symlink.c | 19 ++++++++----------- fs/xfs/xfs_trans.h | 13 ++++--------- 10 files changed, 67 insertions(+), 62 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b22c7e928eb1..2d53e5fdff70 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -688,11 +688,16 @@ xfs_btree_get_bufl( xfs_trans_t *tp, /* transaction pointer */ xfs_fsblock_t fsbno) /* file system block number */ { + struct xfs_buf *bp; xfs_daddr_t d; /* real disk block address */ + int error; ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0, &bp); + if (error) + return NULL; + return bp; } /* @@ -706,12 +711,17 @@ xfs_btree_get_bufs( xfs_agnumber_t agno, /* allocation group number */ xfs_agblock_t agbno) /* allocation group block number */ { + struct xfs_buf *bp; xfs_daddr_t d; /* real disk block address */ + int error; ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0, &bp); + if (error) + return NULL; + return bp; } /* @@ -1270,11 +1280,10 @@ xfs_btree_get_buf_block( error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; - *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, 0); - - if (!*bpp) - return -ENOMEM; + error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, + 0, bpp); + if (error) + return error; (*bpp)->b_ops = cur->bc_ops->buf_ops; *block = XFS_BUF_TO_BLOCK(*bpp); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 5b759af4d165..bf161e930f1d 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -276,6 +276,7 @@ xfs_ialloc_inode_init( int i, j; xfs_daddr_t d; xfs_ino_t ino = 0; + int error; /* * Loop over the new block(s), filling in the inodes. For small block @@ -327,12 +328,11 @@ xfs_ialloc_inode_init( */ d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * M_IGEO(mp)->blocks_per_cluster)); - fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize * - M_IGEO(mp)->blocks_per_cluster, - XBF_UNMAPPED); - if (!fbuf) - return -ENOMEM; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize * M_IGEO(mp)->blocks_per_cluster, + XBF_UNMAPPED, &fbuf); + if (error) + return error; /* Initialize the inode buffers and log them appropriately. 
*/ fbuf->b_ops = &xfs_inode_buf_ops; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 6fdd007f81ab..2f60fc3c99a0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1185,13 +1185,14 @@ xfs_sb_get_secondary( struct xfs_buf **bpp) { struct xfs_buf *bp; + int error; ASSERT(agno != 0 && agno != NULLAGNUMBER); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (!bp) - return -ENOMEM; + XFS_FSS_TO_BB(mp, 1), 0, &bp); + if (error) + return error; bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); *bpp = bp; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index b70a88bc975e..3df49d487940 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -341,13 +341,17 @@ xrep_init_btblock( struct xfs_trans *tp = sc->tp; struct xfs_mount *mp = sc->mp; struct xfs_buf *bp; + int error; trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), btnum); ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), - XFS_FSB_TO_BB(mp, 1), 0); + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0, + &bp); + if (error) + return error; xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 8fbb841cd6fe..bbfa6ba84dcd 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -205,11 +205,12 @@ xfs_attr3_node_inactive( /* * Remove the subsidiary block from the cache and from the log. */ - child_bp = xfs_trans_get_buf(*trans, mp->m_ddev_targp, + error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, child_blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0); - if (!child_bp) - return -EIO; + XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, + &child_bp); + if (error) + return error; error = bp->b_error; if (error) { xfs_trans_brelse(*trans, child_bp); @@ -298,10 +299,10 @@ xfs_attr3_root_inactive( /* * Invalidate the incore copy of the root block. 
*/ - bp = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0); - if (!bp) - return -EIO; + error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, + XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp); + if (error) + return error; error = bp->b_error; if (error) { xfs_trans_brelse(*trans, bp); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9cfd3209f52b..d223e1ae90a6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -320,10 +320,10 @@ xfs_dquot_disk_alloc( dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); /* now we can just get the buffer (there's nothing to read yet) */ - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0); - if (!bp) - return -ENOMEM; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) + return error; bp->b_ops = &xfs_dquot_buf_ops; /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1979a0055763..c5077e6326c7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2546,6 +2546,7 @@ xfs_ifree_cluster( struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); xfs_ino_t inum; + int error; inum = xic->first_ino; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); @@ -2574,12 +2575,11 @@ xfs_ifree_cluster( * complete before we get a lock on it, and hence we may fail * to mark all the active inodes on the buffer stale. */ - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, - mp->m_bsize * igeo->blocks_per_cluster, - XBF_UNMAPPED); - - if (!bp) - return -ENOMEM; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, + mp->m_bsize * igeo->blocks_per_cluster, + XBF_UNMAPPED, &bp); + if (error) + return error; /* * This buffer may not have been correctly initialised as we diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index d42b5a2047e0..6209e7b6b895 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -826,12 +826,10 @@ xfs_growfs_rt_alloc( * Get a buffer for the block. */ d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize, 0); - if (bp == NULL) { - error = -EIO; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize, 0, &bp); + if (error) goto out_trans_cancel; - } memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); /* diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index b94d7b9b55d0..d762d42ed0ff 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -280,12 +280,10 @@ xfs_symlink( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - BTOBB(byte_cnt), 0); - if (!bp) { - error = -ENOMEM; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + BTOBB(byte_cnt), 0, &bp); + if (error) goto out_trans_cancel; - } bp->b_ops = &xfs_symlink_buf_ops; byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); @@ -423,13 +421,12 @@ xfs_inactive_symlink_rmt( * Invalidate the block(s). No validation is done. 
*/ for (i = 0; i < nmaps; i++) { - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), - XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); - if (!bp) { - error = -ENOMEM; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), + XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0, + &bp); + if (error) goto error_trans_cancel; - } xfs_trans_binval(tp, bp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a0be934ec811..752c7fef9de7 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -173,22 +173,17 @@ int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp); -static inline struct xfs_buf * +static inline int xfs_trans_get_buf( struct xfs_trans *tp, struct xfs_buftarg *target, xfs_daddr_t blkno, int numblks, - uint flags) + uint flags, + struct xfs_buf **bpp) { - struct xfs_buf *bp; - int error; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - error = xfs_trans_get_buf_map(tp, target, &map, 1, flags, &bp); - if (error) - return NULL; - return bp; + return xfs_trans_get_buf_map(tp, target, &map, 1, flags, bpp); } int xfs_trans_read_buf_map(struct xfs_mount *mp, From ee647f85cb81b09bbfa2886954828ed03fa3ec38 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:19 -0800 Subject: [PATCH 271/658] xfs: remove the xfs_btree_get_buf[ls] functions Remove the xfs_btree_get_bufs and xfs_btree_get_bufl functions, since they're pretty trivial oneliners. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 16 ++++++++------ fs/xfs/libxfs/xfs_bmap.c | 14 +++++++----- fs/xfs/libxfs/xfs_btree.c | 46 --------------------------------------- fs/xfs/libxfs/xfs_btree.h | 21 ------------------ 4 files changed, 18 insertions(+), 79 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 4cc10aa43edf..34b65635ee34 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1070,11 +1070,11 @@ xfs_alloc_ag_vextent_small( if (args->datatype & XFS_ALLOC_USERDATA) { struct xfs_buf *bp; - bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); - if (XFS_IS_CORRUPT(args->mp, !bp)) { - error = -EFSCORRUPTED; + error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp, + XFS_AGB_TO_DADDR(args->mp, args->agno, fbno), + args->mp->m_bsize, 0, &bp); + if (error) goto error; - } xfs_trans_binval(args->tp, bp); } *fbnop = args->agbno = fbno; @@ -2347,9 +2347,11 @@ xfs_free_agfl_block( if (error) return error; - bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); - if (XFS_IS_CORRUPT(tp->t_mountp, !bp)) - return -EFSCORRUPTED; + error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp, + XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno), + tp->t_mountp->m_bsize, 0, &bp); + if (error) + return error; xfs_trans_binval(tp, bp); return 0; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 4c2e046fbfad..cfcef076c72f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -730,11 +730,11 @@ xfs_bmap_extents_to_btree( cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); - abp = xfs_btree_get_bufl(mp, tp, args.fsbno); - if (XFS_IS_CORRUPT(mp, !abp)) { - error = -EFSCORRUPTED; + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, args.fsbno), + mp->m_bsize, 0, &abp); + if (error) goto 
out_unreserve_dquot; - } /* * Fill in the child block. @@ -878,7 +878,11 @@ xfs_bmap_local_to_extents( ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); tp->t_firstblock = args.fsbno; - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno); + error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, + XFS_FSB_TO_DADDR(args.mp, args.fsbno), + args.mp->m_bsize, 0, &bp); + if (error) + goto done; /* * Initialize the block, copy the data and log the remote buffer. diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2d53e5fdff70..fd300dc93ca4 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -678,52 +678,6 @@ xfs_btree_get_block( return XFS_BUF_TO_BLOCK(*bpp); } -/* - * Get a buffer for the block, return it with no data read. - * Long-form addressing. - */ -xfs_buf_t * /* buffer for fsbno */ -xfs_btree_get_bufl( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t fsbno) /* file system block number */ -{ - struct xfs_buf *bp; - xfs_daddr_t d; /* real disk block address */ - int error; - - ASSERT(fsbno != NULLFSBLOCK); - d = XFS_FSB_TO_DADDR(mp, fsbno); - error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0, &bp); - if (error) - return NULL; - return bp; -} - -/* - * Get a buffer for the block, return it with no data read. - * Short-form addressing. - */ -xfs_buf_t * /* buffer for agno/agbno */ -xfs_btree_get_bufs( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno) /* allocation group block number */ -{ - struct xfs_buf *bp; - xfs_daddr_t d; /* real disk block address */ - int error; - - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0, &bp); - if (error) - return NULL; - return bp; -} - /* * Change the cursor to point to the first record at the given level. * Other levels are unaffected. diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index fb9b2121c628..3eff7c321d43 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -296,27 +296,6 @@ xfs_btree_dup_cursor( xfs_btree_cur_t *cur, /* input cursor */ xfs_btree_cur_t **ncur);/* output cursor */ -/* - * Get a buffer for the block, return it with no data read. - * Long-form addressing. - */ -struct xfs_buf * /* buffer for fsbno */ -xfs_btree_get_bufl( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t fsbno); /* file system block number */ - -/* - * Get a buffer for the block, return it with no data read. - * Short-form addressing. - */ -struct xfs_buf * /* buffer for agno/agbno */ -xfs_btree_get_bufs( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno); /* allocation group block number */ - /* * Compute first and last byte offsets for the fields given. * Interprets the offsets table, which contains struct field offsets. From f48e2df8a877ca1c19d92cfd7e74cc5956fa84cb Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Thu, 23 Jan 2020 17:01:19 -0800 Subject: [PATCH 272/658] xfs: make xfs_*read_agf return EAGAIN to ALLOC_FLAG_TRYLOCK callers Refactor xfs_read_agf and xfs_alloc_read_agf to return EAGAIN if the caller passed TRYLOCK and we weren't able to get the lock; and change the callers to recognize this. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 36 ++++++++++++++---------------------- fs/xfs/libxfs/xfs_bmap.c | 11 ++++++----- fs/xfs/xfs_filestream.c | 11 +++++------ 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 34b65635ee34..d8053bc96c4d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2502,12 +2502,11 @@ xfs_alloc_fix_freelist( if (!pag->pagf_init) { error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); - if (error) + if (error) { + /* Couldn't lock the AGF so skip this AG. */ + if (error == -EAGAIN) + error = 0; goto out_no_agbp; - if (!pag->pagf_init) { - ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - goto out_agbp_relse; } } @@ -2533,11 +2532,10 @@ xfs_alloc_fix_freelist( */ if (!agbp) { error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); - if (error) - goto out_no_agbp; - if (!agbp) { - ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); + if (error) { + /* Couldn't lock the AGF so skip this AG. */ + if (error == -EAGAIN) + error = 0; goto out_no_agbp; } } @@ -2768,11 +2766,10 @@ xfs_alloc_pagf_init( xfs_buf_t *bp; int error; - if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) - return error; - if (bp) + error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp); + if (!error) xfs_trans_brelse(tp, bp); - return 0; + return error; } /* @@ -2961,12 +2958,6 @@ xfs_read_agf( error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); - /* - * Callers of xfs_read_agf() currently interpret a NULL bpp as EAGAIN - * and need to be converted to check for EAGAIN specifically. - */ - if (error == -EAGAIN) - return 0; if (error) return error; @@ -2992,14 +2983,15 @@ xfs_alloc_read_agf( trace_xfs_alloc_read_agf(mp, agno); + /* We don't support trylock when freeing. */ + ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) != + (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)); ASSERT(agno != NULLAGNUMBER); error = xfs_read_agf(mp, tp, agno, (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, bpp); if (error) return error; - if (!*bpp) - return 0; ASSERT(!(*bpp)->b_error); agf = XFS_BUF_TO_AGF(*bpp); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index cfcef076c72f..9a6d7a84689a 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3311,11 +3311,12 @@ xfs_bmap_longest_free_extent( pag = xfs_perag_get(mp, ag); if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); - if (error) - goto out; - - if (!pag->pagf_init) { - *notinit = 1; + if (error) { + /* Couldn't lock the AGF, so skip this AG. 
*/ + if (error == -EAGAIN) { + *notinit = 1; + error = 0; + } goto out; } } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 5f12b5d8527a..1a88025e68a3 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -159,16 +159,15 @@ xfs_filestream_pick_ag( if (!pag->pagf_init) { err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); - if (err && !trylock) { + if (err) { xfs_perag_put(pag); - return err; + if (err != -EAGAIN) + return err; + /* Couldn't lock the AGF, skip this AG. */ + continue; } } - /* Might fail sometimes during the 1st pass with trylock set. */ - if (!pag->pagf_init) - goto next_ag; - /* Keep track of the AG with the most free blocks. */ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; From 706b8c5bc70391be510a5454f307db90b622b279 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:20 -0800 Subject: [PATCH 273/658] xfs: remove unnecessary null pointer checks from _read_agf callers Drop the null buffer pointer checks in all code that calls xfs_alloc_read_agf and doesn't pass XFS_ALLOC_FLAG_TRYLOCK because they're no longer necessary. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 6 ------ fs/xfs/scrub/agheader_repair.c | 4 ---- fs/xfs/scrub/fscounters.c | 3 --- fs/xfs/scrub/repair.c | 2 -- fs/xfs/xfs_discard.c | 2 +- fs/xfs/xfs_reflink.c | 2 -- 6 files changed, 1 insertion(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index d7d702ee4d1a..6e1665f2cb67 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1177,8 +1177,6 @@ xfs_refcount_finish_one( XFS_ALLOC_FLAG_FREEING, &agbp); if (error) return error; - if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) - return -EFSCORRUPTED; rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); if (!rcur) { @@ -1718,10 +1716,6 @@ xfs_refcount_recover_cow_leftovers( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) goto out_trans; - if (!agbp) { - error = -ENOMEM; - goto out_trans; - } cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); /* Find all the leftover CoW staging extents. */ diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 7a1a38b636a9..d5e6db9af434 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -659,8 +659,6 @@ xrep_agfl( error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); if (error) return error; - if (!agf_bp) - return -ENOMEM; /* * Make sure we have the AGFL buffer, as scrub might have decided it @@ -735,8 +733,6 @@ xrep_agi_find_btrees( error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); if (error) return error; - if (!agf_bp) - return -ENOMEM; /* Find the btree roots. 
*/ error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL); diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 7251c66a82c9..ec2064ed3c30 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -83,9 +83,6 @@ xchk_fscount_warmup( error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp); if (error) break; - error = -ENOMEM; - if (!agf_bp || !agi_bp) - break; /* * These are supposed to be initialized by the header read diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 3df49d487940..e489d7a8446a 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -546,8 +546,6 @@ xrep_reap_block( error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp); if (error) return error; - if (!agf_bp) - return -ENOMEM; } else { agf_bp = sc->sa.agf_bp; } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index cae613620175..0b8350e84d28 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -45,7 +45,7 @@ xfs_trim_extents( xfs_log_force(mp, XFS_LOG_SYNC); error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); - if (error || !agbp) + if (error) goto out_put_perag; cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e723b267a247..b0ce04ffd3cd 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -143,8 +143,6 @@ xfs_reflink_find_shared( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; - if (!agbp) - return -ENOMEM; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); From cdbcf82b86ea24aa942991b4233cd8ddf13f590c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 23 Jan 2020 17:01:20 -0800 Subject: [PATCH 274/658] xfs: fix xfs_buf_ioerror_alert location reporting Instead of passing __func__ to the error reporting function, let's use the return address builtins so that the messages actually tell you which higher level function called the buffer functions. This was previously true for the xfs_buf_read callers, but not for the xfs_trans_read_buf callers. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_buf.c | 12 +++++++----- fs/xfs/xfs_buf.h | 7 ++++--- fs/xfs/xfs_buf_item.c | 2 +- fs/xfs/xfs_log_recover.c | 4 ++-- fs/xfs/xfs_trans_buf.c | 5 +++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b420e865b32e..217e4f82a44a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -803,7 +803,8 @@ xfs_buf_read_map( int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops) + const struct xfs_buf_ops *ops, + xfs_failaddr_t fa) { struct xfs_buf *bp; int error; @@ -852,7 +853,7 @@ xfs_buf_read_map( */ if (error) { if (!XFS_FORCED_SHUTDOWN(target->bt_mount)) - xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_ioerror_alert(bp, fa); bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); @@ -885,7 +886,8 @@ xfs_buf_readahead_map( return; xfs_buf_read_map(target, map, nmaps, - XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops); + XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops, + __this_address); } /* @@ -1234,10 +1236,10 @@ __xfs_buf_ioerror( void xfs_buf_ioerror_alert( struct xfs_buf *bp, - const char *func) + xfs_failaddr_t func) { xfs_alert(bp->b_mount, -"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", +"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, -bp->b_error); } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index aa145ad25e9a..d79a1fe5d738 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -196,7 +196,7 @@ int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp); int xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); + const struct xfs_buf_ops *ops, xfs_failaddr_t fa); void xfs_buf_readahead_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, const struct xfs_buf_ops *ops); @@ -224,7 +224,8 @@ xfs_buf_read( { DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return xfs_buf_read_map(target, &map, 1, flags, bpp, ops); + return xfs_buf_read_map(target, &map, 1, flags, bpp, ops, + __builtin_return_address(0)); } static inline void @@ -261,7 +262,7 @@ extern void xfs_buf_ioend(struct xfs_buf *bp); extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) -extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); +extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa); extern int __xfs_buf_submit(struct xfs_buf *bp, bool); static inline int xfs_buf_submit(struct xfs_buf *bp) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 5be8973a452c..663810e6cd59 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1113,7 +1113,7 @@ xfs_buf_iodone_callback_error( if (bp->b_target != lasttarg || time_after(jiffies, (lasttime + 5*HZ))) { lasttime = jiffies; - xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_ioerror_alert(bp, __this_address); } lasttarg = bp->b_target; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ac79537d3275..25cfc85dbaa7 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -294,7 +294,7 @@ xlog_recover_iodone( * this during recovery. One strike! 
*/ if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) { - xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_ioerror_alert(bp, __this_address); xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); } } @@ -5627,7 +5627,7 @@ xlog_do_recover( error = xfs_buf_submit(bp); if (error) { if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_ioerror_alert(bp, __this_address); ASSERT(0); } xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 83470998f87b..08174ffa2118 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -280,7 +280,7 @@ xfs_trans_read_buf_map( ASSERT(bp->b_ops != NULL); error = xfs_buf_reverify(bp, ops); if (error) { - xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_ioerror_alert(bp, __return_address); if (tp->t_flags & XFS_TRANS_DIRTY) xfs_force_shutdown(tp->t_mountp, @@ -302,7 +302,8 @@ xfs_trans_read_buf_map( return 0; } - error = xfs_buf_read_map(target, map, nmaps, flags, &bp, ops); + error = xfs_buf_read_map(target, map, nmaps, flags, &bp, ops, + __return_address); switch (error) { case 0: break; From bb44aa09e53960c0230a645144fe566e094a2a02 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 18 Nov 2019 08:50:36 +0000 Subject: [PATCH 275/658] watchdog: sama5d4_wdt: addition of sam9x60 compatible watchdog Add support for SAM9X60 WDT into sama5d4_wdt. This means that this driver gets a flag inside the data struct that represents the sam9x60 support. This flag differentiates between the two hardware blocks, and is set according to the compatible of the driver instantiation. Signed-off-by: Eugen Hristev Reviewed-by-off-by: Guenter Roeck Link: https://lore.kernel.org/r/1574067012-18559-3-git-send-email-eugen.hristev@microchip.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/at91sam9_wdt.h | 21 ++++++ drivers/watchdog/sama5d4_wdt.c | 109 ++++++++++++++++++++++++-------- 2 files changed, 105 insertions(+), 25 deletions(-) diff --git a/drivers/watchdog/at91sam9_wdt.h b/drivers/watchdog/at91sam9_wdt.h index abfe34dd760a..298d545df1a1 100644 --- a/drivers/watchdog/at91sam9_wdt.h +++ b/drivers/watchdog/at91sam9_wdt.h @@ -24,7 +24,10 @@ #define AT91_WDT_MR 0x04 /* Watchdog Mode Register */ #define AT91_WDT_WDV (0xfffUL << 0) /* Counter Value */ #define AT91_WDT_SET_WDV(x) ((x) & AT91_WDT_WDV) +#define AT91_SAM9X60_PERIODRST BIT(4) /* Period Reset */ +#define AT91_SAM9X60_RPTHRST BIT(5) /* Minimum Restart Period */ #define AT91_WDT_WDFIEN BIT(12) /* Fault Interrupt Enable */ +#define AT91_SAM9X60_WDDIS BIT(12) /* Watchdog Disable */ #define AT91_WDT_WDRSTEN BIT(13) /* Reset Processor */ #define AT91_WDT_WDRPROC BIT(14) /* Timer Restart */ #define AT91_WDT_WDDIS BIT(15) /* Watchdog Disable */ @@ -37,4 +40,22 @@ #define AT91_WDT_WDUNF BIT(0) /* Watchdog Underflow */ #define AT91_WDT_WDERR BIT(1) /* Watchdog Error */ +/* Watchdog Timer Value Register */ +#define AT91_SAM9X60_VR 0x08 + +/* Watchdog Window Level Register */ +#define AT91_SAM9X60_WLR 0x0c +/* Watchdog Period Value */ +#define AT91_SAM9X60_COUNTER (0xfffUL << 0) +#define AT91_SAM9X60_SET_COUNTER(x) ((x) & AT91_SAM9X60_COUNTER) + +/* Interrupt Enable Register */ +#define AT91_SAM9X60_IER 0x14 +/* Period Interrupt Enable */ +#define AT91_SAM9X60_PERINT BIT(0) +/* Interrupt Disable Register */ +#define AT91_SAM9X60_IDR 0x18 +/* Interrupt Status Register */ +#define AT91_SAM9X60_ISR 0x1c + #endif diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c index d193a60430b2..e5d11d6a2600 100644 --- 
a/drivers/watchdog/sama5d4_wdt.c +++ b/drivers/watchdog/sama5d4_wdt.c @@ -2,7 +2,7 @@ /* * Driver for Atmel SAMA5D4 Watchdog Timer * - * Copyright (C) 2015 Atmel Corporation + * Copyright (C) 2015-2019 Microchip Technology Inc. and its subsidiaries */ #include @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,10 @@ struct sama5d4_wdt { struct watchdog_device wdd; void __iomem *reg_base; u32 mr; + u32 ir; unsigned long last_ping; + bool need_irq; + bool sam9x60_support; }; static int wdt_timeout; @@ -78,7 +82,12 @@ static int sama5d4_wdt_start(struct watchdog_device *wdd) { struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); - wdt->mr &= ~AT91_WDT_WDDIS; + if (wdt->sam9x60_support) { + writel_relaxed(wdt->ir, wdt->reg_base + AT91_SAM9X60_IER); + wdt->mr &= ~AT91_SAM9X60_WDDIS; + } else { + wdt->mr &= ~AT91_WDT_WDDIS; + } wdt_write(wdt, AT91_WDT_MR, wdt->mr); return 0; @@ -88,7 +97,12 @@ static int sama5d4_wdt_stop(struct watchdog_device *wdd) { struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); - wdt->mr |= AT91_WDT_WDDIS; + if (wdt->sam9x60_support) { + writel_relaxed(wdt->ir, wdt->reg_base + AT91_SAM9X60_IDR); + wdt->mr |= AT91_SAM9X60_WDDIS; + } else { + wdt->mr |= AT91_WDT_WDDIS; + } wdt_write(wdt, AT91_WDT_MR, wdt->mr); return 0; @@ -109,6 +123,14 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd, struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); u32 value = WDT_SEC2TICKS(timeout); + if (wdt->sam9x60_support) { + wdt_write(wdt, AT91_SAM9X60_WLR, + AT91_SAM9X60_SET_COUNTER(value)); + + wdd->timeout = timeout; + return 0; + } + wdt->mr &= ~AT91_WDT_WDV; wdt->mr |= AT91_WDT_SET_WDV(value); @@ -143,8 +165,14 @@ static const struct watchdog_ops sama5d4_wdt_ops = { static irqreturn_t sama5d4_wdt_irq_handler(int irq, void *dev_id) { struct sama5d4_wdt *wdt = platform_get_drvdata(dev_id); + u32 reg; - if (wdt_read(wdt, AT91_WDT_SR)) { + if (wdt->sam9x60_support) + reg = wdt_read(wdt, AT91_SAM9X60_ISR); + else + reg = wdt_read(wdt, AT91_WDT_SR); + + if (reg) { pr_crit("Atmel Watchdog Software Reset\n"); emergency_restart(); pr_crit("Reboot didn't succeed\n"); @@ -157,13 +185,14 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt) { const char *tmp; - wdt->mr = AT91_WDT_WDDIS; + if (wdt->sam9x60_support) + wdt->mr = AT91_SAM9X60_WDDIS; + else + wdt->mr = AT91_WDT_WDDIS; if (!of_property_read_string(np, "atmel,watchdog-type", &tmp) && !strcmp(tmp, "software")) - wdt->mr |= AT91_WDT_WDFIEN; - else - wdt->mr |= AT91_WDT_WDRSTEN; + wdt->need_irq = true; if (of_property_read_bool(np, "atmel,idle-halt")) wdt->mr |= AT91_WDT_WDIDLEHLT; @@ -176,21 +205,46 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt) static int sama5d4_wdt_init(struct sama5d4_wdt *wdt) { - u32 reg; + u32 reg, val; + + val = WDT_SEC2TICKS(WDT_DEFAULT_TIMEOUT); /* * When booting and resuming, the bootloader may have changed the * watchdog configuration. * If the watchdog is already running, we can safely update it. * Else, we have to disable it properly. 
*/ - if (wdt_enabled) { - wdt_write_nosleep(wdt, AT91_WDT_MR, wdt->mr); - } else { + if (!wdt_enabled) { reg = wdt_read(wdt, AT91_WDT_MR); - if (!(reg & AT91_WDT_WDDIS)) + if (wdt->sam9x60_support && (!(reg & AT91_SAM9X60_WDDIS))) + wdt_write_nosleep(wdt, AT91_WDT_MR, + reg | AT91_SAM9X60_WDDIS); + else if (!wdt->sam9x60_support && + (!(reg & AT91_WDT_WDDIS))) wdt_write_nosleep(wdt, AT91_WDT_MR, reg | AT91_WDT_WDDIS); } + + if (wdt->sam9x60_support) { + if (wdt->need_irq) + wdt->ir = AT91_SAM9X60_PERINT; + else + wdt->mr |= AT91_SAM9X60_PERIODRST; + + wdt_write(wdt, AT91_SAM9X60_IER, wdt->ir); + wdt_write(wdt, AT91_SAM9X60_WLR, AT91_SAM9X60_SET_COUNTER(val)); + } else { + wdt->mr |= AT91_WDT_SET_WDD(WDT_SEC2TICKS(MAX_WDT_TIMEOUT)); + wdt->mr |= AT91_WDT_SET_WDV(val); + + if (wdt->need_irq) + wdt->mr |= AT91_WDT_WDFIEN; + else + wdt->mr |= AT91_WDT_WDRSTEN; + } + + wdt_write_nosleep(wdt, AT91_WDT_MR, wdt->mr); + return 0; } @@ -201,7 +255,6 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) struct sama5d4_wdt *wdt; void __iomem *regs; u32 irq = 0; - u32 timeout; int ret; wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); @@ -215,6 +268,8 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) wdd->min_timeout = MIN_WDT_TIMEOUT; wdd->max_timeout = MAX_WDT_TIMEOUT; wdt->last_ping = jiffies; + wdt->sam9x60_support = of_device_is_compatible(dev->of_node, + "microchip,sam9x60-wdt"); watchdog_set_drvdata(wdd, wdt); @@ -224,15 +279,19 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) wdt->reg_base = regs; - irq = irq_of_parse_and_map(dev->of_node, 0); - if (!irq) - dev_warn(dev, "failed to get IRQ from DT\n"); - ret = of_sama5d4_wdt_init(dev->of_node, wdt); if (ret) return ret; - if ((wdt->mr & AT91_WDT_WDFIEN) && irq) { + if (wdt->need_irq) { + irq = irq_of_parse_and_map(dev->of_node, 0); + if (!irq) { + dev_warn(dev, "failed to get IRQ from DT\n"); + wdt->need_irq = false; + } + } + + if (wdt->need_irq) { ret = devm_request_irq(dev, irq, sama5d4_wdt_irq_handler, IRQF_SHARED | IRQF_IRQPOLL | IRQF_NO_SUSPEND, pdev->name, pdev); @@ -244,11 +303,6 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) watchdog_init_timeout(wdd, wdt_timeout, dev); - timeout = WDT_SEC2TICKS(wdd->timeout); - - wdt->mr |= AT91_WDT_SET_WDD(WDT_SEC2TICKS(MAX_WDT_TIMEOUT)); - wdt->mr |= AT91_WDT_SET_WDV(timeout); - ret = sama5d4_wdt_init(wdt); if (ret) return ret; @@ -269,7 +323,12 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) } static const struct of_device_id sama5d4_wdt_of_match[] = { - { .compatible = "atmel,sama5d4-wdt", }, + { + .compatible = "atmel,sama5d4-wdt", + }, + { + .compatible = "microchip,sam9x60-wdt", + }, { } }; MODULE_DEVICE_TABLE(of, sama5d4_wdt_of_match); From 85fdc63fe256b595f923a69848cd99972ff446d8 Mon Sep 17 00:00:00 2001 From: Christophe Roullier Date: Fri, 22 Nov 2019 14:22:46 +0100 Subject: [PATCH 276/658] drivers: watchdog: stm32_iwdg: set WDOG_HW_RUNNING at probe If the watchdog hardware is already enabled during the boot process, when the Linux watchdog driver loads, it should start/reset the watchdog and tell the watchdog framework. 
As a result, ping can be generated from the watchdog framework (if CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is set), until the userspace watchdog daemon takes over control Fixes:4332d113c66a ("watchdog: Add STM32 IWDG driver") Signed-off-by: Christophe Roullier Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191122132246.8473-1-christophe.roullier@st.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/stm32_iwdg.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index a3a329011a06..25188d6bbe15 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -262,6 +262,24 @@ static int stm32_iwdg_probe(struct platform_device *pdev) watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT); watchdog_init_timeout(wdd, 0, dev); + /* + * In case of CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is set + * (Means U-Boot/bootloaders leaves the watchdog running) + * When we get here we should make a decision to prevent + * any side effects before user space daemon will take care of it. + * The best option, taking into consideration that there is no + * way to read values back from hardware, is to enforce watchdog + * being run with deterministic values. + */ + if (IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED)) { + ret = stm32_iwdg_start(wdd); + if (ret) + return ret; + + /* Make sure the watchdog is serviced */ + set_bit(WDOG_HW_RUNNING, &wdd->status); + } + ret = devm_watchdog_register_device(dev, wdd); if (ret) return ret; From d4ba76d79854178ffff55d66e1a72580484a741d Mon Sep 17 00:00:00 2001 From: "Wang, Peng 1. (NSB - CN/Hangzhou)" Date: Mon, 25 Nov 2019 02:04:13 +0000 Subject: [PATCH 277/658] watchdog: make DesignWare watchdog allow users to set bigger timeout value watchdog_dev.c provides means to allow users to set bigger timeout value than HW can support, make DesignWare watchdog align with this. Signed-off-by: Peng Wang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/8fa54e92c6cd4544a7a3eb60a373ac43@nokia-sbell.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index fef7c61f5555..738eee5c8751 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -114,7 +114,15 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) writel(top_val | top_val << WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); - wdd->timeout = dw_wdt_top_in_seconds(dw_wdt, top_val); + /* + * In case users set bigger timeout value than HW can support, + * kernel(watchdog_dev.c) helps to feed watchdog before + * wdd->max_hw_heartbeat_ms + */ + if (top_s * 1000 <= wdd->max_hw_heartbeat_ms) + wdd->timeout = dw_wdt_top_in_seconds(dw_wdt, top_val); + else + wdd->timeout = top_s; return 0; } From f6c98b08381c774a56b2f0f0067da646c23f1447 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Thu, 28 Nov 2019 18:19:31 +0100 Subject: [PATCH 278/658] watchdog: da9062: add power management ops Disable the watchdog during suspend if it is enabled and re-enable it on resume. So we can sleep without the interruptions. 
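The shape of the change is the standard dev_pm_ops pattern for watchdog drivers: stop the timer on suspend only if it is currently armed, and re-arm it on resume. A condensed sketch of the suspend half (the resume callback mirrors it with da9062_wdt_start(), and both are wired up through SIMPLE_DEV_PM_OPS and the driver's .pm field):

	static int __maybe_unused da9062_wdt_suspend(struct device *dev)
	{
		/* the probe path stores &wdt->wdtdev with dev_set_drvdata() */
		struct watchdog_device *wdd = dev_get_drvdata(dev);

		/* only an armed watchdog needs to be paused across suspend */
		if (watchdog_active(wdd))
			return da9062_wdt_stop(wdd);

		return 0;
	}

	static SIMPLE_DEV_PM_OPS(da9062_wdt_pm_ops,
				 da9062_wdt_suspend, da9062_wdt_resume);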
Signed-off-by: Marco Felsch Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191128171931.22563-1-m.felsch@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9062_wdt.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index e149e66a6ea9..2a1e7de25b71 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -212,6 +212,7 @@ static int da9062_wdt_probe(struct platform_device *pdev) watchdog_set_restart_priority(&wdt->wdtdev, 128); watchdog_set_drvdata(&wdt->wdtdev, wdt); + dev_set_drvdata(dev, &wdt->wdtdev); ret = devm_watchdog_register_device(dev, &wdt->wdtdev); if (ret < 0) @@ -220,10 +221,34 @@ static int da9062_wdt_probe(struct platform_device *pdev) return da9062_wdt_ping(&wdt->wdtdev); } +static int __maybe_unused da9062_wdt_suspend(struct device *dev) +{ + struct watchdog_device *wdd = dev_get_drvdata(dev); + + if (watchdog_active(wdd)) + return da9062_wdt_stop(wdd); + + return 0; +} + +static int __maybe_unused da9062_wdt_resume(struct device *dev) +{ + struct watchdog_device *wdd = dev_get_drvdata(dev); + + if (watchdog_active(wdd)) + return da9062_wdt_start(wdd); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(da9062_wdt_pm_ops, + da9062_wdt_suspend, da9062_wdt_resume); + static struct platform_driver da9062_wdt_driver = { .probe = da9062_wdt_probe, .driver = { .name = "da9062-watchdog", + .pm = &da9062_wdt_pm_ops, .of_match_table = da9062_compatible_id_table, }, }; From e0b4f4e0cf7fa9d62628d4249c765ec18dffd143 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Fri, 13 Dec 2019 12:19:34 +0530 Subject: [PATCH 279/658] watchdog: qcom: Use platform_get_irq_optional() for bark irq platform_get_irq() prints an error message when the interrupt is not available. So on platforms where bark interrupt is not specified, following error message is observed on SDM845. [ 2.975888] qcom_wdt 17980000.watchdog: IRQ index 0 not found This is also seen on SC7180, SM8150 SoCs as well. Fix this by using platform_get_irq_optional() instead. Fixes: 36375491a4395654 ("watchdog: qcom: support pre-timeout when the bark irq is available") Signed-off-by: Sai Prakash Ranjan Reviewed-by: Bjorn Andersson Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20191213064934.4112-1-saiprakash.ranjan@codeaurora.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/qcom-wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index a494543d3ae1..eb47fe5ed280 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -246,7 +246,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) } /* check if there is pretimeout support */ - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq > 0) { ret = devm_request_irq(dev, irq, qcom_wdt_isr, IRQF_TRIGGER_RISING, From b1301b9022e9769f3228a353a83bb9623c0f6e41 Mon Sep 17 00:00:00 2001 From: Srinivas Neeli Date: Fri, 20 Dec 2019 12:28:16 +0530 Subject: [PATCH 280/658] watchdog: cadence: Skip printing pointer value "%p" is not printing the pointer value. In driver, printing pointer value is not useful so avoiding print. 
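For background: plain %p has printed only a hashed token since the kernel's pointer obfuscation went in, so the message removed below never showed the real MMIO address to begin with. Illustrative only; the logged value here is hypothetical:

	/* old probe message, removed in this patch */
	dev_info(dev, "Xilinx Watchdog Timer at %p with timeout %ds%s\n",
		 wdt->regs, cdns_wdt_device->timeout,
		 nowayout ? ", nowayout" : "");
	/*
	 * %p is hashed, so this logs something like
	 * "Xilinx Watchdog Timer at 00000000d1e5a4b3 with timeout 10s"
	 * (or "(____ptrval____)" very early in boot), which identifies
	 * nothing about the actual mapping.
	 */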
Signed-off-by: Srinivas Neeli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1576825096-26605-1-git-send-email-srinivas.neeli@xilinx.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/cadence_wdt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c index 06bd4e1a5923..672b184da875 100644 --- a/drivers/watchdog/cadence_wdt.c +++ b/drivers/watchdog/cadence_wdt.c @@ -369,9 +369,8 @@ static int cdns_wdt_probe(struct platform_device *pdev) return ret; platform_set_drvdata(pdev, wdt); - dev_info(dev, "Xilinx Watchdog Timer at %p with timeout %ds%s\n", - wdt->regs, cdns_wdt_device->timeout, - nowayout ? ", nowayout" : ""); + dev_info(dev, "Xilinx Watchdog Timer with timeout %ds%s\n", + cdns_wdt_device->timeout, nowayout ? ", nowayout" : ""); return 0; } From 69503e585192fdd84b240f18a0873d20e18a2e0a Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 8 Jan 2020 13:53:47 +0100 Subject: [PATCH 281/658] watchdog: fix UAF in reboot notifier handling in watchdog core code After the commit 44ea39420fc9 ("drivers/watchdog: make use of devm_register_reboot_notifier()") the struct notifier_block reboot_nb in the struct watchdog_device is removed from the reboot notifiers chain at the time watchdog's chardev is closed. But at least in i6300esb.c case reboot_nb is embedded in the struct esb_dev which can be freed on its device removal and before the chardev is closed, thus UAF at reboot: [ 7.728581] esb_probe: esb_dev.watchdog_device ffff91316f91ab28 ts# uname -r note the address ^^^ 5.5.0-rc5-ae6088-wdog ts# ./openwdog0 & [1] 696 ts# opened /dev/watchdog0, sleeping 10s... ts# echo 1 > /sys/devices/pci0000\:00/0000\:00\:09.0/remove [ 178.086079] devres:rel_nodes: dev ffff91317668a0b0 data ffff91316f91ab28 esb_dev.watchdog_device.reboot_nb memory is freed here ^^^ ts# ...woken up [ 181.459010] devres:rel_nodes: dev ffff913171781000 data ffff913174a1dae8 [ 181.460195] devm_unreg_reboot_notifier: res ffff913174a1dae8 nb ffff91316f91ab78 attempt to use memory already freed ^^^ [ 181.461063] devm_unreg_reboot_notifier: nb->call 6b6b6b6b6b6b6b6b [ 181.461243] devm_unreg_reboot_notifier: nb->next 6b6b6b6b6b6b6b6b freed memory is filled with a slub poison ^^^ [1]+ Done ./openwdog0 ts# reboot [ 229.921862] systemd-shutdown[1]: Rebooting. [ 229.939265] notifier_call_chain: nb ffffffff9c6c2f20 nb->next ffffffff9c6d50c0 [ 229.943080] notifier_call_chain: nb ffffffff9c6d50c0 nb->next 6b6b6b6b6b6b6b6b [ 229.946054] notifier_call_chain: nb 6b6b6b6b6b6b6b6b INVAL [ 229.957584] general protection fault: 0000 [#1] SMP [ 229.958770] CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 5.5.0-rc5-ae6088-wdog [ 229.960224] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 229.963288] RIP: 0010:notifier_call_chain+0x66/0xd0 [ 229.969082] RSP: 0018:ffffb20dc0013d88 EFLAGS: 00010246 [ 229.970812] RAX: 000000000000002e RBX: 6b6b6b6b6b6b6b6b RCX: 00000000000008b3 [ 229.972929] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffffffff9ccc46ac [ 229.975028] RBP: 0000000000000001 R08: 0000000000000000 R09: 00000000000008b3 [ 229.977039] R10: 0000000000000001 R11: ffffffff9c26c740 R12: 0000000000000000 [ 229.979155] R13: 6b6b6b6b6b6b6b6b R14: 0000000000000000 R15: 00000000fffffffa ... 
slub_debug=FZP poison ^^^ [ 229.989089] Call Trace: [ 229.990157] blocking_notifier_call_chain+0x43/0x59 [ 229.991401] kernel_restart_prepare+0x14/0x30 [ 229.992607] kernel_restart+0x9/0x30 [ 229.993800] __do_sys_reboot+0x1d2/0x210 [ 230.000149] do_syscall_64+0x3d/0x130 [ 230.001277] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 230.002639] RIP: 0033:0x7f5461bdd177 [ 230.016402] Modules linked in: i6300esb [ 230.050261] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Fix the crash by reverting 44ea39420fc9 so unregister_reboot_notifier() is called when watchdog device is removed. This also makes handling of the reboot notifier unified with the handling of the restart handler, which is freed with unregister_restart_handler() in the same place. Fixes: 44ea39420fc9 ("drivers/watchdog: make use of devm_register_reboot_notifier()") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Vladis Dronov Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200108125347.6067-1-vdronov@redhat.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_core.c | 35 +++++++++++++++++++++++++++++++ drivers/watchdog/watchdog_dev.c | 36 +------------------------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 21e8085b848b..861daf4f37b2 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -147,6 +147,25 @@ int watchdog_init_timeout(struct watchdog_device *wdd, } EXPORT_SYMBOL_GPL(watchdog_init_timeout); +static int watchdog_reboot_notifier(struct notifier_block *nb, + unsigned long code, void *data) +{ + struct watchdog_device *wdd; + + wdd = container_of(nb, struct watchdog_device, reboot_nb); + if (code == SYS_DOWN || code == SYS_HALT) { + if (watchdog_active(wdd)) { + int ret; + + ret = wdd->ops->stop(wdd); + if (ret) + return NOTIFY_BAD; + } + } + + return NOTIFY_DONE; +} + static int watchdog_restart_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -235,6 +254,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd) } } + if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { + wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; + + ret = register_reboot_notifier(&wdd->reboot_nb); + if (ret) { + pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", + wdd->id, ret); + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, id); + return ret; + } + } + if (wdd->ops->restart) { wdd->restart_nb.notifier_call = watchdog_restart_notifier; @@ -289,6 +321,9 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd) if (wdd->ops->restart) unregister_restart_handler(&wdd->restart_nb); + if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) + unregister_reboot_notifier(&wdd->reboot_nb); + watchdog_dev_unregister(wdd); ida_simple_remove(&watchdog_ida, wdd->id); } diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 4b2a85438478..8b5c742f24e8 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -38,7 +38,6 @@ #include /* For handling misc devices */ #include /* For module stuff/... 
*/ #include /* For mutexes */ -#include /* For reboot notifier */ #include /* For memory functions */ #include /* For standard types (like size_t) */ #include /* For watchdog specific items */ @@ -1097,25 +1096,6 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd) put_device(&wd_data->dev); } -static int watchdog_reboot_notifier(struct notifier_block *nb, - unsigned long code, void *data) -{ - struct watchdog_device *wdd; - - wdd = container_of(nb, struct watchdog_device, reboot_nb); - if (code == SYS_DOWN || code == SYS_HALT) { - if (watchdog_active(wdd)) { - int ret; - - ret = wdd->ops->stop(wdd); - if (ret) - return NOTIFY_BAD; - } - } - - return NOTIFY_DONE; -} - /* * watchdog_dev_register: register a watchdog device * @wdd: watchdog device @@ -1134,22 +1114,8 @@ int watchdog_dev_register(struct watchdog_device *wdd) return ret; ret = watchdog_register_pretimeout(wdd); - if (ret) { + if (ret) watchdog_cdev_unregister(wdd); - return ret; - } - - if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { - wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; - - ret = devm_register_reboot_notifier(&wdd->wd_data->dev, - &wdd->reboot_nb); - if (ret) { - pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", - wdd->id, ret); - watchdog_dev_unregister(wdd); - } - } return ret; } From e7046df873548bfc87c0c594ca473226c5d3317b Mon Sep 17 00:00:00 2001 From: Jack Mitchell Date: Tue, 7 Jan 2020 15:51:55 +0000 Subject: [PATCH 282/658] watchdog: dw_wdt: ping watchdog to reset countdown before start Currently on an rk3288 SoC when trying to use the watchdog the SoC will instantly reset. This is due to the watchdog countdown counter being set to its initial value of 0x0. Reset the watchdog counter before start in order to correctly start the countdown timer from the right position. Signed-off-by: Jack Mitchell Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200107155155.278521-1-ml@embed.me.uk Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 738eee5c8751..fba21de2bbad 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -143,6 +143,7 @@ static int dw_wdt_start(struct watchdog_device *wdd) struct dw_wdt *dw_wdt = to_dw_wdt(wdd); dw_wdt_set_timeout(wdd, wdd->timeout); + dw_wdt_ping(&dw_wdt->wdd); dw_wdt_arm_system_reset(dw_wdt); return 0; From 6ae58eecad31362f5caa0bd44ff7e78fbac391dd Mon Sep 17 00:00:00 2001 From: Vincent Prince Date: Thu, 23 Jan 2020 15:05:44 +0100 Subject: [PATCH 283/658] watchdog: it87_wdt: add IT8786 ID IT8786 watchdog works as in IT872x Tested on VECOW ECS-9000 board. 
Signed-off-by: Vincent Prince Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200123140544.25937-1-vincent.prince.fr@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/it87_wdt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index a4b71ebc8cab..f3bf3ea50e39 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -67,6 +67,7 @@ #define IT8726_ID 0x8726 /* the data sheet suggest wrongly 0x8716 */ #define IT8728_ID 0x8728 #define IT8783_ID 0x8783 +#define IT8786_ID 0x8786 /* GPIO Configuration Registers LDN=0x07 */ #define WDTCTRL 0x71 @@ -294,6 +295,7 @@ static int __init it87_wdt_init(void) case IT8721_ID: case IT8728_ID: case IT8783_ID: + case IT8786_ID: max_units = 65535; break; case IT8705_ID: From c514430c51ee83a40a31b98336d31dfaf736b9c4 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Wed, 8 Jan 2020 10:57:03 +0100 Subject: [PATCH 284/658] dt-bindings: watchdog: da9062: add suspend disable option Document the watchdog disable option which can be used if the hardware automatic suspend option is broken. Signed-off-by: Marco Felsch Reviewed-by: Adam Thomson Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200108095704.23233-3-m.felsch@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/da9062-wdt.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt index b935b526d2f3..950e4fba8dbc 100644 --- a/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt @@ -6,6 +6,11 @@ Required properties: "dlg,da9061-watchdog", "dlg,da9062-watchdog" "dlg,da9062-watchdog" +Optional properties: +- dlg,use-sw-pm: Add this property to disable the watchdog during suspend. + Only use this option if you can't use the watchdog automatic suspend + function during a suspend (see register CONTROL_B). 
+ Example: DA9062 pmic0: da9062@58 { From f43f97a0fc0e568a6f68480b043e8f7fdfa8fb23 Mon Sep 17 00:00:00 2001 From: "yong.liang" Date: Wed, 15 Jan 2020 16:58:25 +0800 Subject: [PATCH 285/658] dt-bindings: mediatek: mt8183: Add #reset-cells Add #reset-cells property and update example Signed-off-by: yong.liang Signed-off-by: Jiaxin Yu Reviewed-by: Yingjoe Chen Reviewed-by: Philipp Zabel Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200115085828.27791-2-yong.liang@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/mtk-wdt.txt | 10 +++++++--- .../reset-controller/mt8183-resets.h | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt index fd380eb28df5..ecb9ff784832 100644 --- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt @@ -9,17 +9,21 @@ Required properties: "mediatek,mt7622-wdt", "mediatek,mt6589-wdt": for MT7622 "mediatek,mt7623-wdt", "mediatek,mt6589-wdt": for MT7623 "mediatek,mt7629-wdt", "mediatek,mt6589-wdt": for MT7629 + "mediatek,mt8183-wdt", "mediatek,mt6589-wdt": for MT8183 "mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516 - reg : Specifies base physical address and size of the registers. Optional properties: - timeout-sec: contains the watchdog timeout in seconds. +- #reset-cells: Should be 1. Example: -wdt: watchdog@10000000 { - compatible = "mediatek,mt6589-wdt"; - reg = <0x10000000 0x18>; +watchdog: watchdog@10007000 { + compatible = "mediatek,mt8183-wdt", + "mediatek,mt6589-wdt"; + reg = <0 0x10007000 0 0x100>; timeout-sec = <10>; + #reset-cells = <1>; }; diff --git a/include/dt-bindings/reset-controller/mt8183-resets.h b/include/dt-bindings/reset-controller/mt8183-resets.h index 8804e34ebdd4..a1bbd41e0d12 100644 --- a/include/dt-bindings/reset-controller/mt8183-resets.h +++ b/include/dt-bindings/reset-controller/mt8183-resets.h @@ -78,4 +78,21 @@ #define MT8183_INFRACFG_AO_I2C7_SW_RST 126 #define MT8183_INFRACFG_AO_I2C8_SW_RST 127 +#define MT8183_INFRACFG_SW_RST_NUM 128 + +#define MT8183_TOPRGU_MM_SW_RST 1 +#define MT8183_TOPRGU_MFG_SW_RST 2 +#define MT8183_TOPRGU_VENC_SW_RST 3 +#define MT8183_TOPRGU_VDEC_SW_RST 4 +#define MT8183_TOPRGU_IMG_SW_RST 5 +#define MT8183_TOPRGU_MD_SW_RST 7 +#define MT8183_TOPRGU_CONN_SW_RST 9 +#define MT8183_TOPRGU_CONN_MCU_SW_RST 12 +#define MT8183_TOPRGU_IPU0_SW_RST 14 +#define MT8183_TOPRGU_IPU1_SW_RST 15 +#define MT8183_TOPRGU_AUDIO_SW_RST 17 +#define MT8183_TOPRGU_CAMSYS_SW_RST 18 + +#define MT8183_TOPRGU_SW_RST_NUM 19 + #endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8183 */ From fe42cc30a965143a10ed7823989b7e50d7529dac Mon Sep 17 00:00:00 2001 From: "yong.liang" Date: Wed, 15 Jan 2020 16:58:26 +0800 Subject: [PATCH 286/658] dt-bindings: mediatek: mt2712: Add #reset-cells Add #reset-cells and update mtk-wdt.txt Signed-off-by: yong.liang Signed-off-by: Jiaxin Yu Reviewed-by: Yingjoe Chen Reviewed-by: Philipp Zabel Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200115085828.27791-3-yong.liang@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/mtk-wdt.txt | 1 + .../reset-controller/mt2712-resets.h | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 include/dt-bindings/reset-controller/mt2712-resets.h diff --git 
a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt index ecb9ff784832..4dd36bd3f1ad 100644 --- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt @@ -4,6 +4,7 @@ Required properties: - compatible should contain: "mediatek,mt2701-wdt", "mediatek,mt6589-wdt": for MT2701 + "mediatek,mt2712-wdt", "mediatek,mt6589-wdt": for MT2712 "mediatek,mt6589-wdt": for MT6589 "mediatek,mt6797-wdt", "mediatek,mt6589-wdt": for MT6797 "mediatek,mt7622-wdt", "mediatek,mt6589-wdt": for MT7622 diff --git a/include/dt-bindings/reset-controller/mt2712-resets.h b/include/dt-bindings/reset-controller/mt2712-resets.h new file mode 100644 index 000000000000..9e7ee762f076 --- /dev/null +++ b/include/dt-bindings/reset-controller/mt2712-resets.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2019 MediaTek Inc. + * Author: Yong Liang + */ + +#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT2712 +#define _DT_BINDINGS_RESET_CONTROLLER_MT2712 + +#define MT2712_TOPRGU_INFRA_SW_RST 0 +#define MT2712_TOPRGU_MM_SW_RST 1 +#define MT2712_TOPRGU_MFG_SW_RST 2 +#define MT2712_TOPRGU_VENC_SW_RST 3 +#define MT2712_TOPRGU_VDEC_SW_RST 4 +#define MT2712_TOPRGU_IMG_SW_RST 5 +#define MT2712_TOPRGU_INFRA_AO_SW_RST 8 +#define MT2712_TOPRGU_USB_SW_RST 9 +#define MT2712_TOPRGU_APMIXED_SW_RST 10 + +#define MT2712_TOPRGU_SW_RST_NUM 11 + +#endif /* _DT_BINDINGS_RESET_CONTROLLER_MT2712 */ From c254e103082b74e4f0987c364e5e3b138dbef1cc Mon Sep 17 00:00:00 2001 From: "yong.liang" Date: Wed, 15 Jan 2020 16:58:27 +0800 Subject: [PATCH 287/658] watchdog: mtk_wdt: mt8183: Add reset controller Add reset controller API in watchdog driver. Besides watchdog, MTK toprgu module alsa provide sub-system (eg, audio, camera, codec and connectivity) software reset functionality. 
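For context, sub-system drivers then pick these reset lines up through the generic reset framework. A minimal consumer sketch, not part of this series (the driver and the chosen MT8183_TOPRGU_AUDIO_SW_RST index are only illustrative), assuming the consumer node carries something like resets = <&watchdog MT8183_TOPRGU_AUDIO_SW_RST>:

#include <linux/platform_device.h>
#include <linux/reset.h>

static int example_consumer_probe(struct platform_device *pdev)
{
        struct reset_control *rst;

        /* resolved through the consumer's "resets" property, #reset-cells = <1> */
        rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
        if (IS_ERR(rst))
                return PTR_ERR(rst);

        /* ends up in toprgu_reset(): assert then deassert the WDT_SWSYSRST bit */
        return reset_control_reset(rst);
}

The #reset-cells = <1> added to the binding is what lets such a consumer select one of the MT8183_TOPRGU_*_SW_RST indices.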
Signed-off-by: yong.liang Signed-off-by: Jiaxin Yu Reviewed-by: Yingjoe Chen Reviewed-by: Philipp Zabel Reviewed-by: Guenter Roeck Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20200115085828.27791-4-yong.liang@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 99 +++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index 9c3d0033260d..e88aacb0404d 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -9,6 +9,8 @@ * Based on sunxi_wdt.c */ +#include +#include #include #include #include @@ -16,10 +18,11 @@ #include #include #include +#include #include +#include #include #include -#include #define WDT_MAX_TIMEOUT 31 #define WDT_MIN_TIMEOUT 1 @@ -44,6 +47,9 @@ #define WDT_SWRST 0x14 #define WDT_SWRST_KEY 0x1209 +#define WDT_SWSYSRST 0x18U +#define WDT_SWSYS_RST_KEY 0x88000000 + #define DRV_NAME "mtk-wdt" #define DRV_VERSION "1.0" @@ -53,8 +59,90 @@ static unsigned int timeout; struct mtk_wdt_dev { struct watchdog_device wdt_dev; void __iomem *wdt_base; + spinlock_t lock; /* protects WDT_SWSYSRST reg */ + struct reset_controller_dev rcdev; }; +struct mtk_wdt_data { + int toprgu_sw_rst_num; +}; + +static const struct mtk_wdt_data mt8183_data = { + .toprgu_sw_rst_num = MT8183_TOPRGU_SW_RST_NUM, +}; + +static int toprgu_reset_update(struct reset_controller_dev *rcdev, + unsigned long id, bool assert) +{ + unsigned int tmp; + unsigned long flags; + struct mtk_wdt_dev *data = + container_of(rcdev, struct mtk_wdt_dev, rcdev); + + spin_lock_irqsave(&data->lock, flags); + + tmp = readl(data->wdt_base + WDT_SWSYSRST); + if (assert) + tmp |= BIT(id); + else + tmp &= ~BIT(id); + tmp |= WDT_SWSYS_RST_KEY; + writel(tmp, data->wdt_base + WDT_SWSYSRST); + + spin_unlock_irqrestore(&data->lock, flags); + + return 0; +} + +static int toprgu_reset_assert(struct reset_controller_dev *rcdev, + unsigned long id) +{ + return toprgu_reset_update(rcdev, id, true); +} + +static int toprgu_reset_deassert(struct reset_controller_dev *rcdev, + unsigned long id) +{ + return toprgu_reset_update(rcdev, id, false); +} + +static int toprgu_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + int ret; + + ret = toprgu_reset_assert(rcdev, id); + if (ret) + return ret; + + return toprgu_reset_deassert(rcdev, id); +} + +static const struct reset_control_ops toprgu_reset_ops = { + .assert = toprgu_reset_assert, + .deassert = toprgu_reset_deassert, + .reset = toprgu_reset, +}; + +static int toprgu_register_reset_controller(struct platform_device *pdev, + int rst_num) +{ + int ret; + struct mtk_wdt_dev *mtk_wdt = platform_get_drvdata(pdev); + + spin_lock_init(&mtk_wdt->lock); + + mtk_wdt->rcdev.owner = THIS_MODULE; + mtk_wdt->rcdev.nr_resets = rst_num; + mtk_wdt->rcdev.ops = &toprgu_reset_ops; + mtk_wdt->rcdev.of_node = pdev->dev.of_node; + ret = devm_reset_controller_register(&pdev->dev, &mtk_wdt->rcdev); + if (ret != 0) + dev_err(&pdev->dev, + "couldn't register wdt reset controller: %d\n", ret); + return ret; +} + static int mtk_wdt_restart(struct watchdog_device *wdt_dev, unsigned long action, void *data) { @@ -155,6 +243,7 @@ static int mtk_wdt_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_wdt_dev *mtk_wdt; + const struct mtk_wdt_data *wdt_data; int err; mtk_wdt = devm_kzalloc(dev, sizeof(*mtk_wdt), GFP_KERNEL); @@ -190,6 +279,13 @@ static int mtk_wdt_probe(struct platform_device *pdev) 
dev_info(dev, "Watchdog enabled (timeout=%d sec, nowayout=%d)\n", mtk_wdt->wdt_dev.timeout, nowayout); + wdt_data = of_device_get_match_data(dev); + if (wdt_data) { + err = toprgu_register_reset_controller(pdev, + wdt_data->toprgu_sw_rst_num); + if (err) + return err; + } return 0; } @@ -219,6 +315,7 @@ static int mtk_wdt_resume(struct device *dev) static const struct of_device_id mtk_wdt_dt_ids[] = { { .compatible = "mediatek,mt6589-wdt" }, + { .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, mtk_wdt_dt_ids); From 9e5236e7cec110610f3bc824a4d535c1271e4bb5 Mon Sep 17 00:00:00 2001 From: "yong.liang" Date: Wed, 15 Jan 2020 16:58:28 +0800 Subject: [PATCH 288/658] watchdog: mtk_wdt: mt2712: Add reset controller Add reset controller for 2712. Besides watchdog, MTK toprgu module alsa provide sub-system (eg, audio, camera, codec and connectivity) software reset functionality. Signed-off-by: yong.liang Signed-off-by: Jiaxin Yu Reviewed-by: Yingjoe Chen Reviewed-by: Philipp Zabel Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20200115085828.27791-5-yong.liang@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index e88aacb0404d..d6a6393f609d 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -9,6 +9,7 @@ * Based on sunxi_wdt.c */ +#include #include #include #include @@ -67,6 +68,10 @@ struct mtk_wdt_data { int toprgu_sw_rst_num; }; +static const struct mtk_wdt_data mt2712_data = { + .toprgu_sw_rst_num = MT2712_TOPRGU_SW_RST_NUM, +}; + static const struct mtk_wdt_data mt8183_data = { .toprgu_sw_rst_num = MT8183_TOPRGU_SW_RST_NUM, }; @@ -314,6 +319,7 @@ static int mtk_wdt_resume(struct device *dev) #endif static const struct of_device_id mtk_wdt_dt_ids[] = { + { .compatible = "mediatek,mt2712-wdt", .data = &mt2712_data }, { .compatible = "mediatek,mt6589-wdt" }, { .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data }, { /* sentinel */ } From 057b52b4b3d58f4ee5944171da50f77b00a1bb0d Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Wed, 15 Jan 2020 17:23:07 +0100 Subject: [PATCH 289/658] watchdog: da9062: make restart handler atomic safe The restart handler is executed during the shutdown phase which is atomic/irq-less. The i2c framework supports atomic transfers since commit 63b96983a5dd ("i2c: core: introduce callbacks for atomic transfers") to address this use case. Using regmap within an atomic context is allowed only if the regmap type is MMIO and the cache type 'flat' or no cache is used. Using the i2c_smbus_write_byte_data() function can be done without additional tests because: 1) the DA9062 is an i2c-only device and 2) the i2c framework emulates the smbus protocol if the host adapter does not support smbus_xfer by using the master_xfer. 
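Condensed, the handler after this change reads roughly as follows (the trailing return is implied by the hunk rather than shown in it):

static int da9062_wdt_restart(struct watchdog_device *wdd, unsigned long action,
                              void *data)
{
        struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd);
        struct i2c_client *client = to_i2c_client(wdt->hw->dev);
        int ret;

        /*
         * Runs with interrupts off late in shutdown: regmap may sleep on its
         * lock, so write through the SMBus helper, which the i2c core can
         * service via the adapter's atomic transfer callbacks.
         */
        ret = i2c_smbus_write_byte_data(client, DA9062AA_CONTROL_F,
                                        DA9062AA_SHUTDOWN_MASK);
        if (ret < 0)
                dev_alert(wdt->hw->dev, "Failed to shutdown (err = %d)\n", ret);

        return ret;
}

The two prerequisites listed above (i2c-only device, SMBus emulation over master_xfer) are what make the direct SMBus call a safe drop-in for the regmap write.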
Signed-off-by: Marco Felsch Reviewed-by: Guenter Roeck Reviewed-by: Stefan Lengfeld Tested-by: Stefan Lengfeld Reviewed-by: Adam Thomson Link: https://lore.kernel.org/r/20200115162307.7336-1-m.felsch@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9062_wdt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 2a1e7de25b71..47eefe072b40 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -147,12 +148,13 @@ static int da9062_wdt_restart(struct watchdog_device *wdd, unsigned long action, void *data) { struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); + struct i2c_client *client = to_i2c_client(wdt->hw->dev); int ret; - ret = regmap_write(wdt->hw->regmap, - DA9062AA_CONTROL_F, - DA9062AA_SHUTDOWN_MASK); - if (ret) + /* Don't use regmap because it is not atomic safe */ + ret = i2c_smbus_write_byte_data(client, DA9062AA_CONTROL_F, + DA9062AA_SHUTDOWN_MASK); + if (ret < 0) dev_alert(wdt->hw->dev, "Failed to shutdown (err = %d)\n", ret); From 8f5ac172abb79171eac9ecb7bedc071b56630097 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 12 Jul 2018 16:45:08 +0800 Subject: [PATCH 290/658] ceph: delete redundant douts in con_get/put() We print session's refcount in debug message inside ceph_put_mds_session() and get_session(), so we don't have to print it in con_get()/__ceph_lookup_mds_session()/con_put(). Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 145d46ba25ae..69631d145265 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4609,11 +4609,8 @@ static struct ceph_connection *con_get(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; - if (get_session(s)) { - dout("mdsc con_get %p ok (%d)\n", s, refcount_read(&s->s_ref)); + if (get_session(s)) return con; - } - dout("mdsc con_get %p FAIL\n", s); return NULL; } @@ -4621,7 +4618,6 @@ static void con_put(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; - dout("mdsc con_put %p (%d)\n", s, refcount_read(&s->s_ref) - 1); ceph_put_mds_session(s); } From d80865bff5201cc56bc247989ebbab6169b3a101 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 17 Aug 2018 22:05:31 +0800 Subject: [PATCH 291/658] ceph: remove unnecessary assignment in ceph_pre_init_acls() ceph_pagelist_encode_string() will not fail in reserved case, also, we do not check err code here, so remove unnecessary assignment. 
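The rule being leaned on is the reserve-then-encode contract of the pagelist API: once ceph_pagelist_reserve() has succeeded for the total size, later encode calls draw from that reserved space and cannot fail, so only calls that can still fail on their own (posix_acl_to_xattr() here) keep their error check. Condensed:

err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8);
if (err)
        goto out_err;

/* draws from the space reserved above, so there is no error to check */
ceph_pagelist_encode_string(pagelist, XATTR_NAME_POSIX_ACL_DEFAULT, len);

err = posix_acl_to_xattr(&init_user_ns, default_acl, tmp_buf, val_size2);
if (err < 0)
        goto out_err;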
Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index aa55f412a6e3..26be6520d3fb 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -222,8 +222,8 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8); if (err) goto out_err; - err = ceph_pagelist_encode_string(pagelist, - XATTR_NAME_POSIX_ACL_DEFAULT, len); + ceph_pagelist_encode_string(pagelist, + XATTR_NAME_POSIX_ACL_DEFAULT, len); err = posix_acl_to_xattr(&init_user_ns, default_acl, tmp_buf, val_size2); if (err < 0) From 4d7ace02ba5c6ef1f8eeb32a86fef7c528bd7f36 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 26 Nov 2019 07:24:21 -0500 Subject: [PATCH 292/658] ceph: fix mdsmap cluster available check based on laggy number In case the max_mds > 1 in MDS cluster and there is no any standby MDS and all the max_mds MDSs are in up:active state, if one of the up:active MDSs is dead, the m->m_num_laggy in kclient will be 1. Then the mount will fail without considering other healthy MDSs. There manybe some MDSs still "in" the cluster but not in up:active state, we will ignore them. Only when all the up:active MDSs in the cluster are laggy will treat the cluster as not be available. In case decreasing the max_mds, the cluster will not stop the extra up:active MDSs immediately and there will be a latency. During it the up:active MDS number will be larger than the max_mds, so later the m_info memories will 100% be reallocated. Here will pick out the up:active MDSs as the m_num_mds and allocate the needed memories once. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mdsmap.c | 48 +++++++++++++++++++++---------------- include/linux/ceph/mdsmap.h | 5 ++-- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 471bac335fae..7a925e025c0a 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -113,6 +113,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) int err; u8 mdsmap_v, mdsmap_cv; u16 mdsmap_ev; + u32 possible_max_rank; m = kzalloc(sizeof(*m), GFP_NOFS); if (!m) @@ -138,14 +139,30 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_session_autoclose = ceph_decode_32(p); m->m_max_file_size = ceph_decode_64(p); m->m_max_mds = ceph_decode_32(p); - m->m_num_mds = m->m_max_mds; + + /* + * pick out the active nodes as the m_num_mds, the m_num_mds + * maybe larger than m_max_mds when decreasing the max_mds in + * cluster side, in other case it should less than or equal + * to m_max_mds. + */ + m->m_num_mds = n = ceph_decode_32(p); + m->m_num_active_mds = m->m_num_mds; + + /* + * the possible max rank, it maybe larger than the m->m_num_mds, + * for example if the mds_max == 2 in the cluster, when the MDS(0) + * was laggy and being replaced by a new MDS, we will temporarily + * receive a new mds map with n_num_mds == 1 and the active MDS(1), + * and the mds rank >= m->m_num_mds. + */ + possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds); m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS); if (!m->m_info) goto nomem; /* pick out active nodes from mds_info (state > 0) */ - n = ceph_decode_32(p); for (i = 0; i < n; i++) { u64 global_id; u32 namelen; @@ -215,18 +232,15 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_mds_state_name(state), laggy ? 
"(laggy)" : ""); - if (mds < 0 || state <= 0) + if (mds < 0 || mds >= possible_max_rank) { + pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); continue; + } - if (mds >= m->m_num_mds) { - int new_num = max(mds + 1, m->m_num_mds * 2); - void *new_m_info = krealloc(m->m_info, - new_num * sizeof(*m->m_info), - GFP_NOFS | __GFP_ZERO); - if (!new_m_info) - goto nomem; - m->m_info = new_m_info; - m->m_num_mds = new_num; + if (state <= 0) { + pr_warn("mdsmap_decode got incorrect state(%s)\n", + ceph_mds_state_name(state)); + continue; } info = &m->m_info[mds]; @@ -247,14 +261,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) info->export_targets = NULL; } } - if (m->m_num_mds > m->m_max_mds) { - /* find max up mds */ - for (i = m->m_num_mds; i >= m->m_max_mds; i--) { - if (i == 0 || m->m_info[i-1].state > 0) - break; - } - m->m_num_mds = i; - } /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); @@ -396,7 +402,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) return false; if (m->m_damaged) return false; - if (m->m_num_laggy > 0) + if (m->m_num_laggy == m->m_num_active_mds) return false; for (i = 0; i < m->m_num_mds; i++) { if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 0067d767c9ae..3a66f4f926ce 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,8 +25,9 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u32 m_max_mds; /* size of m_addr, m_state arrays */ - int m_num_mds; + u32 m_max_mds; /* expected up:active mds number */ + int m_num_active_mds; /* actual up:active mds number */ + int m_num_mds; /* size of m_info array */ struct ceph_mds_info *m_info; /* which object pools file data can be stored in */ From 5d47648fe95412beffe2089d6d6484adb5ea0f96 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 26 Nov 2019 07:24:22 -0500 Subject: [PATCH 293/658] ceph: only choose one MDS who is in up:active state without laggy Even the MDS is in up:active state, but it also maybe laggy. Here will skip the laggy MDSs. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 13 +++++++++---- fs/ceph/mdsmap.c | 30 +++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 69631d145265..1b53aceb54bd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -974,14 +974,14 @@ static int __choose_mds(struct ceph_mds_client *mdsc, frag.frag, mds, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= - CEPH_MDS_STATE_ACTIVE) + CEPH_MDS_STATE_ACTIVE && + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) goto out; } /* since this file/dir wasn't known to be * replicated, then we want to look for the * authoritative mds. 
*/ - mode = USE_AUTH_MDS; if (frag.mds >= 0) { /* choose auth mds */ mds = frag.mds; @@ -989,9 +989,14 @@ static int __choose_mds(struct ceph_mds_client *mdsc, "frag %u mds%d (auth)\n", inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= - CEPH_MDS_STATE_ACTIVE) - goto out; + CEPH_MDS_STATE_ACTIVE) { + if (mode == USE_ANY_MDS && + !ceph_mdsmap_is_laggy(mdsc->mdsmap, + mds)) + goto out; + } } + mode = USE_AUTH_MDS; } } diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 7a925e025c0a..a77e0ecb9a6b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -13,22 +13,24 @@ #include "super.h" +#define CEPH_MDS_IS_READY(i, ignore_laggy) \ + (m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy)) -/* - * choose a random mds that is "up" (i.e. has a state > 0), or -1. - */ -int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) +static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) { int n = 0; int i, j; - /* special case for one mds */ + /* + * special case for one mds, no matter it is laggy or + * not we have no choice + */ if (1 == m->m_num_mds && m->m_info[0].state > 0) return 0; /* count */ for (i = 0; i < m->m_num_mds; i++) - if (m->m_info[i].state > 0) + if (CEPH_MDS_IS_READY(i, ignore_laggy)) n++; if (n == 0) return -1; @@ -36,7 +38,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) /* pick */ n = prandom_u32() % n; for (j = 0, i = 0; i < m->m_num_mds; i++) { - if (m->m_info[i].state > 0) + if (CEPH_MDS_IS_READY(i, ignore_laggy)) j++; if (j > n) break; @@ -45,6 +47,20 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) return i; } +/* + * choose a random mds that is "up" (i.e. has a state > 0), or -1. + */ +int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) +{ + int mds; + + mds = __mdsmap_get_random_mds(m, false); + if (mds == m->m_num_mds || mds == -1) + mds = __mdsmap_get_random_mds(m, true); + + return mds == m->m_num_mds ? -1 : mds; +} + #define __decode_and_drop_type(p, end, type, bad) \ do { \ if (*p + sizeof(type) > end) \ From 07edc0571ef1b13e124b462aca8d09f79809d6dd Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 4 Dec 2019 01:27:18 -0500 Subject: [PATCH 294/658] ceph: fix possible long time wait during umount During umount, if there has no any unsafe request in the mdsc and some requests still in-flight and not got reply yet, and if the rest requets are all safe ones, after that even all of them in mdsc are unregistered, the umount must wait until after mount_timeout seconds anyway. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1b53aceb54bd..6dca3b4d03a9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2884,6 +2884,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags); __unregister_request(mdsc, req); + /* last request during umount? */ + if (mdsc->stopping && !__get_oldest_req(mdsc)) + complete_all(&mdsc->safe_umount_waiters); + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { /* * We already handled the unsafe response, now do the @@ -2894,9 +2898,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) */ dout("got safe reply %llu, mds%d\n", tid, mds); - /* last unsafe request during umount? 
*/ - if (mdsc->stopping && !__get_oldest_req(mdsc)) - complete_all(&mdsc->safe_umount_waiters); mutex_unlock(&mdsc->mutex); goto out; } From 57c219948245cb1e8970040a365058baab450316 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 4 Dec 2019 15:28:17 -0500 Subject: [PATCH 295/658] ceph: drop unused ttl_from parameter from fill_inode Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index c07407586ce8..5bdc1afc2bee 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -728,8 +728,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_info_in *iinfo, struct ceph_mds_reply_dirfrag *dirinfo, - struct ceph_mds_session *session, - unsigned long ttl_from, int cap_fmode, + struct ceph_mds_session *session, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; @@ -1237,7 +1236,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) if (dir) { err = fill_inode(dir, NULL, &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1, + session, -1, &req->r_caps_reservation); if (err < 0) goto done; @@ -1305,9 +1304,9 @@ retry_lookup: req->r_target_inode = in; err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, - session, req->r_request_started, + session, (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && - rinfo->head->result == 0) ? req->r_fmode : -1, + rinfo->head->result == 0) ? req->r_fmode : -1, &req->r_caps_reservation); if (err < 0) { pr_err("fill_inode badness %p %llx.%llx\n", @@ -1493,8 +1492,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, continue; } rc = fill_inode(in, NULL, &rde->inode, NULL, session, - req->r_request_started, -1, - &req->r_caps_reservation); + -1, &req->r_caps_reservation); if (rc < 0) { pr_err("fill_inode badness on %p got %d\n", in, rc); err = rc; @@ -1694,8 +1692,7 @@ retry_lookup: } ret = fill_inode(in, NULL, &rde->inode, NULL, session, - req->r_request_started, -1, - &req->r_caps_reservation); + -1, &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); if (d_really_is_negative(dn)) { From 9a6bed4fe0c8bf57785cbc4db9f86086cb9b193d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Dec 2019 08:41:25 -0500 Subject: [PATCH 296/658] ceph: ensure we have a new cap before continuing in fill_inode If the caller passes in a NULL cap_reservation, and we can't allocate one then ensure that we fail gracefully. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5bdc1afc2bee..b5f068582970 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -753,8 +753,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page, info_caps = le32_to_cpu(info->cap.caps); /* prealloc new cap struct */ - if (info_caps && ceph_snap(inode) == CEPH_NOSNAP) + if (info_caps && ceph_snap(inode) == CEPH_NOSNAP) { new_cap = ceph_get_cap(mdsc, caps_reservation); + if (!new_cap) + return -ENOMEM; + } /* * prealloc xattr data, if it looks like we'll need it. 
only From 9cf54563b090f52db10ae6ebdca29dcc76bc7f34 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 5 Dec 2019 20:50:21 -0500 Subject: [PATCH 297/658] ceph: add __send_request helper Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 47 +++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6dca3b4d03a9..627cf0326b97 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2522,6 +2522,26 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, return 0; } +/* + * called under mdsc->mutex + */ +static int __send_request(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_mds_request *req, + bool drop_cap_releases) +{ + int err; + + err = __prepare_send_request(mdsc, req, session->s_mds, + drop_cap_releases); + if (!err) { + ceph_msg_get(req->r_request); + ceph_con_send(&session->s_con, req->r_request); + } + + return err; +} + /* * send request, or put it on the appropriate wait list. */ @@ -2611,11 +2631,7 @@ static void __do_request(struct ceph_mds_client *mdsc, if (req->r_request_started == 0) /* note request start time */ req->r_request_started = jiffies; - err = __prepare_send_request(mdsc, req, mds, false); - if (!err) { - ceph_msg_get(req->r_request); - ceph_con_send(&session->s_con, req->r_request); - } + err = __send_request(mdsc, session, req, false); out_session: ceph_put_mds_session(session); @@ -3217,7 +3233,6 @@ bad: return; } - /* * called under session->mutex. */ @@ -3226,18 +3241,12 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, { struct ceph_mds_request *req, *nreq; struct rb_node *p; - int err; dout("replay_unsafe_requests mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); - list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { - err = __prepare_send_request(mdsc, req, session->s_mds, true); - if (!err) { - ceph_msg_get(req->r_request); - ceph_con_send(&session->s_con, req->r_request); - } - } + list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) + __send_request(mdsc, session, req, true); /* * also re-send old requests when MDS enters reconnect stage. So that MDS @@ -3252,14 +3261,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, if (req->r_attempts == 0) continue; /* only old requests */ if (req->r_session && - req->r_session->s_mds == session->s_mds) { - err = __prepare_send_request(mdsc, req, - session->s_mds, true); - if (!err) { - ceph_msg_get(req->r_request); - ceph_con_send(&session->s_con, req->r_request); - } - } + req->r_session->s_mds == session->s_mds) + __send_request(mdsc, session, req, true); } mutex_unlock(&mdsc->mutex); } From 4d681c2f9141cf50261eef85b3233151c83d068b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 5 Dec 2019 22:35:51 -0500 Subject: [PATCH 298/658] ceph: keep the session state until it is released When reconnecting the session but if it is denied by the MDS due to client was in blacklist or something else, kclient will receive a session close reply, and we will never see the important log: "ceph: mds%d reconnect denied" And with the confusing log: "ceph: handle_session mds0 close 0000000085804730 state ??? seq 0" Let's keep the session state until its memories is released. 
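Concretely, __unregister_session() stops zeroing s_state and the CLOSE handler marks the session CEPH_MDS_SESSION_CLOSED, so a later ceph_session_state_name() lookup prints something meaningful instead of "???". The unregister helper after this change:

static void __unregister_session(struct ceph_mds_client *mdsc,
                                 struct ceph_mds_session *s)
{
        dout("__unregister_session mds%d %p\n", s->s_mds, s);
        BUG_ON(mdsc->sessions[s->s_mds] != s);
        mdsc->sessions[s->s_mds] = NULL;
        /* s->s_state is left as-is until the session memory is released */
        ceph_con_close(&s->s_con);
        ceph_put_mds_session(s);
        atomic_dec(&mdsc->num_sessions);
}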
Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 ++- fs/ceph/mds_client.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 627cf0326b97..18fa8f866eef 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -530,6 +530,7 @@ const char *ceph_session_state_name(int s) case CEPH_MDS_SESSION_OPEN: return "open"; case CEPH_MDS_SESSION_HUNG: return "hung"; case CEPH_MDS_SESSION_CLOSING: return "closing"; + case CEPH_MDS_SESSION_CLOSED: return "closed"; case CEPH_MDS_SESSION_RESTARTING: return "restarting"; case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting"; case CEPH_MDS_SESSION_REJECTED: return "rejected"; @@ -674,7 +675,6 @@ static void __unregister_session(struct ceph_mds_client *mdsc, dout("__unregister_session mds%d %p\n", s->s_mds, s); BUG_ON(mdsc->sessions[s->s_mds] != s); mdsc->sessions[s->s_mds] = NULL; - s->s_state = 0; ceph_con_close(&s->s_con); ceph_put_mds_session(s); atomic_dec(&mdsc->num_sessions); @@ -3166,6 +3166,7 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_CLOSE: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) pr_info("mds%d reconnect denied\n", session->s_mds); + session->s_state = CEPH_MDS_SESSION_CLOSED; cleanup_session_requests(mdsc, session); remove_session_caps(session); wake = 2; /* for good measure */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 14c7e8c49970..fe085e06adf5 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -151,7 +151,8 @@ enum { CEPH_MDS_SESSION_RESTARTING = 5, CEPH_MDS_SESSION_RECONNECTING = 6, CEPH_MDS_SESSION_CLOSING = 7, - CEPH_MDS_SESSION_REJECTED = 8, + CEPH_MDS_SESSION_CLOSED = 8, + CEPH_MDS_SESSION_REJECTED = 9, }; struct ceph_mds_session { From 97820058fb2831a4b203981fa2566ceaaa396103 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 10 Dec 2019 20:29:40 -0500 Subject: [PATCH 299/658] ceph: check availability of mds cluster on mount after wait timeout If all the MDS daemons are down for some reason, then the first mount attempt will fail with EIO after the mount request times out. A mount attempt will also fail with EIO if all of the MDS's are laggy. This patch changes the code to return -EHOSTUNREACH in these situations and adds a pr_info error message to help the admin determine the cause. 
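From userspace the visible change is the errno: a mount attempted while no MDS is up (or every MDS is laggy) now fails with EHOSTUNREACH ("No route to host") instead of a generic EIO. A hedged sketch of a caller taking advantage of that (paths and options are illustrative, not from this series):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>

static int try_ceph_mount(const char *src, const char *mnt, const char *opts)
{
        int err;

        if (mount(src, mnt, "ceph", 0, opts) == 0)
                return 0;

        err = errno;                    /* save before any library call clobbers it */
        if (err == EHOSTUNREACH)        /* no MDS up, or the whole cluster is laggy */
                fprintf(stderr, "ceph: MDS cluster unavailable: %s\n", strerror(err));
        else
                fprintf(stderr, "ceph: mount failed: %s\n", strerror(err));
        return -err;
}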
URL: https://tracker.ceph.com/issues/4386 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 +-- fs/ceph/super.c | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 18fa8f866eef..e1902663f8b8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2583,8 +2583,7 @@ static void __do_request(struct ceph_mds_client *mdsc, if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { - err = -ENOENT; - pr_info("probably no mds server is up\n"); + err = -EHOSTUNREACH; goto finish; } } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 29a795f975df..430dcf329723 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1070,6 +1070,11 @@ static int ceph_get_tree(struct fs_context *fc) return 0; out_splat: + if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { + pr_info("No mds server is up or the cluster is laggy\n"); + err = -EHOSTUNREACH; + } + ceph_mdsc_close_sessions(fsc->mdsc); deactivate_locked_super(sb); goto out_final; From c4853e9776caefbd2f59739ce1a75798a2b4b7a5 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 9 Dec 2019 07:47:15 -0500 Subject: [PATCH 300/658] ceph: retry the same mds later after the new session is opened If max_mds > 1 and a request is submitted that chooses a random mds rank, and the relating session is not opened yet, the request will wait until the session has been opened and resend again. Every time the request goes through __do_request, it will release the req->session first and choose a random one again, which may be a completely different rank than the one it just waited on. In the worst case, it will open all the mds sessions one by one just before the request can be successfully sent out. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index e1902663f8b8..07ecdfc8438d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -878,7 +878,8 @@ static struct inode *get_nonsnap_parent(struct dentry *dentry) * Called under mdsc->mutex. */ static int __choose_mds(struct ceph_mds_client *mdsc, - struct ceph_mds_request *req) + struct ceph_mds_request *req, + bool *random) { struct inode *inode; struct ceph_inode_info *ci; @@ -888,6 +889,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, u32 hash = req->r_direct_hash; bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); + if (random) + *random = false; + /* * is there a specific mds we should try? ignore hint if we have * no session and the mds is not up (active or recovering). 
@@ -1023,6 +1027,9 @@ out: return mds; random: + if (random) + *random = true; + mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); dout("choose_mds chose random mds%d\n", mds); return mds; @@ -2551,6 +2558,7 @@ static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_session *session = NULL; int mds = -1; int err = 0; + bool random; if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) @@ -2590,7 +2598,7 @@ static void __do_request(struct ceph_mds_client *mdsc, put_request_session(req); - mds = __choose_mds(mdsc, req); + mds = __choose_mds(mdsc, req, &random); if (mds < 0 || ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { dout("do_request no mds or not active, waiting for map\n"); @@ -2618,8 +2626,12 @@ static void __do_request(struct ceph_mds_client *mdsc, goto out_session; } if (session->s_state == CEPH_MDS_SESSION_NEW || - session->s_state == CEPH_MDS_SESSION_CLOSING) + session->s_state == CEPH_MDS_SESSION_CLOSING) { __open_session(mdsc, session); + /* retry the same mds later */ + if (random) + req->r_resend_mds = mds; + } list_add(&req->r_wait, &session->s_waiting); goto out_session; } @@ -2883,7 +2895,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_unlock(&mdsc->mutex); goto out; } else { - int mds = __choose_mds(mdsc, req); + int mds = __choose_mds(mdsc, req, NULL); if (mds >= 0 && mds != req->r_session->s_mds) { dout("but auth changed, so resending\n"); __do_request(mdsc, req); From 893e456b2c0bae61e172d2600a89c96abf9b3daf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Dec 2019 15:21:24 -0500 Subject: [PATCH 301/658] ceph: don't clear I_NEW until inode metadata is fully populated Currently, we could have an open-by-handle (or NFS server) call into the filesystem and start working with an inode before it's properly filled out. Don't clear I_NEW until we have filled out the inode, and discard it properly if that fails. Note that we occasionally take an extra reference to the inode to ensure that we don't put the last reference in discard_new_inode, but rather leave it for ceph_async_iput. 
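The VFS contract behind this: an inode that iget5_locked() hands back with I_NEW set is not yet visible to other lookups; unlock_new_inode() publishes it, and a partially filled one must be dropped with discard_new_inode() rather than a plain iput(). A minimal sketch of the pattern (example_fill_inode() is a placeholder, not a ceph function):

#include <linux/err.h>
#include <linux/fs.h>

static int example_fill_inode(struct inode *inode, void *data);        /* placeholder */

static struct inode *example_iget(struct super_block *sb, unsigned long hashval,
                                  int (*test)(struct inode *, void *),
                                  int (*set)(struct inode *, void *), void *data)
{
        struct inode *inode;
        int err;

        inode = iget5_locked(sb, hashval, test, set, data);
        if (!inode)
                return ERR_PTR(-ENOMEM);
        if (!(inode->i_state & I_NEW))
                return inode;                   /* already initialized, just reuse it */

        err = example_fill_inode(inode, data);  /* populate from backend metadata */
        if (err) {
                discard_new_inode(inode);       /* clears I_NEW and drops the reference */
                return ERR_PTR(err);
        }
        unlock_new_inode(inode);                /* publish: clears I_NEW, wakes waiters */
        return inode;
}

In the readdir paths the patch additionally takes an ihold() before discard_new_inode() so the final iput() still happens via ceph_async_iput() instead of in the MDS dispatch thread.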
Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index b5f068582970..d01710a16a4a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); if (!inode) return ERR_PTR(-ENOMEM); - if (inode->i_state & I_NEW) { + if (inode->i_state & I_NEW) dout("get_inode created new inode %p %llx.%llx ino %llx\n", inode, ceph_vinop(inode), (u64)inode->i_ino); - unlock_new_inode(inode); - } dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, vino.snap, inode); @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) inode->i_fop = &ceph_snapdir_fops; ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ ci->i_rbytes = 0; + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + return inode; } @@ -1304,7 +1306,6 @@ retry_lookup: err = PTR_ERR(in); goto done; } - req->r_target_inode = in; err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, @@ -1314,8 +1315,13 @@ retry_lookup: if (err < 0) { pr_err("fill_inode badness %p %llx.%llx\n", in, ceph_vinop(in)); + if (in->i_state & I_NEW) + discard_new_inode(in); goto done; } + req->r_target_inode = in; + if (in->i_state & I_NEW) + unlock_new_inode(in); } /* @@ -1499,7 +1505,14 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, if (rc < 0) { pr_err("fill_inode badness on %p got %d\n", in, rc); err = rc; + if (in->i_state & I_NEW) { + ihold(in); + discard_new_inode(in); + } + } else if (in->i_state & I_NEW) { + unlock_new_inode(in); } + /* avoid calling iput_final() in mds dispatch threads */ ceph_async_iput(in); } @@ -1701,12 +1714,18 @@ retry_lookup: if (d_really_is_negative(dn)) { /* avoid calling iput_final() in mds * dispatch threads */ + if (in->i_state & I_NEW) { + ihold(in); + discard_new_inode(in); + } ceph_async_iput(in); } d_drop(dn); err = ret; goto next_item; } + if (in->i_state & I_NEW) + unlock_new_inode(in); if (d_really_is_negative(dn)) { if (ceph_security_xattr_deadlock(in)) { From 9f8b72b3a9485d659410989c6daf5467ebe264ea Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 16 Dec 2019 00:12:07 -0500 Subject: [PATCH 302/658] ceph: only touch the caps which have the subset mask requested For the caps having no any subset mask requested we shouldn't touch them. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9d09bb53c1ab..28ae0c134700 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -908,7 +908,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) ci_node); if (!__cap_is_valid(cap)) continue; - __touch_cap(cap); + if (cap->issued & mask) + __touch_cap(cap); } } return 1; From 0eb308531f0776fc87f7a7eb4a8efe943d98ab8c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 18 Dec 2019 21:15:18 -0500 Subject: [PATCH 303/658] ceph: print dentry offset in hex and fix xattr_version type In the debug logs about the di->offset or ctx->pos it is in hex format, but some others are using the dec format. It is a little hard to read. For the xattr version, it is u64 type, using a shorter type may truncate it. 
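Both hunks are printk-format hygiene: the offsets are easier to correlate with the rest of the debug output in hex, and a u64 has to be held and printed as one (0x%llx / %llu) rather than narrowed into an int. The intended forms, as in the diff below:

u64 xattr_version;      /* was: int, which could truncate the 64-bit version */

dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry,
     dentry, inode, ceph_dentry(dentry)->offset);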
Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 4 ++-- fs/ceph/xattr.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2e4764fd1872..d0cd0aba5843 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1186,7 +1186,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) struct dentry *dn = di->dentry; struct ceph_mds_client *mdsc; - dout("dentry_dir_lease_touch %p %p '%pd' (offset %lld)\n", + dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n", di, dn, dn, di->offset); if (!list_empty(&di->lease_list)) { @@ -1567,7 +1567,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); } - dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry, + dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry, dentry, inode, ceph_dentry(dentry)->offset); /* always trust cached snapped dentries, snapdir dentry */ diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index cb18ee637cb7..98a9a3101cda 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -655,7 +655,7 @@ static int __build_xattrs(struct inode *inode) u32 len; const char *name, *val; struct ceph_inode_info *ci = ceph_inode(inode); - int xattr_version; + u64 xattr_version; struct ceph_inode_xattr **xattrs = NULL; int err = 0; int i; From b38c9eb4757d5bac1eb8634a9516ef918fca2525 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 4 Dec 2019 06:57:39 -0500 Subject: [PATCH 304/658] ceph: add possible_max_rank and make the code more readable The m_num_mds here is actually the number for MDSs which are in up:active status, and it will be duplicated to m_num_active_mds, so remove it. Add possible_max_rank to the mdsmap struct and this will be the correctly possible largest rank boundary. Remove the special case for one mds in __mdsmap_get_random_mds(), because the validate mds rank may not always be 0. 
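The selection logic itself stays a simple two-pass walk over the possible_max_rank ranks: count the eligible ones, draw a uniform index, then walk to it. Equivalent condensed form (the readiness test is written out inline instead of using the CEPH_MDS_IS_READY macro):

static int pick_random_ready_mds(struct ceph_mdsmap *m, bool ignore_laggy)
{
        int i, j, n = 0;

        /* pass 1: count ranks that are up (and, unless ignored, not laggy) */
        for (i = 0; i < m->possible_max_rank; i++)
                if (m->m_info[i].state > 0 &&
                    (ignore_laggy || !m->m_info[i].laggy))
                        n++;
        if (n == 0)
                return -1;

        /* pass 2: walk to the randomly chosen eligible rank */
        n = prandom_u32() % n;
        for (j = 0, i = 0; i < m->possible_max_rank; i++) {
                if (m->m_info[i].state > 0 &&
                    (ignore_laggy || !m->m_info[i].laggy))
                        j++;
                if (j > n)
                        break;
        }
        return i;
}

ceph_mdsmap_get_random_mds() calls this twice, first requiring non-laggy ranks and then, if nothing was found, accepting laggy but up ones.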
Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/debugfs.c | 2 +- fs/ceph/mds_client.c | 10 ++++---- fs/ceph/mdsmap.c | 49 +++++++++++++++---------------------- include/linux/ceph/mdsmap.h | 10 ++++---- 4 files changed, 31 insertions(+), 40 deletions(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index c281f32b54f7..fb7cabd98e7b 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p) seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds); seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout); seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose); - for (i = 0; i < mdsmap->m_num_mds; i++) { + for (i = 0; i < mdsmap->possible_max_rank; i++) { struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; int state = mdsmap->m_info[i].state; seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 07ecdfc8438d..aba7a56d055d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -598,7 +598,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; - if (mds >= mdsc->mdsmap->m_num_mds) + if (mds >= mdsc->mdsmap->possible_max_rank) return ERR_PTR(-EINVAL); s = kzalloc(sizeof(*s), GFP_NOFS); @@ -1231,7 +1231,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_session *ts; int i, mds = session->s_mds; - if (mds >= mdsc->mdsmap->m_num_mds) + if (mds >= mdsc->mdsmap->possible_max_rank) return; mi = &mdsc->mdsmap->m_info[mds]; @@ -3785,7 +3785,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, dout("check_new_map new %u old %u\n", newmap->m_epoch, oldmap->m_epoch); - for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) { + for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) { if (!mdsc->sessions[i]) continue; s = mdsc->sessions[i]; @@ -3799,7 +3799,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", ceph_session_state_name(s->s_state)); - if (i >= newmap->m_num_mds) { + if (i >= newmap->possible_max_rank) { /* force close session for stopped mds */ get_session(s); __unregister_session(mdsc, s); @@ -3856,7 +3856,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, } } - for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) { + for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) { s = mdsc->sessions[i]; if (!s) continue; diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index a77e0ecb9a6b..889627817e52 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -14,22 +14,15 @@ #include "super.h" #define CEPH_MDS_IS_READY(i, ignore_laggy) \ - (m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy)) + (m->m_info[i].state > 0 && ignore_laggy ? 
true : !m->m_info[i].laggy) static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) { int n = 0; int i, j; - /* - * special case for one mds, no matter it is laggy or - * not we have no choice - */ - if (1 == m->m_num_mds && m->m_info[0].state > 0) - return 0; - /* count */ - for (i = 0; i < m->m_num_mds; i++) + for (i = 0; i < m->possible_max_rank; i++) if (CEPH_MDS_IS_READY(i, ignore_laggy)) n++; if (n == 0) @@ -37,7 +30,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) /* pick */ n = prandom_u32() % n; - for (j = 0, i = 0; i < m->m_num_mds; i++) { + for (j = 0, i = 0; i < m->possible_max_rank; i++) { if (CEPH_MDS_IS_READY(i, ignore_laggy)) j++; if (j > n) @@ -55,10 +48,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) int mds; mds = __mdsmap_get_random_mds(m, false); - if (mds == m->m_num_mds || mds == -1) + if (mds == m->possible_max_rank || mds == -1) mds = __mdsmap_get_random_mds(m, true); - return mds == m->m_num_mds ? -1 : mds; + return mds == m->possible_max_rank ? -1 : mds; } #define __decode_and_drop_type(p, end, type, bad) \ @@ -129,7 +122,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) int err; u8 mdsmap_v, mdsmap_cv; u16 mdsmap_ev; - u32 possible_max_rank; m = kzalloc(sizeof(*m), GFP_NOFS); if (!m) @@ -157,24 +149,23 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_max_mds = ceph_decode_32(p); /* - * pick out the active nodes as the m_num_mds, the m_num_mds - * maybe larger than m_max_mds when decreasing the max_mds in - * cluster side, in other case it should less than or equal - * to m_max_mds. + * pick out the active nodes as the m_num_active_mds, the + * m_num_active_mds maybe larger than m_max_mds when decreasing + * the max_mds in cluster side, in other case it should less + * than or equal to m_max_mds. */ - m->m_num_mds = n = ceph_decode_32(p); - m->m_num_active_mds = m->m_num_mds; + m->m_num_active_mds = n = ceph_decode_32(p); /* - * the possible max rank, it maybe larger than the m->m_num_mds, + * the possible max rank, it maybe larger than the m_num_active_mds, * for example if the mds_max == 2 in the cluster, when the MDS(0) * was laggy and being replaced by a new MDS, we will temporarily * receive a new mds map with n_num_mds == 1 and the active MDS(1), - * and the mds rank >= m->m_num_mds. + * and the mds rank >= m_num_active_mds. */ - possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds); + m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds); - m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS); + m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS); if (!m->m_info) goto nomem; @@ -248,7 +239,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_mds_state_name(state), laggy ? 
"(laggy)" : ""); - if (mds < 0 || mds >= possible_max_rank) { + if (mds < 0 || mds >= m->possible_max_rank) { pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); continue; } @@ -318,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) for (i = 0; i < n; i++) { s32 mds = ceph_decode_32(p); - if (mds >= 0 && mds < m->m_num_mds) { + if (mds >= 0 && mds < m->possible_max_rank) { if (m->m_info[mds].laggy) num_laggy++; } } m->m_num_laggy = num_laggy; - if (n > m->m_num_mds) { + if (n > m->possible_max_rank) { void *new_m_info = krealloc(m->m_info, n * sizeof(*m->m_info), GFP_NOFS | __GFP_ZERO); @@ -333,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) goto nomem; m->m_info = new_m_info; } - m->m_num_mds = n; + m->possible_max_rank = n; } /* inc */ @@ -404,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) { int i; - for (i = 0; i < m->m_num_mds; i++) + for (i = 0; i < m->possible_max_rank; i++) kfree(m->m_info[i].export_targets); kfree(m->m_info); kfree(m->m_data_pg_pools); @@ -420,7 +411,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) return false; if (m->m_num_laggy == m->m_num_active_mds) return false; - for (i = 0; i < m->m_num_mds; i++) { + for (i = 0; i < m->possible_max_rank; i++) { if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) nr_active++; } diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 3a66f4f926ce..35d385296fbb 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -26,8 +26,8 @@ struct ceph_mdsmap { u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; u32 m_max_mds; /* expected up:active mds number */ - int m_num_active_mds; /* actual up:active mds number */ - int m_num_mds; /* size of m_info array */ + u32 m_num_active_mds; /* actual up:active mds number */ + u32 possible_max_rank; /* possible max rank index */ struct ceph_mds_info *m_info; /* which object pools file data can be stored in */ @@ -43,7 +43,7 @@ struct ceph_mdsmap { static inline struct ceph_entity_addr * ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) { - if (w >= m->m_num_mds) + if (w >= m->possible_max_rank) return NULL; return &m->m_info[w].addr; } @@ -51,14 +51,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) { BUG_ON(w < 0); - if (w >= m->m_num_mds) + if (w >= m->possible_max_rank) return CEPH_MDS_STATE_DNE; return m->m_info[w].state; } static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) { - if (w >= 0 && w < m->m_num_mds) + if (w >= 0 && w < m->possible_max_rank) return m->m_info[w].laggy; return false; } From 4fbc0c711b2464ee1551850b85002faae0b775d5 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 20 Dec 2019 09:34:04 -0500 Subject: [PATCH 305/658] ceph: remove the extra slashes in the server path It's possible to pass the mount helper a server path that has more than one contiguous slash character. 
For example: $ mount -t ceph 192.168.195.165:40176:/// /mnt/cephfs/ In the MDS server side the extra slashes of the server path will be treated as snap dir, and then we can get the following debug logs: ceph: mount opening path // ceph: open_root_inode opening '//' ceph: fill_trace 0000000059b8a3bc is_dentry 0 is_target 1 ceph: alloc_inode 00000000dc4ca00b ceph: get_inode created new inode 00000000dc4ca00b 1.ffffffffffffffff ino 1 ceph: get_inode on 1=1.ffffffffffffffff got 00000000dc4ca00b And then when creating any new file or directory under the mount point, we can hit the following BUG_ON in ceph_fill_trace(): BUG_ON(ceph_snap(dir) != dvino.snap); Have the client ignore the extra slashes in the server path when mounting. This will also canonicalize the path, so that identical mounts can be consilidated. 1) "//mydir1///mydir//" 2) "/mydir1/mydir" 3) "/mydir1/mydir/" Regardless of the internal treatment of these paths, the kernel still stores the original string including the leading '/' for presentation to userland. URL: https://tracker.ceph.com/issues/42771 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 122 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 102 insertions(+), 20 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 430dcf329723..112927dbd2f2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -107,7 +107,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); @@ -211,7 +210,6 @@ struct ceph_parse_opts_ctx { /* * Parse the source parameter. Distinguish the server list from the path. - * Internally we do not include the leading '/' in the path. * * The source will look like: * [,...]:[] @@ -232,12 +230,15 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { - if (strlen(dev_name_end) > 1) { - kfree(fsopt->server_path); - fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); - if (!fsopt->server_path) - return -ENOMEM; - } + kfree(fsopt->server_path); + + /* + * The server_path will include the whole chars from userland + * including the leading '/'. 
+ */ + fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); + if (!fsopt->server_path) + return -ENOMEM; } else { dev_name_end = dev_name + strlen(dev_name); } @@ -461,6 +462,73 @@ static int strcmp_null(const char *s1, const char *s2) return strcmp(s1, s2); } +/** + * path_remove_extra_slash - Remove the extra slashes in the server path + * @server_path: the server path and could be NULL + * + * Return NULL if the path is NULL or only consists of "/", or a string + * without any extra slashes including the leading slash(es) and the + * slash(es) at the end of the server path, such as: + * "//dir1////dir2///" --> "dir1/dir2" + */ +static char *path_remove_extra_slash(const char *server_path) +{ + const char *path = server_path; + const char *cur, *end; + char *buf, *p; + int len; + + /* if the server path is omitted */ + if (!path) + return NULL; + + /* remove all the leading slashes */ + while (*path == '/') + path++; + + /* if the server path only consists of slashes */ + if (*path == '\0') + return NULL; + + len = strlen(path); + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + end = path + len; + p = buf; + do { + cur = strchr(path, '/'); + if (!cur) + cur = end; + + len = cur - path; + + /* including one '/' */ + if (cur != end) + len += 1; + + memcpy(p, path, len); + p += len; + + while (cur <= end && *cur == '/') + cur++; + path = cur; + } while (path < end); + + *p = '\0'; + + /* + * remove the last slash if there has and just to make sure that + * we will get something like "dir1/dir2" + */ + if (*(--p) == '/') + *p = '\0'; + + return buf; +} + static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_options *new_opt, struct ceph_fs_client *fsc) @@ -468,6 +536,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_mount_options *fsopt1 = new_fsopt; struct ceph_mount_options *fsopt2 = fsc->mount_options; int ofs = offsetof(struct ceph_mount_options, snapdir_name); + char *p1, *p2; int ret; ret = memcmp(fsopt1, fsopt2, ofs); @@ -480,9 +549,21 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; - ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); + + p1 = path_remove_extra_slash(fsopt1->server_path); + if (IS_ERR(p1)) + return PTR_ERR(p1); + p2 = path_remove_extra_slash(fsopt2->server_path); + if (IS_ERR(p2)) { + kfree(p1); + return PTR_ERR(p2); + } + ret = strcmp_null(p1, p2); + kfree(p1); + kfree(p2); if (ret) return ret; + ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); if (ret) return ret; @@ -788,7 +869,6 @@ static void destroy_caches(void) ceph_fscache_unregister(); } - /* * ceph_umount_begin - initiate forced umount. Tear down down the * mount, skipping steps that may hang while waiting for server(s). @@ -868,9 +948,6 @@ out: return root; } - - - /* * mount: join the ceph cluster, and open root directory. 
*/ @@ -885,7 +962,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { - const char *path; + const char *path, *p; err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; @@ -897,17 +974,22 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, goto out; } - if (!fsc->mount_options->server_path) { - path = ""; - dout("mount opening path \\t\n"); - } else { - path = fsc->mount_options->server_path + 1; - dout("mount opening path %s\n", path); + p = path_remove_extra_slash(fsc->mount_options->server_path); + if (IS_ERR(p)) { + err = PTR_ERR(p); + goto out; } + /* if the server path is omitted or just consists of '/' */ + if (!p) + path = ""; + else + path = p; + dout("mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); root = open_root_dentry(fsc, path, started); + kfree(p); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; From 5b3248c6772459a0737afe0c85bb45ee3ba79eeb Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 19 Dec 2019 19:44:09 -0500 Subject: [PATCH 306/658] ceph: rename get_session and switch to use ceph_get_mds_session Just in case the session's refcount reach 0 and is releasing, and if we get the session without checking it, we may encounter kernel crash. Rename get_session to ceph_get_mds_session and make it global. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 16 ++++++++-------- fs/ceph/mds_client.h | 9 ++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index aba7a56d055d..f7c9a56514f9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -538,7 +538,7 @@ const char *ceph_session_state_name(int s) } } -static struct ceph_mds_session *get_session(struct ceph_mds_session *s) +struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s) { if (refcount_inc_not_zero(&s->s_ref)) { dout("mdsc get_session %p %d -> %d\n", s, @@ -569,7 +569,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, { if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) return NULL; - return get_session(mdsc->sessions[mds]); + return ceph_get_mds_session(mdsc->sessions[mds]); } static bool __have_session(struct ceph_mds_client *mdsc, int mds) @@ -1979,7 +1979,7 @@ void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, if (mdsc->stopping) return; - get_session(session); + ceph_get_mds_session(session); if (queue_work(mdsc->fsc->cap_wq, &session->s_cap_release_work)) { dout("cap release work queued\n"); @@ -2615,7 +2615,7 @@ static void __do_request(struct ceph_mds_client *mdsc, goto finish; } } - req->r_session = get_session(session); + req->r_session = ceph_get_mds_session(session); dout("do_request mds%d session %p state %s\n", mds, session, ceph_session_state_name(session->s_state)); @@ -3139,7 +3139,7 @@ static void handle_session(struct ceph_mds_session *session, mutex_lock(&mdsc->mutex); if (op == CEPH_SESSION_CLOSE) { - get_session(session); + ceph_get_mds_session(session); __unregister_session(mdsc, session); } /* FIXME: this ttl calculation is generous */ @@ -3801,7 +3801,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, if (i >= newmap->possible_max_rank) { /* force close session for stopped mds */ - get_session(s); + ceph_get_mds_session(s); __unregister_session(mdsc, s); __wake_requests(mdsc, &s->s_waiting); mutex_unlock(&mdsc->mutex); @@ -4402,7 +4402,7 @@ void 
ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { if (mdsc->sessions[i]) { - session = get_session(mdsc->sessions[i]); + session = ceph_get_mds_session(mdsc->sessions[i]); __unregister_session(mdsc, session); mutex_unlock(&mdsc->mutex); mutex_lock(&session->s_mutex); @@ -4630,7 +4630,7 @@ static struct ceph_connection *con_get(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; - if (get_session(s)) + if (ceph_get_mds_session(s)) return con; return NULL; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index fe085e06adf5..c021df5f50ce 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -452,15 +452,10 @@ extern const char *ceph_mds_op_name(int op); extern struct ceph_mds_session * __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); -static inline struct ceph_mds_session * -ceph_get_mds_session(struct ceph_mds_session *s) -{ - refcount_inc(&s->s_ref); - return s; -} - extern const char *ceph_session_state_name(int s); +extern struct ceph_mds_session * +ceph_get_mds_session(struct ceph_mds_session *s); extern void ceph_put_mds_session(struct ceph_mds_session *s); extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, From 9ba1e224538a021b989302bb2777abc7a3b3ec79 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 8 Jan 2020 05:17:31 -0500 Subject: [PATCH 307/658] ceph: allocate the correct amount of extra bytes for the session features The total bytes may potentially be larger than 8. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 20 ++++++++++++++------ fs/ceph/mds_client.h | 23 ++++++++++++++++------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f7c9a56514f9..c839664f86c6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -1057,20 +1058,21 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) return msg; } +static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; +#define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8) static void encode_supported_features(void **p, void *end) { - static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; - static const size_t count = ARRAY_SIZE(bits); + static const size_t count = ARRAY_SIZE(feature_bits); if (count > 0) { size_t i; - size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8; + size_t size = FEATURE_BYTES(count); BUG_ON(*p + 4 + size > end); ceph_encode_32(p, size); memset(*p, 0, size); for (i = 0; i < count; i++) - ((unsigned char*)(*p))[i / 8] |= 1 << (bits[i] % 8); + ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8); *p += size; } else { BUG_ON(*p + 4 > end); @@ -1091,6 +1093,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; + size_t size, count; void *p, *end; const char* metadata[][2] = { @@ -1108,8 +1111,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 strlen(metadata[i][1]); metadata_key_count++; } + /* supported feature */ - extra_bytes += 4 + 8; + size = 0; + count = ARRAY_SIZE(feature_bits); + if (count > 0) + size = FEATURE_BYTES(count); + extra_bytes += 4 + size; /* Allocate the 
message */ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, @@ -1129,7 +1137,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map * - * ClientSession messages with metadata are v2 + * ClientSession messages with metadata are v3 */ msg->hdr.version = cpu_to_le16(3); msg->hdr.compat_version = cpu_to_le16(1); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c021df5f50ce..c950f8f88f58 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -17,22 +17,31 @@ #include /* The first 8 bits are reserved for old ceph releases */ -#define CEPHFS_FEATURE_MIMIC 8 -#define CEPHFS_FEATURE_REPLY_ENCODING 9 -#define CEPHFS_FEATURE_RECLAIM_CLIENT 10 -#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11 -#define CEPHFS_FEATURE_MULTI_RECONNECT 12 +enum ceph_feature_type { + CEPHFS_FEATURE_MIMIC = 8, + CEPHFS_FEATURE_REPLY_ENCODING, + CEPHFS_FEATURE_RECLAIM_CLIENT, + CEPHFS_FEATURE_LAZY_CAP_WANTED, + CEPHFS_FEATURE_MULTI_RECONNECT, -#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MULTI_RECONNECT, +}; + +/* + * This will always have the highest feature bit value + * as the last element of the array. + */ +#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ 0, 1, 2, 3, 4, 5, 6, 7, \ CEPHFS_FEATURE_MIMIC, \ CEPHFS_FEATURE_REPLY_ENCODING, \ CEPHFS_FEATURE_LAZY_CAP_WANTED, \ CEPHFS_FEATURE_MULTI_RECONNECT, \ + \ + CEPHFS_FEATURE_MAX, \ } #define CEPHFS_FEATURES_CLIENT_REQUIRED {} - /* * Some lock dependencies: * From a55e601b2f02df5db7070e9a37bd655c9c576a52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 22:01:04 +0100 Subject: [PATCH 308/658] rbd: work around -Wuninitialized warning gcc -O3 warns about a dummy variable that is passed down into rbd_img_fill_nodata without being initialized: drivers/block/rbd.c: In function 'rbd_img_fill_nodata': drivers/block/rbd.c:2573:13: error: 'dummy' is used uninitialized in this function [-Werror=uninitialized] fctx->iter = *fctx->pos; Since this is a dummy, I assume the warning is harmless, but it's better to initialize it anyway and avoid the warning. Fixes: mmtom ("init/Kconfig: enable -O3 for all arches") Signed-off-by: Arnd Bergmann Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2b184563cd32..38dcb39051a7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2662,7 +2662,7 @@ static int rbd_img_fill_nodata(struct rbd_img_request *img_req, u64 off, u64 len) { struct ceph_file_extent ex = { off, len }; - union rbd_img_fill_iter dummy; + union rbd_img_fill_iter dummy = {}; struct rbd_img_fill_ctx fctx = { .pos_type = OBJ_REQUEST_NODATA, .pos = &dummy, From 045100cd79f503487b95a1d11e96b221fe50693c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 15 Nov 2019 09:13:59 -0500 Subject: [PATCH 309/658] ceph: close holes in structs ceph_mds_session and ceph_mds_request Move s_ref up to plug a 4 byte hole, which plugs another. Move r_kref to shave 8 bytes off per request on x86_64. 
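For illustration, a minimal userspace sketch (hypothetical struct, not the actual ceph ones) of why grouping 4-byte members together removes padding on x86_64, where pointers are 8 bytes and 8-byte aligned:

    #include <stdio.h>

    /* a 4-byte member wedged between 8-byte members leaves two 4-byte holes */
    struct scattered {
        void *a;      /* 8 bytes */
        int   refs;   /* 4 bytes + 4-byte hole */
        void *b;      /* 8 bytes */
        int   flags;  /* 4 bytes + 4-byte tail padding */
    };                /* sizeof == 32 on x86_64 */

    /* same members, 4-byte fields adjacent: both holes disappear */
    struct grouped {
        void *a;
        void *b;
        int   refs;
        int   flags;
    };                /* sizeof == 24 on x86_64 */

    int main(void)
    {
        printf("%zu %zu\n", sizeof(struct scattered), sizeof(struct grouped));
        return 0;
    }

pahole(1) reports such holes directly on the built objects; the moves below apply the same idea to s_ref and r_kref.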
Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c950f8f88f58..27a7446e10d3 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -184,6 +184,7 @@ struct ceph_mds_session { /* protected by s_cap_lock */ spinlock_t s_cap_lock; + refcount_t s_ref; struct list_head s_caps; /* all caps issued by this session */ struct ceph_cap *s_cap_iterator; int s_nr_caps; @@ -198,7 +199,6 @@ struct ceph_mds_session { unsigned long s_renew_requested; /* last time we sent a renew req */ u64 s_renew_seq; - refcount_t s_ref; struct list_head s_waiting; /* waiting requests */ struct list_head s_unsafe; /* unsafe requests */ }; @@ -234,6 +234,7 @@ struct ceph_mds_request { struct rb_node r_node; struct ceph_mds_client *r_mdsc; + struct kref r_kref; int r_op; /* mds op code */ /* operation on what? */ @@ -304,7 +305,6 @@ struct ceph_mds_request { int r_resend_mds; /* mds to resend to next, if any*/ u32 r_sent_on_mseq; /* cap mseq request was sent at*/ - struct kref r_kref; struct list_head r_wait; struct completion r_completion; struct completion r_safe_completion; From 78beb0ff2feceb1d7568333f93195e1a4d95a49a Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Wed, 8 Jan 2020 10:03:53 +0000 Subject: [PATCH 310/658] ceph: use copy-from2 op in copy_file_range Instead of using the copy-from operation, switch copy_file_range to the new copy-from2 operation, which allows to send the truncate_seq and truncate_size parameters. If an OSD does not support the copy-from2 operation it will return -EOPNOTSUPP. In that case, the kernel client will stop trying to do remote object copies for this fs client and will always use the generic VFS copy_file_range. Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 11 ++++++++++- fs/ceph/super.c | 1 + fs/ceph/super.h | 2 ++ include/linux/ceph/osd_client.h | 1 + include/linux/ceph/rados.h | 2 ++ net/ceph/osd_client.c | 18 ++++++++++++------ 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 11929d2bb594..c3b8e8e0bf17 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1974,6 +1974,9 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, if (ceph_test_mount_opt(src_fsc, NOCOPYFROM)) return -EOPNOTSUPP; + if (!src_fsc->have_copy_from2) + return -EOPNOTSUPP; + /* * Striped file layouts require that we copy partial objects, but the * OSD copy-from operation only supports full-object copies. 
Limit @@ -2101,8 +2104,14 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, CEPH_OSD_OP_FLAG_FADVISE_NOCACHE, &dst_oid, &dst_oloc, CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | - CEPH_OSD_OP_FLAG_FADVISE_DONTNEED, 0); + CEPH_OSD_OP_FLAG_FADVISE_DONTNEED, + dst_ci->i_truncate_seq, dst_ci->i_truncate_size, + CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ); if (err) { + if (err == -EOPNOTSUPP) { + src_fsc->have_copy_from2 = false; + pr_notice("OSDs don't support copy-from2; disabling copy offload\n"); + } dout("ceph_osdc_copy_from returned %d\n", err); if (!ret) ret = err; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 112927dbd2f2..bfb8aead0555 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -718,6 +718,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, fsc->sb = NULL; fsc->mount_state = CEPH_MOUNT_MOUNTING; fsc->filp_gen = 1; + fsc->have_copy_from2 = true; atomic_long_set(&fsc->writeback_count, 0); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3bf1a01cd536..1e456a9011bb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -106,6 +106,8 @@ struct ceph_fs_client { unsigned long last_auto_reconnect; bool blacklisted; + bool have_copy_from2; + u32 filp_gen; loff_t max_file_size; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index eaffbdddf89a..5a62dbd3f4c2 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -534,6 +534,7 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc, struct ceph_object_id *dst_oid, struct ceph_object_locator *dst_oloc, u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, u8 copy_from_flags); /* watch/notify */ diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 3eb0e55665b4..59bdfd470100 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -256,6 +256,7 @@ extern const char *ceph_osd_state_name(int s); \ /* tiering */ \ f(COPY_FROM, __CEPH_OSD_OP(WR, DATA, 26), "copy-from") \ + f(COPY_FROM2, __CEPH_OSD_OP(WR, DATA, 45), "copy-from2") \ f(COPY_GET_CLASSIC, __CEPH_OSD_OP(RD, DATA, 27), "copy-get-classic") \ f(UNDIRTY, __CEPH_OSD_OP(WR, DATA, 28), "undirty") \ f(ISDIRTY, __CEPH_OSD_OP(RD, DATA, 29), "isdirty") \ @@ -446,6 +447,7 @@ enum { CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to * cloneid */ CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16, /* order with write */ + CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ = 32, /* send truncate_{seq,size} */ }; enum { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ba45b074a362..b68b376d8c2f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -402,7 +402,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_LIST_WATCHERS: ceph_osd_data_release(&op->list_watchers.response_data); break; - case CEPH_OSD_OP_COPY_FROM: + case CEPH_OSD_OP_COPY_FROM2: ceph_osd_data_release(&op->copy_from.osd_data); break; default: @@ -697,7 +697,7 @@ static void get_num_data_items(struct ceph_osd_request *req, case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: case CEPH_OSD_OP_NOTIFY_ACK: - case CEPH_OSD_OP_COPY_FROM: + case CEPH_OSD_OP_COPY_FROM2: *num_request_data_items += 1; break; @@ -1029,7 +1029,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, case CEPH_OSD_OP_CREATE: case CEPH_OSD_OP_DELETE: break; - case CEPH_OSD_OP_COPY_FROM: + case CEPH_OSD_OP_COPY_FROM2: dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid); dst->copy_from.src_version = 
cpu_to_le64(src->copy_from.src_version); @@ -1966,7 +1966,7 @@ static void setup_request_data(struct ceph_osd_request *req) ceph_osdc_msg_data_add(request_msg, &op->notify_ack.request_data); break; - case CEPH_OSD_OP_COPY_FROM: + case CEPH_OSD_OP_COPY_FROM2: ceph_osdc_msg_data_add(request_msg, &op->copy_from.osd_data); break; @@ -5315,6 +5315,7 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req, struct ceph_object_locator *src_oloc, u32 src_fadvise_flags, u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, u8 copy_from_flags) { struct ceph_osd_req_op *op; @@ -5325,7 +5326,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req, if (IS_ERR(pages)) return PTR_ERR(pages); - op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags); + op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, + dst_fadvise_flags); op->copy_from.snapid = src_snapid; op->copy_from.src_version = src_version; op->copy_from.flags = copy_from_flags; @@ -5335,6 +5337,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req, end = p + PAGE_SIZE; ceph_encode_string(&p, end, src_oid->name, src_oid->name_len); encode_oloc(&p, end, src_oloc); + ceph_encode_32(&p, truncate_seq); + ceph_encode_64(&p, truncate_size); op->indata_len = PAGE_SIZE - (end - p); ceph_osd_data_pages_init(&op->copy_from.osd_data, pages, @@ -5350,6 +5354,7 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc, struct ceph_object_id *dst_oid, struct ceph_object_locator *dst_oloc, u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, u8 copy_from_flags) { struct ceph_osd_request *req; @@ -5366,7 +5371,8 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc, ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid, src_oloc, src_fadvise_flags, - dst_fadvise_flags, copy_from_flags); + dst_fadvise_flags, truncate_seq, + truncate_size, copy_from_flags); if (ret) goto out; From 3c802092dab69351b1c2e52a2250f47d5bf60253 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 1 Jan 2020 22:09:37 -0500 Subject: [PATCH 311/658] ceph: print r_direct_hash in hex in __choose_mds() dout It's hard to read, especially when it is: ceph: __choose_mds 00000000b7bc9c15 is_hash=1 (-271041095) mode 0 At the same time, switch to __func__ to get rid of the checkpatch warning. 
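A quick standalone illustration of the readability problem (arbitrary example value, not a real hash from the log above):

    #include <stdio.h>

    int main(void)
    {
        unsigned int hash = 0xdeadbeef;  /* arbitrary 32-bit hash */

        printf("is_hash=1 (%d)\n", (int)hash);  /* prints -559038737 */
        printf("is_hash=1 (0x%x)\n", hash);     /* prints 0xdeadbeef */
        return 0;
    }

The hex form can be compared against other hex-printed values without mentally converting a negative decimal.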
Signed-off-by: Xiubo Li Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c839664f86c6..011c779e4c76 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -900,7 +900,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (req->r_resend_mds >= 0 && (__have_session(mdsc, req->r_resend_mds) || ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) { - dout("choose_mds using resend_mds mds%d\n", + dout("%s using resend_mds mds%d\n", __func__, req->r_resend_mds); return req->r_resend_mds; } @@ -918,7 +918,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_lock(); inode = get_nonsnap_parent(req->r_dentry); rcu_read_unlock(); - dout("__choose_mds using snapdir's parent %p\n", inode); + dout("%s using snapdir's parent %p\n", __func__, inode); } } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ @@ -938,7 +938,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, /* direct snapped/virtual snapdir requests * based on parent dir inode */ inode = get_nonsnap_parent(parent); - dout("__choose_mds using nonsnap parent %p\n", inode); + dout("%s using nonsnap parent %p\n", __func__, inode); } else { /* dentry target */ inode = d_inode(req->r_dentry); @@ -954,8 +954,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_unlock(); } - dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, - (int)hash, mode); + dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash, + hash, mode); if (!inode) goto random; ci = ceph_inode(inode); @@ -973,11 +973,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, get_random_bytes(&r, 1); r %= frag.ndist; mds = frag.dist[r]; - dout("choose_mds %p %llx.%llx " - "frag %u mds%d (%d/%d)\n", - inode, ceph_vinop(inode), - frag.frag, mds, - (int)r, frag.ndist); + dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n", + __func__, inode, ceph_vinop(inode), + frag.frag, mds, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE && !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) @@ -990,9 +988,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (frag.mds >= 0) { /* choose auth mds */ mds = frag.mds; - dout("choose_mds %p %llx.%llx " - "frag %u mds%d (auth)\n", - inode, ceph_vinop(inode), frag.frag, mds); + dout("%s %p %llx.%llx frag %u mds%d (auth)\n", + __func__, inode, ceph_vinop(inode), + frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) { if (mode == USE_ANY_MDS && @@ -1017,7 +1015,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, goto random; } mds = cap->session->s_mds; - dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n", + dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__, inode, ceph_vinop(inode), mds, cap == ci->i_auth_cap ? 
"auth " : "", cap); spin_unlock(&ci->i_ceph_lock); @@ -1032,7 +1030,7 @@ random: *random = true; mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); - dout("choose_mds chose random mds%d\n", mds); + dout("%s chose random mds%d\n", __func__, mds); return mds; } From d36e0b620aa53d9a33c739f0368e85707a997430 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 7 Jan 2020 13:12:57 -0500 Subject: [PATCH 312/658] ceph: print name of xattr in __ceph_{get,set}xattr() douts Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 98a9a3101cda..7b8a070a782d 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -851,7 +851,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, req_mask = __get_request_mask(inode); spin_lock(&ci->i_ceph_lock); - dout("getxattr %p ver=%lld index_ver=%lld\n", inode, + dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name, ci->i_xattrs.version, ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || @@ -1078,7 +1078,8 @@ retry: } } - dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); + dout("setxattr %p name '%s' issued %s\n", inode, name, + ceph_cap_string(issued)); __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, name_len, val_len); From 24604f7e2bde5e6458812c3e9ee2a0d60c8c99fe Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 6 Jan 2020 19:05:19 -0500 Subject: [PATCH 313/658] ceph: move net/ceph/ceph_fs.c to fs/ceph/util.c All of these functions are only called from CephFS, so move them into ceph.ko, and drop the exports. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/Makefile | 2 +- net/ceph/ceph_fs.c => fs/ceph/util.c | 4 ---- net/ceph/Makefile | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) rename net/ceph/ceph_fs.c => fs/ceph/util.c (94%) diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index c1da294418d1..0a0823d378db 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ export.o caps.o snap.o xattr.o quota.o io.o \ mds_client.o mdsmap.o strings.o ceph_frag.o \ - debugfs.o + debugfs.o util.o ceph-$(CONFIG_CEPH_FSCACHE) += cache.o ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o diff --git a/net/ceph/ceph_fs.c b/fs/ceph/util.c similarity index 94% rename from net/ceph/ceph_fs.c rename to fs/ceph/util.c index 756a2dc10d27..2c34875675bf 100644 --- a/net/ceph/ceph_fs.c +++ b/fs/ceph/util.c @@ -39,7 +39,6 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl, fl->stripe_count == 0 && fl->object_size == 0) fl->pool_id = -1; } -EXPORT_SYMBOL(ceph_file_layout_from_legacy); void ceph_file_layout_to_legacy(struct ceph_file_layout *fl, struct ceph_file_layout_legacy *legacy) @@ -52,7 +51,6 @@ void ceph_file_layout_to_legacy(struct ceph_file_layout *fl, else legacy->fl_pg_pool = 0; } -EXPORT_SYMBOL(ceph_file_layout_to_legacy); int ceph_flags_to_mode(int flags) { @@ -82,7 +80,6 @@ int ceph_flags_to_mode(int flags) return mode; } -EXPORT_SYMBOL(ceph_flags_to_mode); int ceph_caps_for_mode(int mode) { @@ -101,4 +98,3 @@ int ceph_caps_for_mode(int mode) return caps; } -EXPORT_SYMBOL(ceph_caps_for_mode); diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 59d0ba2072de..ce09bb4fb249 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -13,5 +13,5 @@ libceph-y := ceph_common.o messenger.o 
msgpool.o buffer.o pagelist.o \ auth.o auth_none.o \ crypto.o armor.o \ auth_x.o \ - ceph_fs.o ceph_strings.o ceph_hash.o \ + ceph_strings.o ceph_hash.o \ pagevec.o snapshot.o string_table.o From 3325322f773bae68b20d8fa0e9e8ebb005271db5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 23 Jan 2020 13:44:33 +0100 Subject: [PATCH 314/658] rbd: set the 'device' link in sysfs The rbd driver already provides additional information in sysfs under /sys/bus/rbd, so we should set the 'device' link in the block device to reference this information. Signed-off-by: Hannes Reinecke Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 38dcb39051a7..405b66e09040 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7143,7 +7143,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, if (rc) goto err_out_image_lock; - add_disk(rbd_dev->disk); + device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); /* see rbd_init_disk() */ blk_put_queue(rbd_dev->disk->queue); From 23ee09032d44736c5e8c7becbe03170dbc6f2e8d Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 20 Jan 2020 18:01:15 +0200 Subject: [PATCH 315/658] ASoC: SOF: Intel: refactor i915_get/put functions The current interface to control i915 display power is misleading. The hda_codec_i915_get() and hda_codec_i915_put() names suggest a refcounting based interface. This is confusing as no refcounting is done and the underlying HDAC library interface does not support refcounts eithers. Clarify the code by replacing the functions with a single hda_codec_i915_display_power() that is aligned with snd_hdac_display_power(). Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200120160117.29130-2-kai.vehmanen@linux.intel.com Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 21 ++++++--------------- sound/soc/sof/intel/hda.c | 2 +- sound/soc/sof/intel/hda.h | 7 +++---- 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 78dfd5f5c034..9106ab8dac6f 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -170,23 +170,14 @@ EXPORT_SYMBOL_NS(hda_codec_probe_bus, SND_SOC_SOF_HDA_AUDIO_CODEC); #if IS_ENABLED(CONFIG_SND_HDA_CODEC_HDMI) || \ IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI) -void hda_codec_i915_get(struct snd_sof_dev *sdev) +void hda_codec_i915_display_power(struct snd_sof_dev *sdev, bool enable) { struct hdac_bus *bus = sof_to_bus(sdev); - dev_dbg(bus->dev, "Turning i915 HDAC power on\n"); - snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, true); + dev_dbg(bus->dev, "Turning i915 HDAC power %d\n", enable); + snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, enable); } -EXPORT_SYMBOL_NS(hda_codec_i915_get, SND_SOC_SOF_HDA_AUDIO_CODEC_I915); - -void hda_codec_i915_put(struct snd_sof_dev *sdev) -{ - struct hdac_bus *bus = sof_to_bus(sdev); - - dev_dbg(bus->dev, "Turning i915 HDAC power off\n"); - snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false); -} -EXPORT_SYMBOL_NS(hda_codec_i915_put, SND_SOC_SOF_HDA_AUDIO_CODEC_I915); +EXPORT_SYMBOL_NS(hda_codec_i915_display_power, SND_SOC_SOF_HDA_AUDIO_CODEC_I915); int hda_codec_i915_init(struct snd_sof_dev *sdev) { @@ -198,7 +189,7 @@ int hda_codec_i915_init(struct snd_sof_dev *sdev) if (ret < 0) return ret; - hda_codec_i915_get(sdev); + hda_codec_i915_display_power(sdev, true); return 0; } @@ 
-209,7 +200,7 @@ int hda_codec_i915_exit(struct snd_sof_dev *sdev) struct hdac_bus *bus = sof_to_bus(sdev); int ret; - hda_codec_i915_put(sdev); + hda_codec_i915_display_power(sdev, false); ret = snd_hdac_i915_exit(bus); diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index d08462f481de..54a7ba881150 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -380,7 +380,7 @@ static int hda_init_caps(struct snd_sof_dev *sdev) /* create codec instances */ hda_codec_probe_bus(sdev, hda_codec_use_common_hdmi); - hda_codec_i915_put(sdev); + hda_codec_i915_display_power(sdev, false); /* * we are done probing so decrement link counts diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 47408ec0de40..d1f87a107b9d 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -586,15 +586,14 @@ void hda_codec_jack_check(struct snd_sof_dev *sdev); (IS_ENABLED(CONFIG_SND_HDA_CODEC_HDMI) || \ IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) -void hda_codec_i915_get(struct snd_sof_dev *sdev); -void hda_codec_i915_put(struct snd_sof_dev *sdev); +void hda_codec_i915_display_power(struct snd_sof_dev *sdev, bool enable); int hda_codec_i915_init(struct snd_sof_dev *sdev); int hda_codec_i915_exit(struct snd_sof_dev *sdev); #else -static inline void hda_codec_i915_get(struct snd_sof_dev *sdev) { } -static inline void hda_codec_i915_put(struct snd_sof_dev *sdev) { } +static inline void hda_codec_i915_display_power(struct snd_sof_dev *sdev, + bool enable) { } static inline int hda_codec_i915_init(struct snd_sof_dev *sdev) { return 0; } static inline int hda_codec_i915_exit(struct snd_sof_dev *sdev) { return 0; } From 0c75419a94a20b2bf791825a665d8b64cf7c560d Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 20 Jan 2020 18:01:16 +0200 Subject: [PATCH 316/658] ASoC: SOF: Intel: do not disable i915 power during probe Change HDA probe behaviour slightly so that i915 power is not turned off if i915 audio codecs are found in the initial probe done by SOF Intel driver, and power is kept on until HDA codec driver probe runs. This will reduce number of mode sets on platforms with low minimum CDCLK (like GLK) and brings the SOF probe sequence closer to legacy HDA driver in terms of i915 audio codec power management. BugLink: https://github.com/thesofproject/linux/issues/1642 Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200120160117.29130-3-kai.vehmanen@linux.intel.com Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 54a7ba881150..65b86dd044f1 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -380,7 +380,8 @@ static int hda_init_caps(struct snd_sof_dev *sdev) /* create codec instances */ hda_codec_probe_bus(sdev, hda_codec_use_common_hdmi); - hda_codec_i915_display_power(sdev, false); + if (!HDA_IDISP_CODEC(bus->codec_mask)) + hda_codec_i915_display_power(sdev, false); /* * we are done probing so decrement link counts From 90b141614a4133ffeb1075049a49c702f98e26df Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Mon, 27 Jan 2020 16:56:03 +0530 Subject: [PATCH 317/658] ASoC: amd: Fix for Subsequent Playback issue. If we play audio back to back, which kills one playback and immediately start another, we can hear clicks. This patch fixes the issue. 
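The hunk below only reorders the close path. As a sketch (field names as in the driver, locals resolved as in the existing function), the intent appears to be to make the interrupt-disable decision from the state seen on entry to close, before the closing stream's pointers are dropped:

    static int acp3x_dma_close(struct snd_soc_component *component,
                               struct snd_pcm_substream *substream)
    {
        /* 1. disable the ACP interrupt only if no stream was active
         *    going into this close */
        if (!adata->play_stream && !adata->capture_stream &&
            !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream)
            rv_writel(0, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB);

        /* 2. only afterwards drop the references to the closing stream */
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
            adata->play_stream = NULL;
            adata->i2ssp_play_stream = NULL;
        } else {
            adata->capture_stream = NULL;
            adata->i2ssp_capture_stream = NULL;
        }
        return 0;
    }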
Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/1580124397-19842-1-git-send-email-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-pcm-dma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 5c3ec3c58e3b..aecc3c061679 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -349,13 +349,6 @@ static int acp3x_dma_close(struct snd_soc_component *component, component = snd_soc_rtdcom_lookup(prtd, DRV_NAME); adata = dev_get_drvdata(component->dev); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - adata->play_stream = NULL; - adata->i2ssp_play_stream = NULL; - } else { - adata->capture_stream = NULL; - adata->i2ssp_capture_stream = NULL; - } /* Disable ACP irq, when the current stream is being closed and * another stream is also not active. @@ -363,6 +356,13 @@ static int acp3x_dma_close(struct snd_soc_component *component, if (!adata->play_stream && !adata->capture_stream && !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream) rv_writel(0, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + adata->play_stream = NULL; + adata->i2ssp_play_stream = NULL; + } else { + adata->capture_stream = NULL; + adata->i2ssp_capture_stream = NULL; + } return 0; } From 4f7f9564a8937f38f97e8624cb7d6a28ee35f7cb Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 24 Jan 2020 15:36:19 -0600 Subject: [PATCH 318/658] ASoC: SOF: fix an Oops, caused by invalid topology It is possible to create invalid topology that will cause a kernel Oops when trying to allocate buffers for a NULL substream. Specifically such an Oops was caused by a topology, where a DAI on a capture pipeline was referencing the PCM ID from a playback pipeline. Fix the Oops by explicitly checking for NULL. 
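With such a topology the PCM apparently ends up with a stream direction that was never populated, so pcm->streams[stream].substream is NULL and the buffer preallocation dereferences it. A minimal sketch of the guard added below (capture side shown; the playback side is identical):

    if (!pcm->streams[stream].substream) {
        dev_err(component->dev, "error: NULL capture substream!\n");
        return -EINVAL;  /* reject the broken topology instead of oopsing */
    }

    snd_pcm_set_managed_buffer(pcm->streams[stream].substream,
                               SNDRV_DMA_TYPE_DEV_SG, sdev->dev,
                               le32_to_cpu(caps->buffer_size_min),
                               le32_to_cpu(caps->buffer_size_max));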
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200124213625.30186-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/pcm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index 314f3095c12f..29435ba2d329 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -591,6 +591,11 @@ static int sof_pcm_new(struct snd_soc_component *component, "spcm: allocate %s playback DMA buffer size 0x%x max 0x%x\n", caps->name, caps->buffer_size_min, caps->buffer_size_max); + if (!pcm->streams[stream].substream) { + dev_err(component->dev, "error: NULL playback substream!\n"); + return -EINVAL; + } + snd_pcm_set_managed_buffer(pcm->streams[stream].substream, SNDRV_DMA_TYPE_DEV_SG, sdev->dev, le32_to_cpu(caps->buffer_size_min), @@ -609,6 +614,11 @@ capture: "spcm: allocate %s capture DMA buffer size 0x%x max 0x%x\n", caps->name, caps->buffer_size_min, caps->buffer_size_max); + if (!pcm->streams[stream].substream) { + dev_err(component->dev, "error: NULL capture substream!\n"); + return -EINVAL; + } + snd_pcm_set_managed_buffer(pcm->streams[stream].substream, SNDRV_DMA_TYPE_DEV_SG, sdev->dev, le32_to_cpu(caps->buffer_size_min), From 37e97e6faeabda405d0c4319f8419dcc3da14b2b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 24 Jan 2020 15:36:20 -0600 Subject: [PATCH 319/658] ASoC: SOF: core: free trace on errors free_trace() is not called on probe errors, fix Reviewed-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200124213625.30186-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sof/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 44f9c04d54aa..f517ab448a1d 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -224,12 +224,12 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (ret < 0) { dev_err(sdev->dev, "error: failed to register DSP DAI driver %d\n", ret); - goto fw_run_err; + goto fw_trace_err; } ret = snd_sof_machine_register(sdev, plat_data); if (ret < 0) - goto fw_run_err; + goto fw_trace_err; /* * Some platforms in SOF, ex: BYT, may not have their platform PM @@ -245,6 +245,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) return 0; #if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE) +fw_trace_err: + snd_sof_free_trace(sdev); fw_run_err: snd_sof_fw_unload(sdev); fw_load_err: @@ -262,6 +264,7 @@ dbg_err: * snd_sof_device_remove() when the PCI/ACPI device is removed */ +fw_trace_err: fw_run_err: fw_load_err: ipc_err: From 410e5e55c9c1c9c0d452ac5b9adb37b933a7747e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 24 Jan 2020 15:36:21 -0600 Subject: [PATCH 320/658] ASoC: SOF: core: release resources on errors in probe_continue The initial intent of releasing resources in the .remove does not work well with HDaudio codecs. If the probe_continue() fails in a work queue, e.g. due to missing firmware or authentication issues, we don't release any resources, and as a result the kernel oopses during suspend operations. The suggested fix is to release all resources during errors in probe_continue(), and use fw_state to track resource allocation state, so that .remove does not attempt to release the same hardware resources twice. 
PM operations are also modified so that no action is done if DSP resources have been freed due to an error at probe. Reported-by: Takashi Iwai Co-developed-by: Kai Vehmanen Signed-off-by: Kai Vehmanen Bugzilla: http://bugzilla.suse.com/show_bug.cgi?id=1161246 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200124213625.30186-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sof/core.c | 33 ++++++++++++--------------------- sound/soc/sof/pm.c | 4 ++++ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index f517ab448a1d..34cefbaf2d2a 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -244,7 +244,6 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) return 0; -#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE) fw_trace_err: snd_sof_free_trace(sdev); fw_run_err: @@ -255,22 +254,10 @@ ipc_err: snd_sof_free_debug(sdev); dbg_err: snd_sof_remove(sdev); -#else - /* - * when the probe_continue is handled in a work queue, the - * probe does not fail so we don't release resources here. - * They will be released with an explicit call to - * snd_sof_device_remove() when the PCI/ACPI device is removed - */ - -fw_trace_err: -fw_run_err: -fw_load_err: -ipc_err: -dbg_err: - -#endif + /* all resources freed, update state to match */ + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; + sdev->first_boot = true; return ret; } @@ -353,10 +340,12 @@ int snd_sof_device_remove(struct device *dev) if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) cancel_work_sync(&sdev->probe_work); - snd_sof_fw_unload(sdev); - snd_sof_ipc_free(sdev); - snd_sof_free_debug(sdev); - snd_sof_free_trace(sdev); + if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) { + snd_sof_fw_unload(sdev); + snd_sof_ipc_free(sdev); + snd_sof_free_debug(sdev); + snd_sof_free_trace(sdev); + } /* * Unregister machine driver. This will unbind the snd_card which @@ -364,13 +353,15 @@ int snd_sof_device_remove(struct device *dev) * before freeing the snd_card. */ snd_sof_machine_unregister(sdev, pdata); + /* * Unregistering the machine driver results in unloading the topology. * Some widgets, ex: scheduler, attempt to power down the core they are * scheduled on, when they are unloaded. Therefore, the DSP must be * removed only after the topology has been unloaded. */ - snd_sof_remove(sdev); + if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) + snd_sof_remove(sdev); /* release firmware */ release_firmware(pdata->fw); diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c index 84290bbeebdd..a0cde053b61a 100644 --- a/sound/soc/sof/pm.c +++ b/sound/soc/sof/pm.c @@ -56,6 +56,10 @@ static int sof_resume(struct device *dev, bool runtime_resume) if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume) return 0; + /* DSP was never successfully started, nothing to resume */ + if (sdev->first_boot) + return 0; + /* * if the runtime_resume flag is set, call the runtime_resume routine * or else call the system resume routine From e6110114d18d330c05fd6de9f31283fd086a5a3a Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 24 Jan 2020 15:36:22 -0600 Subject: [PATCH 321/658] ASoC: SOF: trace: fix unconditional free in trace release Check if DMA pages were successfully allocated in initialization before calling free. For many types of memory (like sgbufs) the extra free is harmless, but not all backends track allocation state, so add an explicit check. 
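The pattern below (names taken from the hunk) lets dma_trace_pages double as an "allocation succeeded" flag, so the free path becomes idempotent and can safely be reached both from the probe error path added earlier in this series and from snd_sof_device_remove():

    if (sdev->dma_trace_pages) {
        snd_dma_free_pages(&sdev->dmatb);
        snd_dma_free_pages(&sdev->dmatp);
        sdev->dma_trace_pages = 0;  /* a second call becomes a no-op */
    }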
Signed-off-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200124213625.30186-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/trace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/trace.c b/sound/soc/sof/trace.c index 4bb65030819d..d815090252f8 100644 --- a/sound/soc/sof/trace.c +++ b/sound/soc/sof/trace.c @@ -343,7 +343,10 @@ void snd_sof_free_trace(struct snd_sof_dev *sdev) snd_sof_release_trace(sdev); - snd_dma_free_pages(&sdev->dmatb); - snd_dma_free_pages(&sdev->dmatp); + if (sdev->dma_trace_pages) { + snd_dma_free_pages(&sdev->dmatb); + snd_dma_free_pages(&sdev->dmatp); + sdev->dma_trace_pages = 0; + } } EXPORT_SYMBOL(snd_sof_free_trace); From e3b9f5f4ef92801f62c58665283c61cece0af905 Mon Sep 17 00:00:00 2001 From: Pan Xiuli Date: Fri, 24 Jan 2020 15:36:23 -0600 Subject: [PATCH 322/658] ASoC: SOF: pci: add missing default_fw_name of JasperLake jsl_desc missed default_fw_name, this will fail the probe in nocodec or generice HDA mode due the firmware path is intel/sof/(null) Signed-off-by: Pan Xiuli Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200124213625.30186-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-pci-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index d855bc2b76ad..bf01b92f0dac 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -235,6 +235,7 @@ static const struct sof_dev_desc jsl_desc = { .chip_info = &jsl_chip_info, .default_fw_path = "intel/sof", .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-jsl.ri", .nocodec_tplg_filename = "sof-jsl-nocodec.tplg", .ops = &sof_cnl_ops, }; From bd01cf38eedceb87d43f9205a75b259361e056ea Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 24 Jan 2020 15:36:24 -0600 Subject: [PATCH 323/658] ASoC: intel: soc-acpi-intel-icl-match: fix rt715 ADR Fix the part id of rt715 (typo with zero in the wrong place) Signed-off-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200124213625.30186-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-icl-match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-icl-match.c b/sound/soc/intel/common/soc-acpi-intel-icl-match.c index 67e9da4635f2..752733013d54 100644 --- a/sound/soc/intel/common/soc-acpi-intel-icl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-icl-match.c @@ -59,7 +59,7 @@ static const u64 rt1308_2_adr[] = { }; static const u64 rt715_3_adr[] = { - 0x000310025D715000 + 0x000310025D071500 }; static const struct snd_soc_acpi_link_adr icl_3_in_1_default[] = { From 98ff5c262f27aafee077a4c096f71a8566e9e948 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 24 Jan 2020 15:36:25 -0600 Subject: [PATCH 324/658] ASoC: Intel: consistent HDMI codec probing code Multiple Intel ASoC machine drivers repeat the same pattern in their .late_probe() methods: they first check whether the common HDMI codec driver is used, if not, they proceed by linking the legacy HDMI driver to each HDMI port. While doing that they use some inconsistent code: 1. after the loop they check, whether the list contained at least one element and if not, they return an error. However, the earlier code to use the common HDMI driver uses the first element of the same list without checking. 
To fix this we move the check to the top of the function. 2. some of those .late_probe() implementations execute code, only needed for the common HDMI driver, before checking, whether the driver is used. Move the code to after the check. 3. Some of those functions also perform a redundant initialisation of the "err" variable. This patch fixes those issues. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200124213625.30186-8-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bxt_da7219_max98357a.c | 14 +++++++------- sound/soc/intel/boards/bxt_rt298.c | 14 +++++++------- sound/soc/intel/boards/cml_rt1011_rt5682.c | 13 +++++++------ sound/soc/intel/boards/glk_rt5682_max98357a.c | 16 ++++++++-------- sound/soc/intel/boards/sof_rt5682.c | 15 ++++++++------- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c index 33b13f3ca152..9177401c37a5 100644 --- a/sound/soc/intel/boards/bxt_da7219_max98357a.c +++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c @@ -617,12 +617,15 @@ static int bxt_card_late_probe(struct snd_soc_card *card) snd_soc_dapm_add_routes(&card->dapm, broxton_map, ARRAY_SIZE(broxton_map)); - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -643,9 +646,6 @@ static int bxt_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c index 067a97e7e6a8..4b67f261377c 100644 --- a/sound/soc/intel/boards/bxt_rt298.c +++ b/sound/soc/intel/boards/bxt_rt298.c @@ -529,12 +529,15 @@ static int bxt_card_late_probe(struct snd_soc_card *card) int err, i = 0; char jack_name[NAME_SIZE]; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -555,9 +558,6 @@ static int bxt_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/cml_rt1011_rt5682.c b/sound/soc/intel/boards/cml_rt1011_rt5682.c index d6efc554898c..dd80d0186a6c 100644 --- a/sound/soc/intel/boards/cml_rt1011_rt5682.c +++ b/sound/soc/intel/boards/cml_rt1011_rt5682.c @@ -241,12 +241,15 @@ static int sof_card_late_probe(struct snd_soc_card *card) struct hdmi_pcm *pcm; int ret, i = 0; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return 
-EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -265,8 +268,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c index 4a6d117ea7af..8e947bad143c 100644 --- a/sound/soc/intel/boards/glk_rt5682_max98357a.c +++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c @@ -534,15 +534,18 @@ static int glk_card_late_probe(struct snd_soc_card *card) struct snd_soc_component *component = NULL; char jack_name[NAME_SIZE]; struct glk_hdmi_pcm *pcm; - int err = 0; + int err; int i = 0; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct glk_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct glk_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -563,9 +566,6 @@ static int glk_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index 8a13231dee15..5d878873a8e0 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -273,19 +273,22 @@ static int sof_card_late_probe(struct snd_soc_card *card) struct snd_soc_component *component = NULL; char jack_name[NAME_SIZE]; struct sof_hdmi_pcm *pcm; - int err = 0; + int err; int i = 0; /* HDMI is not supported by SOF on Baytrail/CherryTrail */ if (is_legacy_cpu) return 0; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct sof_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct sof_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -305,8 +308,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; return hdac_hdmi_jack_port_init(component, &card->dapm); } From 37c58ddf57364d1a636850bb8ba6acbe1e16195e Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 17 Jan 2020 19:54:45 -0500 Subject: [PATCH 325/658] drm/amdgpu: Fix TLB invalidation request when using semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a more meaningful variable name for the invalidation request that is distinct from the tmp variable that gets overwritten when acquiring the invalidation semaphore. 
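Paraphrasing the pre-fix flow in gmc_v9_0_flush_gpu_tlb() (declarations as in the function; the semaphore polling loop is summarized in a comment), the same 'tmp' carried two unrelated values:

    u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);

    if (use_semaphore) {
        /* the acquire loop repeatedly reads the invalidation semaphore
         * register into 'tmp' until the engine grants it, clobbering the
         * request built above */
    }

    /* what gets written here is the last semaphore read, not the request */
    WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

Giving the request its own variable (inv_req below) keeps the semaphore polling from corrupting it.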
Fixes: 4ed8a03740d0 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 0c5bf3bd640f..67318b75f5b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, { bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 tmp; /* Use register 17 for GART */ const unsigned eng = 17; unsigned int i; @@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 54bdc1786ab1..6d95de1413c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); const unsigned eng = 17; - u32 j, tmp; + u32 j, inv_req, tmp; struct amdgpu_vmhub *hub; BUG_ON(vmhub >= adev->num_vmhubs); hub = &adev->vmhub[vmhub]; - tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal @@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t req = hub->vm_inv_eng0_req + eng; uint32_t ack = hub->vm_inv_eng0_ack + eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); return; } @@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared From fa34edbed46178545285fd080c8d4d3fbc8fbc71 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 17 Jan 2020 20:08:42 -0500 Subject: [PATCH 326/658] drm/amdgpu: Use the correct flush_type in flush_gpu_tlb_pasid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flush_type was incorrectly hard-coded to 0 when calling falling back to MMIO-based invalidation in flush_gpu_tlb_pasid. 
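The effect was that whatever flush_type the caller asked for was silently downgraded to type 0 on the MMIO fallback; the fix simply threads the parameter through (gmc_v9_0 shown, gmc_v10_0 is analogous):

    if (all_hub) {
        for (i = 0; i < adev->num_vmhubs; i++)
            gmc_v9_0_flush_gpu_tlb(adev, vmid, i, flush_type);
    } else {
        gmc_v9_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, flush_type);
    }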
Fixes: ea930000a6dc ("drm/amdgpu: export function to flush TLB via pasid") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Reviewed-by: Oak Zeng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 67318b75f5b8..9775eca6fe43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -443,10 +443,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (all_hub) { for (i = 0; i < adev->num_vmhubs; i++) gmc_v10_0_flush_gpu_tlb(adev, vmid, - i, 0); + i, flush_type); } else { gmc_v10_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); + AMDGPU_GFXHUB_0, flush_type); } break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6d95de1413c4..90216abf14a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -602,10 +602,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (all_hub) { for (i = 0; i < adev->num_vmhubs; i++) gmc_v9_0_flush_gpu_tlb(adev, vmid, - i, 0); + i, flush_type); } else { gmc_v9_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); + AMDGPU_GFXHUB_0, flush_type); } break; } From b6501217265f770957548eec818f6c08aa1ef6a1 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 20 Jan 2020 15:43:04 -0500 Subject: [PATCH 327/658] drm/amdgpu/vcn: Share vcn_v2_0_dec_ring_test_ring to vcn2.5 Share vcn_v2_0_dec_ring_test_ring to vcn2.5 to support vcn software ring. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index b8dc136d2a01..4f7216788f11 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1624,7 +1624,7 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t tmp = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h index ef749b02ded9..6c9de1882428 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -37,6 +37,7 @@ extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +extern int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring); extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 6970d3a1ae6f..d8455b62567d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1488,7 +1488,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .emit_ib = vcn_v2_0_dec_ring_emit_ib, .emit_fence = vcn_v2_0_dec_ring_emit_fence, .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - 
.test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ring = vcn_v2_0_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, .insert_nop = vcn_v2_0_dec_ring_insert_nop, .insert_start = vcn_v2_0_dec_ring_insert_start, From 62884a7bf34379b2cacd391c510cb9d852b36560 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 20 Jan 2020 15:47:35 -0500 Subject: [PATCH 328/658] drm/amdgpu/vcn2.5: fix a bug for the 2nd vcn instance (v2) Fix a bug for the 2nd vcn instance at start and stop. v2: squash in unused label removal. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d8455b62567d..bb31258e3340 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -891,8 +891,10 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, @@ -903,6 +905,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return 0; + /*SW clock gating */ vcn_v2_5_disable_clock_gating(adev); @@ -1294,10 +1299,9 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v2_5_stop_dpg_mode(adev, i); - goto power_off; + continue; } /* wait for vcn idle */ @@ -1349,7 +1353,6 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } -power_off: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); From 326b523eeb521368fd1566c8ceeddf04a68cb01e Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 20 Jan 2020 21:44:07 -0500 Subject: [PATCH 329/658] drm/amdgpu/vcn: fix vcn2.5 instance issue Fix vcn2.5 instance issue, vcn0 and vcn1 have same register offset Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 88 +++++++++++++-------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index bb31258e3340..70fae7977f8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -435,88 +435,88 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); 
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); offset = size; WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), 
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -670,19 +670,19 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__MMSCH_MODE_MASK); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect); + UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); + UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } /** @@ -772,11 +772,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect); + UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | @@ -788,28 +788,28 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (8 << 
UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect); + UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_MPC_CNTL), + UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_MPC_SET_MUXA0), + UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_MPC_SET_MUXB0), + UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_MPC_SET_MUX), + UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); @@ -817,26 +817,26 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* enable LMI MC and UMC channels */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect); + UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); /* unblock VCPU register access */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* enable master interrupt */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_MASTINT_EN), + UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) From a4555732142e96855c854fa7b2a65035044391b3 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 21 Jan 2020 16:28:07 -0500 Subject: [PATCH 330/658] drm/amdgpu/vcn: fix typo error Fix typo error, should be inst_idx instead of inst. 
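As an aside on this fix: the bug class here is a macro body that names a variable it does not take as a parameter. Such a macro can still compile whenever some variable with that name happens to be in scope at the call site, quietly binding to it, which is how the macro could reference "inst" while the caller passed "inst_idx". A minimal standalone sketch of the effect, with invented identifiers (not kernel code):

#include <stdio.h>

static int table[4] = { 10, 20, 30, 40 };

#define READ_SLOT(idx)	(table[inst])	/* bug: should be table[(idx)] */

int main(void)
{
	int inst = 0;		/* unrelated variable the macro latches onto */
	int inst_idx = 3;

	/* Intended table[inst_idx] == 40, but this prints 10; with -Wall the
	 * only hint is that inst_idx looks unused after macro expansion. */
	printf("%d\n", READ_SLOT(inst_idx));
	return 0;
}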
Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c4984c5fb2db..56984ff09ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -111,7 +111,7 @@ #define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ ({ \ - WREG32_SOC15(VCN, inst, mmUVD_DPG_LMA_CTL, \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ From 55bbb747ecfa39806198419e96e7bc2c44221d4e Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 21 Jan 2020 16:33:21 -0500 Subject: [PATCH 331/658] drm/amdgpu/vcn: use inst_idx relacing inst Use inst_idx relacing inst in SOC15_DPG_MODE macro to avoid confusion. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 56984ff09ea9..d6deb0eb1e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -65,33 +65,33 @@ /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ +#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \ + RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \ do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ } while (0) -#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \ +#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ bool video_range, aon_range; \ \ - addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ From f583cc57ba70afdd02e36b0ada3790e8bf4fb10f Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 22 
Jan 2020 11:03:30 +0800 Subject: [PATCH 332/658] drm/amdgpu: initialize bo_va_list when add gws to process bo_va_list is list_head, so initialize it. Signed-off-by: xinhui pan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b2487f4f271b..fa8ac9d19a7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2129,6 +2129,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem return -ENOMEM; mutex_init(&(*mem)->lock); + INIT_LIST_HEAD(&(*mem)->bo_va_list); (*mem)->bo = amdgpu_bo_ref(gws_bo); (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; (*mem)->process_info = process_info; From 2639f453f28e71dc4149fb06c71bcf6f93eb468f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 22 Jan 2020 10:37:56 +0100 Subject: [PATCH 333/658] drm/amdgpu: fix doc by clarifying sched_list definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit expand sched_list definition for better understanding. Also fix a typo atleast -> at least Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- include/drm/gpu_scheduler.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index ec79e8e5ad3c..63bccd201b97 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -45,7 +45,7 @@ * @guilty: atomic_t set to 1 when a job on this queue * is found to be guilty causing a timeout * - * Note: the sched_list should have atleast one element to schedule + * Note: the sched_list should have at least one element to schedule * the entity * * Returns 0 on success or a negative error code on failure. diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9e71be129c30..589be851f8a1 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -52,8 +52,9 @@ enum drm_sched_priority { * @list: used to append this struct to the list of entities in the * runqueue. * @rq: runqueue on which this entity is currently scheduled. - * @sched_list: a list of drm_gpu_schedulers on which jobs from this entity can - * be scheduled + * @sched_list: A list of schedulers (drm_gpu_schedulers). + * Jobs from this entity can be scheduled on any scheduler + * on this list. * @num_sched_list: number of drm_gpu_schedulers in the sched_list. * @rq_lock: lock to modify the runqueue to which this entity belongs. * @job_queue: the list of jobs of this entity. 
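Returning to the bo_va_list fix above: kzalloc() zero-fills the allocation, but a zeroed struct list_head is not an empty list — its next/prev are NULL instead of pointing back at the head, so the first list_add() on it dereferences NULL. A standalone sketch using a local two-pointer list that mirrors (but does not reuse) the kernel's <linux/list.h> semantics:

#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;	/* NULL dereference if head was merely zeroed */
	head->next = new;
}

int main(void)
{
	struct list_head *head = calloc(1, sizeof(*head));	/* like kzalloc */
	struct list_head node;

	if (!head)
		return 1;
	INIT_LIST_HEAD(head);	/* the step the patch adds for bo_va_list */
	list_add(&node, head);
	printf("inserted node links back to head: %d\n", node.next == head);
	return 0;
}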
From a16afcdd8c58285e83951ce318b55eafb7948997 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 22 Jan 2020 15:53:11 +0800 Subject: [PATCH 334/658] drm/amd/powerplay: use true, false for bool variable in smu7_hwmgr.c Fixes coccicheck warning: drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:723:2-50: WARNING: Assignment of 0/1 to bool variable drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:733:3-52: WARNING: Assignment of 0/1 to bool variable drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:747:3-51: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index d70abada66bf..bf04cfefb283 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -720,7 +720,7 @@ static int smu7_setup_dpm_tables_v0(struct pp_hwmgr *hwmgr) data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].v; data->dpm_table.vddc_table.dpm_levels[i].param1 = std_voltage_table->entries[i].Leakage; /* param1 is for corresponding std voltage */ - data->dpm_table.vddc_table.dpm_levels[i].enabled = 1; + data->dpm_table.vddc_table.dpm_levels[i].enabled = true; } data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count; @@ -730,7 +730,7 @@ static int smu7_setup_dpm_tables_v0(struct pp_hwmgr *hwmgr) /* Initialize Vddci DPM table based on allow Mclk values */ for (i = 0; i < allowed_vdd_mclk_table->count; i++) { data->dpm_table.vddci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].v; - data->dpm_table.vddci_table.dpm_levels[i].enabled = 1; + data->dpm_table.vddci_table.dpm_levels[i].enabled = true; } data->dpm_table.vddci_table.count = allowed_vdd_mclk_table->count; } @@ -744,7 +744,7 @@ static int smu7_setup_dpm_tables_v0(struct pp_hwmgr *hwmgr) */ for (i = 0; i < allowed_vdd_mclk_table->count; i++) { data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].v; - data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1; + data->dpm_table.mvdd_table.dpm_levels[i].enabled = true; } data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count; } From b20dcd72c150d82c56682606202f3f028403e581 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jan 2020 00:22:16 +0000 Subject: [PATCH 335/658] drm/amd/amdgpu: fix spelling mistake "to" -> "too" There is a spelling mistake in a DRM_ERROR message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ceb0dbf685f1..59ddba137946 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -652,7 +652,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, if ((addr + (uint64_t)size) > (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", + DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } From 7af2a5771e0918cdadb1614c1f81dd67a58e00aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Jan 2020 12:26:51 -0500 Subject: [PATCH 336/658] drm/amdgpu: attempt to enable gfxoff on more raven1 boards (v2) Switch to a blacklist so we can disable specific boards that are problematic. v2: make the blacklist non-raven specific. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 44 ++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6558d70fdc58..51ec8a70660a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1162,18 +1162,54 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) } } +struct amdgpu_gfxoff_quirk { + u16 chip_vendor; + u16 chip_device; + u16 subsys_vendor; + u16 subsys_device; + u8 revision; +}; + +static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { + /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + { 0, 0, 0, 0, 0 }, +}; + +static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) +{ + const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; + + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device && + pdev->revision == p->revision) { + return true; + } + ++p; + } + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { + if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || - adev->pdev->device == 0x15d8) && - (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */ - !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) && + ((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (adev->pm.pp_feature & PP_GFXOFF_MASK) From 276cc92945ab4af02d8bedce18b4b6f87b9c1609 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Jan 2020 12:56:37 -0500 Subject: [PATCH 337/658] drm/amdgpu: original raven doesn't support full asic reset So don't use it. 
Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index af41ee4c9639..15f3424a1ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -537,6 +537,10 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static int soc15_asic_reset(struct amdgpu_device *adev) { + /* original raven doesn't have full asic reset */ + if (adev->pdev->device == 0x15dd && adev->rev_id < 0x8) + return 0; + switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: if (!adev->in_suspend) From 3eb6d7aca53d81ce888624f09cd44dc0302161e8 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 21 Jan 2020 16:12:45 -0500 Subject: [PATCH 338/658] drm/amd/display: Retrain dongles when SINK_COUNT becomes non-zero [WHY] Two years ago the patch referenced by the Fixes tag stopped running dp_verify_link_cap_with_retries during DP detection when the reason for the detection was a short-pulse interrupt. This effectively meant that we were no longer doing the verify_link_cap training on active dongles when their SINK_COUNT changed from 0 to 1. A year ago this was partly remedied with: commit 80adaebd2d41 ("drm/amd/display: Don't skip link training for empty dongle") This made sure that we trained the dongle on initial hotplug (without connected downstream devices). This is all fine and dandy if it weren't for the fact that there are some dongles on the market that don't like link training when SINK_COUNT is 0 These dongles will in fact indicate a SINK_COUNT of 0 immediately after hotplug, even when a downstream device is connected, and then trigger a shortpulse interrupt indicating a SINK_COUNT change to 1. In order to play nicely we will need our policy to not link train an active DP dongle when SINK_COUNT is 0 but ensure we train it when the SINK_COUNT changes to 1. [HOW] Call dp_verify_link_cap_with_retries on detection even when the detection is triggered from a short pulse interrupt. 
With this change we can also revert this commit which we'll do in a separate follow-up change: commit 80adaebd2d41 ("drm/amd/display: Don't skip link training for empty dongle") Fixes: 0301ccbaf67d ("drm/amd/display: DP Compliance 400.1.1 failure") Suggested-by: Louis Li Tested-by: Louis Li Cc: Wenjing Liu Cc: Hersen Wu Cc: Eric Yang Reviewed-by: Wenjing Liu Signed-off-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index a50768a7ba68..cc2e05003595 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -969,8 +969,7 @@ static bool dc_link_detect_helper(struct dc_link *link, same_edid = is_same_edid(&prev_sink->dc_edid, &sink->dc_edid); if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && - sink_caps.transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX && - reason != DETECT_REASON_HPDRX) { + sink_caps.transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { /* * TODO debug why Dell 2413 doesn't like * two link trainings From da2f84ba73df11186cf23bec45d039f3b9162bd8 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 21 Jan 2020 16:29:54 -0500 Subject: [PATCH 339/658] Revert "drm/amd/display: Don't skip link training for empty dongle" This reverts commit 80adaebd2d411b7d6872a097634848a71eb13d20. [WHY] This change was working around a regression that occured in this: commit 0301ccbaf67d ("drm/amd/display: DP Compliance 400.1.1 failure") With the fix to run verify_link_cap when the SINK_COUNT of dongles becomes non-zero this change is no longer needed. Cc: Louis Li Cc: Wenjing Liu Cc: Hersen Wu Cc: Eric Yang Reviewed-by: Wenjing Liu Signed-off-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cc2e05003595..a09119c10d7c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -851,18 +851,12 @@ static bool dc_link_detect_helper(struct dc_link *link, if (memcmp(&link->dpcd_caps, &prev_dpcd_caps, sizeof(struct dpcd_caps))) same_dpcd = false; } - /* Active dongle plug in without display or downstream unplug*/ + /* Active dongle downstream unplug*/ if (link->type == dc_connection_active_dongle && link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) { - if (prev_sink != NULL) { + if (prev_sink != NULL) /* Downstream unplug */ dc_sink_release(prev_sink); - } else { - /* Empty dongle plug in */ - dp_verify_link_cap_with_retries(link, - &link->reported_link_cap, - LINK_TRAINING_MAX_VERIFY_RETRY); - } return true; } From 77171eade8faa713c1104b29ea7ff96915ecf9fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 16 Jan 2020 14:06:59 +0100 Subject: [PATCH 340/658] drm/amdgpu: add coreboot workaround for KV/KB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coreboot seems to have a problem correctly setting up access to the stolen VRAM on KV/KB. Use the direct access only when necessary. 
Signed-off-by: Christian König Reported-and-tested-by: Fredrik Bruhn Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 19d5b133e1d7..9da9596a3638 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,7 +381,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if (adev->flags & AMD_IS_APU && + adev->gmc.real_vram_size > adev->gmc.aper_size) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } From bb67bfd2e7101bf2ac5327b0b7a847cd9fb9723f Mon Sep 17 00:00:00 2001 From: Dor Askayo Date: Sat, 4 Jan 2020 14:22:15 +0200 Subject: [PATCH 341/658] drm/amd/display: do not allocate display_mode_lib unnecessarily This allocation isn't required and can fail when resuming from suspend. Bug: https://gitlab.freedesktop.org/drm/amd/issues/1009 Signed-off-by: Dor Askayo Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 6c797fac189d..04441dbcba76 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2462,12 +2462,7 @@ void dc_set_power_state( enum dc_acpi_cm_power_state power_state) { struct kref refcount; - struct display_mode_lib *dml = kzalloc(sizeof(struct display_mode_lib), - GFP_KERNEL); - - ASSERT(dml); - if (!dml) - return; + struct display_mode_lib *dml; switch (power_state) { case DC_ACPI_CM_POWER_STATE_D0: @@ -2490,6 +2485,12 @@ void dc_set_power_state( * clean state, and dc hw programming optimizations will not * cause any trouble. */ + dml = kzalloc(sizeof(struct display_mode_lib), + GFP_KERNEL); + + ASSERT(dml); + if (!dml) + return; /* Preserve refcount */ refcount = dc->current_state->refcount; @@ -2503,10 +2504,10 @@ void dc_set_power_state( dc->current_state->refcount = refcount; dc->current_state->bw_ctx.dml = *dml; + kfree(dml); + break; } - - kfree(dml); } void dc_resume(struct dc *dc) From 658c663947b04b1f8a2fd061a3ef767983c880e5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Jan 2020 14:31:49 -0500 Subject: [PATCH 342/658] drm/amdgpu: enable GPU reset by default on Navi Has been working fine for a while. Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 990f8e64a4d5..2f15e2f6d336 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3760,6 +3760,9 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_RAVEN: case CHIP_ARCTURUS: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: goto disabled; From 2cb44fb09305e23fe60dbfcccad3f8c4028749f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Jan 2020 14:35:10 -0500 Subject: [PATCH 343/658] drm/amdgpu: enable GPU reset by default on renoir Everything is in place. 
Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f15e2f6d336..39cd545976b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3760,6 +3760,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_RAVEN: case CHIP_ARCTURUS: + case CHIP_RENOIR: case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: From 54f73df4cab18734b71a4325feb4749d334dc877 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 17 Jan 2020 13:33:05 +0000 Subject: [PATCH 344/658] drm/amd/display: fix for-loop with incorrectly sized loop counter (v2) A for-loop is iterating from 0 up to 1000 however the loop variable count is a u8 and hence not large enough. Fix this by making count an int. Also remove the redundant initialization of count since this is never used and add { } on the loop statement make the loop block clearer. v2: drop useless else (Walter Harms) Addresses-Coverity: ("Operands don't affect result") Fixes: ed581a0ace44 ("drm/amd/display: wait for update when setting dpg test pattern") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index a53e8fed56f3..cb731c1d30b1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3680,7 +3680,7 @@ static void set_crtc_test_pattern(struct dc_link *link, struct pipe_ctx *odm_pipe; enum controller_dp_color_space controller_color_space; int opp_cnt = 1; - uint8_t count = 0; + int count; switch (test_pattern_color_space) { case DP_TEST_PATTERN_COLOR_SPACE_RGB: @@ -3725,11 +3725,11 @@ static void set_crtc_test_pattern(struct dc_link *link, width, height); /* wait for dpg to blank pixel data with test pattern */ - for (count = 0; count < 1000; count++) + for (count = 0; count < 1000; count++) { if (opp->funcs->dpg_is_blanked(opp)) break; - else - udelay(100); + udelay(100); + } } } break; From 269a0bf79b3346fbebe7f62aef516ce61c2f02d0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 25 Jan 2020 20:26:13 +0000 Subject: [PATCH 345/658] drm/amd/powerplay: fix spelling mistake "Attemp" -> "Attempt" There are several spelling mistakes in PP_ASSERT_WITH_CODE messages. Fix these. 
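An aside on the loop-counter fix in patch 344 above: with an 8-bit counter the guard "count < 1000" can never become false (count wraps from 255 back to 0), so the loop either exits through its break or spins forever — which is what Coverity's "Operands don't affect result" points at. A standalone sketch, with an extra cap added only so the demo terminates:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t count;			/* the kernel's u8: wraps 255 -> 0 */
	unsigned int spins = 0;

	for (count = 0; count < 1000; count++) {	/* never false for a u8 */
		if (++spins >= 5000)	/* safety cap, only so the demo stops */
			break;
	}
	printf("gave up after %u iterations (count wrapped to %u)\n",
	       spins, count);
	return 0;
}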
Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c | 12 ++++++------ drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index a3915bfcce81..275dbf65f1a0 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -128,20 +128,20 @@ int vega12_enable_smc_features(struct pp_hwmgr *hwmgr, if (enable) { PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnableSmuFeaturesLow, smu_features_low) == 0, - "[EnableDisableSMCFeatures] Attemp to enable SMU features Low failed!", + "[EnableDisableSMCFeatures] Attempt to enable SMU features Low failed!", return -EINVAL); PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnableSmuFeaturesHigh, smu_features_high) == 0, - "[EnableDisableSMCFeatures] Attemp to enable SMU features High failed!", + "[EnableDisableSMCFeatures] Attempt to enable SMU features High failed!", return -EINVAL); } else { PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DisableSmuFeaturesLow, smu_features_low) == 0, - "[EnableDisableSMCFeatures] Attemp to disable SMU features Low failed!", + "[EnableDisableSMCFeatures] Attempt to disable SMU features Low failed!", return -EINVAL); PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DisableSmuFeaturesHigh, smu_features_high) == 0, - "[EnableDisableSMCFeatures] Attemp to disable SMU features High failed!", + "[EnableDisableSMCFeatures] Attempt to disable SMU features High failed!", return -EINVAL); } @@ -158,13 +158,13 @@ int vega12_get_enabled_smc_features(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc(hwmgr, PPSMC_MSG_GetEnabledSmuFeaturesLow) == 0, - "[GetEnabledSMCFeatures] Attemp to get SMU features Low failed!", + "[GetEnabledSMCFeatures] Attempt to get SMU features Low failed!", return -EINVAL); smc_features_low = smu9_get_argument(hwmgr); PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc(hwmgr, PPSMC_MSG_GetEnabledSmuFeaturesHigh) == 0, - "[GetEnabledSMCFeatures] Attemp to get SMU features High failed!", + "[GetEnabledSMCFeatures] Attempt to get SMU features High failed!", return -EINVAL); smc_features_high = smu9_get_argument(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c index 0db57fb83d30..49e5ef3e3876 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c @@ -316,20 +316,20 @@ int vega20_enable_smc_features(struct pp_hwmgr *hwmgr, if (enable) { PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnableSmuFeaturesLow, smu_features_low)) == 0, - "[EnableDisableSMCFeatures] Attemp to enable SMU features Low failed!", + "[EnableDisableSMCFeatures] Attempt to enable SMU features Low failed!", return ret); PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnableSmuFeaturesHigh, smu_features_high)) == 0, - "[EnableDisableSMCFeatures] Attemp to enable SMU features High failed!", + "[EnableDisableSMCFeatures] Attempt to enable SMU features High failed!", return ret); } else { PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DisableSmuFeaturesLow, smu_features_low)) == 0, - 
"[EnableDisableSMCFeatures] Attemp to disable SMU features Low failed!", + "[EnableDisableSMCFeatures] Attempt to disable SMU features Low failed!", return ret); PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DisableSmuFeaturesHigh, smu_features_high)) == 0, - "[EnableDisableSMCFeatures] Attemp to disable SMU features High failed!", + "[EnableDisableSMCFeatures] Attempt to disable SMU features High failed!", return ret); } @@ -347,12 +347,12 @@ int vega20_get_enabled_smc_features(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc(hwmgr, PPSMC_MSG_GetEnabledSmuFeaturesLow)) == 0, - "[GetEnabledSMCFeatures] Attemp to get SMU features Low failed!", + "[GetEnabledSMCFeatures] Attempt to get SMU features Low failed!", return ret); smc_features_low = vega20_get_argument(hwmgr); PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc(hwmgr, PPSMC_MSG_GetEnabledSmuFeaturesHigh)) == 0, - "[GetEnabledSMCFeatures] Attemp to get SMU features High failed!", + "[GetEnabledSMCFeatures] Attempt to get SMU features High failed!", return ret); smc_features_high = vega20_get_argument(hwmgr); From 7faa26c1bbe312d9191524e4b7ab010f91fcd654 Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Tue, 28 Jan 2020 10:45:08 +0530 Subject: [PATCH 346/658] ALSA: hda: Reset stream if DMA RUN bit not cleared Tegra HDA has FIFO size which can hold upto 10 audio frames to support DVFS. When HDA DMA RUN bit is set to 0 to stop the stream, the DMA RUN bit will be cleared to 0 only after transferring all the remaining audio frames queued up in the fifo. This is not in sync with spec which states the controller will stop transmitting(output) in the beginning of the next frame for the relevant stream. The above behavior with Tegra HDA was resulting in machine check error during the system suspend flow with active audio playback with below kernel error logs. [ 33.524583] mc-err: [mcerr] (hda) csr_hdar: EMEM address decode error [ 33.531088] mc-err: [mcerr] status = 0x20000015; addr = 0x00000000 [ 33.537431] mc-err: [mcerr] secure: no, access-type: read, SMMU fault: none This was due to the fifo has more than one audio frame when the DMA RUN bit is set to 0 during system suspend flow and the timeout handling in snd_hdac_stream_sync() was not designed to handle this scenario. So the DMA will continue running even after timeout hit until all remaining audio frames in the fifo are transferred, but the suspend flow will try to reset the controller and turn off the hda clocks without the knowledge of the DMA is still running and could result in mc-err. The above issue can be resolved by doing stream reset with the help of snd_hdac_stream_reset() which would ensure the DMA RUN bit is cleared if the timeout was hit in snd_hdac_stream_sync(). 
Signed-off-by: Mohan Kumar Link: https://lore.kernel.org/r/20200128051508.26064-1-mkumard@nvidia.com Signed-off-by: Takashi Iwai --- sound/hda/hdac_stream.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index d01e69139164..a314b03b4a4c 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -631,20 +631,27 @@ void snd_hdac_stream_sync(struct hdac_stream *azx_dev, bool start, nwait = 0; i = 0; list_for_each_entry(s, &bus->stream_list, list) { - if (streams & (1 << i)) { - if (start) { - /* check FIFO gets ready */ - if (!(snd_hdac_stream_readb(s, SD_STS) & - SD_STS_FIFO_READY)) - nwait++; - } else { - /* check RUN bit is cleared */ - if (snd_hdac_stream_readb(s, SD_CTL) & - SD_CTL_DMA_START) - nwait++; + if (!(streams & (1 << i++))) + continue; + + if (start) { + /* check FIFO gets ready */ + if (!(snd_hdac_stream_readb(s, SD_STS) & + SD_STS_FIFO_READY)) + nwait++; + } else { + /* check RUN bit is cleared */ + if (snd_hdac_stream_readb(s, SD_CTL) & + SD_CTL_DMA_START) { + nwait++; + /* + * Perform stream reset if DMA RUN + * bit not cleared within given timeout + */ + if (timeout == 1) + snd_hdac_stream_reset(s); } } - i++; } if (!nwait) break; From d0ce47cb993ec88d293bac7b51ba2d97ea99594c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 18 Jun 2019 15:51:17 -0300 Subject: [PATCH 347/658] docs: tracing: Fix a broken label Sphinx warnings about his: Documentation/trace/kprobetrace.rst:68: WARNING: undefined label: user_mem_access (if the link has no caption the label must precede a section header) The problem is quite simple: Sphinx wants a blank line after references. Link: http://lkml.kernel.org/r/a83ea390bc28784518fce772b4c961ea1c976f14.1560883872.git.mchehab+samsung@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 55993055902c..705d73087099 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -97,6 +97,7 @@ which shows given pointer in "symbol+offset" style. For $comm, the default type is "string"; any other type is invalid. .. _user_mem_access: + User Memory Access ------------------ Kprobe events supports user-space memory access. For that purpose, you can use From b527b638fd63ba791dc90a0a6e9a3035b10df52b Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 28 Jun 2019 12:40:20 -0500 Subject: [PATCH 348/658] tracing: Simplify assignment parsing for hist triggers In the process of adding better error messages for sorting, I realized that strsep was being used incorrectly and some of the error paths I was expecting to be hit weren't and just fell through to the common invalid key error case. It also became obvious that for keyword assignments, it wasn't necessary to save the full assignment and reparse it later, and having a common empty-assignment check would also make more sense in terms of error processing. Change the code to fix these problems and simplify it for new error message changes in a subsequent patch. 
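A userspace sketch of the parsing pattern the patch switches to: str_has_prefix() returns the length of the prefix on a match (this is the kernel helper used in the diff below), so the right-hand side of "key=..." can be taken as "str + len" instead of saving the whole assignment and re-splitting it with strsep() later. The helper is re-implemented locally here so the example stands alone.

#include <stdio.h>
#include <string.h>

/* Local stand-in for the kernel's str_has_prefix() used in the diff. */
static size_t str_has_prefix(const char *str, const char *prefix)
{
	size_t len = strlen(prefix);

	return strncmp(str, prefix, len) == 0 ? len : 0;
}

int main(void)
{
	const char *assign = "keys=common_pid";
	size_t len;

	if ((len = str_has_prefix(assign, "key=")) ||
	    (len = str_has_prefix(assign, "keys=")))
		printf("key spec: '%s'\n", assign + len);	/* common_pid */
	else
		printf("not a key assignment\n");
	return 0;
}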
Link: http://lkml.kernel.org/r/1c3ef0b6655deaf345f6faee2584a0298ac2d743.1561743018.git.zanussi@kernel.org Fixes: e62347d24534 ("tracing: Add hist trigger support for user-defined sorting ('sort=' param)") Fixes: 7ef224d1d0e3 ("tracing: Add 'hist' event trigger command") Fixes: a4072fe85ba3 ("tracing: Add a clock attribute for hist triggers") Reported-by: Masami Hiramatsu Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 70 ++++++++++++-------------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 117a1202a6b9..bf2bcb8d7725 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2039,12 +2039,6 @@ static int parse_map_size(char *str) unsigned long size, map_bits; int ret; - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } - ret = kstrtoul(str, 0, &size); if (ret) goto out; @@ -2104,25 +2098,25 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) static int parse_assignment(struct trace_array *tr, char *str, struct hist_trigger_attrs *attrs) { - int ret = 0; + int len, ret = 0; - if ((str_has_prefix(str, "key=")) || - (str_has_prefix(str, "keys="))) { - attrs->keys_str = kstrdup(str, GFP_KERNEL); + if ((len = str_has_prefix(str, "key=")) || + (len = str_has_prefix(str, "keys="))) { + attrs->keys_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->keys_str) { ret = -ENOMEM; goto out; } - } else if ((str_has_prefix(str, "val=")) || - (str_has_prefix(str, "vals=")) || - (str_has_prefix(str, "values="))) { - attrs->vals_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "val=")) || + (len = str_has_prefix(str, "vals=")) || + (len = str_has_prefix(str, "values="))) { + attrs->vals_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->vals_str) { ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "sort=")) { - attrs->sort_key_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "sort="))) { + attrs->sort_key_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->sort_key_str) { ret = -ENOMEM; goto out; @@ -2133,12 +2127,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "clock=")) { - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } + } else if ((len = str_has_prefix(str, "clock="))) { + str += len; str = strstrip(str); attrs->clock = kstrdup(str, GFP_KERNEL); @@ -2146,8 +2136,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "size=")) { - int map_bits = parse_map_size(str); + } else if ((len = str_has_prefix(str, "size="))) { + int map_bits = parse_map_size(str + len); if (map_bits < 0) { ret = map_bits; @@ -2187,8 +2177,14 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) while (trigger_str) { char *str = strsep(&trigger_str, ":"); + char *rhs; - if (strchr(str, '=')) { + rhs = strchr(str, '='); + if (rhs) { + if (!strlen(++rhs)) { + ret = -EINVAL; + goto free; + } ret = parse_assignment(tr, str, attrs); if (ret) goto free; @@ -4522,10 +4518,6 @@ static int create_val_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = 0, j = 1; i < TRACING_MAP_VALS_MAX && j < TRACING_MAP_VALS_MAX; i++) { field_str = strsep(&fields_str, ","); @@ -4620,10 +4612,6 @@ static int 
create_key_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = n_vals; i < n_vals + TRACING_MAP_KEYS_MAX; i++) { field_str = strsep(&fields_str, ","); if (!field_str) @@ -4781,12 +4769,6 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) { - ret = -EINVAL; - goto out; - } - for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) { struct hist_field *hist_field; char *field_str, *field_name; @@ -4795,9 +4777,11 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) sort_key = &hist_data->sort_keys[i]; field_str = strsep(&fields_str, ","); - if (!field_str) { - if (i == 0) - ret = -EINVAL; + if (!field_str) + break; + + if (!*field_str) { + ret = -EINVAL; break; } @@ -4807,7 +4791,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } field_name = strsep(&field_str, "."); - if (!field_name) { + if (!field_name || !*field_name) { ret = -EINVAL; break; } From 4de26c8c967d55551d3983771116a2c3c0a4f464 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 28 Jun 2019 12:40:21 -0500 Subject: [PATCH 349/658] tracing: Add hist trigger error messages for sort specification Add error codes and messages for all the error paths leading to sort specification parsing errors. Link: http://lkml.kernel.org/r/237830dc05e583fbb53664d817a784297bf961be.1561743018.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index bf2bcb8d7725..23458ba9e5f5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -66,7 +66,12 @@ C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \ C(INVALID_REF_KEY, "Using variable references in keys not supported"), \ C(VAR_NOT_FOUND, "Couldn't find variable"), \ - C(FIELD_NOT_FOUND, "Couldn't find field"), + C(FIELD_NOT_FOUND, "Couldn't find field"), \ + C(EMPTY_ASSIGNMENT, "Empty assignment"), \ + C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ + C(EMPTY_SORT_FIELD, "Empty sort field"), \ + C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), #undef C #define C(a, b) HIST_ERR_##a @@ -2183,6 +2188,7 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) if (rhs) { if (!strlen(++rhs)) { ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_ASSIGNMENT, errpos(str)); goto free; } ret = parse_assignment(tr, str, attrs); @@ -4743,7 +4749,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, return ret; } -static int is_descending(const char *str) +static int is_descending(struct trace_array *tr, const char *str) { if (!str) return 0; @@ -4754,11 +4760,14 @@ static int is_descending(const char *str) if (strcmp(str, "ascending") == 0) return 0; + hist_err(tr, HIST_ERR_INVALID_SORT_MODIFIER, errpos((char *)str)); + return -EINVAL; } static int create_sort_keys(struct hist_trigger_data *hist_data) { + struct trace_array *tr = hist_data->event_file->tr; char *fields_str = hist_data->attrs->sort_key_str; struct tracing_map_sort_key *sort_key; int descending, ret = 0; @@ -4782,10 +4791,12 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (!*field_str) { ret = -EINVAL; + hist_err(tr, 
HIST_ERR_EMPTY_SORT_FIELD, errpos("sort=")); break; } if ((i == TRACING_MAP_SORT_KEYS_MAX - 1) && fields_str) { + hist_err(tr, HIST_ERR_TOO_MANY_SORT_FIELDS, errpos("sort=")); ret = -EINVAL; break; } @@ -4793,11 +4804,12 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) field_name = strsep(&field_str, "."); if (!field_name || !*field_name) { ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_SORT_FIELD, errpos("sort=")); break; } if (strcmp(field_name, "hitcount") == 0) { - descending = is_descending(field_str); + descending = is_descending(tr, field_str); if (descending < 0) { ret = descending; break; @@ -4819,7 +4831,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (strcmp(field_name, test_name) == 0) { sort_key->field_idx = idx; - descending = is_descending(field_str); + descending = is_descending(tr, field_str); if (descending < 0) { ret = descending; goto out; @@ -4830,6 +4842,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } if (j == hist_data->n_fields) { ret = -EINVAL; + hist_err(tr, HIST_ERR_INVALID_SORT_FIELD, errpos(field_name)); break; } } From d0a497066f92eee6e5750af6a0ca32866030931a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 28 Jun 2019 12:40:22 -0500 Subject: [PATCH 350/658] tracing: Add 'hist:' to hist trigger error log error string The 'hist:' prefix gets stripped from the command text during command processing, but should be added back when displaying the command during error processing. Not only because it's what should be displayed but also because not having it means the test cases fail because the caret is miscalculated by the length of the prefix string. Link: http://lkml.kernel.org/r/449df721f560042e22382f67574bcc5b4d830d3d.1561743018.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 23458ba9e5f5..c322826e0726 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -612,7 +612,8 @@ static void last_cmd_set(struct trace_event_file *file, char *str) if (!str) return; - strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); + strcpy(last_cmd, "hist:"); + strncat(last_cmd, str, MAX_FILTER_STR_VAL - 1 - sizeof("hist:")); if (file) { call = file->event_call; From 34f71a4a2de84dde52ccfcb96ce25240ea7981a8 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 28 Jun 2019 12:40:23 -0500 Subject: [PATCH 351/658] tracing: Add new testcases for hist trigger parsing errors Add a testcase ensuring that the tracing error_log correctly displays hist trigger parsing errors. 
Link: http://lkml.kernel.org/r/62ec58d9aca661cde46ba678e32a938427945e9e.1561743018.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../trigger/trigger-hist-syntax-errors.tc | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc new file mode 100644 index 000000000000..d44087a2f3d1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test histogram parser errors + +if [ ! -f set_event -o ! -d events/kmem ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/kmem/kmalloc/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +if [ ! -f events/kmem/kmalloc/hist ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +[ -f error_log ] || exit_unsupported + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'hist:kmem:kmalloc' "$1" 'events/kmem/kmalloc/trigger' +} + +check_error 'hist:keys=common_pid:vals=bytes_req:sort=common_pid,^junk' # INVALID_SORT_FIELD +check_error 'hist:keys=common_pid:vals=bytes_req:^sort=' # EMPTY_ASSIGNMENT +check_error 'hist:keys=common_pid:vals=bytes_req:^sort=common_pid,' # EMPTY_SORT_FIELD +check_error 'hist:keys=common_pid:vals=bytes_req:sort=common_pid.^junk' # INVALID_SORT_MODIFIER +check_error 'hist:keys=common_pid:vals=bytes_req,bytes_alloc:^sort=common_pid,bytes_req,bytes_alloc' # TOO_MANY_SORT_FIELDS + +exit 0 From 76a598ec8c4fde58aab79b9f7c40c33d54eca67b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Jan 2020 18:36:35 +0900 Subject: [PATCH 352/658] tracing/boot: Include required headers and sort it alphabetically Include some required (but currently indirectly included) headers and sort it alphabetically. Link: http://lkml.kernel.org/r/158029059514.12381.6597832266860248781.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 2f616cd926b0..5aad41961f03 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -6,9 +6,16 @@ #define pr_fmt(fmt) "trace_boot: " fmt +#include +#include #include #include -#include +#include +#include +#include +#include +#include +#include #include "trace.h" From 5c3469cb899abe998299aafb8f16f325d62d2d68 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Jan 2020 18:36:44 +0900 Subject: [PATCH 353/658] tracing/boot: Move external function declarations to kernel/trace/trace.h Move external function declarations into kernel/trace/trace.h from trace_boot.c for tracing subsystem internal use. 
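As a sketch of the pattern applied here (the subsys/option names below are illustrative placeholders, not symbols from this patch): keeping the prototype in one subsystem-internal header that both the defining file and its users include lets the compiler verify declaration and definition stay in sync, instead of each .c file carrying its own bare "extern" copy.

    /* subsys.h - subsystem-internal header, included by the definition and all users */
    #ifndef SUBSYS_H
    #define SUBSYS_H

    struct subsys;                          /* opaque to callers */
    int subsys_set_option(struct subsys *s, const char *opt);

    #endif

    /* caller.c - no per-file "extern int subsys_set_option(...);" copy needed */
    #include "subsys.h"

    int caller_init(struct subsys *s)
    {
            return subsys_set_option(s, "default");
    }
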
Link: http://lkml.kernel.org/r/158029060405.12381.11944554430359702545.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 17 +++++++++++++++++ kernel/trace/trace_boot.c | 15 --------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6bb64d89c321..b3075b637d14 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1157,6 +1157,11 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); + +extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); #else struct ftrace_func_command; @@ -1905,6 +1910,15 @@ void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); +/* Used from boot time tracer */ +extern int trace_set_options(struct trace_array *tr, char *option); +extern int tracing_set_tracer(struct trace_array *tr, const char *buf); +extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id); +extern int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new); + + #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); @@ -1964,6 +1978,9 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); +/* Used from boot time tracer */ +extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); +extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 5aad41961f03..4d37bf5c3742 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -21,13 +21,6 @@ #define MAX_BUF_LEN 256 -extern int trace_set_options(struct trace_array *tr, char *option); -extern int tracing_set_tracer(struct trace_array *tr, const char *buf); -extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, - unsigned long size, int cpu_id); -extern int tracing_set_cpumask(struct trace_array *tr, - cpumask_var_t tracing_cpumask_new); - static void __init trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) { @@ -76,9 +69,6 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) } #ifdef CONFIG_EVENT_TRACING -extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); -extern int trigger_process_regex(struct trace_event_file *file, char *buff); - static void __init trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) { @@ -252,11 +242,6 @@ trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) #endif #ifdef CONFIG_DYNAMIC_FTRACE -extern bool ftrace_filter_param __initdata; -extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, - int len, int reset); -extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, - int len, int reset); static void __init 
trace_boot_set_ftrace_filter(struct trace_array *tr, struct xbc_node *node) { From 67e69e1d46becd934a03489c38fc42bb2510c1c6 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Tue, 28 Jan 2020 16:00:22 +0530 Subject: [PATCH 354/658] ASoC: amd: Fix simultaneous playback and capture Stopping of one stream is killing the other stream when they are running simultaneously. This is because, IER register is cleared which disables I2S and overrides any other block enables. Clearing IER register only when all streams on a channel are disabled, fixes the issue. Signed-off-by: Akshu Agrawal Link: https://lore.kernel.org/r/20200128103029.128841-1-akshu.agrawal@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-i2s.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-i2s.c b/sound/soc/amd/raven/acp3x-i2s.c index bf51cadf8682..31cd4008e33f 100644 --- a/sound/soc/amd/raven/acp3x-i2s.c +++ b/sound/soc/amd/raven/acp3x-i2s.c @@ -234,30 +234,32 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, switch (rtd->i2s_instance) { case I2S_BT_INSTANCE: reg_val = mmACP_BTTDM_ITER; - ier_val = mmACP_BTTDM_IER; break; case I2S_SP_INSTANCE: default: reg_val = mmACP_I2STDM_ITER; - ier_val = mmACP_I2STDM_IER; } } else { switch (rtd->i2s_instance) { case I2S_BT_INSTANCE: reg_val = mmACP_BTTDM_IRER; - ier_val = mmACP_BTTDM_IER; break; case I2S_SP_INSTANCE: default: reg_val = mmACP_I2STDM_IRER; - ier_val = mmACP_I2STDM_IER; } } val = rv_readl(rtd->acp3x_base + reg_val); val = val & ~BIT(0); rv_writel(val, rtd->acp3x_base + reg_val); - rv_writel(0, rtd->acp3x_base + ier_val); + + if (!(rv_readl(rtd->acp3x_base + mmACP_BTTDM_ITER) & BIT(0)) && + !(rv_readl(rtd->acp3x_base + mmACP_BTTDM_IRER) & BIT(0))) + rv_writel(0, rtd->acp3x_base + mmACP_BTTDM_IER); + if (!(rv_readl(rtd->acp3x_base + mmACP_I2STDM_ITER) & BIT(0)) && + !(rv_readl(rtd->acp3x_base + mmACP_I2STDM_IRER) & BIT(0))) + rv_writel(0, rtd->acp3x_base + mmACP_I2STDM_IER); ret = 0; break; default: From faa37a9318899752d85761d47e79a42f3a0aa1b9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Jan 2020 20:28:28 +0100 Subject: [PATCH 355/658] ASoC: rt1308-sdw: Add __maybe_unused to PM callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the compile warnings by adding __maybe_unused to PM callbacks: sound/soc/codecs/rt1308-sdw.c:690:12: warning: ‘rt1308_dev_resume’ defined but not used [-Wunused-function] sound/soc/codecs/rt1308-sdw.c:676:12: warning: ‘rt1308_dev_suspend’ defined but not used [-Wunused-function] Fixes: a87a6653a28c ("ASoC: rt1308-sdw: add rt1308 SdW amplifier driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200127192831.14057-2-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/codecs/rt1308-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c index 8a03dbfe7906..d930f60cb797 100644 --- a/sound/soc/codecs/rt1308-sdw.c +++ b/sound/soc/codecs/rt1308-sdw.c @@ -673,7 +673,7 @@ static const struct sdw_device_id rt1308_id[] = { }; MODULE_DEVICE_TABLE(sdw, rt1308_id); -static int rt1308_dev_suspend(struct device *dev) +static int __maybe_unused rt1308_dev_suspend(struct device *dev) { struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(dev); @@ -687,7 +687,7 @@ static int rt1308_dev_suspend(struct device *dev) #define RT1308_PROBE_TIMEOUT 2000 -static int rt1308_dev_resume(struct device *dev) +static int __maybe_unused 
rt1308_dev_resume(struct device *dev) { struct sdw_slave *slave = dev_to_sdw_dev(dev); struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(dev); From 809a9b63323278e94f73d8b1f85b64e347f639ba Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Jan 2020 20:28:29 +0100 Subject: [PATCH 356/658] ASoC: rt700: Add __maybe_unused to PM callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the compile warnings by adding __maybe_unused to PM callbacks: sound/soc/codecs/rt700-sdw.c:503:12: warning: ‘rt700_dev_resume’ defined but not used [-Wunused-function] sound/soc/codecs/rt700-sdw.c:489:12: warning: ‘rt700_dev_suspend’ defined but not used [-Wunused-function] Fixes: 7d2a5f9ae41e ("ASoC: rt700: add rt700 codec driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200127192831.14057-3-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/codecs/rt700-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt700-sdw.c b/sound/soc/codecs/rt700-sdw.c index a4b95425886f..d4e0f953bcce 100644 --- a/sound/soc/codecs/rt700-sdw.c +++ b/sound/soc/codecs/rt700-sdw.c @@ -486,7 +486,7 @@ static const struct sdw_device_id rt700_id[] = { }; MODULE_DEVICE_TABLE(sdw, rt700_id); -static int rt700_dev_suspend(struct device *dev) +static int __maybe_unused rt700_dev_suspend(struct device *dev) { struct rt700_priv *rt700 = dev_get_drvdata(dev); @@ -500,7 +500,7 @@ static int rt700_dev_suspend(struct device *dev) #define RT700_PROBE_TIMEOUT 2000 -static int rt700_dev_resume(struct device *dev) +static int __maybe_unused rt700_dev_resume(struct device *dev) { struct sdw_slave *slave = dev_to_sdw_dev(dev); struct rt700_priv *rt700 = dev_get_drvdata(dev); From 973649d38e2664eb45e1ae9adfdce53626c29f94 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Jan 2020 20:28:30 +0100 Subject: [PATCH 357/658] ASoC: rt711: Add __maybe_unused to PM callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the compile warnings by adding __maybe_unused to PM callbacks: sound/soc/codecs/rt711-sdw.c:504:12: warning: ‘rt711_dev_resume’ defined but not used [-Wunused-function] sound/soc/codecs/rt711-sdw.c:490:12: warning: ‘rt711_dev_suspend’ defined but not used [-Wunused-function] Fixes: 320b8b0d13b8 ("ASoC: rt711: add rt711 codec driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200127192831.14057-4-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/codecs/rt711-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 85e62e1059cd..fc3a3fa3d51b 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -487,7 +487,7 @@ static const struct sdw_device_id rt711_id[] = { }; MODULE_DEVICE_TABLE(sdw, rt711_id); -static int rt711_dev_suspend(struct device *dev) +static int __maybe_unused rt711_dev_suspend(struct device *dev) { struct rt711_priv *rt711 = dev_get_drvdata(dev); @@ -501,7 +501,7 @@ static int rt711_dev_suspend(struct device *dev) #define RT711_PROBE_TIMEOUT 2000 -static int rt711_dev_resume(struct device *dev) +static int __maybe_unused rt711_dev_resume(struct device *dev) { struct sdw_slave *slave = dev_to_sdw_dev(dev); struct rt711_priv *rt711 = dev_get_drvdata(dev); From 4099c3295e480907e37b93d51fc17fda81dd5bed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Jan 2020 20:28:31 +0100 Subject: [PATCH 358/658] ASoC: rt715: Add __maybe_unused 
to PM callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the compile warnings by adding __maybe_unused to PM callbacks: sound/soc/codecs/rt715-sdw.c:566:12: warning: ‘rt715_dev_resume’ defined but not used [-Wunused-function] sound/soc/codecs/rt715-sdw.c:552:12: warning: ‘rt715_dev_suspend’ defined but not used [-Wunused-function] Fixes: d1ede0641b05 ("ASoC: rt715: add RT715 codec driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200127192831.14057-5-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/codecs/rt715-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index 6d892c44c522..64ef56ef0318 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -549,7 +549,7 @@ static const struct sdw_device_id rt715_id[] = { }; MODULE_DEVICE_TABLE(sdw, rt715_id); -static int rt715_dev_suspend(struct device *dev) +static int __maybe_unused rt715_dev_suspend(struct device *dev) { struct rt715_priv *rt715 = dev_get_drvdata(dev); @@ -563,7 +563,7 @@ static int rt715_dev_suspend(struct device *dev) #define RT715_PROBE_TIMEOUT 2000 -static int rt715_dev_resume(struct device *dev) +static int __maybe_unused rt715_dev_resume(struct device *dev) { struct sdw_slave *slave = dev_to_sdw_dev(dev); struct rt715_priv *rt715 = dev_get_drvdata(dev); From 46b770f720bdd8a7de1c04a1cab5d4e9e21d6666 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Wed, 29 Jan 2020 10:44:48 -0800 Subject: [PATCH 359/658] ALSA: uapi: Fix sparse warning Fix the following sparse warning generated due to 64-bit compat type having fields defined explicitly with __s32: sound/soc/sof/sof-audio.c:46:31: warning: incorrect type in assignment (different base types) sound/soc/sof/sof-audio.c:46:31: expected restricted snd_pcm_state_t [usertype] state sound/soc/sof/sof-audio.c:46:31: got signed int [usertype] state Fixes: 80fe7430c708 ("ALSA: add new 32-bit layout for snd_pcm_mmap_status/control") Reported-by: Pierre-Louis Bossart Suggested-by: Takashi Iwai Signed-off-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20200129184448.3005-1-ranjani.sridharan@linux.intel.com Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 30ebb2a42983..535a7229e1d9 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -564,13 +564,13 @@ typedef char __pad_after_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)]; #endif struct __snd_pcm_mmap_status64 { - __s32 state; /* RO: state - SNDRV_PCM_STATE_XXXX */ + snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */ __u32 pad1; /* Needed for 64 bit alignment */ __pad_before_uframe __pad1; snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */ __pad_after_uframe __pad2; struct __snd_timespec64 tstamp; /* Timestamp */ - __s32 suspended_state; /* RO: suspended stream state */ + snd_pcm_state_t suspended_state;/* RO: suspended stream state */ __u32 pad3; /* Needed for 64 bit alignment */ struct __snd_timespec64 audio_tstamp; /* sample counter or wall clock */ }; From 66f2d19f8116e16898f8d82e28573a384ddc430d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 29 Jan 2020 20:59:07 +0100 Subject: [PATCH 360/658] ALSA: pcm: Fix memory leak at closing a stream without hw_free ALSA PCM core recently introduced a new managed PCM buffer allocation mode that does 
allocate / free automatically at hw_params and hw_free. However, it overlooked the code path directly calling hw_free PCM ops at releasing the PCM substream, and it may result in a memory leak as spotted by syzkaller when no buffer preallocation is used (e.g. vmalloc buffer). This patch papers over it with a slight refactoring. The hw_free ops call and relevant tasks are unified in a new helper function, and call it from both places. Fixes: 0dba808eae26 ("ALSA: pcm: Introduce managed buffer allocation mode") Reported-by: syzbot+30edd0f34bfcdc548ac4@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20200129195907.12197-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index bb23f5066654..4ac42ee1238c 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -786,10 +786,22 @@ end: return err; } +static int do_hw_free(struct snd_pcm_substream *substream) +{ + int result = 0; + + snd_pcm_sync_stop(substream); + if (substream->ops->hw_free) + result = substream->ops->hw_free(substream); + if (substream->managed_buffer_alloc) + snd_pcm_lib_free_pages(substream); + return result; +} + static int snd_pcm_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; - int result = 0; + int result; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; @@ -806,11 +818,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) snd_pcm_stream_unlock_irq(substream); if (atomic_read(&substream->mmap_count)) return -EBADFD; - snd_pcm_sync_stop(substream); - if (substream->ops->hw_free) - result = substream->ops->hw_free(substream); - if (substream->managed_buffer_alloc) - snd_pcm_lib_free_pages(substream); + result = do_hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); pm_qos_remove_request(&substream->latency_pm_qos_req); return result; @@ -2529,9 +2537,7 @@ void snd_pcm_release_substream(struct snd_pcm_substream *substream) snd_pcm_drop(substream); if (substream->hw_opened) { - if (substream->ops->hw_free && - substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) - substream->ops->hw_free(substream); + do_hw_free(substream); substream->ops->close(substream); substream->hw_opened = 0; } From eca95cd5a36d1fb547e84b8fd86fa2b7ba7f4b91 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 25 Jan 2020 20:20:20 +0000 Subject: [PATCH 361/658] i2c: parport: fix spelling mistake: "Atmost" -> "At most" There is a spelling mistake in a module parameter description. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-parport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c index f8f94a25af26..81eb441b2387 100644 --- a/drivers/i2c/busses/i2c-parport.c +++ b/drivers/i2c/busses/i2c-parport.c @@ -129,7 +129,7 @@ static int parport[MAX_DEVICE] = {0, -1, -1, -1}; module_param_array(parport, int, NULL, 0); MODULE_PARM_DESC(parport, "List of parallel ports to bind to, by index.\n" - " Atmost " __stringify(MAX_DEVICE) " devices are supported.\n" + " At most " __stringify(MAX_DEVICE) " devices are supported.\n" " Default is one device connected to parport0.\n" ); From 878508aed4776a5b08dcc000d33a01a42e3fd07d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Jan 2020 11:13:36 +0000 Subject: [PATCH 362/658] i2c: xiic: fix indentation issue There is a statement that is indented one level too deeply, remove the extraneous tab. Fixes: b4c119dbc300 ("i2c: xiic: Add timeout to the rx fifo wait loop") Signed-off-by: Colin Ian King Reviewed-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index b17d30c9ab40..90c1c362394d 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -261,7 +261,7 @@ static int xiic_clear_rx_fifo(struct xiic_i2c *i2c) xiic_getreg8(i2c, XIIC_DRR_REG_OFFSET); if (time_after(jiffies, timeout)) { dev_err(i2c->dev, "Failed to clear rx fifo\n"); - return -ETIMEDOUT; + return -ETIMEDOUT; } } From 21593938086fa2886fe07d06aaeb674254a02e3f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:26 +0100 Subject: [PATCH 363/658] docs: i2c: sort index logically The index page currently lists sections in alphabetical file order without caring about their content. Sort sections based on their content logically, according to the following structure: * Intro to I2C/SMBus and their usage in Linux: summary, i2c-protocol, smbus-protocol, instantiating-devices, busses/index, i2c-topology, muxes/i2c-mux-gpio * Implementing drivers: writing-clients, dev-interface, dma-considerations, fault-codes, functionality * Debugging: gpio-fault-injection, i2c-stub * Slave I2C: slave-interface, slave-eeprom-backend * Advanced: ten-bit-addresses * Obsolete info: upgrading-clients, old-module-parameters Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/index.rst | 60 +++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index a0fbaf6d0675..fee4744475df 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -4,30 +4,66 @@ I2C/SMBus Subsystem =================== +Introduction +============ + .. toctree:: :maxdepth: 1 + summary + i2c-protocol + smbus-protocol + instantiating-devices + busses/index + i2c-topology + muxes/i2c-mux-gpio + +Writing device drivers +====================== + +.. toctree:: + :maxdepth: 1 + + writing-clients dev-interface dma-considerations fault-codes functionality + +Debugging +========= + +.. toctree:: + :maxdepth: 1 + gpio-fault-injection - i2c-protocol i2c-stub - i2c-topology - instantiating-devices - old-module-parameters - slave-eeprom-backend + +Slave I2C +========= + +.. 
toctree:: + :maxdepth: 1 + slave-interface - smbus-protocol - summary + slave-eeprom-backend + +Advanced topics +=============== + +.. toctree:: + :maxdepth: 1 + ten-bit-addresses + +Legacy documentation +==================== + +.. toctree:: + :maxdepth: 1 + upgrading-clients - writing-clients - - muxes/i2c-mux-gpio - - busses/index + old-module-parameters .. only:: subproject and html From 096c22f88ea09dab449f039863a46cb504b9b3ec Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:27 +0100 Subject: [PATCH 364/658] docs: i2c: summary: extend introduction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - state the "official" name (I²C, not I2C, according to the spec) at the beginning but keep using the more practical I2C elsewhere - mention some known different names - add link to the specification document Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 3a24eac17375..dbab737d5075 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -2,12 +2,18 @@ I2C and SMBus ============= -I2C (pronounce: I squared C) is a protocol developed by Philips. It is a -slow two-wire protocol (variable speed, up to 400 kHz), with a high speed -extension (3.4 MHz). It provides an inexpensive bus for connecting many -types of devices with infrequent or low bandwidth communications needs. -I2C is widely used with embedded systems. Some systems use variants that -don't meet branding requirements, and so are not advertised as being I2C. +I²C (pronounce: I squared C and written I2C in the kernel documentation) is +a protocol developed by Philips. It is a slow two-wire protocol (variable +speed, up to 400 kHz), with a high speed extension (3.4 MHz). It provides +an inexpensive bus for connecting many types of devices with infrequent or +low bandwidth communications needs. I2C is widely used with embedded +systems. Some systems use variants that don't meet branding requirements, +and so are not advertised as being I2C but come under different names, +e.g. TWI (Two Wire Interface), IIC. + +The official I2C specification is the `"I2C-bus specification and user +manual" (UM10204) `_ +published by NXP Semiconductors. SMBus (System Management Bus) is based on the I2C protocol, and is mostly a subset of I2C protocols and signaling. Many I2C devices will work on an From 020bc5b9296af6d3e0b255cff3a2674191287624 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:28 +0100 Subject: [PATCH 365/658] docs: i2c: summary: rewrite the "terminology" section This section, partly dating back to the pre-git era, is somewhat unclear and partly incorrect. Rewrite it almost completely including a reference figure, concise but precise definition of each term and the paths where drivers are found. Particular care has been put in clarifying the relation between adapter and algorithm, which has no correspondence in the I2C spec terminology. 
Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c.svg | 1341 +++++++++++++++++++++++++++++++++ Documentation/i2c/summary.rst | 36 +- 2 files changed, 1363 insertions(+), 14 deletions(-) create mode 100644 Documentation/i2c/i2c.svg diff --git a/Documentation/i2c/i2c.svg b/Documentation/i2c/i2c.svg new file mode 100644 index 000000000000..5979405ad1c3 --- /dev/null +++ b/Documentation/i2c/i2c.svg @@ -0,0 +1,1341 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + Luca Ceresoli + + + 2020 + + + + + + + + + + + + + + + I2CMaster + + + I2CSlave + + SCL + SDA + + I2CSlave + + I2CSlave + + + + + + + + + + + + VDD + + diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index dbab737d5075..09f73a608e25 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -31,21 +31,29 @@ implement all the common SMBus protocol semantics or messages. Terminology =========== -When we talk about I2C, we use the following terms:: +Using the terminology from the official documentation, the I2C bus connects +one or more *master* chips and one or more *slave* chips. - Bus -> Algorithm - Adapter - Device -> Driver - Client +.. kernel-figure:: i2c.svg + :alt: Simple I2C bus with one master and 3 slaves -An Algorithm driver contains general code that can be used for a whole class -of I2C adapters. Each specific adapter driver either depends on one algorithm -driver, or includes its own implementation. + Simple I2C bus -A Driver driver (yes, this sounds ridiculous, sorry) contains the general -code to access some type of device. Each detected device gets its own -data in the Client structure. Usually, Driver and Client are more closely -integrated than Algorithm and Adapter. +A **master** chip is a node that starts communications with slaves. In the +Linux kernel implementation it is called an **adapter** or bus. Adapter +drivers are in the ``drivers/i2c/busses/`` subdirectory. -For a given configuration, you will need a driver for your I2C bus, and -drivers for your I2C devices (usually one driver for each device). +An **algorithm** contains general code that can be used to implement a +whole class of I2C adapters. Each specific adapter driver either depends on +an algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes +its own implementation. + +A **slave** chip is a node that responds to communications when addressed +by the master. In Linux it is called a **client**. Client drivers are kept +in a directory specific to the feature they provide, for example +``drivers/media/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for +video-related chips. + +For the example configuration in figure, you will need a driver for your +I2C adapter, and drivers for your I2C devices (usually one driver for each +device). 
From 2f07c05f14a8bf5ee815e159a451d580d9113db9 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:29 +0100 Subject: [PATCH 366/658] docs: i2c: call it "I2C" consistently Uppercase "I2C" is used almost everywhere in the docs, but the lowercase version "i2c" is used somewhere. Use the uppercase form consistently. Signed-off-by: Luca Ceresoli Acked-by: Peter Rosin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface.rst | 18 +++--- Documentation/i2c/dma-considerations.rst | 2 +- Documentation/i2c/i2c-protocol.rst | 4 +- Documentation/i2c/i2c-topology.rst | 66 ++++++++++----------- Documentation/i2c/instantiating-devices.rst | 2 +- Documentation/i2c/old-module-parameters.rst | 6 +- Documentation/i2c/slave-interface.rst | 4 +- Documentation/i2c/writing-clients.rst | 4 +- 8 files changed, 53 insertions(+), 53 deletions(-) diff --git a/Documentation/i2c/dev-interface.rst b/Documentation/i2c/dev-interface.rst index 69c23a3c2b1b..c3717a87df12 100644 --- a/Documentation/i2c/dev-interface.rst +++ b/Documentation/i2c/dev-interface.rst @@ -2,26 +2,26 @@ I2C Device Interface ==================== -Usually, i2c devices are controlled by a kernel driver. But it is also +Usually, I2C devices are controlled by a kernel driver. But it is also possible to access all devices on an adapter from userspace, through the /dev interface. You need to load module i2c-dev for this. -Each registered i2c adapter gets a number, counting from 0. You can +Each registered I2C adapter gets a number, counting from 0. You can examine /sys/class/i2c-dev/ to see what number corresponds to which adapter. Alternatively, you can run "i2cdetect -l" to obtain a formatted list of all -i2c adapters present on your system at a given time. i2cdetect is part of +I2C adapters present on your system at a given time. i2cdetect is part of the i2c-tools package. I2C device files are character device files with major device number 89 and a minor device number corresponding to the number assigned as explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ..., -i2c-10, ...). All 256 minor device numbers are reserved for i2c. +i2c-10, ...). All 256 minor device numbers are reserved for I2C. C example ========= -So let's say you want to access an i2c adapter from a C program. +So let's say you want to access an I2C adapter from a C program. First, you need to include these two headers:: #include @@ -66,7 +66,7 @@ the device supports them. Both are illustrated below:: /* Using SMBus commands */ res = i2c_smbus_read_word_data(file, reg); if (res < 0) { - /* ERROR HANDLING: i2c transaction failed */ + /* ERROR HANDLING: I2C transaction failed */ } else { /* res contains the read word */ } @@ -79,12 +79,12 @@ the device supports them. Both are illustrated below:: buf[1] = 0x43; buf[2] = 0x65; if (write(file, buf, 3) != 3) { - /* ERROR HANDLING: i2c transaction failed */ + /* ERROR HANDLING: I2C transaction failed */ } /* Using I2C Read, equivalent of i2c_smbus_read_byte(file) */ if (read(file, buf, 1) != 1) { - /* ERROR HANDLING: i2c transaction failed */ + /* ERROR HANDLING: I2C transaction failed */ } else { /* buf[0] contains the read byte */ } @@ -144,7 +144,7 @@ The following IOCTLs are defined: If possible, use the provided ``i2c_smbus_*`` methods described below instead of issuing direct ioctls. -You can do plain i2c transactions by using read(2) and write(2) calls. +You can do plain I2C transactions by using read(2) and write(2) calls. 
You do not need to pass the address byte; instead, set it through ioctl I2C_SLAVE before you try to access the device. diff --git a/Documentation/i2c/dma-considerations.rst b/Documentation/i2c/dma-considerations.rst index 203002054120..142d52ce9ebb 100644 --- a/Documentation/i2c/dma-considerations.rst +++ b/Documentation/i2c/dma-considerations.rst @@ -2,7 +2,7 @@ Linux I2C and DMA ================= -Given that i2c is a low-speed bus, over which the majority of messages +Given that I2C is a low-speed bus, over which the majority of messages transferred are small, it is not considered a prime user of DMA access. At this time of writing, only 10% of I2C bus master drivers have DMA support implemented. And the vast majority of transactions are so small that setting up diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index 2f8fcf671b2e..0222c97f3436 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -2,7 +2,7 @@ I2C Protocol ============ -This document describes the i2c protocol. Or will, when it is finished :-) +This document describes the I2C protocol. Or will, when it is finished :-) Key to symbols ============== @@ -57,7 +57,7 @@ Modified transactions ===================== The following modifications to the I2C protocol can also be generated by -setting these flags for i2c messages. With the exception of I2C_M_NOSTART, they +setting these flags for I2C messages. With the exception of I2C_M_NOSTART, they are usually only needed to work around device issues: I2C_M_IGNORE_NAK: diff --git a/Documentation/i2c/i2c-topology.rst b/Documentation/i2c/i2c-topology.rst index 0c1ae95f6a97..0875090d6aeb 100644 --- a/Documentation/i2c/i2c-topology.rst +++ b/Documentation/i2c/i2c-topology.rst @@ -2,8 +2,8 @@ I2C topology ============ -There are a couple of reasons for building more complex i2c topologies -than a straight-forward i2c bus with one adapter and one or more devices. +There are a couple of reasons for building more complex I2C topologies +than a straight-forward I2C bus with one adapter and one or more devices. 1. A mux may be needed on the bus to prevent address collisions. @@ -11,20 +11,20 @@ than a straight-forward i2c bus with one adapter and one or more devices. may be needed to determine if it is ok to access the bus. 3. A device (particularly RF tuners) may want to avoid the digital noise - from the i2c bus, at least most of the time, and sits behind a gate + from the I2C bus, at least most of the time, and sits behind a gate that has to be operated before the device can be accessed. Etc === -These constructs are represented as i2c adapter trees by Linux, where +These constructs are represented as I2C adapter trees by Linux, where each adapter has a parent adapter (except the root adapter) and zero or more child adapters. The root adapter is the actual adapter that issues -i2c transfers, and all adapters with a parent are part of an "i2c-mux" +I2C transfers, and all adapters with a parent are part of an "i2c-mux" object (quoted, since it can also be an arbitrator or a gate). Depending of the particular mux driver, something happens when there is -an i2c transfer on one of its child adapters. The mux driver can +an I2C transfer on one of its child adapters. The mux driver can obviously operate a mux, but it can also do arbitration with an external bus master or open a gate. The mux driver has two operations for this, select and deselect. 
select is called before the transfer and (the @@ -34,7 +34,7 @@ optional) deselect is called after the transfer. Locking ======= -There are two variants of locking available to i2c muxes, they can be +There are two variants of locking available to I2C muxes, they can be mux-locked or parent-locked muxes. As is evident from below, it can be useful to know if a mux is mux-locked or if it is parent-locked. The following list was correct at the time of writing: @@ -45,7 +45,7 @@ In drivers/i2c/muxes/: i2c-arb-gpio-challenge Parent-locked i2c-mux-gpio Normally parent-locked, mux-locked iff all involved gpio pins are controlled by the - same i2c root adapter that they mux. + same I2C root adapter that they mux. i2c-mux-gpmux Normally parent-locked, mux-locked iff specified in device-tree. i2c-mux-ltc4306 Mux-locked @@ -54,7 +54,7 @@ i2c-mux-pca9541 Parent-locked i2c-mux-pca954x Parent-locked i2c-mux-pinctrl Normally parent-locked, mux-locked iff all involved pinctrl devices are controlled - by the same i2c root adapter that they mux. + by the same I2C root adapter that they mux. i2c-mux-reg Parent-locked ====================== ============================================= @@ -83,9 +83,9 @@ Mux-locked muxes Mux-locked muxes does not lock the entire parent adapter during the full select-transfer-deselect transaction, only the muxes on the parent adapter are locked. Mux-locked muxes are mostly interesting if the -select and/or deselect operations must use i2c transfers to complete +select and/or deselect operations must use I2C transfers to complete their tasks. Since the parent adapter is not fully locked during the -full transaction, unrelated i2c transfers may interleave the different +full transaction, unrelated I2C transfers may interleave the different stages of the transaction. This has the benefit that the mux driver may be easier and cleaner to implement, but it has some caveats. @@ -109,14 +109,14 @@ ML2. It is not safe to build arbitrary topologies with two (or more) ML3. A mux-locked mux cannot be used by a driver for auto-closing gates/muxes, i.e. something that closes automatically after a given - number (one, in most cases) of i2c transfers. Unrelated i2c transfers + number (one, in most cases) of I2C transfers. Unrelated I2C transfers may creep in and close prematurely. -ML4. If any non-i2c operation in the mux driver changes the i2c mux state, +ML4. If any non-I2C operation in the mux driver changes the I2C mux state, the driver has to lock the root adapter during that operation. Otherwise garbage may appear on the bus as seen from devices - behind the mux, when an unrelated i2c transfer is in flight during - the non-i2c mux-changing operation. + behind the mux, when an unrelated I2C transfer is in flight during + the non-I2C mux-changing operation. ==== ===================================================================== @@ -137,14 +137,14 @@ Mux-locked Example When there is an access to D1, this happens: - 1. Someone issues an i2c-transfer to D1. + 1. Someone issues an I2C-transfer to D1. 2. M1 locks muxes on its parent (the root adapter in this case). 3. M1 calls ->select to ready the mux. - 4. M1 (presumably) does some i2c-transfers as part of its select. - These transfers are normal i2c-transfers that locks the parent + 4. M1 (presumably) does some I2C-transfers as part of its select. + These transfers are normal I2C-transfers that locks the parent adapter. - 5. 
M1 feeds the i2c-transfer from step 1 to its parent adapter as a - normal i2c-transfer that locks the parent adapter. + 5. M1 feeds the I2C-transfer from step 1 to its parent adapter as a + normal I2C-transfer that locks the parent adapter. 6. M1 calls ->deselect, if it has one. 7. Same rules as in step 4, but for ->deselect. 8. M1 unlocks muxes on its parent. @@ -159,8 +159,8 @@ Parent-locked muxes Parent-locked muxes lock the parent adapter during the full select- transfer-deselect transaction. The implication is that the mux driver -has to ensure that any and all i2c transfers through that parent -adapter during the transaction are unlocked i2c transfers (using e.g. +has to ensure that any and all I2C transfers through that parent +adapter during the transaction are unlocked I2C transfers (using e.g. __i2c_transfer), or a deadlock will follow. There are a couple of caveats. @@ -169,12 +169,12 @@ PL1. If you build a topology with a parent-locked mux being the child of another mux, this might break a possible assumption from the child mux that the root adapter is unused between its select op and the actual transfer (e.g. if the child mux is auto-closing - and the parent mux issus i2c-transfers as part of its select). + and the parent mux issus I2C-transfers as part of its select). This is especially the case if the parent mux is mux-locked, but it may also happen if the parent mux is parent-locked. PL2. If select/deselect calls out to other subsystems such as gpio, - pinctrl, regmap or iio, it is essential that any i2c transfers + pinctrl, regmap or iio, it is essential that any I2C transfers caused by these subsystems are unlocked. This can be convoluted to accomplish, maybe even impossible if an acceptably clean solution is sought. @@ -197,15 +197,15 @@ Parent-locked Example When there is an access to D1, this happens: - 1. Someone issues an i2c-transfer to D1. + 1. Someone issues an I2C-transfer to D1. 2. M1 locks muxes on its parent (the root adapter in this case). 3. M1 locks its parent adapter. 4. M1 calls ->select to ready the mux. - 5. If M1 does any i2c-transfers (on this root adapter) as part of - its select, those transfers must be unlocked i2c-transfers so + 5. If M1 does any I2C-transfers (on this root adapter) as part of + its select, those transfers must be unlocked I2C-transfers so that they do not deadlock the root adapter. - 6. M1 feeds the i2c-transfer from step 1 to the root adapter as an - unlocked i2c-transfer, so that it does not deadlock the parent + 6. M1 feeds the I2C-transfer from step 1 to the root adapter as an + unlocked I2C-transfer, so that it does not deadlock the parent adapter. 7. M1 calls ->deselect, if it has one. 8. Same rules as in step 5, but for ->deselect. @@ -240,7 +240,7 @@ and specifically when M2 requests its parent to lock, M1 passes the buck to the root adapter). This topology is bad if M2 is an auto-closing mux and M1->select -issues any unlocked i2c transfers on the root adapter that may leak +issues any unlocked I2C transfers on the root adapter that may leak through and be seen by the M2 adapter, thus closing M2 prematurely. @@ -286,14 +286,14 @@ point. This kind of topology is generally not suitable and should probably be avoided. The reason is that M2 probably assumes that there will -be no i2c transfers during its calls to ->select and ->deselect, and +be no I2C transfers during its calls to ->select and ->deselect, and if there are, any such transfers might appear on the slave side of M2 -as partial i2c transfers, i.e. 
garbage or worse. This might cause +as partial I2C transfers, i.e. garbage or worse. This might cause device lockups and/or other problems. The topology is especially troublesome if M2 is an auto-closing mux. In that case, any interleaved accesses to D4 might close M2 -prematurely, as might any i2c-transfers part of M1->select. +prematurely, as might any I2C-transfers part of M1->select. But if M2 is not making the above stated assumption, and if M2 is not auto-closing, the topology is fine. diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst index b7b90b1b82f9..53244e2f0378 100644 --- a/Documentation/i2c/instantiating-devices.rst +++ b/Documentation/i2c/instantiating-devices.rst @@ -188,7 +188,7 @@ destroyed automatically when the driver which detected them is removed, or when the underlying I2C bus is itself destroyed, whichever happens first. -Those of you familiar with the i2c subsystem of 2.4 kernels and early 2.6 +Those of you familiar with the I2C subsystem of 2.4 kernels and early 2.6 kernels will find out that this method 3 is essentially similar to what was done there. Two significant differences are: diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst index a1939512ad66..78a6950e5763 100644 --- a/Documentation/i2c/old-module-parameters.rst +++ b/Documentation/i2c/old-module-parameters.rst @@ -2,14 +2,14 @@ I2C device driver binding control from user-space ================================================= -Up to kernel 2.6.32, many i2c drivers used helper macros provided by +Up to kernel 2.6.32, many I2C drivers used helper macros provided by which created standard module parameters to let the user -control how the driver would probe i2c buses and attach to devices. These +control how the driver would probe I2C buses and attach to devices. These parameters were known as "probe" (to let the driver probe for an extra address), "force" (to forcibly attach the driver to a given device) and "ignore" (to prevent a driver from probing a given address). -With the conversion of the i2c subsystem to the standard device driver +With the conversion of the I2C subsystem to the standard device driver binding model, it became clear that these per-module parameters were no longer needed, and that a centralized implementation was possible. The new, sysfs-based interface is described in the documentation file diff --git a/Documentation/i2c/slave-interface.rst b/Documentation/i2c/slave-interface.rst index c769bd6a15bf..82ea3e1d6fe4 100644 --- a/Documentation/i2c/slave-interface.rst +++ b/Documentation/i2c/slave-interface.rst @@ -59,7 +59,7 @@ The bus driver sends an event to the backend using the following function:: ret = i2c_slave_event(client, event, &val) -'client' describes the i2c slave device. 'event' is one of the special event +'client' describes the I2C slave device. 'event' is one of the special event types described hereafter. 'val' holds an u8 value for the data byte to be read/written and is thus bidirectional. The pointer to val must always be provided even if val is not used for an event, i.e. don't use NULL here. 'ret' @@ -143,7 +143,7 @@ Bus driver support If you want to add slave support to the bus driver: * implement calls to register/unregister the slave and add those to the - struct i2c_algorithm. When registering, you probably need to set the i2c + struct i2c_algorithm. When registering, you probably need to set the I2C slave address and enable slave specific interrupts. 
If you use runtime pm, you should use pm_runtime_get_sync() because your device usually needs to be powered on always to be able to detect its slave address. When unregistering, diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst index 0336909ca01b..44d97b2c9a82 100644 --- a/Documentation/i2c/writing-clients.rst +++ b/Documentation/i2c/writing-clients.rst @@ -95,7 +95,7 @@ to gather information from the client, or write new information to the client. I have found it useful to define foo_read and foo_write functions for this. -For some cases, it will be easier to call the i2c functions directly, +For some cases, it will be easier to call the I2C functions directly, but many chips have some kind of register-value idea that can easily be encapsulated. @@ -344,7 +344,7 @@ Plain I2C communication int i2c_master_recv(struct i2c_client *client, char *buf, int count); These routines read and write some bytes from/to a client. The client -contains the i2c address, so you do not have to include it. The second +contains the I2C address, so you do not have to include it. The second parameter contains the bytes to read/write, the third the number of bytes to read/write (must be less than the length of the buffer, also should be less than 64k since msg.len is u16.) Returned is the actual number of bytes From 40c573d12ea5d13c740322f6b9cdd93e593ceb1d Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:30 +0100 Subject: [PATCH 367/658] docs: i2c: fix typo Fix "issus" -> "issues". Signed-off-by: Luca Ceresoli Acked-by: Peter Rosin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-topology.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/i2c-topology.rst b/Documentation/i2c/i2c-topology.rst index 0875090d6aeb..b413ef6a6773 100644 --- a/Documentation/i2c/i2c-topology.rst +++ b/Documentation/i2c/i2c-topology.rst @@ -169,7 +169,7 @@ PL1. If you build a topology with a parent-locked mux being the child of another mux, this might break a possible assumption from the child mux that the root adapter is unused between its select op and the actual transfer (e.g. if the child mux is auto-closing - and the parent mux issus I2C-transfers as part of its select). + and the parent mux issues I2C-transfers as part of its select). This is especially the case if the parent mux is mux-locked, but it may also happen if the parent mux is parent-locked. From 48ca3b7fb82ccc00a20e8d97f35ee7e6813ac1f9 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:31 +0100 Subject: [PATCH 368/658] docs: i2c: replace "I2C-transfer" -> "I2C transfer" consistently "I2C transfer" is a legitimate english sentence, no need for a hyphen between the two words, as as such it is used in most of the documentation. Remove the hyphen in the few places where it is present. Signed-off-by: Luca Ceresoli Acked-by: Peter Rosin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-topology.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/i2c/i2c-topology.rst b/Documentation/i2c/i2c-topology.rst index b413ef6a6773..2a18b53e3508 100644 --- a/Documentation/i2c/i2c-topology.rst +++ b/Documentation/i2c/i2c-topology.rst @@ -137,14 +137,14 @@ Mux-locked Example When there is an access to D1, this happens: - 1. Someone issues an I2C-transfer to D1. + 1. Someone issues an I2C transfer to D1. 2. 
M1 locks muxes on its parent (the root adapter in this case). 3. M1 calls ->select to ready the mux. - 4. M1 (presumably) does some I2C-transfers as part of its select. - These transfers are normal I2C-transfers that locks the parent + 4. M1 (presumably) does some I2C transfers as part of its select. + These transfers are normal I2C transfers that locks the parent adapter. - 5. M1 feeds the I2C-transfer from step 1 to its parent adapter as a - normal I2C-transfer that locks the parent adapter. + 5. M1 feeds the I2C transfer from step 1 to its parent adapter as a + normal I2C transfer that locks the parent adapter. 6. M1 calls ->deselect, if it has one. 7. Same rules as in step 4, but for ->deselect. 8. M1 unlocks muxes on its parent. @@ -169,7 +169,7 @@ PL1. If you build a topology with a parent-locked mux being the child of another mux, this might break a possible assumption from the child mux that the root adapter is unused between its select op and the actual transfer (e.g. if the child mux is auto-closing - and the parent mux issues I2C-transfers as part of its select). + and the parent mux issues I2C transfers as part of its select). This is especially the case if the parent mux is mux-locked, but it may also happen if the parent mux is parent-locked. @@ -197,15 +197,15 @@ Parent-locked Example When there is an access to D1, this happens: - 1. Someone issues an I2C-transfer to D1. + 1. Someone issues an I2C transfer to D1. 2. M1 locks muxes on its parent (the root adapter in this case). 3. M1 locks its parent adapter. 4. M1 calls ->select to ready the mux. - 5. If M1 does any I2C-transfers (on this root adapter) as part of - its select, those transfers must be unlocked I2C-transfers so + 5. If M1 does any I2C transfers (on this root adapter) as part of + its select, those transfers must be unlocked I2C transfers so that they do not deadlock the root adapter. - 6. M1 feeds the I2C-transfer from step 1 to the root adapter as an - unlocked I2C-transfer, so that it does not deadlock the parent + 6. M1 feeds the I2C transfer from step 1 to the root adapter as an + unlocked I2C transfer, so that it does not deadlock the parent adapter. 7. M1 calls ->deselect, if it has one. 8. Same rules as in step 5, but for ->deselect. @@ -293,7 +293,7 @@ device lockups and/or other problems. The topology is especially troublesome if M2 is an auto-closing mux. In that case, any interleaved accesses to D4 might close M2 -prematurely, as might any I2C-transfers part of M1->select. +prematurely, as might any I2C transfers part of M1->select. But if M2 is not making the above stated assumption, and if M2 is not auto-closing, the topology is fine. From f72beb8bf9c401483a08fb548bbc9b946680a637 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:32 +0100 Subject: [PATCH 369/658] docs: i2c: i2c-protocol: fix kernel-doc function syntax This clarifies these are functions and adds a hyperlink to the function documentation. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-protocol.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index 0222c97f3436..f289d7759a51 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -28,7 +28,7 @@ Count (8 bits): A data byte containing the length of a block operation. 
Simple send transaction ======================= -This corresponds to i2c_master_send:: +This corresponds to i2c_master_send():: S Addr Wr [A] Data [A] Data [A] ... [A] Data [A] P @@ -36,7 +36,7 @@ This corresponds to i2c_master_send:: Simple receive transaction ========================== -This corresponds to i2c_master_recv:: +This corresponds to i2c_master_recv():: S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P @@ -44,7 +44,7 @@ This corresponds to i2c_master_recv:: Combined transactions ===================== -This corresponds to i2c_transfer +This corresponds to i2c_transfer(). They are just like the above transactions, but instead of a stop bit P a start bit S is sent and the transaction continues. An example of From f954731d2af433ce012f8dc456206eccd3cf2ee4 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:33 +0100 Subject: [PATCH 370/658] docs: i2c: i2c-protocol: properly name start and stop conditions In I2C there is no such thing as a "start bit" or a "stop bit". Use the proper naming: "start condition" and "stop condition". Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-protocol.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index f289d7759a51..c090003f55ed 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -8,8 +8,8 @@ Key to symbols ============== =============== ============================================================= -S (1 bit) : Start bit -P (1 bit) : Stop bit +S : Start condition +P : Stop condition Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0. A, NA (1 bit) : Accept and reverse accept bit. Addr (7 bits): I2C 7 bit address. Note that this can be expanded as usual to @@ -46,9 +46,9 @@ Combined transactions This corresponds to i2c_transfer(). -They are just like the above transactions, but instead of a stop bit P -a start bit S is sent and the transaction continues. An example of -a byte read, followed by a byte write:: +They are just like the above transactions, but instead of a stop +condition P a start condition S is sent and the transaction continues. +An example of a byte read, followed by a byte write:: S Addr Rd [A] [Data] NA S Addr Wr [A] Data [A] P @@ -77,8 +77,9 @@ I2C_M_NOSTART: S Addr Rd [A] [Data] NA Data [A] P If you set the I2C_M_NOSTART variable for the first partial message, - we do not generate Addr, but we do generate the startbit S. This will - probably confuse all other clients on your bus, so don't try this. + we do not generate Addr, but we do generate the start condition S. + This will probably confuse all other clients on your bus, so don't + try this. This is often used to gather transmits from multiple data buffers in system memory into something that appears as a single transfer to the From 02622c88618bb24faf50c51926e744e9e3ab334d Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:34 +0100 Subject: [PATCH 371/658] docs: i2c: i2c-protocol: remove unneeded colons from table These colons are not needed: the columns already nicely separate the symbols from their description. They are also inconsistently preceded by whitespace. Remove the colons completely to simplify and clean up. 
Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-protocol.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index c090003f55ed..66adac3a5afd 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -8,19 +8,19 @@ Key to symbols ============== =============== ============================================================= -S : Start condition -P : Stop condition -Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0. -A, NA (1 bit) : Accept and reverse accept bit. -Addr (7 bits): I2C 7 bit address. Note that this can be expanded as usual to +S Start condition +P Stop condition +Rd/Wr (1 bit) Read/Write bit. Rd equals 1, Wr equals 0. +A, NA (1 bit) Accept and reverse accept bit. +Addr (7 bits) I2C 7 bit address. Note that this can be expanded as usual to get a 10 bit I2C address. -Comm (8 bits): Command byte, a data byte which often selects a register on +Comm (8 bits) Command byte, a data byte which often selects a register on the device. -Data (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh +Data (8 bits) A plain data byte. Sometimes, I write DataLow, DataHigh for 16 bit data. -Count (8 bits): A data byte containing the length of a block operation. +Count (8 bits) A data byte containing the length of a block operation. -[..]: Data sent by I2C device, as opposed to data sent by the +[..] Data sent by I2C device, as opposed to data sent by the host adapter. =============== ============================================================= From db0d7424e774d830eb998de1b13b937ae6a55335 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:35 +0100 Subject: [PATCH 372/658] docs: i2c: i2c-protocol: use proper names for ACK and NACK Use the proper ACK and NACK naming from the I2C specification instead of "accept" and "reverse accept". Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index 66adac3a5afd..9a4ac944cf9d 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -11,7 +11,7 @@ Key to symbols S Start condition P Stop condition Rd/Wr (1 bit) Read/Write bit. Rd equals 1, Wr equals 0. -A, NA (1 bit) Accept and reverse accept bit. +A, NA (1 bit) Acknowledge (ACK) and Not Acknowledge (NACK) bit Addr (7 bits) I2C 7 bit address. Note that this can be expanded as usual to get a 10 bit I2C address. Comm (8 bits) Command byte, a data byte which often selects a register on From 924fbb4d2eb8941ff576b6dca57dff8222048cb5 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:36 +0100 Subject: [PATCH 373/658] docs: i2c: smbus-protocol: fix link syntax Use the proper ReST syntax to generate a valid hyperlink. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index e30eb1d274c6..1600b09ec0be 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -27,8 +27,8 @@ a different protocol operation entirely. 
Each transaction type corresponds to a functionality flag. Before calling a transaction function, a device driver should always check (just once) for the corresponding functionality flag to ensure that the underlying I2C -adapter supports the transaction in question. See - for the details. +adapter supports the transaction in question. See :doc:`functionality` for +the details. Key to symbols From c0faa8a6be2465f6df8bf1249df321e6966fa062 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:37 +0100 Subject: [PATCH 374/658] docs: i2c: smbus-protocol: properly name start and stop conditions In I2C there is no such thing as a "start bit" or a "stop bit". Use the proper naming: "start condition" and "stop condition". Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 1600b09ec0be..8510eeda1dd0 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -35,8 +35,8 @@ Key to symbols ============== =============== ============================================================= -S (1 bit) : Start bit -P (1 bit) : Stop bit +S : Start condition +P : Stop condition Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0. A, NA (1 bit) : Accept and reverse accept bit. Addr (7 bits): I2C 7 bit address. Note that this can be expanded as usual to From 026c0fe666dfa2cc759467b5e2dd3d963b5f43fb Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:38 +0100 Subject: [PATCH 375/658] docs: i2c: smbus-protocol: remove unneeded colons from table These colons are not needed: the columns already nicely separate the symbols from their description. They are also inconsistently preceded by whitespace. Remove the colons completely to simplify and clean up. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 8510eeda1dd0..fbadd4d25ad5 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -35,19 +35,19 @@ Key to symbols ============== =============== ============================================================= -S : Start condition -P : Stop condition -Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0. -A, NA (1 bit) : Accept and reverse accept bit. -Addr (7 bits): I2C 7 bit address. Note that this can be expanded as usual to +S Start condition +P Stop condition +Rd/Wr (1 bit) Read/Write bit. Rd equals 1, Wr equals 0. +A, NA (1 bit) Accept and reverse accept bit. +Addr (7 bits) I2C 7 bit address. Note that this can be expanded as usual to get a 10 bit I2C address. -Comm (8 bits): Command byte, a data byte which often selects a register on +Comm (8 bits) Command byte, a data byte which often selects a register on the device. -Data (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh +Data (8 bits) A plain data byte. Sometimes, I write DataLow, DataHigh for 16 bit data. -Count (8 bits): A data byte containing the length of a block operation. +Count (8 bits) A data byte containing the length of a block operation. -[..]: Data sent by I2C device, as opposed to data sent by the host +[..] 
Data sent by I2C device, as opposed to data sent by the host adapter. =============== ============================================================= From 9e89d61878f2969bfe018881566bc78871a00fa1 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:39 +0100 Subject: [PATCH 376/658] docs: i2c: smbus-protocol: use proper names for ACK and NACK Use the proper ACK and NACK naming from the I2C specification instead of "accept" and "reverse accept". Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index fbadd4d25ad5..10c4a989982c 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -38,7 +38,7 @@ Key to symbols S Start condition P Stop condition Rd/Wr (1 bit) Read/Write bit. Rd equals 1, Wr equals 0. -A, NA (1 bit) Accept and reverse accept bit. +A, NA (1 bit) Acknowledge (ACK) and Not Acknowledge (NACK) bit Addr (7 bits) I2C 7 bit address. Note that this can be expanded as usual to get a 10 bit I2C address. Comm (8 bits) Command byte, a data byte which often selects a register on From 3c13f1fbec59c75b115392b18271701ed6f77242 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:40 +0100 Subject: [PATCH 377/658] docs: i2c: smbus-protocol: enable kernel-doc function syntax Hyperlinks from function names are not generated in headings. Move them in the plain text so they are rendered as clickable hyperlinks. While there also remove an unneeded colon in a heading. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 64 ++++++++++++++++++---------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 10c4a989982c..997945e90419 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -62,8 +62,10 @@ This sends a single bit to the device, at the place of the Rd/Wr bit:: Functionality flag: I2C_FUNC_SMBUS_QUICK -SMBus Receive Byte: i2c_smbus_read_byte() -========================================== +SMBus Receive Byte +================== + +Implemented by i2c_smbus_read_byte() This reads a single byte from a device, without specifying a device register. Some devices are so simple that this interface is enough; for @@ -75,8 +77,10 @@ the previous SMBus command:: Functionality flag: I2C_FUNC_SMBUS_READ_BYTE -SMBus Send Byte: i2c_smbus_write_byte() -======================================== +SMBus Send Byte +=============== + +Implemented by i2c_smbus_write_byte() This operation is the reverse of Receive Byte: it sends a single byte to a device. See Receive Byte for more information. @@ -88,8 +92,10 @@ to a device. See Receive Byte for more information. Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE -SMBus Read Byte: i2c_smbus_read_byte_data() -============================================ +SMBus Read Byte +=============== + +Implemented by i2c_smbus_read_byte_data() This reads a single byte from a device, from a designated register. 
The register is specified through the Comm byte:: @@ -99,8 +105,10 @@ The register is specified through the Comm byte:: Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA -SMBus Read Word: i2c_smbus_read_word_data() -============================================ +SMBus Read Word +=============== + +Implemented by i2c_smbus_read_word_data() This operation is very like Read Byte; again, data is read from a device, from a designated register that is specified through the Comm @@ -115,8 +123,10 @@ available for reads where the two data bytes are the other way around (not SMBus compliant, but very popular.) -SMBus Write Byte: i2c_smbus_write_byte_data() -============================================== +SMBus Write Byte +================ + +Implemented by i2c_smbus_write_byte_data() This writes a single byte to a device, to a designated register. The register is specified through the Comm byte. This is the opposite of @@ -129,8 +139,10 @@ the Read Byte operation. Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE_DATA -SMBus Write Word: i2c_smbus_write_word_data() -============================================== +SMBus Write Word +================ + +Implemented by i2c_smbus_write_word_data() This is the opposite of the Read Word operation. 16 bits of data is written to a device, to the designated register that is @@ -145,8 +157,8 @@ available for writes where the two data bytes are the other way around (not SMBus compliant, but very popular.) -SMBus Process Call: -=================== +SMBus Process Call +================== This command selects a device register (through the Comm byte), sends 16 bits of data to it, and reads 16 bits of data in return:: @@ -157,8 +169,10 @@ This command selects a device register (through the Comm byte), sends Functionality flag: I2C_FUNC_SMBUS_PROC_CALL -SMBus Block Read: i2c_smbus_read_block_data() -============================================== +SMBus Block Read +================ + +Implemented by i2c_smbus_read_block_data() This command reads a block of up to 32 bytes from a device, from a designated register that is specified through the Comm byte. The amount @@ -172,8 +186,10 @@ of data is specified by the device in the Count byte. Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA -SMBus Block Write: i2c_smbus_write_block_data() -================================================ +SMBus Block Write +================= + +Implemented by i2c_smbus_write_block_data() The opposite of the Block Read command, this writes up to 32 bytes to a device, to a designated register that is specified through the @@ -274,8 +290,10 @@ I2C block transactions do not limit the number of bytes transferred but the SMBus layer places a limit of 32 bytes. 
-I2C Block Read: i2c_smbus_read_i2c_block_data() -================================================ +I2C Block Read +============== + +Implemented by i2c_smbus_read_i2c_block_data() This command reads a block of bytes from a device, from a designated register that is specified through the Comm byte:: @@ -286,8 +304,10 @@ designated register that is specified through the Comm byte:: Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK -I2C Block Write: i2c_smbus_write_i2c_block_data() -================================================== +I2C Block Write +=============== + +Implemented by i2c_smbus_write_i2c_block_data() The opposite of the Block Read command, this writes bytes to a device, to a designated register that is specified through the From b36cbb70e4a2b81b26042b04538ffa69cfa24642 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:41 +0100 Subject: [PATCH 378/658] docs: i2c: smbus-protocol: fix kernel-doc function syntax This clarifies these are functions (and would/will adds a hyperlink to the function documentation if/when documented). Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 997945e90419..3c0fb3a2044d 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -118,7 +118,7 @@ byte. But this time, the data is a complete word (16 bits):: Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA -Note the convenience function i2c_smbus_read_word_swapped is +Note the convenience function i2c_smbus_read_word_swapped() is available for reads where the two data bytes are the other way around (not SMBus compliant, but very popular.) @@ -152,7 +152,7 @@ specified through the Comm byte.:: Functionality flag: I2C_FUNC_SMBUS_WRITE_WORD_DATA -Note the convenience function i2c_smbus_write_word_swapped is +Note the convenience function i2c_smbus_write_word_swapped() is available for writes where the two data bytes are the other way around (not SMBus compliant, but very popular.) From 414a596454a68c8672d339c916d8d2ed03245444 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:42 +0100 Subject: [PATCH 379/658] docs: i2c: smbus-protocol: fix typo The subject is plural, fix the verb. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 3c0fb3a2044d..de7285de5e93 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -145,7 +145,7 @@ SMBus Write Word Implemented by i2c_smbus_write_word_data() This is the opposite of the Read Word operation. 16 bits -of data is written to a device, to the designated register that is +of data are written to a device, to the designated register that is specified through the Comm byte.:: S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] P From c7148b059c2f88b2b70325cfe45a3271f5c16c43 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:43 +0100 Subject: [PATCH 380/658] docs: i2c: smbus-protocol: fix punctuation Remove misplaced dot before colon. 
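The Write Word section fixed here, like the Read Word section above it, documents the i2c_smbus_read_word_data() and i2c_smbus_write_word_data() helpers. For reference, a typical call sequence looks roughly like the fragment below; the function and register names are made up for illustration, and only the helper signatures reflect the real API:

    #include <linux/i2c.h>

    /* Hypothetical example: read back a 16-bit register, then update it.
     * "client" is a struct i2c_client already bound to the device and
     * "reg" is the Comm byte that selects the register.
     */
    static int example_update_word(struct i2c_client *client, u8 reg, u16 val)
    {
    	s32 old;

    	old = i2c_smbus_read_word_data(client, reg);	/* SMBus Read Word */
    	if (old < 0)
    		return old;		/* negative errno from the adapter */

    	return i2c_smbus_write_word_data(client, reg, val); /* SMBus Write Word */
    }

The swapped variants mentioned earlier, i2c_smbus_read_word_swapped() and i2c_smbus_write_word_swapped(), take the same arguments and differ only in the byte order on the wire.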
Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index de7285de5e93..7350e4b2c2fa 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -146,7 +146,7 @@ Implemented by i2c_smbus_write_word_data() This is the opposite of the Read Word operation. 16 bits of data are written to a device, to the designated register that is -specified through the Comm byte.:: +specified through the Comm byte:: S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] P From 95b83774e310fa5ef8fc60434ac55fe3dc375adc Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:44 +0100 Subject: [PATCH 381/658] docs: i2c: smbus-protocol: improve I2C Block transactions description Clarify from the beginning what these transactions are, and specifically how they differ from the SMBus counterparts, i.e. the lack of a Count byte. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 7350e4b2c2fa..0edaf6069ac1 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -282,9 +282,10 @@ This is implemented the following way in the Linux kernel: I2C Block Transactions ====================== -The following I2C block transactions are supported by the -SMBus layer and are described here for completeness. -They are *NOT* defined by the SMBus specification. +The following I2C block transactions are similar to the SMBus Block Read +and Write operations, except these do not have a Count byte. They are +supported by the SMBus layer and are described here for completeness, but +they are *NOT* defined by the SMBus specification. I2C block transactions do not limit the number of bytes transferred but the SMBus layer places a limit of 32 bytes. From 4f71daf6294be7f376b52c668f044575764d97b3 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:45 +0100 Subject: [PATCH 382/658] docs: i2c: instantiating-devices: fix internal hyperlink Use ReST syntax so that a proper hyperlink is generated. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/instantiating-devices.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst index 53244e2f0378..e823c4ad4e7f 100644 --- a/Documentation/i2c/instantiating-devices.rst +++ b/Documentation/i2c/instantiating-devices.rst @@ -86,7 +86,7 @@ Method 1c: Declare the I2C devices via ACPI ------------------------------------------- ACPI can also describe I2C devices. There is special documentation for this -which is currently located at Documentation/firmware-guide/acpi/enumeration.rst. +which is currently located at :doc:`../firmware-guide/acpi/enumeration`. Method 2: Instantiate the devices explicitly From da9a80bf1976148cfb26feebe34b626cd460f3d3 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:46 +0100 Subject: [PATCH 383/658] docs: i2c: instantiating-devices: rearrange static instatiation Among the "static" instantiation methods the "board file" method is described first. 
Move it as last, since it is being replaced by the other methods. Also fix subsubsection heading syntax and remove the "Method 1[abc]" prefix as the subsubsection structure clarifies the logical hierarchy. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/instantiating-devices.rst | 109 +++++++++++--------- 1 file changed, 60 insertions(+), 49 deletions(-) diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst index e823c4ad4e7f..665bf00792ba 100644 --- a/Documentation/i2c/instantiating-devices.rst +++ b/Documentation/i2c/instantiating-devices.rst @@ -9,14 +9,67 @@ reason, the kernel code must instantiate I2C devices explicitly. There are several ways to achieve this, depending on the context and requirements. -Method 1a: Declare the I2C devices by bus number ------------------------------------------------- +Method 1: Declare the I2C devices statically +-------------------------------------------- This method is appropriate when the I2C bus is a system bus as is the case -for many embedded systems. On such systems, each I2C bus has a number -which is known in advance. It is thus possible to pre-declare the I2C -devices which live on this bus. This is done with an array of struct -i2c_board_info which is registered by calling i2c_register_board_info(). +for many embedded systems. On such systems, each I2C bus has a number which +is known in advance. It is thus possible to pre-declare the I2C devices +which live on this bus. + +This information is provided to the kernel in a different way on different +architectures: device tree, ACPI or board files. + +When the I2C bus in question is registered, the I2C devices will be +instantiated automatically by i2c-core. The devices will be automatically +unbound and destroyed when the I2C bus they sit on goes away (if ever). + + +Declare the I2C devices via devicetree +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +On platforms using devicetree, the declaration of I2C devices is done in +subnodes of the master controller. + +Example:: + + i2c1: i2c@400a0000 { + /* ... master properties skipped ... */ + clock-frequency = <100000>; + + flash@50 { + compatible = "atmel,24c256"; + reg = <0x50>; + }; + + pca9532: gpio@60 { + compatible = "nxp,pca9532"; + gpio-controller; + #gpio-cells = <2>; + reg = <0x60>; + }; + }; + +Here, two devices are attached to the bus using a speed of 100kHz. For +additional properties which might be needed to set up the device, please refer +to its devicetree documentation in Documentation/devicetree/bindings/. + + +Declare the I2C devices via ACPI +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +ACPI can also describe I2C devices. There is special documentation for this +which is currently located at :doc:`../firmware-guide/acpi/enumeration`. + + +Declare the I2C devices in board files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In many embedded architectures, devicetree has replaced the old hardware +description based on board files, but the latter are still used in old +code. Instantiating I2C devices via board files is done with an array of +struct i2c_board_info which is registered by calling +i2c_register_board_info(). Example (from omap2 h4):: @@ -44,49 +97,7 @@ Example (from omap2 h4):: } The above code declares 3 devices on I2C bus 1, including their respective -addresses and custom data needed by their drivers. When the I2C bus in -question is registered, the I2C devices will be instantiated automatically -by i2c-core. 
- -The devices will be automatically unbound and destroyed when the I2C bus -they sit on goes away (if ever.) - - -Method 1b: Declare the I2C devices via devicetree -------------------------------------------------- - -This method has the same implications as method 1a. The declaration of I2C -devices is here done via devicetree as subnodes of the master controller. - -Example:: - - i2c1: i2c@400a0000 { - /* ... master properties skipped ... */ - clock-frequency = <100000>; - - flash@50 { - compatible = "atmel,24c256"; - reg = <0x50>; - }; - - pca9532: gpio@60 { - compatible = "nxp,pca9532"; - gpio-controller; - #gpio-cells = <2>; - reg = <0x60>; - }; - }; - -Here, two devices are attached to the bus using a speed of 100kHz. For -additional properties which might be needed to set up the device, please refer -to its devicetree documentation in Documentation/devicetree/bindings/. - - -Method 1c: Declare the I2C devices via ACPI -------------------------------------------- - -ACPI can also describe I2C devices. There is special documentation for this -which is currently located at :doc:`../firmware-guide/acpi/enumeration`. +addresses and custom data needed by their drivers. Method 2: Instantiate the devices explicitly From 22714ef85478f7ff3a2dfd3e2bc854b72271ab8f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:47 +0100 Subject: [PATCH 384/658] docs: i2c: instantiating-devices: use monospace for sysfs attributes Use a monospace (literal) formatting for better readability of sysfs attributes. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/instantiating-devices.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst index 665bf00792ba..e558e0a77e0c 100644 --- a/Documentation/i2c/instantiating-devices.rst +++ b/Documentation/i2c/instantiating-devices.rst @@ -225,15 +225,15 @@ In general, the kernel should know which I2C devices are connected and what addresses they live at. However, in certain cases, it does not, so a sysfs interface was added to let the user provide the information. This interface is made of 2 attribute files which are created in every I2C bus -directory: new_device and delete_device. Both files are write only and you -must write the right parameters to them in order to properly instantiate, -respectively delete, an I2C device. +directory: ``new_device`` and ``delete_device``. Both files are write +only and you must write the right parameters to them in order to properly +instantiate, respectively delete, an I2C device. -File new_device takes 2 parameters: the name of the I2C device (a string) -and the address of the I2C device (a number, typically expressed in -hexadecimal starting with 0x, but can also be expressed in decimal.) +File ``new_device`` takes 2 parameters: the name of the I2C device (a +string) and the address of the I2C device (a number, typically expressed +in hexadecimal starting with 0x, but can also be expressed in decimal.) -File delete_device takes a single parameter: the address of the I2C +File ``delete_device`` takes a single parameter: the address of the I2C device. As no two devices can live at the same address on a given I2C segment, the address is sufficient to uniquely identify the device to be deleted. 
From 899b56b37eb64ce1406e5de079147984b284f439 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:48 +0100 Subject: [PATCH 385/658] docs: i2c: old-module-parameters: fix internal hyperlink Use ReST syntax so that a proper hyperlink is generated. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/old-module-parameters.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst index 78a6950e5763..1a18e2b0f593 100644 --- a/Documentation/i2c/old-module-parameters.rst +++ b/Documentation/i2c/old-module-parameters.rst @@ -12,8 +12,8 @@ address), "force" (to forcibly attach the driver to a given device) and With the conversion of the I2C subsystem to the standard device driver binding model, it became clear that these per-module parameters were no longer needed, and that a centralized implementation was possible. The new, -sysfs-based interface is described in the documentation file -"instantiating-devices", section "Method 4: Instantiate from user-space". +sysfs-based interface is described in :doc:`instantiating-devices`, section +"Method 4: Instantiate from user-space". Below is a mapping from the old module parameters to the new interface. From dfea2b16cc993ff00d0e1c137fd9b3e8f4badcd3 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:49 +0100 Subject: [PATCH 386/658] docs: i2c: old-module-parameters: clarify this is for obsolete kernels This section applies only to code for very old kernels. Avoid people reading this unnecessarily. Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/old-module-parameters.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst index 1a18e2b0f593..92a403d21a62 100644 --- a/Documentation/i2c/old-module-parameters.rst +++ b/Documentation/i2c/old-module-parameters.rst @@ -2,6 +2,11 @@ I2C device driver binding control from user-space ================================================= +.. NOTE:: + Note: this section is only relevant if you are handling some old code + found in kernel 2.6. If you work with more recent kernels, you can + safely skip this section. + Up to kernel 2.6.32, many I2C drivers used helper macros provided by which created standard module parameters to let the user control how the driver would probe I2C buses and attach to devices. These From 1ef0572296273b634339dd9c640c20ce1b8f436f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:50 +0100 Subject: [PATCH 387/658] docs: i2c: old-module-parameters: use monospace instead of "" Use a monospace (literal) formatting for better readability of sysfs attributes and the "dummy" client name. This looks much more readable in ReST-generated output. 
Signed-off-by: Luca Ceresoli Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/old-module-parameters.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst index 92a403d21a62..3b93cb88eebc 100644 --- a/Documentation/i2c/old-module-parameters.rst +++ b/Documentation/i2c/old-module-parameters.rst @@ -10,9 +10,9 @@ I2C device driver binding control from user-space Up to kernel 2.6.32, many I2C drivers used helper macros provided by which created standard module parameters to let the user control how the driver would probe I2C buses and attach to devices. These -parameters were known as "probe" (to let the driver probe for an extra -address), "force" (to forcibly attach the driver to a given device) and -"ignore" (to prevent a driver from probing a given address). +parameters were known as ``probe`` (to let the driver probe for an extra +address), ``force`` (to forcibly attach the driver to a given device) and +``ignore`` (to prevent a driver from probing a given address). With the conversion of the I2C subsystem to the standard device driver binding model, it became clear that these per-module parameters were no @@ -47,8 +47,8 @@ New method (sysfs interface):: # echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device # modprobe -Of course, it is important to instantiate the "dummy" device before loading +Of course, it is important to instantiate the ``dummy`` device before loading the driver. The dummy device will be handled by i2c-core itself, preventing other drivers from binding to it later on. If there is a real device at the problematic address, and you want another driver to bind to it, then simply -pass the name of the device in question instead of "dummy". +pass the name of the device in question instead of ``dummy``. From f6fcefa10fdbc852ececadb1fd600570228b49ab Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:51 +0100 Subject: [PATCH 388/658] docs: i2c: rename sections so the overall picture is clearer Some of the section names are not very clear. Reading those names in the index.rst page does not help much in grasping what the content is supposed to be. Rename those sections to clarify their content, especially when reading the index page. Signed-off-by: Luca Ceresoli Acked-by: Peter Rosin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface.rst | 6 +++--- Documentation/i2c/i2c-protocol.rst | 6 +++--- Documentation/i2c/i2c-topology.rst | 6 +++--- Documentation/i2c/old-module-parameters.rst | 6 +++--- Documentation/i2c/smbus-protocol.rst | 6 +++--- Documentation/i2c/summary.rst | 6 +++--- Documentation/i2c/writing-clients.rst | 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/i2c/dev-interface.rst b/Documentation/i2c/dev-interface.rst index c3717a87df12..bdb247f2f11a 100644 --- a/Documentation/i2c/dev-interface.rst +++ b/Documentation/i2c/dev-interface.rst @@ -1,6 +1,6 @@ -==================== -I2C Device Interface -==================== +============================================ +Implementing I2C device drivers in userspace +============================================ Usually, I2C devices are controlled by a kernel driver. 
But it is also possible to access all devices on an adapter from userspace, through diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index 9a4ac944cf9d..6aafc3880bce 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -1,6 +1,6 @@ -============ -I2C Protocol -============ +================ +The I2C Protocol +================ This document describes the I2C protocol. Or will, when it is finished :-) diff --git a/Documentation/i2c/i2c-topology.rst b/Documentation/i2c/i2c-topology.rst index 2a18b53e3508..7cb53819778e 100644 --- a/Documentation/i2c/i2c-topology.rst +++ b/Documentation/i2c/i2c-topology.rst @@ -1,6 +1,6 @@ -============ -I2C topology -============ +================================ +I2C muxes and complex topologies +================================ There are a couple of reasons for building more complex I2C topologies than a straight-forward I2C bus with one adapter and one or more devices. diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst index 3b93cb88eebc..38e55829dee8 100644 --- a/Documentation/i2c/old-module-parameters.rst +++ b/Documentation/i2c/old-module-parameters.rst @@ -1,6 +1,6 @@ -================================================= -I2C device driver binding control from user-space -================================================= +================================================================ +I2C device driver binding control from user-space in old kernels +================================================================ .. NOTE:: Note: this section is only relevant if you are handling some old code diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 0edaf6069ac1..c122ed239f7f 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -1,6 +1,6 @@ -====================== -SMBus Protocol Summary -====================== +================== +The SMBus Protocol +================== The following is a summary of the SMBus protocol. It applies to all revisions of the protocol (1.0, 1.1, and 2.0). diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 09f73a608e25..ce7230025b33 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -1,6 +1,6 @@ -============= -I2C and SMBus -============= +============================= +Introduction to I2C and SMBus +============================= I²C (pronounce: I squared C and written I2C in the kernel documentation) is a protocol developed by Philips. It is a slow two-wire protocol (variable diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst index 44d97b2c9a82..82aa33c964d3 100644 --- a/Documentation/i2c/writing-clients.rst +++ b/Documentation/i2c/writing-clients.rst @@ -1,6 +1,6 @@ -=================== -Writing I2C Clients -=================== +=============================== +Implementing I2C device drivers +=============================== This is a small guide for those who want to write kernel drivers for I2C or SMBus devices, using Linux as the protocol host/master (not slave). From ca5dbb0272cc30bbebd40315bb8a329e780f347a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:52 +0100 Subject: [PATCH 389/658] docs: i2c: i2c-protocol: use same wording as smbus-protocol In smbus-protocol.rst we use the text "Implemented by" for the same meaning as "This corresponds to". 
Change everything to "Implemented by" for coherency. Signed-off-by: Luca Ceresoli Reported-by: Jean Delvare Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-protocol.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst index 6aafc3880bce..b2092f8f815d 100644 --- a/Documentation/i2c/i2c-protocol.rst +++ b/Documentation/i2c/i2c-protocol.rst @@ -28,7 +28,7 @@ Count (8 bits) A data byte containing the length of a block operation. Simple send transaction ======================= -This corresponds to i2c_master_send():: +Implemented by i2c_master_send():: S Addr Wr [A] Data [A] Data [A] ... [A] Data [A] P @@ -36,7 +36,7 @@ This corresponds to i2c_master_send():: Simple receive transaction ========================== -This corresponds to i2c_master_recv():: +Implemented by i2c_master_recv():: S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P @@ -44,7 +44,7 @@ This corresponds to i2c_master_recv():: Combined transactions ===================== -This corresponds to i2c_transfer(). +Implemented by i2c_transfer(). They are just like the above transactions, but instead of a stop condition P a start condition S is sent and the transaction continues. From 4fcb445ec688a62da9c864ab05a4bd39b0307cdc Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 29 Jan 2020 16:19:53 +0100 Subject: [PATCH 390/658] docs: i2c: writing-clients: properly name the stop condition In I2C there is no such thing as a "stop bit". Use the proper naming: "stop condition". Signed-off-by: Luca Ceresoli Reported-by: Jean Delvare Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/writing-clients.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst index 82aa33c964d3..978cc8210bf3 100644 --- a/Documentation/i2c/writing-clients.rst +++ b/Documentation/i2c/writing-clients.rst @@ -357,9 +357,9 @@ read/written. This sends a series of messages. Each message can be a read or write, and they can be mixed in any way. The transactions are combined: no -stop bit is sent between transaction. The i2c_msg structure contains -for each message the client address, the number of bytes of the message -and the message data itself. +stop condition is issued between transaction. The i2c_msg structure +contains for each message the client address, the number of bytes of the +message and the message data itself. You can read the file ``i2c-protocol`` for more information about the actual I2C protocol. From f53938d2c79ae3c768dc92b1c3d898dfe820a491 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 16 Jan 2020 09:46:51 +0200 Subject: [PATCH 391/658] i2c: i801: Add support for Intel Comet Lake PCH-V Add support for Intel Comet Lake PCH-V which is based on Intel Kaby Lake. Difference between it and other Comet Lake variants is that former uses previous iTCO version 4 and latter use version 6 like Intel Cannon Lake PCH. 
Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 44db3a91d32d..ca4f096fef74 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -68,6 +68,7 @@ * Elkhart Lake (PCH) 0x4b23 32 hard yes yes yes * Tiger Lake-LP (PCH) 0xa0a3 32 hard yes yes yes * Jasper Lake (SOC) 0x4da3 32 hard yes yes yes + * Comet Lake-V (PCH) 0xa3a3 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -244,6 +245,7 @@ #define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS 0xa223 #define PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS 0xa2a3 #define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS 0xa323 +#define PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS 0xa3a3 struct i801_mux_config { char *gpio_chip; @@ -1074,6 +1076,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) }, @@ -1742,6 +1745,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS: case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: + case PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; From a258edbca2d94a4d902daeecb3073c88b57ac7ad Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Fri, 10 Jan 2020 14:56:46 -0700 Subject: [PATCH 392/658] MAINTAINERS: Add Revanth Rajashekar as a SED-Opal maintainer Scott hasn't worked for Intel for some time and has already given us his blessing. CC: Scott Bauer Signed-off-by: Revanth Rajashekar Signed-off-by: Jon Derrick Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e48ab79879ac..a704cb9316e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14916,8 +14916,8 @@ S: Maintained F: drivers/mmc/host/sdhci-omap.c SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER -M: Scott Bauer M: Jonathan Derrick +M: Revanth Rajashekar L: linux-block@vger.kernel.org S: Supported F: block/sed* From 73e4eab0a8dacd0ad6921a1717eb38a959b53f09 Mon Sep 17 00:00:00 2001 From: Christian Zigotzky Date: Wed, 29 Jan 2020 20:54:43 -0700 Subject: [PATCH 393/658] pata_pcmia: add SanDisk High (>8G) CF card to supported list Add new SanDisk High (>8G) CF cards to the pata_pcmcia driver. 
Signed-off-by: Christian Zigotzky Signed-off-by: Jens Axboe --- drivers/ata/pata_pcmcia.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index 3fe0754c0d52..8eb066abbd9c 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -309,6 +309,7 @@ static const struct pcmcia_device_id pcmcia_devices[] = { PCMCIA_DEVICE_MANF_CARD(0x0098, 0x0000), /* Toshiba */ PCMCIA_DEVICE_MANF_CARD(0x00a4, 0x002d), PCMCIA_DEVICE_MANF_CARD(0x00ce, 0x0000), /* Samsung */ + PCMCIA_DEVICE_MANF_CARD(0x00f1, 0x0101), /* SanDisk High (>8G) CFA */ PCMCIA_DEVICE_MANF_CARD(0x0319, 0x0000), /* Hitachi */ PCMCIA_DEVICE_MANF_CARD(0x2080, 0x0001), PCMCIA_DEVICE_MANF_CARD(0x4e01, 0x0100), /* Viking CFA */ From 10a663a1b15134a5a714aa515e11425a44d4fdf7 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Sat, 25 Jan 2020 03:37:29 +0000 Subject: [PATCH 394/658] ata: ahci: Add shutdown to freeze hardware resources of ahci device_shutdown() called from reboot or power_shutdown expect all devices to be shutdown. Same is true for even ahci pci driver. As no ahci shutdown function is implemented, the ata subsystem always remains alive with DMA & interrupt support. File system related calls should not be honored after device_shutdown(). So defining ahci pci driver shutdown to freeze hardware (mask interrupt, stop DMA engine and free DMA resources). Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jens Axboe --- drivers/ata/ahci.c | 7 +++++++ drivers/ata/libata-core.c | 21 +++++++++++++++++++++ include/linux/libata.h | 1 + 3 files changed, 29 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 4bfd1b14b390..11ea1aff40db 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -81,6 +81,7 @@ enum board_ids { static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static void ahci_remove_one(struct pci_dev *dev); +static void ahci_shutdown_one(struct pci_dev *dev); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -606,6 +607,7 @@ static struct pci_driver ahci_pci_driver = { .id_table = ahci_pci_tbl, .probe = ahci_init_one, .remove = ahci_remove_one, + .shutdown = ahci_shutdown_one, .driver = { .pm = &ahci_pci_pm_ops, }, @@ -1877,6 +1879,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void ahci_shutdown_one(struct pci_dev *pdev) +{ + ata_pci_shutdown_one(pdev); +} + static void ahci_remove_one(struct pci_dev *pdev) { pm_runtime_get_noresume(&pdev->dev); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6f4ab5c5b52d..42c8728f6117 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6767,6 +6767,26 @@ void ata_pci_remove_one(struct pci_dev *pdev) ata_host_detach(host); } +void ata_pci_shutdown_one(struct pci_dev *pdev) +{ + struct ata_host *host = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ap->pflags |= ATA_PFLAG_FROZEN; + + /* Disable port interrupts */ + if (ap->ops->freeze) + ap->ops->freeze(ap); + + /* Stop the port DMA engines */ + if (ap->ops->port_stop) + ap->ops->port_stop(ap); + } +} + /* move to PCI subsystem */ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { @@ -7387,6 +7407,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode); #ifdef CONFIG_PCI 
EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); #ifdef CONFIG_PM EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); diff --git a/include/linux/libata.h b/include/linux/libata.h index 2dbde119721d..bff539918d82 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1221,6 +1221,7 @@ struct pci_bits { }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); +extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM From 6a365874a43c43b227492266f59cd68ecc5a6f83 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 24 Jan 2020 21:03:07 +0100 Subject: [PATCH 395/658] drbd: fifo_alloc() should use struct_size Switching to struct_size for the allocation in fifo_alloc avoids hard-coding the type of fifo_buffer.values in fifo_alloc. It also provides overflow protection; to avoid pessimistic code being generated by the compiler as a result, this patch also switches fifo_size to unsigned, propagating the change as appropriate. Reviewed-by: Gustavo A. R. Silva Signed-off-by: Stephen Kitt Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 3 ++- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ddbf56014c51..aae99a2d7bd4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -622,7 +622,7 @@ struct fifo_buffer { int total; /* sum of all values */ int values[0]; }; -extern struct fifo_buffer *fifo_alloc(int fifo_size); +extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size); /* flag bits per connection */ enum { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index de2f94d0103a..da4a3ebe04ef 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1575,7 +1575,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) struct drbd_device *device; struct disk_conf *new_disk_conf, *old_disk_conf; struct fifo_buffer *old_plan = NULL, *new_plan = NULL; - int err, fifo_size; + int err; + unsigned int fifo_size; retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2b3103c30857..79e216446030 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3887,7 +3887,7 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; const int apv = connection->agreed_pro_version; struct fifo_buffer *old_plan = NULL, *new_plan = NULL; - int fifo_size = 0; + unsigned int fifo_size = 0; int err; peer_device = conn_peer_device(connection, pi->vnr); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5bdcc70ad589..b7f605c6e231 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -482,11 +482,11 @@ static void fifo_add_val(struct fifo_buffer *fb, int value) fb->values[i] += value; } -struct fifo_buffer *fifo_alloc(int fifo_size) +struct fifo_buffer *fifo_alloc(unsigned int fifo_size) { struct fifo_buffer *fb; - fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); + fb = kzalloc(struct_size(fb, values, 
fifo_size), GFP_NOIO); if (!fb) return NULL; From 5c0dd228b5fc30a3b732c7ae2657e0161ec7ed80 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 22 Jan 2020 11:18:57 +0800 Subject: [PATCH 396/658] nbd: add a flush_workqueue in nbd_start_device When kzalloc fail, may cause trying to destroy the workqueue from inside the workqueue. If num_connections is m (2 < m), and NO.1 ~ NO.n (1 < n < m) kzalloc are successful. The NO.(n + 1) failed. Then, nbd_start_device will return ENOMEM to nbd_start_device_ioctl, and nbd_start_device_ioctl will return immediately without running flush_workqueue. However, we still have n recv threads. If nbd_release run first, recv threads may have to drop the last config_refs and try to destroy the workqueue from inside the workqueue. To fix it, add a flush_workqueue in nbd_start_device. Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Signed-off-by: Sun Ke Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b4607dd96185..78181908f0df 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1265,6 +1265,16 @@ static int nbd_start_device(struct nbd_device *nbd) args = kzalloc(sizeof(*args), GFP_KERNEL); if (!args) { sock_shutdown(nbd); + /* + * If num_connections is m (2 < m), + * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful. + * But NO.(n + 1) failed. We still have n recv threads. + * So, add flush_workqueue here to prevent recv threads + * dropping the last config_refs and trying to destroy + * the workqueue from inside the workqueue. + */ + if (i) + flush_workqueue(nbd->recv_workq); return -ENOMEM; } sk_set_memalloc(config->socks[i]->sock->sk); From 0265d6e8ddb8901c2c03d09f9444f382a60ba6b1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 17 Jan 2020 15:39:55 +0100 Subject: [PATCH 397/658] xen/blkfront: limit allocated memory size to actual use case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today the Xen blkfront driver allocates memory for one struct blkfront_ring_info for each communication ring. This structure is statically sized for the maximum supported configuration resulting in a size of more than 90 kB. As the main size contributor is one array inside the struct, the memory allocation can easily be limited by moving this array to be the last structure element and to allocate only the memory for the actually needed array size. Acked-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c02be06c5299..61491167da19 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -151,9 +151,6 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) -#define BLK_MAX_RING_SIZE \ - __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS) - /* * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 * characters are enough. Define to 20 to keep consistent with backend. 
@@ -177,12 +174,12 @@ struct blkfront_ring_info { unsigned int evtchn, irq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_MAX_RING_SIZE]; struct list_head indirect_pages; struct list_head grants; unsigned int persistent_gnts_c; unsigned long shadow_free; struct blkfront_info *dev_info; + struct blk_shadow shadow[]; }; /* @@ -1915,7 +1912,8 @@ static int negotiate_mq(struct blkfront_info *info) info->nr_rings = 1; info->rinfo = kvcalloc(info->nr_rings, - sizeof(struct blkfront_ring_info), + struct_size(info->rinfo, shadow, + BLK_RING_SIZE(info)), GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); From 7991901082f0626592885a77a2cf8162536d1a51 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 13 Jan 2020 16:27:47 +0200 Subject: [PATCH 398/658] ata: pata_arasan_cf: Use dma_request_chan() instead dma_request_slave_channel() dma_request_slave_channel() is a wrapper on top of dma_request_chan() eating up the error code. The dma_request_chan() is the standard API to request slave channel, clients should be moved away from the legacy API to allow us to retire them. Acked-by: Viresh Kumar Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Peter Ujfalusi Signed-off-by: Jens Axboe --- drivers/ata/pata_arasan_cf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index 391dff0f25a2..e9cf31f38450 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -526,9 +526,10 @@ static void data_xfer(struct work_struct *work) /* request dma channels */ /* dma_request_channel may sleep, so calling from process context */ - acdev->dma_chan = dma_request_slave_channel(acdev->host->dev, "data"); - if (!acdev->dma_chan) { + acdev->dma_chan = dma_request_chan(acdev->host->dev, "data"); + if (IS_ERR(acdev->dma_chan)) { dev_err(acdev->host->dev, "Unable to get dma_chan\n"); + acdev->dma_chan = NULL; goto chan_request_fail; } @@ -539,6 +540,7 @@ static void data_xfer(struct work_struct *work) } dma_release_channel(acdev->dma_chan); + acdev->dma_chan = NULL; /* data xferred successfully */ if (!ret) { From bbf967b223b3f1b55eb494d735226152afbad64e Mon Sep 17 00:00:00 2001 From: Alex Williams Date: Thu, 31 Jan 2019 13:39:57 -0800 Subject: [PATCH 399/658] i2c: cadence: Handle transfer_size rollover Under certain conditions, Cadence's I2C controller's transfer_size register will roll over and generate invalid read transactions. Before this change, the ISR relied solely on the RXDV bit to determine when to write more data to the user's buffer. The invalid read data would cause overruns, smashing stacks and worse. This change stops the buffer writes to the requested boundary and reports the error. The controller will be reset so normal transactions may resume. 
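In essence, the receive path of the ISR becomes a bounded copy. The fragment below is a condensed illustration rather than the literal hunk: rx_data_valid() stands in for the driver's RXDV status check, and the bus-hold quirk handling is omitted; only the fields and symbols that appear in the change below are real.

    /* Keep draining the RX FIFO while the hardware reports valid data,
     * but never past the number of bytes the caller asked for.
     */
    while (rx_data_valid(id)) {		/* RXDV check, simplified */
    	if (id->recv_count == 0) {
    		/* transfer_size rolled over: abort and report the error */
    		id->err_status |= CDNS_I2C_IXR_TO;
    		break;
    	}
    	*(id->p_recv_buf)++ = cdns_i2c_readreg(CDNS_I2C_DATA_OFFSET);
    	id->recv_count--;
    	id->curr_recv_count--;
    }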
Signed-off-by: Alex Williams Reviewed-by: Shubhrajyoti Datta Reviewed-by: Michal Simek # in a seperate mail Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 9d71ce15db05..179776bef5c4 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -208,6 +208,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) isr_status = cdns_i2c_readreg(CDNS_I2C_ISR_OFFSET); cdns_i2c_writereg(isr_status, CDNS_I2C_ISR_OFFSET); + id->err_status = 0; /* Handling nack and arbitration lost interrupt */ if (isr_status & (CDNS_I2C_IXR_NACK | CDNS_I2C_IXR_ARB_LOST)) { @@ -241,10 +242,17 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) !id->bus_hold_flag) cdns_i2c_clear_bus_hold(id); - *(id->p_recv_buf)++ = - cdns_i2c_readreg(CDNS_I2C_DATA_OFFSET); - id->recv_count--; - id->curr_recv_count--; + if (id->recv_count > 0) { + *(id->p_recv_buf)++ = + cdns_i2c_readreg(CDNS_I2C_DATA_OFFSET); + id->recv_count--; + id->curr_recv_count--; + } else { + dev_err(id->adap.dev.parent, + "xfer_size reg rollover. xfer aborted!\n"); + id->err_status |= CDNS_I2C_IXR_TO; + break; + } if (cdns_is_holdquirk(id, hold_quirk)) break; @@ -342,7 +350,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) } /* Update the status for errors */ - id->err_status = isr_status & CDNS_I2C_IXR_ERR_INTR_MASK; + id->err_status |= isr_status & CDNS_I2C_IXR_ERR_INTR_MASK; if (id->err_status) status = IRQ_HANDLED; From 69dc44bcbc11fab397b614b1204ce10f3d74219c Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 9 Dec 2019 16:10:49 +0530 Subject: [PATCH 400/658] i2c: cadence: Fix error printing in case of defer Do not print error in case of EPROBE_DEFER. Signed-off-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 179776bef5c4..bae64c6d3e48 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -929,7 +929,8 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(id->clk)) { - dev_err(&pdev->dev, "input clock not found.\n"); + if (PTR_ERR(id->clk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "input clock not found.\n"); return PTR_ERR(id->clk); } ret = clk_prepare_enable(id->clk); From db3fad841d9bf5c8b002ce86fd82aec32af80fc0 Mon Sep 17 00:00:00 2001 From: Topi Kuutela Date: Mon, 9 Dec 2019 16:10:50 +0530 Subject: [PATCH 401/658] i2c: cadence: Fix power management order of operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g. pm_runtime_set_active must be called while the power management system is disabled. Fixes extra hanging clk_enable. 
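The ordering rule is easiest to see in isolation. The sketch below uses a placeholder driver, not the Cadence code; only the pm_runtime_*() calls and their relative order reflect the change:

    #include <linux/platform_device.h>
    #include <linux/pm_runtime.h>

    static int foo_probe(struct platform_device *pdev)
    {
    	/* ... clocks enabled, hardware initialised ... */
    	pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
    	pm_runtime_use_autosuspend(&pdev->dev);
    	/* record the initial state while runtime PM is still disabled */
    	pm_runtime_set_active(&pdev->dev);
    	/* only now hand the device over to the PM core */
    	pm_runtime_enable(&pdev->dev);
    	return 0;
    }

    static int foo_remove(struct platform_device *pdev)
    {
    	/* mirror image: disable runtime PM before declaring the device
    	 * suspended, then drop the autosuspend bookkeeping
    	 */
    	pm_runtime_disable(&pdev->dev);
    	pm_runtime_set_suspended(&pdev->dev);
    	pm_runtime_dont_use_autosuspend(&pdev->dev);
    	/* ... adapter removal, clock teardown ... */
    	return 0;
    }

The error path in probe follows the same rule, which is why the patch also swaps pm_runtime_disable() and pm_runtime_set_suspended() under the err_clk_dis label.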
Signed-off-by: Topi Kuutela Acked-by: Sören Brinkmann Signed-off-by: Michal Simek Signed-off-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index bae64c6d3e48..276cfa8bd997 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -937,10 +937,10 @@ static int cdns_i2c_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "Unable to enable clock.\n"); - pm_runtime_enable(id->dev); pm_runtime_set_autosuspend_delay(id->dev, CNDS_I2C_PM_TIMEOUT); pm_runtime_use_autosuspend(id->dev); pm_runtime_set_active(id->dev); + pm_runtime_enable(id->dev); id->clk_rate_change_nb.notifier_call = cdns_i2c_clk_notifier_cb; if (clk_notifier_register(id->clk, &id->clk_rate_change_nb)) @@ -989,8 +989,8 @@ static int cdns_i2c_probe(struct platform_device *pdev) err_clk_dis: clk_disable_unprepare(id->clk); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); return ret; } @@ -1006,10 +1006,13 @@ static int cdns_i2c_remove(struct platform_device *pdev) { struct cdns_i2c *id = platform_get_drvdata(pdev); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + i2c_del_adapter(&id->adap); clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); clk_disable_unprepare(id->clk); - pm_runtime_disable(&pdev->dev); return 0; } From 9253975bcba65bece2d982db06bb959186ec7780 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 9 Jan 2020 11:07:53 +0530 Subject: [PATCH 402/658] i2c: cadence: Fix wording in i2c-cadence driver Fix wording based on checkpatch.pl Signed-off-by: Michal Simek Signed-off-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 276cfa8bd997..1105aee6634a 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -508,7 +508,7 @@ static void cdns_i2c_master_reset(struct i2c_adapter *adap) cdns_i2c_writereg(regval, CDNS_I2C_CR_OFFSET); /* Update the transfercount register to zero */ cdns_i2c_writereg(0, CDNS_I2C_XFER_SIZE_OFFSET); - /* Clear the interupt status register */ + /* Clear the interrupt status register */ regval = cdns_i2c_readreg(CDNS_I2C_ISR_OFFSET); cdns_i2c_writereg(regval, CDNS_I2C_ISR_OFFSET); /* Clear the status register */ From ea6dd25deeb5b797a145be7f860e3085e7d104c3 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 6 Jan 2020 14:28:32 +0100 Subject: [PATCH 403/658] i2c: stm32f7: add PM_SLEEP suspend/resume support Backup/restore I2C registers as part of the suspend/resume handlers. The device is marked as suspended to ensure that transfers are rejected during the suspended period. 
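The handlers are wired in the usual way for a driver that supports both runtime PM and system sleep: the callbacks are marked __maybe_unused (so no #ifdef CONFIG_PM / CONFIG_PM_SLEEP blocks are needed) and registered through a single dev_pm_ops. A bare-bones sketch of that shape, with hypothetical callback bodies:

#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int __maybe_unused example_runtime_suspend(struct device *dev) { return 0; }
static int __maybe_unused example_runtime_resume(struct device *dev)  { return 0; }
static int __maybe_unused example_suspend(struct device *dev)         { return 0; }
static int __maybe_unused example_resume(struct device *dev)          { return 0; }

static const struct dev_pm_ops example_pm_ops = {
	SET_RUNTIME_PM_OPS(example_runtime_suspend,
			   example_runtime_resume, NULL)
	SET_SYSTEM_SLEEP_PM_OPS(example_suspend, example_resume)
};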
Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Alain Volmat Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 117 +++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index b2634afe066d..5c3e8ac6ad92 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -168,6 +168,24 @@ #define STM32F7_AUTOSUSPEND_DELAY (HZ / 100) +/** + * struct stm32f7_i2c_regs - i2c f7 registers backup + * @cr1: Control register 1 + * @cr2: Control register 2 + * @oar1: Own address 1 register + * @oar2: Own address 2 register + * @pecr: PEC register + * @tmgr: Timing register + */ +struct stm32f7_i2c_regs { + u32 cr1; + u32 cr2; + u32 oar1; + u32 oar2; + u32 pecr; + u32 tmgr; +}; + /** * struct stm32f7_i2c_spec - private i2c specification timing * @rate: I2C bus speed (Hz) @@ -276,6 +294,7 @@ struct stm32f7_i2c_msg { * @timing: I2C computed timings * @slave: list of slave devices registered on the I2C bus * @slave_running: slave device currently used + * @backup_regs: backup of i2c controller registers (for suspend/resume) * @slave_dir: transfer direction for the current slave device * @master_mode: boolean to know in which mode the I2C is running (master or * slave) @@ -298,6 +317,7 @@ struct stm32f7_i2c_dev { struct stm32f7_i2c_timings timing; struct i2c_client *slave[STM32F7_I2C_MAX_SLAVE]; struct i2c_client *slave_running; + struct stm32f7_i2c_regs backup_regs; u32 slave_dir; bool master_mode; struct stm32_i2c_dma *dma; @@ -2027,8 +2047,7 @@ static int stm32f7_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int stm32f7_i2c_runtime_suspend(struct device *dev) +static int __maybe_unused stm32f7_i2c_runtime_suspend(struct device *dev) { struct stm32f7_i2c_dev *i2c_dev = dev_get_drvdata(dev); @@ -2038,7 +2057,7 @@ static int stm32f7_i2c_runtime_suspend(struct device *dev) return 0; } -static int stm32f7_i2c_runtime_resume(struct device *dev) +static int __maybe_unused stm32f7_i2c_runtime_resume(struct device *dev) { struct stm32f7_i2c_dev *i2c_dev = dev_get_drvdata(dev); int ret; @@ -2053,11 +2072,101 @@ static int stm32f7_i2c_runtime_resume(struct device *dev) return 0; } -#endif + +static int __maybe_unused +stm32f7_i2c_regs_backup(struct stm32f7_i2c_dev *i2c_dev) +{ + int ret; + struct stm32f7_i2c_regs *backup_regs = &i2c_dev->backup_regs; + + ret = pm_runtime_get_sync(i2c_dev->dev); + if (ret < 0) + return ret; + + backup_regs->cr1 = readl_relaxed(i2c_dev->base + STM32F7_I2C_CR1); + backup_regs->cr2 = readl_relaxed(i2c_dev->base + STM32F7_I2C_CR2); + backup_regs->oar1 = readl_relaxed(i2c_dev->base + STM32F7_I2C_OAR1); + backup_regs->oar2 = readl_relaxed(i2c_dev->base + STM32F7_I2C_OAR2); + backup_regs->pecr = readl_relaxed(i2c_dev->base + STM32F7_I2C_PECR); + backup_regs->tmgr = readl_relaxed(i2c_dev->base + STM32F7_I2C_TIMINGR); + + pm_runtime_put_sync(i2c_dev->dev); + + return ret; +} + +static int __maybe_unused +stm32f7_i2c_regs_restore(struct stm32f7_i2c_dev *i2c_dev) +{ + u32 cr1; + int ret; + struct stm32f7_i2c_regs *backup_regs = &i2c_dev->backup_regs; + + ret = pm_runtime_get_sync(i2c_dev->dev); + if (ret < 0) + return ret; + + cr1 = readl_relaxed(i2c_dev->base + STM32F7_I2C_CR1); + if (cr1 & STM32F7_I2C_CR1_PE) + stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_PE); + + writel_relaxed(backup_regs->tmgr, i2c_dev->base + STM32F7_I2C_TIMINGR); + writel_relaxed(backup_regs->cr1 & 
~STM32F7_I2C_CR1_PE, + i2c_dev->base + STM32F7_I2C_CR1); + if (backup_regs->cr1 & STM32F7_I2C_CR1_PE) + stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_PE); + writel_relaxed(backup_regs->cr2, i2c_dev->base + STM32F7_I2C_CR2); + writel_relaxed(backup_regs->oar1, i2c_dev->base + STM32F7_I2C_OAR1); + writel_relaxed(backup_regs->oar2, i2c_dev->base + STM32F7_I2C_OAR2); + writel_relaxed(backup_regs->pecr, i2c_dev->base + STM32F7_I2C_PECR); + + pm_runtime_put_sync(i2c_dev->dev); + + return ret; +} + +static int __maybe_unused stm32f7_i2c_suspend(struct device *dev) +{ + struct stm32f7_i2c_dev *i2c_dev = dev_get_drvdata(dev); + int ret; + + i2c_mark_adapter_suspended(&i2c_dev->adap); + ret = stm32f7_i2c_regs_backup(i2c_dev); + if (ret < 0) { + i2c_mark_adapter_resumed(&i2c_dev->adap); + return ret; + } + + pinctrl_pm_select_sleep_state(dev); + pm_runtime_force_suspend(dev); + + return 0; +} + +static int __maybe_unused stm32f7_i2c_resume(struct device *dev) +{ + struct stm32f7_i2c_dev *i2c_dev = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret < 0) + return ret; + pinctrl_pm_select_default_state(dev); + + ret = stm32f7_i2c_regs_restore(i2c_dev); + if (ret < 0) + return ret; + i2c_mark_adapter_resumed(&i2c_dev->adap); + + return 0; +} static const struct dev_pm_ops stm32f7_i2c_pm_ops = { SET_RUNTIME_PM_OPS(stm32f7_i2c_runtime_suspend, stm32f7_i2c_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(stm32f7_i2c_suspend, stm32f7_i2c_resume) }; static const struct of_device_id stm32f7_i2c_match[] = { From 64ae572bc7d0060429e40e1c8d803ce5eb31a0d6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Aug 2019 10:12:08 -0400 Subject: [PATCH 404/658] tracing: Fix sched switch start/stop refcount racy updates Reading the sched_cmdline_ref and sched_tgid_ref initial state within tracing_start_sched_switch without holding the sched_register_mutex is racy against concurrent updates, which can lead to tracepoint probes being registered more than once (and thus trigger warnings within tracepoint.c). [ May be the fix for this bug ] Link: https://lore.kernel.org/r/000000000000ab6f84056c786b93@google.com Link: http://lkml.kernel.org/r/20190817141208.15226-1-mathieu.desnoyers@efficios.com Cc: stable@vger.kernel.org CC: Steven Rostedt (VMware) CC: Joel Fernandes (Google) CC: Peter Zijlstra CC: Thomas Gleixner CC: Paul E. 
McKenney Reported-by: syzbot+774fddf07b7ab29a1e55@syzkaller.appspotmail.com Fixes: d914ba37d7145 ("tracing: Add support for recording tgid of tasks") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_sched_switch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: From e4075e8bdffd93a9b6d6e1d52fabedceeca5a91b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:02:56 +0300 Subject: [PATCH 405/658] ftrace: fpid_next() should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Without patch: # dd bs=4 skip=1 if=/sys/kernel/tracing/set_ftrace_pid dd: /sys/kernel/tracing/set_ftrace_pid: cannot skip to specified offset id no pid 2+1 records in 2+1 records out 10 bytes copied, 0.000213285 s, 46.9 kB/s Notice the "id" followed by "no pid". With the patch: # dd bs=4 skip=1 if=/sys/kernel/tracing/set_ftrace_pid dd: /sys/kernel/tracing/set_ftrace_pid: cannot skip to specified offset id 0+1 records in 0+1 records out 3 bytes copied, 0.000202112 s, 14.8 kB/s Notice that it only prints "id" and not the "no pid" afterward. Link: http://lkml.kernel.org/r/4f87c6ad-f114-30bb-8506-c32274ce2992@virtuozzo.com https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fdb1a9532420..0e9612c30995 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7026,9 +7026,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } From 039958a5f7aad695d4d52683c7d48aa13fb18249 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:03:01 +0300 Subject: [PATCH 406/658] tracing: eval_map_next() should always increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Link: http://lkml.kernel.org/r/7ad85b22-1866-977c-db17-88ac438bc764@virtuozzo.com Signed-off-by: Vasily Averin [ This is not a bug fix, it just makes it "technically correct" which is why I applied it. NULL is only returned on an anomaly which triggers a WARN_ON ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6a28b1b9bf42..8d144fd94aa8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5399,14 +5399,12 @@ static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) * Paranoid! If ptr points to end, we don't want to increment past it. * This really should never happen. 
*/ + (*pos)++; ptr = update_eval_map(ptr); if (WARN_ON_ONCE(!ptr)) return NULL; ptr++; - - (*pos)++; - ptr = update_eval_map(ptr); return ptr; From 6722b23e7a2ace078344064a9735fb73e554e9ef Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:03:06 +0300 Subject: [PATCH 407/658] trigger_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Without patch: # dd bs=30 skip=1 if=/sys/kernel/tracing/events/sched/sched_switch/trigger dd: /sys/kernel/tracing/events/sched/sched_switch/trigger: cannot skip to specified offset n traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist # Available triggers: # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist 6+1 records in 6+1 records out 206 bytes copied, 0.00027916 s, 738 kB/s Notice the printing of "# Available triggers:..." after the line. With the patch: # dd bs=30 skip=1 if=/sys/kernel/tracing/events/sched/sched_switch/trigger dd: /sys/kernel/tracing/events/sched/sched_switch/trigger: cannot skip to specified offset n traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist 2+1 records in 2+1 records out 88 bytes copied, 0.000526867 s, 167 kB/s It only prints the end of the file, and does not restart. Link: http://lkml.kernel.org/r/3c35ee24-dd3a-8119-9c19-552ed253388a@virtuozzo.com https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_trigger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 60959c31791d..dd34a1b46a86 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -116,9 +116,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } From 89c95fcef1942415e0f20d8c82e6e36ff8eeca9c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:21 -0600 Subject: [PATCH 408/658] tracing: Add trace_array_find/_get() to find instance trace arrays Add a new trace_array_find() function that can be used to find a trace array given the instance name, and replace existing code that does the same thing with it. Also add trace_array_find_get() which does the same but returns the trace array after upping its refcount. Also make both available for use outside of trace.c. 
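A short usage sketch of the pair (assuming it sits in kernel/trace code that includes trace.h; "foo" is an arbitrary instance name, and trace_array_put() is the existing helper that drops the reference taken here):

static int example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_find_get("foo");
	if (!tr)
		return -ENOENT;

	/* ... operate on the instance; it cannot go away while pinned ... */

	trace_array_put(tr);
	return 0;
}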
Link: http://lkml.kernel.org/r/cb68528c975eba95bee4561ac67dd1499423b2e5.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 43 +++++++++++++++++++++++++++++++++---------- kernel/trace/trace.h | 2 ++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d144fd94aa8..183b031a3828 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8499,6 +8499,34 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } +/* Must have trace_types_lock held */ +struct trace_array *trace_array_find(const char *instance) +{ + struct trace_array *tr, *found = NULL; + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, instance) == 0) { + found = tr; + break; + } + } + + return found; +} + +struct trace_array *trace_array_find_get(const char *instance) +{ + struct trace_array *tr; + + mutex_lock(&trace_types_lock); + tr = trace_array_find(instance); + if (tr) + tr->ref++; + mutex_unlock(&trace_types_lock); + + return tr; +} + static struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; @@ -8575,10 +8603,8 @@ static int instance_mkdir(const char *name) mutex_lock(&trace_types_lock); ret = -EEXIST; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) - goto out_unlock; - } + if (trace_array_find(name)) + goto out_unlock; tr = trace_array_create(name); @@ -8706,12 +8732,9 @@ static int instance_rmdir(const char *name) mutex_lock(&trace_types_lock); ret = -ENODEV; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) { - ret = __remove_instance(tr); - break; - } - } + tr = trace_array_find(name); + if (tr) + ret = __remove_instance(tr); mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b3075b637d14..f5480a2aa334 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -358,6 +358,8 @@ extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); +extern struct trace_array *trace_array_find(const char *instance); +extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); From e3e2a2cc9c96725457ad6f31712ea7681a55666e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:22 -0600 Subject: [PATCH 409/658] tracing: Add trace_get/put_event_file() Add a function to get an event file and prevent it from going away on module or instance removal. trace_get_event_file() will find an event file in a given instance (if instance is NULL, it assumes the top trace array) and return it, pinning the instance's trace array as well as the event's module, if applicable, so they won't go away while in use. trace_put_event_file() does the matching release. 
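A minimal usage sketch (the "sched"/"sched_switch" pair is only an example of an existing event; NULL selects the top-level trace array, and the IS_ERR()/PTR_ERR() handling follows from the ERR_PTR return convention documented in the kernel-doc below):

#include <linux/err.h>
#include <linux/trace_events.h>

static int example_pin_event(void)
{
	struct trace_event_file *file;

	file = trace_get_event_file(NULL, "sched", "sched_switch");
	if (IS_ERR(file))
		return PTR_ERR(file);

	/*
	 * Use the event file here; the containing trace array and the
	 * event's module (if any) stay pinned until it is released.
	 */

	trace_put_event_file(file);
	return 0;
}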
Link: http://lkml.kernel.org/r/bb31ac4bdda168d5ed3c4b5f5a4c8f633e8d9118.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi [ Moved trace_array_put() to end of trace_put_event_file() ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 5 +++ kernel/trace/trace_events.c | 85 ++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 20948ee56f8c..8d621a73c97e 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -349,6 +349,11 @@ enum { EVENT_FILE_FL_WAS_ENABLED_BIT, }; +extern struct trace_event_file *trace_get_event_file(const char *instance, + const char *system, + const char *event); +extern void trace_put_event_file(struct trace_event_file *file); + /* * Event file flags: * ENABLED - The event is enabled diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index dfb736a964d6..da62472b1297 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2536,6 +2536,91 @@ find_event_file(struct trace_array *tr, const char *system, const char *event) return file; } +/** + * trace_get_event_file - Find and return a trace event file + * @instance: The name of the trace instance containing the event + * @system: The name of the system containing the event + * @event: The name of the event + * + * Return a trace event file given the trace instance name, trace + * system, and trace event name. If the instance name is NULL, it + * refers to the top-level trace array. + * + * This function will look it up and return it if found, after calling + * trace_array_get() to prevent the instance from going away, and + * increment the event's module refcount to prevent it from being + * removed. + * + * To release the file, call trace_put_event_file(), which will call + * trace_array_put() and decrement the event's module refcount. + * + * Return: The trace event on success, ERR_PTR otherwise. + */ +struct trace_event_file *trace_get_event_file(const char *instance, + const char *system, + const char *event) +{ + struct trace_array *tr = top_trace_array(); + struct trace_event_file *file = NULL; + int ret = -EINVAL; + + if (instance) { + tr = trace_array_find_get(instance); + if (!tr) + return ERR_PTR(-ENOENT); + } else { + ret = trace_array_get(tr); + if (ret) + return ERR_PTR(ret); + } + + mutex_lock(&event_mutex); + + file = find_event_file(tr, system, event); + if (!file) { + trace_array_put(tr); + ret = -EINVAL; + goto out; + } + + /* Don't let event modules unload while in use */ + ret = try_module_get(file->event_call->mod); + if (!ret) { + trace_array_put(tr); + ret = -EBUSY; + goto out; + } + + ret = 0; + out: + mutex_unlock(&event_mutex); + + if (ret) + file = ERR_PTR(ret); + + return file; +} +EXPORT_SYMBOL_GPL(trace_get_event_file); + +/** + * trace_put_event_file - Release a file from trace_get_event_file() + * @file: The trace event file + * + * If a file was retrieved using trace_get_event_file(), this should + * be called when it's no longer needed. It will cancel the previous + * trace_array_get() called by that function, and decrement the + * event's module refcount. 
+ */ +void trace_put_event_file(struct trace_event_file *file) +{ + mutex_lock(&event_mutex); + module_put(file->event_call->mod); + mutex_unlock(&event_mutex); + + trace_array_put(file->tr); +} +EXPORT_SYMBOL_GPL(trace_put_event_file); + #ifdef CONFIG_DYNAMIC_FTRACE /* Avoid typos */ From f5f6b255a253e2c3132ca283e9090a6343bfb719 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:23 -0600 Subject: [PATCH 410/658] tracing: Add synth_event_delete() create_or_delete_synth_event() contains code to delete a synthetic event, which would be useful on its own - specifically, it would be useful to allow event-creating modules to call it separately. Separate out the delete code from that function and create an exported function named synth_event_delete(). Link: http://lkml.kernel.org/r/050db3b06df7f0a4b8a2922da602d1d879c7c1c2.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 2 ++ kernel/trace/trace_events_hist.c | 57 +++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8d621a73c97e..25fe743bcbaf 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -354,6 +354,8 @@ extern struct trace_event_file *trace_get_event_file(const char *instance, const char *event); extern void trace_put_event_file(struct trace_event_file *file); +extern int synth_event_delete(const char *name); + /* * Event file flags: * ENABLED - The event is enabled diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index c322826e0726..21e316732700 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1360,29 +1360,54 @@ static int __create_synth_event(int argc, const char *name, const char **argv) goto out; } +static int destroy_synth_event(struct synth_event *se) +{ + int ret; + + if (se->ref) + ret = -EBUSY; + else { + ret = unregister_synth_event(se); + if (!ret) { + dyn_event_remove(&se->devent); + free_synth_event(se); + } + } + + return ret; +} + +/** + * synth_event_delete - Delete a synthetic event + * @event_name: The name of the new sythetic event + * + * Delete a synthetic event that was created with synth_event_create(). + * + * Return: 0 if successful, error otherwise. 
+ */ +int synth_event_delete(const char *event_name) +{ + struct synth_event *se = NULL; + int ret = -ENOENT; + + mutex_lock(&event_mutex); + se = find_synth_event(event_name); + if (se) + ret = destroy_synth_event(se); + mutex_unlock(&event_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_delete); + static int create_or_delete_synth_event(int argc, char **argv) { const char *name = argv[0]; - struct synth_event *event = NULL; int ret; /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { - mutex_lock(&event_mutex); - event = find_synth_event(name + 1); - if (event) { - if (event->ref) - ret = -EBUSY; - else { - ret = unregister_synth_event(event); - if (!ret) { - dyn_event_remove(&event->devent); - free_synth_event(event); - } - } - } else - ret = -ENOENT; - mutex_unlock(&event_mutex); + ret = synth_event_delete(name + 1); return ret; } From 86c5426baddae9ff192e3159b9c2e7c14e3964c6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:24 -0600 Subject: [PATCH 411/658] tracing: Add dynamic event command creation interface Add an interface used to build up dynamic event creation commands, such as synthetic and kprobe events. Interfaces specific to those particular types of events and others can be built on top of this interface. Command creation is started by first using the dynevent_cmd_init() function to initialize the dynevent_cmd object. Following that, args are appended and optionally checked by the dynevent_arg_add() and dynevent_arg_pair_add() functions, which use objects representing arguments and pairs of arguments, initialized respectively by dynevent_arg_init() and dynevent_arg_pair_init(). Finally, once all args have been successfully added, the command is finalized and actually created using dynevent_create(). The code here for actually printing into the dyn_event->cmd buffer using snprintf() etc was adapted from v4 of Masami's 'tracing/boot: Add synthetic event support' patch. 
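To make the flow concrete, a skeletal sketch of how a type-specific user might drive the interface. The my_run_command() callback, the "my_event" string and the use of DYNEVENT_TYPE_NONE as a stand-in type are assumptions for illustration only; real users supply their own type value and run_command through a wrapper around dynevent_cmd_init():

static int my_run_command(struct dynevent_cmd *cmd)
{
	/* Hand the assembled cmd->buf to the type-specific parser here. */
	return 0;
}

static int example_build_cmd(char *buf, int maxlen)
{
	struct dynevent_cmd cmd;
	struct dynevent_arg arg;
	int ret;

	dynevent_cmd_init(&cmd, buf, maxlen, DYNEVENT_TYPE_NONE,
			  my_run_command);

	/* No check_arg callback, default (space) separator. */
	dynevent_arg_init(&arg, NULL, 0);
	arg.str = "my_event";
	ret = dynevent_arg_add(&cmd, &arg);
	if (ret)
		return ret;

	/* Finalize: invokes my_run_command() on the built string. */
	return dynevent_create(&cmd);
}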
Link: http://lkml.kernel.org/r/1f65fa44390b6f238f6036777c3784ced1dcc6a0.1580323897.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 23 ++++ kernel/trace/trace_dynevent.c | 240 ++++++++++++++++++++++++++++++++++ kernel/trace/trace_dynevent.h | 33 +++++ 3 files changed, 296 insertions(+) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 25fe743bcbaf..651b03d5e272 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -354,6 +354,29 @@ extern struct trace_event_file *trace_get_event_file(const char *instance, const char *event); extern void trace_put_event_file(struct trace_event_file *file); +#define MAX_DYNEVENT_CMD_LEN (2048) + +enum dynevent_type { + DYNEVENT_TYPE_NONE, +}; + +struct dynevent_cmd; + +typedef int (*dynevent_create_fn_t)(struct dynevent_cmd *cmd); + +struct dynevent_cmd { + char *buf; + const char *event_name; + int maxlen; + int remaining; + unsigned int n_fields; + enum dynevent_type type; + dynevent_create_fn_t run_command; + void *private_data; +}; + +extern int dynevent_create(struct dynevent_cmd *cmd); + extern int synth_event_delete(const char *name); /* diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 89779eb84a07..6ffdbc4fda53 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -223,3 +223,243 @@ static __init int init_dynamic_event(void) return 0; } fs_initcall(init_dynamic_event); + +/** + * dynevent_arg_add - Add an arg to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @arg: The argument to append to the current cmd + * + * Append an argument to a dynevent_cmd. The argument string will be + * appended to the current cmd string, followed by a separator, if + * applicable. Before the argument is added, the check_arg() + * function, if defined, is called. + * + * The cmd string, separator, and check_arg() function should be set + * using the dynevent_arg_init() before any arguments are added using + * this function. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_arg_add(struct dynevent_cmd *cmd, + struct dynevent_arg *arg) +{ + int ret = 0; + int delta; + char *q; + + if (arg->check_arg) { + ret = arg->check_arg(arg); + if (ret) + return ret; + } + + q = cmd->buf + (cmd->maxlen - cmd->remaining); + + delta = snprintf(q, cmd->remaining, " %s%c", arg->str, arg->separator); + if (delta >= cmd->remaining) { + pr_err("String is too long: %s\n", arg->str); + return -E2BIG; + } + cmd->remaining -= delta; + + return ret; +} + +/** + * dynevent_arg_pair_add - Add an arg pair to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @arg_pair: The argument pair to append to the current cmd + * + * Append an argument pair to a dynevent_cmd. An argument pair + * consists of a left-hand-side argument and a right-hand-side + * argument separated by an operator, which can be whitespace, all + * followed by a separator, if applicable. This can be used to add + * arguments of the form 'type variable_name;' or 'x+y'. + * + * The lhs argument string will be appended to the current cmd string, + * followed by an operator, if applicable, followd by the rhs string, + * followed finally by a separator, if applicable. Before anything is + * added, the check_arg() function, if defined, is called. 
+ * + * The cmd strings, operator, separator, and check_arg() function + * should be set using the dynevent_arg_pair_init() before any arguments + * are added using this function. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_arg_pair_add(struct dynevent_cmd *cmd, + struct dynevent_arg_pair *arg_pair) +{ + int ret = 0; + int delta; + char *q; + + if (arg_pair->check_arg) { + ret = arg_pair->check_arg(arg_pair); + if (ret) + return ret; + } + + q = cmd->buf + (cmd->maxlen - cmd->remaining); + + delta = snprintf(q, cmd->remaining, " %s%c", arg_pair->lhs, + arg_pair->operator); + if (delta >= cmd->remaining) { + pr_err("field string is too long: %s\n", arg_pair->lhs); + return -E2BIG; + } + cmd->remaining -= delta; q += delta; + + delta = snprintf(q, cmd->remaining, "%s%c", arg_pair->rhs, + arg_pair->separator); + if (delta >= cmd->remaining) { + pr_err("field string is too long: %s\n", arg_pair->rhs); + return -E2BIG; + } + cmd->remaining -= delta; q += delta; + + return ret; +} + +/** + * dynevent_str_add - Add a string to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @str: The string to append to the current cmd + * + * Append a string to a dynevent_cmd. The string will be appended to + * the current cmd string as-is, with nothing prepended or appended. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_str_add(struct dynevent_cmd *cmd, const char *str) +{ + int ret = 0; + int delta; + char *q; + + q = cmd->buf + (cmd->maxlen - cmd->remaining); + + delta = snprintf(q, cmd->remaining, "%s", str); + if (delta >= cmd->remaining) { + pr_err("String is too long: %s\n", str); + return -E2BIG; + } + cmd->remaining -= delta; + + return ret; +} + +/** + * dynevent_cmd_init - Initialize a dynevent_cmd object + * @cmd: A pointer to the dynevent_cmd struct representing the cmd + * @buf: A pointer to the buffer to generate the command into + * @maxlen: The length of the buffer the command will be generated into + * @type: The type of the cmd, checked against further operations + * @run_command: The type-specific function that will actually run the command + * + * Initialize a dynevent_cmd. A dynevent_cmd is used to build up and + * run dynamic event creation commands, such as commands for creating + * synthetic and kprobe events. Before calling any of the functions + * used to build the command, a dynevent_cmd object should be + * instantiated and initialized using this function. + * + * The initialization sets things up by saving a pointer to the + * user-supplied buffer and its length via the @buf and @maxlen + * params, and by saving the cmd-specific @type and @run_command + * params which are used to check subsequent dynevent_cmd operations + * and actually run the command when complete. + */ +void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, + enum dynevent_type type, + dynevent_create_fn_t run_command) +{ + memset(cmd, '\0', sizeof(*cmd)); + + cmd->buf = buf; + cmd->maxlen = maxlen; + cmd->remaining = cmd->maxlen; + cmd->type = type; + cmd->run_command = run_command; +} + +/** + * dynevent_arg_init - Initialize a dynevent_arg object + * @arg: A pointer to the dynevent_arg struct representing the arg + * @check_arg: An (optional) pointer to a function checking arg sanity + * @separator: An (optional) separator, appended after adding the arg + * + * Initialize a dynevent_arg object. 
A dynevent_arg represents an + * object used to append single arguments to the current command + * string. The @check_arg function, if present, will be used to check + * the sanity of the current arg string (which is directly set by the + * caller). After the arg string is successfully appended to the + * command string, the optional @separator is appended. If no + * separator was specified when initializing the arg, a space will be + * appended. + */ +void dynevent_arg_init(struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg, + char separator) +{ + memset(arg, '\0', sizeof(*arg)); + + if (!separator) + separator = ' '; + arg->separator = separator; + + arg->check_arg = check_arg; +} + +/** + * dynevent_arg_pair_init - Initialize a dynevent_arg_pair object + * @arg_pair: A pointer to the dynevent_arg_pair struct representing the arg + * @check_arg: An (optional) pointer to a function checking arg sanity + * @operator: An (optional) operator, appended after adding the first arg + * @separator: An (optional) separator, appended after adding the second arg + * + * Initialize a dynevent_arg_pair object. A dynevent_arg_pair + * represents an object used to append argument pairs such as 'type + * variable_name;' or 'x+y' to the current command string. An + * argument pair consists of a left-hand-side argument and a + * right-hand-side argument separated by an operator, which can be + * whitespace, all followed by a separator, if applicable. The + * @check_arg function, if present, will be used to check the sanity + * of the current arg strings (which is directly set by the caller). + * After the first arg string is successfully appended to the command + * string, the optional @operator is appended, followed by the second + * arg and and optional @separator. If no separator was specified + * when initializing the arg, a space will be appended. + */ +void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg, + char operator, char separator) +{ + memset(arg_pair, '\0', sizeof(*arg_pair)); + + if (!operator) + operator = ' '; + arg_pair->operator = operator; + + if (!separator) + separator = ' '; + arg_pair->separator = separator; + + arg_pair->check_arg = check_arg; +} + +/** + * dynevent_create - Create the dynamic event contained in dynevent_cmd + * @cmd: The dynevent_cmd object containing the dynamic event creation command + * + * Once a dynevent_cmd object has been successfully built up via the + * dynevent_cmd_init(), dynevent_arg_add() and dynevent_arg_pair_add() + * functions, this function runs the final command to actually create + * the event. + * + * Return: 0 if the event was successfully created, error otherwise. + */ +int dynevent_create(struct dynevent_cmd *cmd) +{ + return cmd->run_command(cmd); +} +EXPORT_SYMBOL_GPL(dynevent_create); diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 46898138d2df..b593fc34c5b1 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -117,4 +117,37 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); #define for_each_dyn_event_safe(pos, n) \ list_for_each_entry_safe(pos, n, &dyn_event_list, list) +extern void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, + enum dynevent_type type, + dynevent_create_fn_t run_command); + +typedef int (*dynevent_check_arg_fn_t)(void *data); + +struct dynevent_arg { + const char *str; + char separator; /* e.g. 
';', ',', or nothing */ + dynevent_check_arg_fn_t check_arg; +}; + +extern void dynevent_arg_init(struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg, + char separator); +extern int dynevent_arg_add(struct dynevent_cmd *cmd, + struct dynevent_arg *arg); + +struct dynevent_arg_pair { + const char *lhs; + const char *rhs; + char operator; /* e.g. '=' or nothing */ + char separator; /* e.g. ';', ',', or nothing */ + dynevent_check_arg_fn_t check_arg; +}; + +extern void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg, + char operator, char separator); +extern int dynevent_arg_pair_add(struct dynevent_cmd *cmd, + struct dynevent_arg_pair *arg_pair); +extern int dynevent_str_add(struct dynevent_cmd *cmd, const char *str); + #endif From 35ca5207c2d111abb9e072f028945d5c12b20836 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:25 -0600 Subject: [PATCH 412/658] tracing: Add synthetic event command generation functions Add functions used to generate synthetic event commands, built on top of the dynevent_cmd interface. synth_event_gen_cmd_start() is used to create a synthetic event command using a variable arg list and synth_event_gen_cmd_array_start() does the same thing but using an array of field descriptors. synth_event_add_field(), synth_event_add_field_str() and synth_event_add_fields() can be used to add single fields one by one or as a group. Once all desired fields are added, synth_event_gen_cmd_end() is used to actually execute the command and create the event. synth_event_create() does everything, including creating the event, in a single call. Link: http://lkml.kernel.org/r/38fef702fad5ef208009f459552f34a94befd860.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 37 +++ kernel/trace/trace_events_hist.c | 379 ++++++++++++++++++++++++++++++- 2 files changed, 412 insertions(+), 4 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 651b03d5e272..07b83532a3c6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -357,6 +357,7 @@ extern void trace_put_event_file(struct trace_event_file *file); #define MAX_DYNEVENT_CMD_LEN (2048) enum dynevent_type { + DYNEVENT_TYPE_SYNTH = 1, DYNEVENT_TYPE_NONE, }; @@ -379,6 +380,42 @@ extern int dynevent_create(struct dynevent_cmd *cmd); extern int synth_event_delete(const char *name); +extern void synth_event_cmd_init(struct dynevent_cmd *cmd, + char *buf, int maxlen); + +extern int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, + const char *name, + struct module *mod, ...); + +#define synth_event_gen_cmd_start(cmd, name, mod, ...) 
\ + __synth_event_gen_cmd_start(cmd, name, mod, ## __VA_ARGS__, NULL) + +struct synth_field_desc { + const char *type; + const char *name; +}; + +extern int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, + const char *name, + struct module *mod, + struct synth_field_desc *fields, + unsigned int n_fields); +extern int synth_event_create(const char *name, + struct synth_field_desc *fields, + unsigned int n_fields, struct module *mod); + +extern int synth_event_add_field(struct dynevent_cmd *cmd, + const char *type, + const char *name); +extern int synth_event_add_field_str(struct dynevent_cmd *cmd, + const char *type_name); +extern int synth_event_add_fields(struct dynevent_cmd *cmd, + struct synth_field_desc *fields, + unsigned int n_fields); + +#define synth_event_gen_cmd_end(cmd) \ + dynevent_create(cmd) + /* * Event file flags: * ENABLED - The event is enabled diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 21e316732700..5a910bb193e9 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -379,7 +379,7 @@ struct hist_trigger_data { unsigned int n_save_var_str; }; -static int synth_event_create(int argc, const char **argv); +static int create_synth_event(int argc, const char **argv); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -387,7 +387,7 @@ static bool synth_event_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations synth_event_ops = { - .create = synth_event_create, + .create = create_synth_event, .show = synth_event_show, .is_busy = synth_event_is_busy, .free = synth_event_release, @@ -412,6 +412,7 @@ struct synth_event { struct trace_event_class class; struct trace_event_call call; struct tracepoint *tp; + struct module *mod; }; static bool is_synth_event(struct dyn_event *ev) @@ -1292,6 +1293,273 @@ struct hist_var_data { struct hist_trigger_data *hist_data; }; +static int synth_event_check_arg_fn(void *data) +{ + struct dynevent_arg_pair *arg_pair = data; + int size; + + size = synth_field_size((char *)arg_pair->lhs); + + return size ? 0 : -EINVAL; +} + +/** + * synth_event_add_field - Add a new field to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @type: The type of the new field to add + * @name: The name of the new field to add + * + * Add a new field to a synthetic event cmd object. Field ordering is in + * the same order the fields are added. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. 
+ */ +int synth_event_add_field(struct dynevent_cmd *cmd, const char *type, + const char *name) +{ + struct dynevent_arg_pair arg_pair; + int ret; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (!type || !name) + return -EINVAL; + + dynevent_arg_pair_init(&arg_pair, synth_event_check_arg_fn, 0, ';'); + + arg_pair.lhs = type; + arg_pair.rhs = name; + + ret = dynevent_arg_pair_add(cmd, &arg_pair); + if (ret) + return ret; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_field); + +/** + * synth_event_add_field_str - Add a new field to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @type_name: The type and name of the new field to add, as a single string + * + * Add a new field to a synthetic event cmd object, as a single + * string. The @type_name string is expected to be of the form 'type + * name', which will be appended by ';'. No sanity checking is done - + * what's passed in is assumed to already be well-formed. Field + * ordering is in the same order the fields are added. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_field_str(struct dynevent_cmd *cmd, const char *type_name) +{ + struct dynevent_arg arg; + int ret; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (!type_name) + return -EINVAL; + + dynevent_arg_init(&arg, NULL, ';'); + + arg.str = type_name; + + ret = dynevent_arg_add(cmd, &arg); + if (ret) + return ret; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_field_str); + +/** + * synth_event_add_fields - Add multiple fields to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * + * Add a new set of fields to a synthetic event cmd object. The event + * fields that will be defined for the event should be passed in as an + * array of struct synth_field_desc, and the number of elements in the + * array passed in as n_fields. Field ordering will retain the + * ordering given in the fields array. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_fields(struct dynevent_cmd *cmd, + struct synth_field_desc *fields, + unsigned int n_fields) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < n_fields; i++) { + if (fields[i].type == NULL || fields[i].name == NULL) { + ret = -EINVAL; + break; + } + + ret = synth_event_add_field(cmd, fields[i].type, fields[i].name); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_fields); + +/** + * __synth_event_gen_cmd_start - Start a synthetic event command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the synthetic event + * @mod: The module creating the event, NULL if not created from a module + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the synth_event_gen_cmd_start() wrapper, which + * automatically adds a NULL to the end of the arg list. 
If this + * function is used directly, make sure the last arg in the variable + * arg list is NULL. + * + * Generate a synthetic event command to be executed by + * synth_event_gen_cmd_end(). This function can be used to generate + * the complete command or only the first part of it; in the latter + * case, synth_event_add_field(), synth_event_add_field_str(), or + * synth_event_add_fields() can be used to add more fields following + * this. + * + * There should be an even number variable args, each pair consisting + * of a type followed by a field name. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, const char *name, + struct module *mod, ...) +{ + struct dynevent_arg arg; + va_list args; + int ret; + + cmd->event_name = name; + cmd->private_data = mod; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + dynevent_arg_init(&arg, NULL, 0); + arg.str = name; + ret = dynevent_arg_add(cmd, &arg); + if (ret) + return ret; + + va_start(args, mod); + for (;;) { + const char *type, *name; + + type = va_arg(args, const char *); + if (!type) + break; + name = va_arg(args, const char *); + if (!name) + break; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) { + ret = -EINVAL; + break; + } + + ret = synth_event_add_field(cmd, type, name); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__synth_event_gen_cmd_start); + +/** + * synth_event_gen_cmd_array_start - Start synthetic event command from an array + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the synthetic event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * + * Generate a synthetic event command to be executed by + * synth_event_gen_cmd_end(). This function can be used to generate + * the complete command or only the first part of it; in the latter + * case, synth_event_add_field(), synth_event_add_field_str(), or + * synth_event_add_fields() can be used to add more fields following + * this. + * + * The event fields that will be defined for the event should be + * passed in as an array of struct synth_field_desc, and the number of + * elements in the array passed in as n_fields. Field ordering will + * retain the ordering given in the fields array. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. 
+ */ +int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, + struct module *mod, + struct synth_field_desc *fields, + unsigned int n_fields) +{ + struct dynevent_arg arg; + unsigned int i; + int ret = 0; + + cmd->event_name = name; + cmd->private_data = mod; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (n_fields > SYNTH_FIELDS_MAX) + return -EINVAL; + + dynevent_arg_init(&arg, NULL, 0); + arg.str = name; + ret = dynevent_arg_add(cmd, &arg); + if (ret) + return ret; + + for (i = 0; i < n_fields; i++) { + if (fields[i].type == NULL || fields[i].name == NULL) + return -EINVAL; + + ret = synth_event_add_field(cmd, fields[i].type, fields[i].name); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); + static int __create_synth_event(int argc, const char *name, const char **argv) { struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; @@ -1360,6 +1628,56 @@ static int __create_synth_event(int argc, const char *name, const char **argv) goto out; } +/** + * synth_event_create - Create a new synthetic event + * @name: The name of the new sythetic event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * @mod: The module creating the event, NULL if not created from a module + * + * Create a new synthetic event with the given name under the + * trace/events/synthetic/ directory. The event fields that will be + * defined for the event should be passed in as an array of struct + * synth_field_desc, and the number elements in the array passed in as + * n_fields. Field ordering will retain the ordering given in the + * fields array. + * + * If the new synthetic event is being created from a module, the mod + * param must be non-NULL. This will ensure that the trace buffer + * won't contain unreadable events. + * + * The new synth event should be deleted using synth_event_delete() + * function. The new synthetic event can be generated from modules or + * other kernel code using trace_synth_event() and related functions. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_create(const char *name, struct synth_field_desc *fields, + unsigned int n_fields, struct module *mod) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + ret = synth_event_gen_cmd_array_start(&cmd, name, mod, + fields, n_fields); + if (ret) + goto out; + + ret = synth_event_gen_cmd_end(&cmd); + out: + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_create); + static int destroy_synth_event(struct synth_event *se) { int ret; @@ -1388,14 +1706,33 @@ static int destroy_synth_event(struct synth_event *se) int synth_event_delete(const char *event_name) { struct synth_event *se = NULL; + struct module *mod = NULL; int ret = -ENOENT; mutex_lock(&event_mutex); se = find_synth_event(event_name); - if (se) + if (se) { + mod = se->mod; ret = destroy_synth_event(se); + } mutex_unlock(&event_mutex); + if (mod) { + mutex_lock(&trace_types_lock); + /* + * It is safest to reset the ring buffer if the module + * being unloaded registered any events that were + * used. The only worry is if a new module gets + * loaded, and takes on the same id as the events of + * this module. 
When printing out the buffer, traced + * events left over from this module may be passed to + * the new module events and unexpected results may + * occur. + */ + tracing_reset_all_online_cpus(); + mutex_unlock(&trace_types_lock); + } + return ret; } EXPORT_SYMBOL_GPL(synth_event_delete); @@ -1420,7 +1757,41 @@ int synth_event_run_command(const char *command) return trace_run_command(command, create_or_delete_synth_event); } -static int synth_event_create(int argc, const char **argv) +static int synth_event_run_cmd(struct dynevent_cmd *cmd) +{ + struct synth_event *se; + int ret; + + ret = trace_run_command(cmd->buf, create_or_delete_synth_event); + if (ret) + return ret; + + se = find_synth_event(cmd->event_name); + if (WARN_ON(!se)) + return -ENOENT; + + se->mod = cmd->private_data; + + return ret; +} + +/** + * synth_event_cmd_init - Initialize a synthetic event command object + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @buf: A pointer to the buffer used to build the command + * @maxlen: The length of the buffer passed in @buf + * + * Initialize a synthetic event command object. Use this before + * calling any of the other dyenvent_cmd functions. + */ +void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) +{ + dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_SYNTH, + synth_event_run_cmd); +} +EXPORT_SYMBOL_GPL(synth_event_cmd_init); + +static int create_synth_event(int argc, const char **argv) { const char *name = argv[0]; int len; From 8dcc53ad956d2caf4c5c2dda196e6801b71a3154 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:27 -0600 Subject: [PATCH 413/658] tracing: Add synth_event_trace() and related functions Add an exported function named synth_event_trace(), allowing modules or other kernel code to trace synthetic events. Also added are several functions that allow the same functionality to be broken out in a piecewise fashion, which are useful in situations where tracing an event from a full array of values would be cumbersome. Those functions are synth_event_trace_start/end() and synth_event_add_(next)_val(). 
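A hedged usage sketch of the all-at-once form, combined with the trace_get_event_file() helper added earlier in the series. The event name "my_synth" and its two-u64 field layout are assumptions; "synthetic" is the system synthetic events are registered under:

#include <linux/err.h>
#include <linux/trace_events.h>

static int example_trace_synth(void)
{
	struct trace_event_file *file;
	int ret;

	file = trace_get_event_file(NULL, "synthetic", "my_synth");
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* Values are passed as u64, one per field, in field order. */
	ret = synth_event_trace(file, 2, (u64)123, (u64)456);

	trace_put_event_file(file);
	return ret;
}

For events with many fields, the piecewise synth_event_trace_start() / synth_event_add_val() / synth_event_trace_end() calls described above avoid having to build the full value list in one place.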
Link: http://lkml.kernel.org/r/7a84de5f1854acf4144b57efe835ca645afa764f.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 26 ++ kernel/trace/trace_events_hist.c | 463 +++++++++++++++++++++++++++++++ 2 files changed, 489 insertions(+) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 07b83532a3c6..bf03d12efb28 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -416,6 +416,32 @@ extern int synth_event_add_fields(struct dynevent_cmd *cmd, #define synth_event_gen_cmd_end(cmd) \ dynevent_create(cmd) +struct synth_event; + +struct synth_event_trace_state { + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int cur_field; + unsigned int n_u64; + bool enabled; + bool add_next; + bool add_name; +}; + +extern int synth_event_trace(struct trace_event_file *file, + unsigned int n_vals, ...); +extern int synth_event_trace_array(struct trace_event_file *file, u64 *vals, + unsigned int n_vals); +extern int synth_event_trace_start(struct trace_event_file *file, + struct synth_event_trace_state *trace_state); +extern int synth_event_add_next_val(u64 val, + struct synth_event_trace_state *trace_state); +extern int synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state); +extern int synth_event_trace_end(struct synth_event_trace_state *trace_state); + /* * Event file flags: * ENABLED - The event is enabled diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5a910bb193e9..4d56a4f0310d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -398,6 +398,7 @@ struct synth_field { char *type; char *name; size_t size; + unsigned int offset; bool is_signed; bool is_string; }; @@ -668,6 +669,8 @@ static int synth_event_define_fields(struct trace_event_call *call) if (ret) break; + event->fields[i]->offset = n_u64; + if (event->fields[i]->is_string) { offset += STR_VAR_LEN_MAX; n_u64 += STR_VAR_LEN_MAX / sizeof(u64); @@ -1791,6 +1794,466 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) } EXPORT_SYMBOL_GPL(synth_event_cmd_init); +/** + * synth_event_trace - Trace a synthetic event + * @file: The trace_event_file representing the synthetic event + * @n_vals: The number of values in vals + * @args: Variable number of args containing the event values + * + * Trace a synthetic event using the values passed in the variable + * argument list. + * + * The argument list should be a list 'n_vals' u64 values. The number + * of vals must match the number of field in the synthetic event, and + * must be in the same order as the synthetic event fields. + * + * All vals should be cast to u64, and string vals are just pointers + * to strings, cast to u64. Strings will be copied into space + * reserved in the event for the string, using these pointers. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) 
+{ + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int i, n_u64; + int fields_size = 0; + va_list args; + int ret = 0; + + /* + * Normal event generation doesn't get called at all unless + * the ENABLED bit is set (which attaches the probe thus + * allowing this code to be called, etc). Because this is + * called directly by the user, we don't have that but we + * still need to honor not logging when disabled. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + return 0; + + event = file->event_call->data; + + if (n_vals != event->n_fields) + return -EINVAL; + + if (trace_trigger_soft_disabled(file)) + return -EINVAL; + + fields_size = event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + va_start(args, n_vals); + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + u64 val; + + val = va_arg(args, u64); + + if (event->fields[i]->is_string) { + char *str_val = (char *)(long)val; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + entry->fields[n_u64] = val; + n_u64++; + } + } + va_end(args); + + trace_event_buffer_commit(&fbuffer); +out: + ring_buffer_nest_end(buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace); + +/** + * synth_event_trace_array - Trace a synthetic event from an array + * @file: The trace_event_file representing the synthetic event + * @vals: Array of values + * @n_vals: The number of values in vals + * + * Trace a synthetic event using the values passed in as 'vals'. + * + * The 'vals' array is just an array of 'n_vals' u64. The number of + * vals must match the number of field in the synthetic event, and + * must be in the same order as the synthetic event fields. + * + * All vals should be cast to u64, and string vals are just pointers + * to strings, cast to u64. Strings will be copied into space + * reserved in the event for the string, using these pointers. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_array(struct trace_event_file *file, u64 *vals, + unsigned int n_vals) +{ + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int i, n_u64; + int fields_size = 0; + int ret = 0; + + /* + * Normal event generation doesn't get called at all unless + * the ENABLED bit is set (which attaches the probe thus + * allowing this code to be called, etc). Because this is + * called directly by the user, we don't have that but we + * still need to honor not logging when disabled. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + return 0; + + event = file->event_call->data; + + if (n_vals != event->n_fields) + return -EINVAL; + + if (trace_trigger_soft_disabled(file)) + return -EINVAL; + + fields_size = event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. 
+ */ + buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + if (event->fields[i]->is_string) { + char *str_val = (char *)(long)vals[i]; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + entry->fields[n_u64] = vals[i]; + n_u64++; + } + } + + trace_event_buffer_commit(&fbuffer); +out: + ring_buffer_nest_end(buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace_array); + +/** + * synth_event_trace_start - Start piecewise synthetic event trace + * @file: The trace_event_file representing the synthetic event + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Start the trace of a synthetic event field-by-field rather than all + * at once. + * + * This function 'opens' an event trace, which means space is reserved + * for the event in the trace buffer, after which the event's + * individual field values can be set through either + * synth_event_add_next_val() or synth_event_add_val(). + * + * A pointer to a trace_state object is passed in, which will keep + * track of the current event trace state until the event trace is + * closed (and the event finally traced) using + * synth_event_trace_end(). + * + * Note that synth_event_trace_end() must be called after all values + * have been added for each event trace, regardless of whether adding + * all field values succeeded or not. + * + * Note also that for a given event trace, all fields must be added + * using either synth_event_add_next_val() or synth_event_add_val() + * but not both together or interleaved. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_start(struct trace_event_file *file, + struct synth_event_trace_state *trace_state) +{ + struct synth_trace_event *entry; + int fields_size = 0; + int ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + memset(trace_state, '\0', sizeof(*trace_state)); + + /* + * Normal event tracing doesn't get called at all unless the + * ENABLED bit is set (which attaches the probe thus allowing + * this code to be called, etc). Because this is called + * directly by the user, we don't have that but we still need + * to honor not logging when disabled. For the the iterated + * trace case, we save the enabed state upon start and just + * ignore the following data calls. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) { + trace_state->enabled = false; + goto out; + } + + trace_state->enabled = true; + + trace_state->event = file->event_call->data; + + if (trace_trigger_soft_disabled(file)) { + ret = -EINVAL; + goto out; + } + + fields_size = trace_state->event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. 
+ */ + trace_state->buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(trace_state->buffer); + + entry = trace_event_buffer_reserve(&trace_state->fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + trace_state->entry = entry; +out: + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace_start); + +static int save_synth_val(struct synth_field *field, u64 val, + struct synth_event_trace_state *trace_state) +{ + struct synth_trace_event *entry = trace_state->entry; + + if (field->is_string) { + char *str_val = (char *)(long)val; + char *str_field; + + if (!str_val) + return -EINVAL; + + str_field = (char *)&entry->fields[field->offset]; + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + } else + entry->fields[field->offset] = val; + + return 0; +} + +/** + * synth_event_add_next_val - Add the next field's value to an open synth trace + * @val: The value to set the next field to + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Set the value of the next field in an event that's been opened by + * synth_event_trace_start(). + * + * The val param should be the value cast to u64. If the value points + * to a string, the val param should be a char * cast to u64. + * + * This function assumes all the fields in an event are to be set one + * after another - successive calls to this function are made, one for + * each field, in the order of the fields in the event, until all + * fields have been set. If you'd rather set each field individually + * without regard to ordering, synth_event_add_val() can be used + * instead. + * + * Note however that synth_event_add_next_val() and + * synth_event_add_val() can't be intermixed for a given event trace - + * one or the other but not both can be used at the same time. + * + * Note also that synth_event_trace_end() must be called after all + * values have been added for each event trace, regardless of whether + * adding all field values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_add_next_val(u64 val, + struct synth_event_trace_state *trace_state) +{ + struct synth_field *field; + struct synth_event *event; + int ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + /* can't mix add_next_synth_val() with add_synth_val() */ + if (trace_state->add_name) { + ret = -EINVAL; + goto out; + } + trace_state->add_next = true; + + if (!trace_state->enabled) + goto out; + + event = trace_state->event; + + if (trace_state->cur_field >= event->n_fields) { + ret = -EINVAL; + goto out; + } + + field = event->fields[trace_state->cur_field++]; + ret = save_synth_val(field, val, trace_state); + out: + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_next_val); + +static struct synth_field *find_synth_field(struct synth_event *event, + const char *field_name) +{ + struct synth_field *field = NULL; + unsigned int i; + + for (i = 0; i < event->n_fields; i++) { + field = event->fields[i]; + if (strcmp(field->name, field_name) == 0) + return field; + } + + return NULL; +} + +/** + * synth_event_add_val - Add a named field's value to an open synth trace + * @field_name: The name of the synthetic event field value to set + * @val: The value to set the next field to + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Set the value of the named field in an event that's been opened by + * synth_event_trace_start(). + * + * The val param should be the value cast to u64. 
If the value points + * to a string, the val param should be a char * cast to u64. + * + * This function looks up the field name, and if found, sets the field + * to the specified value. This lookup makes this function more + * expensive than synth_event_add_next_val(), so use that or the + * none-piecewise synth_event_trace() instead if efficiency is more + * important. + * + * Note however that synth_event_add_next_val() and + * synth_event_add_val() can't be intermixed for a given event trace - + * one or the other but not both can be used at the same time. + * + * Note also that synth_event_trace_end() must be called after all + * values have been added for each event trace, regardless of whether + * adding all field values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state) +{ + struct synth_trace_event *entry; + struct synth_event *event; + struct synth_field *field; + int ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + /* can't mix add_next_synth_val() with add_synth_val() */ + if (trace_state->add_next) { + ret = -EINVAL; + goto out; + } + trace_state->add_name = true; + + if (!trace_state->enabled) + goto out; + + event = trace_state->event; + entry = trace_state->entry; + + field = find_synth_field(event, field_name); + if (!field) { + ret = -EINVAL; + goto out; + } + + ret = save_synth_val(field, val, trace_state); + out: + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_val); + +/** + * synth_event_trace_end - End piecewise synthetic event trace + * @trace_state: A pointer to object tracking the piecewise trace state + * + * End the trace of a synthetic event opened by + * synth_event_trace__start(). + * + * This function 'closes' an event trace, which basically means that + * it commits the reserved event and cleans up other loose ends. + * + * A pointer to a trace_state object is passed in, which will keep + * track of the current event trace state opened with + * synth_event_trace_start(). + * + * Note that this function must be called after all values have been + * added for each event trace, regardless of whether adding all field + * values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_end(struct synth_event_trace_state *trace_state) +{ + if (!trace_state) + return -EINVAL; + + trace_event_buffer_commit(&trace_state->fbuffer); + + ring_buffer_nest_end(trace_state->buffer); + + return 0; +} +EXPORT_SYMBOL_GPL(synth_event_trace_end); + static int create_synth_event(int argc, const char **argv) { const char *name = argv[0]; From 9fe41efaca08416657efa8731c0d47ccb6a3f3eb Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:28 -0600 Subject: [PATCH 414/658] tracing: Add synth event generation test module Add a test module that checks the basic functionality of the in-kernel synthetic event generation API by generating and tracing synthetic events from a module. 
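In outline, such a module (a condensed sketch only; the full test module follows in the diff, and the "sample_synth" event and its fields are made up) defines the event, pins and enables its trace_event_file, traces it, and tears everything down on exit:

  #include <linux/kernel.h>
  #include <linux/err.h>
  #include <linux/module.h>
  #include <linux/trace_events.h>

  static struct trace_event_file *sample_file;

  static struct synth_field_desc sample_fields[] = {
          { .type = "u64",   .name = "delta" },
          { .type = "pid_t", .name = "pid" },
  };

  static int __init sample_init(void)
  {
          u64 vals[2] = { 42, 1000 };
          int ret;

          /* Define the synthetic event from this module */
          ret = synth_event_create("sample_synth", sample_fields,
                                   ARRAY_SIZE(sample_fields), THIS_MODULE);
          if (ret)
                  return ret;

          /* Pin the event file in the top-level trace instance */
          sample_file = trace_get_event_file(NULL, "synthetic", "sample_synth");
          if (IS_ERR(sample_file)) {
                  ret = PTR_ERR(sample_file);
                  goto delete;
          }

          /* Enable it, otherwise nothing reaches the trace buffer */
          ret = trace_array_set_clr_event(sample_file->tr, "synthetic",
                                          "sample_synth", true);
          if (ret) {
                  trace_put_event_file(sample_file);
                  goto delete;
          }

          /* Generate one event; failure here is not treated as fatal */
          synth_event_trace_array(sample_file, vals, ARRAY_SIZE(vals));

          return 0;
  delete:
          synth_event_delete("sample_synth");
          return ret;
  }

  static void __exit sample_exit(void)
  {
          trace_array_set_clr_event(sample_file->tr, "synthetic",
                                    "sample_synth", false);
          trace_put_event_file(sample_file);
          synth_event_delete("sample_synth");
  }

  module_init(sample_init);
  module_exit(sample_exit);
  MODULE_LICENSE("GPL");
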
Link: http://lkml.kernel.org/r/fcb4dd9eb9eefb70ab20538d3529d51642389664.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 13 + kernel/trace/Makefile | 1 + kernel/trace/synth_event_gen_test.c | 523 ++++++++++++++++++++++++++++ 3 files changed, 537 insertions(+) create mode 100644 kernel/trace/synth_event_gen_test.c diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 75326d8ab1af..4f2041166a2f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -774,6 +774,19 @@ config PREEMPTIRQ_DELAY_TEST If unsure, say N +config SYNTH_EVENT_GEN_TEST + tristate "Test module for in-kernel synthetic event generation" + depends on HIST_TRIGGERS + help + This option creates a test module to check the base + functionality of in-kernel synthetic event definition and + generation. + + To test, insert the module, and then check the trace buffer + for the generated sample events. + + If unsure, say N. + config TRACE_EVAL_MAP_FILE bool "Show eval mappings for trace events" depends on TRACING diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 395e2db9c742..32012f50fb79 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o +obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += trace_preemptirq.o diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c new file mode 100644 index 000000000000..4aefe003cb7c --- /dev/null +++ b/kernel/trace/synth_event_gen_test.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module for in-kernel sythetic event creation and generation. + * + * Copyright (C) 2019 Tom Zanussi + */ + +#include +#include + +/* + * This module is a simple test of basic functionality for in-kernel + * synthetic event creation and generation, the first and second tests + * using synth_event_gen_cmd_start() and synth_event_add_field(), the + * third uses synth_event_create() to do it all at once with a static + * field array. + * + * Following that are a few examples using the created events to test + * various ways of tracing a synthetic event. + * + * To test, select CONFIG_SYNTH_EVENT_GEN_TEST and build the module. + * Then: + * + * # insmod kernel/trace/synth_event_gen_test.ko + * # cat /sys/kernel/debug/tracing/trace + * + * You should see several events in the trace buffer - + * "create_synth_test", "empty_synth_test", and several instances of + * "gen_synth_test". + * + * To remove the events, remove the module: + * + * # rmmod synth_event_gen_test + * + */ + +static struct trace_event_file *create_synth_test; +static struct trace_event_file *empty_synth_test; +static struct trace_event_file *gen_synth_test; + +/* + * Test to make sure we can create a synthetic event, then add more + * fields. 
+ */ +static int __init test_gen_synth_cmd(void) +{ + struct dynevent_cmd cmd; + u64 vals[7]; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Create the empty gen_synth_test synthetic event with the + * first 4 fields. + */ + ret = synth_event_gen_cmd_start(&cmd, "gen_synth_test", THIS_MODULE, + "pid_t", "next_pid_field", + "char[16]", "next_comm_field", + "u64", "ts_ns", + "u64", "ts_ms"); + if (ret) + goto free; + + /* Use synth_event_add_field to add the rest of the fields */ + + ret = synth_event_add_field(&cmd, "unsigned int", "cpu"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[64]", "my_string_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "int", "my_int_field"); + if (ret) + goto free; + + ret = synth_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_synth_test event file. We need to prevent + * the instance and event from disappearing from underneath + * us, which trace_get_event_file() does (though in this case + * we're using the top-level instance which never goes away). + */ + gen_synth_test = trace_get_event_file(NULL, "synthetic", + "gen_synth_test"); + if (IS_ERR(gen_synth_test)) { + ret = PTR_ERR(gen_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", "gen_synth_test", true); + if (ret) { + trace_put_event_file(gen_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"hula hoops"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 598; /* my_int_field */ + + /* Now generate a gen_synth_test event */ + ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("gen_synth_test"); + free: + kfree(buf); + + goto out; +} + +/* + * Test to make sure we can create an initially empty synthetic event, + * then add all the fields. + */ +static int __init test_empty_synth_event(void) +{ + struct dynevent_cmd cmd; + u64 vals[7]; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Create the empty_synth_test synthetic event with no fields. 
+ */ + ret = synth_event_gen_cmd_start(&cmd, "empty_synth_test", THIS_MODULE); + if (ret) + goto free; + + /* Use synth_event_add_field to add all of the fields */ + + ret = synth_event_add_field(&cmd, "pid_t", "next_pid_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[16]", "next_comm_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "u64", "ts_ns"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "u64", "ts_ms"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "unsigned int", "cpu"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[64]", "my_string_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "int", "my_int_field"); + if (ret) + goto free; + + /* All fields have been added, close and register the synth event */ + + ret = synth_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the empty_synth_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). + */ + empty_synth_test = trace_get_event_file(NULL, "synthetic", + "empty_synth_test"); + if (IS_ERR(empty_synth_test)) { + ret = PTR_ERR(empty_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", "empty_synth_test", true); + if (ret) { + trace_put_event_file(empty_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed_2.0"; /* my_string_field */ + vals[6] = 399; /* my_int_field */ + + /* Now trace an empty_synth_test event */ + ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("empty_synth_test"); + free: + kfree(buf); + + goto out; +} + +static struct synth_field_desc create_synth_test_fields[] = { + { .type = "pid_t", .name = "next_pid_field" }, + { .type = "char[16]", .name = "next_comm_field" }, + { .type = "u64", .name = "ts_ns" }, + { .type = "u64", .name = "ts_ms" }, + { .type = "unsigned int", .name = "cpu" }, + { .type = "char[64]", .name = "my_string_field" }, + { .type = "int", .name = "my_int_field" }, +}; + +/* + * Test synthetic event creation all at once from array of field + * descriptors. + */ +static int __init test_create_synth_event(void) +{ + u64 vals[7]; + int ret; + + /* Create the create_synth_test event with the fields above */ + ret = synth_event_create("create_synth_test", + create_synth_test_fields, + ARRAY_SIZE(create_synth_test_fields), + THIS_MODULE); + if (ret) + goto out; + + /* + * Now get the create_synth_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). 
+ */ + create_synth_test = trace_get_event_file(NULL, "synthetic", + "create_synth_test"); + if (IS_ERR(create_synth_test)) { + ret = PTR_ERR(create_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(create_synth_test->tr, + "synthetic", "create_synth_test", true); + if (ret) { + trace_put_event_file(create_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 398; /* my_int_field */ + + /* Now generate a create_synth_test event */ + ret = synth_event_trace_array(create_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = synth_event_delete("create_synth_test"); + + goto out; +} + +/* + * Test tracing a synthetic event by reserving trace buffer space, + * then filling in fields one after another. + */ +static int __init test_add_next_synth_val(void) +{ + struct synth_event_trace_state trace_state; + int ret; + + /* Start by reserving space in the trace buffer */ + ret = synth_event_trace_start(gen_synth_test, &trace_state); + if (ret) + return ret; + + /* Write some bogus values into the trace buffer, one after another */ + + /* next_pid_field */ + ret = synth_event_add_next_val(777, &trace_state); + if (ret) + goto out; + + /* next_comm_field */ + ret = synth_event_add_next_val((u64)"slinky", &trace_state); + if (ret) + goto out; + + /* ts_ns */ + ret = synth_event_add_next_val(1000000, &trace_state); + if (ret) + goto out; + + /* ts_ms */ + ret = synth_event_add_next_val(1000, &trace_state); + if (ret) + goto out; + + /* cpu */ + ret = synth_event_add_next_val(smp_processor_id(), &trace_state); + if (ret) + goto out; + + /* my_string_field */ + ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state); + if (ret) + goto out; + + /* my_int_field */ + ret = synth_event_add_next_val(395, &trace_state); + out: + /* Finally, commit the event */ + ret = synth_event_trace_end(&trace_state); + + return ret; +} + +/* + * Test tracing a synthetic event by reserving trace buffer space, + * then filling in fields using field names, which can be done in any + * order. 
+ */ +static int __init test_add_synth_val(void) +{ + struct synth_event_trace_state trace_state; + int ret; + + /* Start by reserving space in the trace buffer */ + ret = synth_event_trace_start(gen_synth_test, &trace_state); + if (ret) + return ret; + + /* Write some bogus values into the trace buffer, using field names */ + + ret = synth_event_add_val("ts_ns", 1000000, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("ts_ms", 1000, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("next_pid_field", 777, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("next_comm_field", (u64)"silly putty", + &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("my_string_field", (u64)"thneed_9", + &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("my_int_field", 3999, &trace_state); + out: + /* Finally, commit the event */ + ret = synth_event_trace_end(&trace_state); + + return ret; +} + +/* + * Test tracing a synthetic event all at once from array of values. + */ +static int __init test_trace_synth_event(void) +{ + int ret; + + /* Trace some bogus values just for testing */ + ret = synth_event_trace(create_synth_test, 7, /* number of values */ + 444, /* next_pid_field */ + (u64)"clackers", /* next_comm_field */ + 1000000, /* ts_ns */ + 1000, /* ts_ms */ + smp_processor_id(), /* cpu */ + (u64)"Thneed", /* my_string_field */ + 999); /* my_int_field */ + return ret; +} + +static int __init synth_event_gen_test_init(void) +{ + int ret; + + ret = test_gen_synth_cmd(); + if (ret) + return ret; + + ret = test_empty_synth_event(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + trace_put_event_file(gen_synth_test); + WARN_ON(synth_event_delete("gen_synth_test")); + goto out; + } + + ret = test_create_synth_event(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + trace_put_event_file(gen_synth_test); + WARN_ON(synth_event_delete("gen_synth_test")); + + WARN_ON(trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false)); + trace_put_event_file(empty_synth_test); + WARN_ON(synth_event_delete("empty_synth_test")); + goto out; + } + + ret = test_add_next_synth_val(); + WARN_ON(ret); + + ret = test_add_synth_val(); + WARN_ON(ret); + + ret = test_trace_synth_event(); + WARN_ON(ret); + out: + return ret; +} + +static void __exit synth_event_gen_test_exit(void) +{ + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("gen_synth_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(empty_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("empty_synth_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(create_synth_test->tr, + "synthetic", + "create_synth_test", false)); + + /* Now give the file and instance back */ + 
trace_put_event_file(create_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("create_synth_test")); +} + +module_init(synth_event_gen_test_init) +module_exit(synth_event_gen_test_exit) + +MODULE_AUTHOR("Tom Zanussi"); +MODULE_DESCRIPTION("synthetic event generation test"); +MODULE_LICENSE("GPL v2"); From 2a588dd1d5d649a183a2ff6fa1b80e870cf821d8 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:29 -0600 Subject: [PATCH 415/658] tracing: Add kprobe event command generation functions Add functions used to generate kprobe event commands, built on top of the dynevent_cmd interface. kprobe_event_gen_cmd_start() is used to create a kprobe event command using a variable arg list, and kretprobe_event_gen_cmd_start() does the same for kretprobe event commands. kprobe_event_add_fields() can be used to add single fields one by one or as a group. Once all desired fields are added, kprobe_event_gen_cmd_end() or kretprobe_event_gen_cmd_end() respectively are used to actually execute the command and create the event. Link: http://lkml.kernel.org/r/95cc4696502bb6017f9126f306a45ad19b4cc14f.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 31 +++++++ kernel/trace/trace_kprobe.c | 161 +++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index bf03d12efb28..7c307a7c9c6a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -358,6 +358,7 @@ extern void trace_put_event_file(struct trace_event_file *file); enum dynevent_type { DYNEVENT_TYPE_SYNTH = 1, + DYNEVENT_TYPE_KPROBE, DYNEVENT_TYPE_NONE, }; @@ -442,6 +443,36 @@ extern int synth_event_add_val(const char *field_name, u64 val, struct synth_event_trace_state *trace_state); extern int synth_event_trace_end(struct synth_event_trace_state *trace_state); +extern int kprobe_event_delete(const char *name); + +extern void kprobe_event_cmd_init(struct dynevent_cmd *cmd, + char *buf, int maxlen); + +#define kprobe_event_gen_cmd_start(cmd, name, loc, ...) \ + __kprobe_event_gen_cmd_start(cmd, false, name, loc, ## __VA_ARGS__, NULL) + +#define kretprobe_event_gen_cmd_start(cmd, name, loc, ...) \ + __kprobe_event_gen_cmd_start(cmd, true, name, loc, ## __VA_ARGS__, NULL) + +extern int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, + bool kretprobe, + const char *name, + const char *loc, ...); + +#define kprobe_event_add_fields(cmd, ...) 
\ + __kprobe_event_add_fields(cmd, ## __VA_ARGS__, NULL) + +#define kprobe_event_add_field(cmd, field) \ + __kprobe_event_add_fields(cmd, field, NULL) + +extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); + +#define kprobe_event_gen_cmd_end(cmd) \ + dynevent_create(cmd) + +#define kretprobe_event_gen_cmd_end(cmd) \ + dynevent_create(cmd) + /* * Event file flags: * ENABLED - The event is enabled diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index bf20cd7f2666..f43548b466d0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -906,6 +906,167 @@ int trace_kprobe_run_command(const char *command) return trace_run_command(command, create_or_delete_trace_kprobe); } +static int trace_kprobe_run_cmd(struct dynevent_cmd *cmd) +{ + return trace_run_command(cmd->buf, create_or_delete_trace_kprobe); +} + +/** + * kprobe_event_cmd_init - Initialize a kprobe event command object + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @buf: A pointer to the buffer used to build the command + * @maxlen: The length of the buffer passed in @buf + * + * Initialize a synthetic event command object. Use this before + * calling any of the other kprobe_event functions. + */ +void kprobe_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) +{ + dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_KPROBE, + trace_kprobe_run_cmd); +} +EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); + +/** + * __kprobe_event_gen_cmd_start - Generate a kprobe event command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the kprobe event + * @loc: The location of the kprobe event + * @kretprobe: Is this a return probe? + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the kprobe_event_gen_cmd_start() wrapper, which automatically + * adds a NULL to the end of the arg list. If this function is used + * directly, make sure the last arg in the variable arg list is NULL. + * + * Generate a kprobe event command to be executed by + * kprobe_event_gen_cmd_end(). This function can be used to generate the + * complete command or only the first part of it; in the latter case, + * kprobe_event_add_fields() can be used to add more fields following this. + * + * Return: 0 if successful, error otherwise. + */ +int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, + const char *name, const char *loc, ...) 
+{ + char buf[MAX_EVENT_NAME_LEN]; + struct dynevent_arg arg; + va_list args; + int ret; + + if (cmd->type != DYNEVENT_TYPE_KPROBE) + return -EINVAL; + + if (kretprobe) + snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name); + else + snprintf(buf, MAX_EVENT_NAME_LEN, "p:kprobes/%s", name); + + ret = dynevent_str_add(cmd, buf); + if (ret) + return ret; + + dynevent_arg_init(&arg, NULL, 0); + arg.str = loc; + ret = dynevent_arg_add(cmd, &arg); + if (ret) + return ret; + + va_start(args, loc); + for (;;) { + const char *field; + + field = va_arg(args, const char *); + if (!field) + break; + + if (++cmd->n_fields > MAX_TRACE_ARGS) { + ret = -EINVAL; + break; + } + + arg.str = field; + ret = dynevent_arg_add(cmd, &arg); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__kprobe_event_gen_cmd_start); + +/** + * __kprobe_event_add_fields - Add probe fields to a kprobe command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the kprobe_event_add_fields() wrapper, which + * automatically adds a NULL to the end of the arg list. If this + * function is used directly, make sure the last arg in the variable + * arg list is NULL. + * + * Add probe fields to an existing kprobe command using a variable + * list of args. Fields are added in the same order they're listed. + * + * Return: 0 if successful, error otherwise. + */ +int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...) +{ + struct dynevent_arg arg; + va_list args; + int ret; + + if (cmd->type != DYNEVENT_TYPE_KPROBE) + return -EINVAL; + + dynevent_arg_init(&arg, NULL, 0); + + va_start(args, cmd); + for (;;) { + const char *field; + + field = va_arg(args, const char *); + if (!field) + break; + + if (++cmd->n_fields > MAX_TRACE_ARGS) { + ret = -EINVAL; + break; + } + + arg.str = field; + ret = dynevent_arg_add(cmd, &arg); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__kprobe_event_add_fields); + +/** + * kprobe_event_delete - Delete a kprobe event + * @name: The name of the kprobe event to delete + * + * Delete a kprobe event with the give @name from kernel code rather + * than directly from the command line. + * + * Return: 0 if successful, error otherwise. + */ +int kprobe_event_delete(const char *name) +{ + char buf[MAX_EVENT_NAME_LEN]; + + snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); + + return trace_run_command(buf, create_or_delete_trace_kprobe); +} +EXPORT_SYMBOL_GPL(kprobe_event_delete); + static int trace_kprobe_release(struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); From 29a15481054681fa2d450b60a6feea8e6ca6f511 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:30 -0600 Subject: [PATCH 416/658] tracing: Change trace_boot to use kprobe_event interface Have trace_boot_add_kprobe_event() use the kprobe_event interface. Also, rename kprobe_event_run_cmd() to kprobe_event_run_command() now that trace_boot's version is gone. 
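The kprobe_event command interface used here boils down to sequences like the following (a sketch; the event name, probed symbol, and x86 fetch-arg strings are illustrative, echoing the test module added later in this series):

  #include <linux/slab.h>
  #include <linux/trace_events.h>

  static int sketch_kprobe_define(void)
  {
          struct dynevent_cmd cmd;
          char *buf;
          int ret;

          buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
          if (!buf)
                  return -ENOMEM;

          kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN);

          /* Name, probe location, then any number of fetch-arg fields */
          ret = kprobe_event_gen_cmd_start(&cmd, "sketch_kprobe",
                                           "do_sys_open", "dfd=%ax");
          if (ret)
                  goto out;

          /* More fields can be appended before the command is run */
          ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
          if (ret)
                  goto out;

          /* Executes the accumulated command and registers the event */
          ret = kprobe_event_gen_cmd_end(&cmd);

          /* kprobe_event_delete("sketch_kprobe") undoes this later */
  out:
          kfree(buf);
          return ret;
  }
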
Link: http://lkml.kernel.org/r/af5429d11291ab1e9a85a0ff944af3b2bcf193c7.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 35 +++++++++++++++-------------------- kernel/trace/trace_kprobe.c | 9 ++------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 4d37bf5c3742..2298a70cdda6 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -88,37 +88,32 @@ trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) } #ifdef CONFIG_KPROBE_EVENTS -extern int trace_kprobe_run_command(const char *command); - static int __init trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) { + struct dynevent_cmd cmd; struct xbc_node *anode; char buf[MAX_BUF_LEN]; const char *val; - char *p; - int len; + int ret; - len = snprintf(buf, ARRAY_SIZE(buf) - 1, "p:kprobes/%s ", event); - if (len >= ARRAY_SIZE(buf)) { - pr_err("Event name is too long: %s\n", event); - return -E2BIG; - } - p = buf + len; - len = ARRAY_SIZE(buf) - len; + kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + + ret = kprobe_event_gen_cmd_start(&cmd, event, NULL); + if (ret) + return ret; xbc_node_for_each_array_value(node, "probes", anode, val) { - if (strlcpy(p, val, len) >= len) { - pr_err("Probe definition is too long: %s\n", val); - return -E2BIG; - } - if (trace_kprobe_run_command(buf) < 0) { - pr_err("Failed to add probe: %s\n", buf); - return -EINVAL; - } + ret = kprobe_event_add_field(&cmd, val); + if (ret) + return ret; } - return 0; + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + pr_err("Failed to add probe: %s\n", buf); + + return ret; } #else static inline int __init diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f43548b466d0..307abb724a71 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -901,12 +901,7 @@ static int create_or_delete_trace_kprobe(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } -int trace_kprobe_run_command(const char *command) -{ - return trace_run_command(command, create_or_delete_trace_kprobe); -} - -static int trace_kprobe_run_cmd(struct dynevent_cmd *cmd) +static int trace_kprobe_run_command(struct dynevent_cmd *cmd) { return trace_run_command(cmd->buf, create_or_delete_trace_kprobe); } @@ -923,7 +918,7 @@ static int trace_kprobe_run_cmd(struct dynevent_cmd *cmd) void kprobe_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) { dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_KPROBE, - trace_kprobe_run_cmd); + trace_kprobe_run_command); } EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); From 64836248dda20c8e7427b493f7e06d9bf8f58850 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:31 -0600 Subject: [PATCH 417/658] tracing: Add kprobe event command generation test module Add a test module that checks the basic functionality of the in-kernel kprobe event command generation API by creating kprobe events from a module. 
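Condensed to its essentials (a sketch only; the real module follows in the diff below, and "sketch_kretprobe" is a made-up name), the kretprobe half of such a module is:

  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/trace_events.h>

  static int __init sketch_kretprobe_init(void)
  {
          struct dynevent_cmd cmd;
          char *buf;
          int ret;

          buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
          if (!buf)
                  return -ENOMEM;

          kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN);

          /* One call defines the whole kretprobe: name, location, fields */
          ret = kretprobe_event_gen_cmd_start(&cmd, "sketch_kretprobe",
                                              "do_sys_open", "$retval");
          if (!ret)
                  ret = kretprobe_event_gen_cmd_end(&cmd);

          kfree(buf);
          return ret;
  }

  static void __exit sketch_kretprobe_exit(void)
  {
          kprobe_event_delete("sketch_kretprobe");
  }

  module_init(sketch_kretprobe_init);
  module_exit(sketch_kretprobe_exit);
  MODULE_LICENSE("GPL");
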
Link: http://lkml.kernel.org/r/97e502b204f9dba948e3fa3a4315448298218787.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 12 ++ kernel/trace/Makefile | 1 + kernel/trace/kprobe_event_gen_test.c | 225 +++++++++++++++++++++++++++ 3 files changed, 238 insertions(+) create mode 100644 kernel/trace/kprobe_event_gen_test.c diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4f2041166a2f..4484e783f68d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -787,6 +787,18 @@ config SYNTH_EVENT_GEN_TEST If unsure, say N. +config KPROBE_EVENT_GEN_TEST + tristate "Test module for in-kernel kprobe event generation" + depends on KPROBE_EVENTS + help + This option creates a test module to check the base + functionality of in-kernel kprobe event definition. + + To test, insert the module, and then check the trace buffer + for the generated kprobe events. + + If unsure, say N. + config TRACE_EVAL_MAP_FILE bool "Show eval mappings for trace events" depends on TRACING diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 32012f50fb79..f9dcd19165fa 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o +obj-$(CONFIG_KPROBE_EVENT_GEN_TEST) += kprobe_event_gen_test.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += trace_preemptirq.o diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c new file mode 100644 index 000000000000..18b0f1cbb947 --- /dev/null +++ b/kernel/trace/kprobe_event_gen_test.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module for in-kernel kprobe event creation and generation. + * + * Copyright (C) 2019 Tom Zanussi + */ + +#include +#include + +/* + * This module is a simple test of basic functionality for in-kernel + * kprobe/kretprobe event creation. The first test uses + * kprobe_event_gen_cmd_start(), kprobe_event_add_fields() and + * kprobe_event_gen_cmd_end() to create a kprobe event, which is then + * enabled in order to generate trace output. The second creates a + * kretprobe event using kretprobe_event_gen_cmd_start() and + * kretprobe_event_gen_cmd_end(), and is also then enabled. + * + * To test, select CONFIG_KPROBE_EVENT_GEN_TEST and build the module. + * Then: + * + * # insmod kernel/trace/kprobe_event_gen_test.ko + * # cat /sys/kernel/debug/tracing/trace + * + * You should see many instances of the "gen_kprobe_test" and + * "gen_kretprobe_test" events in the trace buffer. + * + * To remove the events, remove the module: + * + * # rmmod kprobe_event_gen_test + * + */ + +static struct trace_event_file *gen_kprobe_test; +static struct trace_event_file *gen_kretprobe_test; + +/* + * Test to make sure we can create a kprobe event, then add more + * fields. 
+ */ +static int __init test_gen_kprobe_cmd(void) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Define the gen_kprobe_test event with the first 2 kprobe + * fields. + */ + ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test", + "do_sys_open", + "dfd=%ax", "filename=%dx"); + if (ret) + goto free; + + /* Use kprobe_event_add_fields to add the rest of the fields */ + + ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)"); + if (ret) + goto free; + + /* + * This actually creates the event. + */ + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_kprobe_test event file. We need to prevent + * the instance and event from disappearing from underneath + * us, which trace_get_event_file() does (though in this case + * we're using the top-level instance which never goes away). + */ + gen_kprobe_test = trace_get_event_file(NULL, "kprobes", + "gen_kprobe_test"); + if (IS_ERR(gen_kprobe_test)) { + ret = PTR_ERR(gen_kprobe_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", "gen_kprobe_test", true); + if (ret) { + trace_put_event_file(gen_kprobe_test); + goto delete; + } + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kprobe_test"); + free: + kfree(buf); + + goto out; +} + +/* + * Test to make sure we can create a kretprobe event. + */ +static int __init test_gen_kretprobe_cmd(void) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Define the kretprobe event. + */ + ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test", + "do_sys_open", + "$retval"); + if (ret) + goto free; + + /* + * This actually creates the event. + */ + ret = kretprobe_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_kretprobe_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). 
+ */ + gen_kretprobe_test = trace_get_event_file(NULL, "kprobes", + "gen_kretprobe_test"); + if (IS_ERR(gen_kretprobe_test)) { + ret = PTR_ERR(gen_kretprobe_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", "gen_kretprobe_test", true); + if (ret) { + trace_put_event_file(gen_kretprobe_test); + goto delete; + } + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kretprobe_test"); + free: + kfree(buf); + + goto out; +} + +static int __init kprobe_event_gen_test_init(void) +{ + int ret; + + ret = test_gen_kprobe_cmd(); + if (ret) + return ret; + + ret = test_gen_kretprobe_cmd(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + trace_put_event_file(gen_kretprobe_test); + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); + } + + return ret; +} + +static void __exit kprobe_event_gen_test_exit(void) +{ + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kprobe_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kretprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); +} + +module_init(kprobe_event_gen_test_init) +module_exit(kprobe_event_gen_test_exit) + +MODULE_AUTHOR("Tom Zanussi"); +MODULE_DESCRIPTION("kprobe event generation test"); +MODULE_LICENSE("GPL v2"); From 34ed63573b664f984a4b84df02a9ecdfaa3a6034 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:32 -0600 Subject: [PATCH 418/658] tracing: Documentation for in-kernel synthetic event API Add Documentation for creating and generating synthetic events from modules. Link: http://lkml.kernel.org/r/734bf8789ff8700000c9acde61a553427910ddb5.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/events.rst | 515 +++++++++++++++++++++++++++++++++ 1 file changed, 515 insertions(+) diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index f7e1fcc0953c..ed79b220bd07 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -525,3 +525,518 @@ The following commands are supported: event counts (hitcount). See Documentation/trace/histogram.rst for details and examples. + +6.3 In-kernel trace event API +----------------------------- + +In most cases, the command-line interface to trace events is more than +sufficient. Sometimes, however, applications might find the need for +more complex relationships than can be expressed through a simple +series of linked command-line expressions, or putting together sets of +commands may be simply too cumbersome. An example might be an +application that needs to 'listen' to the trace stream in order to +maintain an in-kernel state machine detecting, for instance, when an +illegal kernel state occurs in the scheduler. 
+ +The trace event subsystem provides an in-kernel API allowing modules +or other kernel code to generate user-defined 'synthetic' events at +will, which can be used to either augment the existing trace stream +and/or signal that a particular important state has occurred. + +A similar in-kernel API is also available for creating kprobe and +kretprobe events. + +Both the synthetic event and k/ret/probe event APIs are built on top +of a lower-level "dynevent_cmd" event command API, which is also +available for more specialized applications, or as the basis of other +higher-level trace event APIs. + +The API provided for these purposes is describe below and allows the +following: + + - dynamically creating synthetic event definitions + - dynamically creating kprobe and kretprobe event definitions + - tracing synthetic events from in-kernel code + - the low-level "dynevent_cmd" API + +6.3.1 Dyamically creating synthetic event definitions +----------------------------------------------------- + +There are a couple ways to create a new synthetic event from a kernel +module or other kernel code. + +The first creates the event in one step, using synth_event_create(). +In this method, the name of the event to create and an array defining +the fields is supplied to synth_event_create(). If successful, a +synthetic event with that name and fields will exist following that +call. For example, to create a new "schedtest" synthetic event: + + ret = synth_event_create("schedtest", sched_fields, + ARRAY_SIZE(sched_fields), THIS_MODULE); + +The sched_fields param in this example points to an array of struct +synth_field_desc, each of which describes an event field by type and +name: + + static struct synth_field_desc sched_fields[] = { + { .type = "pid_t", .name = "next_pid_field" }, + { .type = "char[16]", .name = "next_comm_field" }, + { .type = "u64", .name = "ts_ns" }, + { .type = "u64", .name = "ts_ms" }, + { .type = "unsigned int", .name = "cpu" }, + { .type = "char[64]", .name = "my_string_field" }, + { .type = "int", .name = "my_int_field" }, + }; + +See synth_field_size() for available types. If field_name contains [n] +the field is considered to be an array. + +If the event is created from within a module, a pointer to the module +must be passed to synth_event_create(). This will ensure that the +trace buffer won't contain unreadable events when the module is +removed. + +At this point, the event object is ready to be used for generating new +events. + +In the second method, the event is created in several steps. This +allows events to be created dynamically and without the need to create +and populate an array of fields beforehand. + +To use this method, an empty or partially empty synthetic event should +first be created using synth_event_gen_cmd_start() or +synth_event_gen_cmd_array_start(). For synth_event_gen_cmd_start(), +the name of the event along with one or more pairs of args each pair +representing a 'type field_name;' field specification should be +supplied. For synth_event_gen_cmd_array_start(), the name of the +event along with an array of struct synth_field_desc should be +supplied. Before calling synth_event_gen_cmd_start() or +synth_event_gen_cmd_array_start(), the user should create and +initialize a dynevent_cmd object using synth_event_cmd_init(). 
+ +For example, to create a new "schedtest" synthetic event with two +fields: + + struct dynevent_cmd cmd; + char *buf; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + ret = synth_event_gen_cmd_start(&cmd, "schedtest", THIS_MODULE, + "pid_t", "next_pid_field", + "u64", "ts_ns"); + +Alternatively, using an array of struct synth_field_desc fields +containing the same information: + + ret = synth_event_gen_cmd_array_start(&cmd, "schedtest", THIS_MODULE, + fields, n_fields); + +Once the synthetic event object has been created, it can then be +populated with more fields. Fields are added one by one using +synth_event_add_field(), supplying the dynevent_cmd object, a field +type, and a field name. For example, to add a new int field named +"intfield", the following call should be made: + + ret = synth_event_add_field(&cmd, "int", "intfield"); + +See synth_field_size() for available types. If field_name contains [n] +the field is considered to be an array. + +A group of fields can also be added all at once using an array of +synth_field_desc with add_synth_fields(). For example, this would add +just the first four sched_fields: + + ret = synth_event_add_fields(&cmd, sched_fields, 4); + +If you already have a string of the form 'type field_name', +synth_event_add_field_str() can be used to add it as-is; it will +also automatically append a ';' to the string. + +Once all the fields have been added, the event should be finalized and +registered by calling the synth_event_gen_cmd_end() function: + + ret = synth_event_gen_cmd_end(&cmd); + +At this point, the event object is ready to be used for tracing new +events. + +6.3.3 Tracing synthetic events from in-kernel code +-------------------------------------------------- + +To trace a synthetic event, there are several options. The first +option is to trace the event in one call, using synth_event_trace() +with a variable number of values, or synth_event_trace_array() with an +array of values to be set. A second option can be used to avoid the +need for a pre-formed array of values or list of arguments, via +synth_event_trace_start() and synth_event_trace_end() along with +synth_event_add_next_val() or synth_event_add_val() to add the values +piecewise. + +6.3.3.1 Tracing a synthetic event all at once +--------------------------------------------- + +To trace a synthetic event all at once, the synth_event_trace() or +synth_event_trace_array() functions can be used. + +The synth_event_trace() function is passed the trace_event_file +representing the synthetic event (which can be retrieved using +trace_get_event_file() using the synthetic event name, "synthetic" as +the system name, and the trace instance name (NULL if using the global +trace array)), along with an variable number of u64 args, one for each +synthetic event field, and the number of values being passed. + +So, to trace an event corresponding to the synthetic event definition +above, code like the following could be used: + + ret = synth_event_trace(create_synth_test, 7, /* number of values */ + 444, /* next_pid_field */ + (u64)"clackers", /* next_comm_field */ + 1000000, /* ts_ns */ + 1000, /* ts_ms */ + smp_processor_id(),/* cpu */ + (u64)"Thneed", /* my_string_field */ + 999); /* my_int_field */ + +All vals should be cast to u64, and string vals are just pointers to +strings, cast to u64. 
Strings will be copied into space reserved in +the event for the string, using these pointers. + +Alternatively, the synth_event_trace_array() function can be used to +accomplish the same thing. It is passed the trace_event_file +representing the synthetic event (which can be retrieved using +trace_get_event_file() using the synthetic event name, "synthetic" as +the system name, and the trace instance name (NULL if using the global +trace array)), along with an array of u64, one for each synthetic +event field. + +To trace an event corresponding to the synthetic event definition +above, code like the following could be used: + + u64 vals[7]; + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 398; /* my_int_field */ + +The 'vals' array is just an array of u64, the number of which must +match the number of field in the synthetic event, and which must be in +the same order as the synthetic event fields. + +All vals should be cast to u64, and string vals are just pointers to +strings, cast to u64. Strings will be copied into space reserved in +the event for the string, using these pointers. + +In order to trace a synthetic event, a pointer to the trace event file +is needed. The trace_get_event_file() function can be used to get +it - it will find the file in the given trace instance (in this case +NULL since the top trace array is being used) while at the same time +preventing the instance containing it from going away: + + schedtest_event_file = trace_get_event_file(NULL, "synthetic", + "schedtest"); + +Before tracing the event, it should be enabled in some way, otherwise +the synthetic event won't actually show up in the trace buffer. + +To enable a synthetic event from the kernel, trace_array_set_clr_event() +can be used (which is not specific to synthetic events, so does need +the "synthetic" system name to be specified explicitly). 
+
+To enable the event, pass 'true' to it:
+
+	trace_array_set_clr_event(schedtest_event_file->tr,
+				  "synthetic", "schedtest", true);
+
+To disable it, pass false:
+
+	trace_array_set_clr_event(schedtest_event_file->tr,
+				  "synthetic", "schedtest", false);
+
+Finally, synth_event_trace_array() can be used to actually trace the
+event, which should be visible in the trace buffer afterwards:
+
+	ret = synth_event_trace_array(schedtest_event_file, vals,
+				      ARRAY_SIZE(vals));
+
+To remove the synthetic event, the event should be disabled, and the
+trace instance should be 'put' back using trace_put_event_file():
+
+	trace_array_set_clr_event(schedtest_event_file->tr,
+				  "synthetic", "schedtest", false);
+	trace_put_event_file(schedtest_event_file);
+
+If those have been successful, synth_event_delete() can be called to
+remove the event:
+
+	ret = synth_event_delete("schedtest");
+
+6.3.3.2 Tracing a synthetic event piecewise
+-------------------------------------------
+
+To trace a synthetic event using the piecewise method described above,
+the synth_event_trace_start() function is used to 'open' the synthetic
+event trace:
+
+	struct synth_trace_state trace_state;
+
+	ret = synth_event_trace_start(schedtest_event_file, &trace_state);
+
+It's passed the trace_event_file representing the synthetic event
+using the same methods as described above, along with a pointer to a
+struct synth_trace_state object, which will be zeroed before use and
+used to maintain state between this and following calls.
+
+Once the event has been opened, which means space for it has been
+reserved in the trace buffer, the individual fields can be set.  There
+are two ways to do that, either one after another for each field in
+the event, which requires no lookups, or by name, which does.  The
+tradeoff is flexibility in doing the assignments vs the cost of a
+lookup per field.
+
+To assign the values one after the other without lookups,
+synth_event_add_next_val() should be used.  Each call is passed the
+same synth_trace_state object used in the synth_event_trace_start(),
+along with the value to set the next field in the event.  After each
+field is set, the 'cursor' points to the next field, which will be set
+by the subsequent call, continuing until all the fields have been set
+in order.  The same sequence of calls as in the above examples using
+this method would be (without error-handling code):
+
+	/* next_pid_field */
+	ret = synth_event_add_next_val(777, &trace_state);
+
+	/* next_comm_field */
+	ret = synth_event_add_next_val((u64)"slinky", &trace_state);
+
+	/* ts_ns */
+	ret = synth_event_add_next_val(1000000, &trace_state);
+
+	/* ts_ms */
+	ret = synth_event_add_next_val(1000, &trace_state);
+
+	/* cpu */
+	ret = synth_event_add_next_val(smp_processor_id(), &trace_state);
+
+	/* my_string_field */
+	ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state);
+
+	/* my_int_field */
+	ret = synth_event_add_next_val(395, &trace_state);
+
+To assign the values in any order, synth_event_add_val() should be
+used.  Each call is passed the same synth_trace_state object used in
+the synth_event_trace_start(), along with the field name of the field
+to set and the value to set it to.  The same sequence of calls as in
+the above examples using this method would be (without error-handling
+code):
+
+	ret = synth_event_add_val("next_pid_field", 777, &trace_state);
+	ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
+				  &trace_state);
+	ret = synth_event_add_val("ts_ns", 1000000, &trace_state);
+	ret = synth_event_add_val("ts_ms", 1000, &trace_state);
+	ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state);
+	ret = synth_event_add_val("my_string_field", (u64)"thneed_9",
+				  &trace_state);
+	ret = synth_event_add_val("my_int_field", 3999, &trace_state);
+
+Note that synth_event_add_next_val() and synth_event_add_val() are
+incompatible if used within the same trace of an event - either one
+can be used but not both at the same time.
+
+Finally, the event won't be actually traced until it's 'closed',
+which is done using synth_event_trace_end(), which takes only the
+struct synth_trace_state object used in the previous calls:
+
+	ret = synth_event_trace_end(&trace_state);
+
+Note that synth_event_trace_end() must be called at the end regardless
+of whether any of the add calls failed (say due to a bad field name
+being passed in).
+
+6.3.4 Dynamically creating kprobe and kretprobe event definitions
+-----------------------------------------------------------------
+
+To create a kprobe or kretprobe trace event from kernel code, the
+kprobe_event_gen_cmd_start() or kretprobe_event_gen_cmd_start()
+functions can be used.
+
+To create a kprobe event, an empty or partially empty kprobe event
+should first be created using kprobe_event_gen_cmd_start().  The name
+of the event, the probe location, and one or more args, each
+representing a probe field, should be supplied to this function.
+Before calling kprobe_event_gen_cmd_start(), the user should create
+and initialize a dynevent_cmd object using kprobe_event_cmd_init().
+
+For example, to create a new "gen_kprobe_test" kprobe event with two
+fields:
+
+	struct dynevent_cmd cmd;
+	char *buf;
+
+	/* Create a buffer to hold the generated command */
+	buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
+
+	/* Before generating the command, initialize the cmd object */
+	kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN);
+
+	/*
+	 * Define the gen_kprobe_test event with the first 2 kprobe
+	 * fields.
+	 */
+	ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test", "do_sys_open",
+					 "dfd=%ax", "filename=%dx");
+
+Once the kprobe event object has been created, it can then be
+populated with more fields.  Fields can be added using
+kprobe_event_add_fields(), supplying the dynevent_cmd object along
+with a variable arg list of probe fields.  For example, to add a
+couple of additional fields, the following call could be made:
+
+	ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
+
+Once all the fields have been added, the event should be finalized and
+registered by calling the kprobe_event_gen_cmd_end() or
+kretprobe_event_gen_cmd_end() functions, depending on whether a kprobe
+or kretprobe command was started:
+
+	ret = kprobe_event_gen_cmd_end(&cmd);
+
+or
+
+	ret = kretprobe_event_gen_cmd_end(&cmd);
+
+At this point, the event object is ready to be used for tracing new
+events.
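+
+(As a rough illustration only: the calls above build up a command
+string that is handed to the same parser used by the kprobe_events
+interface, so the net effect corresponds to hand-writing something
+like the line below.  The exact generated string and the "kprobes"
+group name are assumptions here - "kprobes" is simply the default
+group used for kprobe events and matches the trace_get_event_file()
+call shown further below:)
+
+	p:kprobes/gen_kprobe_test do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)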
+
+Similarly, a kretprobe event can be created using
+kretprobe_event_gen_cmd_start() with a probe name and location and
+additional params such as $retval:
+
+	ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
+					    "do_sys_open", "$retval");
+
+Similar to the synthetic event case, code like the following can be
+used to enable the newly created kprobe event:
+
+	gen_kprobe_test = trace_get_event_file(NULL, "kprobes", "gen_kprobe_test");
+
+	ret = trace_array_set_clr_event(gen_kprobe_test->tr,
+					"kprobes", "gen_kprobe_test", true);
+
+Finally, also similar to synthetic events, the following code can be
+used to give the kprobe event file back and delete the event:
+
+	trace_put_event_file(gen_kprobe_test);
+
+	ret = kprobe_event_delete("gen_kprobe_test");
+
+6.3.5 The "dynevent_cmd" low-level API
+--------------------------------------
+
+Both the in-kernel synthetic event and kprobe interfaces are built on
+top of a lower-level "dynevent_cmd" interface.  This interface is
+meant to provide the basis for higher-level interfaces such as the
+synthetic and kprobe interfaces, which can be used as examples.
+
+The basic idea is simple and amounts to providing a general-purpose
+layer that can be used to generate trace event commands.  The
+generated command strings can then be passed to the command-parsing
+and event creation code that already exists in the trace event
+subsystem for creating the corresponding trace events.
+
+In a nutshell, the way it works is that the higher-level interface
+code creates a struct dynevent_cmd object, then uses a couple of
+functions, dynevent_arg_add() and dynevent_arg_pair_add(), to build up
+a command string, and finally causes the command to be executed using
+the dynevent_create() function.  The details of the interface are
+described below.
+
+The first step in building a new command string is to create and
+initialize an instance of a dynevent_cmd.  Here, for instance, we
+create a dynevent_cmd on the stack and initialize it:
+
+	struct dynevent_cmd cmd;
+	char *buf;
+	int ret;
+
+	buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
+
+	dynevent_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN, DYNEVENT_TYPE_FOO,
+			  foo_event_run_command);
+
+The dynevent_cmd initialization needs to be given a user-specified
+buffer and the length of the buffer (MAX_DYNEVENT_CMD_LEN can be used
+for this purpose - at 2k it's generally too big to be comfortably put
+on the stack, so is dynamically allocated), a dynevent type id, which
+is meant to be used to check that further API calls are for the
+correct command type, and a pointer to an event-specific run_command()
+callback that will be called to actually execute the event-specific
+command function.
+
+Once that's done, the command string can be built up by successive
+calls to argument-adding functions.
+
+To add a single argument, define and initialize a struct dynevent_arg
+or struct dynevent_arg_pair object.  Here's an example of the simplest
+possible arg addition, which is simply to append the given string as
+a whitespace-separated argument to the command:
+
+	struct dynevent_arg arg;
+
+	dynevent_arg_init(&arg, NULL, 0);
+
+	arg.str = name;
+
+	ret = dynevent_arg_add(&cmd, &arg);
+
+The arg object is first initialized using dynevent_arg_init() and in
+this case the parameters are NULL or 0, which means there's no
+optional sanity-checking function or separator appended to the end of
+the arg.
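+
+(For illustration, a sanity-checking callback of the kind passed in
+the 'arg pair' example that follows might look roughly like the
+sketch below.  The callback name, the void * parameter convention,
+and the -EINVAL return value are assumptions made for this sketch;
+see the dynevent_cmd definitions for the real callback typedef:)
+
+	/* Hypothetical check callback: reject an incomplete arg pair */
+	static int dynevent_foo_check_arg_fn(void *data)
+	{
+		struct dynevent_arg_pair *arg_pair = data;
+
+		if (!arg_pair->lhs || !arg_pair->rhs)
+			return -EINVAL;
+
+		return 0;
+	}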
+
+Here's another more complicated example using an 'arg pair', which is
+used to create an argument that consists of a couple of components
+added together as a unit, for example, a 'type field_name;' arg or a
+simple expression arg e.g. 'flags=%cx':
+
+	struct dynevent_arg_pair arg_pair;
+
+	dynevent_arg_pair_init(&arg_pair, dynevent_foo_check_arg_fn, 0, ';');
+
+	arg_pair.lhs = type;
+	arg_pair.rhs = name;
+
+	ret = dynevent_arg_pair_add(&cmd, &arg_pair);
+
+Again, the arg_pair is first initialized, in this case with a callback
+function used to check the sanity of the args (for example, that
+neither part of the pair is NULL), along with a character to be used
+to add an operator between the pair (here none) and a separator to be
+appended onto the end of the arg pair (here ';').
+
+There's also a dynevent_str_add() function that can be used to simply
+add a string as-is, with no spaces, delimiters, or arg check.
+
+Any number of dynevent_*_add() calls can be made to build up the string
+(until its length surpasses cmd->maxlen).  When all the arguments have
+been added and the command string is complete, the only thing left to
+do is run the command, which happens by simply calling
+dynevent_create():
+
+	ret = dynevent_create(&cmd);
+
+At that point, if the return value is 0, the dynamic event has been
+created and is ready to use.
+
+See the dynevent_cmd function definitions themselves for the details
+of the API.

From 61778cd70c1dcef082e5ee6781c7c0c4d7d5b576 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Wed, 29 Jan 2020 16:19:10 -0500
Subject: [PATCH 419/658] tracing: Move all function tracing configs together

The features that depend on the function tracer were spread out
through the tracing menu; pull them together as it is easier to
manage.

Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/Kconfig | 142 +++++++++++++++++++++----------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4484e783f68d..32fcbc00753b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -172,6 +172,77 @@ config FUNCTION_GRAPH_TRACER
 	  the return value. This is done by setting the current return
 	  address on the current task structure into a stack of calls.
 
+config DYNAMIC_FTRACE
+	bool "enable/disable function tracing dynamically"
+	depends on FUNCTION_TRACER
+	depends on HAVE_DYNAMIC_FTRACE
+	default y
+	help
+	  This option will modify all the calls to function tracing
+	  dynamically (will patch them out of the binary image and
+	  replace them with a No-Op instruction) on boot up. During
+	  compile time, a table is made of all the locations that ftrace
+	  can function trace, and this table is linked into the kernel
+	  image. When this is enabled, functions can be individually
+	  enabled, and the functions not enabled will not affect
+	  performance of the system.
+
+	  See the files in /sys/kernel/debug/tracing:
+	    available_filter_functions
+	    set_ftrace_filter
+	    set_ftrace_notrace
+
+	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
+	  otherwise has native performance as long as no tracing is active.
+ +config DYNAMIC_FTRACE_WITH_REGS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_REGS + +config DYNAMIC_FTRACE_WITH_DIRECT_CALLS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +config FUNCTION_PROFILER + bool "Kernel function profiler" + depends on FUNCTION_TRACER + default n + help + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A "functions" file is created in + the trace_stat directory; this file shows the list of functions that + have been hit and their counters. + + If in doubt, say N. + +config STACK_TRACER + bool "Trace max stack" + depends on HAVE_FUNCTION_TRACER + select FUNCTION_TRACER + select STACKTRACE + select KALLSYMS + help + This special tracer records the maximum stack footprint of the + kernel and displays it in /sys/kernel/debug/tracing/stack_trace. + + This tracer works by hooking into every function call that the + kernel executes, and keeping a maximum stack depth value and + stack-trace saved. If this is configured with DYNAMIC_FTRACE + then it will not have any overhead while the stack tracer + is disabled. + + To enable the stack tracer on bootup, pass in 'stacktrace' + on the kernel command line. + + The stack tracer can also be enabled or disabled via the + sysctl kernel.stack_tracer_enabled + + Say N if unsure. + config TRACE_PREEMPT_TOGGLE bool help @@ -410,30 +481,6 @@ config BRANCH_TRACER Say N if unsure. -config STACK_TRACER - bool "Trace max stack" - depends on HAVE_FUNCTION_TRACER - select FUNCTION_TRACER - select STACKTRACE - select KALLSYMS - help - This special tracer records the maximum stack footprint of the - kernel and displays it in /sys/kernel/debug/tracing/stack_trace. - - This tracer works by hooking into every function call that the - kernel executes, and keeping a maximum stack depth value and - stack-trace saved. If this is configured with DYNAMIC_FTRACE - then it will not have any overhead while the stack tracer - is disabled. - - To enable the stack tracer on bootup, pass in 'stacktrace' - on the kernel command line. - - The stack tracer can also be enabled or disabled via the - sysctl kernel.stack_tracer_enabled - - Say N if unsure. - config BLK_DEV_IO_TRACE bool "Support for tracing block IO actions" depends on SYSFS @@ -531,53 +578,6 @@ config DYNAMIC_EVENTS config PROBE_EVENTS def_bool n -config DYNAMIC_FTRACE - bool "enable/disable function tracing dynamically" - depends on FUNCTION_TRACER - depends on HAVE_DYNAMIC_FTRACE - default y - help - This option will modify all the calls to function tracing - dynamically (will patch them out of the binary image and - replace them with a No-Op instruction) on boot up. During - compile time, a table is made of all the locations that ftrace - can function trace, and this table is linked into the kernel - image. When this is enabled, functions can be individually - enabled, and the functions not enabled will not affect - performance of the system. - - See the files in /sys/kernel/debug/tracing: - available_filter_functions - set_ftrace_filter - set_ftrace_notrace - - This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but - otherwise has native performance as long as no tracing is active. 
- -config DYNAMIC_FTRACE_WITH_REGS - def_bool y - depends on DYNAMIC_FTRACE - depends on HAVE_DYNAMIC_FTRACE_WITH_REGS - -config DYNAMIC_FTRACE_WITH_DIRECT_CALLS - def_bool y - depends on DYNAMIC_FTRACE - depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - -config FUNCTION_PROFILER - bool "Kernel function profiler" - depends on FUNCTION_TRACER - default n - help - This option enables the kernel function profiler. A file is created - in debugfs called function_profile_enabled which defaults to zero. - When a 1 is echoed into this file profiling begins, and when a - zero is entered, profiling stops. A "functions" file is created in - the trace_stat directory; this file shows the list of functions that - have been hit and their counters. - - If in doubt, say N. - config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS From a48fc4f5f1d24c70906359731d7080f9ad78a1cb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Jan 2020 16:23:04 -0500 Subject: [PATCH 420/658] tracing: Move tracing test module configs together The MMIO test module was by itself, move it to the other test modules. Also, add the text "Test module" to PREEMPTIRQ_DELAY_TEST as that create a test module as well. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 32fcbc00753b..47d0149347a9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -680,16 +680,6 @@ config TRACE_EVENT_INJECT If unsure, say N. -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. - config TRACEPOINT_BENCHMARK bool "Add tracepoint that benchmarks tracepoints" help @@ -759,8 +749,18 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + config PREEMPTIRQ_DELAY_TEST - tristate "Preempt / IRQ disable delay thread to test latency tracers" + tristate "Test module to create a preempt / IRQ disable delay thread to test latency tracers" depends on m help Select this option to build a test module that can help test latency From 21b3ce3063be8eb7150a717d7c5286fb56ba8cea Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Jan 2020 16:26:45 -0500 Subject: [PATCH 421/658] tracing: Move mmio tracer config up with the other tracers Move the config that enables the mmiotracer with the other tracers such that all the tracers are together. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 47d0149347a9..2014056682f5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -353,6 +353,19 @@ config HWLAT_TRACER file. Every time a latency is greater than tracing_thresh, it will be recorded into the ring buffer. 
+config MMIOTRACE + bool "Memory mapped IO tracing" + depends on HAVE_MMIOTRACE_SUPPORT && PCI + select GENERIC_TRACER + help + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. Tracing is disabled by + default and can be enabled at run-time. + + See Documentation/trace/mmiotrace.rst. + If you are not helping to develop drivers, say N. + config ENABLE_DEFAULT_TRACERS bool "Trace process context switches and events" depends on !GENERIC_TRACER @@ -627,19 +640,6 @@ config EVENT_TRACE_TEST_SYSCALLS TBD - enable a way to actually call the syscalls as we test their events -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on HAVE_MMIOTRACE_SUPPORT && PCI - select GENERIC_TRACER - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/trace/mmiotrace.rst. - If you are not helping to develop drivers, say N. - config TRACING_MAP bool depends on ARCH_HAVE_NMI_SAFE_CMPXCHG From 1e837945a8854227f3f4e4d2d7abff64ed320830 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Jan 2020 16:30:30 -0500 Subject: [PATCH 422/658] tracing: Move tracing selftests to bottom of menu Move all the tracing selftest configs to the bottom of the tracing menu. There's no reason for them to be interspersed throughout. Also, move the bootconfig menu to the top. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 168 +++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2014056682f5..91e885194dbc 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -141,6 +141,15 @@ menuconfig FTRACE if FTRACE +config BOOTTIME_TRACING + bool "Boot-time Tracing support" + depends on BOOT_CONFIG && TRACING + default y + help + Enable developer to setup ftrace subsystem via supplemental + kernel cmdline at boot time for debugging (tracing) driver + initialization and boot process. + config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER @@ -605,41 +614,6 @@ config FTRACE_MCOUNT_RECORD depends on DYNAMIC_FTRACE depends on HAVE_FTRACE_MCOUNT_RECORD -config FTRACE_SELFTEST - bool - -config FTRACE_STARTUP_TEST - bool "Perform a startup test on ftrace" - depends on GENERIC_TRACER - select FTRACE_SELFTEST - help - This option performs a series of startup tests on ftrace. On bootup - a series of tests are made to verify that the tracer is - functioning properly. It will do tests on all the configured - tracers of ftrace. - -config EVENT_TRACE_STARTUP_TEST - bool "Run selftest on trace events" - depends on FTRACE_STARTUP_TEST - default y - help - This option performs a test on all trace events in the system. - It basically just enables each event and runs some code that - will trigger events (not necessarily the event it enables) - This may take some time run as there are a lot of events. - -config EVENT_TRACE_TEST_SYSCALLS - bool "Run selftest on syscall events" - depends on EVENT_TRACE_STARTUP_TEST - help - This option will also enable testing every syscall event. - It only enables the event and disables it and runs various loads - with the event enabled. 
This adds a bit more time for kernel boot - up since it runs this on every system call defined. - - TBD - enable a way to actually call the syscalls as we test their - events - config TRACING_MAP bool depends on ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -726,6 +700,81 @@ config RING_BUFFER_BENCHMARK If unsure, say N. +config TRACE_EVAL_MAP_FILE + bool "Show eval mappings for trace events" + depends on TRACING + help + The "print fmt" of the trace events will show the enum/sizeof names + instead of their values. This can cause problems for user space tools + that use this string to parse the raw data as user space does not know + how to convert the string to its value. + + To fix this, there's a special macro in the kernel that can be used + to convert an enum/sizeof into its value. If this macro is used, then + the print fmt strings will be converted to their values. + + If something does not get converted properly, this option can be + used to show what enums/sizeof the kernel tried to convert. + + This option is for debugging the conversions. A file is created + in the tracing directory called "eval_map" that will show the + names matched with their values and what trace event system they + belong too. + + Normally, the mapping of the strings to values will be freed after + boot up or module load. With this option, they will not be freed, as + they are needed for the "eval_map" file. Enabling this option will + increase the memory footprint of the running kernel. + + If unsure, say N. + +config GCOV_PROFILE_FTRACE + bool "Enable GCOV profiling on ftrace subsystem" + depends on GCOV_KERNEL + help + Enable GCOV profiling on ftrace subsystem for checking + which functions/lines are tested. + + If unsure, say N. + + Note that on a kernel compiled with this config, ftrace will + run significantly slower. + +config FTRACE_SELFTEST + bool + +config FTRACE_STARTUP_TEST + bool "Perform a startup test on ftrace" + depends on GENERIC_TRACER + select FTRACE_SELFTEST + help + This option performs a series of startup tests on ftrace. On bootup + a series of tests are made to verify that the tracer is + functioning properly. It will do tests on all the configured + tracers of ftrace. + +config EVENT_TRACE_STARTUP_TEST + bool "Run selftest on trace events" + depends on FTRACE_STARTUP_TEST + default y + help + This option performs a test on all trace events in the system. + It basically just enables each event and runs some code that + will trigger events (not necessarily the event it enables) + This may take some time run as there are a lot of events. + +config EVENT_TRACE_TEST_SYSCALLS + bool "Run selftest on syscall events" + depends on EVENT_TRACE_STARTUP_TEST + help + This option will also enable testing every syscall event. + It only enables the event and disables it and runs various loads + with the event enabled. This adds a bit more time for kernel boot + up since it runs this on every system call defined. + + TBD - enable a way to actually call the syscalls as we test their + events + config RING_BUFFER_STARTUP_TEST bool "Ring buffer startup self test" depends on RING_BUFFER @@ -799,55 +848,6 @@ config KPROBE_EVENT_GEN_TEST If unsure, say N. -config TRACE_EVAL_MAP_FILE - bool "Show eval mappings for trace events" - depends on TRACING - help - The "print fmt" of the trace events will show the enum/sizeof names - instead of their values. This can cause problems for user space tools - that use this string to parse the raw data as user space does not know - how to convert the string to its value. 
- - To fix this, there's a special macro in the kernel that can be used - to convert an enum/sizeof into its value. If this macro is used, then - the print fmt strings will be converted to their values. - - If something does not get converted properly, this option can be - used to show what enums/sizeof the kernel tried to convert. - - This option is for debugging the conversions. A file is created - in the tracing directory called "eval_map" that will show the - names matched with their values and what trace event system they - belong too. - - Normally, the mapping of the strings to values will be freed after - boot up or module load. With this option, they will not be freed, as - they are needed for the "eval_map" file. Enabling this option will - increase the memory footprint of the running kernel. - - If unsure, say N. - -config GCOV_PROFILE_FTRACE - bool "Enable GCOV profiling on ftrace subsystem" - depends on GCOV_KERNEL - help - Enable GCOV profiling on ftrace subsystem for checking - which functions/lines are tested. - - If unsure, say N. - - Note that on a kernel compiled with this config, ftrace will - run significantly slower. - -config BOOTTIME_TRACING - bool "Boot-time Tracing support" - depends on BOOT_CONFIG && TRACING - default y - help - Enable developer to setup ftrace subsystem via supplemental - kernel cmdline at boot time for debugging (tracing) driver - initialization and boot process. - endif # FTRACE endif # TRACING_SUPPORT From 87ce955b24c9940cb2ca7e5173fcf175578d9fe9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Jan 2020 08:25:34 -0700 Subject: [PATCH 423/658] io_uring: add ->show_fdinfo() for the io_uring file descriptor It can be hard to know exactly what is registered with the ring. Especially for credentials, it'd be handy to be able to see which ones are registered, what personalities they have, and what the ID of each of them is. This adds support for showing information registered in the ring from the fdinfo of the io_uring fd. Here's an example from a test case that registers 4 files (two of them sparse), 4 buffers, and 2 personalities: pos: 0 flags: 02000002 mnt_id: 14 UserFiles: 4 0: file-no-1 1: file-no-2 2: 3: UserBufs: 4 0: 0x563817c46000/128 1: 0x563817c47000/256 2: 0x563817c48000/512 3: 0x563817c49000/1024 Personalities: 1 Uid: 0 0 0 0 Gid: 0 0 0 0 Groups: 0 CapEff: 0000003fffffffff 2 Uid: 0 0 0 0 Gid: 0 0 0 0 Groups: 0 CapEff: 0000003fffffffff Suggested-by: Jann Horn Signed-off-by: Jens Axboe --- fs/io_uring.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index ac5340fdcdfe..81a787ba6960 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6501,6 +6501,80 @@ out_fput: return submitted ? 
submitted : ret; } +static int io_uring_show_cred(int id, void *p, void *data) +{ + const struct cred *cred = p; + struct seq_file *m = data; + struct user_namespace *uns = seq_user_ns(m); + struct group_info *gi; + kernel_cap_t cap; + unsigned __capi; + int g; + + seq_printf(m, "%5d\n", id); + seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); + seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); + seq_puts(m, "\n\tGroups:\t"); + gi = cred->group_info; + for (g = 0; g < gi->ngroups; g++) { + seq_put_decimal_ull(m, g ? " " : "", + from_kgid_munged(uns, gi->gid[g])); + } + seq_puts(m, "\n\tCapEff:\t"); + cap = cred->cap_effective; + CAP_FOR_EACH_U32(__capi) + seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); + seq_putc(m, '\n'); + return 0; +} + +static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) +{ + int i; + + mutex_lock(&ctx->uring_lock); + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); + for (i = 0; i < ctx->nr_user_files; i++) { + struct fixed_file_table *table; + struct file *f; + + table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; + f = table->files[i & IORING_FILE_TABLE_MASK]; + if (f) + seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); + else + seq_printf(m, "%5u: \n", i); + } + seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); + for (i = 0; i < ctx->nr_user_bufs; i++) { + struct io_mapped_ubuf *buf = &ctx->user_bufs[i]; + + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, + (unsigned int) buf->len); + } + if (!idr_is_empty(&ctx->personality_idr)) { + seq_printf(m, "Personalities:\n"); + idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); + } + mutex_unlock(&ctx->uring_lock); +} + +static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct io_ring_ctx *ctx = f->private_data; + + if (percpu_ref_tryget(&ctx->refs)) { + __io_uring_show_fdinfo(ctx, m); + percpu_ref_put(&ctx->refs); + } +} + static const struct file_operations io_uring_fops = { .release = io_uring_release, .flush = io_uring_flush, @@ -6511,6 +6585,7 @@ static const struct file_operations io_uring_fops = { #endif .poll = io_uring_poll, .fasync = io_uring_fasync, + .show_fdinfo = io_uring_show_fdinfo, }; static int io_allocate_scq_urings(struct io_ring_ctx *ctx, From d7f62e825fd19202a0749d10fb439714c51f67d2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 29 Jan 2020 14:39:41 +0100 Subject: [PATCH 424/658] io_uring: add BUILD_BUG_ON() to assert the layout of struct io_uring_sqe With nesting of anonymous unions and structs it's hard to review layout changes. It's better to ask the compiler for these things. 
Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io_uring.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 81a787ba6960..217721c7bc41 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7038,6 +7038,39 @@ out_fput: static int __init io_uring_init(void) { +#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ + BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ + BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ +} while (0) + +#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ + __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) + BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); + BUILD_BUG_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_SQE_ELEM(1, __u8, flags); + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_SQE_ELEM(4, __s32, fd); + BUILD_BUG_SQE_ELEM(8, __u64, off); + BUILD_BUG_SQE_ELEM(8, __u64, addr2); + BUILD_BUG_SQE_ELEM(16, __u64, addr); + BUILD_BUG_SQE_ELEM(24, __u32, len); + BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); + BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); + BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); + BUILD_BUG_SQE_ELEM(28, __u16, poll_events); + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); + BUILD_BUG_SQE_ELEM(28, __u32, open_flags); + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); + BUILD_BUG_SQE_ELEM(32, __u64, user_data); + BUILD_BUG_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_SQE_ELEM(42, __u16, personality); + BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC); return 0; From 18c6b74e7cfc9a599917d9e98f9835de8208e19a Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Mon, 27 Jan 2020 16:08:11 -0600 Subject: [PATCH 425/658] drm/amdgpu: Enable DISABLE_BARRIER_WAITCNT for Arcturus In previous gfx9 parts, S_BARRIER shader instructions are implicitly S_WAITCNT 0 instructions as well. This setting turns off that mechanism in Arcturus and beyond. With this, shaders must follow the ISA guide insofar as putting in explicit S_WAITCNT operations even after an S_BARRIER. 
v2: Fix patch title to list component Signed-off-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 17 +++++++++++++++++ .../amd/include/asic_reg/gc/gc_9_0_sh_mask.h | 6 ++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 51ec8a70660a..90f64b8bc358 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2441,6 +2441,22 @@ static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) } } +static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) +{ + uint32_t tmp; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); + tmp = REG_SET_FIELD(tmp, SQ_CONFIG, + DISABLE_BARRIER_WAITCNT, 1); + WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); + break; + default: + break; + }; +} + static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -2486,6 +2502,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) gfx_v9_0_init_compute_vmid(adev); gfx_v9_0_init_gds_vmid(adev); + gfx_v9_0_init_sq_config(adev); } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index c9e3f6d849a8..ea316d8dcb37 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -2060,7 +2060,8 @@ // addressBlock: gc_sqdec //SQ_CONFIG -#define SQ_CONFIG__UNUSED__SHIFT 0x0 +#define SQ_CONFIG__DISABLE_BARRIER_WAITCNT__SHIFT 0x0 +#define SQ_CONFIG__UNUSED__SHIFT 0x1 #define SQ_CONFIG__OVERRIDE_ALU_BUSY__SHIFT 0x7 #define SQ_CONFIG__DEBUG_EN__SHIFT 0x8 #define SQ_CONFIG__DEBUG_SINGLE_MEMOP__SHIFT 0x9 @@ -2079,7 +2080,8 @@ #define SQ_CONFIG__DISABLE_SP_REDUNDANT_THREAD_GATING__SHIFT 0x1d #define SQ_CONFIG__DISABLE_FLAT_SOFT_CLAUSE__SHIFT 0x1e #define SQ_CONFIG__DISABLE_MIMG_SOFT_CLAUSE__SHIFT 0x1f -#define SQ_CONFIG__UNUSED_MASK 0x0000007FL +#define SQ_CONFIG__DISABLE_BARRIER_WAITCNT_MASK 0x00000001L +#define SQ_CONFIG__UNUSED_MASK 0x0000007EL #define SQ_CONFIG__OVERRIDE_ALU_BUSY_MASK 0x00000080L #define SQ_CONFIG__DEBUG_EN_MASK 0x00000100L #define SQ_CONFIG__DEBUG_SINGLE_MEMOP_MASK 0x00000200L From 977f7e1068be60f3cac7d80ab18692341f3b7fc5 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 21 Jan 2020 15:53:53 +0100 Subject: [PATCH 426/658] drm/amdgpu: allocate entities on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we pre-allocate entities and fences for all the HW IPs on context creation and some of which are might never be used. This patch tries to resolve entity/fences wastage by creating entity only when needed. 
v2: allocate memory for entity and fences together Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +- 2 files changed, 124 insertions(+), 117 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 05c2af61e7de..94a6c42f29ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_JPEG] = 1, }; -static int amdgpu_ctx_total_num_entities(void) -{ - unsigned i, num_entities = 0; - - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) - num_entities += amdgpu_ctx_num_entities[i]; - - return num_entities; -} - static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum drm_sched_priority priority) { + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) + return -EINVAL; + /* NORMAL and below are accessible by everyone */ if (priority <= DRM_SCHED_PRIORITY_NORMAL) return 0; @@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, return -EACCES; } -static int amdgpu_ctx_init(struct amdgpu_device *adev, - enum drm_sched_priority priority, - struct drm_file *filp, - struct amdgpu_ctx *ctx) +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring) { - unsigned num_entities = amdgpu_ctx_total_num_entities(); - unsigned i, j; + struct amdgpu_device *adev = ctx->adev; + struct amdgpu_ctx_entity *entity; + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; + unsigned num_scheds = 0; + enum drm_sched_priority priority; int r; - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) - return -EINVAL; + entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), + GFP_KERNEL); + if (!entity) + return -ENOMEM; - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - - - ctx->entities[0] = kcalloc(num_entities, - sizeof(struct amdgpu_ctx_entity), - GFP_KERNEL); - if (!ctx->entities[0]) - return -ENOMEM; - - - for (i = 0; i < num_entities; ++i) { - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; - - entity->sequence = 1; - entity->fences = kcalloc(amdgpu_sched_jobs, - sizeof(struct dma_fence*), GFP_KERNEL); - if (!entity->fences) { - r = -ENOMEM; - goto error_cleanup_memory; - } - } - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) - ctx->entities[i] = ctx->entities[i - 1] + - amdgpu_ctx_num_entities[i - 1]; - - kref_init(&ctx->refcount); - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; - - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { - struct drm_gpu_scheduler **scheds; - struct drm_gpu_scheduler *sched; - unsigned num_scheds = 0; - - switch (i) { + entity->sequence = 1; + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? 
+ ctx->init_priority : ctx->override_priority; + switch (hw_ip) { case AMDGPU_HW_IP_GFX: sched = &adev->gfx.gfx_ring[0].sched; scheds = &sched; @@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, scheds = adev->jpeg.jpeg_sched; num_scheds = adev->jpeg.num_jpeg_sched; break; - } - - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) - r = drm_sched_entity_init(&ctx->entities[i][j].entity, - priority, scheds, - num_scheds, &ctx->guilty); - if (r) - goto error_cleanup_entities; } + r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds, + &ctx->guilty); + if (r) + goto error_free_entity; + + ctx->entities[hw_ip][ring] = entity; + return 0; + +error_free_entity: + kfree(entity); + + return r; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum drm_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) +{ + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + ctx->adev = adev; + + kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; + return 0; -error_cleanup_entities: - for (i = 0; i < num_entities; ++i) - drm_sched_entity_destroy(&ctx->entities[0][i].entity); +} -error_cleanup_memory: - for (i = 0; i < num_entities; ++i) { - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) +{ - kfree(entity->fences); - entity->fences = NULL; - } + int i; - kfree(ctx->entities[0]); - ctx->entities[0] = NULL; - return r; + if (!entity) + return; + + for (i = 0; i < amdgpu_sched_jobs; ++i) + dma_fence_put(entity->fences[i]); + + kfree(entity); } static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_device *adev = ctx->adev; unsigned i, j; if (!adev) return; - for (i = 0; i < num_entities; ++i) { - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; - - for (j = 0; j < amdgpu_sched_jobs; ++j) - dma_fence_put(entity->fences[j]); - - kfree(entity->fences); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { + amdgpu_ctx_fini_entity(ctx->entities[i][j]); + ctx->entities[i][j] = NULL; + } } - kfree(ctx->entities[0]); mutex_destroy(&ctx->lock); - kfree(ctx); } int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity) { + int r; + if (hw_ip >= AMDGPU_HW_IP_NUM) { DRM_ERROR("unknown HW IP type: %d\n", hw_ip); return -EINVAL; @@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, return -EINVAL; } - *entity = &ctx->entities[hw_ip][ring].entity; + if (ctx->entities[hw_ip][ring] == NULL) { + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring); + if (r) + return r; + } + + *entity = &ctx->entities[hw_ip][ring]->entity; return 0; } @@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; - unsigned num_entities; - u32 i; + u32 i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j 
< amdgpu_ctx_num_entities[i]; ++j) { + if (!ctx->entities[i][j]) + continue; - num_entities = amdgpu_ctx_total_num_entities(); - for (i = 0; i < num_entities; i++) - drm_sched_entity_destroy(&ctx->entities[0][i].entity); + drm_sched_entity_destroy(&ctx->entities[i][j]->entity); + } + } amdgpu_ctx_fini(ref); } @@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { - unsigned num_entities = amdgpu_ctx_total_num_entities(); enum drm_sched_priority ctx_prio; - unsigned i; + unsigned i, j; ctx->override_priority = priority; ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct drm_sched_entity *entity; - for (i = 0; i < num_entities; i++) { - struct drm_sched_entity *entity = &ctx->entities[0][i].entity; + if (!ctx->entities[i][j]) + continue; - drm_sched_entity_set_priority(entity, ctx_prio); + entity = &ctx->entities[i][j]->entity; + drm_sched_entity_set_priority(entity, ctx_prio); + } } } @@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) { - unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; - uint32_t id, i; + uint32_t id, i, j; idp = &mgr->ctx_handles; mutex_lock(&mgr->lock); idr_for_each_entry(idp, ctx, id) { - for (i = 0; i < num_entities; i++) { - struct drm_sched_entity *entity; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct drm_sched_entity *entity; - entity = &ctx->entities[0][i].entity; - timeout = drm_sched_entity_flush(entity, timeout); + if (!ctx->entities[i][j]) + continue; + + entity = &ctx->entities[i][j]->entity; + timeout = drm_sched_entity_flush(entity, timeout); + } } } mutex_unlock(&mgr->lock); @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) { - unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; - uint32_t id, i; + uint32_t id, i, j; idp = &mgr->ctx_handles; @@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) - drm_sched_entity_fini(&ctx->entities[0][i].entity); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct drm_sched_entity *entity; + + if (!ctx->entities[i][j]) + continue; + + entity = &ctx->entities[i][j]->entity; + drm_sched_entity_fini(entity); + } + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index a6cd9d4b078c..de490f183af2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -29,10 +29,12 @@ struct drm_device; struct drm_file; struct amdgpu_fpriv; +#define AMDGPU_MAX_ENTITY_NUM 4 + struct amdgpu_ctx_entity { uint64_t sequence; - struct dma_fence **fences; struct drm_sched_entity entity; + struct dma_fence *fences[]; }; struct amdgpu_ctx { @@ -42,7 +44,7 @@ struct amdgpu_ctx { unsigned reset_counter_query; uint32_t vram_lost_counter; spinlock_t ring_lock; - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM]; + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; bool 
preamble_presented; enum drm_sched_priority init_priority; enum drm_sched_priority override_priority; From 7044cb6c20776a1d546f08c332601bb6005388a2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 28 Jan 2020 15:42:27 +1100 Subject: [PATCH 427/658] amdgpu: using vmalloc requires includeing vmalloc.h Fixes: 240c811ccde4 ("drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training(V2)") Signed-off-by: Stephen Rothwell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index ac173d2eb809..0829188c1a5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -22,6 +22,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_psp.h" From 2a239597dfc65fb097a3d7ddcd01066745fea069 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Jan 2020 11:28:27 +0000 Subject: [PATCH 428/658] drm/amd/display: fix spelling mistake link_integiry_check -> link_integrity_check There is a spelling mistake on the struct field name link_integiry_check, fix this by renaming it. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h | 2 +- .../gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c | 8 ++++---- .../gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index f98d3d9ecb6d..af78e4f1be68 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -63,7 +63,7 @@ struct mod_hdcp_transition_input_hdcp1 { uint8_t hdcp_capable_dp; uint8_t binfo_read_dp; uint8_t r0p_available_dp; - uint8_t link_integiry_check; + uint8_t link_integrity_check; uint8_t reauth_request_check; uint8_t stream_encryption_dp; }; diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 04845e43df15..37670db64855 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -283,8 +283,8 @@ static enum mod_hdcp_status wait_for_ready(struct mod_hdcp *hdcp, hdcp, "bstatus_read")) goto out; if (!mod_hdcp_execute_and_set(check_link_integrity_dp, - &input->link_integiry_check, &status, - hdcp, "link_integiry_check")) + &input->link_integrity_check, &status, + hdcp, "link_integrity_check")) goto out; if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, @@ -431,8 +431,8 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, hdcp, "bstatus_read")) goto out; if (!mod_hdcp_execute_and_set(check_link_integrity_dp, - &input->link_integiry_check, &status, - hdcp, "link_integiry_check")) + &input->link_integrity_check, &status, + hdcp, "link_integrity_check")) goto out; if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c index 21ebc62bb9d9..76edcbe51f71 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c @@ -241,7 +241,7 @@ enum mod_hdcp_status 
mod_hdcp_hdcp1_dp_transition(struct mod_hdcp *hdcp, } break; case D1_A4_AUTHENTICATED: - if (input->link_integiry_check != PASS || + if (input->link_integrity_check != PASS || input->reauth_request_check != PASS) { /* 1A-07: restart hdcp on a link integrity failure */ fail_and_restart_in_ms(0, &status, output); @@ -249,7 +249,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_dp_transition(struct mod_hdcp *hdcp, } break; case D1_A6_WAIT_FOR_READY: - if (input->link_integiry_check == FAIL || + if (input->link_integrity_check == FAIL || input->reauth_request_check == FAIL) { fail_and_restart_in_ms(0, &status, output); break; From ec3d65082d7dabad6fa8f66a8ef166f2d522d6b2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 Jan 2020 17:09:52 +0100 Subject: [PATCH 429/658] radeon: insert 10ms sleep in dce5_crtc_load_lut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per at least one tester this is enough magic to recover the regression introduced for some people (but not all) in commit b8e2b0199cc377617dc238f5106352c06dcd3fa2 Author: Peter Rosin Date: Tue Jul 4 12:36:57 2017 +0200 drm/fb-helper: factor out pseudo-palette which for radeon had the side-effect of refactoring out a seemingly redudant writing of the color palette. 10ms in a fairly slow modeset path feels like an acceptable form of duct-tape, so maybe worth a shot and see what sticks. Cc: Alex Deucher Cc: Michel Dänzer References: https://bugzilla.kernel.org/show_bug.cgi?id=198123 Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 856526cb2caf..82fa7ab6d35c 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -127,6 +127,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id); + msleep(10); + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset, (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) | NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS))); From ded58c7bbbf3b52c018db99fd4d530c3448a0c83 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Tue, 28 Jan 2020 16:38:53 -0500 Subject: [PATCH 430/658] drm/amd/display: Move drm_dp_mst_atomic_check() to the front of dc_validate_global_state() [Why] Need to do atomic check first, then validate global state. If not, when connecting both MST and HDMI displays and set a bad mode via xrandr, system will hang. [How] Move drm_dp_mst_atomic_check() to the front of dc_validate_global_state(). Signed-off-by: Zhan Liu Reviewed-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2ac349849081..279541517a99 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8167,6 +8167,16 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; #endif + /* + * Perform validation of MST topology in the state: + * We need to perform MST atomic check before calling + * dc_validate_global_state(), or there is a chance + * to get stuck in an infinite loop and hang eventually. 
+ */ + ret = drm_dp_mst_atomic_check(state); + if (ret) + goto fail; + if (dc_validate_global_state(dc, dm_state->context, false) != DC_OK) { ret = -EINVAL; goto fail; @@ -8195,10 +8205,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, dc_retain_state(old_dm_state->context); } } - /* Perform validation of MST topology in the state*/ - ret = drm_dp_mst_atomic_check(state); - if (ret) - goto fail; /* Store the overall update type for use later in atomic check. */ for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) { From eca31a4ac89bd5af26eccc6c32acbee396950b30 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 29 Jan 2020 09:09:05 +0100 Subject: [PATCH 431/658] radeon: completely remove lut leftovers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an oversight from commit 42585395ebc1034a98937702849669f17eadb35f Author: Peter Rosin Date: Thu Jul 13 18:25:36 2017 +0200 drm: radeon: remove dead code and pointless local lut storage v2: Also remove leftover local variable. Cc: Peter Rosin Cc: Alex Deucher Cc: Michel Dänzer Reviewed-by: Michel Dänzer Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 7 ------- drivers/gpu/drm/radeon/radeon_mode.h | 1 - 2 files changed, 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 82fa7ab6d35c..d07c7db0c815 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -674,7 +674,6 @@ static void radeon_crtc_init(struct drm_device *dev, int index) { struct radeon_device *rdev = dev->dev_private; struct radeon_crtc *radeon_crtc; - int i; radeon_crtc = kzalloc(sizeof(struct radeon_crtc) + (RADEONFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); if (radeon_crtc == NULL) @@ -703,12 +702,6 @@ static void radeon_crtc_init(struct drm_device *dev, int index) radeon_crtc->mode_set.num_connectors = 0; #endif - for (i = 0; i < 256; i++) { - radeon_crtc->lut_r[i] = i << 2; - radeon_crtc->lut_g[i] = i << 2; - radeon_crtc->lut_b[i] = i << 2; - } - if (rdev->is_atom_bios && (ASIC_IS_AVIVO(rdev) || radeon_r4xx_atom)) radeon_atombios_init_crtc(dev, radeon_crtc); else diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index fd470d6bf3f4..96565171d13e 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -327,7 +327,6 @@ enum radeon_flip_status { struct radeon_crtc { struct drm_crtc base; int crtc_id; - u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; bool cursor_out_of_bounds; From 968162204adf5f2292214356978357e9dcd35de1 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 29 Jan 2020 18:24:35 -0700 Subject: [PATCH 432/658] drm/amdgpu: Fix implicit enum conversion in gfx_v9_4_ras_error_inject Clang warns: ../drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c:967:35: warning: implicit conversion from enumeration type 'enum amdgpu_ras_block' to different enumeration type 'enum ta_ras_block' [-Wenum-conversion] block_info.block_id = info->head.block; ~ ~~~~~~~~~~~^~~~~ 1 warning generated. Use the function added in commit 828cfa29093f ("drm/amdgpu: Fix amdgpu ras to ta enums conversion") that handles this conversion explicitly. 
Fixes: 4c461d89db4f ("drm/amdgpu: add RAS support for the gfx block of Arcturus") Link: https://github.com/ClangBuiltLinux/linux/issues/849 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index e19d275f3f7d..f099f13d7f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -964,7 +964,7 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if) if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return -EINVAL; - block_info.block_id = info->head.block; + block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); block_info.sub_block_index = info->head.sub_block_index; block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); block_info.address = info->address; From e0d5322c29ef9f19a445c4960e9f89b4a85571bc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 25 Jan 2020 11:30:25 -0500 Subject: [PATCH 433/658] drm/amdgpu/navi10: add mclk to navi10_get_clock_by_type_with_latency Doesn't seem to be used, but add it just in case. Reviewed-by: Matt Coffin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 93c66c69ca28..f1b27fc20c19 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -949,6 +949,8 @@ static int navi10_get_clock_by_type_with_latency(struct smu_context *smu, case SMU_GFXCLK: case SMU_DCEFCLK: case SMU_SOCCLK: + case SMU_MCLK: + case SMU_UCLK: ret = smu_get_dpm_level_count(smu, clk_type, &level_count); if (ret) return ret; From 079a8834bb6596d74eca5356b0d0ea0529d4f60d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2020 20:21:29 +0100 Subject: [PATCH 434/658] xtensa: configs: Cleanup old Kconfig IO scheduler options CONFIG_IOSCHED_DEADLINE and CONFIG_IOSCHED_CFQ are gone since commit f382fb0bcef4 ("block: remove legacy IO schedulers"). The IOSCHED_DEADLINE was replaced by MQ_IOSCHED_DEADLINE and it will be now enabled by default (along with MQ_IOSCHED_KYBER). 
Signed-off-by: Krzysztof Kozlowski Message-Id: <20200130192129.2677-1-krzk@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/configs/audio_kc705_defconfig | 2 -- arch/xtensa/configs/cadence_csp_defconfig | 2 -- arch/xtensa/configs/generic_kc705_defconfig | 2 -- arch/xtensa/configs/iss_defconfig | 2 -- arch/xtensa/configs/nommu_kc705_defconfig | 2 -- arch/xtensa/configs/smp_lx200_defconfig | 2 -- 6 files changed, 12 deletions(-) diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index b6367af71d65..eeb4c5383c83 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -21,8 +21,6 @@ CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_XTENSA_VARIANT_CUSTOM=y CONFIG_XTENSA_VARIANT_CUSTOM_NAME="test_kc705_hifi" CONFIG_XTENSA_UNALIGNED_USER=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index f4eef6decd2a..fc240737b14d 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -27,8 +27,6 @@ CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_XTENSA_VARIANT_CUSTOM=y CONFIG_XTENSA_VARIANT_CUSTOM_NAME="csp" CONFIG_XTENSA_UNALIGNED_USER=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index c925165cf760..412f611033cc 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -21,8 +21,6 @@ CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_XTENSA_VARIANT_DC233C=y CONFIG_XTENSA_UNALIGNED_USER=y CONFIG_PREEMPT=y diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index d1c01742baf4..32ce8fb068f0 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -1,8 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_PCI is not set CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,38400 eth0=tuntap,,tap0 ip=192.168.168.5:192.168.168.1 root=nfs nfsroot=192.168.168.1:/opt/montavista/pro/devkit/xtensa/linux_be/target memmap=128M@0" diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 380e366730d5..88b2e222d4bf 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -25,8 +25,6 @@ CONFIG_KALLSYMS_ALL=y CONFIG_PERF_EVENTS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_XTENSA_VARIANT_CUSTOM=y CONFIG_XTENSA_VARIANT_CUSTOM_NAME="de212" # CONFIG_XTENSA_VARIANT_MMU is not set diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index d46b58f34098..8b3bc92a079c 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -21,8 +21,6 @@ CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_XTENSA_VARIANT_CUSTOM=y CONFIG_XTENSA_VARIANT_CUSTOM_NAME="test_mmuhifi_c3" 
CONFIG_XTENSA_UNALIGNED_USER=y From d5fae240b9bdb2d374624e6aa9f99a4527f111ec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2020 20:25:20 +0100 Subject: [PATCH 435/658] m68k: configs: Cleanup old Kconfig IO scheduler options CONFIG_IOSCHED_DEADLINE and CONFIG_IOSCHED_CFQ are gone since commit f382fb0bcef4 ("block: remove legacy IO schedulers"). The IOSCHED_DEADLINE was replaced by MQ_IOSCHED_DEADLINE and it will be now enabled by default (along with MQ_IOSCHED_KYBER). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Ungerer --- arch/m68k/configs/amcore_defconfig | 1 - arch/m68k/configs/m5208evb_defconfig | 2 -- arch/m68k/configs/m5249evb_defconfig | 2 -- arch/m68k/configs/m5272c3_defconfig | 2 -- arch/m68k/configs/m5275evb_defconfig | 2 -- arch/m68k/configs/m5307c3_defconfig | 2 -- arch/m68k/configs/m5407c3_defconfig | 2 -- arch/m68k/configs/m5475evb_defconfig | 2 -- 8 files changed, 15 deletions(-) diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index d5e683dd885d..3a84f24d41c8 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -13,7 +13,6 @@ CONFIG_EMBEDDED=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set CONFIG_M5307=y CONFIG_AMCORE=y diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig index a3102ff7e5ed..0ee3079f6ca9 100644 --- a/arch/m68k/configs/m5208evb_defconfig +++ b/arch/m68k/configs/m5208evb_defconfig @@ -10,8 +10,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x40000000 diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig index f7bb9ed3efa8..f84f68c04065 100644 --- a/arch/m68k/configs/m5249evb_defconfig +++ b/arch/m68k/configs/m5249evb_defconfig @@ -10,8 +10,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set CONFIG_M5249=y CONFIG_M5249C3=y diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig index 1e679f6a400f..eca65020aae3 100644 --- a/arch/m68k/configs/m5272c3_defconfig +++ b/arch/m68k/configs/m5272c3_defconfig @@ -10,8 +10,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set CONFIG_M5272=y CONFIG_M5272C3=y diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig index d2987b40423e..9402c7a3e9c7 100644 --- a/arch/m68k/configs/m5275evb_defconfig +++ b/arch/m68k/configs/m5275evb_defconfig @@ -10,8 +10,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set CONFIG_M5275=y # CONFIG_4KSTACKS is not set diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig index 97a78c99eeee..bb8b0eb4bdfc 100644 --- a/arch/m68k/configs/m5307c3_defconfig +++ b/arch/m68k/configs/m5307c3_defconfig @@ -10,8 +10,6 @@ CONFIG_EXPERT=y # 
CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set CONFIG_M5307=y CONFIG_M5307C3=y diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig index 766a97f39a3a..ce9ccf13c7c0 100644 --- a/arch/m68k/configs/m5407c3_defconfig +++ b/arch/m68k/configs/m5407c3_defconfig @@ -11,8 +11,6 @@ CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set CONFIG_M5407=y CONFIG_M5407C3=y diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index 579fd98afed6..93f7c7a07553 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -11,8 +11,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_EMBEDDED=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_COLDFIRE=y # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x0 From 8ff2d7ca4a55dfabf12e876369835bd024eb4621 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Thu, 21 Nov 2019 14:36:17 +0530 Subject: [PATCH 436/658] i2c: cros-ec-tunnel: Fix slave device enumeration During adding of the adapter the slave device registration use to fail as the acpi companion field was not populated. Fixes: 9af1563a5486 ("i2c: cros-ec-tunnel: Make the device acpi compatible") Signed-off-by: Akshu Agrawal Acked-by: Raul E Rangel Reviewed-by: Enric Balletbo i Serra Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 958161c71985..8a2db3ac3b3c 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -273,6 +273,7 @@ static int ec_i2c_probe(struct platform_device *pdev) bus->adap.dev.parent = &pdev->dev; bus->adap.dev.of_node = pdev->dev.of_node; bus->adap.retries = I2C_MAX_RETRIES; + ACPI_COMPANION_SET(&bus->adap.dev, ACPI_COMPANION(&pdev->dev)); err = i2c_add_adapter(&bus->adap); if (err) From b49f8e0e7bd17b968129790e40f9e2566f4f95ec Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Thu, 21 Nov 2019 14:10:51 -0700 Subject: [PATCH 437/658] i2c: cros-ec-tunnel: Fix ACPI identifier The initial patch was using the incorrect identifier. 
Fixes: 9af1563a5486 ("i2c: cros-ec-tunnel: Make the device acpi compatible") Signed-off-by: Raul E Rangel Acked-by: Enric Balletbo i Serra Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 8a2db3ac3b3c..790ea3fda693 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -299,7 +299,7 @@ static const struct of_device_id cros_ec_i2c_of_match[] = { MODULE_DEVICE_TABLE(of, cros_ec_i2c_of_match); static const struct acpi_device_id cros_ec_i2c_tunnel_acpi_id[] = { - { "GOOG001A", 0 }, + { "GOOG0012", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, cros_ec_i2c_tunnel_acpi_id); From 03b10951e9f6e6297d967a903c8d743b245ae3b8 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 13 Jan 2020 21:02:58 +0100 Subject: [PATCH 438/658] power: avs: qcom-cpr: add a printout after the driver has been initialized In order to easier inform the user that the driver has been initialized successfully, add a printout after the driver has been initialized. At the same time, remove a dev_dbg() that is now redundant. Signed-off-by: Niklas Cassel Reviewed-by: Bjorn Andersson Signed-off-by: Rafael J. Wysocki --- drivers/power/avs/qcom-cpr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/power/avs/qcom-cpr.c b/drivers/power/avs/qcom-cpr.c index 9192fb747653..b158a684d05c 100644 --- a/drivers/power/avs/qcom-cpr.c +++ b/drivers/power/avs/qcom-cpr.c @@ -1547,8 +1547,6 @@ static int cpr_pd_attach_dev(struct generic_pm_domain *domain, goto unlock; } - dev_dbg(drv->dev, "number of OPPs: %d\n", drv->num_corners); - drv->corners = devm_kcalloc(drv->dev, drv->num_corners, sizeof(*drv->corners), GFP_KERNEL); @@ -1586,6 +1584,9 @@ static int cpr_pd_attach_dev(struct generic_pm_domain *domain, acc_desc->enable_mask, acc_desc->enable_mask); + dev_info(drv->dev, "driver initialized with %u OPPs\n", + drv->num_corners); + unlock: mutex_unlock(&drv->lock); From adef0cedeaaee0a1a9495adb599806d3c0a8b31e Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 21 Jan 2020 23:28:59 +0100 Subject: [PATCH 439/658] PM / AVS: rockchip-io: fix the supply naming for the emmc supply on px30 The supply going to the emmc/flash is named vccio6, not vccio0 and while the code does this correctly already, the comments and error output do not. So just change these values to the correct ones. Signed-off-by: Heiko Stuebner Signed-off-by: Rafael J. Wysocki --- drivers/power/avs/rockchip-io-domain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c index 398fc954419e..eece97f97ef8 100644 --- a/drivers/power/avs/rockchip-io-domain.c +++ b/drivers/power/avs/rockchip-io-domain.c @@ -152,18 +152,18 @@ static void px30_iodomain_init(struct rockchip_iodomain *iod) int ret; u32 val; - /* if no VCCIO0 supply we should leave things alone */ + /* if no VCCIO6 supply we should leave things alone */ if (!iod->supplies[PX30_IO_VSEL_VCCIO6_SUPPLY_NUM].reg) return; /* - * set vccio0 iodomain to also use this framework + * set vccio6 iodomain to also use this framework * instead of a special gpio. 
*/ val = PX30_IO_VSEL_VCCIO6_SRC | (PX30_IO_VSEL_VCCIO6_SRC << 16); ret = regmap_write(iod->grf, PX30_IO_VSEL, val); if (ret < 0) - dev_warn(iod->dev, "couldn't update vccio0 ctrl\n"); + dev_warn(iod->dev, "couldn't update vccio6 ctrl\n"); } static void rk3288_iodomain_init(struct rockchip_iodomain *iod) From db5a10c172ed0d41aa4139ef9f1abd8128568267 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Mon, 27 Jan 2020 15:53:56 -0800 Subject: [PATCH 440/658] power: avs: qcom-cpr: add unspecified HAS_IOMEM dependency Currently CONFIG_QCOM_CPR=y implicitly depends on CONFIG_HAS_IOMEM=y; consequently, on architectures without IOMEM we get the following build error: /usr/bin/ld: drivers/power/avs/qcom-cpr.o: in function `cpr_probe': drivers/power/avs/qcom-cpr.c:1690: undefined reference to `devm_ioremap_resource' Fix the build error by adding the unspecified dependency. Signed-off-by: Brendan Higgins Signed-off-by: Rafael J. Wysocki --- drivers/power/avs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/avs/Kconfig b/drivers/power/avs/Kconfig index b8fe166cd0d9..cdb4237bfd02 100644 --- a/drivers/power/avs/Kconfig +++ b/drivers/power/avs/Kconfig @@ -14,7 +14,7 @@ menuconfig POWER_AVS config QCOM_CPR tristate "QCOM Core Power Reduction (CPR) support" - depends on POWER_AVS + depends on POWER_AVS && HAS_IOMEM select PM_OPP select REGMAP help From e093e53f4f70b126ff63bc422e5135e934e35a1b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 29 Jan 2020 18:41:30 -0700 Subject: [PATCH 441/658] power: avs: qcom-cpr: Avoid clang -Wsometimes-uninitialized in cpr_scale Clang warns (trimmed for brevity): ../drivers/power/avs/qcom-cpr.c:570:13: warning: variable 'reg_mask' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] ../drivers/power/avs/qcom-cpr.c:520:13: warning: variable 'new_uV' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] Due to the fact that Clang's static analysis happens before any optimization passes are taken into account, it cannot see that both branches in the if statement must be taken because dir cannot be something other than UP or DOWN due to the check at the top of this function. Change the else if condition to else to fix this false positive. Fixes: bf6910abf548 ("power: avs: Add support for CPR (Core Power Reduction)") Link: https://github.com/ClangBuiltLinux/linux/issues/840 Signed-off-by: Nathan Chancellor Acked-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- drivers/power/avs/qcom-cpr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/avs/qcom-cpr.c b/drivers/power/avs/qcom-cpr.c index b158a684d05c..bd7c3e48b386 100644 --- a/drivers/power/avs/qcom-cpr.c +++ b/drivers/power/avs/qcom-cpr.c @@ -517,7 +517,7 @@ static int cpr_scale(struct cpr_drv *drv, enum voltage_change_dir dir) dev_dbg(drv->dev, "UP: -> new_uV: %d last_uV: %d perf state: %u\n", new_uV, last_uV, cpr_get_cur_perf_state(drv)); - } else if (dir == DOWN) { + } else { if (desc->clamp_timer_interval && error_steps < desc->down_threshold) { /* @@ -567,7 +567,7 @@ static int cpr_scale(struct cpr_drv *drv, enum voltage_change_dir dir) /* Disable auto nack down */ reg_mask = RBCPR_CTL_SW_AUTO_CONT_NACK_DN_EN; val = 0; - } else if (dir == DOWN) { + } else { /* Restore default threshold for UP */ reg_mask = RBCPR_CTL_UP_THRESHOLD_MASK; reg_mask <<= RBCPR_CTL_UP_THRESHOLD_SHIFT; From c611990844c28c61ca4b35ff69d3a2ae95ccd486 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 5 Dec 2019 07:40:43 -0500 Subject: [PATCH 442/658] KVM: s390: ENOTSUPP -> EOPNOTSUPP fixups There is no ENOTSUPP for userspace. Reported-by: Julian Wiedmann Fixes: 519783935451 ("KVM: s390: introduce ais mode modify function") Fixes: 2c1a48f2e5ed ("KVM: S390: add new group for flic") Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 165dea4c7f19..c06c89d370a7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2190,7 +2190,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2499,7 +2499,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2579,7 +2579,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; From 55680890ea78be0df5e1384989f1be835043c084 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 31 Jan 2020 05:02:00 -0500 Subject: [PATCH 443/658] KVM: s390: do not clobber registers during guest reset/store status The initial CPU reset clobbers the userspace fpc and the store status ioctl clobbers the guest acrs + fpr. As these calls are only done via ioctl (and not via vcpu_run), no CPU context is loaded, so we can (and must) act directly on the sync regs, not on the thread context. 
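To make the trigger concrete, a minimal user-space sketch (error handling omitted, assuming an s390 host with /dev/kvm available) issues the reset ioctl without ever calling KVM_RUN, which is exactly the path on which no vcpu context is loaded in the calling thread:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int vm = ioctl(kvm, KVM_CREATE_VM, 0);
		int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

		/* No KVM_RUN has been issued, so no vcpu context is loaded
		 * in this thread; the reset must act on the sync regs, not
		 * on the thread's FPU state. */
		return ioctl(vcpu, KVM_S390_INITIAL_RESET, 0);
	}
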
Cc: stable@kernel.org Fixes: e1788bb995be ("KVM: s390: handle floating point registers in the run ioctl not in vcpu_put/load") Fixes: 31d8b8d41a7e ("KVM: s390: handle access registers in the run ioctl not in vcpu_put/load") Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Janosch Frank Link: https://lore.kernel.org/r/20200131100205.74720-2-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d9e6bf3d54f0..876802894b35 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2860,9 +2860,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | CR14_UNUSED_33 | CR14_EXTERNAL_DAMAGE_SUBMASK; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; + vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; @@ -4351,7 +4349,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { From cca00ebb8ad3b3a5eb6e60b1ac7e9211f66af477 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 31 Jan 2020 05:02:01 -0500 Subject: [PATCH 444/658] KVM: s390: Cleanup initial cpu reset The code seems to be quite old and uses lots of unneeded spaces for alignment, which doesn't really help with readability. Let's: * Get rid of the extra spaces * Remove the ULs as they are not needed on 0s * Define constants for the CR 0 and 14 initial values * Use the sizeof of the gcr array to memset it to 0 Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20200131100205.74720-3-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 5 +++++ arch/s390/kvm/kvm-s390.c | 18 +++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 02f4c21c57f6..73044545ecac 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -122,6 +122,11 @@ struct mcck_volatile_info { __u32 reserved; }; +#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK) +#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ + CR14_EXTERNAL_DAMAGE_SUBMASK) + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 876802894b35..bb072866bd69 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2847,19 +2847,15 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) { /* this equals initial cpu reset in pop, but we don't switch to ESA */ - vcpu->arch.sie_block->gpsw.mask = 0UL; - vcpu->arch.sie_block->gpsw.addr = 0UL; + vcpu->arch.sie_block->gpsw.mask = 0; + vcpu->arch.sie_block->gpsw.addr = 0; kvm_s390_set_prefix(vcpu, 0); kvm_s390_set_cpu_timer(vcpu, 0); - vcpu->arch.sie_block->ckc = 0UL; - 
vcpu->arch.sie_block->todpr = 0; - memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); - vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 | - CR0_INTERRUPT_KEY_SUBMASK | - CR0_MEASUREMENT_ALERT_SUBMASK; - vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | - CR14_UNUSED_33 | - CR14_EXTERNAL_DAMAGE_SUBMASK; + vcpu->arch.sie_block->ckc = 0; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); + vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; + vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; From 7de3f1423ff9431f3bd5023bb78d1e062314e7f0 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 31 Jan 2020 05:02:02 -0500 Subject: [PATCH 445/658] KVM: s390: Add new reset vcpu API The architecture states that we need to reset local IRQs for all CPU resets. Because the old reset interface did not support the normal CPU reset we never did that on a normal reset. Let's implement an interface for the missing normal and clear resets and reset all local IRQs, registers and control structures as stated in the architecture. Userspace might already reset the registers via the vcpu run struct, but as we need the interface for the interrupt clearing part anyway, we implement the resets fully and don't rely on userspace to reset the rest. Signed-off-by: Janosch Frank Reviewed-by: Cornelia Huck Reviewed-by: Christian Borntraeger Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20200131100205.74720-4-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/api.txt | 43 +++++++++++++++++ arch/s390/kvm/kvm-s390.c | 84 +++++++++++++++++++++++----------- include/uapi/linux/kvm.h | 5 ++ 3 files changed, 105 insertions(+), 27 deletions(-) diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index ebb37b34dcfc..73448764f544 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -4168,6 +4168,42 @@ This ioctl issues an ultravisor call to terminate the secure guest, unpins the VPA pages and releases all the device pages that are used to track the secure pages by hypervisor. +4.122 KVM_S390_NORMAL_RESET + +Capability: KVM_CAP_S390_VCPU_RESETS +Architectures: s390 +Type: vcpu ioctl +Parameters: none +Returns: 0 + +This ioctl resets VCPU registers and control structures according to +the cpu reset definition in the POP (Principles Of Operation). + +4.123 KVM_S390_INITIAL_RESET + +Capability: none +Architectures: s390 +Type: vcpu ioctl +Parameters: none +Returns: 0 + +This ioctl resets VCPU registers and control structures according to +the initial cpu reset definition in the POP. However, the cpu is not +put into ESA mode. This reset is a superset of the normal reset. + +4.124 KVM_S390_CLEAR_RESET + +Capability: KVM_CAP_S390_VCPU_RESETS +Architectures: s390 +Type: vcpu ioctl +Parameters: none +Returns: 0 + +This ioctl resets VCPU registers and control structures according to +the clear cpu reset definition in the POP. However, the cpu is not put +into ESA mode. This reset is a superset of the initial reset. + + 5. The kvm_run structure ------------------------ @@ -5396,3 +5432,10 @@ handling by KVM (as some KVM hypercall may be mistakenly treated as TLB flush hypercalls by Hyper-V) so userspace should disable KVM identification in CPUID and only exposes Hyper-V identification. In this case, guest thinks it's running on Hyper-V and only use Hyper-V hypercalls. 
+ +8.22 KVM_CAP_S390_VCPU_RESETS + +Architectures: s390 + +This capability indicates that the KVM_S390_NORMAL_RESET and +KVM_S390_CLEAR_RESET ioctls are available. diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bb072866bd69..e39f6ef97b09 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -529,6 +529,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: case KVM_CAP_S390_AIS_MIGRATION: + case KVM_CAP_S390_VCPU_RESETS: r = 1; break; case KVM_CAP_S390_HPAGE_1M: @@ -2844,29 +2845,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } -static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) -{ - /* this equals initial cpu reset in pop, but we don't switch to ESA */ - vcpu->arch.sie_block->gpsw.mask = 0; - vcpu->arch.sie_block->gpsw.addr = 0; - kvm_s390_set_prefix(vcpu, 0); - kvm_s390_set_cpu_timer(vcpu, 0); - vcpu->arch.sie_block->ckc = 0; - vcpu->arch.sie_block->todpr = 0; - memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); - vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; - vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; - vcpu->run->s.regs.fpc = 0; - vcpu->arch.sie_block->gbea = 1; - vcpu->arch.sie_block->pp = 0; - vcpu->arch.sie_block->fpf &= ~FPF_BPBC; - vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; - kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) - kvm_s390_vcpu_stop(vcpu); - kvm_s390_clear_local_irqs(vcpu); -} - void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { mutex_lock(&vcpu->kvm->lock); @@ -3281,10 +3259,53 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, return r; } -static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) { - kvm_s390_vcpu_initial_reset(vcpu); - return 0; + vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); + + kvm_clear_async_pf_completion_queue(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); + kvm_s390_clear_local_irqs(vcpu); +} + +static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + /* Initial reset is a superset of the normal reset */ + kvm_arch_vcpu_ioctl_normal_reset(vcpu); + + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0; + vcpu->arch.sie_block->gpsw.addr = 0; + kvm_s390_set_prefix(vcpu, 0); + kvm_s390_set_cpu_timer(vcpu, 0); + vcpu->arch.sie_block->ckc = 0; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); + vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; + vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; + vcpu->run->s.regs.fpc = 0; + vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; +} + +static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Clear reset is a superset of the initial reset */ + kvm_arch_vcpu_ioctl_initial_reset(vcpu); + + memset(®s->gprs, 0, sizeof(regs->gprs)); + memset(®s->vrs, 0, sizeof(regs->vrs)); + memset(®s->acrs, 0, sizeof(regs->acrs)); + memset(®s->gscb, 0, sizeof(regs->gscb)); + + regs->etoken = 0; + regs->etoken_extension = 0; } int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct 
kvm_regs *regs) @@ -4357,8 +4378,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); break; } + case KVM_S390_CLEAR_RESET: + r = 0; + kvm_arch_vcpu_ioctl_clear_reset(vcpu); + break; case KVM_S390_INITIAL_RESET: - r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); + r = 0; + kvm_arch_vcpu_ioctl_initial_reset(vcpu); + break; + case KVM_S390_NORMAL_RESET: + r = 0; + kvm_arch_vcpu_ioctl_normal_reset(vcpu); break; case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f0a16b4adbbd..4b95f9a31a2f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1009,6 +1009,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 #define KVM_CAP_ARM_NISV_TO_USER 177 #define KVM_CAP_ARM_INJECT_EXT_DABT 178 +#define KVM_CAP_S390_VCPU_RESETS 179 #ifdef KVM_CAP_IRQ_ROUTING @@ -1473,6 +1474,10 @@ struct kvm_enc_region { /* Available with KVM_CAP_ARM_SVE */ #define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) +/* Available with KVM_CAP_S390_VCPU_RESETS */ +#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) +#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ From ada0a50d7685e35c1b8ee1deb9a38203acda6683 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 31 Jan 2020 05:02:03 -0500 Subject: [PATCH 446/658] selftests: KVM: Add fpu and one reg set/get library functions Add library access to more registers. Signed-off-by: Janosch Frank Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20200131100205.74720-5-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger --- .../testing/selftests/kvm/include/kvm_util.h | 6 ++++ tools/testing/selftests/kvm/lib/kvm_util.c | 36 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 29cccaf96baf..ae0d14c2540a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -125,6 +125,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); +void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); #ifdef __KVM_HAVE_VCPU_EVENTS void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 41cf45416060..a6dd0401eb50 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1373,6 +1373,42 @@ int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); } +void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu); + TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + +void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, 
fpu); + TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + +void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg); + TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + +void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg); + TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + /* * VCPU Ioctl * From b25d4cb43f31e31c176eb862db2ad3072b496d44 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 31 Jan 2020 05:02:04 -0500 Subject: [PATCH 447/658] selftests: KVM: s390x: Add reset tests Test if the registers end up having the correct values after a normal, initial and clear reset. Signed-off-by: Janosch Frank Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20200131100205.74720-6-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger --- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/s390x/resets.c | 155 +++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 tools/testing/selftests/kvm/s390x/resets.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3138a916574a..fe1ea294730c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -36,6 +36,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += s390x/resets TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c new file mode 100644 index 000000000000..fb8e976943a9 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test for s390x CPU resets + * + * Copyright (C) 2020, IBM + */ + +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 3 + +struct kvm_vm *vm; +struct kvm_run *run; +struct kvm_sync_regs *regs; +static uint64_t regs_null[16]; + +static uint64_t crs[16] = { 0x40000ULL, + 0x42000ULL, + 0, 0, 0, 0, 0, + 0x43000ULL, + 0, 0, 0, 0, 0, + 0x44000ULL, + 0, 0 +}; + +static void guest_code_initial(void) +{ + /* Round toward 0 */ + uint32_t fpc = 0x11; + + /* Dirty registers */ + asm volatile ( + " lctlg 0,15,%0\n" + " sfpc %1\n" + : : "Q" (crs), "d" (fpc)); + GUEST_SYNC(0); +} + +static void test_one_reg(uint64_t id, uint64_t value) +{ + struct kvm_one_reg reg; + uint64_t eval_reg; + + reg.addr = (uintptr_t)&eval_reg; + reg.id = id; + vcpu_get_reg(vm, VCPU_ID, ®); + TEST_ASSERT(eval_reg == value, "value == %s", value); +} + +static void assert_clear(void) +{ + struct kvm_sregs sregs; + struct kvm_regs regs; + struct kvm_fpu fpu; + + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0"); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0"); + + vcpu_fpu_get(vm, VCPU_ID, &fpu); + TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0"); +} + +static void assert_initial(void) +{ + struct kvm_sregs sregs; + struct kvm_fpu fpu; + + 
vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0"); + TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000"); + TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12), + "cr1-13 == 0"); + TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0"); + + vcpu_fpu_get(vm, VCPU_ID, &fpu); + TEST_ASSERT(!fpu.fpc, "fpc == 0"); + + test_one_reg(KVM_REG_S390_GBEA, 1); + test_one_reg(KVM_REG_S390_PP, 0); + test_one_reg(KVM_REG_S390_TODPR, 0); + test_one_reg(KVM_REG_S390_CPU_TIMER, 0); + test_one_reg(KVM_REG_S390_CLOCK_COMP, 0); +} + +static void assert_normal(void) +{ + test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); +} + +static void test_normal(void) +{ + printf("Testing normal reset\n"); + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code_initial); + run = vcpu_state(vm, VCPU_ID); + regs = &run->s.regs; + + vcpu_run(vm, VCPU_ID); + + vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0); + assert_normal(); + kvm_vm_free(vm); +} + +static void test_initial(void) +{ + printf("Testing initial reset\n"); + vm = vm_create_default(VCPU_ID, 0, guest_code_initial); + run = vcpu_state(vm, VCPU_ID); + regs = &run->s.regs; + + vcpu_run(vm, VCPU_ID); + + vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0); + assert_normal(); + assert_initial(); + kvm_vm_free(vm); +} + +static void test_clear(void) +{ + printf("Testing clear reset\n"); + vm = vm_create_default(VCPU_ID, 0, guest_code_initial); + run = vcpu_state(vm, VCPU_ID); + regs = &run->s.regs; + + vcpu_run(vm, VCPU_ID); + + vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0); + assert_normal(); + assert_initial(); + assert_clear(); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + + test_initial(); + if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) { + test_normal(); + test_clear(); + } + return 0; +} From b2ff728bae9b04b533fbc8de66f1719c4dc889de Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Fri, 31 Jan 2020 05:02:05 -0500 Subject: [PATCH 448/658] selftests: KVM: testing the local IRQs resets Local IRQs are reset by a normal cpu reset. The initial cpu reset and the clear cpu reset, as superset of the normal reset, both clear the IRQs too. Let's inject an interrupt to a vCPU before calling a reset and see if it is gone after the reset. We choose to inject only an emergency interrupt at this point and can extend the test to other types of IRQs later. 
Signed-off-by: Pierre Morel Signed-off-by: Janosch Frank [minor fixups] Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20200131100205.74720-7-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger --- tools/testing/selftests/kvm/s390x/resets.c | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index fb8e976943a9..1485bc6c8999 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -14,6 +14,9 @@ #include "kvm_util.h" #define VCPU_ID 3 +#define LOCAL_IRQS 32 + +struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS]; struct kvm_vm *vm; struct kvm_run *run; @@ -53,6 +56,23 @@ static void test_one_reg(uint64_t id, uint64_t value) TEST_ASSERT(eval_reg == value, "value == %s", value); } +static void assert_noirq(void) +{ + struct kvm_s390_irq_state irq_state; + int irqs; + + irq_state.len = sizeof(buf); + irq_state.buf = (unsigned long)buf; + irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state); + /* + * irqs contains the number of retrieved interrupts. Any interrupt + * (notably, the emergency call interrupt we have injected) should + * be cleared by the resets, so this should be 0. + */ + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno); + TEST_ASSERT(!irqs, "IRQ pending"); +} + static void assert_clear(void) { struct kvm_sregs sregs; @@ -94,6 +114,22 @@ static void assert_initial(void) static void assert_normal(void) { test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); + assert_noirq(); +} + +static void inject_irq(int cpu_id) +{ + struct kvm_s390_irq_state irq_state; + struct kvm_s390_irq *irq = &buf[0]; + int irqs; + + /* Inject IRQ */ + irq_state.len = sizeof(struct kvm_s390_irq); + irq_state.buf = (unsigned long)buf; + irq->type = KVM_S390_INT_EMERGENCY; + irq->u.emerg.code = cpu_id; + irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); } static void test_normal(void) @@ -106,6 +142,8 @@ static void test_normal(void) vcpu_run(vm, VCPU_ID); + inject_irq(VCPU_ID); + vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0); assert_normal(); kvm_vm_free(vm); @@ -120,6 +158,8 @@ static void test_initial(void) vcpu_run(vm, VCPU_ID); + inject_irq(VCPU_ID); + vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0); assert_normal(); assert_initial(); @@ -135,6 +175,8 @@ static void test_clear(void) vcpu_run(vm, VCPU_ID); + inject_irq(VCPU_ID); + vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0); assert_normal(); assert_initial(); From cb639a420e2e74dba5432e8ebcfb3559ce925148 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 31 Jan 2020 16:22:14 +0100 Subject: [PATCH 449/658] ALSA: pcm: Fix sparse warnings wrt snd_pcm_state_t Since we have a bitwise definition of snd_pcm_state_t and use it for certain struct fields, a few new (and years old) sparse warnings came up. This patch is an attempt to cover them. 
- The state fields in snd_pcm_mmap_status* and co are all defined as snd_pcm_state_t type now - The PCM action callbacks take snd_pcm_state_t argument as well; some actions taking special values got the explicit cast and comments - For the PCM action that doesn't need an extra argument receives ACTION_ARG_IGNORE instead of ambiguous 0 While we're at it, the boolean argument is also properly changed to bool and true/false, as well as a slight refactoring of PCM pause helper function to make easier to read. No functional changes, just shutting up chatty sparse. Fixes: 46b770f720bd ("ALSA: uapi: Fix sparse warning") Reported-by: kbuild test robot Link: https://lore.kernel.org/r/20200131152214.11698-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 4 +- sound/core/pcm_compat.c | 8 +- sound/core/pcm_native.c | 187 ++++++++++++++++++++++++++-------------- 3 files changed, 127 insertions(+), 72 deletions(-) diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 5a31525e2df6..f657ff08f317 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1450,7 +1450,7 @@ struct snd_pcm_status64 { #define SNDRV_PCM_IOCTL_STATUS_EXT64 _IOWR('A', 0x24, struct snd_pcm_status64) struct snd_pcm_status32 { - s32 state; /* stream state */ + snd_pcm_state_t state; /* stream state */ s32 trigger_tstamp_sec; /* time when stream was started/stopped/paused */ s32 trigger_tstamp_nsec; s32 tstamp_sec; /* reference timestamp */ @@ -1461,7 +1461,7 @@ struct snd_pcm_status32 { u32 avail; /* number of frames available */ u32 avail_max; /* max frames available on hw since last status */ u32 overrange; /* count of ADC (capture) overrange detections from last status */ - s32 suspended_state; /* suspended stream state */ + snd_pcm_state_t suspended_state; /* suspended stream state */ u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ s32 audio_tstamp_sec; /* sample counter, wall clock, PHC or on-demand sync'ed */ s32 audio_tstamp_nsec; diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 967c689fb8da..590a46a9e78d 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -156,7 +156,7 @@ static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, #endif /* CONFIG_X86_X32 */ struct compat_snd_pcm_status64 { - s32 state; + snd_pcm_state_t state; u8 rsvd[4]; /* alignment */ s64 trigger_tstamp_sec; s64 trigger_tstamp_nsec; @@ -168,7 +168,7 @@ struct compat_snd_pcm_status64 { u32 avail; u32 avail_max; u32 overrange; - s32 suspended_state; + snd_pcm_state_t suspended_state; u32 audio_tstamp_data; s64 audio_tstamp_sec; s64 audio_tstamp_nsec; @@ -376,13 +376,13 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, #ifdef CONFIG_X86_X32 /* X32 ABI has 64bit timespec and 64bit alignment */ struct snd_pcm_mmap_status_x32 { - s32 state; + snd_pcm_state_t state; s32 pad1; u32 hw_ptr; u32 pad2; /* alignment */ s64 tstamp_sec; s64 tstamp_nsec; - s32 suspended_state; + snd_pcm_state_t suspended_state; s32 pad3; s64 audio_tstamp_sec; s64 audio_tstamp_nsec; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 4ac42ee1238c..336406bcb59e 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -551,7 +551,8 @@ static int period_to_usecs(struct snd_pcm_runtime *runtime) return usecs; } -static void snd_pcm_set_state(struct snd_pcm_substream *substream, int state) +static void snd_pcm_set_state(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { 
snd_pcm_stream_lock_irq(substream); if (substream->runtime->status->state != SNDRV_PCM_STATE_DISCONNECTED) @@ -1105,11 +1106,17 @@ static void snd_pcm_trigger_tstamp(struct snd_pcm_substream *substream) runtime->trigger_master = NULL; } +#define ACTION_ARG_IGNORE (__force snd_pcm_state_t)0 + struct action_ops { - int (*pre_action)(struct snd_pcm_substream *substream, int state); - int (*do_action)(struct snd_pcm_substream *substream, int state); - void (*undo_action)(struct snd_pcm_substream *substream, int state); - void (*post_action)(struct snd_pcm_substream *substream, int state); + int (*pre_action)(struct snd_pcm_substream *substream, + snd_pcm_state_t state); + int (*do_action)(struct snd_pcm_substream *substream, + snd_pcm_state_t state); + void (*undo_action)(struct snd_pcm_substream *substream, + snd_pcm_state_t state); + void (*post_action)(struct snd_pcm_substream *substream, + snd_pcm_state_t state); }; /* @@ -1119,7 +1126,8 @@ struct action_ops { */ static int snd_pcm_action_group(const struct action_ops *ops, struct snd_pcm_substream *substream, - int state, int do_lock) + snd_pcm_state_t state, + bool do_lock) { struct snd_pcm_substream *s = NULL; struct snd_pcm_substream *s1; @@ -1176,7 +1184,7 @@ static int snd_pcm_action_group(const struct action_ops *ops, */ static int snd_pcm_action_single(const struct action_ops *ops, struct snd_pcm_substream *substream, - int state) + snd_pcm_state_t state) { int res; @@ -1257,14 +1265,14 @@ snd_pcm_stream_group_ref(struct snd_pcm_substream *substream) */ static int snd_pcm_action(const struct action_ops *ops, struct snd_pcm_substream *substream, - int state) + snd_pcm_state_t state) { struct snd_pcm_group *group; int res; group = snd_pcm_stream_group_ref(substream); if (group) - res = snd_pcm_action_group(ops, substream, state, 1); + res = snd_pcm_action_group(ops, substream, state, true); else res = snd_pcm_action_single(ops, substream, state); snd_pcm_group_unref(group, substream); @@ -1276,7 +1284,7 @@ static int snd_pcm_action(const struct action_ops *ops, */ static int snd_pcm_action_lock_irq(const struct action_ops *ops, struct snd_pcm_substream *substream, - int state) + snd_pcm_state_t state) { int res; @@ -1290,14 +1298,14 @@ static int snd_pcm_action_lock_irq(const struct action_ops *ops, */ static int snd_pcm_action_nonatomic(const struct action_ops *ops, struct snd_pcm_substream *substream, - int state) + snd_pcm_state_t state) { int res; /* Guarantee the group members won't change during non-atomic action */ down_read(&snd_pcm_link_rwsem); if (snd_pcm_stream_linked(substream)) - res = snd_pcm_action_group(ops, substream, state, 0); + res = snd_pcm_action_group(ops, substream, state, false); else res = snd_pcm_action_single(ops, substream, state); up_read(&snd_pcm_link_rwsem); @@ -1307,7 +1315,8 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops, /* * start callbacks */ -static int snd_pcm_pre_start(struct snd_pcm_substream *substream, int state) +static int snd_pcm_pre_start(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->status->state != SNDRV_PCM_STATE_PREPARED) @@ -1320,20 +1329,23 @@ static int snd_pcm_pre_start(struct snd_pcm_substream *substream, int state) return 0; } -static int snd_pcm_do_start(struct snd_pcm_substream *substream, int state) +static int snd_pcm_do_start(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { if (substream->runtime->trigger_master != substream) return 0; return 
substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_START); } -static void snd_pcm_undo_start(struct snd_pcm_substream *substream, int state) +static void snd_pcm_undo_start(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream) substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); } -static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_start(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); @@ -1377,7 +1389,8 @@ static int snd_pcm_start_lock_irq(struct snd_pcm_substream *substream) /* * stop callbacks */ -static int snd_pcm_pre_stop(struct snd_pcm_substream *substream, int state) +static int snd_pcm_pre_stop(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->status->state == SNDRV_PCM_STATE_OPEN) @@ -1386,7 +1399,8 @@ static int snd_pcm_pre_stop(struct snd_pcm_substream *substream, int state) return 0; } -static int snd_pcm_do_stop(struct snd_pcm_substream *substream, int state) +static int snd_pcm_do_stop(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && snd_pcm_running(substream)) @@ -1394,7 +1408,8 @@ static int snd_pcm_do_stop(struct snd_pcm_substream *substream, int state) return 0; /* unconditonally stop all substreams */ } -static void snd_pcm_post_stop(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_stop(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->status->state != state) { @@ -1465,14 +1480,17 @@ int snd_pcm_stop_xrun(struct snd_pcm_substream *substream) EXPORT_SYMBOL_GPL(snd_pcm_stop_xrun); /* - * pause callbacks + * pause callbacks: pass boolean (to start pause or resume) as state argument */ -static int snd_pcm_pre_pause(struct snd_pcm_substream *substream, int push) +#define pause_pushed(state) (__force bool)(state) + +static int snd_pcm_pre_pause(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_PAUSE)) return -ENOSYS; - if (push) { + if (pause_pushed(state)) { if (runtime->status->state != SNDRV_PCM_STATE_RUNNING) return -EBADFD; } else if (runtime->status->state != SNDRV_PCM_STATE_PAUSED) @@ -1481,13 +1499,14 @@ static int snd_pcm_pre_pause(struct snd_pcm_substream *substream, int push) return 0; } -static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) +static int snd_pcm_do_pause(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { if (substream->runtime->trigger_master != substream) return 0; /* some drivers might use hw_ptr to recover from the pause - update the hw_ptr now */ - if (push) + if (pause_pushed(state)) snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta between the current jiffies, this gives a large enough @@ -1495,23 +1514,27 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) */ substream->runtime->hw_ptr_jiffies = jiffies - HZ * 1000; return substream->ops->trigger(substream, - push ? SNDRV_PCM_TRIGGER_PAUSE_PUSH : - SNDRV_PCM_TRIGGER_PAUSE_RELEASE); + pause_pushed(state) ? 
+ SNDRV_PCM_TRIGGER_PAUSE_PUSH : + SNDRV_PCM_TRIGGER_PAUSE_RELEASE); } -static void snd_pcm_undo_pause(struct snd_pcm_substream *substream, int push) +static void snd_pcm_undo_pause(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream) substream->ops->trigger(substream, - push ? SNDRV_PCM_TRIGGER_PAUSE_RELEASE : + pause_pushed(state) ? + SNDRV_PCM_TRIGGER_PAUSE_RELEASE : SNDRV_PCM_TRIGGER_PAUSE_PUSH); } -static void snd_pcm_post_pause(struct snd_pcm_substream *substream, int push) +static void snd_pcm_post_pause(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); - if (push) { + if (pause_pushed(state)) { runtime->status->state = SNDRV_PCM_STATE_PAUSED; snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MPAUSE); wake_up(&runtime->sleep); @@ -1532,15 +1555,24 @@ static const struct action_ops snd_pcm_action_pause = { /* * Push/release the pause for all linked streams. */ -static int snd_pcm_pause(struct snd_pcm_substream *substream, int push) +static int snd_pcm_pause(struct snd_pcm_substream *substream, bool push) { - return snd_pcm_action(&snd_pcm_action_pause, substream, push); + return snd_pcm_action(&snd_pcm_action_pause, substream, + (__force snd_pcm_state_t)push); +} + +static int snd_pcm_pause_lock_irq(struct snd_pcm_substream *substream, + bool push) +{ + return snd_pcm_action_lock_irq(&snd_pcm_action_pause, substream, + (__force snd_pcm_state_t)push); } #ifdef CONFIG_PM -/* suspend */ +/* suspend callback: state argument ignored */ -static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state) +static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->status->state) { @@ -1556,7 +1588,8 @@ static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state) return 0; } -static int snd_pcm_do_suspend(struct snd_pcm_substream *substream, int state) +static int snd_pcm_do_suspend(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master != substream) @@ -1567,7 +1600,8 @@ static int snd_pcm_do_suspend(struct snd_pcm_substream *substream, int state) return 0; /* suspend unconditionally */ } -static void snd_pcm_post_suspend(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_suspend(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); @@ -1598,7 +1632,8 @@ static int snd_pcm_suspend(struct snd_pcm_substream *substream) unsigned long flags; snd_pcm_stream_lock_irqsave(substream, flags); - err = snd_pcm_action(&snd_pcm_action_suspend, substream, 0); + err = snd_pcm_action(&snd_pcm_action_suspend, substream, + ACTION_ARG_IGNORE); snd_pcm_stream_unlock_irqrestore(substream, flags); return err; } @@ -1642,9 +1677,10 @@ int snd_pcm_suspend_all(struct snd_pcm *pcm) } EXPORT_SYMBOL(snd_pcm_suspend_all); -/* resume */ +/* resume callbacks: state argument ignored */ -static int snd_pcm_pre_resume(struct snd_pcm_substream *substream, int state) +static int snd_pcm_pre_resume(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_RESUME)) @@ -1653,7 +1689,8 @@ static int snd_pcm_pre_resume(struct 
snd_pcm_substream *substream, int state) return 0; } -static int snd_pcm_do_resume(struct snd_pcm_substream *substream, int state) +static int snd_pcm_do_resume(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master != substream) @@ -1666,14 +1703,16 @@ static int snd_pcm_do_resume(struct snd_pcm_substream *substream, int state) return substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_RESUME); } -static void snd_pcm_undo_resume(struct snd_pcm_substream *substream, int state) +static void snd_pcm_undo_resume(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && snd_pcm_running(substream)) substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_SUSPEND); } -static void snd_pcm_post_resume(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_resume(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); @@ -1691,7 +1730,8 @@ static const struct action_ops snd_pcm_action_resume = { static int snd_pcm_resume(struct snd_pcm_substream *substream) { - return snd_pcm_action_lock_irq(&snd_pcm_action_resume, substream, 0); + return snd_pcm_action_lock_irq(&snd_pcm_action_resume, substream, + ACTION_ARG_IGNORE); } #else @@ -1732,7 +1772,9 @@ static int snd_pcm_xrun(struct snd_pcm_substream *substream) /* * reset ioctl */ -static int snd_pcm_pre_reset(struct snd_pcm_substream *substream, int state) +/* reset callbacks: state argument ignored */ +static int snd_pcm_pre_reset(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->status->state) { @@ -1746,7 +1788,8 @@ static int snd_pcm_pre_reset(struct snd_pcm_substream *substream, int state) } } -static int snd_pcm_do_reset(struct snd_pcm_substream *substream, int state) +static int snd_pcm_do_reset(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); @@ -1760,7 +1803,8 @@ static int snd_pcm_do_reset(struct snd_pcm_substream *substream, int state) return 0; } -static void snd_pcm_post_reset(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_reset(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; runtime->control->appl_ptr = runtime->status->hw_ptr; @@ -1777,17 +1821,20 @@ static const struct action_ops snd_pcm_action_reset = { static int snd_pcm_reset(struct snd_pcm_substream *substream) { - return snd_pcm_action_nonatomic(&snd_pcm_action_reset, substream, 0); + return snd_pcm_action_nonatomic(&snd_pcm_action_reset, substream, + ACTION_ARG_IGNORE); } /* * prepare ioctl */ -/* we use the second argument for updating f_flags */ +/* pass f_flags as state argument */ static int snd_pcm_pre_prepare(struct snd_pcm_substream *substream, - int f_flags) + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; + int f_flags = (__force int)state; + if (runtime->status->state == SNDRV_PCM_STATE_OPEN || runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; @@ -1797,17 +1844,19 @@ static int snd_pcm_pre_prepare(struct snd_pcm_substream *substream, return 0; } -static int snd_pcm_do_prepare(struct snd_pcm_substream *substream, int state) +static int 
snd_pcm_do_prepare(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { int err; snd_pcm_sync_stop(substream); err = substream->ops->prepare(substream); if (err < 0) return err; - return snd_pcm_do_reset(substream, 0); + return snd_pcm_do_reset(substream, state); } -static void snd_pcm_post_prepare(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_prepare(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; runtime->control->appl_ptr = runtime->status->hw_ptr; @@ -1840,7 +1889,7 @@ static int snd_pcm_prepare(struct snd_pcm_substream *substream, snd_pcm_stream_lock_irq(substream); switch (substream->runtime->status->state) { case SNDRV_PCM_STATE_PAUSED: - snd_pcm_pause(substream, 0); + snd_pcm_pause(substream, false); /* fallthru */ case SNDRV_PCM_STATE_SUSPENDED: snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); @@ -1849,14 +1898,17 @@ static int snd_pcm_prepare(struct snd_pcm_substream *substream, snd_pcm_stream_unlock_irq(substream); return snd_pcm_action_nonatomic(&snd_pcm_action_prepare, - substream, f_flags); + substream, + (__force snd_pcm_state_t)f_flags); } /* * drain ioctl */ -static int snd_pcm_pre_drain_init(struct snd_pcm_substream *substream, int state) +/* drain init callbacks: state argument ignored */ +static int snd_pcm_pre_drain_init(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->status->state) { @@ -1869,7 +1921,8 @@ static int snd_pcm_pre_drain_init(struct snd_pcm_substream *substream, int state return 0; } -static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, int state) +static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { @@ -1895,7 +1948,9 @@ static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, int state) } else { /* stop running stream */ if (runtime->status->state == SNDRV_PCM_STATE_RUNNING) { - int new_state = snd_pcm_capture_avail(runtime) > 0 ? + snd_pcm_state_t new_state; + + new_state = snd_pcm_capture_avail(runtime) > 0 ? 
SNDRV_PCM_STATE_DRAINING : SNDRV_PCM_STATE_SETUP; snd_pcm_do_stop(substream, new_state); snd_pcm_post_stop(substream, new_state); @@ -1911,7 +1966,8 @@ static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, int state) return 0; } -static void snd_pcm_post_drain_init(struct snd_pcm_substream *substream, int state) +static void snd_pcm_post_drain_init(struct snd_pcm_substream *substream, + snd_pcm_state_t state) { } @@ -1954,10 +2010,11 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, snd_pcm_stream_lock_irq(substream); /* resume pause */ if (runtime->status->state == SNDRV_PCM_STATE_PAUSED) - snd_pcm_pause(substream, 0); + snd_pcm_pause(substream, false); /* pre-start/stop - all running streams are changed to DRAINING state */ - result = snd_pcm_action(&snd_pcm_action_drain_init, substream, 0); + result = snd_pcm_action(&snd_pcm_action_drain_init, substream, + ACTION_ARG_IGNORE); if (result < 0) goto unlock; /* in non-blocking, we don't wait in ioctl but let caller poll */ @@ -2058,7 +2115,7 @@ static int snd_pcm_drop(struct snd_pcm_substream *substream) snd_pcm_stream_lock_irq(substream); /* resume pause */ if (runtime->status->state == SNDRV_PCM_STATE_PAUSED) - snd_pcm_pause(substream, 0); + snd_pcm_pause(substream, false); snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); /* runtime->control->appl_ptr = runtime->status->hw_ptr; */ @@ -2900,12 +2957,12 @@ static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, } struct snd_pcm_mmap_status32 { - s32 state; + snd_pcm_state_t state; s32 pad1; u32 hw_ptr; s32 tstamp_sec; s32 tstamp_nsec; - s32 suspended_state; + snd_pcm_state_t suspended_state; s32 audio_tstamp_sec; s32 audio_tstamp_nsec; } __attribute__((packed)); @@ -3183,9 +3240,7 @@ static int snd_pcm_common_ioctl(struct file *file, case SNDRV_PCM_IOCTL_DROP: return snd_pcm_drop(substream); case SNDRV_PCM_IOCTL_PAUSE: - return snd_pcm_action_lock_irq(&snd_pcm_action_pause, - substream, - (int)(unsigned long)arg); + return snd_pcm_pause_lock_irq(substream, (unsigned long)arg); case SNDRV_PCM_IOCTL_WRITEI_FRAMES: case SNDRV_PCM_IOCTL_READI_FRAMES: return snd_pcm_xferi_frames_ioctl(substream, arg); From fdeb1aca2861472b38779be44141757483300827 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 12:59:26 -0600 Subject: [PATCH 450/658] tracing: Change trace_boot to use synth_event interface Have trace_boot_add_synth_event() use the synth_event interface. Also, rename synth_event_run_cmd() to synth_event_run_command() now that trace_boot's version is gone. 
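As a rough illustration (not taken from the patch), the call sequence
trace_boot now relies on looks like the sketch below; the event name and
the field strings are made-up examples, and MAX_BUF_LEN stands in for
the buffer-size macro already used in trace_boot.c:

  static int __init register_example_synth_event(void)
  {
          struct dynevent_cmd cmd;
          char buf[MAX_BUF_LEN];
          int ret;

          /* bind the command builder to a caller-provided buffer */
          synth_event_cmd_init(&cmd, buf, MAX_BUF_LEN);

          /* start a command for a hypothetical event, no owning module */
          ret = synth_event_gen_cmd_start(&cmd, "example_event", NULL);
          if (ret)
                  return ret;

          /* append "type name" field descriptions one at a time */
          ret = synth_event_add_field_str(&cmd, "u64 lat");
          if (!ret)
                  ret = synth_event_add_field_str(&cmd, "pid_t pid");
          if (ret)
                  return ret;

          /* run the assembled command to register the synthetic event */
          return synth_event_gen_cmd_end(&cmd);
  }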
Link: http://lkml.kernel.org/r/94f1fa0e31846d0bddca916b8663404b20559e34.1580323897.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 31 ++++++++++++------------------- kernel/trace/trace_events_hist.c | 9 ++------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 2298a70cdda6..06d7feb5255f 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -125,38 +125,31 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) #endif #ifdef CONFIG_HIST_TRIGGERS -extern int synth_event_run_command(const char *command); - static int __init trace_boot_add_synth_event(struct xbc_node *node, const char *event) { + struct dynevent_cmd cmd; struct xbc_node *anode; - char buf[MAX_BUF_LEN], *q; + char buf[MAX_BUF_LEN]; const char *p; - int len, delta, ret; + int ret; - len = ARRAY_SIZE(buf); - delta = snprintf(buf, len, "%s", event); - if (delta >= len) { - pr_err("Event name is too long: %s\n", event); - return -E2BIG; - } - len -= delta; q = buf + delta; + synth_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + + ret = synth_event_gen_cmd_start(&cmd, event, NULL); + if (ret) + return ret; xbc_node_for_each_array_value(node, "fields", anode, p) { - delta = snprintf(q, len, " %s;", p); - if (delta >= len) { - pr_err("fields string is too long: %s\n", p); - return -E2BIG; - } - len -= delta; q += delta; + ret = synth_event_add_field_str(&cmd, p); + if (ret) + return ret; } - ret = synth_event_run_command(buf); + ret = synth_event_gen_cmd_end(&cmd); if (ret < 0) pr_err("Failed to add synthetic event: %s\n", buf); - return ret; } #else diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 4d56a4f0310d..2e88c9805f4b 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1755,12 +1755,7 @@ static int create_or_delete_synth_event(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } -int synth_event_run_command(const char *command) -{ - return trace_run_command(command, create_or_delete_synth_event); -} - -static int synth_event_run_cmd(struct dynevent_cmd *cmd) +static int synth_event_run_command(struct dynevent_cmd *cmd) { struct synth_event *se; int ret; @@ -1790,7 +1785,7 @@ static int synth_event_run_cmd(struct dynevent_cmd *cmd) void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) { dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_SYNTH, - synth_event_run_cmd); + synth_event_run_command); } EXPORT_SYMBOL_GPL(synth_event_cmd_init); From d380dcde9a07ca5de4805dee11f58a98ec0ad6ff Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 29 Jan 2020 21:18:18 -0500 Subject: [PATCH 451/658] tracing: Fix now invalid var_ref_vals assumption in trace action The patch 'tracing: Fix histogram code when expression has same var as value' added code to return an existing variable reference when creating a new variable reference, which resulted in var_ref_vals slots being reused instead of being duplicated. The implementation of the trace action assumes that the end of the var_ref_vals array starting at action_data.var_ref_idx corresponds to the values that will be assigned to the trace params. The patch mentioned above invalidates that assumption, which means that each param needs to explicitly specify its index into var_ref_vals. This fix changes action_data.var_ref_idx to an array of var ref indexes to account for that. 
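Put differently, the probe's value lookup moves from a base-plus-offset
assumption to an explicit per-parameter index (simplified from the hunks
below; "val" and "i" are just local names used for illustration):

  /* before: params assumed to occupy a contiguous slice */
  val = var_ref_vals[var_ref_idx + i];

  /* after: each param carries its own index into var_ref_vals */
  val = var_ref_vals[var_ref_idx[i]];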
Link: https://lore.kernel.org/r/1580335695.6220.8.camel@kernel.org Fixes: 8bcebc77e85f ("tracing: Fix histogram code when expression has same var as value") Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 53 +++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 2e88c9805f4b..5b4e04780411 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -476,11 +476,12 @@ struct action_data { * When a histogram trigger is hit, the values of any * references to variables, including variables being passed * as parameters to synthetic events, are collected into a - * var_ref_vals array. This var_ref_idx is the index of the - * first param in the array to be passed to the synthetic - * event invocation. + * var_ref_vals array. This var_ref_idx array is an array of + * indices into the var_ref_vals array, one for each synthetic + * event param, and is passed to the synthetic event + * invocation. */ - unsigned int var_ref_idx; + unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -884,14 +885,14 @@ static struct trace_event_functions synth_event_funcs = { static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct trace_event_file *trace_file = __data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; struct trace_buffer *buffer; struct synth_event *event; - unsigned int i, n_u64; + unsigned int i, n_u64, val_idx; int fields_size = 0; event = trace_file->event_call->data; @@ -914,15 +915,16 @@ static notrace void trace_event_raw_event_synth(void *__data, goto out; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + val_idx = var_ref_idx[i]; if (event->fields[i]->is_string) { - char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; + char *str_val = (char *)(long)var_ref_vals[val_idx]; char *str_field = (char *)&entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { struct synth_field *field = event->fields[i]; - u64 val = var_ref_vals[var_ref_idx + i]; + u64 val = var_ref_vals[val_idx]; switch (field->size) { case 1: @@ -1122,10 +1124,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name) } typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx); + unsigned int *var_ref_idx); static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct tracepoint *tp = event->tp; @@ -3506,6 +3508,22 @@ static int init_var_ref(struct hist_field *ref_field, goto out; } +static int find_var_ref_idx(struct hist_trigger_data *hist_data, + struct hist_field *var_field) +{ + struct hist_field *ref_field; + int i; + + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) + return i; + } + + return -ENOENT; +} + /** * create_var_ref - Create a variable reference and attach it to trigger * @hist_data: The trigger that will be referencing the variable @@ -5071,11 +5089,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data, struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, 
*system = NULL; struct hist_field *hist_field, *var_ref; - unsigned int i, var_ref_idx; + unsigned int i; unsigned int field_pos = 0; struct synth_event *event; char *synth_event_name; - int ret = 0; + int var_ref_idx, ret = 0; lockdep_assert_held(&event_mutex); @@ -5092,8 +5110,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event->ref++; - var_ref_idx = hist_data->n_var_refs; - for (i = 0; i < data->n_params; i++) { char *p; @@ -5142,6 +5158,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, goto err; } + var_ref_idx = find_var_ref_idx(hist_data, var_ref); + if (WARN_ON(var_ref_idx < 0)) { + ret = var_ref_idx; + goto err; + } + + data->var_ref_idx[i] = var_ref_idx; + field_pos++; kfree(p); continue; @@ -5160,7 +5184,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, } data->synth_event = event; - data->var_ref_idx = var_ref_idx; out: return ret; err: From 7da04e48798461f6598d2c0ef8c1bac3e5ea1dee Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 15 Dec 2019 02:55:58 -0800 Subject: [PATCH 452/658] xtensa: drop set_except_vector declaration There's no implementation for set_except_vector function in the xtensa code. Drop its declaration. Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/setup.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/xtensa/include/uapi/asm/setup.h b/arch/xtensa/include/uapi/asm/setup.h index 57e6c210e84f..5356a5fd4d17 100644 --- a/arch/xtensa/include/uapi/asm/setup.h +++ b/arch/xtensa/include/uapi/asm/setup.h @@ -14,6 +14,4 @@ #define COMMAND_LINE_SIZE 256 -extern void set_except_vector(int n, void *addr); - #endif From e725331354924b3f720c8bb7ca579d44cc63e574 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 15 Dec 2019 02:44:59 -0800 Subject: [PATCH 453/658] xtensa: clean up platform headers Drop include directives for irrelevant headers in asm/platform.h and its users. Sort remaining headers. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/platform.h | 2 -- arch/xtensa/kernel/platform.c | 4 +--- arch/xtensa/platforms/iss/setup.c | 16 +++------------- arch/xtensa/platforms/xtfpga/setup.c | 1 + 4 files changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h index f2c48522c5a1..354ca942de40 100644 --- a/arch/xtensa/include/asm/platform.h +++ b/arch/xtensa/include/asm/platform.h @@ -12,8 +12,6 @@ #define _XTENSA_PLATFORM_H #include -#include - #include /* diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index a95ba05b0abe..25fd2cf1fe08 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -12,12 +12,10 @@ * Chris Zankel */ +#include #include -#include -#include #include #include -#include /* HZ */ #define _F(r,f,a,b) \ r __platform_##f a b; \ diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index e28dd53d7df5..554758823ff5 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -11,23 +11,13 @@ * Copyright 2001 - 2005 Tensilica Inc. * Copyright 2017 Cadence Design Systems Inc. 
*/ -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include +#include #include -#include #include #include diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 829115bb381f..b7e4580d3636 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include From 64716b9ec5ba3342d3a9f0464d2abb48de65b327 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 15 Dec 2019 02:48:51 -0800 Subject: [PATCH 454/658] xtensa: drop empty platform_* functions from platforms Provide missing default implementation for platform_init and drop copies of default platform_init, platform_setup and platform_heartbeet from platforms/*/setup.c Signed-off-by: Max Filippov --- arch/xtensa/kernel/platform.c | 1 + arch/xtensa/platforms/iss/setup.c | 9 +-------- arch/xtensa/platforms/xtfpga/setup.c | 16 ---------------- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index 25fd2cf1fe08..ac1e0e566995 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -26,6 +26,7 @@ * (Please, refer to include/asm-xtensa/platform.h for more information) */ +_F(void, init, (bp_tag_t *first), { }); _F(void, setup, (char** cmd), { }); _F(void, restart, (void), { while(1); }); _F(void, halt, (void), { while(1); }); diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index 554758823ff5..ed519aee0ec8 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -23,10 +23,6 @@ #include -void __init platform_init(bp_tag_t* bootparam) -{ -} - void platform_halt(void) { pr_info(" ** Called platform_halt() **\n"); @@ -38,6 +34,7 @@ void platform_power_off(void) pr_info(" ** Called platform_power_off() **\n"); simc_exit(0); } + void platform_restart(void) { /* Flush and reset the mmu, simulate a processor reset, and @@ -46,10 +43,6 @@ void platform_restart(void) /* control never gets here */ } -void platform_heartbeat(void) -{ -} - static int iss_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index b7e4580d3636..4f7d6142d41f 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -57,22 +57,6 @@ void platform_restart(void) /* control never gets here */ } -void __init platform_setup(char **cmdline) -{ -} - -/* early initialization */ - -void __init platform_init(bp_tag_t *first) -{ -} - -/* Heartbeat. */ - -void platform_heartbeat(void) -{ -} - #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT void __init platform_calibrate_ccount(void) From a5127a2dbe95dd72b6a21c98dee0857511f30357 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 31 Jan 2020 17:59:08 +0100 Subject: [PATCH 455/658] drm/tegra: sor: Suspend on clock registration failure Make sure the SOR module is suspenden after we fail to register the SOR pad output clock. 
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index f884185c5e9f..30c96b15d7a3 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3921,15 +3921,16 @@ static int tegra_sor_probe(struct platform_device *pdev) if (!sor->clk_pad) { char *name; - err = host1x_client_resume(&sor->client); - if (err < 0) { - dev_err(sor->dev, "failed to resume: %d\n", err); + name = devm_kasprintf(sor->dev, GFP_KERNEL, "sor%u_pad_clkout", + sor->index); + if (!name) { + err = -ENOMEM; goto remove; } - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "sor%u_pad_clkout", sor->index); - if (!name) { - err = -ENOMEM; + err = host1x_client_resume(&sor->client); + if (err < 0) { + dev_err(sor->dev, "failed to resume: %d\n", err); goto remove; } From ad2139cb80a7a9afbfe428d0448d351a84e22ee6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 31 Jan 2020 17:59:09 +0100 Subject: [PATCH 456/658] drm/tegra: sor: Disable runtime PM on probe failure If the driver fails to probe, make sure to disable runtime PM again. While at it, make the cleanup code in ->remove() symmetric. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 30c96b15d7a3..e79dd65c1b54 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3925,13 +3925,13 @@ static int tegra_sor_probe(struct platform_device *pdev) sor->index); if (!name) { err = -ENOMEM; - goto remove; + goto rpm_disable; } err = host1x_client_resume(&sor->client); if (err < 0) { dev_err(sor->dev, "failed to resume: %d\n", err); - goto remove; + goto rpm_disable; } sor->clk_pad = tegra_clk_sor_pad_register(sor, name); @@ -3942,7 +3942,7 @@ static int tegra_sor_probe(struct platform_device *pdev) err = PTR_ERR(sor->clk_pad); dev_err(&pdev->dev, "failed to register SOR pad clock: %d\n", err); - goto remove; + goto rpm_disable; } INIT_LIST_HEAD(&sor->client.list); @@ -3953,11 +3953,13 @@ static int tegra_sor_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", err); - goto remove; + goto rpm_disable; } return 0; +rpm_disable: + pm_runtime_disable(&pdev->dev); remove: if (sor->ops && sor->ops->remove) sor->ops->remove(sor); @@ -3971,8 +3973,6 @@ static int tegra_sor_remove(struct platform_device *pdev) struct tegra_sor *sor = platform_get_drvdata(pdev); int err; - pm_runtime_disable(&pdev->dev); - err = host1x_client_unregister(&sor->client); if (err < 0) { dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", @@ -3980,6 +3980,8 @@ static int tegra_sor_remove(struct platform_device *pdev) return err; } + pm_runtime_disable(&pdev->dev); + if (sor->ops && sor->ops->remove) { err = sor->ops->remove(sor); if (err < 0) From c472a0b0a1fd1688157b4ad6efc1c3fb8e571a53 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 31 Jan 2020 17:59:10 +0100 Subject: [PATCH 457/658] drm/tegra: sor: Initialize runtime PM before use Commit fd67e9c6ed5a ("drm/tegra: Do not implement runtime PM") replaced the generic runtime PM usage by a host1x bus-specific implementation in order to work around some assumptions baked into runtime PM that are in conflict with the requirements in the Tegra DRM driver. 
Unfortunately the new runtime PM callbacks are not setup yet at the time when the SOR driver first needs to resume the device to register the SOR pad clock, and accesses to register will cause the system to hang. Note that this only happens on Tegra124 and Tegra210 because those are the only SoCs where the SOR pad clock is registered from the SOR driver. Later generations use a SOR pad clock provided by the BPMP. Fix this by moving the registration of the SOR pad clock after the host1x client has been registered. That's somewhat suboptimal because this could potentially, though it's very unlikely, cause the Tegra DRM to be probed if the SOR happens to be the last subdevice to register, only to be immediately removed again if the SOR pad output clock fails to register. That's just a minor annoyance, though, and doesn't justify implementing a workaround. Fixes: fd67e9c6ed5a ("drm/tegra: Do not implement runtime PM") Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 64 +++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index e79dd65c1b54..a9222841862e 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3914,37 +3914,6 @@ static int tegra_sor_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sor); pm_runtime_enable(&pdev->dev); - /* - * On Tegra210 and earlier, provide our own implementation for the - * pad output clock. - */ - if (!sor->clk_pad) { - char *name; - - name = devm_kasprintf(sor->dev, GFP_KERNEL, "sor%u_pad_clkout", - sor->index); - if (!name) { - err = -ENOMEM; - goto rpm_disable; - } - - err = host1x_client_resume(&sor->client); - if (err < 0) { - dev_err(sor->dev, "failed to resume: %d\n", err); - goto rpm_disable; - } - - sor->clk_pad = tegra_clk_sor_pad_register(sor, name); - host1x_client_suspend(&sor->client); - } - - if (IS_ERR(sor->clk_pad)) { - err = PTR_ERR(sor->clk_pad); - dev_err(&pdev->dev, "failed to register SOR pad clock: %d\n", - err); - goto rpm_disable; - } - INIT_LIST_HEAD(&sor->client.list); sor->client.ops = &sor_client_ops; sor->client.dev = &pdev->dev; @@ -3956,8 +3925,41 @@ static int tegra_sor_probe(struct platform_device *pdev) goto rpm_disable; } + /* + * On Tegra210 and earlier, provide our own implementation for the + * pad output clock. + */ + if (!sor->clk_pad) { + char *name; + + name = devm_kasprintf(sor->dev, GFP_KERNEL, "sor%u_pad_clkout", + sor->index); + if (!name) { + err = -ENOMEM; + goto unregister; + } + + err = host1x_client_resume(&sor->client); + if (err < 0) { + dev_err(sor->dev, "failed to resume: %d\n", err); + goto unregister; + } + + sor->clk_pad = tegra_clk_sor_pad_register(sor, name); + host1x_client_suspend(&sor->client); + } + + if (IS_ERR(sor->clk_pad)) { + err = PTR_ERR(sor->clk_pad); + dev_err(sor->dev, "failed to register SOR pad clock: %d\n", + err); + goto unregister; + } + return 0; +unregister: + host1x_client_unregister(&sor->client); rpm_disable: pm_runtime_disable(&pdev->dev); remove: From 249d7b2ef674cdae28c377cfe6f56696548305d5 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 31 Jan 2020 15:55:31 -0600 Subject: [PATCH 458/658] tracing: Consolidate some synth_event_trace code The synth_event trace code contains some almost identical functions and some small functions that are called only once - consolidate the common code into single functions and fold in the small functions to simplify the code overall. 
Link: http://lkml.kernel.org/r/d1c8d8ad124a653b7543afe801d38c199ca5c20e.1580506712.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 141 +++++++++++++------------------ 1 file changed, 57 insertions(+), 84 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5b4e04780411..42058a1b5146 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2053,24 +2053,72 @@ out: } EXPORT_SYMBOL_GPL(synth_event_trace_start); -static int save_synth_val(struct synth_field *field, u64 val, - struct synth_event_trace_state *trace_state) +static int __synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state) { - struct synth_trace_event *entry = trace_state->entry; + struct synth_field *field = NULL; + struct synth_trace_event *entry; + struct synth_event *event; + int i, ret = 0; + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + /* can't mix add_next_synth_val() with add_synth_val() */ + if (field_name) { + if (trace_state->add_next) { + ret = -EINVAL; + goto out; + } + trace_state->add_name = true; + } else { + if (trace_state->add_name) { + ret = -EINVAL; + goto out; + } + trace_state->add_next = true; + } + + if (!trace_state->enabled) + goto out; + + event = trace_state->event; + if (trace_state->add_name) { + for (i = 0; i < event->n_fields; i++) { + field = event->fields[i]; + if (strcmp(field->name, field_name) == 0) + break; + } + if (!field) { + ret = -EINVAL; + goto out; + } + } else { + if (trace_state->cur_field >= event->n_fields) { + ret = -EINVAL; + goto out; + } + field = event->fields[trace_state->cur_field++]; + } + + entry = trace_state->entry; if (field->is_string) { char *str_val = (char *)(long)val; char *str_field; - if (!str_val) - return -EINVAL; + if (!str_val) { + ret = -EINVAL; + goto out; + } str_field = (char *)&entry->fields[field->offset]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); } else entry->fields[field->offset] = val; - - return 0; + out: + return ret; } /** @@ -2104,54 +2152,10 @@ static int save_synth_val(struct synth_field *field, u64 val, int synth_event_add_next_val(u64 val, struct synth_event_trace_state *trace_state) { - struct synth_field *field; - struct synth_event *event; - int ret = 0; - - if (!trace_state) { - ret = -EINVAL; - goto out; - } - - /* can't mix add_next_synth_val() with add_synth_val() */ - if (trace_state->add_name) { - ret = -EINVAL; - goto out; - } - trace_state->add_next = true; - - if (!trace_state->enabled) - goto out; - - event = trace_state->event; - - if (trace_state->cur_field >= event->n_fields) { - ret = -EINVAL; - goto out; - } - - field = event->fields[trace_state->cur_field++]; - ret = save_synth_val(field, val, trace_state); - out: - return ret; + return __synth_event_add_val(NULL, val, trace_state); } EXPORT_SYMBOL_GPL(synth_event_add_next_val); -static struct synth_field *find_synth_field(struct synth_event *event, - const char *field_name) -{ - struct synth_field *field = NULL; - unsigned int i; - - for (i = 0; i < event->n_fields; i++) { - field = event->fields[i]; - if (strcmp(field->name, field_name) == 0) - return field; - } - - return NULL; -} - /** * synth_event_add_val - Add a named field's value to an open synth trace * @field_name: The name of the synthetic event field value to set @@ -2183,38 +2187,7 @@ static struct synth_field *find_synth_field(struct synth_event *event, int synth_event_add_val(const char 
*field_name, u64 val, struct synth_event_trace_state *trace_state) { - struct synth_trace_event *entry; - struct synth_event *event; - struct synth_field *field; - int ret = 0; - - if (!trace_state) { - ret = -EINVAL; - goto out; - } - - /* can't mix add_next_synth_val() with add_synth_val() */ - if (trace_state->add_next) { - ret = -EINVAL; - goto out; - } - trace_state->add_name = true; - - if (!trace_state->enabled) - goto out; - - event = trace_state->event; - entry = trace_state->entry; - - field = find_synth_field(event, field_name); - if (!field) { - ret = -EINVAL; - goto out; - } - - ret = save_synth_val(field, val, trace_state); - out: - return ret; + return __synth_event_add_val(field_name, val, trace_state); } EXPORT_SYMBOL_GPL(synth_event_add_val); From 78be2228c15dd45865b102b29d72e721f0ace9b1 Mon Sep 17 00:00:00 2001 From: Yong Zhi Date: Fri, 31 Jan 2020 14:40:03 -0600 Subject: [PATCH 459/658] ALSA: hda: Add JasperLake PCI ID and codec vid Add HD Audio Device PCI ID and codec vendor_id for the Intel JasperLake REV2/A0 silicon. Signed-off-by: Yong Zhi Signed-off-by: Pierre-Louis Bossart Cc: Link: https://lore.kernel.org/r/20200131204003.10153-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ sound/pci/hda/patch_hdmi.c | 1 + 2 files changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b5e8d4301883..92a042e34d3e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2451,6 +2451,8 @@ static const struct pci_device_id azx_ids[] = { /* Jasperlake */ { PCI_DEVICE(0x8086, 0x38c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0x4dc8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Tigerlake */ { PCI_DEVICE(0x8086, 0xa0c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 48bddc218829..7c006f9858c0 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4256,6 +4256,7 @@ HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), From f8e5f90b3a53bb75f05124ed19156388379a337d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Feb 2020 09:05:28 +0100 Subject: [PATCH 460/658] ALSA: usb-audio: Fix endianess in descriptor validation I overlooked that some fields are words and need the converts from LE in the recently added USB descriptor validation code. This patch fixes those with the proper macro usages. 
Fixes: 57f8770620e9 ("ALSA: usb-audio: More validations of descriptor units") Cc: Link: https://lore.kernel.org/r/20200201080530.22390-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 4034c2072415..6fe206f6e911 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -110,7 +110,7 @@ static bool validate_processing_unit(const void *p, default: if (v->type == UAC1_EXTENSION_UNIT) return true; /* OK */ - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC_PROCESS_UP_DOWNMIX: case UAC_PROCESS_DOLBY_PROLOGIC: if (d->bLength < len + 1) /* bNrModes */ @@ -125,7 +125,7 @@ static bool validate_processing_unit(const void *p, case UAC_VERSION_2: if (v->type == UAC2_EXTENSION_UNIT_V2) return true; /* OK */ - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC2_PROCESS_UP_DOWNMIX: case UAC2_PROCESS_DOLBY_PROLOCIC: /* SiC! */ if (d->bLength < len + 1) /* bNrModes */ @@ -142,7 +142,7 @@ static bool validate_processing_unit(const void *p, len += 2; /* wClusterDescrID */ break; } - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC3_PROCESS_UP_DOWNMIX: if (d->bLength < len + 1) /* bNrModes */ return false; From d8f489355cff55b30731354317739a00cf1238bd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Feb 2020 09:05:29 +0100 Subject: [PATCH 461/658] ALSA: usb-audio: Annotate endianess in Scarlett gen2 quirk The Scarlett gen2 mixer quirk code defines a few record types to communicate via USB hub, and those must be all little-endian. This patch changes the field types to LE to annotate endianess properly. It also fixes the incorrect usage of leXX_to_cpu() in a couple of places, which was caught by sparse after this change. 
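The underlying pattern, shown here as a hypothetical sketch rather than
code taken from the driver: fields that travel over the wire are
declared with __le types so sparse can flag mismatches, and byte-order
conversion happens only at the boundary:

  struct example_wire_packet {            /* made-up on-the-wire layout */
          __le32 cmd;
          __le16 size;
  } __packed;

  static void example_fill(struct example_wire_packet *p, u32 cmd, u16 size)
  {
          p->cmd  = cpu_to_le32(cmd);     /* host -> wire */
          p->size = cpu_to_le16(size);
  }

  static u32 example_cmd(const struct example_wire_packet *p)
  {
          return le32_to_cpu(p->cmd);     /* wire -> host */
  }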
Fixes: 9e4d5c1be21f ("ALSA: usb-audio: Scarlett Gen 2 mixer interface") Cc: Link: https://lore.kernel.org/r/20200201080530.22390-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 46 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 94b903d95afa..74c00c905d24 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -558,11 +558,11 @@ static const struct scarlett2_config /* proprietary request/response format */ struct scarlett2_usb_packet { - u32 cmd; - u16 size; - u16 seq; - u32 error; - u32 pad; + __le32 cmd; + __le16 size; + __le16 seq; + __le32 error; + __le32 pad; u8 data[]; }; @@ -664,11 +664,11 @@ static int scarlett2_usb( "Scarlett Gen 2 USB invalid response; " "cmd tx/rx %d/%d seq %d/%d size %d/%d " "error %d pad %d\n", - le16_to_cpu(req->cmd), le16_to_cpu(resp->cmd), + le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd), le16_to_cpu(req->seq), le16_to_cpu(resp->seq), resp_size, le16_to_cpu(resp->size), - le16_to_cpu(resp->error), - le16_to_cpu(resp->pad)); + le32_to_cpu(resp->error), + le32_to_cpu(resp->pad)); err = -EINVAL; goto unlock; } @@ -687,7 +687,7 @@ error: /* Send SCARLETT2_USB_DATA_CMD SCARLETT2_USB_CONFIG_SAVE */ static void scarlett2_config_save(struct usb_mixer_interface *mixer) { - u32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); + __le32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); scarlett2_usb(mixer, SCARLETT2_USB_DATA_CMD, &req, sizeof(u32), @@ -713,11 +713,11 @@ static int scarlett2_usb_set_config( const struct scarlett2_config config_item = scarlett2_config_items[config_item_num]; struct { - u32 offset; - u32 bytes; - s32 value; + __le32 offset; + __le32 bytes; + __le32 value; } __packed req; - u32 req2; + __le32 req2; int err; struct scarlett2_mixer_data *private = mixer->private_data; @@ -753,8 +753,8 @@ static int scarlett2_usb_get( int offset, void *buf, int size) { struct { - u32 offset; - u32 size; + __le32 offset; + __le32 size; } __packed req; req.offset = cpu_to_le32(offset); @@ -794,8 +794,8 @@ static int scarlett2_usb_set_mix(struct usb_mixer_interface *mixer, const struct scarlett2_device_info *info = private->info; struct { - u16 mix_num; - u16 data[SCARLETT2_INPUT_MIX_MAX]; + __le16 mix_num; + __le16 data[SCARLETT2_INPUT_MIX_MAX]; } __packed req; int i, j; @@ -850,9 +850,9 @@ static int scarlett2_usb_set_mux(struct usb_mixer_interface *mixer) }; struct { - u16 pad; - u16 num; - u32 data[SCARLETT2_MUX_MAX]; + __le16 pad; + __le16 num; + __le32 data[SCARLETT2_MUX_MAX]; } __packed req; req.pad = 0; @@ -911,9 +911,9 @@ static int scarlett2_usb_get_meter_levels(struct usb_mixer_interface *mixer, u16 *levels) { struct { - u16 pad; - u16 num_meters; - u32 magic; + __le16 pad; + __le16 num_meters; + __le32 magic; } __packed req; u32 resp[SCARLETT2_NUM_METERS]; int i, err; From 2acf25f13ebe8beb40e97a1bbe76f36277c64f1e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Feb 2020 09:05:30 +0100 Subject: [PATCH 462/658] ALSA: dummy: Fix PCM format loop in proc output The loop termination for iterating over all formats should contain SNDRV_PCM_FORMAT_LAST, not less than it. 
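SNDRV_PCM_FORMAT_LAST is itself the index of the last valid format, so
the iteration has to be inclusive, as in the hunk below:

  for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) {
          if (dummy->pcm_hw.formats & (1ULL << i))
                  snd_iprintf(buffer, " %s", snd_pcm_format_name(i));
  }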
Fixes: 9b151fec139d ("ALSA: dummy - Add debug proc file") Cc: Link: https://lore.kernel.org/r/20200201080530.22390-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/drivers/dummy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index da0bd8960b3c..02ac3f4e0c02 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -903,7 +903,7 @@ static void print_formats(struct snd_dummy *dummy, { int i; - for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) { + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { if (dummy->pcm_hw.formats & (1ULL << i)) snd_iprintf(buffer, " %s", snd_pcm_format_name(i)); } From 5b787914408ed903d087ffc3281bd6d4e93f94b5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jan 2020 18:15:36 -0800 Subject: [PATCH 463/658] xtensa: drop unused function fast_coprocessor_double fast_coprocessor_double is not used since commit c658eac628aa ("[XTENSA] Add support for configurable registers and coprocessors"). Remove it. There should be no coprocessor exceptions generated in the exception handling paths while PS.EXCM is set. Signed-off-by: Max Filippov --- arch/xtensa/kernel/coprocessor.S | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index bb8e499b9900..c53ce6d8794f 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -145,13 +145,6 @@ ENDPROC(coprocessor_flush) * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception */ -ENTRY(fast_coprocessor_double) - - wsr a0, excsave1 - call0 unrecoverable_exception - -ENDPROC(fast_coprocessor_double) - ENTRY(fast_coprocessor) /* Save remaining registers a1-a3 and SAR */ From a922b1561137fc516e7c77c478779f7d198cc9cf Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jan 2020 21:20:58 -0800 Subject: [PATCH 464/658] xtensa: clean up optional XCHAL_* definitions Simplify users of XCHAL_HAVE_EXTERN_REGS and XCHAL_HAVE_VECBASE and always define them as 0 if they're not defined in the variant/core.h Signed-off-by: Max Filippov --- arch/xtensa/include/asm/core.h | 8 ++++++++ arch/xtensa/include/asm/processor.h | 4 ---- arch/xtensa/include/asm/vectors.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index 5b4acb7d1c07..5590b0f68837 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -10,10 +10,18 @@ #define XCHAL_HAVE_EXCLUSIVE 0 #endif +#ifndef XCHAL_HAVE_EXTERN_REGS +#define XCHAL_HAVE_EXTERN_REGS 0 +#endif + #ifndef XCHAL_HAVE_MPU #define XCHAL_HAVE_MPU 0 #endif +#ifndef XCHAL_HAVE_VECBASE +#define XCHAL_HAVE_VECBASE 0 +#endif + #ifndef XCHAL_SPANNING_WAY #define XCHAL_SPANNING_WAY 0 #endif diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 6fa903daf2a2..7f63aca6a0d3 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -237,10 +237,6 @@ extern unsigned long get_wchan(struct task_struct *p); v; \ }) -#ifndef XCHAL_HAVE_EXTERN_REGS -#define XCHAL_HAVE_EXTERN_REGS 0 -#endif - #if XCHAL_HAVE_EXTERN_REGS static inline void set_er(unsigned long value, unsigned long addr) diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index fd99b25037a7..140f30762cf9 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -40,7 +40,7 @@ #define VECBASE_VADDR _vecbase #endif -#if defined(XCHAL_HAVE_VECBASE) && 
XCHAL_HAVE_VECBASE +#if XCHAL_HAVE_VECBASE #define VECTOR_VADDR(offset) (VECBASE_VADDR + offset) From 58bc6c69afe51ed19d1294ab0bb319b7df1cf75a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 31 Jan 2020 17:59:26 -0800 Subject: [PATCH 465/658] arch/xtensa: fix Kconfig typos for HAVE_SMP Fix typos in xtensa Kconfig help text for HAVE_SMP. Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Cc: linux-xtensa@linux-xtensa.org Message-Id: <500b2132-ea3c-a385-1f37-05664de5f1dd@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 296c5324dace..06e6161797fa 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -180,11 +180,11 @@ config HAVE_SMP depends on XTENSA_VARIANT_CUSTOM select XTENSA_MX help - This option is use to indicate that the system-on-a-chip (SOC) + This option is used to indicate that the system-on-a-chip (SOC) supports Multiprocessing. Multiprocessor support implemented above the CPU core definition and currently needs to be selected manually. - Multiprocessor support in implemented with external cache and + Multiprocessor support is implemented with external cache and interrupt controllers. The MX interrupt distributer adds Interprocessor Interrupts From 5bebf7486d4f4940b2a8e4009beb1dff5041853d Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:31 +0800 Subject: [PATCH 466/658] bcache: fix memory corruption in bch_cache_accounting_clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 83ff9318c44ba ("bcache: not use hard coded memset size in bch_cache_accounting_clear()") tries to make the code more easy to understand by removing the hard coded number with following change, void bch_cache_accounting_clear(...) { memset(&acc->total.cache_hits, 0, - sizeof(unsigned long) * 7); + sizeof(struct cache_stats)); } Unfortunately the change was wrong (it also tells us the original code was not easy to correctly understand). The hard coded number 7 is used because in struct cache_stats, 15 struct cache_stats { 16 struct kobject kobj; 17 18 unsigned long cache_hits; 19 unsigned long cache_misses; 20 unsigned long cache_bypass_hits; 21 unsigned long cache_bypass_misses; 22 unsigned long cache_readaheads; 23 unsigned long cache_miss_collisions; 24 unsigned long sectors_bypassed; 25 26 unsigned int rescale; 27 }; only members in LINE 18-24 want to be set to 0. It is wrong to use 'sizeof(struct cache_stats)' to replace 'sizeof(unsigned long) * 7), the memory objects behind acc->total is staled by this change. Сорокин Артем Сергеевич reports that by the following steps, kernel panic will be triggered, 1. Create new set: make-bcache -B /dev/nvme1n1 -C /dev/sda --wipe-bcache 2. Run in /sys/fs/bcache/: echo 1 > clear_stats && cat stats_five_minute/cache_bypass_hits I can reproduce the panic and get following dmesg with KASAN enabled, [22613.172742] ================================================================== [22613.172862] BUG: KASAN: null-ptr-deref in sysfs_kf_seq_show+0x117/0x230 [22613.172864] Read of size 8 at addr 0000000000000000 by task cat/6753 [22613.172870] CPU: 1 PID: 6753 Comm: cat Not tainted 5.5.0-rc7-lp151.28.16-default+ #11 [22613.172872] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/29/2019 [22613.172873] Call Trace: [22613.172964] dump_stack+0x8b/0xbb [22613.172968] ? 
sysfs_kf_seq_show+0x117/0x230 [22613.172970] ? sysfs_kf_seq_show+0x117/0x230 [22613.173031] __kasan_report+0x176/0x192 [22613.173064] ? pr_cont_kernfs_name+0x40/0x60 [22613.173067] ? sysfs_kf_seq_show+0x117/0x230 [22613.173070] kasan_report+0xe/0x20 [22613.173072] sysfs_kf_seq_show+0x117/0x230 [22613.173105] seq_read+0x199/0x6d0 [22613.173110] vfs_read+0xa5/0x1a0 [22613.173113] ksys_read+0x110/0x160 [22613.173115] ? kernel_write+0xb0/0xb0 [22613.173177] do_syscall_64+0x77/0x290 [22613.173238] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [22613.173241] RIP: 0033:0x7fc2c886ac61 [22613.173244] Code: fe ff ff 48 8d 3d c7 a0 09 00 48 83 ec 08 e8 46 03 02 00 66 0f 1f 44 00 00 8b 05 ca fb 2c 00 48 63 ff 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 55 53 48 89 d5 48 89 [22613.173245] RSP: 002b:00007ffebe776d68 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [22613.173248] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fc2c886ac61 [22613.173249] RDX: 0000000000020000 RSI: 00007fc2c8cca000 RDI: 0000000000000003 [22613.173250] RBP: 0000000000020000 R08: ffffffffffffffff R09: 0000000000000000 [22613.173251] R10: 000000000000038c R11: 0000000000000246 R12: 00007fc2c8cca000 [22613.173253] R13: 0000000000000003 R14: 00007fc2c8cca00f R15: 0000000000020000 [22613.173255] ================================================================== [22613.173256] Disabling lock debugging due to kernel taint [22613.173350] BUG: kernel NULL pointer dereference, address: 0000000000000000 [22613.178380] #PF: supervisor read access in kernel mode [22613.180959] #PF: error_code(0x0000) - not-present page [22613.183444] PGD 0 P4D 0 [22613.184867] Oops: 0000 [#1] SMP KASAN PTI [22613.186797] CPU: 1 PID: 6753 Comm: cat Tainted: G B 5.5.0-rc7-lp151.28.16-default+ #11 [22613.191253] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/29/2019 [22613.196706] RIP: 0010:sysfs_kf_seq_show+0x117/0x230 [22613.199097] Code: ff 48 8b 0b 48 8b 44 24 08 48 01 e9 eb a6 31 f6 48 89 cf ba 00 10 00 00 48 89 4c 24 10 e8 b1 e6 e9 ff 4c 89 ff e8 19 07 ea ff <49> 8b 07 48 85 c0 48 89 44 24 08 0f 84 91 00 00 00 49 8b 6d 00 48 [22613.208016] RSP: 0018:ffff8881d4f8fd78 EFLAGS: 00010246 [22613.210448] RAX: 0000000000000000 RBX: ffff8881eb99b180 RCX: ffffffff810d9ef6 [22613.213691] RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246 [22613.216893] RBP: 0000000000001000 R08: fffffbfff072ddcd R09: fffffbfff072ddcd [22613.220075] R10: 0000000000000001 R11: fffffbfff072ddcc R12: ffff8881de5c0200 [22613.223256] R13: ffff8881ed175500 R14: ffff8881eb99b198 R15: 0000000000000000 [22613.226290] FS: 00007fc2c8d3d500(0000) GS:ffff8881f2a80000(0000) knlGS:0000000000000000 [22613.229637] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [22613.231993] CR2: 0000000000000000 CR3: 00000001ec89a004 CR4: 00000000003606e0 [22613.234909] Call Trace: [22613.235931] seq_read+0x199/0x6d0 [22613.237259] vfs_read+0xa5/0x1a0 [22613.239229] ksys_read+0x110/0x160 [22613.240590] ? 
kernel_write+0xb0/0xb0 [22613.242040] do_syscall_64+0x77/0x290 [22613.243625] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [22613.245450] RIP: 0033:0x7fc2c886ac61 [22613.246706] Code: fe ff ff 48 8d 3d c7 a0 09 00 48 83 ec 08 e8 46 03 02 00 66 0f 1f 44 00 00 8b 05 ca fb 2c 00 48 63 ff 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 55 53 48 89 d5 48 89 [22613.253296] RSP: 002b:00007ffebe776d68 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [22613.255835] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fc2c886ac61 [22613.258472] RDX: 0000000000020000 RSI: 00007fc2c8cca000 RDI: 0000000000000003 [22613.260807] RBP: 0000000000020000 R08: ffffffffffffffff R09: 0000000000000000 [22613.263188] R10: 000000000000038c R11: 0000000000000246 R12: 00007fc2c8cca000 [22613.265598] R13: 0000000000000003 R14: 00007fc2c8cca00f R15: 0000000000020000 [22613.268729] Modules linked in: scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs vmw_vsock_vmci_transport vsock fuse bnep kvm_intel kvm irqbypass crc32_pclmul crc32c_intel ghash_clmulni_intel snd_ens1371 snd_ac97_codec ac97_bus bcache snd_pcm btusb btrtl btbcm btintel crc64 aesni_intel glue_helper crypto_simd vmw_balloon cryptd bluetooth snd_timer snd_rawmidi snd joydev pcspkr e1000 rfkill vmw_vmci soundcore ecdh_generic ecc gameport i2c_piix4 mptctl ac button hid_generic usbhid sr_mod cdrom ata_generic ehci_pci vmwgfx uhci_hcd drm_kms_helper syscopyarea serio_raw sysfillrect sysimgblt fb_sys_fops ttm ehci_hcd mptspi scsi_transport_spi mptscsih ata_piix mptbase ahci usbcore libahci drm sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua [22613.292429] CR2: 0000000000000000 [22613.293563] ---[ end trace a074b26a8508f378 ]--- [22613.295138] RIP: 0010:sysfs_kf_seq_show+0x117/0x230 [22613.296769] Code: ff 48 8b 0b 48 8b 44 24 08 48 01 e9 eb a6 31 f6 48 89 cf ba 00 10 00 00 48 89 4c 24 10 e8 b1 e6 e9 ff 4c 89 ff e8 19 07 ea ff <49> 8b 07 48 85 c0 48 89 44 24 08 0f 84 91 00 00 00 49 8b 6d 00 48 [22613.303553] RSP: 0018:ffff8881d4f8fd78 EFLAGS: 00010246 [22613.305280] RAX: 0000000000000000 RBX: ffff8881eb99b180 RCX: ffffffff810d9ef6 [22613.307924] RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246 [22613.310272] RBP: 0000000000001000 R08: fffffbfff072ddcd R09: fffffbfff072ddcd [22613.312685] R10: 0000000000000001 R11: fffffbfff072ddcc R12: ffff8881de5c0200 [22613.315076] R13: ffff8881ed175500 R14: ffff8881eb99b198 R15: 0000000000000000 [22613.318116] FS: 00007fc2c8d3d500(0000) GS:ffff8881f2a80000(0000) knlGS:0000000000000000 [22613.320743] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [22613.322628] CR2: 0000000000000000 CR3: 00000001ec89a004 CR4: 00000000003606e0 Here this patch fixes the following problem by explicity set all the 7 members to 0 in bch_cache_accounting_clear(). 
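For illustration only (this is not the fix the patch applies): the
region the hard-coded 7 described can also be expressed with offsetof(),
which makes it clear why sizeof(struct cache_stats) is the wrong length;
that size also counts the embedded kobject and the trailing rescale
member, so clearing that many bytes starting at cache_hits runs past the
end of acc->total.

  /* hypothetical alternative, zeroing only the seven counters */
  memset(&acc->total.cache_hits, 0,
         offsetof(struct cache_stats, rescale) -
         offsetof(struct cache_stats, cache_hits));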
Reported-by: Сорокин Артем Сергеевич Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/stats.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index ba1c93791d8d..503aafe188dc 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -109,9 +109,13 @@ int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, void bch_cache_accounting_clear(struct cache_accounting *acc) { - memset(&acc->total.cache_hits, - 0, - sizeof(struct cache_stats)); + acc->total.cache_hits = 0; + acc->total.cache_misses = 0; + acc->total.cache_bypass_hits = 0; + acc->total.cache_bypass_misses = 0; + acc->total.cache_readaheads = 0; + acc->total.cache_miss_collisions = 0; + acc->total.sectors_bypassed = 0; } void bch_cache_accounting_destroy(struct cache_accounting *acc) From 7c02b0055f774ed9afb6e1c7724f33bf148ffdc0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:32 +0800 Subject: [PATCH 467/658] bcache: explicity type cast in bset_bkey_last() In bset.h, macro bset_bkey_last() is defined as, bkey_idx((struct bkey *) (i)->d, (i)->keys) Parameter i can be variable type of data structure, the macro always works once the type of struct i has member 'd' and 'keys'. bset_bkey_last() is also used in macro csum_set() to calculate the checksum of a on-disk data structure. When csum_set() is used to calculate checksum of on-disk bcache super block, the parameter 'i' data type is struct cache_sb_disk. Inside struct cache_sb_disk (also in struct cache_sb) the member keys is __u16 type. But bkey_idx() expects unsigned int (a 32bit width), so there is problem when sending parameters via stack to call bkey_idx(). Sparse tool from Intel 0day kbuild system reports this incompatible problem. bkey_idx() is part of user space API, so the simplest fix is to cast the (i)->keys to unsigned int type in macro bset_bkey_last(). Reported-by: kbuild test robot Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/bset.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index c71365e7c1fa..a50dcfda656f 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -397,7 +397,8 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state); /* Bkey utility code */ -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ + (unsigned int)(i)->keys) static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx) { From 038ba8cc1bffc51250add4a9b9249d4331576d8f Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:33 +0800 Subject: [PATCH 468/658] bcache: add readahead cache policy options via sysfs interface In year 2007 high performance SSD was still expensive, in order to save more space for real workload or meta data, the readahead I/Os for non-meta data was bypassed and not cached on SSD. In now days, SSD price drops a lot and people can find larger size SSD with more comfortable price. It is unncessary to alway bypass normal readahead I/Os to save SSD space for now. This patch adds options for readahead data cache policies via sysfs file /sys/block/bcache/readahead_cache_policy, the options are, - "all": cache all readahead data I/Os. - "meta-only": only cache meta data, and bypass other regular I/Os. 
If users want to make bcache continue to only cache readahead request for metadata and bypass regular data readahead, please set "meta-only" to this sysfs file. By default, bcache will back to cache all read- ahead requests now. Cc: stable@vger.kernel.org Signed-off-by: Coly Li Acked-by: Eric Wheeler Cc: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 3 +++ drivers/md/bcache/request.c | 17 ++++++++++++----- drivers/md/bcache/sysfs.c | 22 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index adf26a21fcd1..74a9849ea164 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -330,6 +330,9 @@ struct cached_dev { */ atomic_t has_dirty; +#define BCH_CACHE_READA_ALL 0 +#define BCH_CACHE_READA_META_ONLY 1 + unsigned int cache_readahead_policy; struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 73478a91a342..820d8402a1dc 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -379,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) goto skip; /* - * Flag for bypass if the IO is for read-ahead or background, - * unless the read-ahead request is for metadata + * If the bio is for read-ahead or background IO, bypass it or + * not depends on the following situations, + * - If the IO is for meta data, always cache it and no bypass + * - If the IO is not meta data, check dc->cache_reada_policy, + * BCH_CACHE_READA_ALL: cache it and not bypass + * BCH_CACHE_READA_META_ONLY: not cache it and bypass + * That is, read-ahead request for metadata always get cached * (eg, for gfs2 or xfs). 
*/ - if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && - !(bio->bi_opf & (REQ_META|REQ_PRIO))) - goto skip; + if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) { + if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) && + (dc->cache_readahead_policy != BCH_CACHE_READA_ALL)) + goto skip; + } if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || bio_sectors(bio) & (c->sb.block_size - 1)) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 733e2ddf3c78..3470fae4eabc 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = { NULL }; +static const char * const bch_reada_cache_policies[] = { + "all", + "meta-only", + NULL +}; + /* Default is 0 ("auto") */ static const char * const bch_stop_on_failure_modes[] = { "auto", @@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us); rw_attribute(sequential_cutoff); rw_attribute(data_csum); rw_attribute(cache_mode); +rw_attribute(readahead_cache_policy); rw_attribute(stop_when_cache_set_failed); rw_attribute(writeback_metadata); rw_attribute(writeback_running); @@ -168,6 +175,11 @@ SHOW(__bch_cached_dev) bch_cache_modes, BDEV_CACHE_MODE(&dc->sb)); + if (attr == &sysfs_readahead_cache_policy) + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_reada_cache_policies, + dc->cache_readahead_policy); + if (attr == &sysfs_stop_when_cache_set_failed) return bch_snprint_string_list(buf, PAGE_SIZE, bch_stop_on_failure_modes, @@ -353,6 +365,15 @@ STORE(__cached_dev) } } + if (attr == &sysfs_readahead_cache_policy) { + v = __sysfs_match_string(bch_reada_cache_policies, -1, buf); + if (v < 0) + return v; + + if ((unsigned int) v != dc->cache_readahead_policy) + dc->cache_readahead_policy = v; + } + if (attr == &sysfs_stop_when_cache_set_failed) { v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); if (v < 0) @@ -467,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_data_csum, #endif &sysfs_cache_mode, + &sysfs_readahead_cache_policy, &sysfs_stop_when_cache_set_failed, &sysfs_writeback_metadata, &sysfs_writeback_running, From d1c3cc34f5a78b38d2b809b289d912c3560545df Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:34 +0800 Subject: [PATCH 469/658] bcache: fix incorrect data type usage in btree_flush_write() Dan Carpenter points out that from commit 2aa8c529387c ("bcache: avoid unnecessary btree nodes flushing in btree_flush_write()"), there is a incorrect data type usage which leads to the following static checker warning: drivers/md/bcache/journal.c:444 btree_flush_write() warn: 'ref_nr' unsigned <= 0 drivers/md/bcache/journal.c 422 static void btree_flush_write(struct cache_set *c) 423 { 424 struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR]; 425 unsigned int i, nr, ref_nr; ^^^^^^ 426 atomic_t *fifo_front_p, *now_fifo_front_p; 427 size_t mask; 428 429 if (c->journal.btree_flushing) 430 return; 431 432 spin_lock(&c->journal.flush_write_lock); 433 if (c->journal.btree_flushing) { 434 spin_unlock(&c->journal.flush_write_lock); 435 return; 436 } 437 c->journal.btree_flushing = true; 438 spin_unlock(&c->journal.flush_write_lock); 439 440 /* get the oldest journal entry and check its refcount */ 441 spin_lock(&c->journal.lock); 442 fifo_front_p = &fifo_front(&c->journal.pin); 443 ref_nr = atomic_read(fifo_front_p); 444 if (ref_nr <= 0) { ^^^^^^^^^^^ Unsigned can't be less than zero. 
445 /* 446 * do nothing if no btree node references 447 * the oldest journal entry 448 */ 449 spin_unlock(&c->journal.lock); 450 goto out; 451 } 452 spin_unlock(&c->journal.lock); As the warning information indicates, local varaible ref_nr in unsigned int type is wrong, which does not matche atomic_read() and the "<= 0" checking. This patch fixes the above error by defining local variable ref_nr as int type. Fixes: 2aa8c529387c ("bcache: avoid unnecessary btree nodes flushing in btree_flush_write()") Reported-by: Dan Carpenter Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/journal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 33ddc5269e8d..6730820780b0 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -422,7 +422,8 @@ err: static void btree_flush_write(struct cache_set *c) { struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR]; - unsigned int i, nr, ref_nr; + unsigned int i, nr; + int ref_nr; atomic_t *fifo_front_p, *now_fifo_front_p; size_t mask; From 49d08d596e85f39ded48e85df362827cbab1f5ae Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:35 +0800 Subject: [PATCH 470/658] bcache: check return value of prio_read() Now if prio_read() failed during starting a cache set, we can print out error message in run_cache_set() and handle the failure properly. Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3dea1d5acd5c..2749daf09724 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -609,12 +609,13 @@ int bch_prio_write(struct cache *ca, bool wait) return 0; } -static void prio_read(struct cache *ca, uint64_t bucket) +static int prio_read(struct cache *ca, uint64_t bucket) { struct prio_set *p = ca->disk_buckets; struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; struct bucket *b; unsigned int bucket_nr = 0; + int ret = -EIO; for (b = ca->buckets; b < ca->buckets + ca->sb.nbuckets; @@ -627,11 +628,15 @@ static void prio_read(struct cache *ca, uint64_t bucket) prio_io(ca, bucket, REQ_OP_READ, 0); if (p->csum != - bch_crc64(&p->magic, bucket_bytes(ca) - 8)) + bch_crc64(&p->magic, bucket_bytes(ca) - 8)) { pr_warn("bad csum reading priorities"); + goto out; + } - if (p->magic != pset_magic(&ca->sb)) + if (p->magic != pset_magic(&ca->sb)) { pr_warn("bad magic reading priorities"); + goto out; + } bucket = p->next_bucket; d = p->data; @@ -640,6 +645,10 @@ static void prio_read(struct cache *ca, uint64_t bucket) b->prio = le16_to_cpu(d->prio); b->gen = b->last_gc = d->gen; } + + ret = 0; +out: + return ret; } /* Bcache device */ @@ -1873,8 +1882,10 @@ static int run_cache_set(struct cache_set *c) j = &list_entry(journal.prev, struct journal_replay, list)->j; err = "IO error reading priorities"; - for_each_cache(ca, c, i) - prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]); + for_each_cache(ca, c, i) { + if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) + goto err; + } /* * If prio_read() fails it'll call cache_set_error and we'll From 74403b6c50dd7a633d3f22f59f975d6081eae093 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 31 Jan 2020 15:55:32 -0600 Subject: [PATCH 471/658] tracing: Remove check_arg() callbacks from dynevent args It's kind of strange to have check_arg() callbacks as part of the arg objects themselves; it makes 
more sense to just pass these in when the args are added instead. Remove the check_arg() callbacks from those objects which also means removing the check_arg() args from the init functions, adding them to the add functions and fixing up existing callers. Link: http://lkml.kernel.org/r/c7708d6f177fcbe1a36b6e4e8e150907df0fa5d2.1580506712.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 62 +++++++++++++++----------------- kernel/trace/trace_dynevent.h | 11 +++--- kernel/trace/trace_events_hist.c | 16 ++++----- kernel/trace/trace_kprobe.c | 10 +++--- 4 files changed, 46 insertions(+), 53 deletions(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 6ffdbc4fda53..f9cfcdc9d1f3 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -228,27 +228,30 @@ fs_initcall(init_dynamic_event); * dynevent_arg_add - Add an arg to a dynevent_cmd * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd * @arg: The argument to append to the current cmd + * @check_arg: An (optional) pointer to a function checking arg sanity * * Append an argument to a dynevent_cmd. The argument string will be * appended to the current cmd string, followed by a separator, if - * applicable. Before the argument is added, the check_arg() - * function, if defined, is called. + * applicable. Before the argument is added, the @check_arg function, + * if present, will be used to check the sanity of the current arg + * string. * - * The cmd string, separator, and check_arg() function should be set - * using the dynevent_arg_init() before any arguments are added using - * this function. + * The cmd string and separator should be set using the + * dynevent_arg_init() before any arguments are added using this + * function. * * Return: 0 if successful, error otherwise. */ int dynevent_arg_add(struct dynevent_cmd *cmd, - struct dynevent_arg *arg) + struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg) { int ret = 0; int delta; char *q; - if (arg->check_arg) { - ret = arg->check_arg(arg); + if (check_arg) { + ret = check_arg(arg); if (ret) return ret; } @@ -269,6 +272,7 @@ int dynevent_arg_add(struct dynevent_cmd *cmd, * dynevent_arg_pair_add - Add an arg pair to a dynevent_cmd * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd * @arg_pair: The argument pair to append to the current cmd + * @check_arg: An (optional) pointer to a function checking arg sanity * * Append an argument pair to a dynevent_cmd. An argument pair * consists of a left-hand-side argument and a right-hand-side @@ -278,24 +282,26 @@ int dynevent_arg_add(struct dynevent_cmd *cmd, * * The lhs argument string will be appended to the current cmd string, * followed by an operator, if applicable, followd by the rhs string, - * followed finally by a separator, if applicable. Before anything is - * added, the check_arg() function, if defined, is called. + * followed finally by a separator, if applicable. Before the + * argument is added, the @check_arg function, if present, will be + * used to check the sanity of the current arg strings. * - * The cmd strings, operator, separator, and check_arg() function - * should be set using the dynevent_arg_pair_init() before any arguments - * are added using this function. + * The cmd strings, operator, and separator should be set using the + * dynevent_arg_pair_init() before any arguments are added using this + * function. 
* * Return: 0 if successful, error otherwise. */ int dynevent_arg_pair_add(struct dynevent_cmd *cmd, - struct dynevent_arg_pair *arg_pair) + struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg) { int ret = 0; int delta; char *q; - if (arg_pair->check_arg) { - ret = arg_pair->check_arg(arg_pair); + if (check_arg) { + ret = check_arg(arg_pair); if (ret) return ret; } @@ -385,20 +391,16 @@ void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, /** * dynevent_arg_init - Initialize a dynevent_arg object * @arg: A pointer to the dynevent_arg struct representing the arg - * @check_arg: An (optional) pointer to a function checking arg sanity * @separator: An (optional) separator, appended after adding the arg * * Initialize a dynevent_arg object. A dynevent_arg represents an * object used to append single arguments to the current command - * string. The @check_arg function, if present, will be used to check - * the sanity of the current arg string (which is directly set by the - * caller). After the arg string is successfully appended to the + * string. After the arg string is successfully appended to the * command string, the optional @separator is appended. If no * separator was specified when initializing the arg, a space will be * appended. */ void dynevent_arg_init(struct dynevent_arg *arg, - dynevent_check_arg_fn_t check_arg, char separator) { memset(arg, '\0', sizeof(*arg)); @@ -406,14 +408,11 @@ void dynevent_arg_init(struct dynevent_arg *arg, if (!separator) separator = ' '; arg->separator = separator; - - arg->check_arg = check_arg; } /** * dynevent_arg_pair_init - Initialize a dynevent_arg_pair object * @arg_pair: A pointer to the dynevent_arg_pair struct representing the arg - * @check_arg: An (optional) pointer to a function checking arg sanity * @operator: An (optional) operator, appended after adding the first arg * @separator: An (optional) separator, appended after adding the second arg * @@ -422,16 +421,13 @@ void dynevent_arg_init(struct dynevent_arg *arg, * variable_name;' or 'x+y' to the current command string. An * argument pair consists of a left-hand-side argument and a * right-hand-side argument separated by an operator, which can be - * whitespace, all followed by a separator, if applicable. The - * @check_arg function, if present, will be used to check the sanity - * of the current arg strings (which is directly set by the caller). - * After the first arg string is successfully appended to the command - * string, the optional @operator is appended, followed by the second - * arg and and optional @separator. If no separator was specified - * when initializing the arg, a space will be appended. + * whitespace, all followed by a separator, if applicable. After the + * first arg string is successfully appended to the command string, + * the optional @operator is appended, followed by the second arg and + * and optional @separator. If no separator was specified when + * initializing the arg, a space will be appended. 
*/ void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, - dynevent_check_arg_fn_t check_arg, char operator, char separator) { memset(arg_pair, '\0', sizeof(*arg_pair)); @@ -443,8 +439,6 @@ void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, if (!separator) separator = ' '; arg_pair->separator = separator; - - arg_pair->check_arg = check_arg; } /** diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index b593fc34c5b1..d6857a254ede 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -126,28 +126,27 @@ typedef int (*dynevent_check_arg_fn_t)(void *data); struct dynevent_arg { const char *str; char separator; /* e.g. ';', ',', or nothing */ - dynevent_check_arg_fn_t check_arg; }; extern void dynevent_arg_init(struct dynevent_arg *arg, - dynevent_check_arg_fn_t check_arg, char separator); extern int dynevent_arg_add(struct dynevent_cmd *cmd, - struct dynevent_arg *arg); + struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg); struct dynevent_arg_pair { const char *lhs; const char *rhs; char operator; /* e.g. '=' or nothing */ char separator; /* e.g. ';', ',', or nothing */ - dynevent_check_arg_fn_t check_arg; }; extern void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, - dynevent_check_arg_fn_t check_arg, char operator, char separator); + extern int dynevent_arg_pair_add(struct dynevent_cmd *cmd, - struct dynevent_arg_pair *arg_pair); + struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg); extern int dynevent_str_add(struct dynevent_cmd *cmd, const char *str); #endif diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 42058a1b5146..d2817fe52f32 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1334,12 +1334,12 @@ int synth_event_add_field(struct dynevent_cmd *cmd, const char *type, if (!type || !name) return -EINVAL; - dynevent_arg_pair_init(&arg_pair, synth_event_check_arg_fn, 0, ';'); + dynevent_arg_pair_init(&arg_pair, 0, ';'); arg_pair.lhs = type; arg_pair.rhs = name; - ret = dynevent_arg_pair_add(cmd, &arg_pair); + ret = dynevent_arg_pair_add(cmd, &arg_pair, synth_event_check_arg_fn); if (ret) return ret; @@ -1377,11 +1377,11 @@ int synth_event_add_field_str(struct dynevent_cmd *cmd, const char *type_name) if (!type_name) return -EINVAL; - dynevent_arg_init(&arg, NULL, ';'); + dynevent_arg_init(&arg, ';'); arg.str = type_name; - ret = dynevent_arg_add(cmd, &arg); + ret = dynevent_arg_add(cmd, &arg, NULL); if (ret) return ret; @@ -1472,9 +1472,9 @@ int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, const char *name, if (cmd->type != DYNEVENT_TYPE_SYNTH) return -EINVAL; - dynevent_arg_init(&arg, NULL, 0); + dynevent_arg_init(&arg, 0); arg.str = name; - ret = dynevent_arg_add(cmd, &arg); + ret = dynevent_arg_add(cmd, &arg, NULL); if (ret) return ret; @@ -1546,9 +1546,9 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, if (n_fields > SYNTH_FIELDS_MAX) return -EINVAL; - dynevent_arg_init(&arg, NULL, 0); + dynevent_arg_init(&arg, 0); arg.str = name; - ret = dynevent_arg_add(cmd, &arg); + ret = dynevent_arg_add(cmd, &arg, NULL); if (ret) return ret; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 307abb724a71..fe183d4045d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -962,9 +962,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, if (ret) return ret; - dynevent_arg_init(&arg, 
NULL, 0); + dynevent_arg_init(&arg, 0); arg.str = loc; - ret = dynevent_arg_add(cmd, &arg); + ret = dynevent_arg_add(cmd, &arg, NULL); if (ret) return ret; @@ -982,7 +982,7 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, } arg.str = field; - ret = dynevent_arg_add(cmd, &arg); + ret = dynevent_arg_add(cmd, &arg, NULL); if (ret) break; } @@ -1017,7 +1017,7 @@ int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...) if (cmd->type != DYNEVENT_TYPE_KPROBE) return -EINVAL; - dynevent_arg_init(&arg, NULL, 0); + dynevent_arg_init(&arg, 0); va_start(args, cmd); for (;;) { @@ -1033,7 +1033,7 @@ int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...) } arg.str = field; - ret = dynevent_arg_add(cmd, &arg); + ret = dynevent_arg_add(cmd, &arg, NULL); if (ret) break; } From e9260f6257efa6a6293507696e875b6494ee3744 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 31 Jan 2020 15:55:33 -0600 Subject: [PATCH 472/658] tracing: Remove useless code in dynevent_arg_pair_add() The final addition to q is unnecessary, since q isn't ever used afterwards. Link: http://lkml.kernel.org/r/7880a1268217886cdba7035526650195668da856.1580506712.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index f9cfcdc9d1f3..204275ec8d71 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -322,7 +322,7 @@ int dynevent_arg_pair_add(struct dynevent_cmd *cmd, pr_err("field string is too long: %s\n", arg_pair->rhs); return -E2BIG; } - cmd->remaining -= delta; q += delta; + cmd->remaining -= delta; return ret; } From 2b90927c77c973771cc658d639724d5b247a83eb Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 31 Jan 2020 15:55:34 -0600 Subject: [PATCH 473/658] tracing: Use seq_buf for building dynevent_cmd string The dynevent_cmd commands that build up the command string don't need to do that themselves - there's a seq_buf facility that does pretty much the same thing those command are doing manually, so use it instead. 
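For context, the seq_buf pattern this patch switches to works roughly as follows (a minimal illustrative sketch, not code from this patch; the 128-byte buffer size and the "u64 count" field string are arbitrary examples):

#include <linux/printk.h>
#include <linux/seq_buf.h>

static void seq_buf_usage_sketch(void)
{
	char buf[128];
	struct seq_buf s;

	seq_buf_init(&s, buf, sizeof(buf));
	/*
	 * seq_buf_printf()/seq_buf_puts() return 0 on success and a negative
	 * value once the buffer would overflow, which maps naturally onto the
	 * -E2BIG handling in the updated dynevent_arg_add() below.
	 */
	if (seq_buf_printf(&s, " %s%c", "u64 count", ';'))
		pr_err("String is too long: u64 count\n");
}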
Link: http://lkml.kernel.org/r/eb8a6e835c964d0ab8a38cbf5ffa60746b54a465.1580506712.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 4 +-- kernel/trace/trace_dynevent.c | 48 +++++++++----------------------- kernel/trace/trace_events_hist.c | 2 +- kernel/trace/trace_kprobe.c | 2 +- 4 files changed, 16 insertions(+), 40 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 7c307a7c9c6a..67f528ecb9e5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -367,10 +367,8 @@ struct dynevent_cmd; typedef int (*dynevent_create_fn_t)(struct dynevent_cmd *cmd); struct dynevent_cmd { - char *buf; + struct seq_buf seq; const char *event_name; - int maxlen; - int remaining; unsigned int n_fields; enum dynevent_type type; dynevent_create_fn_t run_command; diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 204275ec8d71..9f2e8520b748 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -247,8 +247,6 @@ int dynevent_arg_add(struct dynevent_cmd *cmd, dynevent_check_arg_fn_t check_arg) { int ret = 0; - int delta; - char *q; if (check_arg) { ret = check_arg(arg); @@ -256,14 +254,11 @@ int dynevent_arg_add(struct dynevent_cmd *cmd, return ret; } - q = cmd->buf + (cmd->maxlen - cmd->remaining); - - delta = snprintf(q, cmd->remaining, " %s%c", arg->str, arg->separator); - if (delta >= cmd->remaining) { - pr_err("String is too long: %s\n", arg->str); + ret = seq_buf_printf(&cmd->seq, " %s%c", arg->str, arg->separator); + if (ret) { + pr_err("String is too long: %s%c\n", arg->str, arg->separator); return -E2BIG; } - cmd->remaining -= delta; return ret; } @@ -297,8 +292,6 @@ int dynevent_arg_pair_add(struct dynevent_cmd *cmd, dynevent_check_arg_fn_t check_arg) { int ret = 0; - int delta; - char *q; if (check_arg) { ret = check_arg(arg_pair); @@ -306,23 +299,15 @@ int dynevent_arg_pair_add(struct dynevent_cmd *cmd, return ret; } - q = cmd->buf + (cmd->maxlen - cmd->remaining); - - delta = snprintf(q, cmd->remaining, " %s%c", arg_pair->lhs, - arg_pair->operator); - if (delta >= cmd->remaining) { - pr_err("field string is too long: %s\n", arg_pair->lhs); + ret = seq_buf_printf(&cmd->seq, " %s%c%s%c", arg_pair->lhs, + arg_pair->operator, arg_pair->rhs, + arg_pair->separator); + if (ret) { + pr_err("field string is too long: %s%c%s%c\n", arg_pair->lhs, + arg_pair->operator, arg_pair->rhs, + arg_pair->separator); return -E2BIG; } - cmd->remaining -= delta; q += delta; - - delta = snprintf(q, cmd->remaining, "%s%c", arg_pair->rhs, - arg_pair->separator); - if (delta >= cmd->remaining) { - pr_err("field string is too long: %s\n", arg_pair->rhs); - return -E2BIG; - } - cmd->remaining -= delta; return ret; } @@ -340,17 +325,12 @@ int dynevent_arg_pair_add(struct dynevent_cmd *cmd, int dynevent_str_add(struct dynevent_cmd *cmd, const char *str) { int ret = 0; - int delta; - char *q; - q = cmd->buf + (cmd->maxlen - cmd->remaining); - - delta = snprintf(q, cmd->remaining, "%s", str); - if (delta >= cmd->remaining) { + ret = seq_buf_puts(&cmd->seq, str); + if (ret) { pr_err("String is too long: %s\n", str); return -E2BIG; } - cmd->remaining -= delta; return ret; } @@ -381,9 +361,7 @@ void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, { memset(cmd, '\0', sizeof(*cmd)); - cmd->buf = buf; - cmd->maxlen = maxlen; - cmd->remaining = cmd->maxlen; + seq_buf_init(&cmd->seq, buf, maxlen); 
cmd->type = type; cmd->run_command = run_command; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index d2817fe52f32..b3bcfd8c7332 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1762,7 +1762,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) struct synth_event *se; int ret; - ret = trace_run_command(cmd->buf, create_or_delete_synth_event); + ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); if (ret) return ret; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fe183d4045d2..51efc790aea8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -903,7 +903,7 @@ static int create_or_delete_trace_kprobe(int argc, char **argv) static int trace_kprobe_run_command(struct dynevent_cmd *cmd) { - return trace_run_command(cmd->buf, create_or_delete_trace_kprobe); + return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); } /** From 81b450909bfb4b71b4ae1c07add4feada32892bd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 2 Feb 2020 10:07:23 +0100 Subject: [PATCH 474/658] ALSA: emu10k1: Fix annotation and cast for the recent uapi header change The recent sound/emu10k1.h uapi header change by the commit 2e4688676392 ("ALSA: emu10k1: Make uapi/emu10k1.h compilable again") made sparse angry because of the inconsistency of __user annotation and the own ctl id struct that were changed in uapi header. This patch addresses those by adjusting the cast and annotations properly again. Fixes: 2e4688676392 ("ALSA: emu10k1: Make uapi/emu10k1.h compilable again") Link: https://lore.kernel.org/r/20200202090724.18232-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emufx.c | 180 +++++++++++++++++++++----------------- 1 file changed, 101 insertions(+), 79 deletions(-) diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index 6b7ff4a94800..4e76ed0e91d5 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -463,7 +463,7 @@ static void snd_emu10k1_write_op(struct snd_emu10k1_fx8010_code *icode, u_int32_t *code; if (snd_BUG_ON(*ptr >= 512)) return; - code = (u_int32_t __force *)icode->code + (*ptr) * 2; + code = icode->code + (*ptr) * 2; set_bit(*ptr, icode->code_valid); code[0] = ((x & 0x3ff) << 10) | (y & 0x3ff); code[1] = ((op & 0x0f) << 20) | ((r & 0x3ff) << 10) | (a & 0x3ff); @@ -480,7 +480,7 @@ static void snd_emu10k1_audigy_write_op(struct snd_emu10k1_fx8010_code *icode, u_int32_t *code; if (snd_BUG_ON(*ptr >= 1024)) return; - code = (u_int32_t __force *)icode->code + (*ptr) * 2; + code = icode->code + (*ptr) * 2; set_bit(*ptr, icode->code_valid); code[0] = ((x & 0x7ff) << 12) | (y & 0x7ff); code[1] = ((op & 0x0f) << 24) | ((r & 0x7ff) << 12) | (a & 0x7ff); @@ -513,8 +513,8 @@ static int snd_emu10k1_gpr_poke(struct snd_emu10k1 *emu, if (!test_bit(gpr, icode->gpr_valid)) continue; if (in_kernel) - val = *(__force u32 *)&icode->gpr_map[gpr]; - else if (get_user(val, &icode->gpr_map[gpr])) + val = icode->gpr_map[gpr]; + else if (get_user(val, (__user u32 *)&icode->gpr_map[gpr])) return -EFAULT; snd_emu10k1_ptr_write(emu, emu->gpr_base + gpr, 0, val); } @@ -530,7 +530,7 @@ static int snd_emu10k1_gpr_peek(struct snd_emu10k1 *emu, for (gpr = 0; gpr < (emu->audigy ? 
0x200 : 0x100); gpr++) { set_bit(gpr, icode->gpr_valid); val = snd_emu10k1_ptr_read(emu, emu->gpr_base + gpr, 0); - if (put_user(val, &icode->gpr_map[gpr])) + if (put_user(val, (__user u32 *)&icode->gpr_map[gpr])) return -EFAULT; } return 0; @@ -547,11 +547,11 @@ static int snd_emu10k1_tram_poke(struct snd_emu10k1 *emu, if (!test_bit(tram, icode->tram_valid)) continue; if (in_kernel) { - val = *(__force u32 *)&icode->tram_data_map[tram]; - addr = *(__force u32 *)&icode->tram_addr_map[tram]; + val = icode->tram_data_map[tram]; + addr = icode->tram_addr_map[tram]; } else { - if (get_user(val, &icode->tram_data_map[tram]) || - get_user(addr, &icode->tram_addr_map[tram])) + if (get_user(val, (__user __u32 *)&icode->tram_data_map[tram]) || + get_user(addr, (__user __u32 *)&icode->tram_addr_map[tram])) return -EFAULT; } snd_emu10k1_ptr_write(emu, TANKMEMDATAREGBASE + tram, 0, val); @@ -581,8 +581,8 @@ static int snd_emu10k1_tram_peek(struct snd_emu10k1 *emu, addr = snd_emu10k1_ptr_read(emu, TANKMEMADDRREGBASE + tram, 0) >> 12; addr |= snd_emu10k1_ptr_read(emu, A_TANKMEMCTLREGBASE + tram, 0) << 20; } - if (put_user(val, &icode->tram_data_map[tram]) || - put_user(addr, &icode->tram_addr_map[tram])) + if (put_user(val, (__user u32 *)&icode->tram_data_map[tram]) || + put_user(addr, (__user u32 *)&icode->tram_addr_map[tram])) return -EFAULT; } return 0; @@ -598,11 +598,11 @@ static int snd_emu10k1_code_poke(struct snd_emu10k1 *emu, if (!test_bit(pc / 2, icode->code_valid)) continue; if (in_kernel) { - lo = *(__force u32 *)&icode->code[pc + 0]; - hi = *(__force u32 *)&icode->code[pc + 1]; + lo = icode->code[pc + 0]; + hi = icode->code[pc + 1]; } else { - if (get_user(lo, &icode->code[pc + 0]) || - get_user(hi, &icode->code[pc + 1])) + if (get_user(lo, (__user u32 *)&icode->code[pc + 0]) || + get_user(hi, (__user u32 *)&icode->code[pc + 1])) return -EFAULT; } snd_emu10k1_efx_write(emu, pc + 0, lo); @@ -619,17 +619,21 @@ static int snd_emu10k1_code_peek(struct snd_emu10k1 *emu, memset(icode->code_valid, 0, sizeof(icode->code_valid)); for (pc = 0; pc < (emu->audigy ? 
2*1024 : 2*512); pc += 2) { set_bit(pc / 2, icode->code_valid); - if (put_user(snd_emu10k1_efx_read(emu, pc + 0), &icode->code[pc + 0])) + if (put_user(snd_emu10k1_efx_read(emu, pc + 0), + (__user u32 *)&icode->code[pc + 0])) return -EFAULT; - if (put_user(snd_emu10k1_efx_read(emu, pc + 1), &icode->code[pc + 1])) + if (put_user(snd_emu10k1_efx_read(emu, pc + 1), + (__user u32 *)&icode->code[pc + 1])) return -EFAULT; } return 0; } static struct snd_emu10k1_fx8010_ctl * -snd_emu10k1_look_for_ctl(struct snd_emu10k1 *emu, struct emu10k1_ctl_elem_id *id) +snd_emu10k1_look_for_ctl(struct snd_emu10k1 *emu, + struct emu10k1_ctl_elem_id *_id) { + struct snd_ctl_elem_id *id = (struct snd_ctl_elem_id *)_id; struct snd_emu10k1_fx8010_ctl *ctl; struct snd_kcontrol *kcontrol; @@ -672,41 +676,60 @@ static unsigned int *copy_tlv(const unsigned int __user *_tlv, bool in_kernel) } static int copy_gctl(struct snd_emu10k1 *emu, - struct snd_emu10k1_fx8010_control_gpr *gctl, - struct snd_emu10k1_fx8010_control_gpr __user *_gctl, + struct snd_emu10k1_fx8010_control_gpr *dst, + struct snd_emu10k1_fx8010_control_gpr *src, int idx, bool in_kernel) { - struct snd_emu10k1_fx8010_control_old_gpr __user *octl; + struct snd_emu10k1_fx8010_control_gpr __user *_src; + struct snd_emu10k1_fx8010_control_old_gpr *octl; + struct snd_emu10k1_fx8010_control_old_gpr __user *_octl; + _src = (struct snd_emu10k1_fx8010_control_gpr __user *)src; if (emu->support_tlv) { if (in_kernel) - memcpy(gctl, (__force void *)&_gctl[idx], sizeof(*gctl)); - else if (copy_from_user(gctl, &_gctl[idx], sizeof(*gctl))) + *dst = src[idx]; + else if (copy_from_user(dst, &_src[idx], sizeof(*src))) return -EFAULT; return 0; } - octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)_gctl; + octl = (struct snd_emu10k1_fx8010_control_old_gpr *)src; + _octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)octl; if (in_kernel) - memcpy(gctl, (__force void *)&octl[idx], sizeof(*octl)); - else if (copy_from_user(gctl, &octl[idx], sizeof(*octl))) + memcpy(dst, &octl[idx], sizeof(*octl)); + else if (copy_from_user(dst, &_octl[idx], sizeof(*octl))) return -EFAULT; - gctl->tlv = NULL; + dst->tlv = NULL; return 0; } static int copy_gctl_to_user(struct snd_emu10k1 *emu, - struct snd_emu10k1_fx8010_control_gpr __user *_gctl, - struct snd_emu10k1_fx8010_control_gpr *gctl, + struct snd_emu10k1_fx8010_control_gpr *dst, + struct snd_emu10k1_fx8010_control_gpr *src, int idx) { + struct snd_emu10k1_fx8010_control_gpr __user *_dst; struct snd_emu10k1_fx8010_control_old_gpr __user *octl; + _dst = (struct snd_emu10k1_fx8010_control_gpr __user *)dst; if (emu->support_tlv) - return copy_to_user(&_gctl[idx], gctl, sizeof(*gctl)); + return copy_to_user(&_dst[idx], src, sizeof(*src)); - octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)_gctl; - return copy_to_user(&octl[idx], gctl, sizeof(*octl)); + octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)dst; + return copy_to_user(&octl[idx], src, sizeof(*octl)); +} + +static int copy_ctl_elem_id(const struct emu10k1_ctl_elem_id *list, int i, + struct emu10k1_ctl_elem_id *ret, bool in_kernel) +{ + struct emu10k1_ctl_elem_id __user *_id = + (struct emu10k1_ctl_elem_id __user *)&list[i]; + + if (in_kernel) + *ret = list[i]; + else if (copy_from_user(ret, _id, sizeof(*ret))) + return -EFAULT; + return 0; } static int snd_emu10k1_verify_controls(struct snd_emu10k1 *emu, @@ -714,17 +737,16 @@ static int snd_emu10k1_verify_controls(struct snd_emu10k1 *emu, bool in_kernel) { unsigned int i; - struct 
emu10k1_ctl_elem_id __user *_id; struct emu10k1_ctl_elem_id id; struct snd_emu10k1_fx8010_control_gpr *gctl; + struct snd_ctl_elem_id *gctl_id; int err; - _id = (__force struct emu10k1_ctl_elem_id __user *)icode->gpr_del_controls; - for (i = 0; i < icode->gpr_del_control_count; i++, _id++) { - if (in_kernel) - id = *(__force struct emu10k1_ctl_elem_id *)_id; - else if (copy_from_user(&id, _id, sizeof(id))) - return -EFAULT; + for (i = 0; i < icode->gpr_del_control_count; i++) { + err = copy_ctl_elem_id(icode->gpr_del_controls, i, &id, + in_kernel); + if (err < 0) + return err; if (snd_emu10k1_look_for_ctl(emu, &id) == NULL) return -ENOENT; } @@ -740,16 +762,16 @@ static int snd_emu10k1_verify_controls(struct snd_emu10k1 *emu, } if (snd_emu10k1_look_for_ctl(emu, &gctl->id)) continue; + gctl_id = (struct snd_ctl_elem_id *)&gctl->id; down_read(&emu->card->controls_rwsem); - if (snd_ctl_find_id(emu->card, - (struct snd_ctl_elem_id *)&gctl->id)) { + if (snd_ctl_find_id(emu->card, gctl_id)) { up_read(&emu->card->controls_rwsem); err = -EEXIST; goto __error; } up_read(&emu->card->controls_rwsem); - if (gctl->id.iface != SNDRV_CTL_ELEM_IFACE_MIXER && - gctl->id.iface != SNDRV_CTL_ELEM_IFACE_PCM) { + if (gctl_id->iface != SNDRV_CTL_ELEM_IFACE_MIXER && + gctl_id->iface != SNDRV_CTL_ELEM_IFACE_PCM) { err = -EINVAL; goto __error; } @@ -784,6 +806,7 @@ static int snd_emu10k1_add_controls(struct snd_emu10k1 *emu, { unsigned int i, j; struct snd_emu10k1_fx8010_control_gpr *gctl; + struct snd_ctl_elem_id *gctl_id; struct snd_emu10k1_fx8010_ctl *ctl, *nctl; struct snd_kcontrol_new knew; struct snd_kcontrol *kctl; @@ -804,24 +827,25 @@ static int snd_emu10k1_add_controls(struct snd_emu10k1 *emu, err = -EFAULT; goto __error; } - if (gctl->id.iface != SNDRV_CTL_ELEM_IFACE_MIXER && - gctl->id.iface != SNDRV_CTL_ELEM_IFACE_PCM) { + gctl_id = (struct snd_ctl_elem_id *)&gctl->id; + if (gctl_id->iface != SNDRV_CTL_ELEM_IFACE_MIXER && + gctl_id->iface != SNDRV_CTL_ELEM_IFACE_PCM) { err = -EINVAL; goto __error; } - if (! 
gctl->id.name[0]) { + if (!*gctl_id->name) { err = -EINVAL; goto __error; } ctl = snd_emu10k1_look_for_ctl(emu, &gctl->id); memset(&knew, 0, sizeof(knew)); - knew.iface = gctl->id.iface; - knew.name = gctl->id.name; - knew.index = gctl->id.index; - knew.device = gctl->id.device; - knew.subdevice = gctl->id.subdevice; + knew.iface = gctl_id->iface; + knew.name = gctl_id->name; + knew.index = gctl_id->index; + knew.device = gctl_id->device; + knew.subdevice = gctl_id->subdevice; knew.info = snd_emu10k1_gpr_ctl_info; - knew.tlv.p = copy_tlv((__force const unsigned int __user *)gctl->tlv, in_kernel); + knew.tlv.p = copy_tlv((const unsigned int __user *)gctl->tlv, in_kernel); if (knew.tlv.p) knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ; @@ -878,17 +902,15 @@ static int snd_emu10k1_del_controls(struct snd_emu10k1 *emu, { unsigned int i; struct emu10k1_ctl_elem_id id; - struct emu10k1_ctl_elem_id __user *_id; struct snd_emu10k1_fx8010_ctl *ctl; struct snd_card *card = emu->card; + int err; - _id = (__force struct emu10k1_ctl_elem_id __user *)icode->gpr_del_controls; - - for (i = 0; i < icode->gpr_del_control_count; i++, _id++) { - if (in_kernel) - id = *(__force struct emu10k1_ctl_elem_id *)_id; - else if (copy_from_user(&id, _id, sizeof(id))) - return -EFAULT; + for (i = 0; i < icode->gpr_del_control_count; i++) { + err = copy_ctl_elem_id(icode->gpr_del_controls, i, &id, + in_kernel); + if (err < 0) + return err; down_write(&card->controls_rwsem); ctl = snd_emu10k1_look_for_ctl(emu, &id); if (ctl) @@ -917,7 +939,7 @@ static int snd_emu10k1_list_controls(struct snd_emu10k1 *emu, i < icode->gpr_list_control_count) { memset(gctl, 0, sizeof(*gctl)); id = &ctl->kcontrol->id; - gctl->id.iface = id->iface; + gctl->id.iface = (__force int)id->iface; strlcpy(gctl->id.name, id->name, sizeof(gctl->id.name)); gctl->id.index = id->index; gctl->id.device = id->device; @@ -1095,7 +1117,7 @@ static void snd_emu10k1_init_mono_control(struct snd_emu10k1_fx8010_control_gpr *ctl, const char *name, int gpr, int defval) { - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, name); ctl->vcount = ctl->count = 1; ctl->gpr[0] = gpr + 0; ctl->value[0] = defval; @@ -1116,7 +1138,7 @@ static void snd_emu10k1_init_stereo_control(struct snd_emu10k1_fx8010_control_gpr *ctl, const char *name, int gpr, int defval) { - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, name); ctl->vcount = ctl->count = 2; ctl->gpr[0] = gpr + 0; ctl->value[0] = defval; @@ -1138,7 +1160,7 @@ static void snd_emu10k1_init_mono_onoff_control(struct snd_emu10k1_fx8010_control_gpr *ctl, const char *name, int gpr, int defval) { - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, name); ctl->vcount = ctl->count = 1; ctl->gpr[0] = gpr + 0; ctl->value[0] = defval; @@ -1151,7 +1173,7 @@ static void snd_emu10k1_init_stereo_onoff_control(struct snd_emu10k1_fx8010_control_gpr *ctl, const char *name, int gpr, int defval) { - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, name); ctl->vcount = ctl->count = 2; ctl->gpr[0] = gpr + 0; ctl->value[0] = defval; @@ -1204,8 +1226,8 @@ static int _snd_emu10k1_audigy_init_efx(struct snd_emu10k1 *emu) if (!icode) return err; - icode->gpr_map = (u_int32_t __user *) kcalloc(512 + 256 + 256 + 2 * 1024, 
- sizeof(u_int32_t), GFP_KERNEL); + icode->gpr_map = kcalloc(512 + 256 + 256 + 2 * 1024, + sizeof(u_int32_t), GFP_KERNEL); if (!icode->gpr_map) goto __err_gpr; controls = kcalloc(SND_EMU10K1_GPR_CONTROLS, @@ -1213,7 +1235,7 @@ static int _snd_emu10k1_audigy_init_efx(struct snd_emu10k1 *emu) if (!controls) goto __err_ctrls; - gpr_map = (u32 __force *)icode->gpr_map; + gpr_map = icode->gpr_map; icode->tram_data_map = icode->gpr_map + 512; icode->tram_addr_map = icode->tram_data_map + 256; @@ -1468,7 +1490,7 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) ctl = &controls[nctl + 0]; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, "Tone Control - Bass"); ctl->vcount = 2; ctl->count = 10; @@ -1477,7 +1499,7 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) ctl->value[0] = ctl->value[1] = 20; ctl->translation = EMU10K1_GPR_TRANSLATION_BASS; ctl = &controls[nctl + 1]; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, "Tone Control - Treble"); ctl->vcount = 2; ctl->count = 10; @@ -1758,7 +1780,7 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) A_OP(icode, &ptr, 0x0f, 0xc0, 0xc0, 0xcf, 0xc0); icode->gpr_add_control_count = nctl; - icode->gpr_add_controls = (struct snd_emu10k1_fx8010_control_gpr __user *)controls; + icode->gpr_add_controls = controls; emu->support_tlv = 1; /* support TLV */ err = snd_emu10k1_icode_poke(emu, icode, true); emu->support_tlv = 0; /* clear again */ @@ -1766,7 +1788,7 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) __err: kfree(controls); __err_ctrls: - kfree((void __force *)icode->gpr_map); + kfree(icode->gpr_map); __err_gpr: kfree(icode); return err; @@ -1839,8 +1861,8 @@ static int _snd_emu10k1_init_efx(struct snd_emu10k1 *emu) if (!icode) return err; - icode->gpr_map = (u_int32_t __user *) kcalloc(256 + 160 + 160 + 2 * 512, - sizeof(u_int32_t), GFP_KERNEL); + icode->gpr_map = kcalloc(256 + 160 + 160 + 2 * 512, + sizeof(u_int32_t), GFP_KERNEL); if (!icode->gpr_map) goto __err_gpr; @@ -1854,7 +1876,7 @@ static int _snd_emu10k1_init_efx(struct snd_emu10k1 *emu) if (!ipcm) goto __err_ipcm; - gpr_map = (u32 __force *)icode->gpr_map; + gpr_map = icode->gpr_map; icode->tram_data_map = icode->gpr_map + 256; icode->tram_addr_map = icode->tram_data_map + 160; @@ -2188,7 +2210,7 @@ static int _snd_emu10k1_init_efx(struct snd_emu10k1 *emu) OP(icode, &ptr, iACC3, GPR(playback + SND_EMU10K1_PLAYBACK_CHANNELS + 5), GPR(playback + 5), C_00000000, C_00000000); /* LFE */ ctl = &controls[i + 0]; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, "Tone Control - Bass"); ctl->vcount = 2; ctl->count = 10; @@ -2198,7 +2220,7 @@ static int _snd_emu10k1_init_efx(struct snd_emu10k1 *emu) ctl->tlv = snd_emu10k1_bass_treble_db_scale; ctl->translation = EMU10K1_GPR_TRANSLATION_BASS; ctl = &controls[i + 1]; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->id.iface = (__force int)SNDRV_CTL_ELEM_IFACE_MIXER; strcpy(ctl->id.name, "Tone Control - Treble"); ctl->vcount = 2; ctl->count = 10; @@ -2384,7 +2406,7 @@ static int _snd_emu10k1_init_efx(struct snd_emu10k1 *emu) if ((err = snd_emu10k1_fx8010_tram_setup(emu, ipcm->buffer_size)) < 0) goto __err; icode->gpr_add_control_count = i; - icode->gpr_add_controls = (struct snd_emu10k1_fx8010_control_gpr 
__user *)controls; + icode->gpr_add_controls = controls; emu->support_tlv = 1; /* support TLV */ err = snd_emu10k1_icode_poke(emu, icode, true); emu->support_tlv = 0; /* clear again */ @@ -2395,7 +2417,7 @@ __err: __err_ipcm: kfree(controls); __err_ctrls: - kfree((void __force *)icode->gpr_map); + kfree(icode->gpr_map); __err_gpr: kfree(icode); return err; From 112e3f5ac505ea0f1e3c59cc757db3c45252c41d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 2 Feb 2020 10:07:24 +0100 Subject: [PATCH 475/658] ALSA: hdsp: Make the firmware loading ioctl a bit more readable The firmware loading ioctl that is implemented for hdsp hwdep device takes the reference of the address pointer, hence the current code is rather confusing. Also, due to the recent change in uapi header, sparse also complains about the cast. This patch tries to improve the readability by converting the straightforward copy_from_user of the whole struct (which contains only the pointer). Fixes: d63e63d42107 ("ALSA: hdsp: Make uapi/hdsp.h compilable again") Link: https://lore.kernel.org/r/20200202090724.18232-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdsp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index ba2a47dd384c..25b3c5697343 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -4802,7 +4802,7 @@ static int snd_hdsp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, unsigne break; } case SNDRV_HDSP_IOCTL_UPLOAD_FIRMWARE: { - struct hdsp_firmware __user *firmware; + struct hdsp_firmware firmware; u32 __user *firmware_data; int err; @@ -4815,10 +4815,9 @@ static int snd_hdsp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, unsigne dev_info(hdsp->card->dev, "initializing firmware upload\n"); - firmware = (struct hdsp_firmware __user *)argp; - - if (get_user(firmware_data, (__force void __user **)&firmware->firmware_data)) + if (copy_from_user(&firmware, argp, sizeof(firmware))) return -EFAULT; + firmware_data = (u32 __user *)firmware.firmware_data; if (hdsp_check_for_iobox (hdsp)) return -EIO; From 6ba7fc99680b2250deba1a23f34d31fd25125d61 Mon Sep 17 00:00:00 2001 From: Yong Zhi Date: Fri, 31 Jan 2020 14:40:32 -0600 Subject: [PATCH 476/658] ASoC: SOF: Intel: add PCI ID for JasperLake Mirror ID added for legacy HDaudio. Signed-off-by: Yong Zhi Signed-off-by: Divagar Mohandass Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200131204032.10213-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-pci-dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index bf01b92f0dac..cec631a1389b 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -417,6 +417,8 @@ static const struct pci_device_id sof_pci_ids[] = { #if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE) { PCI_DEVICE(0x8086, 0x38c8), .driver_data = (unsigned long)&jsl_desc}, + { PCI_DEVICE(0x8086, 0x4dc8), + .driver_data = (unsigned long)&jsl_desc}, #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_LP) { PCI_DEVICE(0x8086, 0x02c8), From 961b91a93ea27495022b2bdc3ca0f608f2c97b5f Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 31 Jan 2020 09:19:01 +0000 Subject: [PATCH 477/658] ASoC: tegra: Revert 24 and 32 bit support Commit f3ee99087c8ca0ecfdd549ef5a94f557c42d5428 ("ASoC: tegra: Allow 24bit and 32bit samples") added 24-bit and 32-bit support for to the Tegra30 I2S driver. 
However, there are two additional commits that are also needed to get 24-bit and 32-bit support to work correctly. These commits are not yet applied because there are still some review comments that need to be addressed. With only this change applied, 24-bit and 32-bit support is advertised by the I2S driver, but it does not work and the audio is distorted. Therefore, revert this patch for now until the other changes are also ready. Furthermore, a clock issue with 24-bit support has been identified with this change and so if we revert this now, we can also fix that in the updated version. Reported-by: Dmitry Osipenko Signed-off-by: Jon Hunter Tested-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200131091901.13014-1-jonathanh@nvidia.com Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/tegra/tegra30_i2s.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c index dbed3c5408e7..d59882ec48f1 100644 --- a/sound/soc/tegra/tegra30_i2s.c +++ b/sound/soc/tegra/tegra30_i2s.c @@ -127,7 +127,7 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, struct device *dev = dai->dev; struct tegra30_i2s *i2s = snd_soc_dai_get_drvdata(dai); unsigned int mask, val, reg; - int ret, sample_size, srate, i2sclock, bitcnt, audio_bits; + int ret, sample_size, srate, i2sclock, bitcnt; struct tegra30_ahub_cif_conf cif_conf; if (params_channels(params) != 2) @@ -137,19 +137,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: val = TEGRA30_I2S_CTRL_BIT_SIZE_16; - audio_bits = TEGRA30_AUDIOCIF_BITS_16; sample_size = 16; break; - case SNDRV_PCM_FORMAT_S24_LE: - val = TEGRA30_I2S_CTRL_BIT_SIZE_24; - audio_bits = TEGRA30_AUDIOCIF_BITS_24; - sample_size = 24; - break; - case SNDRV_PCM_FORMAT_S32_LE: - val = TEGRA30_I2S_CTRL_BIT_SIZE_32; - audio_bits = TEGRA30_AUDIOCIF_BITS_32; - sample_size = 32; - break; default: return -EINVAL; } @@ -181,8 +170,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, cif_conf.threshold = 0; cif_conf.audio_channels = 2; cif_conf.client_channels = 2; - cif_conf.audio_bits = audio_bits; - cif_conf.client_bits = audio_bits; + cif_conf.audio_bits = TEGRA30_AUDIOCIF_BITS_16; + cif_conf.client_bits = TEGRA30_AUDIOCIF_BITS_16; cif_conf.expand = 0; cif_conf.stereo_conv = 0; cif_conf.replicate = 0; @@ -317,18 +306,14 @@ static const struct snd_soc_dai_driver tegra30_i2s_dai_template = { .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_96000, - .formats = SNDRV_PCM_FMTBIT_S32_LE | - SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S16_LE, + .formats = SNDRV_PCM_FMTBIT_S16_LE, }, .capture = { .stream_name = "Capture", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_96000, - .formats = SNDRV_PCM_FMTBIT_S32_LE | - SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S16_LE, + .formats = SNDRV_PCM_FMTBIT_S16_LE, }, .ops = &tegra30_i2s_dai_ops, .symmetric_rates = 1, From 8044aad70a1fbd66376cdb2a13e536db9dd6c132 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 17 Jan 2020 16:28:19 +1000 Subject: [PATCH 478/658] m68knommu: fix memcpy() out of bounds warning in get_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer versions of gcc are giving warnings in the non-MMU m68k version of the get_user() macro: ./arch/m68k/include/asm/string.h:72:25: warning: ‘__builtin_memcpy’ forming 
offset [3, 4] is out of the bounds [0, 2] of object ‘__gu_val’ with type ‘short unsigned int’ [-Warray-bounds] The warnings are generated when smaller sized variables are used as the result of user space pointers to larger values. For example a short/2-byte variable stores the result of a user space int (4-byte) pointer. The warning is in the 8-byte branch of get_user() - even though that branch is not the taken branch in the warning cases. Refactor the 8-byte branch of get_user() so that it uses a correctly formed union type to read and write the source and destination objects. Keep using the memcpy() just in case the user space pointer is not naturaly aligned (not required for ColdFire, but needed for early 68000). Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/uaccess_no.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h index 0134008bf539..6bc80c35726d 100644 --- a/arch/m68k/include/asm/uaccess_no.h +++ b/arch/m68k/include/asm/uaccess_no.h @@ -71,26 +71,29 @@ extern int __put_user_bad(void); #define get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - typeof(x) __gu_val = 0; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm(__gu_err, __gu_val, ptr, b, "=d"); \ + __get_user_asm(__gu_err, x, ptr, b, "=d"); \ break; \ case 2: \ - __get_user_asm(__gu_err, __gu_val, ptr, w, "=r"); \ + __get_user_asm(__gu_err, x, ptr, w, "=r"); \ break; \ case 4: \ - __get_user_asm(__gu_err, __gu_val, ptr, l, "=r"); \ + __get_user_asm(__gu_err, x, ptr, l, "=r"); \ break; \ - case 8: \ - memcpy((void *) &__gu_val, ptr, sizeof (*(ptr))); \ + case 8: { \ + union { \ + u64 l; \ + __typeof__(*(ptr)) t; \ + } __gu_val; \ + memcpy(&__gu_val.l, ptr, sizeof(__gu_val.l)); \ + (x) = __gu_val.t; \ break; \ + } \ default: \ - __gu_val = 0; \ __gu_err = __get_user_bad(); \ break; \ } \ - (x) = (typeof(*(ptr))) __gu_val; \ __gu_err; \ }) #define __get_user(x, ptr) get_user(x, ptr) From 3a5be9b8f43346a24f31c0017cb2566a6b2c72c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2020 11:57:08 +0100 Subject: [PATCH 479/658] intel_idle: Introduce 'use_acpi' module parameter For diagnostics, it is generally useful to be able to make intel_idle take the system's ACPI tables into consideration even if that is not required for the processor model in there, so introduce a new module parameter, 'use_acpi', to make that happen and update the documentation to cover it. While at it, fix the 'no_acpi' module parameter name in the documentation. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_idle.rst | 13 +++++++++---- drivers/idle/intel_idle.c | 11 +++++++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst index afbf778035f8..8998598746a4 100644 --- a/Documentation/admin-guide/pm/intel_idle.rst +++ b/Documentation/admin-guide/pm/intel_idle.rst @@ -60,6 +60,9 @@ of the system. The former are always used if the processor model at hand is recognized by ``intel_idle`` and the latter are used if that is required for the given processor model (which is the case for all server processor models recognized by ``intel_idle``) or if the processor model is not recognized. +[There is a module parameter that can be used to make the driver use the ACPI +tables with any processor model recognized by it; see +`below `_.] 
If the ACPI tables are going to be used for building the list of available idle states, ``intel_idle`` first looks for a ``_CST`` object under one of the ACPI @@ -165,7 +168,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the ``MWAIT`` instruction is not allowed to be used, so the initialization of ``intel_idle`` will fail. -Apart from that there are two module parameters recognized by ``intel_idle`` +Apart from that there are three module parameters recognized by ``intel_idle`` itself that can be set via the kernel command line (they cannot be updated via sysfs, so that is the only way to change their values). @@ -186,9 +189,11 @@ QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states even if they have been enumerated (see :ref:`cpu-pm-qos` in :doc:`cpuidle`). Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail. -The ``noacpi`` module parameter (which is recognized by ``intel_idle`` if the -kernel has been configured with ACPI support), can be set to make the driver -ignore the system's ACPI tables entirely (it is unset by default). +The ``no_acpi`` and ``use_acpi`` module parameters (recognized by ``intel_idle`` +if the kernel has been configured with ACPI support) can be set to make the +driver ignore the system's ACPI tables entirely or use them for all of the +recognized processor models, respectively (they both are unset by default and +``use_acpi`` has no effect if ``no_acpi`` is set). .. _intel-idle-core-and-package-idle-states: diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7833e650789f..6fbd94f85fa5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1131,6 +1131,10 @@ static bool no_acpi __read_mostly; module_param(no_acpi, bool, 0444); MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); +static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ +module_param_named(use_acpi, force_use_acpi, bool, 0444); +MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); + static struct acpi_processor_power acpi_state_table __initdata; /** @@ -1258,6 +1262,8 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint) return true; } #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +#define force_use_acpi (false) + static inline bool intel_idle_acpi_cst_extract(void) { return false; } static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } @@ -1460,7 +1466,8 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; - if (icpu->use_acpi && intel_idle_off_by_default(mwait_hint) && + if ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; @@ -1607,7 +1614,7 @@ static int __init intel_idle_init(void) icpu = (const struct idle_cpu *)id->driver_data; if (icpu) { cpuidle_state_table = icpu->state_table; - if (icpu->use_acpi) + if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { return -ENODEV; From 4dcb78ee579cdf90e30c5a0223f6f160ea37182d Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 3 Feb 2020 11:57:18 +0100 Subject: [PATCH 480/658] intel_idle: Introduce 'states_off' module parameter In certain system configurations it may not be desirable to use some C-states assumed to be available by intel_idle and the driver needs to be prevented from using them even before the cpuidle sysfs interface becomes accessible to user space. Currently, the only way to achieve that is by setting the 'max_cstate' module parameter to a value lower than the index of the shallowest of the C-states in question, but that may be overly intrusive, because it effectively makes all of the idle states deeper than the 'max_cstate' one go away (and the C-state to avoid may be in the middle of the range normally regarded as available). To allow that limitation to be overcome, introduce a new module parameter called 'states_off' to represent a list of idle states to be disabled by default in the form of a bitmask and update the documentation to cover it. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_idle.rst | 19 ++++++++++++++++- drivers/idle/intel_idle.c | 23 ++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst index 8998598746a4..89309e1b0e48 100644 --- a/Documentation/admin-guide/pm/intel_idle.rst +++ b/Documentation/admin-guide/pm/intel_idle.rst @@ -168,7 +168,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the ``MWAIT`` instruction is not allowed to be used, so the initialization of ``intel_idle`` will fail. -Apart from that there are three module parameters recognized by ``intel_idle`` +Apart from that there are four module parameters recognized by ``intel_idle`` itself that can be set via the kernel command line (they cannot be updated via sysfs, so that is the only way to change their values). @@ -195,6 +195,23 @@ driver ignore the system's ACPI tables entirely or use them for all of the recognized processor models, respectively (they both are unset by default and ``use_acpi`` has no effect if ``no_acpi`` is set). +The value of the ``states_off`` module parameter (0 by default) represents a +list of idle states to be disabled by default in the form of a bitmask. + +Namely, the positions of the bits that are set in the ``states_off`` value are +the indices of idle states to be disabled by default (as reflected by the names +of the corresponding idle state directories in ``sysfs``, :file:`state0`, +:file:`state1` ... :file:`state` ..., where ```` is the index of the given +idle state; see :ref:`idle-states-representation` in :doc:`cpuidle`). + +For example, if ``states_off`` is equal to 3, the driver will disable idle +states 0 and 1 by default, and if it is equal to 8, idle state 3 will be +disabled by default and so on (bit positions beyond the maximum idle state index +are ignored). + +The idle states disabled this way can be enabled (on a per-CPU basis) from user +space via ``sysfs``. + .. 
_intel-idle-core-and-package-idle-states: diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 6fbd94f85fa5..d55606608ac8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -63,6 +63,7 @@ static struct cpuidle_driver intel_idle_driver = { }; /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; +static unsigned int disabled_states_mask; static unsigned int mwait_substates; @@ -1234,6 +1235,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (cx->type > ACPI_STATE_C2) state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + if (disabled_states_mask & BIT(cstate)) + state->flags |= CPUIDLE_FLAG_OFF; + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } @@ -1466,9 +1470,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; - if ((icpu->use_acpi || force_use_acpi) && - intel_idle_off_by_default(mwait_hint) && - !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && + !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; drv->state_count++; @@ -1487,6 +1492,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) { cpuidle_poll_state_init(drv); + + if (disabled_states_mask & BIT(0)) + drv->states[0].flags |= CPUIDLE_FLAG_OFF; + drv->state_count = 1; if (icpu) @@ -1667,3 +1676,11 @@ device_initcall(intel_idle_init); * is the easiest way (currently) to continue doing that. */ module_param(max_cstate, int, 0444); +/* + * The positions of the bits that are set in this number are the indices of the + * idle states to be disabled by default (as reflected by the names of the + * corresponding idle state directories in sysfs, "state0", "state1" ... + * "state" ..., where is the index of the given state). + */ +module_param_named(states_off, disabled_states_mask, uint, 0444); +MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); From c21502efdaedfdf9fc71334883a164341881bc22 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 31 Jan 2020 11:05:17 +0100 Subject: [PATCH 481/658] Documentation: admin-guide: PM: Update sleep states documentation There is some information in Documentation/power/interface.rst that is still missing from Documentation/admin-guide/pm/sleep-states.rst and really should be present in there, so update the latter by adding that information to it and delete the former (as it becomes redundant after that and it is somewhat outdated). While at it, clean up some assorted pieces of sleep-states.rst a bit. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/sleep-states.rst | 76 ++++++++++++++---- Documentation/power/interface.rst | 79 ------------------- 2 files changed, 59 insertions(+), 96 deletions(-) delete mode 100644 Documentation/power/interface.rst diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index cd3a28cb81f4..ee55a460c639 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -153,8 +153,11 @@ for the given CPU architecture includes the low-level code for system resume. 
Basic ``sysfs`` Interfaces for System Suspend and Hibernation ============================================================= -The following files located in the :file:`/sys/power/` directory can be used by -user space for sleep states control. +The power management subsystem provides userspace with a unified ``sysfs`` +interface for system sleep regardless of the underlying system architecture or +platform. That interface is located in the :file:`/sys/power/` directory +(assuming that ``sysfs`` is mounted at :file:`/sys`) and it consists of the +following attributes (files): ``state`` This file contains a list of strings representing sleep states supported @@ -162,9 +165,9 @@ user space for sleep states control. to start a transition of the system into the sleep state represented by that string. - In particular, the strings "disk", "freeze" and "standby" represent the + In particular, the "disk", "freeze" and "standby" strings represent the :ref:`hibernation `, :ref:`suspend-to-idle ` and - :ref:`standby ` sleep states, respectively. The string "mem" + :ref:`standby ` sleep states, respectively. The "mem" string is interpreted in accordance with the contents of the ``mem_sleep`` file described below. @@ -177,7 +180,7 @@ user space for sleep states control. associated with the "mem" string in the ``state`` file described above. The strings that may be present in this file are "s2idle", "shallow" - and "deep". The string "s2idle" always represents :ref:`suspend-to-idle + and "deep". The "s2idle" string always represents :ref:`suspend-to-idle ` and, by convention, "shallow" and "deep" represent :ref:`standby ` and :ref:`suspend-to-RAM `, respectively. @@ -185,15 +188,17 @@ user space for sleep states control. Writing one of the listed strings into this file causes the system suspend variant represented by it to be associated with the "mem" string in the ``state`` file. The string representing the suspend variant - currently associated with the "mem" string in the ``state`` file - is listed in square brackets. + currently associated with the "mem" string in the ``state`` file is + shown in square brackets. If the kernel does not support system suspend, this file is not present. ``disk`` - This file contains a list of strings representing different operations - that can be carried out after the hibernation image has been saved. The - possible options are as follows: + This file controls the operating mode of hibernation (Suspend-to-Disk). + Specifically, it tells the kernel what to do after creating a + hibernation image. + + Reading from it returns a list of supported options encoded as: ``platform`` Put the system into a special low-power state (e.g. ACPI S4) to @@ -201,6 +206,11 @@ user space for sleep states control. platform firmware to take a simplified initialization path after wakeup. + It is only available if the platform provides a special + mechanism to put the system to sleep after creating a + hibernation image (platforms with ACPI do that as a rule, for + example). + ``shutdown`` Power off the system. @@ -214,22 +224,53 @@ user space for sleep states control. the hibernation image and continue. Otherwise, use the image to restore the previous state of the system. + It is available if system suspend is supported. + ``test_resume`` Diagnostic operation. Load the image as though the system had just woken up from hibernation and the currently running kernel instance was a restore kernel and follow up with full system resume. 
- Writing one of the listed strings into this file causes the option + Writing one of the strings listed above into this file causes the option represented by it to be selected. - The currently selected option is shown in square brackets which means + The currently selected option is shown in square brackets, which means that the operation represented by it will be carried out after creating - and saving the image next time hibernation is triggered by writing - ``disk`` to :file:`/sys/power/state`. + and saving the image when hibernation is triggered by writing ``disk`` + to :file:`/sys/power/state`. If the kernel does not support hibernation, this file is not present. +``image_size`` + This file controls the size of hibernation images. + + It can be written a string representing a non-negative integer that will + be used as a best-effort upper limit of the image size, in bytes. The + hibernation core will do its best to ensure that the image size will not + exceed that number, but if that turns out to be impossible to achieve, a + hibernation image will still be created and its size will be as small as + possible. In particular, writing '0' to this file causes the size of + hibernation images to be minimum. + + Reading from it returns the current image size limit, which is set to + around 2/5 of the available RAM size by default. + +``pm_trace`` + This file controls the "PM trace" mechanism saving the last suspend + or resume event point in the RTC memory across reboots. It helps to + debug hard lockups or reboots due to device driver failures that occur + during system suspend or resume (which is more common) more effectively. + + If it contains "1", the fingerprint of each suspend/resume event point + in turn will be stored in the RTC memory (overwriting the actual RTC + information), so it will survive a system crash if one occurs right + after storing it and it can be used later to identify the driver that + caused the crash to happen. + + It contains "0" by default, which may be changed to "1" by writing a + string representing a nonzero integer into it. + According to the above, there are two ways to make the system go into the :ref:`suspend-to-idle ` state. The first one is to write "freeze" directly to :file:`/sys/power/state`. The second one is to write "s2idle" to @@ -244,6 +285,7 @@ system go into the :ref:`suspend-to-RAM ` state (write "deep" into The default suspend variant (ie. the one to be used without writing anything into :file:`/sys/power/mem_sleep`) is either "deep" (on the majority of systems supporting :ref:`suspend-to-RAM `) or "s2idle", but it can be overridden -by the value of the "mem_sleep_default" parameter in the kernel command line. -On some ACPI-based systems, depending on the information in the ACPI tables, the -default may be "s2idle" even if :ref:`suspend-to-RAM ` is supported. +by the value of the ``mem_sleep_default`` parameter in the kernel command line. +On some systems with ACPI, depending on the information in the ACPI tables, the +default may be "s2idle" even if :ref:`suspend-to-RAM ` is supported in +principle. diff --git a/Documentation/power/interface.rst b/Documentation/power/interface.rst deleted file mode 100644 index 8d270ed27228..000000000000 --- a/Documentation/power/interface.rst +++ /dev/null @@ -1,79 +0,0 @@ -=========================================== -Power Management Interface for System Sleep -=========================================== - -Copyright (c) 2016 Intel Corp., Rafael J. 
Wysocki - -The power management subsystem provides userspace with a unified sysfs interface -for system sleep regardless of the underlying system architecture or platform. -The interface is located in the /sys/power/ directory (assuming that sysfs is -mounted at /sys). - -/sys/power/state is the system sleep state control file. - -Reading from it returns a list of supported sleep states, encoded as: - -- 'freeze' (Suspend-to-Idle) -- 'standby' (Power-On Suspend) -- 'mem' (Suspend-to-RAM) -- 'disk' (Suspend-to-Disk) - -Suspend-to-Idle is always supported. Suspend-to-Disk is always supported -too as long the kernel has been configured to support hibernation at all -(ie. CONFIG_HIBERNATION is set in the kernel configuration file). Support -for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the -platform. - -If one of the strings listed in /sys/power/state is written to it, the system -will attempt to transition into the corresponding sleep state. Refer to -Documentation/admin-guide/pm/sleep-states.rst for a description of each of -those states. - -/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk). -Specifically, it tells the kernel what to do after creating a hibernation image. - -Reading from it returns a list of supported options encoded as: - -- 'platform' (put the system into sleep using a platform-provided method) -- 'shutdown' (shut the system down) -- 'reboot' (reboot the system) -- 'suspend' (trigger a Suspend-to-RAM transition) -- 'test_resume' (resume-after-hibernation test mode) - -The currently selected option is printed in square brackets. - -The 'platform' option is only available if the platform provides a special -mechanism to put the system to sleep after creating a hibernation image (ACPI -does that, for example). The 'suspend' option is available if Suspend-to-RAM -is supported. Refer to Documentation/power/basic-pm-debugging.rst for the -description of the 'test_resume' option. - -To select an option, write the string representing it to /sys/power/disk. - -/sys/power/image_size controls the size of hibernation images. - -It can be written a string representing a non-negative integer that will be -used as a best-effort upper limit of the image size, in bytes. The hibernation -core will do its best to ensure that the image size will not exceed that number. -However, if that turns out to be impossible to achieve, a hibernation image will -still be created and its size will be as small as possible. In particular, -writing '0' to this file will enforce hibernation images to be as small as -possible. - -Reading from this file returns the current image size limit, which is set to -around 2/5 of available RAM by default. - -/sys/power/pm_trace controls the PM trace mechanism saving the last suspend -or resume event point in the RTC across reboots. - -It helps to debug hard lockups or reboots due to device driver failures that -occur during system suspend or resume (which is more common) more effectively. - -If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume -event point in turn will be stored in the RTC memory (overwriting the actual -RTC information), so it will survive a system crash if one occurs right after -storing it and it can be used later to identify the driver that caused the crash -to happen (see Documentation/power/s2ram.rst for more information). - -Initially it contains '0' which may be changed to '1' by writing a string -representing a nonzero integer into it. 
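
As an illustration of the sysfs sleep interface described above, here is a minimal user-space sketch that selects the "deep" suspend variant and then suspends the system. The paths come from the documentation; everything else (error handling, running as root, "deep" being listed in mem_sleep) is an assumption of the example, not part of the patch.

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        /* Write a short string to a sysfs attribute; returns 0 on success. */
        static int write_sysfs(const char *path, const char *val)
        {
                int fd = open(path, O_WRONLY);
                ssize_t n;

                if (fd < 0) {
                        perror(path);
                        return -1;
                }
                n = write(fd, val, strlen(val));
                close(fd);
                return n < 0 ? -1 : 0;
        }

        int main(void)
        {
                /* Make "mem" mean Suspend-to-RAM (fails if "deep" is not listed). */
                if (write_sysfs("/sys/power/mem_sleep", "deep"))
                        return 1;
                /* Trigger the transition; the write returns after resume. */
                return write_sysfs("/sys/power/state", "mem") ? 1 : 0;
        }
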
From c01a4a136440cb9727a519bb10f5f2696a80e8de Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Mon, 3 Feb 2020 09:36:06 +0800 Subject: [PATCH 482/658] ACPI / APD: Add clock frequency for Hisilicon Hip08-Lite I2C controller I2C clock frequency of Designware ip for Hisilicon Hip08 Lite is 125M, use a new ACPI HID to enable it. Tested-by: Sheng Feng Signed-off-by: Hanjun Guo Reviewed-by: Jarkko Nikula Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_apd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index 71511ae2dfcd..ba2612e9a0eb 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -161,6 +161,11 @@ static const struct apd_device_desc hip08_i2c_desc = { .fixed_clk_rate = 250000000, }; +static const struct apd_device_desc hip08_lite_i2c_desc = { + .setup = acpi_apd_setup, + .fixed_clk_rate = 125000000, +}; + static const struct apd_device_desc thunderx2_i2c_desc = { .setup = acpi_apd_setup, .fixed_clk_rate = 125000000, @@ -243,6 +248,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = { { "CAV9007", APD_ADDR(thunderx2_i2c_desc) }, { "HISI02A1", APD_ADDR(hip07_i2c_desc) }, { "HISI02A2", APD_ADDR(hip08_i2c_desc) }, + { "HISI02A3", APD_ADDR(hip08_lite_i2c_desc) }, { "HISI0173", APD_ADDR(hip08_spi_desc) }, { "NXP0001", APD_ADDR(nxp_i2c_desc) }, #endif From dec0a81a7814c8a876e663e0627468e6b1a7d2fb Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Mon, 3 Feb 2020 09:36:07 +0800 Subject: [PATCH 483/658] i2c: designware: Add ACPI HID for Hisilicon Hip08-Lite I2C controller Add ACPI HID HISI02A3 for Hisilicon Hip08 Lite, which has different clock frequency from Hip08 for I2C controller. Tested-by: Sheng Feng Signed-off-by: Hanjun Guo Acked-by: Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- drivers/i2c/busses/i2c-designware-platdrv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 16dd338877d0..3b7d58c2fe85 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -130,6 +130,7 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = { { "APMC0D0F", 0 }, { "HISI02A1", 0 }, { "HISI02A2", 0 }, + { "HISI02A3", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match); From 58ae5284f663f0856a13bc6caad93a0fe49d7cb4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 3 Feb 2020 01:58:45 -0500 Subject: [PATCH 484/658] drm/nouveau/disp/gv100-: halt NV_PDISP_FE_RM_INTR_STAT_CTRL_DISP_ERROR storms Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c index 3aa2cc3af1e2..c1032527f791 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c @@ -155,6 +155,12 @@ gv100_disp_intr_ctrl_disp(struct nv50_disp *disp) if (stat & 0x00000008) stat &= ~0x00000008; + if (stat & 0x00000080) { + u32 error = nvkm_mask(device, 0x611848, 0x00000000, 0x00000000); + nvkm_warn(subdev, "error %08x\n", error); + stat &= ~0x00000080; + } + if (stat & 0x00000100) { unsigned long wndws = nvkm_rd32(device, 0x611858); unsigned long other = nvkm_rd32(device, 0x61185c); From 5bb88d07948b6779cb783ec0f08b4c1474d592dd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 3 Feb 2020 03:36:30 -0500 Subject: [PATCH 485/658] drm/nouveau/kms/gv100-: move window ownership setup into modesetting path For various complicated reasons, we need to avoid sending a core update method during display init. Something, which we've been required to do on GV100 and up because we've been assigning windows to heads there and the HW is rather picky about when that's allowed. This moves window assignment into the modesetting path at a point where it's much safer to send our first update methods to NVDisplay. 
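
The approach boils down to a one-shot flag: remember at init time that window ownership still has to be programmed, and perform the programming once from the first commit. A simplified, self-contained sketch of that pattern follows; the names (struct core, program_window_owners, core_commit) are invented for illustration and are not the nouveau code.

        #include <stdbool.h>
        #include <stdio.h>

        struct core {
                bool assign_windows;    /* set at init, consumed by the first commit */
        };

        static void core_init(struct core *c)
        {
                /* Too early to touch the hardware safely; just remember to do it. */
                c->assign_windows = true;
        }

        static void program_window_owners(struct core *c)
        {
                printf("programming window->head ownership\n");
        }

        static void core_commit(struct core *c)
        {
                if (c->assign_windows) {
                        program_window_owners(c);
                        c->assign_windows = false;      /* one-shot */
                }
                /* ...rest of the modeset commit... */
        }

        int main(void)
        {
                struct core c;

                core_init(&c);
                core_commit(&c);        /* ownership programmed here */
                core_commit(&c);        /* not repeated */
                return 0;
        }
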
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/core.h | 6 ++++++ drivers/gpu/drm/nouveau/dispnv50/corec37d.c | 21 ++++++++++++++++++--- drivers/gpu/drm/nouveau/dispnv50/corec57d.c | 7 ++++--- drivers/gpu/drm/nouveau/dispnv50/disp.c | 16 ++++++++++++++++ 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.h b/drivers/gpu/drm/nouveau/dispnv50/core.h index df8336b593f7..ff94f3f6f264 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core.h +++ b/drivers/gpu/drm/nouveau/dispnv50/core.h @@ -6,6 +6,7 @@ struct nv50_core { const struct nv50_core_func *func; struct nv50_dmac chan; + bool assign_windows; }; int nv50_core_new(struct nouveau_drm *, struct nv50_core **); @@ -18,6 +19,10 @@ struct nv50_core_func { struct nvif_device *); void (*update)(struct nv50_core *, u32 *interlock, bool ntfy); + struct { + void (*owner)(struct nv50_core *); + } wndw; + const struct nv50_head_func *head; const struct nv50_outp_func { void (*ctrl)(struct nv50_core *, int or, u32 ctrl, @@ -48,6 +53,7 @@ int core917d_new(struct nouveau_drm *, s32, struct nv50_core **); int corec37d_new(struct nouveau_drm *, s32, struct nv50_core **); int corec37d_ntfy_wait_done(struct nouveau_bo *, u32, struct nvif_device *); void corec37d_update(struct nv50_core *, u32 *, bool); +void corec37d_wndw_owner(struct nv50_core *); extern const struct nv50_outp_func sorc37d; int corec57d_new(struct nouveau_drm *, s32, struct nv50_core **); diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c index 40d9b654ab8c..f414171e40b4 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c @@ -24,6 +24,20 @@ #include +void +corec37d_wndw_owner(struct nv50_core *core) +{ + const u32 windows = 8; /*XXX*/ + u32 *push, i; + if ((push = evo_wait(&core->chan, 2 * windows))) { + for (i = 0; i < windows; i++) { + evo_mthd(push, 0x1000 + (i * 0x080), 1); + evo_data(push, i >> 1); + } + evo_kick(push, &core->chan); + } +} + void corec37d_update(struct nv50_core *core, u32 *interlock, bool ntfy) { @@ -76,12 +90,11 @@ corec37d_init(struct nv50_core *core) { const u32 windows = 8; /*XXX*/ u32 *push, i; - if ((push = evo_wait(&core->chan, 2 + 6 * windows + 2))) { + if ((push = evo_wait(&core->chan, 2 + 5 * windows + 2))) { evo_mthd(push, 0x0208, 1); evo_data(push, core->chan.sync.handle); for (i = 0; i < windows; i++) { - evo_mthd(push, 0x1000 + (i * 0x080), 3); - evo_data(push, i >> 1); + evo_mthd(push, 0x1004 + (i * 0x080), 2); evo_data(push, 0x0000001f); evo_data(push, 0x00000000); evo_mthd(push, 0x1010 + (i * 0x080), 1); @@ -90,6 +103,7 @@ corec37d_init(struct nv50_core *core) evo_mthd(push, 0x0200, 1); evo_data(push, 0x00000001); evo_kick(push, &core->chan); + core->assign_windows = true; } } @@ -99,6 +113,7 @@ corec37d = { .ntfy_init = corec37d_ntfy_init, .ntfy_wait_done = corec37d_ntfy_wait_done, .update = corec37d_update, + .wndw.owner = corec37d_wndw_owner, .head = &headc37d, .sor = &sorc37d, }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c index b606d68cda10..b540606ac052 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c @@ -27,12 +27,11 @@ corec57d_init(struct nv50_core *core) { const u32 windows = 8; /*XXX*/ u32 *push, i; - if ((push = evo_wait(&core->chan, 2 + 6 * windows + 2))) { + if ((push = evo_wait(&core->chan, 2 + 5 * windows + 2))) { evo_mthd(push, 0x0208, 1); evo_data(push, 
core->chan.sync.handle); for (i = 0; i < windows; i++) { - evo_mthd(push, 0x1000 + (i * 0x080), 3); - evo_data(push, i >> 1); + evo_mthd(push, 0x1004 + (i * 0x080), 2); evo_data(push, 0x0000000f); evo_data(push, 0x00000000); evo_mthd(push, 0x1010 + (i * 0x080), 1); @@ -41,6 +40,7 @@ corec57d_init(struct nv50_core *core) evo_mthd(push, 0x0200, 1); evo_data(push, 0x00000001); evo_kick(push, &core->chan); + core->assign_windows = true; } } @@ -50,6 +50,7 @@ corec57d = { .ntfy_init = corec37d_ntfy_init, .ntfy_wait_done = corec37d_ntfy_wait_done, .update = corec37d_update, + .wndw.owner = corec37d_wndw_owner, .head = &headc57d, .sor = &sorc37d, }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 2f123082c85d..a3dc2ba19fb2 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1933,6 +1933,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) struct nouveau_drm *drm = nouveau_drm(dev); struct nv50_disp *disp = nv50_disp(dev); struct nv50_atom *atom = nv50_atom(state); + struct nv50_core *core = disp->core; struct nv50_outp_atom *outp, *outt; u32 interlock[NV50_DISP_INTERLOCK__SIZE] = {}; int i; @@ -2051,6 +2052,21 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) } } + /* Update window->head assignment. + * + * This has to happen in an update that's not interlocked with + * any window channels to avoid hitting HW error checks. + * + *TODO: Proper handling of window ownership (Turing apparently + * supports non-fixed mappings). + */ + if (core->assign_windows) { + core->func->wndw.owner(core); + core->func->update(core, interlock, false); + core->assign_windows = false; + interlock[NV50_DISP_INTERLOCK_CORE] = 0; + } + /* Update plane(s). */ for_each_new_plane_in_state(state, plane, new_plane_state, i) { struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); From 137c4ba7163ad9d5696b9fde78b1c0898a9c115b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 3 Feb 2020 03:37:07 -0500 Subject: [PATCH 486/658] drm/nouveau/kms/gv100-: avoid sending a core update until the first modeset The OR routing logic in NVKM does not expect to receive supervisor interrupts until the DD has provided consistent information on the ORs it's using and the EVO/NVD assembly state to match. The combination of changing window ownership + core channel update during display init triggered a situation where we'd disconnect an OR from the pad it was meant to still be driving on some systems. 
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/corec37d.c | 4 +--- drivers/gpu/drm/nouveau/dispnv50/corec57d.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c index f414171e40b4..3b36dc8d36b2 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c @@ -90,7 +90,7 @@ corec37d_init(struct nv50_core *core) { const u32 windows = 8; /*XXX*/ u32 *push, i; - if ((push = evo_wait(&core->chan, 2 + 5 * windows + 2))) { + if ((push = evo_wait(&core->chan, 2 + 5 * windows))) { evo_mthd(push, 0x0208, 1); evo_data(push, core->chan.sync.handle); for (i = 0; i < windows; i++) { @@ -100,8 +100,6 @@ corec37d_init(struct nv50_core *core) evo_mthd(push, 0x1010 + (i * 0x080), 1); evo_data(push, 0x00127fff); } - evo_mthd(push, 0x0200, 1); - evo_data(push, 0x00000001); evo_kick(push, &core->chan); core->assign_windows = true; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c index b540606ac052..147adcd60937 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c @@ -27,7 +27,7 @@ corec57d_init(struct nv50_core *core) { const u32 windows = 8; /*XXX*/ u32 *push, i; - if ((push = evo_wait(&core->chan, 2 + 5 * windows + 2))) { + if ((push = evo_wait(&core->chan, 2 + 5 * windows))) { evo_mthd(push, 0x0208, 1); evo_data(push, core->chan.sync.handle); for (i = 0; i < windows; i++) { @@ -37,8 +37,6 @@ corec57d_init(struct nv50_core *core) evo_mthd(push, 0x1010 + (i * 0x080), 1); evo_data(push, 0x00117fff); } - evo_mthd(push, 0x0200, 1); - evo_data(push, 0x00000001); evo_kick(push, &core->chan); core->assign_windows = true; } From 30460e1ea3e62f8457e087db9a309ed1031630da Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:41 +0100 Subject: [PATCH 487/658] fs: Enable bmap() function to properly return errors By now, bmap() will either return the physical block number related to the requested file offset or 0 in case of error or the requested offset maps into a hole. This patch makes the needed changes to enable bmap() to proper return errors, using the return value as an error return, and now, a pointer must be passed to bmap() to be filled with the mapped physical block. It will change the behavior of bmap() on return: - negative value in case of error - zero on success or map fell into a hole In case of a hole, the *block will be zero too Since this is a prep patch, by now, the only error return is -EINVAL if ->bmap doesn't exist. 
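
To make the new calling convention concrete, here is a kernel-side sketch of a bmap() caller after this change, following the conversions in the diff above. It is not meant to compile standalone; map_one_block() and the choice of -ENODATA for holes are illustrative, not part of the patch.

        #include <linux/errno.h>
        #include <linux/fs.h>

        /* Map one logical file block to a physical block on the inode's device. */
        static int map_one_block(struct inode *inode, sector_t file_block,
                                 sector_t *phys_block)
        {
                sector_t block = file_block;    /* in: logical block, out: physical */
                int err;

                err = bmap(inode, &block);
                if (err)                /* currently only -EINVAL: no ->bmap method */
                        return err;
                if (!block)             /* mapping fell into a hole */
                        return -ENODATA;        /* caller's choice of error code */

                *phys_block = block;
                return 0;
        }
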
Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- drivers/md/md-bitmap.c | 16 ++++++++++------ fs/f2fs/data.c | 16 +++++++++++----- fs/inode.c | 30 ++++++++++++++++++------------ fs/jbd2/journal.c | 22 +++++++++++++++------- include/linux/fs.h | 9 ++++++++- mm/page_io.c | 11 +++++++---- 6 files changed, 69 insertions(+), 35 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 3ad18246fcb3..92d3b515252d 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -364,7 +364,7 @@ static int read_page(struct file *file, unsigned long index, int ret = 0; struct inode *inode = file_inode(file); struct buffer_head *bh; - sector_t block; + sector_t block, blk_cur; pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT); @@ -375,17 +375,21 @@ static int read_page(struct file *file, unsigned long index, goto out; } attach_page_buffers(page, bh); - block = index << (PAGE_SHIFT - inode->i_blkbits); + blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); while (bh) { + block = blk_cur; + if (count == 0) bh->b_blocknr = 0; else { - bh->b_blocknr = bmap(inode, block); - if (bh->b_blocknr == 0) { - /* Cannot use this file! */ + ret = bmap(inode, &block); + if (ret || !block) { ret = -EINVAL; + bh->b_blocknr = 0; goto out; } + + bh->b_blocknr = block; bh->b_bdev = inode->i_sb->s_bdev; if (count < (1<i_blkbits)) count = 0; @@ -399,7 +403,7 @@ static int read_page(struct file *file, unsigned long index, set_buffer_mapped(bh); submit_bh(REQ_OP_READ, 0, bh); } - block++; + blk_cur++; bh = bh->b_this_page; } page->index = index; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a034cd0ce021..e948902c4ec5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3170,12 +3170,16 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) while ((probe_block + blocks_per_page) <= last_block && page_no < max) { unsigned block_in_page; sector_t first_block; + sector_t block = 0; + int err = 0; cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + block = probe_block; + err = bmap(inode, &block); + if (err || !block) goto bad_bmap; + first_block = block; /* * It must be PAGE_SIZE aligned on-disk @@ -3187,11 +3191,13 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) for (block_in_page = 1; block_in_page < blocks_per_page; block_in_page++) { - sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + err = bmap(inode, &block); + + if (err || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; diff --git a/fs/inode.c b/fs/inode.c index 96d62d97694e..5dacfa1fb129 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1598,25 +1598,31 @@ retry: } EXPORT_SYMBOL(iput); +#ifdef CONFIG_BLOCK /** * bmap - find a block number in a file - * @inode: inode of file - * @block: block to find + * @inode: inode owning the block number being requested + * @block: pointer containing the block to find * - * Returns the block number on the device holding the inode that - * is the disk block number for the block of the file requested. - * That is, asked for block 4 of inode 1 the function will return the - * disk block relative to the disk start that holds that block of the - * file. + * Replaces the value in *block with the block number on the device holding + * corresponding to the requested block number in the file. 
+ * That is, asked for block 4 of inode 1 the function will replace the + * 4 in *block, with disk block relative to the disk start that holds that + * block of the file. + * + * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a + * hole, returns 0 and *block is also set to 0. */ -sector_t bmap(struct inode *inode, sector_t block) +int bmap(struct inode *inode, sector_t *block) { - sector_t res = 0; - if (inode->i_mapping->a_ops->bmap) - res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block); - return res; + if (!inode->i_mapping->a_ops->bmap) + return -EINVAL; + + *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block); + return 0; } EXPORT_SYMBOL(bmap); +#endif /* * With relative atime, only update atime if the previous atime is diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5e408ee24a1a..01fa5d247e39 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -795,18 +795,23 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, { int err = 0; unsigned long long ret; + sector_t block = 0; if (journal->j_inode) { - ret = bmap(journal->j_inode, blocknr); - if (ret) - *retp = ret; - else { + block = blocknr; + ret = bmap(journal->j_inode, &block); + + if (ret || !block) { printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", __func__, blocknr, journal->j_devname); err = -EIO; __journal_abort_soft(journal, err); + + } else { + *retp = block; } + } else { *retp = blocknr; /* +journal->j_blk_offset */ } @@ -1244,11 +1249,14 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev, journal_t *jbd2_journal_init_inode(struct inode *inode) { journal_t *journal; + sector_t blocknr; char *p; - unsigned long long blocknr; + int err = 0; - blocknr = bmap(inode, 0); - if (!blocknr) { + blocknr = 0; + err = bmap(inode, &blocknr); + + if (err || !blocknr) { pr_err("%s: Cannot locate journal superblock\n", __func__); return NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..f762e07ea589 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2865,9 +2865,16 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) extern void emergency_sync(void); extern void emergency_remount(void); + #ifdef CONFIG_BLOCK -extern sector_t bmap(struct inode *, sector_t); +extern int bmap(struct inode *inode, sector_t *block); +#else +static inline int bmap(struct inode *inode, sector_t *block) +{ + return -EINVAL; +} #endif + extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); diff --git a/mm/page_io.c b/mm/page_io.c index 3a198deb8bb1..76965be1d40e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -177,8 +177,9 @@ int generic_swapfile_activate(struct swap_info_struct *sis, cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + first_block = probe_block; + ret = bmap(inode, &first_block); + if (ret || !first_block) goto bad_bmap; /* @@ -193,9 +194,11 @@ int generic_swapfile_activate(struct swap_info_struct *sis, block_in_page++) { sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + ret = bmap(inode, &block); + if (ret || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; From 10d83e11a582631a14579a087a55608075c6f539 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:42 +0100 Subject: 
[PATCH 488/658] cachefiles: drop direct usage of ->bmap method. Replace the direct usage of ->bmap method by a bmap() call. Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/cachefiles/rdwr.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 44a3ce1e4ce4..1dc97f2d6201 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -396,7 +396,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, struct cachefiles_object *object; struct cachefiles_cache *cache; struct inode *inode; - sector_t block0, block; + sector_t block; unsigned shift; int ret; @@ -412,7 +412,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ @@ -428,12 +427,14 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, * enough for this as it doesn't indicate errors, but it's all we've * got for the moment */ - block0 = page->index; - block0 <<= shift; + block = page->index; + block <<= shift; + + ret = bmap(inode, &block); + ASSERT(ret < 0); - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); _debug("%llx -> %llx", - (unsigned long long) block0, + (unsigned long long) (page->index << shift), (unsigned long long) block); if (block) { @@ -711,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ @@ -728,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, ret = space ? -ENODATA : -ENOBUFS; list_for_each_entry_safe(page, _n, pages, lru) { - sector_t block0, block; + sector_t block; /* we assume the absence or presence of the first block is a * good enough indication for the page as a whole @@ -736,13 +736,14 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, * good enough for this as it doesn't indicate errors, but * it's all we've got for the moment */ - block0 = page->index; - block0 <<= shift; + block = page->index; + block <<= shift; + + ret = bmap(inode, &block); + ASSERT(!ret); - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, - block0); _debug("%llx -> %llx", - (unsigned long long) block0, + (unsigned long long) (page->index << shift), (unsigned long long) block); if (block) { From 569d2056def7add85c0a9add03c19017e5525012 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:43 +0100 Subject: [PATCH 489/658] ecryptfs: drop direct calls to ->bmap Replace direct ->bmap calls by bmap() method. 
Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/ecryptfs/mmap.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index cffa0c1ec829..019572c6b39a 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -524,16 +524,12 @@ out: static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) { - int rc = 0; - struct inode *inode; - struct inode *lower_inode; + struct inode *lower_inode = ecryptfs_inode_to_lower(mapping->host); + int ret = bmap(lower_inode, &block); - inode = (struct inode *)mapping->host; - lower_inode = ecryptfs_inode_to_lower(inode); - if (lower_inode->i_mapping->a_ops->bmap) - rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping, - block); - return rc; + if (ret) + return 0; + return block; } const struct address_space_operations ecryptfs_aops = { From 0d89fdae2afe833dd3025edd8c8287675d45c74e Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:44 +0100 Subject: [PATCH 490/658] fibmap: Use bmap instead of ->bmap method in ioctl_fibmap Now we have the possibility of proper error return in bmap, use bmap() function in ioctl_fibmap() instead of calling ->bmap method directly. Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/ioctl.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 2f5e4e5b97e1..13327862f278 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -54,19 +54,29 @@ EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { - struct address_space *mapping = filp->f_mapping; - int res, block; + struct inode *inode = file_inode(filp); + int error, ur_block; + sector_t block; - /* do we support this mess? */ - if (!mapping->a_ops->bmap) - return -EINVAL; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - res = get_user(block, p); - if (res) - return res; - res = mapping->a_ops->bmap(mapping, block); - return put_user(res, p); + + error = get_user(ur_block, p); + if (error) + return error; + + block = ur_block; + error = bmap(inode, &block); + + if (error) + ur_block = 0; + else + ur_block = block; + + if (put_user(ur_block, p)) + error = -EFAULT; + + return error; } /** From 324282c0252a44a97d628813e30ea7258940d469 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:45 +0100 Subject: [PATCH 491/658] fibmap: Reject negative block numbers FIBMAP receives an integer from userspace which is then implicitly converted into sector_t to be passed to bmap(). No check is made to ensure userspace didn't send a negative block number, which can end up in an underflow, and returning to userspace a corrupted block address. As a side-effect, the underflow caused by a negative block here, will trigger the WARN() in iomap_bmap_actor(), which is how this issue was first discovered. 
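
For context, the FIBMAP ioctl that the two patches above harden is driven from user space roughly as follows. This is a minimal sketch (it needs CAP_SYS_RAWIO, i.e. normally root) and it also shows why the block number reaches the kernel as a plain int.

        #include <fcntl.h>
        #include <linux/fs.h>           /* FIBMAP */
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(int argc, char **argv)
        {
                int fd, block = 4;      /* logical block to look up */

                if (argc < 2)
                        return 1;
                fd = open(argv[1], O_RDONLY);
                if (fd < 0) {
                        perror("open");
                        return 1;
                }
                if (ioctl(fd, FIBMAP, &block) < 0) {    /* block is replaced in place */
                        perror("FIBMAP");
                        close(fd);
                        return 1;
                }
                printf("logical block 4 -> physical block %d (0 means hole/unmapped)\n",
                       block);
                close(fd);
                return 0;
        }
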
Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ioctl.c b/fs/ioctl.c index 13327862f278..0be9bee9ff8f 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -65,6 +65,9 @@ static int ioctl_fibmap(struct file *filp, int __user *p) if (error) return error; + if (ur_block < 0) + return -EINVAL; + block = ur_block; error = bmap(inode, &block); From f718b093277df582fbf8775548a4f163e664d282 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:40:54 +0100 Subject: [PATCH 492/658] block, bfq: do not plug I/O for bfq_queues with no proc refs Commit 478de3380c1c ("block, bfq: deschedule empty bfq_queues not referred by any process") fixed commit 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging") by descheduling an empty bfq_queue when it remains with not process reference. Yet, this still left a case uncovered: an empty bfq_queue with not process reference that remains in service. This happens for an in-service sync bfq_queue that is deemed to deserve I/O-dispatch plugging when it remains empty. Yet no new requests will arrive for such a bfq_queue if no process sends requests to it any longer. Even worse, the bfq_queue may happen to be prematurely freed while still in service (because there may remain no reference to it any longer). This commit solves this problem by preventing I/O dispatch from being plugged for the in-service bfq_queue, if the latter has no process reference (the bfq_queue is then prevented from remaining in service). Fixes: 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging") Tested-by: Oleksandr Natalenko Reported-by: Patrick Dung Tested-by: Patrick Dung Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4686b68b48b4..55d4328e7c12 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3443,6 +3443,10 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + return (bfqq->wr_coeff > 1 && (bfqd->wr_busy_queues < bfq_tot_busy_queues(bfqd) || @@ -4076,6 +4080,10 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, bfqq_sequential_and_IO_bound, idling_boosts_thr; + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) && bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq); @@ -4169,6 +4177,10 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) struct bfq_data *bfqd = bfqq->bfqd; bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar; + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + if (unlikely(bfqd->strict_guarantees)) return true; From 32c59e3a9a5a0b180dd015755d6d18ca31e55935 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:40:55 +0100 Subject: [PATCH 493/658] block, bfq: do not insert oom queue into position tree BFQ maintains an ordered list, implemented with an RB tree, of head-request positions of non-empty bfq_queues. 
This position tree, inherited from CFQ, is used to find bfq_queues that contain I/O close to each other. BFQ merges these bfq_queues into a single shared queue, if this boosts throughput on the device at hand. There is however a special-purpose bfq_queue that does not participate in queue merging, the oom bfq_queue. Yet, also this bfq_queue could be wrongly added to the position tree. So bfqq_find_close() could return the oom bfq_queue, which is a source of further troubles in an out-of-memory situation. This commit prevents the oom bfq_queue from being inserted into the position tree. Tested-by: Patrick Dung Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 55d4328e7c12..15dfb0844644 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -613,6 +613,10 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->pos_root = NULL; } + /* oom_bfqq does not participate in queue merging */ + if (bfqq == &bfqd->oom_bfqq) + return; + /* * bfqq cannot be merged any longer (see comments in * bfq_setup_cooperator): no point in adding bfqq into the From ecedd3d7e19911ab8fe42f17b77c0a30fe7f4db3 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:40:56 +0100 Subject: [PATCH 494/658] block, bfq: get extra ref to prevent a queue from being freed during a group move In bfq_bfqq_move(), the bfq_queue, say Q, to be moved to a new group may happen to be deactivated in the scheduling data structures of the source group (and then activated in the destination group). If Q is referred only by the data structures in the source group when the deactivation happens, then Q is freed upon the deactivation. This commit addresses this issue by getting an extra reference before the possible deactivation, and releasing this extra reference after Q has been moved. Tested-by: Chris Evich Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index e1419edde2ec..8ab7f18ff8cb 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -651,6 +651,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); + /* + * get extra reference to prevent bfqq from being freed in + * next possible deactivate + */ + bfqq->ref++; + if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st) @@ -670,6 +676,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->in_service_queue && !bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); + /* release extra ref taken above */ + bfq_put_queue(bfqq); } /** From 33a16a9804688b2f4c4281ec31bc393ef2645ae4 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:40:57 +0100 Subject: [PATCH 495/658] block, bfq: extend incomplete name of field on_st The flag on_st in the bfq_entity data structure is true if the entity is on a service tree or is in service. Yet the name of the field, confusingly, does not mention the second, very important case. Extend the name to mention the second case too. 
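
The group-move fix above is an instance of a generic pattern: pin an object with a temporary reference across an operation that may drop what would otherwise be its last reference. Below is a kernel-side sketch of that pattern using kref counting; struct obj and the deactivate/attach/activate helpers are placeholders, not BFQ code, and the snippet is not meant to compile standalone.

        #include <linux/kref.h>
        #include <linux/slab.h>

        struct obj {
                struct kref ref;
                /* ... */
        };

        static void obj_release(struct kref *kr)
        {
                kfree(container_of(kr, struct obj, ref));
        }

        static void move_obj(struct obj *o)
        {
                kref_get(&o->ref);              /* extra ref: o must survive the move */

                deactivate_obj(o);              /* placeholder: may drop a reference */
                attach_to_new_parent(o);        /* placeholder */
                activate_obj(o);                /* placeholder */

                kref_put(&o->ref, obj_release); /* release the extra ref taken above */
        }
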
Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 2 +- block/bfq-iosched.c | 2 +- block/bfq-iosched.h | 2 +- block/bfq-wf2q.c | 11 +++++++---- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 8ab7f18ff8cb..c818c64766e5 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -659,7 +659,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); - else if (entity->on_st) + else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(bfqq_group(bfqq)); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 15dfb0844644..28770ec7c06f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1059,7 +1059,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, static int bfqq_process_refs(struct bfq_queue *bfqq) { - return bfqq->ref - bfqq->allocated - bfqq->entity.on_st - + return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv - (bfqq->weight_counter != NULL); } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 8526f20c53bc..f1cb89def7f8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -150,7 +150,7 @@ struct bfq_entity { * Flag, true if the entity is on a tree (either the active or * the idle one of its service_tree) or is in service. */ - bool on_st; + bool on_st_or_in_serv; /* B-WF2Q+ start and finish timestamps [sectors/weight] */ u64 start, finish; diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ffe9ce9faa89..26776bdbdf36 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -645,7 +645,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st, { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - entity->on_st = false; + entity->on_st_or_in_serv = false; st->wsum -= entity->weight; if (bfqq && !is_in_service) bfq_put_queue(bfqq); @@ -999,7 +999,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, */ bfq_get_entity(entity); - entity->on_st = true; + entity->on_st_or_in_serv = true; } #ifdef CONFIG_BFQ_GROUP_IOSCHED @@ -1165,7 +1165,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) struct bfq_service_tree *st; bool is_in_service; - if (!entity->on_st) /* entity never activated, or already inactive */ + if (!entity->on_st_or_in_serv) /* + * entity never activated, or + * already inactive + */ return false; /* @@ -1620,7 +1623,7 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). */ - if (!in_serv_entity->on_st) { + if (!in_serv_entity->on_st_or_in_serv) { /* * If no process is referencing in_serv_bfqq any * longer, then the service reference may be the only From 4d8340d0d4d90e7ca367d18ec16c2fefa89a339c Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:40:58 +0100 Subject: [PATCH 496/658] block, bfq: remove ifdefs from around gets/puts of bfq groups ifdefs around gets and puts of bfq groups reduce readability, remove them. 
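
The cleanup works because of a common kernel idiom: provide empty stub functions for the disabled-CONFIG case so that call sites need no #ifdef. A tiny self-contained illustration follows; CONFIG_FOO and the foo_get()/foo_put() names are made up.

        #include <stdio.h>

        #ifdef CONFIG_FOO
        void foo_get(void) { printf("foo_get\n"); }
        void foo_put(void) { printf("foo_put\n"); }
        #else
        /* Stubs: callers compile and run unchanged, the calls cost nothing. */
        static inline void foo_get(void) {}
        static inline void foo_put(void) {}
        #endif

        int main(void)
        {
                foo_get();      /* no #ifdef needed here ... */
                /* ... do work ... */
                foo_put();      /* ... or here */
                return 0;
        }
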
Tested-by: Oleksandr Natalenko Reported-by: Jens Axboe Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 ++++ block/bfq-iosched.c | 6 +----- block/bfq-iosched.h | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index c818c64766e5..cae488b58049 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1406,6 +1406,10 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) return bfqq->bfqd->root_group; } +void bfqg_and_blkg_get(struct bfq_group *bfqg) {} + +void bfqg_and_blkg_put(struct bfq_group *bfqg) {} + struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 28770ec7c06f..fff76c920968 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4825,9 +4825,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) { struct bfq_queue *item; struct hlist_node *n; -#ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqg = bfqq_group(bfqq); -#endif if (bfqq->bfqd) bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", @@ -4900,9 +4898,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->last_completed_rq_bfqq = NULL; kmem_cache_free(bfq_pool, bfqq); -#ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_and_blkg_put(bfqg); -#endif } static void bfq_put_cooperator(struct bfq_queue *bfqq) @@ -6390,10 +6386,10 @@ static void bfq_exit_queue(struct elevator_queue *e) hrtimer_cancel(&bfqd->idle_slice_timer); -#ifdef CONFIG_BFQ_GROUP_IOSCHED /* release oom-queue reference to root group */ bfqg_and_blkg_put(bfqd->root_group); +#ifdef CONFIG_BFQ_GROUP_IOSCHED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else spin_lock_irq(&bfqd->lock); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index f1cb89def7f8..2c7cec737b2a 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -921,6 +921,7 @@ struct bfq_group { #else struct bfq_group { + struct bfq_entity entity; struct bfq_sched_data sched_data; struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; From db37a34c563bf4692b36990ae89005c031385e52 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:40:59 +0100 Subject: [PATCH 497/658] block, bfq: get a ref to a group when adding it to a service tree BFQ schedules generic entities, which may represent either bfq_queues or groups of bfq_queues. When an entity is inserted into a service tree, a reference must be taken, to make sure that the entity does not disappear while still referred in the tree. Unfortunately, such a reference is mistakenly taken only if the entity represents a bfq_queue. This commit takes a reference also in case the entity represents a group. 
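
Since a bfq_entity can be embedded in either a bfq_queue or a bfq_group, the fix recovers the containing group with container_of(). Here is a standalone sketch of that embed-and-recover pattern, with a simplified userspace container_of and invented type names; it only illustrates the idiom, not the BFQ data structures.

        #include <stddef.h>
        #include <stdio.h>

        /* Simplified version of the kernel macro, for illustration only. */
        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))

        struct entity { int weight; };

        struct group {
                const char *name;
                struct entity entity;   /* embedded, like bfq_group::entity */
        };

        static void get_ref(struct entity *e)
        {
                /* We only have the embedded entity; step back to the group. */
                struct group *g = container_of(e, struct group, entity);

                printf("taking a reference on group %s\n", g->name);
        }

        int main(void)
        {
                struct group g = { .name = "root", .entity = { .weight = 100 } };

                get_ref(&g.entity);
                return 0;
        }
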
Tested-by: Oleksandr Natalenko Tested-by: Chris Evich Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 2 +- block/bfq-iosched.h | 1 + block/bfq-wf2q.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index cae488b58049..09b69a3ed490 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg) kfree(bfqg); } -static void bfqg_and_blkg_get(struct bfq_group *bfqg) +void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 2c7cec737b2a..d1233af9c684 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -985,6 +985,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); +void bfqg_and_blkg_get(struct bfq_group *bfqg); void bfqg_and_blkg_put(struct bfq_group *bfqg); #ifdef CONFIG_BFQ_GROUP_IOSCHED diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 26776bdbdf36..eb0e2a6daabe 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -533,7 +533,9 @@ static void bfq_get_entity(struct bfq_entity *entity) bfqq->ref++; bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", bfqq, bfqq->ref); - } + } else + bfqg_and_blkg_get(container_of(entity, struct bfq_group, + entity)); } /** @@ -647,8 +649,14 @@ static void bfq_forget_entity(struct bfq_service_tree *st, entity->on_st_or_in_serv = false; st->wsum -= entity->weight; - if (bfqq && !is_in_service) + if (is_in_service) + return; + + if (bfqq) bfq_put_queue(bfqq); + else + bfqg_and_blkg_put(container_of(entity, struct bfq_group, + entity)); } /** From c92bddee77353a773f0df76115c1a01877ce1eae Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Feb 2020 11:41:00 +0100 Subject: [PATCH 498/658] block, bfq: clarify the goal of bfq_split_bfqq() The exact, general goal of the function bfq_split_bfqq() is not that apparent. Add a comment to make it clear. Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index fff76c920968..8c436abfaf14 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5979,6 +5979,8 @@ static void bfq_finish_requeue_request(struct request *rq) } /* + * Removes the association between the current task and bfqq, assuming + * that bic points to the bfq iocontext of the task. * Returns NULL if a new bfqq should be allocated, or the old bfqq if this * was the last process referring to that bfqq. */ From c0399cf668a2a1b7310dbedd424f6a4b60aabffc Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 21 Jan 2020 16:49:56 +0800 Subject: [PATCH 499/658] NFS: remove unused macros MNT_fhs_status_sz/MNT_fhandle3_sz are never used after they were introduced. So better to remove them. 
Signed-off-by: Alex Shi Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anna Schumaker --- fs/nfs/mount_clnt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index cb7c10e9721e..35c8cb2d7637 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -29,9 +29,7 @@ */ #define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) #define MNT_status_sz (1) -#define MNT_fhs_status_sz (1) #define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) -#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE)) #define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) /* From 4ac76436a6d07dec1c3c766f234aa787a16e8f65 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sat, 11 Jan 2020 13:08:16 +0530 Subject: [PATCH 500/658] nvmet: Pass lockdep expression to RCU lists ctrl->subsys->namespaces and subsys->namespaces are traversed with list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of ctrl->subsys->lock and subsys->lock respectively. Hence, add the corresponding lockdep expression to the list traversal primitive to silence false-positive lockdep warnings, and harden RCU lists. Reported-by: kbuild test robot Reviewed-by: Joel Fernandes (Google) Signed-off-by: Amol Grover Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 28438b833c1b..35810a0a8d21 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -555,7 +555,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) } else { struct nvmet_ns *old; - list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) { + list_for_each_entry_rcu(old, &subsys->namespaces, dev_link, + lockdep_is_held(&subsys->lock)) { BUG_ON(ns->nsid == old->nsid); if (ns->nsid < old->nsid) break; @@ -1172,7 +1173,8 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl, ctrl->p2p_client = get_device(req->p2p_client); - list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link, + lockdep_is_held(&ctrl->subsys->lock)) nvmet_p2pmem_ns_add_p2p(ctrl, ns); } From b716e6889c95f64ba32af492461f6cc9341f3f05 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 26 Jan 2020 23:23:28 -0800 Subject: [PATCH 501/658] nvmet: fix dsm failure when payload does not match sgl descriptor The host is allowed to pass the controller an sgl describing a buffer that is larger than the dsm payload itself, allow it when executing dsm. 
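
For the "Pass lockdep expression to RCU lists" patch above, the general shape of the hardening is to tell list_for_each_entry_rcu() which updater lock justifies the lockless traversal. Below is a kernel-side sketch, not the nvmet code itself; items, items_lock and walk_items_locked() are invented names and the snippet is not meant to compile standalone.

        #include <linux/lockdep.h>
        #include <linux/mutex.h>
        #include <linux/printk.h>
        #include <linux/rculist.h>

        struct item {
                struct list_head node;
                int id;
        };

        static LIST_HEAD(items);
        static DEFINE_MUTEX(items_lock);        /* taken by all updaters */

        /* Called with items_lock held instead of rcu_read_lock(); the lockdep
         * expression documents (and verifies) why that is safe.
         */
        static void walk_items_locked(void)
        {
                struct item *it;

                lockdep_assert_held(&items_lock);

                list_for_each_entry_rcu(it, &items, node,
                                        lockdep_is_held(&items_lock))
                        pr_info("item %d\n", it->id);
        }
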
Reported-by: Dakshaja Uppalapati Reviewed-by: Christoph Hellwig , Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 11 +++++++++++ drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/nvme/target/io-cmd-file.c | 2 +- drivers/nvme/target/nvmet.h | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 35810a0a8d21..461987f669c5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -939,6 +939,17 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) } EXPORT_SYMBOL_GPL(nvmet_check_data_len); +bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) +{ + if (unlikely(data_len > req->transfer_len)) { + req->error_loc = offsetof(struct nvme_common_command, dptr); + nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); + return false; + } + + return true; +} + int nvmet_req_alloc_sgl(struct nvmet_req *req) { struct pci_dev *p2p_dev = NULL; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index b6fca0e421ef..ea0e596be15d 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -280,7 +280,7 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req) static void nvmet_bdev_execute_dsm(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; switch (le32_to_cpu(req->cmd->dsm.attributes)) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index caebfce06605..cd5670b83118 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -336,7 +336,7 @@ static void nvmet_file_dsm_work(struct work_struct *w) static void nvmet_file_execute_dsm(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 46df45e837c9..eda28b22a2c8 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -374,6 +374,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); void nvmet_req_free_sgl(struct nvmet_req *req); From cfa27356f835dc7755192e7b941d4f4851acbcc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jan 2020 19:40:24 +0100 Subject: [PATCH 502/658] nvme-pci: remove nvmeq->tags There is no real need to have a pointer to the tagset in struct nvme_queue, as we only need it in a single place, and that place can derive the used tagset from the device and qid trivially. This fixes a problem with stale pointer exposure when tagsets are reset, and also shrinks the nvme_queue structure. It also matches what most other transports have done since day 1. 
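The lookup that replaces the cached pointer is simple enough to sketch here (a simplified fragment; names follow the diff below): queue 0 completes against the admin tagset, every other queue indexes the shared I/O tagset at qid - 1, so the completion path never dereferences a pointer that a tagset teardown could have left stale.

    /* Derive the tagset from the device and qid at completion time. */
    struct blk_mq_tags *tags = nvmeq->qid ?
            nvmeq->dev->tagset.tags[nvmeq->qid - 1] :
            nvmeq->dev->admin_tagset.tags[0];

    req = blk_mq_tag_to_rq(tags, cqe->command_id);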
Reported-by: Edmund Nadolski Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 365a2ddbeaa7..da392b50f73e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -167,7 +167,6 @@ struct nvme_queue { /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; - struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -376,29 +375,17 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, WARN_ON(hctx_idx != 0); WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); - WARN_ON(nvmeq->tags); hctx->driver_data = nvmeq; - nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } -static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->tags = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; - if (!nvmeq->tags) - nvmeq->tags = &dev->tagset.tags[hctx_idx]; - WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; @@ -948,6 +935,13 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) writel(head, nvmeq->q_db + nvmeq->dev->db_stride); } +static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return nvmeq->dev->admin_tagset.tags[0]; + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; +} + static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; @@ -972,7 +966,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) return; } - req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } @@ -1572,7 +1566,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; From 21e157c62eeded8b1558a991b4820b761d48a730 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 28 Jan 2020 11:33:29 -0800 Subject: [PATCH 503/658] clk: qcom: Don't overwrite 'cfg' in clk_rcg2_dfs_populate_freq() The DFS frequency table logic overwrites 'cfg' while detecting the parent clk and then later on in clk_rcg2_dfs_populate_freq() we use that same variable to figure out the mode of the clk, either MND or not. Add a new variable to hold the parent clk bit so that 'cfg' is left untouched for use later. This fixes problems in detecting the supported frequencies for any clks in DFS mode. 
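The underlying bug pattern is easy to sketch (illustrative fragment; read_dfsr() is a made-up helper, the CFG_SRC_SEL_* masks are the ones used in the diff below): the raw register value and the extracted parent-select field have to live in separate variables, otherwise the later mode check operates on a clobbered value.

    u32 cfg, src;

    cfg = read_dfsr(l);                     /* keep the raw DFSR value      */
    src = (cfg & CFG_SRC_SEL_MASK) >> CFG_SRC_SEL_SHIFT;  /* parent select  */
    /* match 'src' against rcg->parent_map[i].cfg to find the parent ...   */
    /* ... and later decide MND vs plain mode from the untouched 'cfg'.    */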
Fixes: cc4f6944d0e3 ("clk: qcom: Add support for RCG to register for DFS") Reported-by: Rajendra Nayak Signed-off-by: Stephen Boyd Link: https://lkml.kernel.org/r/20200128193329.45635-1-sboyd@kernel.org Tested-by: Rajendra Nayak --- drivers/clk/qcom/clk-rcg2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index da045b200def..973ecf4f6bc5 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -953,7 +953,7 @@ static void clk_rcg2_dfs_populate_freq(struct clk_hw *hw, unsigned int l, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct clk_hw *p; unsigned long prate = 0; - u32 val, mask, cfg, mode; + u32 val, mask, cfg, mode, src; int i, num_parents; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + SE_PERF_DFSR(l), &cfg); @@ -963,12 +963,12 @@ static void clk_rcg2_dfs_populate_freq(struct clk_hw *hw, unsigned int l, if (cfg & mask) f->pre_div = cfg & mask; - cfg &= CFG_SRC_SEL_MASK; - cfg >>= CFG_SRC_SEL_SHIFT; + src = cfg & CFG_SRC_SEL_MASK; + src >>= CFG_SRC_SEL_SHIFT; num_parents = clk_hw_get_num_parents(hw); for (i = 0; i < num_parents; i++) { - if (cfg == rcg->parent_map[i].cfg) { + if (src == rcg->parent_map[i].cfg) { f->src = rcg->parent_map[i].src; p = clk_hw_get_parent_by_index(&rcg->clkr.hw, i); prate = clk_hw_get_rate(p); From 2a8aa18c113124434a32c1bd01054d67f799e4ee Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 09:49:43 -0800 Subject: [PATCH 504/658] dt-bindings: clk: qcom: Fix self-validation, split, and clean cruft The 'qcom,gcc.yaml' file failed self-validation (dt_binding_check) because it required a property to be either (3 entries big), (3 entries big), or (7 entries big), but not more than one of those things. That didn't make a ton of sense. This patch splits all of the exceptional device trees (AKA those that would have needed if/then/else rules) from qcom,gcc.yaml. It also cleans up some cruft found while doing that. After this lands, this worked for me atop clk-next with just the known error about msm8998: for f in \ Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml \ Documentation/devicetree/bindings/clock/qcom,gcc.yaml; do \ ARCH=arm64 make dtbs_check DT_SCHEMA_FILES=$f; \ done I then picked this patch atop linux-next (next-20200129) and ran: # Delete broken yaml: rm Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml ARCH=arm64 make dt_binding_check | grep 'clock/qcom' ...and that didn't seem to indicate problems. Arbitrary decisions made (yell if you want changed): - Left all the older devices (where clocks / clock-names weren't specified) in a single file. - Didn't make clocks "required" for msm8996 but left them as listed. This seems a little weird but it matches the old binding. Misc cleanups as part of this patch: - Fixed schema id to not have "bindings/" as per Rob [1]. - Listed include files as per Stephen. - sm8150 was claimed to be same set of clocks as sc7180, but driver and dts appear to say that "bi_tcxo_ao" doesn't exist. Fixed. - In "apq8064", "#thermal-sensor-cells" was missing the "#". 
- Got rid of "|" at the end of top description since spacing doesn't matter. - Changed indentation to consistently 2 spaces (it was 3 in some places). - Added period at the end of protected-clocks description. - No space before ":". - Updated sc7180/sm8150 example to use the 'qcom,rpmh.h' include. - Updated sc7180/sm8150 example to use larger address/size cells as per reality. - Updated sc7180/sm8150 example to point to the sleep_clk rather than <0>. - Made it so that gcc-ipq8074 didn't require #power-domain-cells since actual dts didn't have it and I got no hits from: git grep _GDSC include/dt-bindings/clock/qcom,gcc-ipq8074.h - Made it so that gcc-qcs404 didn't require #power-domain-cells since actual dts didn't have it and I got no hits from: git grep _GDSC include/dt-bindings/clock/qcom,gcc-qcs404.h Noticed, but not done in this patch (volunteers needed): - Add "aud_ref_clk" to sm8150 bindings / dts even though I found a reference to it in "gcc-sm8150.c". - Fix node name in actual ipq8074 to be "clock-controller" (it's gcc). - Since the example doesn't need phandes to exist, in msm8998 could just make up places providing some of the clocks currently bogused out with <0>. - On msm8998 clocks are listed as required but current dts doesn't have them. [1] https://lore.kernel.org/r/CAL_Jsq+_2E-bAbP9F6VYkWRp0crEyRGa5peuwP58-PZniVny7w@mail.gmail.com Fixes: ab91f72e018a ("clk: qcom: gcc-msm8996: Fix parent for CLKREF clocks") Signed-off-by: Douglas Anderson Reviewed-by: Rob Herring Reviewed-by: Jeffrey Hugo Link: https://lkml.kernel.org/r/20200203094843.v3.1.I4452dc951d7556ede422835268742b25a18b356b@changeid Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,gcc-apq8064.yaml | 83 +++++++ .../bindings/clock/qcom,gcc-ipq8074.yaml | 51 ++++ .../bindings/clock/qcom,gcc-msm8996.yaml | 68 ++++++ .../bindings/clock/qcom,gcc-msm8998.yaml | 93 +++++++ .../bindings/clock/qcom,gcc-qcs404.yaml | 51 ++++ .../bindings/clock/qcom,gcc-sc7180.yaml | 75 ++++++ .../bindings/clock/qcom,gcc-sm8150.yaml | 72 ++++++ .../devicetree/bindings/clock/qcom,gcc.yaml | 230 +++--------------- 8 files changed, 529 insertions(+), 194 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml new file mode 100644 index 000000000000..17f87178f6b8 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-apq8064.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Binding for APQ8064 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on APQ8064. 
+ + See also: + - dt-bindings/clock/qcom,gcc-msm8960.h + - dt-bindings/reset/qcom,gcc-msm8960.h + +properties: + compatible: + const: qcom,gcc-apq8064 + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + + nvmem-cells: + minItems: 1 + maxItems: 2 + description: + Qualcomm TSENS (thermal sensor device) on some devices can + be part of GCC and hence the TSENS properties can also be part + of the GCC/clock-controller node. + For more details on the TSENS properties please refer + Documentation/devicetree/bindings/thermal/qcom-tsens.txt + + nvmem-cell-names: + minItems: 1 + maxItems: 2 + items: + - const: calib + - const: calib_backup + + '#thermal-sensor-cells': + const: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - reg + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + - nvmem-cells + - nvmem-cell-names + - '#thermal-sensor-cells' + +examples: + - | + clock-controller@900000 { + compatible = "qcom,gcc-apq8064"; + reg = <0x00900000 0x4000>; + nvmem-cells = <&tsens_calib>, <&tsens_backup>; + nvmem-cell-names = "calib", "calib_backup"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + #thermal-sensor-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml new file mode 100644 index 000000000000..89c6e070e7ac --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-ipq8074.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Bindingfor IPQ8074 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on IPQ8074. + + See also: + - dt-bindings/clock/qcom,gcc-ipq8074.h + +properties: + compatible: + const: qcom,gcc-ipq8074 + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + reg: + maxItems: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - reg + - '#clock-cells' + - '#reset-cells' + +examples: + - | + clock-controller@1800000 { + compatible = "qcom,gcc-ipq8074"; + reg = <0x01800000 0x80000>; + #clock-cells = <1>; + #reset-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml new file mode 100644 index 000000000000..18e4e77b8cfa --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8996.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Binding for MSM8996 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on MSM8996. 
+ + See also: + - dt-bindings/clock/qcom,gcc-msm8996.h + +properties: + compatible: + const: qcom,gcc-msm8996 + + clocks: + items: + - description: XO source + - description: Second XO source + - description: Sleep clock source + + clock-names: + items: + - const: cxo + - const: cxo2 + - const: sleep_clk + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - reg + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + clock-controller@300000 { + compatible = "qcom,gcc-msm8996"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + reg = <0x300000 0x90000>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml new file mode 100644 index 000000000000..1d3cae980471 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8998.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Binding for MSM8998 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on MSM8998. + + See also: + - dt-bindings/clock/qcom,gcc-msm8998.h + +properties: + compatible: + const: qcom,gcc-msm8998 + + clocks: + items: + - description: Board XO source + - description: Sleep clock source + - description: USB 3.0 phy pipe clock + - description: UFS phy rx symbol clock for pipe 0 + - description: UFS phy rx symbol clock for pipe 1 + - description: UFS phy tx symbol clock + - description: PCIE phy pipe clock + + clock-names: + items: + - const: xo + - const: sleep_clk + - const: usb3_pipe + - const: ufs_rx_symbol0 + - const: ufs_rx_symbol1 + - const: ufs_tx_symbol0 + - const: pcie0_pipe + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - clocks + - clock-names + - reg + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + clock-controller@100000 { + compatible = "qcom,gcc-msm8998"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + reg = <0x00100000 0xb0000>; + clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, + <&sleep>, + <0>, + <0>, + <0>, + <0>, + <0>; + clock-names = "xo", + "sleep_clk", + "usb3_pipe", + "ufs_rx_symbol0", + "ufs_rx_symbol1", + "ufs_tx_symbol0", + "pcie0_pipe"; + }; +... 
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml new file mode 100644 index 000000000000..8cdece395eba --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-qcs404.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Bindingfor QCS404 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on QCS404. + + See also: + - dt-bindings/clock/qcom,gcc-qcs404.h + +properties: + compatible: + const: qcom,gcc-qcs404 + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + reg: + maxItems: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - reg + - '#clock-cells' + - '#reset-cells' + +examples: + - | + clock-controller@1800000 { + compatible = "qcom,gcc-qcs404"; + reg = <0x01800000 0x80000>; + #clock-cells = <1>; + #reset-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml new file mode 100644 index 000000000000..ee4f968e2909 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-sc7180.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Binding for SC7180 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on SC7180. + + See also: + - dt-bindings/clock/qcom,gcc-sc7180.h + +properties: + compatible: + const: qcom,gcc-sc7180 + + clocks: + items: + - description: Board XO source + - description: Board active XO source + - description: Sleep clock source + + clock-names: + items: + - const: bi_tcxo + - const: bi_tcxo_ao + - const: sleep_clk + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - clocks + - clock-names + - reg + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + clock-controller@100000 { + compatible = "qcom,gcc-sc7180"; + reg = <0 0x00100000 0 0x1f0000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&sleep_clk>; + clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... 
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml new file mode 100644 index 000000000000..888e9a708390 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-sm8150.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Binding for SM8150 + +maintainers: + - Stephen Boyd + - Taniya Das + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on SM8150. + + See also: + - dt-bindings/clock/qcom,gcc-sm8150.h + +properties: + compatible: + const: qcom,gcc-sm8150 + + clocks: + items: + - description: Board XO source + - description: Sleep clock source + + clock-names: + items: + - const: bi_tcxo + - const: sleep_clk + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + + protected-clocks: + description: + Protected clock specifier list as per common clock binding. + +required: + - compatible + - clocks + - clock-names + - reg + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + clock-controller@100000 { + compatible = "qcom,gcc-sm8150"; + reg = <0 0x00100000 0 0x1f0000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&sleep_clk>; + clock-names = "bi_tcxo", "sleep_clk"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml index cac1150c9292..d18f8ab9eeee 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only %YAML 1.2 --- -$id: http://devicetree.org/schemas/bindings/clock/qcom,gcc.yaml# +$id: http://devicetree.org/schemas/clock/qcom,gcc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller Binding @@ -14,77 +14,42 @@ description: | Qualcomm global clock control module which supports the clocks, resets and power domains. 
+ See also: + - dt-bindings/clock/qcom,gcc-apq8084.h + - dt-bindings/reset/qcom,gcc-apq8084.h + - dt-bindings/clock/qcom,gcc-ipq4019.h + - dt-bindings/clock/qcom,gcc-ipq6018.h + - dt-bindings/reset/qcom,gcc-ipq6018.h + - dt-bindings/clock/qcom,gcc-ipq806x.h (qcom,gcc-ipq8064) + - dt-bindings/reset/qcom,gcc-ipq806x.h (qcom,gcc-ipq8064) + - dt-bindings/clock/qcom,gcc-msm8660.h + - dt-bindings/reset/qcom,gcc-msm8660.h + - dt-bindings/clock/qcom,gcc-msm8974.h + - dt-bindings/reset/qcom,gcc-msm8974.h + - dt-bindings/clock/qcom,gcc-msm8994.h + - dt-bindings/clock/qcom,gcc-mdm9615.h + - dt-bindings/reset/qcom,gcc-mdm9615.h + - dt-bindings/clock/qcom,gcc-sdm660.h (qcom,gcc-sdm630 and qcom,gcc-sdm660) + - dt-bindings/clock/qcom,gcc-sdm845.h + properties: - compatible : + compatible: enum: - - qcom,gcc-apq8064 - - qcom,gcc-apq8084 - - qcom,gcc-ipq4019 - - qcom,gcc-ipq6018 - - qcom,gcc-ipq8064 - - qcom,gcc-ipq8074 - - qcom,gcc-msm8660 - - qcom,gcc-msm8916 - - qcom,gcc-msm8960 - - qcom,gcc-msm8974 - - qcom,gcc-msm8974pro - - qcom,gcc-msm8974pro-ac - - qcom,gcc-msm8994 - - qcom,gcc-msm8996 - - qcom,gcc-msm8998 - - qcom,gcc-mdm9615 - - qcom,gcc-qcs404 - - qcom,gcc-sc7180 - - qcom,gcc-sdm630 - - qcom,gcc-sdm660 - - qcom,gcc-sdm845 - - qcom,gcc-sm8150 - - clocks: - oneOf: - #qcom,gcc-sm8150 - #qcom,gcc-sc7180 - - items: - - description: Board XO source - - description: Board active XO source - - description: Sleep clock source - #qcom,gcc-msm8996 - - items: - - description: XO source - - description: Second XO source - - description: Sleep clock source - #qcom,gcc-msm8998 - - items: - - description: Board XO source - - description: Sleep clock source - - description: USB 3.0 phy pipe clock - - description: UFS phy rx symbol clock for pipe 0 - - description: UFS phy rx symbol clock for pipe 1 - - description: UFS phy tx symbol clock - - description: PCIE phy pipe clock - - clock-names: - oneOf: - #qcom,gcc-sm8150 - #qcom,gcc-sc7180 - - items: - - const: bi_tcxo - - const: bi_tcxo_ao - - const: sleep_clk - #qcom,gcc-msm8996 - - items: - - const: cxo - - const: cxo2 - - const: sleep_clk - #qcom,gcc-msm8998 - - items: - - const: xo - - const: sleep_clk - - const: usb3_pipe - - const: ufs_rx_symbol0 - - const: ufs_rx_symbol1 - - const: ufs_tx_symbol0 - - const: pcie0_pipe + - qcom,gcc-apq8084 + - qcom,gcc-ipq4019 + - qcom,gcc-ipq6018 + - qcom,gcc-ipq8064 + - qcom,gcc-msm8660 + - qcom,gcc-msm8916 + - qcom,gcc-msm8960 + - qcom,gcc-msm8974 + - qcom,gcc-msm8974pro + - qcom,gcc-msm8974pro-ac + - qcom,gcc-msm8994 + - qcom,gcc-mdm9615 + - qcom,gcc-sdm630 + - qcom,gcc-sdm660 + - qcom,gcc-sdm845 '#clock-cells': const: 1 @@ -98,31 +63,9 @@ properties: reg: maxItems: 1 - nvmem-cells: - minItems: 1 - maxItems: 2 - description: - Qualcomm TSENS (thermal sensor device) on some devices can - be part of GCC and hence the TSENS properties can also be part - of the GCC/clock-controller node. - For more details on the TSENS properties please refer - Documentation/devicetree/bindings/thermal/qcom-tsens.txt - - nvmem-cell-names: - minItems: 1 - maxItems: 2 - description: - Names for each nvmem-cells specified. - items: - - const: calib - - const: calib_backup - - 'thermal-sensor-cells': - const: 1 - protected-clocks: description: - Protected clock specifier list as per common clock binding + Protected clock specifier list as per common clock binding. 
required: - compatible @@ -131,33 +74,6 @@ required: - '#reset-cells' - '#power-domain-cells' -if: - properties: - compatible: - contains: - const: qcom,gcc-apq8064 - -then: - required: - - nvmem-cells - - nvmem-cell-names - - '#thermal-sensor-cells' - -else: - if: - properties: - compatible: - contains: - enum: - - qcom,gcc-msm8998 - - qcom,gcc-sm8150 - - qcom,gcc-sc7180 - then: - required: - - clocks - - clock-names - - examples: # Example for GCC for MSM8960: - | @@ -168,78 +84,4 @@ examples: #reset-cells = <1>; #power-domain-cells = <1>; }; - - - # Example of GCC with TSENS properties: - - | - clock-controller@900000 { - compatible = "qcom,gcc-apq8064"; - reg = <0x00900000 0x4000>; - nvmem-cells = <&tsens_calib>, <&tsens_backup>; - nvmem-cell-names = "calib", "calib_backup"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - #thermal-sensor-cells = <1>; - }; - - # Example of GCC with protected-clocks properties: - - | - clock-controller@100000 { - compatible = "qcom,gcc-sdm845"; - reg = <0x100000 0x1f0000>; - protected-clocks = <187>, <188>, <189>, <190>, <191>; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; - - # Example of GCC with clock node properties for SM8150: - - | - clock-controller@100000 { - compatible = "qcom,gcc-sm8150"; - reg = <0x00100000 0x1f0000>; - clocks = <&rpmhcc 0>, <&rpmhcc 1>, <&sleep_clk>; - clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; - - # Example of GCC with clock nodes properties for SC7180: - - | - clock-controller@100000 { - compatible = "qcom,gcc-sc7180"; - reg = <0x100000 0x1f0000>; - clocks = <&rpmhcc 0>, <&rpmhcc 1>, <0>; - clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; - - # Example of MSM8998 GCC: - - | - #include - clock-controller@100000 { - compatible = "qcom,gcc-msm8998"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - reg = <0x00100000 0xb0000>; - clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, - <&sleep>, - <0>, - <0>, - <0>, - <0>, - <0>; - clock-names = "xo", - "sleep_clk", - "usb3_pipe", - "ufs_rx_symbol0", - "ufs_rx_symbol1", - "ufs_tx_symbol0", - "pcie0_pipe"; - }; ... From 387122478775be5d9816c34aa29de53d0b926835 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Jan 2020 17:31:13 -0500 Subject: [PATCH 505/658] NFSv4: pnfs_roc() must use cred_fscmp() to compare creds When comparing two 'struct cred' for equality w.r.t. behaviour under filesystem access, we need to use cred_fscmp(). 
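Pointer equality is the wrong test here: two distinct struct cred objects can still describe identical filesystem access. A sketch of the intended comparison (the wrapper name is made up; cred_fscmp() returns 0 when fsuid, fsgid and the supplementary group list all match):

    /* Equivalent for filesystem access, even if not the same object. */
    static bool same_fs_access(const struct cred *a, const struct cred *b)
    {
            return cred_fscmp(a, b) == 0;
    }

With that, the layoutreturn is compounded whenever same_fs_access(cred, lo->plh_lc_cred) holds, instead of only when the pointers happen to be equal.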
Fixes: a52458b48af1 ("NFS/NFSD/SUNRPC: replace generic creds with 'struct cred'.") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3ac6b4dea72d..542ea8dfd1bc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1425,7 +1425,7 @@ retry: /* lo ref dropped in pnfs_roc_release() */ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); /* If the creds don't match, we can't compound the layoutreturn */ - if (!layoutreturn || cred != lo->plh_lc_cred) + if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0) goto out_noroc; roc = layoutreturn; From 9a206de2ea878f4502e86b81c0d7eb9b651bde82 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Jan 2020 17:31:14 -0500 Subject: [PATCH 506/658] NFS: nfs_access_get_cached_rcu() should use cred_fscmp() We do not need to have the rcu lookup method fail in the case where the fsuid/fsgid and supplemental groups match. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bfc66f3f00e1..6427a8a8d61a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2360,7 +2360,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); cache = list_entry(lh, struct nfs_access_entry, lru); if (lh == &nfsi->access_cache_entry_lru || - cred != cache->cred) + cred_fscmp(cred, cache->cred) != 0) cache = NULL; if (cache == NULL) goto out; From 65f5160376212094b477c7309ba9867e69ec69d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Jan 2020 17:31:15 -0500 Subject: [PATCH 507/658] NFS: nfs_find_open_context() should use cred_fscmp() We want to find open contexts that match our filesystem access properties. They don't have to exactly match the cred. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 71dfc9d2fc3d..1309e6f47f3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1061,7 +1061,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct rcu_read_lock(); list_for_each_entry_rcu(pos, &nfsi->open_files, list) { - if (cred != NULL && pos->cred != cred) + if (cred != NULL && cred_fscmp(pos->cred, cred) != 0) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; From f885ea640d65d98c8f1a326efb3abe13f1865c89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jan 2020 09:58:15 -0500 Subject: [PATCH 508/658] NFSv4: nfs_inode_evict_delegation() should set NFS_DELEGATION_RETURNING In particular, the pnfs return-on-close code will check for that flag, so ensure we set it appropriately. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index fe57b2b5314a..a7e42725c3b1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -607,6 +607,7 @@ void nfs_inode_evict_delegation(struct inode *inode) delegation = nfs_inode_detach_delegation(inode); if (delegation != NULL) { + set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); nfs_do_return_delegation(inode, delegation, 1); nfs_free_delegation(delegation); From 0d10416797c27b9e359d4bd94fb9db6f34f25d83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jan 2020 09:58:16 -0500 Subject: [PATCH 509/658] NFS: Clear NFS_DELEGATION_RETURN_IF_CLOSED when the delegation is returned If a delegation is marked as needing to be returned when the file is closed, then don't clear that marking until we're ready to return it. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a7e42725c3b1..b5b14618b73e 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -479,7 +479,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; - if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { + else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { struct inode *inode; spin_lock(&delegation->lock); @@ -488,6 +488,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) ret = true; spin_unlock(&delegation->lock); } + if (ret) + clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) ret = false; From b7b7dac6843e483b6fe8e29cef99e3dbb7594a53 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jan 2020 09:58:17 -0500 Subject: [PATCH 510/658] NFSv4: Try to return the delegation immediately when marked for return on close Add a routine to return the delegation immediately upon close of the file if it was marked for return-on-close. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 33 +++++++++++++++++++++++++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4state.c | 1 + 3 files changed, 35 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b5b14618b73e..90e50f32f3e0 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -639,6 +639,39 @@ int nfs4_inode_return_delegation(struct inode *inode) return err; } +/** + * nfs_inode_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called on file close in order to determine if the + * inode delegation needs to be returned immediately. 
+ */ +void nfs4_inode_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { + spin_lock(&delegation->lock); + if (delegation->inode && + list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); + ret = delegation; + } + spin_unlock(&delegation->lock); + } +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + /** * nfs4_inode_make_writeable * @inode: pointer to inode diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 15d3484be028..31b84604d383 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,6 +42,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); +void nfs4_inode_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fe1b908eecc8..19b473bc560e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -766,6 +766,7 @@ void nfs4_put_open_state(struct nfs4_state *state) list_del(&state->open_states); spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); + nfs4_inode_return_delegation_on_close(inode); iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); From d2269ea14ebd2a73f291d6b3a7a7d320ec00270c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jan 2020 09:58:18 -0500 Subject: [PATCH 511/658] NFSv4: Add accounting for the number of active delegations held In order to better manage our delegation caching, add a counter to track the number of active delegations. 
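A sketch of the accounting scheme (condensed from the diff below): one global atomic_long is bumped when a delegation becomes active and dropped the first time it is revoked, and that is the value a later patch compares against a watermark.

    static atomic_long_t nfs_active_delegations;

    /* grant / reinstate */
    atomic_long_inc(&nfs_active_delegations);

    /* revoke: only the first transition to REVOKED decrements */
    if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
            atomic_long_dec(&nfs_active_delegations);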
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 90e50f32f3e0..a777b3d0e720 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -25,13 +25,29 @@ #include "internal.h" #include "nfs4trace.h" -static void nfs_free_delegation(struct nfs_delegation *delegation) +static atomic_long_t nfs_active_delegations; + +static void __nfs_free_delegation(struct nfs_delegation *delegation) { put_cred(delegation->cred); delegation->cred = NULL; kfree_rcu(delegation, rcu); } +static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation) +{ + if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; + atomic_long_dec(&nfs_active_delegations); + } +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + nfs_mark_delegation_revoked(delegation); + __nfs_free_delegation(delegation); +} + /** * nfs_mark_delegation_referenced - set delegation's REFERENCED flag * @delegation: delegation to process @@ -343,7 +359,8 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, delegation->stateid.seqid = update->stateid.seqid; smp_wmb(); delegation->type = update->type; - clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); } } @@ -423,6 +440,8 @@ add_new: rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; + atomic_long_inc(&nfs_active_delegations); + trace_nfs4_set_delegation(inode, type); spin_lock(&inode->i_lock); @@ -432,7 +451,7 @@ add_new: out: spin_unlock(&clp->cl_lock); if (delegation != NULL) - nfs_free_delegation(delegation); + __nfs_free_delegation(delegation); if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); nfs_free_delegation(freeme); @@ -796,13 +815,6 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } -static void nfs_mark_delegation_revoked(struct nfs_server *server, - struct nfs_delegation *delegation) -{ - set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; -} - static void nfs_revoke_delegation(struct inode *inode, const nfs4_stateid *stateid) { @@ -830,7 +842,7 @@ static void nfs_revoke_delegation(struct inode *inode, } spin_unlock(&delegation->lock); } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); ret = true; out: rcu_read_unlock(); @@ -869,7 +881,7 @@ void nfs_delegation_mark_returned(struct inode *inode, delegation->stateid.seqid = stateid->seqid; } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); out_clear_returning: clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); From 10717f45639f6c1bc27b56405252c3a027406d92 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jan 2020 09:58:19 -0500 Subject: [PATCH 512/658] NFSv4: Limit the total number of cached delegations Delegations can be expensive to return, and can cause scalability issues for the server. Let's therefore try to limit the number of inactive delegations we hold. Once the number of delegations is above a certain threshold, start to return them on close. 
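The resulting policy is small enough to sketch (names as in the diff below; the threshold is also exposed as a writable module parameter so it can be tuned without rebuilding):

    #define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)

    /* On last close: return the delegation if it was explicitly marked,
     * or if the client is caching more delegations than the watermark. */
    if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
        atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
            /* proceed with the return-on-close path */
    }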
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a777b3d0e720..4a841071d8a7 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -25,7 +25,10 @@ #include "internal.h" #include "nfs4trace.h" +#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U) + static atomic_long_t nfs_active_delegations; +static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; static void __nfs_free_delegation(struct nfs_delegation *delegation) { @@ -676,7 +679,8 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode) delegation = nfs4_get_valid_delegation(inode); if (!delegation) goto out; - if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { + if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) || + atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) { spin_lock(&delegation->lock); if (delegation->inode && list_empty(&NFS_I(inode)->open_files) && @@ -1365,3 +1369,5 @@ out: rcu_read_unlock(); return ret; } + +module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); From a8bd9ddf397be8e0d812ce3da4e40440ac25f6d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:57:07 -0500 Subject: [PATCH 513/658] NFS: Replace various occurrences of kstrndup() with kmemdup_nul() When we already know the string length, it is more efficient to use kmemdup_nul(). Signed-off-by: Trond Myklebust [Anna - Changes to super.c were already made during fscontext conversion] Signed-off-by: Anna Schumaker --- fs/nfs/dns_resolve.c | 2 +- fs/nfs/nfs4namespace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index aec769a500a1..89bd5581f317 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -93,7 +93,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew, key = container_of(ckey, struct nfs_dns_ent, h); kfree(new->hostname); - new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); + new->hostname = kmemdup_nul(key->hostname, key->namelen, GFP_KERNEL); if (new->hostname) { new->namelen = key->namelen; nfs_dns_ent_update(cnew, ckey); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 3ea1c1008b5b..84026e7b8a5f 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -501,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, rpc_set_port(sap, NFS_PORT); error = -ENOMEM; - hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); + hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL); if (hostname == NULL) break; From 7ccbddbe3e67591a02ddd20078d1241bd8fe79f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:57:08 -0500 Subject: [PATCH 514/658] SUNRPC: Use kmemdup_nul() in rpc_parse_scope_id() Using kmemdup_nul() is more efficient when the length is known. 
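For reference, the difference between the two helpers (a sketch of their semantics, not the mm/util.c implementations): kstrndup() must first walk the source with strnlen() to find the copy length, while kmemdup_nul() trusts the caller-supplied length and simply copies that many bytes and appends the terminator.

    /* kstrndup(s, max, gfp):    len = strnlen(s, max), then copy len + '\0' */
    /* kmemdup_nul(s, len, gfp): copy exactly len bytes, then append '\0'    */
    p = kmemdup_nul(delim + 1, len, GFP_KERNEL);    /* len is already known  */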
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d024af4be85e..8b4d72b1a066 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -175,7 +175,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, return 0; len = (buf + buflen) - delim - 1; - p = kstrndup(delim + 1, len, GFP_KERNEL); + p = kmemdup_nul(delim + 1, len, GFP_KERNEL); if (p) { u32 scope_id = 0; struct net_device *dev; From 4b310319c6a8ce708f1033d57145e2aa027a883c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:53:53 -0500 Subject: [PATCH 515/658] NFS: Fix memory leaks and corruption in readdir nfs_readdir_xdr_to_array() must not exit without having initialised the array, so that the page cache deletion routines can safely call nfs_readdir_clear_array(). Furthermore, we should ensure that if we exit nfs_readdir_filler() with an error, we free up any page contents to prevent a leak if we try to fill the page again. Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir") Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6427a8a8d61a..451c48cdb1c2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -162,6 +162,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -682,6 +694,7 @@ int nfs_readdir_filler(void *data, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } From 114de38225d9b300f027e2aec9afbb6e0def154b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:53:54 -0500 Subject: [PATCH 516/658] NFS: Directory page cache pages need to be locked when read When a NFS directory page cache page is removed from the page cache, its contents are freed through a call to nfs_readdir_clear_array(). To prevent the removal of the page cache entry until after we've finished reading it, we must take the page lock. 
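The locking rule can be sketched as follows (simplified; the real code is the find_and_lock_cache_page() helper in the diff below): lock the page before touching the cached array, re-check that the page is still attached to the mapping, and keep the lock held until the entries have been consumed.

    ret = lock_page_killable(page);
    if (ret)
            return ret;                     /* fatal signal while waiting     */
    if (!page->mapping) {                   /* evicted: array already cleared */
            unlock_page(page);
            return -EAGAIN;                 /* caller re-fills the page       */
    }
    /* ... read the nfs_cache_array from the page under the lock ... */
    unlock_page(page);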
Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir") Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 451c48cdb1c2..d95c2c94bd87 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -702,8 +702,6 @@ int nfs_readdir_filler(void *data, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -717,19 +715,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. */ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) + return 0; + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -744,7 +751,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -783,7 +790,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = true; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -829,13 +835,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. A @@ -900,6 +906,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); From 3803d6721baff3d5dd6cd6b8c7294e54d124bc41 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:53:55 -0500 Subject: [PATCH 517/658] NFS: Use kmemdup_nul() in nfs_readdir_make_qstr() The directory strings stored in the readdir cache may be used with printk(), so it is better to ensure they are nul-terminated. 
Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d95c2c94bd87..5a8ff7e97ce3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -198,7 +198,7 @@ static int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) { string->len = len; - string->name = kmemdup(name, len, GFP_KERNEL); + string->name = kmemdup_nul(name, len, GFP_KERNEL); if (string->name == NULL) return -ENOMEM; /* From 93a6ab7b691fe33d5a30e7fc8e85276de2815108 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:53:56 -0500 Subject: [PATCH 518/658] NFS: Switch readdir to using iterate_shared() Now that the page cache locking is repaired, we should be able to switch to using iterate_shared() for improved concurrency when doing readdir(). Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5a8ff7e97ce3..88f6cf1ccf8c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -58,7 +58,7 @@ static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, - .iterate = nfs_readdir, + .iterate_shared = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, From b5e683d5cab8cd433b06ae178621f083cabd4f63 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Feb 2020 08:23:03 -0700 Subject: [PATCH 519/658] eventfd: track eventfd_signal() recursion depth eventfd use cases from aio and io_uring can deadlock due to circular or resursive calling, when eventfd_signal() tries to grab the waitqueue lock. On top of that, it's also possible to construct notification chains that are deep enough that we could blow the stack. Add a percpu counter that tracks the percpu recursion depth, warn if we exceed it. The counter is also exposed so that users of eventfd_signal() can do the right thing if it's non-zero in the context where it is called. Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Jens Axboe --- fs/eventfd.c | 15 +++++++++++++++ include/linux/eventfd.h | 14 ++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/fs/eventfd.c b/fs/eventfd.c index 8aa0ea8c55e8..78e41c7c3d05 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -24,6 +24,8 @@ #include #include +DEFINE_PER_CPU(int, eventfd_wake_count); + static DEFINE_IDA(eventfd_ida); struct eventfd_ctx { @@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) { unsigned long flags; + /* + * Deadlock or stack overflow issues can happen if we recurse here + * through waitqueue wakeup handlers. If the caller users potentially + * nested waitqueues with custom wakeup handlers, then it should + * check eventfd_signal_count() before calling this function. If + * it returns true, the eventfd_signal() call should be deferred to a + * safe context. 
+ */ + if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) + return 0; + spin_lock_irqsave(&ctx->wqh.lock, flags); + this_cpu_inc(eventfd_wake_count); if (ULLONG_MAX - ctx->count < n) n = ULLONG_MAX - ctx->count; ctx->count += n; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN); + this_cpu_dec(eventfd_wake_count); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index ffcc7724ca21..dc4fd8a6644d 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +DECLARE_PER_CPU(int, eventfd_wake_count); + +static inline bool eventfd_signal_count(void) +{ + return this_cpu_read(eventfd_wake_count); +} + #else /* CONFIG_EVENTFD */ /* @@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, return -ENOSYS; } +static inline bool eventfd_signal_count(void) +{ + return false; +} + #endif #endif /* _LINUX_EVENTFD_H */ From f0b493e6b9a8959356983f57112229e69c2f7b8c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 1 Feb 2020 21:30:11 -0700 Subject: [PATCH 520/658] io_uring: prevent potential eventfd recursion on poll If we have nested or circular eventfd wakeups, then we can deadlock if we run them inline from our poll waitqueue wakeup handler. It's also possible to have very long chains of notifications, to the extent where we could risk blowing the stack. Check the eventfd recursion count before calling eventfd_signal(). If it's non-zero, then punt the signaling to async context. This is always safe, as it takes us out-of-line in terms of stack and locking context. 
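In sketch form (eventfd_signal_count() is the helper introduced in the previous patch; the deferral target follows the diff below): callers that might already be running inside an eventfd wakeup consult the per-cpu counter first and, when it is non-zero, push the signal out to async context instead of raising it inline.

    if (io_should_trigger_evfd(ctx)) {
            if (eventfd_signal_count()) {
                    /* nested inside another eventfd wakeup: defer to a
                     * worker, which signals from a safe context */
                    req->work.func = io_poll_trigger_evfd;
            } else {
                    eventfd_signal(ctx->cq_ev_fd, 1);
            }
    }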
Cc: stable@vger.kernel.org # 5.1+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 217721c7bc41..43f3b7d90299 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1020,21 +1020,28 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) { + if (!ctx->cq_ev_fd) + return false; if (!ctx->eventfd_async) return true; return io_wq_current_is_worker() || in_interrupt(); } -static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev) { if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (waitqueue_active(&ctx->sqo_wait)) wake_up(&ctx->sqo_wait); - if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx)) + if (trigger_ev) eventfd_signal(ctx->cq_ev_fd, 1); } +static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +{ + __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx)); +} + /* Returns true if there are no backlogged entries after the flush */ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) { @@ -3561,6 +3568,14 @@ static void io_poll_flush(struct io_wq_work **workptr) __io_poll_flush(req->ctx, nodes); } +static void io_poll_trigger_evfd(struct io_wq_work **workptr) +{ + struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); + + eventfd_signal(req->ctx->cq_ev_fd, 1); + io_put_req(req); +} + static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { @@ -3586,14 +3601,22 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (llist_empty(&ctx->poll_llist) && spin_trylock_irqsave(&ctx->completion_lock, flags)) { + bool trigger_ev; + hash_del(&req->hash_node); io_poll_complete(req, mask, 0); - req->flags |= REQ_F_COMP_LOCKED; - io_put_req(req); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); - req = NULL; + trigger_ev = io_should_trigger_evfd(ctx); + if (trigger_ev && eventfd_signal_count()) { + trigger_ev = false; + req->work.func = io_poll_trigger_evfd; + } else { + req->flags |= REQ_F_COMP_LOCKED; + io_put_req(req); + req = NULL; + } + spin_unlock_irqrestore(&ctx->completion_lock, flags); + __io_cqring_ev_posted(ctx, trigger_ev); } else { req->result = mask; req->llist_node.next = NULL; From 0b7b21e42ba2d6ac9595a4358a9354249605a3af Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 31 Jan 2020 08:34:59 -0700 Subject: [PATCH 521/658] io_uring: use the proper helpers for io_send/recv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't use the recvmsg/sendmsg helpers, use the same helpers that the recv(2) and send(2) system calls use. 
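In sketch form, the substitution is the following (flags handling elided): the __sys_*_sock() helpers are shaped for the sendmsg(2)/recvmsg(2) syscall paths, the recv variant even takes user-space pointers that the old code passed as NULL, whereas sock_sendmsg() and sock_recvmsg() are the plain in-kernel entry points that send(2) and recv(2) end up using.

    msg.msg_flags = flags;
    ret = sock_sendmsg(sock, &msg);          /* was __sys_sendmsg_sock()      */

    ret = sock_recvmsg(sock, &msg, flags);   /* was __sys_recvmsg_sock(...,
                                                NULL, NULL, flags)            */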
Reported-by: 李通洲 Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 43f3b7d90299..cd07df2afe61 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3090,7 +3090,8 @@ static int io_send(struct io_kiocb *req, struct io_kiocb **nxt, else if (force_nonblock) flags |= MSG_DONTWAIT; - ret = __sys_sendmsg_sock(sock, &msg, flags); + msg.msg_flags = flags; + ret = sock_sendmsg(sock, &msg); if (force_nonblock && ret == -EAGAIN) return -EAGAIN; if (ret == -ERESTARTSYS) @@ -3116,6 +3117,7 @@ static int io_recvmsg_prep(struct io_kiocb *req, sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->len = READ_ONCE(sqe->len); if (!io || req->opcode == IORING_OP_RECV) return 0; @@ -3234,7 +3236,7 @@ static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt, else if (force_nonblock) flags |= MSG_DONTWAIT; - ret = __sys_recvmsg_sock(sock, &msg, NULL, NULL, flags); + ret = sock_recvmsg(sock, &msg, flags); if (force_nonblock && ret == -EAGAIN) return -EAGAIN; if (ret == -ERESTARTSYS) From 5d204bcfa09330972ad3428a8f81c23f371d3e6d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 31 Jan 2020 12:06:52 -0700 Subject: [PATCH 522/658] io_uring: don't map read/write iovec potentially twice If we have a read/write that is deferred, we already setup the async IO context for that request, and mapped it. When we later try and execute the request and we get -EAGAIN, we don't want to attempt to re-map it. If we do, we end up with garbage in the iovec, which typically leads to an -EFAULT or -EINVAL completion. Cc: stable@vger.kernel.org # 5.5 Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index cd07df2afe61..678a1b245e10 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2171,10 +2171,12 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, { if (!io_op_defs[req->opcode].async_ctx) return 0; - if (!req->io && io_alloc_async_ctx(req)) - return -ENOMEM; + if (!req->io) { + if (io_alloc_async_ctx(req)) + return -ENOMEM; - io_req_map_rw(req, io_size, iovec, fast_iov, iter); + io_req_map_rw(req, io_size, iovec, fast_iov, iter); + } req->work.func = io_rw_async; return 0; } From 9250f9ee194dc3dcee28a42a1533fa2cc0edd215 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 1 Feb 2020 01:22:08 +0300 Subject: [PATCH 523/658] io_uring: remove extra ->file check It won't ever get into io_prep_rw() when req->file haven't been set in io_req_set_file(), hence remove the check. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 678a1b245e10..a3c75aff8d10 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1862,9 +1862,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, unsigned ioprio; int ret; - if (!req->file) - return -EBADF; - if (S_ISREG(file_inode(req->file)->i_mode)) req->flags |= REQ_F_ISREG; From 1a417f4e618e05fba29ba222f1e8555c302376ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 31 Jan 2020 17:16:48 -0700 Subject: [PATCH 524/658] io_uring: fix sporadic double CQE entry for close We punt close to async for the final fput(), but we log the completion even before that even in that case. We rely on the request not having a files table assigned to detect what the final async close should do. 
However, if we punt the async queue to __io_queue_sqe(), we'll get ->files assigned and this makes io_close_finish() think it should both close the filp again (which does no harm) AND log a new CQE event for this request. This causes duplicate CQEs. Queue the request up for async manually so we don't grab files needlessly and trigger this condition. Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a3c75aff8d10..b9c9e04cc1cb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2843,16 +2843,13 @@ static void io_close_finish(struct io_wq_work **workptr) int ret; ret = filp_close(req->close.put_file, req->work.files); - if (ret < 0) { + if (ret < 0) req_set_fail_links(req); - } io_cqring_add_event(req, ret); } fput(req->close.put_file); - /* we bypassed the re-issue, drop the submission reference */ - io_put_req(req); io_put_req_find_next(req, &nxt); if (nxt) io_wq_assign_next(workptr, nxt); @@ -2894,7 +2891,13 @@ static int io_close(struct io_kiocb *req, struct io_kiocb **nxt, eagain: req->work.func = io_close_finish; - return -EAGAIN; + /* + * Do manual async queue here to avoid grabbing files - we don't + * need the files, and it'll cause io_close_finish() to close + * the file again and cause a double CQE entry for this request + */ + io_queue_async_work(req); + return 0; } static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) From 3e69426da2599677ebbe76e2d97a606c4797bd74 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 1 Feb 2020 09:22:49 -0700 Subject: [PATCH 525/658] io_uring: punt even fadvise() WILLNEED to async context Andres correctly points out that read-ahead can block, if it needs to read in meta data (or even just through the page cache page allocations). Play it safe for now and just ensure WILLNEED is also punted to async context. While in there, allow the file settings hints from non-blocking context. They don't need to start/do IO, and we can safely do them inline. Fixes: 4840e418c2fc ("io_uring: add IORING_OP_FADVISE") Reported-by: Andres Freund Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b9c9e04cc1cb..1580f1e7ba1c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2730,9 +2730,16 @@ static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt, struct io_fadvise *fa = &req->fadvise; int ret; - /* DONTNEED may block, others _should_ not */ - if (fa->advice == POSIX_FADV_DONTNEED && force_nonblock) - return -EAGAIN; + if (force_nonblock) { + switch (fa->advice) { + case POSIX_FADV_NORMAL: + case POSIX_FADV_RANDOM: + case POSIX_FADV_SEQUENTIAL: + break; + default: + return -EAGAIN; + } + } ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); if (ret < 0) From 6c8a31346925cbb373f84a18428ab3df59d3950e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 1 Feb 2020 03:58:00 +0300 Subject: [PATCH 526/658] io_uring: iterate req cache backwards Grab requests from cache-array from the end, so can get by only free_reqs. 
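In other words, the request cache becomes a plain stack indexed by free_reqs alone, so the separate cur_req cursor can be dropped. A simplified, self-contained sketch of the idea (types reduced for illustration, not the io_uring structures):

#include <stddef.h>

struct req_cache {
	void *reqs[16];
	unsigned int free_reqs;		/* entries still cached */
};

/* Pop from the end: only free_reqs is needed to find the next entry. */
static void *cache_pop(struct req_cache *c)
{
	if (!c->free_reqs)
		return NULL;		/* caller falls back to the allocator */
	return c->reqs[--c->free_reqs];
}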
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1580f1e7ba1c..42b45c57c3f8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -585,8 +585,7 @@ struct io_submit_state { * io_kiocb alloc cache */ void *reqs[IO_IOPOLL_BATCH]; - unsigned int free_reqs; - unsigned int cur_req; + unsigned int free_reqs; /* * File reference cache @@ -1190,12 +1189,10 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, ret = 1; } state->free_reqs = ret - 1; - state->cur_req = 1; - req = state->reqs[0]; + req = state->reqs[ret - 1]; } else { - req = state->reqs[state->cur_req]; state->free_reqs--; - state->cur_req++; + req = state->reqs[state->free_reqs]; } got_it: @@ -4849,8 +4846,7 @@ static void io_submit_state_end(struct io_submit_state *state) blk_finish_plug(&state->plug); io_file_put(state); if (state->free_reqs) - kmem_cache_free_bulk(req_cachep, state->free_reqs, - &state->reqs[state->cur_req]); + kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); } /* From 3e577dcd73a1fdc641bf45e5ea4a37869de221b5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 1 Feb 2020 03:58:42 +0300 Subject: [PATCH 527/658] io_uring: put the flag changing code in the same spot Both iocb_flags() and kiocb_set_rw_flags() are inline and modify kiocb->ki_flags. Place them close, so they can be potentially better optimised. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 42b45c57c3f8..edb00ae2619b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1867,8 +1867,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, req->flags |= REQ_F_CUR_POS; kiocb->ki_pos = req->file->f_pos; } - kiocb->ki_flags = iocb_flags(kiocb->ki_filp); kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); + kiocb->ki_flags = iocb_flags(kiocb->ki_filp); + ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); + if (unlikely(ret)) + return ret; ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { @@ -1880,10 +1883,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, } else kiocb->ki_ioprio = get_current_ioprio(); - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); - if (unlikely(ret)) - return ret; - /* don't allow async punt if RWF_NOWAIT was requested */ if ((kiocb->ki_flags & IOCB_NOWAIT) || (req->file->f_flags & O_NONBLOCK)) From 01d7a356872eec22ef34a33a5f9cfa917d145468 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 3 Feb 2020 10:33:42 -0700 Subject: [PATCH 528/658] aio: prevent potential eventfd recursion on poll If we have nested or circular eventfd wakeups, then we can deadlock if we run them inline from our poll waitqueue wakeup handler. It's also possible to have very long chains of notifications, to the extent where we could risk blowing the stack. Check the eventfd recursion count before calling eventfd_signal(). If it's non-zero, then punt the signaling to async context. This is always safe, as it takes us out-of-line in terms of stack and locking context. 
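For aio this means deferring the final iocb_put(), since that is what raises the eventfd signal. Condensed into a hypothetical helper for readability (names as in fs/aio.c, logic mirrors the hunk below):

static void aio_poll_complete_or_defer(struct aio_kiocb *iocb)
{
	struct poll_iocb *req = &iocb->poll;

	if (iocb->ki_eventfd && eventfd_signal_count()) {
		/* signalling here would nest eventfd wakeups: punt */
		INIT_WORK(&req->work, aio_poll_put_work);
		schedule_work(&req->work);	/* iocb_put() runs out of line */
	} else {
		iocb_put(iocb);			/* safe to complete inline */
	}
}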
Cc: stable@vger.kernel.org # 4.19+ Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- fs/aio.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index a9fbad2ce5e6..5f3d3d814928 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, return 0; } +static void aio_poll_put_work(struct work_struct *work) +{ + struct poll_iocb *req = container_of(work, struct poll_iocb, work); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + + iocb_put(iocb); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del_init(&req->wait.entry); if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + struct kioctx *ctx = iocb->ki_ctx; + /* * Try to complete the iocb inline if we can. Use * irqsave/irqrestore because not all filesystems (e.g. fuse) @@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); req->done = true; - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); - iocb_put(iocb); + if (iocb->ki_eventfd && eventfd_signal_count()) { + iocb = NULL; + INIT_WORK(&req->work, aio_poll_put_work); + schedule_work(&req->work); + } + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + if (iocb) + iocb_put(iocb); } else { schedule_work(&req->work); } From 12efec5602744c5a185049eb4fcfd9aebe01bd6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jan 2020 19:49:57 -0500 Subject: [PATCH 529/658] saner copy_mount_options() don't bother with the byte-by-byte loops, etc. Signed-off-by: Al Viro --- fs/namespace.c | 49 +++++++------------------------------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5e1bf611a9eb..85b5f7bea82e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2979,39 +2979,10 @@ static void shrink_submounts(struct mount *mnt) } } -/* - * Some copy_from_user() implementations do not return the exact number of - * bytes remaining to copy on a fault. But copy_mount_options() requires that. - * Note that this function differs from copy_from_user() in that it will oops - * on bad values of `to', rather than returning a short copy. - */ -static long exact_copy_from_user(void *to, const void __user * from, - unsigned long n) -{ - char *t = to; - const char __user *f = from; - char c; - - if (!access_ok(from, n)) - return n; - - while (n) { - if (__get_user(c, f)) { - memset(t, 0, n); - break; - } - *t++ = c; - f++; - n--; - } - return n; -} - void *copy_mount_options(const void __user * data) { - int i; - unsigned long size; char *copy; + unsigned size; if (!data) return NULL; @@ -3020,22 +2991,16 @@ void *copy_mount_options(const void __user * data) if (!copy) return ERR_PTR(-ENOMEM); - /* We only care that *some* data at the address the user - * gave us is valid. Just in case, we'll zero - * the remainder of the page. - */ - /* copy_from_user cannot cross TASK_SIZE ! 
*/ - size = TASK_SIZE - (unsigned long)untagged_addr(data); - if (size > PAGE_SIZE) - size = PAGE_SIZE; + size = PAGE_SIZE - offset_in_page(data); - i = size - exact_copy_from_user(copy, data, size); - if (!i) { + if (copy_from_user(copy, data, size)) { kfree(copy); return ERR_PTR(-EFAULT); } - if (i != PAGE_SIZE) - memset(copy + i, 0, PAGE_SIZE - i); + if (size != PAGE_SIZE) { + if (copy_from_user(copy + size, data + size, PAGE_SIZE - size)) + memset(copy + size, 0, PAGE_SIZE - size); + } return copy; } From 73cb3106e883ac03cc4f15b20d525e6bde650526 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 3 Feb 2020 22:37:36 +0000 Subject: [PATCH 530/658] clk: ls1028a: fix a dereference of pointer 'parent' before a null check Currently the pointer 'parent' is being dereferenced before it is being null checked. Fix this by performing the null check before it is dereferenced. Addresses-Coverity: ("Dereference before null check") Fixes: d37010a3c162 ("clk: ls1028a: Add clock driver for Display output interface") Signed-off-by: Colin Ian King Link: https://lkml.kernel.org/r/20200203223736.99645-1-colin.king@canonical.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-plldig.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-plldig.c b/drivers/clk/clk-plldig.c index 312b8312d503..25020164b89e 100644 --- a/drivers/clk/clk-plldig.c +++ b/drivers/clk/clk-plldig.c @@ -187,7 +187,7 @@ static int plldig_init(struct clk_hw *hw) { struct clk_plldig *data = to_clk_plldig(hw); struct clk_hw *parent = clk_hw_get_parent(hw); - unsigned long parent_rate = clk_hw_get_rate(parent); + unsigned long parent_rate; unsigned long val; unsigned long long lltmp; unsigned int mfd, fracdiv = 0; @@ -195,6 +195,8 @@ static int plldig_init(struct clk_hw *hw) if (!parent) return -EINVAL; + parent_rate = clk_hw_get_rate(parent); + if (data->vco_freq) { mfd = data->vco_freq / parent_rate; lltmp = data->vco_freq % parent_rate; From 908b050114d8fefdddc57ec9fbc213c3690e7f5f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:34 -0800 Subject: [PATCH 531/658] clk: qcom: rcg2: Don't crash if our parent can't be found; return an error When I got my clock parenting slightly wrong I ended up with a crash that looked like this: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... pc : clk_hw_get_rate+0x14/0x44 ... Call trace: clk_hw_get_rate+0x14/0x44 _freq_tbl_determine_rate+0x94/0xfc clk_rcg2_determine_rate+0x2c/0x38 clk_core_determine_round_nolock+0x4c/0x88 clk_core_round_rate_nolock+0x6c/0xa8 clk_core_round_rate_nolock+0x9c/0xa8 clk_core_set_rate_nolock+0x70/0x180 clk_set_rate+0x3c/0x6c of_clk_set_defaults+0x254/0x360 platform_drv_probe+0x28/0xb0 really_probe+0x120/0x2dc driver_probe_device+0x64/0xfc device_driver_attach+0x4c/0x6c __driver_attach+0xac/0xc0 bus_for_each_dev+0x84/0xcc driver_attach+0x2c/0x38 bus_add_driver+0xfc/0x1d0 driver_register+0x64/0xf8 __platform_driver_register+0x4c/0x58 msm_drm_register+0x5c/0x60 ... It turned out that clk_hw_get_parent_by_index() was returning NULL and we weren't checking. Let's check it so that we don't crash. 
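The fix itself is a one-line guard; as a general pattern, any determine_rate-style callback that walks parents by index has to tolerate a missing parent. An illustrative helper (not the driver code), assuming only the common clk provider API:

/*
 * clk_hw_get_parent_by_index() returns NULL when no parent is
 * registered at that index, e.g. when the parent mapping in DT is
 * wrong, so fail with -EINVAL instead of handing the NULL pointer to
 * clk_hw_get_rate().
 */
static int parent_rate_by_index(struct clk_hw *hw, unsigned int index,
				unsigned long *rate)
{
	struct clk_hw *p = clk_hw_get_parent_by_index(hw, index);

	if (!p)
		return -EINVAL;

	*rate = clk_hw_get_rate(p);
	return 0;
}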
Fixes: ac269395cdd8 ("clk: qcom: Convert to clk_hw based provider APIs") Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Link: https://lkml.kernel.org/r/20200203103049.v4.1.I7487325fe8e701a68a07d3be8a6a4b571eca9cfa@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rcg2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 973ecf4f6bc5..357159fe85b5 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -218,6 +218,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, clk_flags = clk_hw_get_flags(hw); p = clk_hw_get_parent_by_index(hw, index); + if (!p) + return -EINVAL; + if (clk_flags & CLK_SET_RATE_PARENT) { rate = f->freq; if (f->pre_div) { From 48cabc221f0d914db3ba1af0924cf4f53ebaced5 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:35 -0800 Subject: [PATCH 532/658] dt-bindings: clock: Fix qcom,dispcc bindings for sdm845/sc7180 The qcom,dispcc bindings had a few problems with them: 1. They didn't specify all the clocks that dispcc is a client of. Specifically on sc7180 there are two clocks from the DSI PHY and two from the DP PHY. On sdm845 there are actually two DSI PHYs (each of which has two clocks) and an extra clock from the gcc. These all need to be specified. 2. The sdm845.dtsi has existed for quite some time without specifying the clocks. The Linux driver was relying on global names to match things up. While we should transition things, it should be noted in the bindings. 3. The names used the bindings for "xo" and "gpll0" didn't match the names that QC used for these clocks internally and this was causing confusion / difficulty with their code generation tools. Switched to the internal names to simplify everyone's lives. It's not quite as clean in a purist sense but it should avoid headaches. This officially changes the binding, but that seems OK in this case. Also note that I updated the example. Fixes: 5d28e44ba630 ("dt-bindings: clock: Add YAML schemas for the QCOM DISPCC clock bindings") Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.2.I0c4bbb0f75a0880cd4bd90d8b267271e2375e0d0@changeid Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,dispcc.yaml | 67 ------------- .../bindings/clock/qcom,sc7180-dispcc.yaml | 84 ++++++++++++++++ .../bindings/clock/qcom,sdm845-dispcc.yaml | 99 +++++++++++++++++++ 3 files changed, 183 insertions(+), 67 deletions(-) delete mode 100644 Documentation/devicetree/bindings/clock/qcom,dispcc.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc.yaml deleted file mode 100644 index 9c58e02a1de1..000000000000 --- a/Documentation/devicetree/bindings/clock/qcom,dispcc.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/bindings/clock/qcom,dispcc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Qualcomm Display Clock & Reset Controller Binding - -maintainers: - - Taniya Das - -description: | - Qualcomm display clock control module which supports the clocks, resets and - power domains. 
- -properties: - compatible: - enum: - - qcom,sc7180-dispcc - - qcom,sdm845-dispcc - - clocks: - minItems: 1 - maxItems: 2 - items: - - description: Board XO source - - description: GPLL0 source from GCC - - clock-names: - items: - - const: xo - - const: gpll0 - - '#clock-cells': - const: 1 - - '#reset-cells': - const: 1 - - '#power-domain-cells': - const: 1 - - reg: - maxItems: 1 - -required: - - compatible - - reg - - clocks - - clock-names - - '#clock-cells' - - '#reset-cells' - - '#power-domain-cells' - -examples: - # Example of DISPCC with clock node properties for SDM845: - - | - clock-controller@af00000 { - compatible = "qcom,sdm845-dispcc"; - reg = <0xaf00000 0x10000>; - clocks = <&rpmhcc 0>, <&gcc 24>; - clock-names = "xo", "gpll0"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; -... diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml new file mode 100644 index 000000000000..0429062f1585 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sc7180-dispcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Display Clock & Reset Controller Binding for SC7180 + +maintainers: + - Taniya Das + +description: | + Qualcomm display clock control module which supports the clocks, resets and + power domains on SC7180. + + See also dt-bindings/clock/qcom,dispcc-sc7180.h. + +properties: + compatible: + const: qcom,sc7180-dispcc + + clocks: + items: + - description: Board XO source + - description: GPLL0 source from GCC + - description: Byte clock from DSI PHY + - description: Pixel clock from DSI PHY + - description: Link clock from DP PHY + - description: VCO DIV clock from DP PHY + + clock-names: + items: + - const: bi_tcxo + - const: gcc_disp_gpll0_clk_src + - const: dsi0_phy_pll_out_byteclk + - const: dsi0_phy_pll_out_dsiclk + - const: dp_phy_pll_link_clk + - const: dp_phy_pll_vco_div_clk + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + #include + clock-controller@af00000 { + compatible = "qcom,sc7180-dispcc"; + reg = <0 0x0af00000 0 0x200000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_DISP_GPLL0_CLK_SRC>, + <&dsi_phy 0>, + <&dsi_phy 1>, + <&dp_phy 0>, + <&dp_phy 1>; + clock-names = "bi_tcxo", + "gcc_disp_gpll0_clk_src", + "dsi0_phy_pll_out_byteclk", + "dsi0_phy_pll_out_dsiclk", + "dp_phy_pll_link_clk", + "dp_phy_pll_vco_div_clk"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... 
diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml new file mode 100644 index 000000000000..89269ddfbdcd --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sdm845-dispcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Display Clock & Reset Controller Binding for SDM845 + +maintainers: + - Taniya Das + +description: | + Qualcomm display clock control module which supports the clocks, resets and + power domains on SDM845. + + See also dt-bindings/clock/qcom,dispcc-sdm845.h. + +properties: + compatible: + const: qcom,sdm845-dispcc + + # NOTE: sdm845.dtsi existed for quite some time and specified no clocks. + # The code had to use hardcoded mechanisms to find the input clocks. + # New dts files should have these clocks. + clocks: + items: + - description: Board XO source + - description: GPLL0 source from GCC + - description: GPLL0 div source from GCC + - description: Byte clock from DSI PHY0 + - description: Pixel clock from DSI PHY0 + - description: Byte clock from DSI PHY1 + - description: Pixel clock from DSI PHY1 + - description: Link clock from DP PHY + - description: VCO DIV clock from DP PHY + + clock-names: + items: + - const: bi_tcxo + - const: gcc_disp_gpll0_clk_src + - const: gcc_disp_gpll0_div_clk_src + - const: dsi0_phy_pll_out_byteclk + - const: dsi0_phy_pll_out_dsiclk + - const: dsi1_phy_pll_out_byteclk + - const: dsi1_phy_pll_out_dsiclk + - const: dp_link_clk_divsel_ten + - const: dp_vco_divided_clk_src_mux + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + #include + clock-controller@af00000 { + compatible = "qcom,sdm845-dispcc"; + reg = <0 0x0af00000 0 0x10000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_DISP_GPLL0_CLK_SRC>, + <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>, + <&dsi0_phy 0>, + <&dsi0_phy 1>, + <&dsi1_phy 0>, + <&dsi1_phy 1>, + <&dp_phy 0>, + <&dp_phy 1>; + clock-names = "bi_tcxo", + "gcc_disp_gpll0_clk_src", + "gcc_disp_gpll0_div_clk_src", + "dsi0_phy_pll_out_byteclk", + "dsi0_phy_pll_out_dsiclk", + "dsi1_phy_pll_out_byteclk", + "dsi1_phy_pll_out_dsiclk", + "dp_link_clk_divsel_ten", + "dp_vco_divided_clk_src_mux"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... From 0a97e8a5bf0a79c9f82f6c22d9845f7c179ea758 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:37 -0800 Subject: [PATCH 533/658] clk: qcom: Get rid of fallback global names for dispcc-sc7180 In the new world input clocks should be matched by ".fw_name". sc7180 is new enough that no backward compatibility use of global names should be needed. Remove it. With a proper device tree and downstream display patches I have verified booting a sc7180 up and seeing the display after this patch. 
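Concretely, the difference is in how each parent entry is described; a minimal illustration with one entry (the real tables are in the diff below):

/*
 * Old style: .fw_name plus a .name fallback that is resolved through
 * the global clock namespace if DT does not provide the clock.
 * New style: .fw_name only, so the parent must come from the "clocks"/
 * "clock-names" properties of the consumer node.
 */
static const struct clk_parent_data parent_with_fallback = {
	.fw_name = "dp_phy_pll_link_clk",
	.name = "dp_phy_pll_link_clk",	/* legacy global lookup */
};

static const struct clk_parent_data parent_dt_only = {
	.fw_name = "dp_phy_pll_link_clk",
};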
Fixes: dd3d06622138 ("clk: qcom: Add display clock controller driver for SC7180") Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.4.Ia3706a5d5add72e88dbff60fd13ec06bf7a2fd48@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sc7180.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c index 30c1e25d3edb..a820e1558677 100644 --- a/drivers/clk/qcom/dispcc-sc7180.c +++ b/drivers/clk/qcom/dispcc-sc7180.c @@ -81,7 +81,7 @@ static const struct parent_map disp_cc_parent_map_0[] = { static const struct clk_parent_data disp_cc_parent_data_0[] = { { .fw_name = "bi_tcxo" }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, + { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_1[] = { @@ -93,10 +93,9 @@ static const struct parent_map disp_cc_parent_map_1[] = { static const struct clk_parent_data disp_cc_parent_data_1[] = { { .fw_name = "bi_tcxo" }, - { .fw_name = "dp_phy_pll_link_clk", .name = "dp_phy_pll_link_clk" }, - { .fw_name = "dp_phy_pll_vco_div_clk", - .name = "dp_phy_pll_vco_div_clk"}, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, + { .fw_name = "dp_phy_pll_link_clk" }, + { .fw_name = "dp_phy_pll_vco_div_clk" }, + { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_2[] = { @@ -107,9 +106,8 @@ static const struct parent_map disp_cc_parent_map_2[] = { static const struct clk_parent_data disp_cc_parent_data_2[] = { { .fw_name = "bi_tcxo" }, - { .fw_name = "dsi0_phy_pll_out_byteclk", - .name = "dsi0_phy_pll_out_byteclk" }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, + { .fw_name = "dsi0_phy_pll_out_byteclk" }, + { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_3[] = { @@ -125,7 +123,7 @@ static const struct clk_parent_data disp_cc_parent_data_3[] = { { .hw = &disp_cc_pll0.clkr.hw }, { .fw_name = "gcc_disp_gpll0_clk_src" }, { .hw = &disp_cc_pll0_out_even.clkr.hw }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, + { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_4[] = { @@ -137,7 +135,7 @@ static const struct parent_map disp_cc_parent_map_4[] = { static const struct clk_parent_data disp_cc_parent_data_4[] = { { .fw_name = "bi_tcxo" }, { .fw_name = "gcc_disp_gpll0_clk_src" }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, + { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_5[] = { @@ -148,9 +146,8 @@ static const struct parent_map disp_cc_parent_map_5[] = { static const struct clk_parent_data disp_cc_parent_data_5[] = { { .fw_name = "bi_tcxo" }, - { .fw_name = "dsi0_phy_pll_out_dsiclk", - .name = "dsi0_phy_pll_out_dsiclk" }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, + { .fw_name = "dsi0_phy_pll_out_dsiclk" }, + { .fw_name = "core_bi_pll_test_se" }, }; static const struct freq_tbl ftbl_disp_cc_mdss_ahb_clk_src[] = { From c1ef343612cd51a8c97ca3004bffc6db33f639c6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:38 -0800 Subject: [PATCH 534/658] clk: qcom: Get rid of the test clock for dispcc-sc7180 The test clock isn't in the bindings and apparently it's not used by anyone upstream. Remove it. 
Suggested-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.5.I28ac8f801456f1b950f7da10ed0f74a1344d4a35@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sc7180.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c index a820e1558677..397f5d9dafc8 100644 --- a/drivers/clk/qcom/dispcc-sc7180.c +++ b/drivers/clk/qcom/dispcc-sc7180.c @@ -76,38 +76,32 @@ static struct clk_alpha_pll_postdiv disp_cc_pll0_out_even = { static const struct parent_map disp_cc_parent_map_0[] = { { P_BI_TCXO, 0 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data disp_cc_parent_data_0[] = { { .fw_name = "bi_tcxo" }, - { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_1[] = { { P_BI_TCXO, 0 }, { P_DP_PHY_PLL_LINK_CLK, 1 }, { P_DP_PHY_PLL_VCO_DIV_CLK, 2 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data disp_cc_parent_data_1[] = { { .fw_name = "bi_tcxo" }, { .fw_name = "dp_phy_pll_link_clk" }, { .fw_name = "dp_phy_pll_vco_div_clk" }, - { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_2[] = { { P_BI_TCXO, 0 }, { P_DSI0_PHY_PLL_OUT_BYTECLK, 1 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data disp_cc_parent_data_2[] = { { .fw_name = "bi_tcxo" }, { .fw_name = "dsi0_phy_pll_out_byteclk" }, - { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_3[] = { @@ -115,7 +109,6 @@ static const struct parent_map disp_cc_parent_map_3[] = { { P_DISP_CC_PLL0_OUT_MAIN, 1 }, { P_GPLL0_OUT_MAIN, 4 }, { P_DISP_CC_PLL0_OUT_EVEN, 5 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data disp_cc_parent_data_3[] = { @@ -123,31 +116,26 @@ static const struct clk_parent_data disp_cc_parent_data_3[] = { { .hw = &disp_cc_pll0.clkr.hw }, { .fw_name = "gcc_disp_gpll0_clk_src" }, { .hw = &disp_cc_pll0_out_even.clkr.hw }, - { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_4[] = { { P_BI_TCXO, 0 }, { P_GPLL0_OUT_MAIN, 4 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data disp_cc_parent_data_4[] = { { .fw_name = "bi_tcxo" }, { .fw_name = "gcc_disp_gpll0_clk_src" }, - { .fw_name = "core_bi_pll_test_se" }, }; static const struct parent_map disp_cc_parent_map_5[] = { { P_BI_TCXO, 0 }, { P_DSI0_PHY_PLL_OUT_DSICLK, 1 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data disp_cc_parent_data_5[] = { { .fw_name = "bi_tcxo" }, { .fw_name = "dsi0_phy_pll_out_dsiclk" }, - { .fw_name = "core_bi_pll_test_se" }, }; static const struct freq_tbl ftbl_disp_cc_mdss_ahb_clk_src[] = { @@ -166,7 +154,7 @@ static struct clk_rcg2 disp_cc_mdss_ahb_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_ahb_clk_src", .parent_data = disp_cc_parent_data_4, - .num_parents = 3, + .num_parents = 2, .flags = CLK_SET_RATE_PARENT, .ops = &clk_rcg2_shared_ops, }, @@ -180,7 +168,7 @@ static struct clk_rcg2 disp_cc_mdss_byte0_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_byte0_clk_src", .parent_data = disp_cc_parent_data_2, - .num_parents = 3, + .num_parents = 2, .flags = CLK_SET_RATE_PARENT, .ops = &clk_byte2_ops, }, @@ -213,7 +201,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_crypto_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_crypto_clk_src", 
.parent_data = disp_cc_parent_data_1, - .num_parents = 4, + .num_parents = 3, .flags = CLK_SET_RATE_PARENT, .ops = &clk_byte2_ops, }, @@ -227,7 +215,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_link_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_link_clk_src", .parent_data = disp_cc_parent_data_1, - .num_parents = 4, + .num_parents = 3, .flags = CLK_SET_RATE_PARENT, .ops = &clk_byte2_ops, }, @@ -241,7 +229,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_pixel_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_pixel_clk_src", .parent_data = disp_cc_parent_data_1, - .num_parents = 4, + .num_parents = 3, .flags = CLK_SET_RATE_PARENT, .ops = &clk_dp_ops, }, @@ -256,7 +244,7 @@ static struct clk_rcg2 disp_cc_mdss_esc0_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_esc0_clk_src", .parent_data = disp_cc_parent_data_2, - .num_parents = 3, + .num_parents = 2, .ops = &clk_rcg2_ops, }, }; @@ -279,7 +267,7 @@ static struct clk_rcg2 disp_cc_mdss_mdp_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_mdp_clk_src", .parent_data = disp_cc_parent_data_3, - .num_parents = 5, + .num_parents = 4, .ops = &clk_rcg2_shared_ops, }, }; @@ -292,7 +280,7 @@ static struct clk_rcg2 disp_cc_mdss_pclk0_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_pclk0_clk_src", .parent_data = disp_cc_parent_data_5, - .num_parents = 3, + .num_parents = 2, .flags = CLK_SET_RATE_PARENT, .ops = &clk_pixel_ops, }, @@ -307,7 +295,7 @@ static struct clk_rcg2 disp_cc_mdss_rot_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_rot_clk_src", .parent_data = disp_cc_parent_data_3, - .num_parents = 5, + .num_parents = 4, .ops = &clk_rcg2_shared_ops, }, }; @@ -321,7 +309,7 @@ static struct clk_rcg2 disp_cc_mdss_vsync_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_vsync_clk_src", .parent_data = disp_cc_parent_data_0, - .num_parents = 2, + .num_parents = 1, .ops = &clk_rcg2_shared_ops, }, }; From 3696ebe4e1fc45ea391412ff1a82cec9ae4f6e8f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:39 -0800 Subject: [PATCH 535/658] clk: qcom: Use ARRAY_SIZE in dispcc-sc7180 for parent clocks It's nicer to use ARRAY_SIZE instead of hardcoding. Had we always been doing this it would have prevented a previous bug. See commit 74c31ff9c84a ("clk: qcom: gpu_cc_gmu_clk_src has 5 parents, not 6"). 
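As a pattern, deriving the count from the table itself keeps the two from drifting apart; a reduced example with invented table contents (not one of the driver's real clocks):

static const struct clk_parent_data demo_parent_data[] = {
	{ .fw_name = "bi_tcxo" },
	{ .fw_name = "gcc_disp_gpll0_clk_src" },
};

static const struct clk_init_data demo_init = {
	.name		= "demo_clk_src",
	.parent_data	= demo_parent_data,
	/* cannot go stale if entries are added or removed above */
	.num_parents	= ARRAY_SIZE(demo_parent_data),
	.ops		= &clk_rcg2_shared_ops,	/* as used by these RCGs */
};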
Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.6.If590c468722d2985cea63adf60c0d2b3098f37d9@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sc7180.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c index 397f5d9dafc8..dd7af41e47eb 100644 --- a/drivers/clk/qcom/dispcc-sc7180.c +++ b/drivers/clk/qcom/dispcc-sc7180.c @@ -154,7 +154,7 @@ static struct clk_rcg2 disp_cc_mdss_ahb_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_ahb_clk_src", .parent_data = disp_cc_parent_data_4, - .num_parents = 2, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_4), .flags = CLK_SET_RATE_PARENT, .ops = &clk_rcg2_shared_ops, }, @@ -168,7 +168,7 @@ static struct clk_rcg2 disp_cc_mdss_byte0_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_byte0_clk_src", .parent_data = disp_cc_parent_data_2, - .num_parents = 2, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_2), .flags = CLK_SET_RATE_PARENT, .ops = &clk_byte2_ops, }, @@ -188,7 +188,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_aux_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_aux_clk_src", .parent_data = disp_cc_parent_data_0, - .num_parents = 2, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_0), .ops = &clk_rcg2_ops, }, }; @@ -201,7 +201,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_crypto_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_crypto_clk_src", .parent_data = disp_cc_parent_data_1, - .num_parents = 3, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_1), .flags = CLK_SET_RATE_PARENT, .ops = &clk_byte2_ops, }, @@ -215,7 +215,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_link_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_link_clk_src", .parent_data = disp_cc_parent_data_1, - .num_parents = 3, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_1), .flags = CLK_SET_RATE_PARENT, .ops = &clk_byte2_ops, }, @@ -229,7 +229,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_pixel_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_dp_pixel_clk_src", .parent_data = disp_cc_parent_data_1, - .num_parents = 3, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_1), .flags = CLK_SET_RATE_PARENT, .ops = &clk_dp_ops, }, @@ -244,7 +244,7 @@ static struct clk_rcg2 disp_cc_mdss_esc0_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_esc0_clk_src", .parent_data = disp_cc_parent_data_2, - .num_parents = 2, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_2), .ops = &clk_rcg2_ops, }, }; @@ -267,7 +267,7 @@ static struct clk_rcg2 disp_cc_mdss_mdp_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_mdp_clk_src", .parent_data = disp_cc_parent_data_3, - .num_parents = 4, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_3), .ops = &clk_rcg2_shared_ops, }, }; @@ -280,7 +280,7 @@ static struct clk_rcg2 disp_cc_mdss_pclk0_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_pclk0_clk_src", .parent_data = disp_cc_parent_data_5, - .num_parents = 2, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_5), .flags = CLK_SET_RATE_PARENT, .ops = &clk_pixel_ops, }, @@ -295,7 +295,7 @@ static struct clk_rcg2 disp_cc_mdss_rot_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_rot_clk_src", .parent_data = disp_cc_parent_data_3, - .num_parents = 4, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_3), .ops = 
&clk_rcg2_shared_ops, }, }; @@ -309,7 +309,7 @@ static struct clk_rcg2 disp_cc_mdss_vsync_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "disp_cc_mdss_vsync_clk_src", .parent_data = disp_cc_parent_data_0, - .num_parents = 1, + .num_parents = ARRAY_SIZE(disp_cc_parent_data_0), .ops = &clk_rcg2_shared_ops, }, }; From e6747e24f15d9d25bcc771154825c582e970bfa9 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:40 -0800 Subject: [PATCH 536/658] dt-bindings: clock: Fix qcom,gpucc bindings for sdm845/sc7180/msm8998 The qcom,gpucc bindings had a few problems with them: 1. When things were converted to yaml the name of the "gpll0 main" clock got changed from "gpll0" to "gpll0_main". Change it back for msm8998. 2. Apparently there is a push not to use purist aliases for clocks but instead to just use the internal Qualcomm names. For sdm845 and sc7180 (where the drivers haven't already been changed) move in this direction. Things were also getting complicated harder to deal with by jamming several SoCs into one file. Splitting simplifies things. Fixes: 5c6f3a36b913 ("dt-bindings: clock: Add YAML schemas for the QCOM GPUCC clock bindings") Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.7.I513cd73b16665065ae6c22cf594d8b543745e28c@changeid Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/qcom,gpucc.yaml | 72 ------------------- .../bindings/clock/qcom,msm8998-gpucc.yaml | 66 +++++++++++++++++ .../bindings/clock/qcom,sc7180-gpucc.yaml | 72 +++++++++++++++++++ .../bindings/clock/qcom,sdm845-gpucc.yaml | 72 +++++++++++++++++++ 4 files changed, 210 insertions(+), 72 deletions(-) delete mode 100644 Documentation/devicetree/bindings/clock/qcom,gpucc.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml deleted file mode 100644 index 622845aa643f..000000000000 --- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/bindings/clock/qcom,gpucc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Qualcomm Graphics Clock & Reset Controller Binding - -maintainers: - - Taniya Das - -description: | - Qualcomm grpahics clock control module which supports the clocks, resets and - power domains. 
- -properties: - compatible: - enum: - - qcom,msm8998-gpucc - - qcom,sc7180-gpucc - - qcom,sdm845-gpucc - - clocks: - minItems: 1 - maxItems: 3 - items: - - description: Board XO source - - description: GPLL0 main branch source from GCC(gcc_gpu_gpll0_clk_src) - - description: GPLL0 div branch source from GCC(gcc_gpu_gpll0_div_clk_src) - - clock-names: - minItems: 1 - maxItems: 3 - items: - - const: xo - - const: gpll0_main - - const: gpll0_div - - '#clock-cells': - const: 1 - - '#reset-cells': - const: 1 - - '#power-domain-cells': - const: 1 - - reg: - maxItems: 1 - -required: - - compatible - - reg - - clocks - - clock-names - - '#clock-cells' - - '#reset-cells' - - '#power-domain-cells' - -examples: - # Example of GPUCC with clock node properties for SDM845: - - | - clock-controller@5090000 { - compatible = "qcom,sdm845-gpucc"; - reg = <0x5090000 0x9000>; - clocks = <&rpmhcc 0>, <&gcc 31>, <&gcc 32>; - clock-names = "xo", "gpll0_main", "gpll0_div"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; -... diff --git a/Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml new file mode 100644 index 000000000000..7d853c1a85e5 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,msm8998-gpucc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Graphics Clock & Reset Controller Binding for MSM8998 + +maintainers: + - Taniya Das + +description: | + Qualcomm graphics clock control module which supports the clocks, resets and + power domains on MSM8998. + + See also dt-bindings/clock/qcom,gpucc-msm8998.h. + +properties: + compatible: + const: qcom,msm8998-gpucc + + clocks: + items: + - description: Board XO source + - description: GPLL0 main branch source (gcc_gpu_gpll0_clk_src) + + clock-names: + items: + - const: xo + - const: gpll0 + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + #include + clock-controller@5065000 { + compatible = "qcom,msm8998-gpucc"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + reg = <0x05065000 0x9000>; + clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&gcc GPLL0_OUT_MAIN>; + clock-names = "xo", "gpll0"; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml new file mode 100644 index 000000000000..5785192cc4be --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sc7180-gpucc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Graphics Clock & Reset Controller Binding for SC7180 + +maintainers: + - Taniya Das + +description: | + Qualcomm graphics clock control module which supports the clocks, resets and + power domains on SC7180. + + See also dt-bindings/clock/qcom,gpucc-sc7180.h. 
+ +properties: + compatible: + const: qcom,sc7180-gpucc + + clocks: + items: + - description: Board XO source + - description: GPLL0 main branch source + - description: GPLL0 div branch source + + clock-names: + items: + - const: bi_tcxo + - const: gcc_gpu_gpll0_clk_src + - const: gcc_gpu_gpll0_div_clk_src + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + #include + clock-controller@5090000 { + compatible = "qcom,sc7180-gpucc"; + reg = <0 0x05090000 0 0x9000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_GPU_GPLL0_CLK_SRC>, + <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; + clock-names = "bi_tcxo", + "gcc_gpu_gpll0_clk_src", + "gcc_gpu_gpll0_div_clk_src"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml new file mode 100644 index 000000000000..bac04f1c5d79 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sdm845-gpucc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Graphics Clock & Reset Controller Binding for SDM845 + +maintainers: + - Taniya Das + +description: | + Qualcomm graphics clock control module which supports the clocks, resets and + power domains on SDM845. + + See also dt-bindings/clock/qcom,gpucc-sdm845.h. + +properties: + compatible: + const: qcom,sdm845-gpucc + + clocks: + items: + - description: Board XO source + - description: GPLL0 main branch source + - description: GPLL0 div branch source + + clock-names: + items: + - const: bi_tcxo + - const: gcc_gpu_gpll0_clk_src + - const: gcc_gpu_gpll0_div_clk_src + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + #include + clock-controller@5090000 { + compatible = "qcom,sdm845-gpucc"; + reg = <0 0x05090000 0 0x9000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_GPU_GPLL0_CLK_SRC>, + <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; + clock-names = "bi_tcxo", + "gcc_gpu_gpll0_clk_src", + "gcc_gpu_gpll0_div_clk_src"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... From 39c064a0e6052276e9c7a6dc4ab153059496e2ab Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:42 -0800 Subject: [PATCH 537/658] clk: qcom: Get rid of the test clock for gpucc-sc7180 The test clock isn't in the bindings and apparently it's not used by anyone upstream. Remove it. 
Suggested-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.9.I6d5276b768f6593053be036a3e70cce298d39f0c@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gpucc-sc7180.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clk/qcom/gpucc-sc7180.c b/drivers/clk/qcom/gpucc-sc7180.c index ec61194cceaf..c88f00125775 100644 --- a/drivers/clk/qcom/gpucc-sc7180.c +++ b/drivers/clk/qcom/gpucc-sc7180.c @@ -60,7 +60,6 @@ static const struct parent_map gpu_cc_parent_map_0[] = { { P_GPU_CC_PLL1_OUT_MAIN, 3 }, { P_GPLL0_OUT_MAIN, 5 }, { P_GPLL0_OUT_MAIN_DIV, 6 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data gpu_cc_parent_data_0[] = { @@ -68,7 +67,6 @@ static const struct clk_parent_data gpu_cc_parent_data_0[] = { { .hw = &gpu_cc_pll1.clkr.hw }, { .fw_name = "gcc_gpu_gpll0_clk_src" }, { .fw_name = "gcc_gpu_gpll0_div_clk_src" }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, }; static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = { @@ -86,7 +84,7 @@ static struct clk_rcg2 gpu_cc_gmu_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "gpu_cc_gmu_clk_src", .parent_data = gpu_cc_parent_data_0, - .num_parents = 5, + .num_parents = 4, .flags = CLK_SET_RATE_PARENT, .ops = &clk_rcg2_shared_ops, }, From 72de7a19ef5b1aff90ab406a2238c72d4dcf6df4 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:43 -0800 Subject: [PATCH 538/658] clk: qcom: Use ARRAY_SIZE in gpucc-sc7180 for parent clocks It's nicer to use ARRAY_SIZE instead of hardcoding. Had we always been doing this it would have prevented a previous bug. See commit 74c31ff9c84a ("clk: qcom: gpu_cc_gmu_clk_src has 5 parents, not 6"). Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.10.I3bf44e33f4dc7ecca10a50dbccb7dc082894fa59@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gpucc-sc7180.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gpucc-sc7180.c b/drivers/clk/qcom/gpucc-sc7180.c index c88f00125775..a96c0b945de2 100644 --- a/drivers/clk/qcom/gpucc-sc7180.c +++ b/drivers/clk/qcom/gpucc-sc7180.c @@ -84,7 +84,7 @@ static struct clk_rcg2 gpu_cc_gmu_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "gpu_cc_gmu_clk_src", .parent_data = gpu_cc_parent_data_0, - .num_parents = 4, + .num_parents = ARRAY_SIZE(gpu_cc_parent_data_0), .flags = CLK_SET_RATE_PARENT, .ops = &clk_rcg2_shared_ops, }, From 8cff43d46cfcdeb11ee1b48ad86692785a351e0b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:44 -0800 Subject: [PATCH 539/658] dt-bindings: clock: Cleanup qcom,videocc bindings for sdm845/sc7180 This makes the qcom,videocc bindings match the recent changes to the dispcc and gpucc. 1. Switched to using "bi_tcxo" instead of "xo". 2. Adds a description for the XO clock. Not terribly important but nice if it cleanly matches its cousins. 3. Updates the example to use the symbolic name for the RPMH clock and also show that the real devices are currently using 2 address cells / size cells and fixes the spacing on the closing brace. 4. Split into 2 files. In this case they could probably share one file, but let's be consistent. 
Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.11.I27bbd90045f38cd3218c259526409d52a48efb35@changeid Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,sc7180-videocc.yaml | 63 +++++++++++++++++++ ...,videocc.yaml => qcom,sdm845-videocc.yaml} | 27 ++++---- 2 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7180-videocc.yaml rename Documentation/devicetree/bindings/clock/{qcom,videocc.yaml => qcom,sdm845-videocc.yaml} (61%) diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-videocc.yaml new file mode 100644 index 000000000000..31df901884ac --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-videocc.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sc7180-videocc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Video Clock & Reset Controller Binding for SC7180 + +maintainers: + - Taniya Das + +description: | + Qualcomm video clock control module which supports the clocks, resets and + power domains on SC7180. + + See also dt-bindings/clock/qcom,videocc-sc7180.h. + +properties: + compatible: + const: qcom,sc7180-videocc + + clocks: + items: + - description: Board XO source + + clock-names: + items: + - const: bi_tcxo + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +examples: + - | + #include + clock-controller@ab00000 { + compatible = "qcom,sc7180-videocc"; + reg = <0 0x0ab00000 0 0x10000>; + clocks = <&rpmhcc RPMH_CXO_CLK>; + clock-names = "bi_tcxo"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-videocc.yaml similarity index 61% rename from Documentation/devicetree/bindings/clock/qcom,videocc.yaml rename to Documentation/devicetree/bindings/clock/qcom,sdm845-videocc.yaml index 43cfc893a8d1..9d216c0f11d4 100644 --- a/Documentation/devicetree/bindings/clock/qcom,videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sdm845-videocc.yaml @@ -1,30 +1,31 @@ # SPDX-License-Identifier: GPL-2.0-only %YAML 1.2 --- -$id: http://devicetree.org/schemas/bindings/clock/qcom,videocc.yaml# +$id: http://devicetree.org/schemas/clock/qcom,sdm845-videocc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Video Clock & Reset Controller Binding +title: Qualcomm Video Clock & Reset Controller Binding for SDM845 maintainers: - Taniya Das description: | Qualcomm video clock control module which supports the clocks, resets and - power domains. + power domains on SDM845. + + See also dt-bindings/clock/qcom,videocc-sdm845.h. 
properties: compatible: - enum: - - qcom,sc7180-videocc - - qcom,sdm845-videocc + const: qcom,sdm845-videocc clocks: - maxItems: 1 + items: + - description: Board XO source clock-names: items: - - const: xo + - const: bi_tcxo '#clock-cells': const: 1 @@ -48,15 +49,15 @@ required: - '#power-domain-cells' examples: - # Example of VIDEOCC with clock node properties for SDM845: - | + #include clock-controller@ab00000 { compatible = "qcom,sdm845-videocc"; - reg = <0xab00000 0x10000>; - clocks = <&rpmhcc 0>; - clock-names = "xo"; + reg = <0 0x0ab00000 0 0x10000>; + clocks = <&rpmhcc RPMH_CXO_CLK>; + clock-names = "bi_tcxo"; #clock-cells = <1>; #reset-cells = <1>; #power-domain-cells = <1>; - }; + }; ... From abc8f93f33e75e22ab293648edbeb51157e9b6c9 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:45 -0800 Subject: [PATCH 540/658] clk: qcom: Get rid of the test clock for videocc-sc7180 The test clock isn't in the bindings and apparently it's not used by anyone upstream. Remove it. Suggested-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.12.Ifd19a2701a102ec9f04e61a09345198383a9e937@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/videocc-sc7180.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clk/qcom/videocc-sc7180.c b/drivers/clk/qcom/videocc-sc7180.c index 76add30024aa..653fc4e6bb6f 100644 --- a/drivers/clk/qcom/videocc-sc7180.c +++ b/drivers/clk/qcom/videocc-sc7180.c @@ -50,13 +50,11 @@ static struct clk_alpha_pll video_pll0 = { static const struct parent_map video_cc_parent_map_1[] = { { P_BI_TCXO, 0 }, { P_VIDEO_PLL0_OUT_MAIN, 1 }, - { P_CORE_BI_PLL_TEST_SE, 7 }, }; static const struct clk_parent_data video_cc_parent_data_1[] = { { .fw_name = "bi_tcxo" }, { .hw = &video_pll0.clkr.hw }, - { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, }; static const struct freq_tbl ftbl_video_cc_venus_clk_src[] = { @@ -78,7 +76,7 @@ static struct clk_rcg2 video_cc_venus_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "video_cc_venus_clk_src", .parent_data = video_cc_parent_data_1, - .num_parents = 3, + .num_parents = 2, .flags = CLK_SET_RATE_PARENT, .ops = &clk_rcg2_shared_ops, }, From 563528b831746792cb8a4ab50a757e648c626b3e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:46 -0800 Subject: [PATCH 541/658] clk: qcom: Use ARRAY_SIZE in videocc-sc7180 for parent clocks It's nicer to use ARRAY_SIZE instead of hardcoding. Had we always been doing this it would have prevented a previous bug. See commit 74c31ff9c84a ("clk: qcom: gpu_cc_gmu_clk_src has 5 parents, not 6"). 
Signed-off-by: Douglas Anderson Link: https://lkml.kernel.org/r/20200203103049.v4.13.If37e4b1b5553ac9db5ea51e84a6eec286cdf209e@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/videocc-sc7180.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/videocc-sc7180.c b/drivers/clk/qcom/videocc-sc7180.c index 653fc4e6bb6f..c363c3cc544e 100644 --- a/drivers/clk/qcom/videocc-sc7180.c +++ b/drivers/clk/qcom/videocc-sc7180.c @@ -76,7 +76,7 @@ static struct clk_rcg2 video_cc_venus_clk_src = { .clkr.hw.init = &(struct clk_init_data){ .name = "video_cc_venus_clk_src", .parent_data = video_cc_parent_data_1, - .num_parents = 2, + .num_parents = ARRAY_SIZE(video_cc_parent_data_1), .flags = CLK_SET_RATE_PARENT, .ops = &clk_rcg2_shared_ops, }, From 18ffa34b73fc4acd73fb2cd0750f09ff8433fa88 Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Mon, 3 Feb 2020 02:28:21 +0000 Subject: [PATCH 542/658] MAINTAINERS: Remove the Bard Liao from the MAINTAINERS of Realtek CODECs Remove the maintainer "Bard Liao" since he had quitted from Realtek. Signed-off-by: Oder Chiou Link: https://lore.kernel.org/r/3ae27357f98a406a958304386a1457bc@realtek.com Signed-off-by: Mark Brown --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bd5847e802de..5e5382e2fe21 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13934,7 +13934,6 @@ F: include/linux/platform_data/rtc-* F: tools/testing/selftests/rtc/ REALTEK AUDIO CODECS -M: Bard Liao M: Oder Chiou S: Maintained F: sound/soc/codecs/rt* From 42b716359beca10684195fd6e93a74ecd8ca8003 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 4 Feb 2020 11:12:41 +0000 Subject: [PATCH 543/658] ASoC: wcd934x: Add missing COMMON_CLK dependency Looks like some platforms are not yet using COMMON CLK. PowerPC allyesconfig failed with below error in next ld: sound/soc/codecs/wcd934x.o:(.toc+0x0): undefined reference to `of_clk_src_simple_get' ld: sound/soc/codecs/wcd934x.o: in function `.wcd934x_codec_probe': wcd934x.c:(.text.wcd934x_codec_probe+0x3d4): undefined reference to `.__clk_get_name' ld: wcd934x.c:(.text.wcd934x_codec_probe+0x438): undefined reference to `.clk_hw_register' ld: wcd934x.c:(.text.wcd934x_codec_probe+0x474): undefined reference to `.of_clk_add_provider' Add the missing COMMON_CLK dependency to fix this errors. Reported-by: Stephen Rothwell Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200204111241.6927-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index c9eb683bd1b0..286514865960 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1334,6 +1334,7 @@ config SND_SOC_WCD9335 config SND_SOC_WCD934X tristate "WCD9340/WCD9341 Codec" + depends on COMMON_CLK depends on MFD_WCD934X help The WCD9340/9341 is a audio codec IC Integrated in From c8ab422553c81a0eb070329c63725df1cd1425bc Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Tue, 4 Feb 2020 19:30:20 +0800 Subject: [PATCH 544/658] brd: check and limit max_part par In brd_init func, rd_nr num of brd_device are firstly allocated and add in brd_devices, then brd_devices are traversed to add each brd_device by calling add_disk func. When allocating brd_device, the disk->first_minor is set to i * max_part, if rd_nr * max_part is larger than MINORMASK, two different brd_device may have the same devt, then only one of them can be successfully added. 
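To make the collision concrete, here is a small stand-alone sketch (user-space C, not the driver itself) that mirrors the first_minor arithmetic described above, assuming MINORBITS == 20 and RAMDISK_MAJOR == 1 as defined in the kernel headers:

  #include <stdio.h>

  #define MINORBITS       20
  #define MKDEV(ma, mi)   (((ma) << MINORBITS) | (mi))

  int main(void)
  {
          unsigned int max_part = 1048576;  /* 1 << 20, as in the reproducer below */
          unsigned int i;

          /* disk->first_minor = i * max_part, as described above */
          for (i = 0; i < 3; i++)           /* rd_nr = 3 */
                  printf("brd%u: first_minor = %7u  devt = 0x%x\n",
                         i, i * max_part, MKDEV(1, i * max_part));
          return 0;
  }

With these module parameters, brd0 and brd1 compute the same devt (the oversized minor spills into the major bits), so only one of them can actually be added.
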
when rmmod brd.ko, it will cause oops when calling brd_exit. Follow those steps: # modprobe brd rd_nr=3 rd_size=102400 max_part=1048576 # rmmod brd then, the oops will appear. Oops log: [ 726.613722] Call trace: [ 726.614175] kernfs_find_ns+0x24/0x130 [ 726.614852] kernfs_find_and_get_ns+0x44/0x68 [ 726.615749] sysfs_remove_group+0x38/0xb0 [ 726.616520] blk_trace_remove_sysfs+0x1c/0x28 [ 726.617320] blk_unregister_queue+0x98/0x100 [ 726.618105] del_gendisk+0x144/0x2b8 [ 726.618759] brd_exit+0x68/0x560 [brd] [ 726.619501] __arm64_sys_delete_module+0x19c/0x2a0 [ 726.620384] el0_svc_common+0x78/0x130 [ 726.621057] el0_svc_handler+0x38/0x78 [ 726.621738] el0_svc+0x8/0xc [ 726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260) Here, we add brd_check_and_reset_par func to check and limit max_part par. -- V5->V6: - remove useless code V4->V5:(suggested by Ming Lei) - make sure max_part is not larger than DISK_MAX_PARTS V3->V4:(suggested by Ming Lei) - remove useless change - add one limit of max_part V2->V3: (suggested by Ming Lei) - clear .minors when running out of consecutive minor space in brd_alloc - remove limit of rd_nr V1->V2: - add more checks in brd_check_par_valid as suggested by Ming Lei. Signed-off-by: Zhiqiang Liu Reviewed-by: Bob Liu Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/brd.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index a8730cc4db10..220c5e18aba0 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -473,6 +473,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) return kobj; } +static inline void brd_check_and_reset_par(void) +{ + if (unlikely(!max_part)) + max_part = 1; + + /* + * make sure 'max_part' can be divided exactly by (1U << MINORBITS), + * otherwise, it is possiable to get same dev_t when adding partitions. + */ + if ((1U << MINORBITS) % max_part != 0) + max_part = 1UL << fls(max_part); + + if (max_part > DISK_MAX_PARTS) { + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", + DISK_MAX_PARTS, DISK_MAX_PARTS); + max_part = DISK_MAX_PARTS; + } +} + static int __init brd_init(void) { struct brd_device *brd, *next; @@ -496,8 +515,7 @@ static int __init brd_init(void) if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) return -EIO; - if (unlikely(!max_part)) - max_part = 1; + brd_check_and_reset_par(); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); From b9303bb1990950bd2f49ec1c85c015b5b6aac24b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 3 Feb 2020 22:53:06 +0100 Subject: [PATCH 545/658] x86/PCI: Define to_pci_sysdata() even when !CONFIG_PCI Recently, the to_pci_sysdata() helper was added inside the CONFIG_PCI guard, but it is used inside a CONFIG_NUMA guard, which does not require CONFIG_PCI. This breaks builds on !CONFIG_PCI machines. Make to_pci_sysdata() available in all configurations. Fixes: aad6aa0cd674 ("x86/PCI: Add to_pci_sysdata() helper") Link: https://lore.kernel.org/r/20200203215306.172000-1-Jason@zx2c4.com Reported-by: Randy Dunlap Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Acked-by: Randy Dunlap # build-tested --- arch/x86/include/asm/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 40ac1330adb2..7ccb338507e3 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -33,13 +33,13 @@ extern int pci_routeirq; extern int noioapicquirk; extern int noioapicreroute; -#ifdef CONFIG_PCI - static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus) { return bus->sysdata; } +#ifdef CONFIG_PCI + #ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { From c6a228be7f440ad5c341b3039f6033d4a8f070a4 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 30 Jan 2020 13:29:05 -0500 Subject: [PATCH 546/658] drm/amd/display: Only enable cursor on pipes that need it [Why] In current code we're essentially drawing the cursor on every pipe that contains it. This only works when the planes have the same scaling for src to dest rect, otherwise we'll get "double cursor" where one cursor is incorrectly filtered and offset from the real position. [How] Without dedicated cursor planes on DCN we require at least one pipe that matches the scaling of the current timing. This is an optimization and workaround for the most common case where the top-most plane is not scaled but the bottom-most plane is scaled. Whenever a pipe has a parent pipe in the blending tree whose recout fully contains the current pipe we can disable the pipe. This only applies when the pipe is actually visible of course. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index f2127afb37b2..1008ac8a0f2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2911,6 +2911,33 @@ void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data) hubbub->funcs->update_dchub(hubbub, dh_data); } +static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) +{ + struct pipe_ctx *test_pipe; + const struct rect *r1 = &pipe_ctx->plane_res.scl_data.recout, *r2; + int r1_r = r1->x + r1->width, r1_b = r1->y + r1->height, r2_r, r2_b; + + /** + * Disable the cursor if there's another pipe above this with a + * plane that contains this pipe's viewport to prevent double cursor + * and incorrect scaling artifacts. 
+ */ + for (test_pipe = pipe_ctx->top_pipe; test_pipe; + test_pipe = test_pipe->top_pipe) { + if (!test_pipe->plane_state->visible) + continue; + + r2 = &test_pipe->plane_res.scl_data.recout; + r2_r = r2->x + r2->width; + r2_b = r2->y + r2->height; + + if (r1->x >= r2->x && r1->y >= r2->y && r1_r <= r2_r && r1_b <= r2_b) + return true; + } + + return false; +} + void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) { struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; @@ -2956,6 +2983,9 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) pos_cpy.enable = false; + if (pos_cpy.enable && dcn10_can_pipe_disable_cursor(pipe_ctx)) + pos_cpy.enable = false; + // Swap axis and mirror horizontally if (param.rotation == ROTATION_ANGLE_90) { uint32_t temp_x = pos_cpy.x; From f38abc15d157b7b31fa7f651dc8bf92858c963f8 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 19:55:47 -0500 Subject: [PATCH 547/658] drm/amdkfd: Fix a bug in SDMA RLC queue counting under HWS mode The sdma_queue_count increment should be done before execute_queues_cpsch(), which calls pm_calc_rlib_size() where sdma_queue_count is used to calculate whether over_subscription is triggered. With the previous code, when a SDMA queue is created, compute_queue_count in pm_calc_rlib_size() is one more than the actual compute queue number, because the queue_count has been incremented while sdma_queue_count has not. This patch fixes that. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2870553a2ce0..80d22bf702e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1237,16 +1237,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(&q->list, &qpd->queues_list); qpd->queue_count++; + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; + if (q->properties.is_active) { dqm->queue_count++; retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. From 1cf8c930b378016846c88ef0f1444248033326ec Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 30 Jan 2020 16:46:38 +0800 Subject: [PATCH 548/658] drm/amd/powerplay: fix navi10 system intermittent reboot issue V2 This workaround is needed only for Navi10 12 Gbps SKUs. 
V2: added SMU firmware version guard Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 18 ++++++ .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/powerplay/inc/smu_types.h | 2 + .../drm/amd/powerplay/inc/smu_v11_0_ppsmc.h | 5 +- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 58 +++++++++++++++++++ drivers/gpu/drm/amd/powerplay/smu_internal.h | 3 + 6 files changed, 86 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 99469479e277..99ad4ddbe12f 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -21,6 +21,7 @@ */ #include +#include #include "pp_debug.h" #include "amdgpu.h" @@ -1137,6 +1138,23 @@ static int smu_smc_table_hw_init(struct smu_context *smu, ret = smu_system_features_control(smu, true); if (ret) return ret; + + if (adev->asic_type == CHIP_NAVI10) { + if ((adev->pdev->device == 0x731f && (adev->pdev->revision == 0xc2 || + adev->pdev->revision == 0xc3 || + adev->pdev->revision == 0xca || + adev->pdev->revision == 0xcb)) || + (adev->pdev->device == 0x66af && (adev->pdev->revision == 0xf3 || + adev->pdev->revision == 0xf4 || + adev->pdev->revision == 0xf5 || + adev->pdev->revision == 0xf6))) { + ret = smu_disable_umc_cdr_12gbps_workaround(smu); + if (ret) { + pr_err("Workaround failed to disable UMC CDR feature on 12Gbps SKU!\n"); + return ret; + } + } + } } if (adev->asic_type != CHIP_ARCTURUS) { ret = smu_notify_display_change(smu); diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index b0591a8dda41..3237eb1ff708 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -565,6 +565,7 @@ struct pptable_funcs { int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int (*override_pcie_parameters)(struct smu_context *smu); uint32_t (*get_pptable_power_limit)(struct smu_context *smu); + int (*disable_umc_cdr_12gbps_workaround)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h index d8c9b7f91fcc..a5b4df146713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h @@ -170,6 +170,8 @@ __SMU_DUMMY_MAP(SetSoftMinJpeg), \ __SMU_DUMMY_MAP(SetHardMinFclkByFreq), \ __SMU_DUMMY_MAP(DFCstateControl), \ + __SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \ + __SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \ #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h index 373861ddccd0..406bfd187ce8 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h @@ -120,7 +120,10 @@ #define PPSMC_MSG_GetVoltageByDpmOverdrive 0x45 #define PPSMC_MSG_BacoAudioD3PME 0x48 -#define PPSMC_Message_Count 0x49 +#define PPSMC_MSG_DALDisableDummyPstateChange 0x49 +#define PPSMC_MSG_DALEnableDummyPstateChange 0x4A + +#define PPSMC_Message_Count 0x4B typedef uint32_t PPSMC_Result; typedef uint32_t PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c 
index f1b27fc20c19..e59cd2c74dc9 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -119,6 +119,8 @@ static struct smu_11_0_cmn2aisc_mapping navi10_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg), MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3), + MSG_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE,PPSMC_MSG_DALDisableDummyPstateChange), + MSG_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE, PPSMC_MSG_DALEnableDummyPstateChange), }; static struct smu_11_0_cmn2aisc_mapping navi10_clk_map[SMU_CLK_COUNT] = { @@ -2093,6 +2095,61 @@ static int navi10_run_btc(struct smu_context *smu) return ret; } +static int navi10_dummy_pstate_control(struct smu_context *smu, bool enable) +{ + int result = 0; + + if (!enable) + result = smu_send_smc_msg(smu, SMU_MSG_DAL_DISABLE_DUMMY_PSTATE_CHANGE); + else + result = smu_send_smc_msg(smu, SMU_MSG_DAL_ENABLE_DUMMY_PSTATE_CHANGE); + + return result; +} + +static int navi10_disable_umc_cdr_12gbps_workaround(struct smu_context *smu) +{ + uint32_t uclk_count, uclk_min, uclk_max; + uint32_t smu_version; + int ret = 0; + + ret = smu_get_smc_version(smu, NULL, &smu_version); + if (ret) + return ret; + + /* This workaround is available only for 42.50 or later SMC firmwares */ + if (smu_version < 0x2A3200) + return 0; + + ret = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_count); + if (ret) + return ret; + + ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)0, &uclk_min); + if (ret) + return ret; + + ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)(uclk_count - 1), &uclk_max); + if (ret) + return ret; + + /* Force UCLK out of the highest DPM */ + ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, uclk_min); + if (ret) + return ret; + + /* Revert the UCLK Hardmax */ + ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, uclk_max); + if (ret) + return ret; + + /* + * In this case, SMU already disabled dummy pstate during enablement + * of UCLK DPM, we have to re-enabled it. + * */ + return navi10_dummy_pstate_control(smu, true); +} + static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, .alloc_dpm_context = navi10_allocate_dpm_context, @@ -2187,6 +2244,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .od_edit_dpm_table = navi10_od_edit_dpm_table, .get_pptable_power_limit = navi10_get_pptable_power_limit, .run_btc = navi10_run_btc, + .disable_umc_cdr_12gbps_workaround = navi10_disable_umc_cdr_12gbps_workaround, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index 783319ec8bf9..7bd200ffcda8 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -207,4 +207,7 @@ int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg); #define smu_update_pcie_parameters(smu, pcie_gen_cap, pcie_width_cap) \ ((smu)->ppt_funcs->update_pcie_parameters ? (smu)->ppt_funcs->update_pcie_parameters((smu), (pcie_gen_cap), (pcie_width_cap)) : 0) +#define smu_disable_umc_cdr_12gbps_workaround(smu) \ + ((smu)->ppt_funcs->disable_umc_cdr_12gbps_workaround ? 
(smu)->ppt_funcs->disable_umc_cdr_12gbps_workaround((smu)) : 0) + #endif From 47eed65178e8fa626097648eb90cb6b31952efbc Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Fri, 31 Jan 2020 09:51:23 -0500 Subject: [PATCH 549/658] drm/amd/display: Fix a typo when computing dsc configuration [why] Remove a backslash symbol accidentally left in increase bpp function when computing mst dsc configuration. Signed-off-by: Mikita Lipski Reviewed-by: Zhan Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 96b391e4b3e7..5672f7765919 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -632,7 +632,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state, if (drm_dp_atomic_find_vcpi_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn,\ + vars[next_index].pbn, dm_mst_get_pbn_divider(dc_link)) < 0) return; if (!drm_dp_mst_atomic_check(state)) { From a30a8c2f884c2835383cc5cce134fafbcaa62c1f Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 6 Dec 2019 13:16:08 -0500 Subject: [PATCH 550/658] drm/amd/display: Fix HW/SW state mismatch [Why] When we disable a connector we don't explicitly remove it from the module so the display is still cached(SW) in the hdcp_module. SST: no issues because we can only have 1 display per link MST: We have x displays per link, now if we disable 1 we don't remove it from the module so the module has x display cached(SW). If we try to enable HDCP, psp verification will fail because we are reporting x displays while the HW only has x-1 display enabled [How] Check the callback for when we disable stream and call remove display. 
Signed-off-by: Bhawanpreet Lakha Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index ae329335dfcc..0acd3409dd6c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -135,6 +135,20 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, mutex_unlock(&hdcp_w->mutex); } +static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, + unsigned int link_index, + struct amdgpu_dm_connector *aconnector) +{ + struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; + + mutex_lock(&hdcp_w->mutex); + hdcp_w->aconnector = aconnector; + + mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); + + process_output(hdcp_w); + mutex_unlock(&hdcp_w->mutex); +} void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -303,6 +317,11 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) memset(link, 0, sizeof(*link)); display->index = aconnector->base.index; + + if (config->dpms_off) { + hdcp_remove_display(hdcp_work, link_index, aconnector); + return; + } display->state = MOD_HDCP_DISPLAY_ACTIVE; if (aconnector->dc_sink != NULL) From 45826e9c4e9e952db43053f4fbed58ec602a410f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 25 Jan 2020 11:51:41 -0500 Subject: [PATCH 551/658] drm/amdgpu/navi: fix index for OD MCLK You can only adjust the max mclk, not the min. Bug: https://gitlab.freedesktop.org/drm/amd/issues/1020 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.5.x --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index e59cd2c74dc9..7c3629f64419 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -845,7 +845,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) break; size += sprintf(buf + size, "OD_MCLK:\n"); - size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax); + size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax); break; case SMU_OD_VDDC_CURVE: if (!smu->od_enabled || !od_table || !od_settings) From ee23a518fdc2c1dd1aaaf3a2c7ffdd6c83b396ec Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 25 Jan 2020 11:27:06 -0500 Subject: [PATCH 552/658] drm/amdgpu/navi10: add OD_RANGE for navi overclocking So users can see the range of valid values. 
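As a rough illustration of what the new OD_RANGE block looks like when reading the pp_od_clk_voltage sysfs file, here is a stand-alone snippet reusing the same sprintf format strings as the patch below; the limit values are made up, the real ones come from the board's overdrive settings table:

  #include <stdio.h>

  int main(void)
  {
          char buf[256];
          int size;

          /* Hypothetical limits; the driver fills these in from the
           * SMU_11_0_ODSETTING_* min/max fields. */
          unsigned int sclk_min = 800, sclk_max = 2100;
          unsigned int mclk_min = 625, mclk_max = 950;

          size  = sprintf(buf, "%s:\n", "OD_RANGE");
          size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", sclk_min, sclk_max);
          size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", mclk_min, mclk_max);
          fputs(buf, stdout);
          return 0;
  }
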
Bug: https://gitlab.freedesktop.org/drm/amd/issues/1020 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.5.x --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 7c3629f64419..a0a6dc2c190a 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -739,6 +739,15 @@ static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_tabl return od_table->cap[feature]; } +static void navi10_od_setting_get_range(struct smu_11_0_overdrive_table *od_table, + enum SMU_11_0_ODSETTING_ID setting, + uint32_t *min, uint32_t *max) +{ + if (min) + *min = od_table->min[setting]; + if (max) + *max = od_table->max[setting]; +} static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) @@ -757,6 +766,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, OverDriveTable_t *od_table = (OverDriveTable_t *)table_context->overdrive_table; struct smu_11_0_overdrive_table *od_settings = smu->od_settings; + uint32_t min_value, max_value; switch (clk_type) { case SMU_GFXCLK: @@ -869,6 +879,55 @@ static int navi10_print_clk_levels(struct smu_context *smu, } size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); } + break; + case SMU_OD_RANGE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + size = sprintf(buf, "%s:\n", "OD_RANGE"); + + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN, + &min_value, NULL); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX, + NULL, &max_value); + size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, + &min_value, &max_value); + size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", + min_value, max_value); + } 
+ break; default: break; From 93c5f1f66c6ad4a3b180c1644f74e1b3b4be7864 Mon Sep 17 00:00:00 2001 From: Matt Coffin Date: Sat, 25 Jan 2020 13:04:05 -0500 Subject: [PATCH 553/658] drm/amdgpu/smu_v11_0: Correct behavior of restoring default tables (v2) Previously, the syfs functionality for restoring the default powerplay table was sourcing it's information from the currently-staged powerplay table. This patch adds a step to cache the first overdrive table that we see on boot, so that it can be used later to "restore" the powerplay table v2: sqaush my original with Matt's fix Bug: https://gitlab.freedesktop.org/drm/amd/issues/1020 Signed-off-by: Matt Coffin Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.5.x --- .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 7 +++++ drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 6 ++++ drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 28 ++++++------------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 3237eb1ff708..97b6714e83e6 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -273,6 +273,7 @@ struct smu_table_context uint8_t thermal_controller_type; void *overdrive_table; + void *boot_overdrive_table; }; struct smu_dpm_context { diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index a0a6dc2c190a..272e58510d0d 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -2065,6 +2065,13 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL return ret; od_table->UclkFmax = input[1]; break; + case PP_OD_RESTORE_DEFAULT_TABLE: + if (!(table_context->overdrive_table && table_context->boot_overdrive_table)) { + pr_err("Overdrive table was not initialized!\n"); + return -EINVAL; + } + memcpy(table_context->overdrive_table, table_context->boot_overdrive_table, sizeof(OverDriveTable_t)); + break; case PP_OD_COMMIT_DPM_TABLE: navi10_dump_od_table(od_table); ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 02f8c9cb89d9..0dc49479a7eb 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1882,6 +1882,12 @@ int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, pr_err("Failed to export overdrive table!\n"); return ret; } + if (!table_context->boot_overdrive_table) { + table_context->boot_overdrive_table = kmemdup(table_context->overdrive_table, overdrive_table_size, GFP_KERNEL); + if (!table_context->boot_overdrive_table) { + return -ENOMEM; + } + } } ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true); if (ret) { diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 38febd5ca4da..4ad8d6c14ee5 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -1706,22 +1706,11 @@ static int vega20_set_default_od_settings(struct smu_context *smu, struct smu_table_context *table_context = &smu->smu_table; int ret; + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); + if (ret) + return ret; + if (initialize) { - if 
(table_context->overdrive_table) - return -EINVAL; - - table_context->overdrive_table = kzalloc(sizeof(OverDriveTable_t), GFP_KERNEL); - - if (!table_context->overdrive_table) - return -ENOMEM; - - ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, - table_context->overdrive_table, false); - if (ret) { - pr_err("Failed to export over drive table!\n"); - return ret; - } - ret = vega20_set_default_od8_setttings(smu); if (ret) return ret; @@ -2778,12 +2767,11 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu, break; case PP_OD_RESTORE_DEFAULT_TABLE: - ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false); - if (ret) { - pr_err("Failed to export over drive table!\n"); - return ret; + if (!(table_context->overdrive_table && table_context->boot_overdrive_table)) { + pr_err("Overdrive table was not initialized!\n"); + return -EINVAL; } - + memcpy(table_context->overdrive_table, table_context->boot_overdrive_table, sizeof(OverDriveTable_t)); break; case PP_OD_COMMIT_DPM_TABLE: From 0531aa6eb38bfa9514609e2727558a051da7365f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 25 Jan 2020 13:30:45 -0500 Subject: [PATCH 554/658] drm/amdgpu: fetch default VDDC curve voltages (v2) Ask the SMU for the default VDDC curve voltage values. This properly reports the VDDC values in the OD interface. v2: only update if the original values are 0 Bug: https://gitlab.freedesktop.org/drm/amd/issues/1020 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.5.x --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 50 +++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 272e58510d0d..2c8c4cbce548 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -121,6 +121,8 @@ static struct smu_11_0_cmn2aisc_mapping navi10_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(ArmD3, PPSMC_MSG_ArmD3), MSG_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE,PPSMC_MSG_DALDisableDummyPstateChange), MSG_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE, PPSMC_MSG_DALEnableDummyPstateChange), + MSG_MAP(GetVoltageByDpm, PPSMC_MSG_GetVoltageByDpm), + MSG_MAP(GetVoltageByDpmOverdrive, PPSMC_MSG_GetVoltageByDpmOverdrive), }; static struct smu_11_0_cmn2aisc_mapping navi10_clk_map[SMU_CLK_COUNT] = { @@ -1934,6 +1936,28 @@ static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_tab return 0; } +static int navi10_overdrive_get_gfx_clk_base_voltage(struct smu_context *smu, + uint16_t *voltage, + uint32_t freq) +{ + uint32_t param = (freq & 0xFFFF) | (PPCLK_GFXCLK << 16); + uint32_t value = 0; + int ret; + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetVoltageByDpm, + param); + if (ret) { + pr_err("[GetBaseVoltage] failed to get GFXCLK AVFS voltage from SMU!"); + return ret; + } + + smu_read_smc_arg(smu, &value); + *voltage = (uint16_t)value; + + return 0; +} + static int navi10_setup_od_limits(struct smu_context *smu) { struct smu_11_0_overdrive_table *overdrive_table = NULL; struct smu_11_0_powerplay_table *powerplay_table = NULL; @@ -1960,16 +1984,40 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali if (ret) return ret; + od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; if (initialize) { ret = navi10_setup_od_limits(smu); if (ret) { pr_err("Failed to retrieve board OD limits\n"); return ret; } + if (od_table) { + if (!od_table->GfxclkVolt1) { + ret = 
navi10_overdrive_get_gfx_clk_base_voltage(smu, + &od_table->GfxclkVolt1, + od_table->GfxclkFreq1); + if (ret) + od_table->GfxclkVolt1 = 0; + } + if (!od_table->GfxclkVolt2) { + ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, + &od_table->GfxclkVolt2, + od_table->GfxclkFreq2); + if (ret) + od_table->GfxclkVolt2 = 0; + } + + if (!od_table->GfxclkVolt3) { + ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, + &od_table->GfxclkVolt3, + od_table->GfxclkFreq3); + if (ret) + od_table->GfxclkVolt3 = 0; + } + } } - od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; if (od_table) { navi10_dump_od_table(od_table); } From c37243579d6c881c575dcfb54cf31c9ded88f946 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Jan 2020 14:39:45 -0500 Subject: [PATCH 555/658] drm/amdgpu/display: handle multiple numbers of fclks in dcn_calcs.c (v2) We might get different numbers of clocks from powerplay depending on what the OEM has populated. v2: add assert for at least one level Bug: https://gitlab.freedesktop.org/drm/amd/issues/963 Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index a27d84ca15a5..1a37550731de 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -1435,6 +1435,7 @@ void dcn_bw_update_from_pplib(struct dc *dc) struct dc_context *ctx = dc->ctx; struct dm_pp_clock_levels_with_voltage fclks = {0}, dcfclks = {0}; bool res; + unsigned vmin0p65_idx, vmid0p72_idx, vnom0p8_idx, vmax0p9_idx; /* TODO: This is not the proper way to obtain fabric_and_dram_bandwidth, should be min(fclk, memclk) */ res = dm_pp_get_clock_levels_by_type_with_voltage( @@ -1446,17 +1447,28 @@ void dcn_bw_update_from_pplib(struct dc *dc) res = verify_clock_values(&fclks); if (res) { - ASSERT(fclks.num_levels >= 3); - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 32 * (fclks.data[0].clocks_in_khz / 1000.0) / 1000.0; - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = dc->dcn_soc->number_of_channels * - (fclks.data[fclks.num_levels - (fclks.num_levels > 2 ? 3 : 2)].clocks_in_khz / 1000.0) - * ddr4_dram_factor_single_Channel / 1000.0; - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = dc->dcn_soc->number_of_channels * - (fclks.data[fclks.num_levels - 2].clocks_in_khz / 1000.0) - * ddr4_dram_factor_single_Channel / 1000.0; - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = dc->dcn_soc->number_of_channels * - (fclks.data[fclks.num_levels - 1].clocks_in_khz / 1000.0) - * ddr4_dram_factor_single_Channel / 1000.0; + ASSERT(fclks.num_levels); + + vmin0p65_idx = 0; + vmid0p72_idx = fclks.num_levels - + (fclks.num_levels > 2 ? 3 : (fclks.num_levels > 1 ? 2 : 1)); + vnom0p8_idx = fclks.num_levels - (fclks.num_levels > 1 ? 
2 : 1); + vmax0p9_idx = fclks.num_levels - 1; + + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = + 32 * (fclks.data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0; + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = + dc->dcn_soc->number_of_channels * + (fclks.data[vmid0p72_idx].clocks_in_khz / 1000.0) + * ddr4_dram_factor_single_Channel / 1000.0; + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = + dc->dcn_soc->number_of_channels * + (fclks.data[vnom0p8_idx].clocks_in_khz / 1000.0) + * ddr4_dram_factor_single_Channel / 1000.0; + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = + dc->dcn_soc->number_of_channels * + (fclks.data[vmax0p9_idx].clocks_in_khz / 1000.0) + * ddr4_dram_factor_single_Channel / 1000.0; } else BREAK_TO_DEBUGGER(); From 4d0a72b66065dd7e274bad6aa450196d42fd8f84 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Jan 2020 13:19:51 -0500 Subject: [PATCH 556/658] drm/amdgpu/smu10: fix smu10_get_clock_by_type_with_latency Only send non-0 clocks to DC for validation. This mirrors what the windows driver does. Bug: https://gitlab.freedesktop.org/drm/amd/issues/963 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 4e8ab139bb3b..273126cfc37d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1026,12 +1026,15 @@ static int smu10_get_clock_by_type_with_latency(struct pp_hwmgr *hwmgr, clocks->num_levels = 0; for (i = 0; i < pclk_vol_table->count; i++) { - clocks->data[i].clocks_in_khz = pclk_vol_table->entries[i].clk * 10; - clocks->data[i].latency_in_us = latency_required ? - smu10_get_mem_latency(hwmgr, - pclk_vol_table->entries[i].clk) : - 0; - clocks->num_levels++; + if (pclk_vol_table->entries[i].clk) { + clocks->data[clocks->num_levels].clocks_in_khz = + pclk_vol_table->entries[i].clk * 10; + clocks->data[clocks->num_levels].latency_in_us = latency_required ? + smu10_get_mem_latency(hwmgr, + pclk_vol_table->entries[i].clk) : + 0; + clocks->num_levels++; + } } return 0; From 1064ad4aeef94f51ca230ac639a9e996fb7867a0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Jan 2020 12:42:57 -0500 Subject: [PATCH 557/658] drm/amdgpu/smu10: fix smu10_get_clock_by_type_with_voltage Cull out 0 clocks to avoid a warning in DC. 
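This is the same culling pattern as the previous patch; as a stand-alone sketch with illustrative values (not the real pclk_vol_table): skip zero entries and compact the output with a separate destination index, so DC is never asked to validate a zero clock.

  #include <stdio.h>

  int main(void)
  {
          /* Stand-in for pclk_vol_table entries; zeros are unpopulated levels. */
          unsigned int table[] = { 0, 300000, 0, 600000, 800000 };
          unsigned int clocks_in_khz[5];
          unsigned int i, num_levels = 0;

          for (i = 0; i < 5; i++) {
                  if (table[i])           /* cull zero clocks entirely */
                          clocks_in_khz[num_levels++] = table[i];
          }

          for (i = 0; i < num_levels; i++)
                  printf("level %u: %u kHz\n", i, clocks_in_khz[i]);
          return 0;
  }
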
Bug: https://gitlab.freedesktop.org/drm/amd/issues/963 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 273126cfc37d..689072a312a7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1080,9 +1080,11 @@ static int smu10_get_clock_by_type_with_voltage(struct pp_hwmgr *hwmgr, clocks->num_levels = 0; for (i = 0; i < pclk_vol_table->count; i++) { - clocks->data[i].clocks_in_khz = pclk_vol_table->entries[i].clk * 10; - clocks->data[i].voltage_in_mv = pclk_vol_table->entries[i].vol; - clocks->num_levels++; + if (pclk_vol_table->entries[i].clk) { + clocks->data[clocks->num_levels].clocks_in_khz = pclk_vol_table->entries[i].clk * 10; + clocks->data[clocks->num_levels].voltage_in_mv = pclk_vol_table->entries[i].vol; + clocks->num_levels++; + } } return 0; From 227823d2074da0c138d2abc0074b2dd281bbf923 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 22 Jan 2020 20:45:39 -0500 Subject: [PATCH 558/658] nfs: optimise readdir cache page invalidation When the directory is large and it's being modified by one client while another client is doing the 'ls -l' on the same directory then the cache page invalidation from nfs_force_use_readdirplus causes the reading client to keep restarting READDIRPLUS from cookie 0 which causes the 'ls -l' to take a very long time to complete, possibly never completing. Currently when nfs_force_use_readdirplus is called to switch from READDIR to READDIRPLUS, it invalidates all the cached pages of the directory. This cache page invalidation causes the next nfs_readdir to re-read the directory content from cookie 0. This patch is to optimise the cache invalidation in nfs_force_use_readdirplus by only truncating the cached pages from last page index accessed to the end the file. It also marks the inode to delay invalidating all the cached page of the directory until the next initial nfs_readdir of the next 'ls' instance. 
Signed-off-by: Dai Ngo Reviewed-by: Trond Myklebust [Anna - Fix conflicts with Trond's readdir patches] [Anna - Remove redundant call to nfs_zap_mapping()] [Anna - Replace d_inode(file_dentry(desc->file)) with file_inode(desc->file)] Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 9 +++++++-- include/linux/nfs_fs.h | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 88f6cf1ccf8c..1320288ff9ec 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -449,7 +449,8 @@ void nfs_force_use_readdirplus(struct inode *dir) if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && !list_empty(&nfsi->open_files)) { set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); - invalidate_mapping_pages(dir->i_mapping, 0, -1); + invalidate_mapping_pages(dir->i_mapping, + nfsi->page_index + 1, -1); } } @@ -720,6 +721,8 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) static int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { + struct inode *inode = file_inode(desc->file); + struct nfs_inode *nfsi = NFS_I(inode); int res; desc->page = get_cache_page(desc); @@ -731,8 +734,10 @@ int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) res = -EAGAIN; if (desc->page->mapping != NULL) { res = nfs_readdir_search_array(desc); - if (res == 0) + if (res == 0) { + nfsi->page_index = desc->page_index; return 0; + } } unlock_page(desc->page); error: diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c06b1fd130f3..a5f8f03ecd59 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -168,6 +168,9 @@ struct nfs_inode { struct rw_semaphore rmdir_sem; struct mutex commit_mutex; + /* track last access to cached pages */ + unsigned long page_index; + #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ From 123c23c6a7b7ecd2a3d6060bea1d94019f71fd66 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 3 Feb 2020 03:47:53 +0000 Subject: [PATCH 559/658] NFS: Fix memory leaks In _nfs42_proc_copy(), 'res->commit_res.verf' is allocated through kzalloc() if 'args->sync' is true. In the following code, if 'res->synchronous' is false, handle_async_copy() will be invoked. If an error occurs during the invocation, the following code will not be executed and the error will be returned . However, the allocated 'res->commit_res.verf' is not deallocated, leading to a memory leak. This is also true if the invocation of process_copy_commit() returns an error. To fix the above leaks, redirect the execution to the 'out' label if an error is encountered. Signed-off-by: Wenwen Wang Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 9637aad36bdc..e2ae54b35dfe 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -343,14 +343,14 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = handle_async_copy(res, dst_server, src_server, src, dst, &args->src_stateid, restart); if (status) - return status; + goto out; } if ((!res->synchronous || !args->sync) && res->write_res.verifier.committed != NFS_FILE_SYNC) { status = process_copy_commit(dst, pos_dst, res); if (status) - return status; + goto out; } truncate_pagecache_range(dst_inode, pos_dst, From 0b87a2b795d66be7b54779848ef0f3901c5e46fc Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 4 Feb 2020 14:38:09 +0200 Subject: [PATCH 560/658] nvmet: Fix error print message at nvmet_install_queue function Place the arguments in the correct order. 
Fixes: 1672ddb8d691 ("nvmet: Add install_queue callout") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f7297473d9eb..45ebc2e20458 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -136,7 +136,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (ret) { pr_err("failed to install queue %d cntlid %d ret %x\n", - qid, ret, ctrl->cntlid); + qid, ctrl->cntlid, ret); return ret; } } From 1a3f540d63152b8db0a12de508bfa03776217d83 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 4 Feb 2020 14:38:10 +0200 Subject: [PATCH 561/658] nvmet: Fix controller use after free After nvmet_install_queue() sets sq->ctrl calling to nvmet_sq_destroy() reduces the controller refcount. In case nvmet_install_queue() fails, calling to nvmet_ctrl_put() is done twice (at nvmet_sq_destroy and nvmet_execute_io_connect/nvmet_execute_admin_connect) instead of once for the queue which leads to use after free of the controller. Fix this by set NULL at sq->ctrl in case of a failure at nvmet_install_queue(). The bug leads to the following Call Trace: [65857.994862] refcount_t: underflow; use-after-free. [65858.108304] Workqueue: events nvmet_rdma_release_queue_work [nvmet_rdma] [65858.115557] RIP: 0010:refcount_warn_saturate+0xe5/0xf0 [65858.208141] Call Trace: [65858.211203] nvmet_sq_destroy+0xe1/0xf0 [nvmet] [65858.216383] nvmet_rdma_release_queue_work+0x37/0xf0 [nvmet_rdma] [65858.223117] process_one_work+0x167/0x370 [65858.227776] worker_thread+0x49/0x3e0 [65858.232089] kthread+0xf5/0x130 [65858.235895] ? max_active_store+0x80/0x80 [65858.240504] ? 
kthread_bind+0x10/0x10 [65858.244832] ret_from_fork+0x1f/0x30 [65858.249074] ---[ end trace f82d59250b54beb7 ]--- Fixes: bb1cc74790eb ("nvmet: implement valid sqhd values in completions") Fixes: 1672ddb8d691 ("nvmet: Add install_queue callout") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 45ebc2e20458..feef15c38ec9 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -109,6 +109,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 sqsize = le16_to_cpu(c->sqsize); struct nvmet_ctrl *old; + u16 ret; old = cmpxchg(&req->sq->ctrl, NULL, ctrl); if (old) { @@ -119,7 +120,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (!sqsize) { pr_warn("queue size zero!\n"); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + goto err; } /* note: convert queue size from 0's-based value to 1's-based value */ @@ -132,16 +134,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) } if (ctrl->ops->install_queue) { - u16 ret = ctrl->ops->install_queue(req->sq); - + ret = ctrl->ops->install_queue(req->sq); if (ret) { pr_err("failed to install queue %d cntlid %d ret %x\n", qid, ctrl->cntlid, ret); - return ret; + goto err; } } return 0; + +err: + req->sq->ctrl = NULL; + return ret; } static void nvmet_execute_admin_connect(struct nvmet_req *req) From c7e661a1c2ae98a4754db6a85fc686b4a89322ad Mon Sep 17 00:00:00 2001 From: Nikhil Mahale Date: Tue, 4 Feb 2020 15:57:46 +0530 Subject: [PATCH 562/658] ALSA: hda - Fix DP-MST support for NVIDIA codecs If dyn_pcm_assign is set, different jack objects are being created for pcm and pins. If dyn_pcm_assign is set, generic_hdmi_build_jack() calls into add_hdmi_jack_kctl() to create and track separate jack object for pcm. Like sync_eld_via_acomp(), hdmi_present_sense_via_verbs() also need to report status change of the pcm jack. Rename pin_idx_to_jack() to pin_idx_to_pcm_jack(). Update hdmi_present_sense_via_verbs() to report plug state of pcm jack object. Unlike sync_eld_via_acomp(), for !acomp drivers the pcm jack's plug state must be consistent with plug state of pin's jack. 
Fixes: 5398e94fb753 ("ALSA: hda - Add DP-MST support for NVIDIA codecs") Reported-and-tested-by: Martin Regner Signed-off-by: Nikhil Mahale Reviewed-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20200204102746.1356-1-nmahale@nvidia.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 98 +++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 33 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 7c006f9858c0..5119a9ae3d8a 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1550,6 +1550,34 @@ static bool update_eld(struct hda_codec *codec, return eld_changed; } +static struct snd_jack *pin_idx_to_pcm_jack(struct hda_codec *codec, + struct hdmi_spec_per_pin *per_pin) +{ + struct hdmi_spec *spec = codec->spec; + struct snd_jack *jack = NULL; + struct hda_jack_tbl *jack_tbl; + + /* if !dyn_pcm_assign, get jack from hda_jack_tbl + * in !dyn_pcm_assign case, spec->pcm_rec[].jack is not + * NULL even after snd_hda_jack_tbl_clear() is called to + * free snd_jack. This may cause access invalid memory + * when calling snd_jack_report + */ + if (per_pin->pcm_idx >= 0 && spec->dyn_pcm_assign) { + jack = spec->pcm_rec[per_pin->pcm_idx].jack; + } else if (!spec->dyn_pcm_assign) { + /* + * jack tbl doesn't support DP MST + * DP MST will use dyn_pcm_assign, + * so DP MST will never come here + */ + jack_tbl = snd_hda_jack_tbl_get_mst(codec, per_pin->pin_nid, + per_pin->dev_id); + if (jack_tbl) + jack = jack_tbl->jack; + } + return jack; +} /* update ELD and jack state via HD-audio verbs */ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, int repoll) @@ -1571,6 +1599,7 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, int present; bool ret; bool do_repoll = false; + struct snd_jack *pcm_jack = NULL; present = snd_hda_jack_pin_sense(codec, pin_nid, dev_id); @@ -1598,10 +1627,19 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, do_repoll = true; } - if (do_repoll) + if (do_repoll) { schedule_delayed_work(&per_pin->work, msecs_to_jiffies(300)); - else + } else { + /* + * pcm_idx >=0 before update_eld() means it is in monitor + * disconnected event. Jack must be fetched before + * update_eld(). + */ + pcm_jack = pin_idx_to_pcm_jack(codec, per_pin); update_eld(codec, per_pin, eld); + if (!pcm_jack) + pcm_jack = pin_idx_to_pcm_jack(codec, per_pin); + } ret = !repoll || !eld->monitor_present || eld->eld_valid; @@ -1610,40 +1648,34 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, jack->block_report = !ret; jack->pin_sense = (eld->monitor_present && eld->eld_valid) ? AC_PINSENSE_PRESENCE : 0; + + if (spec->dyn_pcm_assign && pcm_jack && !do_repoll) { + int state = 0; + + if (jack->pin_sense & AC_PINSENSE_PRESENCE) + state = SND_JACK_AVOUT; + snd_jack_report(pcm_jack, state); + } + + /* + * snd_hda_jack_pin_sense() call at the beginning of this + * function, updates jack->pins_sense and clears + * jack->jack_dirty, therefore snd_hda_jack_report_sync() will + * not override the jack->pin_sense. + * + * snd_hda_jack_report_sync() is superfluous for dyn_pcm_assign + * case. The jack->pin_sense update was already performed, and + * hda_jack->jack is NULL for dyn_pcm_assign. + * + * Don't call snd_hda_jack_report_sync() for + * dyn_pcm_assign. 
+ */ + ret = ret && !spec->dyn_pcm_assign; } mutex_unlock(&per_pin->lock); return ret; } -static struct snd_jack *pin_idx_to_jack(struct hda_codec *codec, - struct hdmi_spec_per_pin *per_pin) -{ - struct hdmi_spec *spec = codec->spec; - struct snd_jack *jack = NULL; - struct hda_jack_tbl *jack_tbl; - - /* if !dyn_pcm_assign, get jack from hda_jack_tbl - * in !dyn_pcm_assign case, spec->pcm_rec[].jack is not - * NULL even after snd_hda_jack_tbl_clear() is called to - * free snd_jack. This may cause access invalid memory - * when calling snd_jack_report - */ - if (per_pin->pcm_idx >= 0 && spec->dyn_pcm_assign) - jack = spec->pcm_rec[per_pin->pcm_idx].jack; - else if (!spec->dyn_pcm_assign) { - /* - * jack tbl doesn't support DP MST - * DP MST will use dyn_pcm_assign, - * so DP MST will never come here - */ - jack_tbl = snd_hda_jack_tbl_get_mst(codec, per_pin->pin_nid, - per_pin->dev_id); - if (jack_tbl) - jack = jack_tbl->jack; - } - return jack; -} - /* update ELD and jack state via audio component */ static void sync_eld_via_acomp(struct hda_codec *codec, struct hdmi_spec_per_pin *per_pin) @@ -1677,10 +1709,10 @@ static void sync_eld_via_acomp(struct hda_codec *codec, /* pcm_idx >=0 before update_eld() means it is in monitor * disconnected event. Jack must be fetched before update_eld() */ - jack = pin_idx_to_jack(codec, per_pin); + jack = pin_idx_to_pcm_jack(codec, per_pin); changed = update_eld(codec, per_pin, eld); if (jack == NULL) - jack = pin_idx_to_jack(codec, per_pin); + jack = pin_idx_to_pcm_jack(codec, per_pin); if (changed && jack) snd_jack_report(jack, (eld->monitor_present && eld->eld_valid) ? From 0f5be6a4ff7b3f8bf3db15f904e3e76797a43d9a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 30 Jan 2020 10:29:34 -0800 Subject: [PATCH 563/658] nvmet: update AEN list and array at one place All async events are enqueued via nvmet_add_async_event() which updates the ctrl->async_event_cmds[] array and additionally an struct nvmet_async_event is added to the ctrl->async_events list. Under normal operations the nvmet_async_event_work() updates again the ctrl->async_event_cmds and removes the corresponding struct nvmet_async_event from the list again. Though nvmet_sq_destroy() could be called which calls nvmet_async_events_free() which only updates the ctrl->async_event_cmds[] array. Add new functions nvmet_async_events_process() and nvmet_async_events_free() to process async events, update an array and the list. When we destroy submission queue after clearing the aen present on the ctrl->async list we also loop over ctrl->async_event_cmds[] for any requests posted by the host for which we don't have the AEN in the ctrl->async_events list by calling nvmet_async_event_process() and nvmet_async_events_free(). 
Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner [chaitanya.kulkarni@wdc.com * Loop over and clear out outstanding requests * Update changelog ] Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 63 ++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 461987f669c5..576de773b4db 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -129,27 +129,8 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen) return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); } -static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) +static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) { - struct nvmet_req *req; - - while (1) { - mutex_lock(&ctrl->lock); - if (!ctrl->nr_async_event_cmds) { - mutex_unlock(&ctrl->lock); - return; - } - - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); - } -} - -static void nvmet_async_event_work(struct work_struct *work) -{ - struct nvmet_ctrl *ctrl = - container_of(work, struct nvmet_ctrl, async_event_work); struct nvmet_async_event *aen; struct nvmet_req *req; @@ -159,20 +140,43 @@ static void nvmet_async_event_work(struct work_struct *work) struct nvmet_async_event, entry); if (!aen || !ctrl->nr_async_event_cmds) { mutex_unlock(&ctrl->lock); - return; + break; } req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - nvmet_set_result(req, nvmet_async_event_result(aen)); + if (status == 0) + nvmet_set_result(req, nvmet_async_event_result(aen)); list_del(&aen->entry); kfree(aen); mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, 0); + nvmet_req_complete(req, status); } } +static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) +{ + struct nvmet_req *req; + + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds) { + req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; + mutex_unlock(&ctrl->lock); + nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); + mutex_lock(&ctrl->lock); + } + mutex_unlock(&ctrl->lock); +} + +static void nvmet_async_event_work(struct work_struct *work) +{ + struct nvmet_ctrl *ctrl = + container_of(work, struct nvmet_ctrl, async_event_work); + + nvmet_async_events_process(ctrl, 0); +} + void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page) { @@ -753,19 +757,24 @@ static void nvmet_confirm_sq(struct percpu_ref *ref) void nvmet_sq_destroy(struct nvmet_sq *sq) { + u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; + struct nvmet_ctrl *ctrl = sq->ctrl; + /* * If this is the admin queue, complete all AERs so that our * queue doesn't have outstanding requests on it. 
*/ - if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) - nvmet_async_events_free(sq->ctrl); + if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { + nvmet_async_events_process(ctrl, status); + nvmet_async_events_free(ctrl); + } percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); - if (sq->ctrl) { - nvmet_ctrl_put(sq->ctrl); + if (ctrl) { + nvmet_ctrl_put(ctrl); sq->ctrl = NULL; /* allows reusing the queue later */ } } From 924491f2e476f7234d722b24171a4daff61bbe13 Mon Sep 17 00:00:00 2001 From: Robert Milkowski Date: Tue, 28 Jan 2020 08:37:47 +0000 Subject: [PATCH 564/658] NFSv4: try lease recovery on NFS4ERR_EXPIRED Currently, if an nfs server returns NFS4ERR_EXPIRED to open(), we return EIO to applications without even trying to recover. Fixes: 272289a3df72 ("NFSv4: nfs4_do_handle_exception() handle revoke/expiry of a single stateid") Signed-off-by: Robert Milkowski Reviewed-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index df38db2eee3b..a4ace14756ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3199,6 +3199,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; From 7dc2993a9e51dd2eee955944efec65bef90265b7 Mon Sep 17 00:00:00 2001 From: Robert Milkowski Date: Thu, 30 Jan 2020 09:43:25 +0000 Subject: [PATCH 565/658] NFSv4.0: nfs4_do_fsinfo() should not do implicit lease renewals Currently, each time nfs4_do_fsinfo() is called it will do an implicit NFS4 lease renewal, which is not compliant with the NFS4 specification. This can result in a lease being expired by an NFS server. Commit 83ca7f5ab31f ("NFS: Avoid PUTROOTFH when managing leases") introduced implicit client lease renewal in nfs4_do_fsinfo(), which can result in the NFSv4.0 lease to expire on a server side, and servers returning NFS4ERR_EXPIRED or NFS4ERR_STALE_CLIENTID. This can easily be reproduced by frequently unmounting a sub-mount, then stat'ing it to get it mounted again, which will delay or even completely prevent client from sending RENEW operations if no other NFS operations are issued. Eventually nfs server will expire client's lease and return an error on file access or next RENEW. This can also happen when a sub-mount is automatically unmounted due to inactivity (after nfs_mountpoint_expiry_timeout), then it is mounted again via stat(). This can result in a short window during which client's lease will expire on a server but not on a client. This specific case was observed on production systems. This patch removes the implicit lease renewal from nfs4_do_fsinfo(). 
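With the renewal removed from nfs4_do_fsinfo(), the lease is recorded as renewed only by operations that actually renew it on the server. A condensed sketch of the SETCLIENTID/EXCHANGE_ID paths as changed in the diff below (error handling elided):

	unsigned long now = jiffies;	/* sampled before the RPC is sent */

	/* ... issue the SETCLIENTID or EXCHANGE_ID RPC ... */

	if (status == 0)
		do_renew_lease(clp, now);	/* record the renewal on success */

nfs4_set_lease_period() correspondingly loses its lastrenewed argument and only updates the lease time.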
Fixes: 83ca7f5ab31f ("NFS: Avoid PUTROOTFH when managing leases") Signed-off-by: Robert Milkowski Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 4 +--- fs/nfs/nfs4proc.c | 12 ++++++++---- fs/nfs/nfs4renewd.c | 5 +---- fs/nfs/nfs4state.c | 4 +--- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c3e669dcee0e..8be1ba7c62bb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -447,9 +447,7 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); -extern void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed); +extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease); /* nfs4state.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a4ace14756ac..95d07a3dc5d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5053,16 +5053,13 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str struct nfs4_exception exception = { .interruptible = true, }; - unsigned long now = jiffies; int err; do { err = _nfs4_do_fsinfo(server, fhandle, fsinfo); trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); if (err == 0) { - nfs4_set_lease_period(server->nfs_client, - fsinfo->lease_time * HZ, - now); + nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ); break; } err = nfs4_handle_exception(server, err, &exception); @@ -6126,6 +6123,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .callback_data = &setclientid, .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; + unsigned long now = jiffies; int status; /* nfs_client_id4 */ @@ -6158,6 +6156,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } + + if (status == 0) + do_renew_lease(clp, now); out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); @@ -8245,6 +8246,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre struct rpc_task *task; struct nfs41_exchange_id_args *argp; struct nfs41_exchange_id_res *resp; + unsigned long now = jiffies; int status; task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL); @@ -8265,6 +8267,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; + do_renew_lease(clp, now); + clp->cl_clientid = resp->clientid; clp->cl_exchange_flags = resp->flags; clp->cl_seqid = resp->seqid; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6ea431b067dd..ff876dda7f06 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -138,15 +138,12 @@ nfs4_kill_renewd(struct nfs_client *clp) * * @clp: pointer to nfs_client * @lease: new value for lease period - * @lastrenewed: time at which lease was last renewed */ void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed) + unsigned long lease) { spin_lock(&clp->cl_lock); clp->cl_lease_time = lease; - clp->cl_last_renewal = lastrenewed; spin_unlock(&clp->cl_lock); /* Cap maximum reconnect timeout at 1/2 lease period */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 19b473bc560e..f7723d221945 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -92,17 +92,15 @@ static int nfs4_setup_state_renewal(struct 
nfs_client *clp) { int status; struct nfs_fsinfo fsinfo; - unsigned long now; if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { nfs4_schedule_state_renewal(clp); return 0; } - now = jiffies; status = nfs4_proc_get_lease_time(clp, &fsinfo); if (status == 0) { - nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); + nfs4_set_lease_period(clp, fsinfo.lease_time * HZ); nfs4_schedule_state_renewal(clp); } From 9f198a2ac543eaaf47be275531ad5cbd50db3edf Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 09:10:47 +0300 Subject: [PATCH 566/658] help_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 25543a966c48..29eaa4544372 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -273,6 +273,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; From df069d80c8e38c19531c392322e9a16617475c44 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 4 Feb 2020 16:48:34 -0700 Subject: [PATCH 567/658] io_uring: spin for sq thread to idle on shutdown As part of io_uring shutdown, we cancel work that is pending and won't necessarily complete on its own. That includes requests like poll commands and timeouts. If we're using SQPOLL for kernel side submission and we shutdown the ring immediately after queueing such work, we can race with the sqthread doing the submission. This means we may miss cancelling some work, which results in the io_uring shutdown hanging forever. Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index edb00ae2619b..87f8655656b5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5070,7 +5070,8 @@ static int io_sq_thread(void *data) * reap events and wake us up. */ if (inflight || - (!time_after(jiffies, timeout) && ret != -EBUSY)) { + (!time_after(jiffies, timeout) && ret != -EBUSY && + !percpu_ref_is_dying(&ctx->refs))) { cond_resched(); continue; } @@ -6324,6 +6325,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) percpu_ref_kill(&ctx->refs); mutex_unlock(&ctx->uring_lock); + /* + * Wait for sq thread to idle, if we have one. It won't spin on new + * work after we've killed the ctx ref above. This is important to do + * before we cancel existing commands, as the thread could otherwise + * be queueing new work post that. If that's work we need to cancel, + * it could cause shutdown to hang. + */ + while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) + cpu_relax(); + io_kill_timeouts(ctx); io_poll_remove_all(ctx); From f06572ef476d368a239f0238ecf7b00b9cdbf5bf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 5 Feb 2020 02:08:31 +0100 Subject: [PATCH 568/658] cpuidle: Documentation: Clean up PM QoS description Clean up the language in one paragraph in the PM QoS description in Documentation/admin-guide/pm/cpuidle.rst. Signed-off-by: Rafael J. 
Wysocki --- Documentation/admin-guide/pm/cpuidle.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index 311cd7cc2b75..6a06dc473dd6 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -632,16 +632,16 @@ class priority list and destroyed. If that happens, the priority list mechanism will be used, again, to determine the new effective value for the whole list and that value will become the new real constraint. -In turn, for each CPU there is only one resume latency PM QoS request -associated with the :file:`power/pm_qos_resume_latency_us` file under +In turn, for each CPU there is one resume latency PM QoS request associated with +the :file:`power/pm_qos_resume_latency_us` file under :file:`/sys/devices/system/cpu/cpu/` in ``sysfs`` and writing to it causes this single PM QoS request to be updated regardless of which user space process does that. In other words, this PM QoS request is shared by the entire user space, so access to the file associated with it needs to be arbitrated to avoid confusion. [Arguably, the only legitimate use of this mechanism in practice is to pin a process to the CPU in question and let it use the -``sysfs`` interface to control the resume latency constraint for it.] It -still only is a request, however. It is a member of a priority list used to +``sysfs`` interface to control the resume latency constraint for it.] It is +still only a request, however. It is an entry in a priority list used to determine the effective value to be set as the resume latency constraint for the CPU in question every time the list of requests is updated this way or another (there may be other requests coming from kernel code in that list). From 2faf852d1be8a4960d328492298da6448cca0279 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 4 Feb 2020 19:54:55 -0700 Subject: [PATCH 569/658] io_uring: cleanup fixed file data table references syzbot reports a use-after-free in io_ring_file_ref_switch() when it tries to switch back to percpu mode. When we put the final reference to the table by calling percpu_ref_kill_and_confirm(), we don't want the zero reference to queue async work for flushing the potentially queued up items. We currently do a few flush_work(), but they merely paper around the issue, since the work item may not have been queued yet depending on the when the percpu-ref callback gets run. Coming into the file unregister, we know we have the ring quiesced. io_ring_file_ref_switch() can check for whether or not the ref is dying or not, and not queue anything async at that point. Once the ref has been confirmed killed, flush any potential items manually. 
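The resulting unregister sequence, condensed from the diff below, is roughly:

	percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
	flush_work(&data->ref_work);		/* flush a switch already queued */
	wait_for_completion(&data->done);
	io_ring_file_ref_flush(data);		/* drain queued puts by hand */
	percpu_ref_exit(&data->refs);

and io_file_data_ref_zero() only queues the async switch work when the ref is not dying, so nothing new can be queued behind the manual flush.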
Reported-by: syzbot+7caeaea49c2c8a591e3d@syzkaller.appspotmail.com Fixes: 05f3fb3c5397 ("io_uring: avoid ring quiesce for fixed file set unregister and update") Signed-off-by: Jens Axboe --- fs/io_uring.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 87f8655656b5..deff11e84094 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -753,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_files_update *ip, unsigned nr_args); static int io_grab_files(struct io_kiocb *req); +static void io_ring_file_ref_flush(struct fixed_file_data *data); static struct kmem_cache *req_cachep; @@ -5261,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) if (!data) return -ENXIO; - /* protect against inflight atomic switch, which drops the ref */ - percpu_ref_get(&data->refs); - /* wait for existing switches */ - flush_work(&data->ref_work); percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill); - wait_for_completion(&data->done); - percpu_ref_put(&data->refs); - /* flush potential new switch */ flush_work(&data->ref_work); + wait_for_completion(&data->done); + io_ring_file_ref_flush(data); percpu_ref_exit(&data->refs); __io_sqe_files_unregister(ctx); @@ -5507,14 +5503,11 @@ struct io_file_put { struct completion *done; }; -static void io_ring_file_ref_switch(struct work_struct *work) +static void io_ring_file_ref_flush(struct fixed_file_data *data) { struct io_file_put *pfile, *tmp; - struct fixed_file_data *data; struct llist_node *node; - data = container_of(work, struct fixed_file_data, ref_work); - while ((node = llist_del_all(&data->put_llist)) != NULL) { llist_for_each_entry_safe(pfile, tmp, node, llist) { io_ring_file_put(data->ctx, pfile->file); @@ -5524,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work) kfree(pfile); } } +} +static void io_ring_file_ref_switch(struct work_struct *work) +{ + struct fixed_file_data *data; + + data = container_of(work, struct fixed_file_data, ref_work); + io_ring_file_ref_flush(data); percpu_ref_get(&data->refs); percpu_ref_switch_to_percpu(&data->refs); } @@ -5535,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) data = container_of(ref, struct fixed_file_data, refs); - /* we can't safely switch from inside this context, punt to wq */ - queue_work(system_wq, &data->ref_work); + /* + * We can't safely switch from inside this context, punt to wq. If + * the table ref is going away, the table is being unregistered. + * Don't queue up the async work for that case, the caller will + * handle it. + */ + if (!percpu_ref_is_dying(&data->refs)) + queue_work(system_wq, &data->ref_work); } static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, From 7b913a76a6cdda87e538df50df496624407373bf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Feb 2020 09:07:19 -0500 Subject: [PATCH 570/658] drm/amdgpu: update default voltage for boot od table for navi1x It needed to be updated as well so it will show the proper values if you reset to the defaults. 
Bug: https://gitlab.freedesktop.org/drm/amd/issues/1020 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 2c8c4cbce548..19a9846b730e 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -1977,7 +1977,7 @@ static int navi10_setup_od_limits(struct smu_context *smu) { } static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) { - OverDriveTable_t *od_table; + OverDriveTable_t *od_table, *boot_od_table; int ret = 0; ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); @@ -1985,6 +1985,7 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali return ret; od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; + boot_od_table = (OverDriveTable_t *)smu->smu_table.boot_overdrive_table; if (initialize) { ret = navi10_setup_od_limits(smu); if (ret) { @@ -1998,6 +1999,8 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali od_table->GfxclkFreq1); if (ret) od_table->GfxclkVolt1 = 0; + if (boot_od_table) + boot_od_table->GfxclkVolt1 = od_table->GfxclkVolt1; } if (!od_table->GfxclkVolt2) { @@ -2006,6 +2009,8 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali od_table->GfxclkFreq2); if (ret) od_table->GfxclkVolt2 = 0; + if (boot_od_table) + boot_od_table->GfxclkVolt2 = od_table->GfxclkVolt2; } if (!od_table->GfxclkVolt3) { @@ -2014,6 +2019,8 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali od_table->GfxclkFreq3); if (ret) od_table->GfxclkVolt3 = 0; + if (boot_od_table) + boot_od_table->GfxclkVolt3 = od_table->GfxclkVolt3; } } } From 58fe03d6dec908a1bec07eea7e94907af5c07eec Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 24 Jan 2020 14:10:46 -0500 Subject: [PATCH 571/658] drm/amd/dm/mst: Ignore payload update failures Disabling a display on MST can potentially happen after the entire MST topology has been removed, which means that we can't communicate with the topology at all in this scenario. Likewise, this also means that we can't properly update payloads on the topology and as such, it's a good idea to ignore payload update failures when disabling displays. Currently, amdgpu makes the mistake of halting the payload update process when any payload update failures occur, resulting in leaving DC's local copies of the payload tables out of date. 
This ends up causing problems with hotplugging MST topologies, and causes modesets on the second hotplug to fail like so: [drm] Failed to updateMST allocation table forpipe idx:1 ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1511 at drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link.c:2677 update_mst_stream_alloc_table+0x11e/0x130 [amdgpu] Modules linked in: cdc_ether usbnet fuse xt_conntrack nf_conntrack nf_defrag_ipv6 libcrc32c nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 nft_counter nft_compat nf_tables nfnetlink tun bridge stp llc sunrpc vfat fat wmi_bmof uvcvideo snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi videobuf2_vmalloc snd_hda_intel videobuf2_memops videobuf2_v4l2 snd_intel_dspcfg videobuf2_common crct10dif_pclmul snd_hda_codec videodev crc32_pclmul snd_hwdep snd_hda_core ghash_clmulni_intel snd_seq mc joydev pcspkr snd_seq_device snd_pcm sp5100_tco k10temp i2c_piix4 snd_timer thinkpad_acpi ledtrig_audio snd wmi soundcore video i2c_scmi acpi_cpufreq ip_tables amdgpu(O) rtsx_pci_sdmmc amd_iommu_v2 gpu_sched mmc_core i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec drm crc32c_intel serio_raw hid_multitouch r8152 mii nvme r8169 nvme_core rtsx_pci pinctrl_amd CPU: 5 PID: 1511 Comm: gnome-shell Tainted: G O 5.5.0-rc7Lyude-Test+ #4 Hardware name: LENOVO FA495SIT26/FA495SIT26, BIOS R12ET22W(0.22 ) 01/31/2019 RIP: 0010:update_mst_stream_alloc_table+0x11e/0x130 [amdgpu] Code: 28 00 00 00 75 2b 48 8d 65 e0 5b 41 5c 41 5d 41 5e 5d c3 0f b6 06 49 89 1c 24 41 88 44 24 08 0f b6 46 01 41 88 44 24 09 eb 93 <0f> 0b e9 2f ff ff ff e8 a6 82 a3 c2 66 0f 1f 44 00 00 0f 1f 44 00 RSP: 0018:ffffac428127f5b0 EFLAGS: 00010202 RAX: 0000000000000002 RBX: ffff8d1e166eee80 RCX: 0000000000000000 RDX: ffffac428127f668 RSI: ffff8d1e166eee80 RDI: ffffac428127f610 RBP: ffffac428127f640 R08: ffffffffc03d94a8 R09: 0000000000000000 R10: ffff8d1e24b02000 R11: ffffac428127f5b0 R12: ffff8d1e1b83d000 R13: ffff8d1e1bea0b08 R14: 0000000000000002 R15: 0000000000000002 FS: 00007fab23ffcd80(0000) GS:ffff8d1e28b40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f151f1711e8 CR3: 00000005997c0000 CR4: 00000000003406e0 Call Trace: ? mutex_lock+0xe/0x30 dc_link_allocate_mst_payload+0x9a/0x210 [amdgpu] ? dm_read_reg_func+0x39/0xb0 [amdgpu] ? core_link_enable_stream+0x656/0x730 [amdgpu] core_link_enable_stream+0x656/0x730 [amdgpu] dce110_apply_ctx_to_hw+0x58e/0x5d0 [amdgpu] ? dcn10_verify_allow_pstate_change_high+0x1d/0x280 [amdgpu] ? dcn10_wait_for_mpcc_disconnect+0x3c/0x130 [amdgpu] dc_commit_state+0x292/0x770 [amdgpu] ? add_timer+0x101/0x1f0 ? ttm_bo_put+0x1a1/0x2f0 [ttm] amdgpu_dm_atomic_commit_tail+0xb59/0x1ff0 [amdgpu] ? amdgpu_move_blit.constprop.0+0xb8/0x1f0 [amdgpu] ? amdgpu_bo_move+0x16d/0x2b0 [amdgpu] ? ttm_bo_handle_move_mem+0x118/0x570 [ttm] ? ttm_bo_validate+0x134/0x150 [ttm] ? dm_plane_helper_prepare_fb+0x1b9/0x2a0 [amdgpu] ? _cond_resched+0x15/0x30 ? wait_for_completion_timeout+0x38/0x160 ? _cond_resched+0x15/0x30 ? wait_for_completion_interruptible+0x33/0x190 commit_tail+0x94/0x130 [drm_kms_helper] drm_atomic_helper_commit+0x113/0x140 [drm_kms_helper] drm_atomic_helper_set_config+0x70/0xb0 [drm_kms_helper] drm_mode_setcrtc+0x194/0x6a0 [drm] ? _cond_resched+0x15/0x30 ? mutex_lock+0xe/0x30 ? drm_mode_getcrtc+0x180/0x180 [drm] drm_ioctl_kernel+0xaa/0xf0 [drm] drm_ioctl+0x208/0x390 [drm] ? 
drm_mode_getcrtc+0x180/0x180 [drm] amdgpu_drm_ioctl+0x49/0x80 [amdgpu] do_vfs_ioctl+0x458/0x6d0 ksys_ioctl+0x5e/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x55/0x1b0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fab2121f87b Code: 0f 1e fa 48 8b 05 0d 96 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d dd 95 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffd045f9068 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffd045f90a0 RCX: 00007fab2121f87b RDX: 00007ffd045f90a0 RSI: 00000000c06864a2 RDI: 000000000000000b RBP: 00007ffd045f90a0 R08: 0000000000000000 R09: 000055dbd2985d10 R10: 000055dbd2196280 R11: 0000000000000246 R12: 00000000c06864a2 R13: 000000000000000b R14: 0000000000000000 R15: 000055dbd2196280 ---[ end trace 6ea888c24d2059cd ]--- Note as well, I have only been able to reproduce this on setups with 2 MST displays. Changes since v1: * Don't return false when part 1 or part 2 of updating the payloads fails, we don't want to abort at any step of the process even if things fail Reviewed-by: Mikita Lipski Signed-off-by: Lyude Paul Acked-by: Harry Wentland Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 069b7a6f5597..318b474ff20e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -216,7 +216,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port); } - ret = drm_dp_update_payload_part1(mst_mgr); + /* It's OK for this to fail */ + drm_dp_update_payload_part1(mst_mgr); /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or * AUX message. The sequence is slot 1-63 allocated sequence for each @@ -225,9 +226,6 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( get_payload_table(aconnector, proposed_table); - if (ret) - return false; - return true; } @@ -285,7 +283,6 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; - int ret; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -299,10 +296,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (!mst_mgr->mst_state) return false; - ret = drm_dp_update_payload_part2(mst_mgr); - - if (ret) - return false; + /* It's OK for this to fail */ + drm_dp_update_payload_part2(mst_mgr); if (!enable) drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); From 50722f0bf631cfcbcce18dac6b3fb5c2b6432628 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jan 2020 18:48:43 -0800 Subject: [PATCH 572/658] xtensa: move fast exception handlers close to vectors On XIP kernels it makes sense to have exception vectors and fast exception handlers together (in a fast memory). In addition, with MTD XIP support both vectors and fast exception handlers must be outside of the FLASH. Add section .exception.text and move fast exception handlers to it. Put it together with vectors when vectors are outside of the .text. 
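Concretely, the section is selected through a new assembler helper (see the diff below), and each fast-handler block is switched over by placing the marker ahead of it:

	#define __XTENSA_HANDLER	.section ".exception.text", "ax"

	/* at the top of the fast-handler blocks in entry.S and friends */
	__XTENSA_HANDLER
	.literal_position

The linker script then either emits *(.exception.text) next to the in-text vectors or relocates it together with them.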
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/asmmacro.h | 2 ++ arch/xtensa/kernel/coprocessor.S | 5 ++--- arch/xtensa/kernel/entry.S | 18 ++++++++++++------ arch/xtensa/kernel/setup.c | 4 ++++ arch/xtensa/kernel/vectors.S | 3 ++- arch/xtensa/kernel/vmlinux.lds.S | 21 +++++++++++++++++---- 6 files changed, 39 insertions(+), 14 deletions(-) diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 71a7e846bc1f..bfc89e11f469 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -237,4 +237,6 @@ #error Unsupported Xtensa ABI #endif +#define __XTENSA_HANDLER .section ".exception.text", "ax" + #endif /* _XTENSA_ASMMACRO_H */ diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index c53ce6d8794f..c426b846beef 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -58,6 +58,8 @@ .endif; \ .long THREAD_XTREGS_CP##x + __XTENSA_HANDLER + SAVE_CP_REGS(0) SAVE_CP_REGS(1) SAVE_CP_REGS(2) @@ -76,7 +78,6 @@ LOAD_CP_REGS(6) LOAD_CP_REGS(7) - .section ".rodata", "a" .align 4 .Lsave_cp_regs_jump_table: SAVE_CP_REGS_TAB(0) @@ -98,8 +99,6 @@ LOAD_CP_REGS_TAB(6) LOAD_CP_REGS_TAB(7) - .previous - /* * coprocessor_flush(struct thread_info*, index) * a2 a3 diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index be897803834a..9d2e22d9f3c3 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -939,6 +939,9 @@ ENDPROC(unrecoverable_exception) /* -------------------------- FAST EXCEPTION HANDLERS ----------------------- */ + __XTENSA_HANDLER + .literal_position + /* * Fast-handler for alloca exceptions * @@ -1024,7 +1027,7 @@ ENDPROC(fast_alloca) ENTRY(fast_illegal_instruction_user) rsr a0, ps - bbsi.l a0, PS_WOE_BIT, user_exception + bbsi.l a0, PS_WOE_BIT, 1f s32i a3, a2, PT_AREG3 movi a3, PS_WOE_MASK or a0, a0, a3 @@ -1033,6 +1036,8 @@ ENTRY(fast_illegal_instruction_user) l32i a0, a2, PT_AREG0 rsr a2, depc rfe +1: + call0 user_exception ENDPROC(fast_illegal_instruction_user) #endif @@ -1071,7 +1076,7 @@ ENTRY(fast_syscall_user) _beqz a0, fast_syscall_spill_registers _beqi a0, __NR_xtensa, fast_syscall_xtensa - j user_exception + call0 user_exception ENDPROC(fast_syscall_user) @@ -1762,8 +1767,8 @@ ENTRY(fast_second_level_miss) rsr a2, ps bbsi.l a2, PS_UM_BIT, 1f - j _kernel_exception -1: j _user_exception + call0 _kernel_exception +1: call0 _user_exception ENDPROC(fast_second_level_miss) @@ -1859,13 +1864,14 @@ ENTRY(fast_store_prohibited) rsr a2, ps bbsi.l a2, PS_UM_BIT, 1f - j _kernel_exception -1: j _user_exception + call0 _kernel_exception +1: call0 _user_exception ENDPROC(fast_store_prohibited) #endif /* CONFIG_MMU */ + .text /* * System Calls. 
* diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 0f93b67c7a5a..fefbdce1db99 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -284,6 +284,8 @@ extern char _UserExceptionVector_text_start; extern char _UserExceptionVector_text_end; extern char _DoubleExceptionVector_text_start; extern char _DoubleExceptionVector_text_end; +extern char _exception_text_start; +extern char _exception_text_end; #if XCHAL_EXCM_LEVEL >= 2 extern char _Level2InterruptVector_text_start; extern char _Level2InterruptVector_text_end; @@ -363,6 +365,8 @@ void __init setup_arch(char **cmdline_p) mem_reserve(__pa(&_DoubleExceptionVector_text_start), __pa(&_DoubleExceptionVector_text_end)); + mem_reserve(__pa(&_exception_text_start), + __pa(&_exception_text_end)); #if XCHAL_EXCM_LEVEL >= 2 mem_reserve(__pa(&_Level2InterruptVector_text_start), __pa(&_Level2InterruptVector_text_end)); diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 841503d3307c..95ad1e773991 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -43,6 +43,7 @@ */ #include +#include #include #include #include @@ -477,7 +478,6 @@ _DoubleExceptionVector_handle_exception: ENDPROC(_DoubleExceptionVector) - .text /* * Fixup handler for TLB miss in double exception handler for window owerflow. * We get here with windowbase set to the window that was being spilled and @@ -505,6 +505,7 @@ ENDPROC(_DoubleExceptionVector) * a3: exctable, original value in excsave1 */ + __XTENSA_HANDLER .literal_position ENTRY(window_overflow_restore_a0_fixup) diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 409c05cac15e..37a3205c404c 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -110,6 +110,8 @@ SECTIONS SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR) SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) + + *(.exception.text) #endif IRQENTRY_TEXT @@ -190,6 +192,8 @@ SECTIONS .DoubleExceptionVector.text); RELOCATE_ENTRY(_DebugInterruptVector_text, .DebugInterruptVector.text); + RELOCATE_ENTRY(_exception_text, + .exception.text); #endif #ifdef CONFIG_XIP_KERNEL RELOCATE_ENTRY(_xip_data, .data); @@ -282,8 +286,7 @@ SECTIONS .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, .UserExceptionVector.text) - - . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; +#define LAST .DoubleExceptionVector.text #endif #if !defined(CONFIG_XIP_KERNEL) && defined(CONFIG_SMP) @@ -292,10 +295,20 @@ SECTIONS .SecondaryResetVector.text, RESET_VECTOR1_VADDR, .DoubleExceptionVector.text) - - . = LOADADDR(.SecondaryResetVector.text)+SIZEOF(.SecondaryResetVector.text); +#undef LAST +#define LAST .SecondaryResetVector.text #endif +#ifdef CONFIG_VECTORS_OFFSET + SECTION_VECTOR (_exception_text, + .exception.text, + , + LAST) +#undef LAST +#define LAST .exception.text + +#endif + . = (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3; . = ALIGN(PAGE_SIZE); From 4f17664a4407004f71dc76de01c89fdb6efc0776 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jan 2020 21:25:03 -0800 Subject: [PATCH 573/658] xtensa: separate SMP and XIP support There's no real dependency between SMP and XIP, allow them to be selected together. Always define 2- and 4-argument SECTION_VECTOR macros, always use 4-argument macro for the secondary reset vector and always define relocation entry for it. 
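For reference, the two forms now provided unconditionally (shown here with the kernel exception vector, as in the diff below): SECTION_VECTOR2 places a section directly at its final virtual address, while SECTION_VECTOR4 emits a relocatable output section loaded right after the previous one:

	/* vectors merged into .text: placed at the final address directly */
	SECTION_VECTOR2 (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR)

	/* vectors relocated at startup: section loaded after LAST */
	SECTION_VECTOR4 (_KernelExceptionVector_text,
			 .KernelExceptionVector.text,
			 KERNEL_VECTOR_VADDR,
			 LAST)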
Signed-off-by: Max Filippov --- arch/xtensa/kernel/vmlinux.lds.S | 68 ++++++++++++++++---------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 37a3205c404c..d9e547810b6c 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -54,8 +54,7 @@ jiffies = jiffies_64; * to description recorded in the corresponding RELOCATE_ENTRY. */ -#ifdef CONFIG_VECTORS_OFFSET -#define SECTION_VECTOR(sym, section, addr, prevsec) \ +#define SECTION_VECTOR4(sym, section, addr, prevsec) \ section addr : AT(((LOADADDR(prevsec) + SIZEOF(prevsec)) + 3) & ~ 3) \ { \ . = ALIGN(4); \ @@ -63,11 +62,10 @@ jiffies = jiffies_64; *(section) \ sym ## _end = ABSOLUTE(.); \ } -#else -#define SECTION_VECTOR(section, addr) \ + +#define SECTION_VECTOR2(section, addr) \ . = addr; \ *(section) -#endif /* * Mapping of input sections to output sections when linking. @@ -87,31 +85,31 @@ SECTIONS HEAD_TEXT #ifndef CONFIG_VECTORS_OFFSET - . = ALIGN(PAGE_SIZE); - _vecbase = .; + . = ALIGN(PAGE_SIZE); + _vecbase = .; - SECTION_VECTOR (.WindowVectors.text, WINDOW_VECTORS_VADDR) + SECTION_VECTOR2 (.WindowVectors.text, WINDOW_VECTORS_VADDR) #if XCHAL_EXCM_LEVEL >= 2 - SECTION_VECTOR (.Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR) + SECTION_VECTOR2 (.Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR) #endif #if XCHAL_EXCM_LEVEL >= 3 - SECTION_VECTOR (.Level3InterruptVector.text, INTLEVEL3_VECTOR_VADDR) + SECTION_VECTOR2 (.Level3InterruptVector.text, INTLEVEL3_VECTOR_VADDR) #endif #if XCHAL_EXCM_LEVEL >= 4 - SECTION_VECTOR (.Level4InterruptVector.text, INTLEVEL4_VECTOR_VADDR) + SECTION_VECTOR2 (.Level4InterruptVector.text, INTLEVEL4_VECTOR_VADDR) #endif #if XCHAL_EXCM_LEVEL >= 5 - SECTION_VECTOR (.Level5InterruptVector.text, INTLEVEL5_VECTOR_VADDR) + SECTION_VECTOR2 (.Level5InterruptVector.text, INTLEVEL5_VECTOR_VADDR) #endif #if XCHAL_EXCM_LEVEL >= 6 - SECTION_VECTOR (.Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR) + SECTION_VECTOR2 (.Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR) #endif - SECTION_VECTOR (.DebugInterruptVector.text, DEBUG_VECTOR_VADDR) - SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) - SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR) - SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) + SECTION_VECTOR2 (.DebugInterruptVector.text, DEBUG_VECTOR_VADDR) + SECTION_VECTOR2 (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) + SECTION_VECTOR2 (.UserExceptionVector.text, USER_VECTOR_VADDR) + SECTION_VECTOR2 (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) - *(.exception.text) + *(.exception.text) #endif IRQENTRY_TEXT @@ -198,11 +196,10 @@ SECTIONS #ifdef CONFIG_XIP_KERNEL RELOCATE_ENTRY(_xip_data, .data); RELOCATE_ENTRY(_xip_init_data, .init.data); -#else +#endif #if defined(CONFIG_SMP) RELOCATE_ENTRY(_SecondaryResetVector_text, .SecondaryResetVector.text); -#endif #endif __boot_reloc_table_end = ABSOLUTE(.) ; @@ -220,21 +217,24 @@ SECTIONS . 
= ALIGN(4); .dummy : { LONG(0) } +#undef LAST +#define LAST .dummy + #ifdef CONFIG_VECTORS_OFFSET /* The vectors are relocated to the real position at startup time */ - SECTION_VECTOR (_WindowVectors_text, + SECTION_VECTOR4 (_WindowVectors_text, .WindowVectors.text, WINDOW_VECTORS_VADDR, .dummy) - SECTION_VECTOR (_DebugInterruptVector_text, + SECTION_VECTOR4 (_DebugInterruptVector_text, .DebugInterruptVector.text, DEBUG_VECTOR_VADDR, .WindowVectors.text) #undef LAST #define LAST .DebugInterruptVector.text #if XCHAL_EXCM_LEVEL >= 2 - SECTION_VECTOR (_Level2InterruptVector_text, + SECTION_VECTOR4 (_Level2InterruptVector_text, .Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR, LAST) @@ -242,7 +242,7 @@ SECTIONS # define LAST .Level2InterruptVector.text #endif #if XCHAL_EXCM_LEVEL >= 3 - SECTION_VECTOR (_Level3InterruptVector_text, + SECTION_VECTOR4 (_Level3InterruptVector_text, .Level3InterruptVector.text, INTLEVEL3_VECTOR_VADDR, LAST) @@ -250,7 +250,7 @@ SECTIONS # define LAST .Level3InterruptVector.text #endif #if XCHAL_EXCM_LEVEL >= 4 - SECTION_VECTOR (_Level4InterruptVector_text, + SECTION_VECTOR4 (_Level4InterruptVector_text, .Level4InterruptVector.text, INTLEVEL4_VECTOR_VADDR, LAST) @@ -258,7 +258,7 @@ SECTIONS # define LAST .Level4InterruptVector.text #endif #if XCHAL_EXCM_LEVEL >= 5 - SECTION_VECTOR (_Level5InterruptVector_text, + SECTION_VECTOR4 (_Level5InterruptVector_text, .Level5InterruptVector.text, INTLEVEL5_VECTOR_VADDR, LAST) @@ -266,41 +266,41 @@ SECTIONS # define LAST .Level5InterruptVector.text #endif #if XCHAL_EXCM_LEVEL >= 6 - SECTION_VECTOR (_Level6InterruptVector_text, + SECTION_VECTOR4 (_Level6InterruptVector_text, .Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR, LAST) # undef LAST # define LAST .Level6InterruptVector.text #endif - SECTION_VECTOR (_KernelExceptionVector_text, + SECTION_VECTOR4 (_KernelExceptionVector_text, .KernelExceptionVector.text, KERNEL_VECTOR_VADDR, LAST) #undef LAST - SECTION_VECTOR (_UserExceptionVector_text, + SECTION_VECTOR4 (_UserExceptionVector_text, .UserExceptionVector.text, USER_VECTOR_VADDR, .KernelExceptionVector.text) - SECTION_VECTOR (_DoubleExceptionVector_text, + SECTION_VECTOR4 (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, .UserExceptionVector.text) #define LAST .DoubleExceptionVector.text #endif -#if !defined(CONFIG_XIP_KERNEL) && defined(CONFIG_SMP) +#if defined(CONFIG_SMP) - SECTION_VECTOR (_SecondaryResetVector_text, + SECTION_VECTOR4 (_SecondaryResetVector_text, .SecondaryResetVector.text, RESET_VECTOR1_VADDR, - .DoubleExceptionVector.text) + LAST) #undef LAST #define LAST .SecondaryResetVector.text #endif #ifdef CONFIG_VECTORS_OFFSET - SECTION_VECTOR (_exception_text, + SECTION_VECTOR4 (_exception_text, .exception.text, , LAST) From 5e4417f921238b5acf101bfcd59a7b3463fb2dbd Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jan 2020 20:11:24 -0800 Subject: [PATCH 574/658] xtensa: reorganize vectors placement Allow vectors to be either merged into the kernel .text or put at a fixed virtual address independently of XIP option. Drop option that puts vectors at a fixed offset from the kernel text. Add choice to Kconfig. Vectors at fixed virtual address may be useful for XIP-aware MTD support and for noMMU configurations with available IRAM. Configurations without VECBASE register must put their vectors at specific locations regardless of the selected option. All other configurations should happily use merged vectors. 
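With the new Kconfig choice in place, the vectors base resolves as below (condensed from the diff that follows); CONFIG_VECTORS_ADDR is only defined when XTENSA_VECTORS_SEPARATE is selected:

	#ifdef CONFIG_VECTORS_ADDR
	#define VECBASE_VADDR	(CONFIG_VECTORS_ADDR)	/* fixed virtual address */
	#else
	#define VECBASE_VADDR	_vecbase		/* vectors merged into .text */
	#endif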
Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 40 ++++++++++++++++++++----- arch/xtensa/configs/smp_lx200_defconfig | 1 - arch/xtensa/configs/virt_defconfig | 1 - arch/xtensa/include/asm/vectors.h | 4 +-- arch/xtensa/kernel/setup.c | 4 +-- arch/xtensa/kernel/vmlinux.lds.S | 21 ++++++++----- 6 files changed, 51 insertions(+), 20 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 06e6161797fa..133385d13c02 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -572,15 +572,41 @@ config KERNEL_LOAD_ADDRESS If unsure, leave the default value here. -config VECTORS_OFFSET - hex "Kernel vectors offset" - default 0x00003000 - depends on !XIP_KERNEL +choice + prompt "Relocatable vectors location" + default XTENSA_VECTORS_IN_TEXT help - This is the offset of the kernel image from the relocatable vectors - base. + Choose whether relocatable vectors are merged into the kernel .text + or placed separately at runtime. This option does not affect + configurations without VECBASE register where vectors are always + placed at their hardware-defined locations. - If unsure, leave the default value here. +config XTENSA_VECTORS_IN_TEXT + bool "Merge relocatable vectors into kernel text" + depends on !MTD_XIP + help + This option puts relocatable vectors into the kernel .text section + with proper alignment. + This is a safe choice for most configurations. + +config XTENSA_VECTORS_SEPARATE + bool "Put relocatable vectors at fixed address" + help + This option puts relocatable vectors at specific virtual address. + Vectors are merged with the .init data in the kernel image and + are copied into their designated location during kernel startup. + Use it to put vectors into IRAM or out of FLASH on kernels with + XIP-aware MTD support. + +endchoice + +config VECTORS_ADDR + hex "Kernel vectors virtual address" + default 0x00000000 + depends on XTENSA_VECTORS_SEPARATE + help + This is the virtual address of the (relocatable) vectors base. + It must be within KSEG if MMU is used. 
config XIP_DATA_ADDR hex "XIP kernel data virtual address" diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 8b3bc92a079c..4f1c7998b030 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -30,7 +30,6 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set -CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0" diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig index 4fddd8512350..6d1387dfa96f 100644 --- a/arch/xtensa/configs/virt_defconfig +++ b/arch/xtensa/configs/virt_defconfig @@ -19,7 +19,6 @@ CONFIG_KALLSYMS_ALL=y CONFIG_PERF_EVENTS=y CONFIG_XTENSA_VARIANT_DC233C=y CONFIG_XTENSA_UNALIGNED_USER=y -CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_KSEG_512M=y CONFIG_HIGHMEM=y CONFIG_CMDLINE_BOOL=y diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 140f30762cf9..704286c35640 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -34,8 +34,8 @@ #endif #define RESET_VECTOR1_VADDR (XCHAL_RESET_VECTOR1_VADDR) -#ifdef CONFIG_VECTORS_OFFSET -#define VECBASE_VADDR (KERNELOFFSET - CONFIG_VECTORS_OFFSET) +#ifdef CONFIG_VECTORS_ADDR +#define VECBASE_VADDR (CONFIG_VECTORS_ADDR) #else #define VECBASE_VADDR _vecbase #endif diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index fefbdce1db99..1e9b9e7d9583 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -349,7 +349,7 @@ void __init setup_arch(char **cmdline_p) mem_reserve(__pa(_xip_start), __pa(_xip_end)); #endif -#ifdef CONFIG_VECTORS_OFFSET +#ifdef CONFIG_VECTORS_ADDR mem_reserve(__pa(&_WindowVectors_text_start), __pa(&_WindowVectors_text_end)); @@ -388,7 +388,7 @@ void __init setup_arch(char **cmdline_p) __pa(&_Level6InterruptVector_text_end)); #endif -#endif /* CONFIG_VECTORS_OFFSET */ +#endif /* CONFIG_VECTORS_ADDR */ #ifdef CONFIG_SMP mem_reserve(__pa(&_SecondaryResetVector_text_start), diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index d9e547810b6c..d23a6e38f062 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -47,9 +47,15 @@ jiffies = jiffies_64; LONG(sym ## _end); \ LONG(LOADADDR(section)) +#if !defined(CONFIG_VECTORS_ADDR) && XCHAL_HAVE_VECBASE +#define MERGED_VECTORS 1 +#else +#define MERGED_VECTORS 0 +#endif + /* - * Macro to define a section for a vector. When CONFIG_VECTORS_OFFSET is - * defined code for every vector is located with other init data. At startup + * Macro to define a section for a vector. When MERGED_VECTORS is 0 + * code for every vector is located with other init data. At startup * time head.S copies code for every vector to its final position according * to description recorded in the corresponding RELOCATE_ENTRY. */ @@ -84,7 +90,7 @@ SECTIONS /* The HEAD_TEXT section must be the first section! */ HEAD_TEXT -#ifndef CONFIG_VECTORS_OFFSET +#if MERGED_VECTORS . = ALIGN(PAGE_SIZE); _vecbase = .; @@ -159,7 +165,7 @@ SECTIONS . 
= ALIGN(16); __boot_reloc_table_start = ABSOLUTE(.); -#ifdef CONFIG_VECTORS_OFFSET +#if !MERGED_VECTORS RELOCATE_ENTRY(_WindowVectors_text, .WindowVectors.text); #if XCHAL_EXCM_LEVEL >= 2 @@ -220,7 +226,7 @@ SECTIONS #undef LAST #define LAST .dummy -#ifdef CONFIG_VECTORS_OFFSET +#if !MERGED_VECTORS /* The vectors are relocated to the real position at startup time */ SECTION_VECTOR4 (_WindowVectors_text, @@ -299,7 +305,7 @@ SECTIONS #define LAST .SecondaryResetVector.text #endif -#ifdef CONFIG_VECTORS_OFFSET +#if !MERGED_VECTORS SECTION_VECTOR4 (_exception_text, .exception.text, , @@ -310,6 +316,7 @@ SECTIONS #endif . = (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3; + .dummy1 : AT(ADDR(.dummy1)) { LONG(0) } . = ALIGN(PAGE_SIZE); #ifndef CONFIG_XIP_KERNEL @@ -327,7 +334,7 @@ SECTIONS #undef LOAD_OFFSET #define LOAD_OFFSET \ - (CONFIG_XIP_DATA_ADDR - (LOADADDR(.dummy) + SIZEOF(.dummy) + 3) & ~ 3) + (CONFIG_XIP_DATA_ADDR - (LOADADDR(.dummy1) + SIZEOF(.dummy1) + 3) & ~ 3) _xip_data_start = .; _sdata = .; From c74c0fd2282e0e3ce891cb571f325b9412cbaa3f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 4 Feb 2020 12:51:21 -0800 Subject: [PATCH 575/658] xtensa: ISS: improve simcall assembly Drop redundant result moving from inline assembly, use a1 and b1 values as return value and errno value respectively. Signed-off-by: Max Filippov --- arch/xtensa/platforms/iss/include/platform/simcall.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/xtensa/platforms/iss/include/platform/simcall.h b/arch/xtensa/platforms/iss/include/platform/simcall.h index 2ba45858e50a..111bcc5d0dc0 100644 --- a/arch/xtensa/platforms/iss/include/platform/simcall.h +++ b/arch/xtensa/platforms/iss/include/platform/simcall.h @@ -66,19 +66,17 @@ static int errno; static inline int __simc(int a, int b, int c, int d) { - int ret; register int a1 asm("a2") = a; register int b1 asm("a3") = b; register int c1 asm("a4") = c; register int d1 asm("a5") = d; __asm__ __volatile__ ( "simcall\n" - "mov %0, a2\n" - "mov %1, a3\n" - : "=a" (ret), "=a" (errno), "+r"(a1), "+r"(b1) + : "+r"(a1), "+r"(b1) : "r"(c1), "r"(d1) : "memory"); - return ret; + errno = b1; + return a1; } static inline int simc_exit(int exit_code) From 7495e0926fdf302cb9e62a49f7c22198815624cd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Feb 2020 07:33:53 -0500 Subject: [PATCH 576/658] bootconfig: Only load bootconfig if "bootconfig" is on the kernel cmdline As the bootconfig is appended to the initrd it is not as easy to modify as the kernel command line. If there's some issue with the kernel, and the developer wants to boot a pristine kernel, it should not be needed to modify the initrd to remove the bootconfig for a single boot. As bootconfig is silently added (if the admin does not know where to look they may not know it's being loaded). It should be explicitly added to the kernel cmdline. The loading of the bootconfig is only done if "bootconfig" is on the kernel command line. This will let admins know that the kernel command line is extended. Note, after adding printk()s for when the size is too great or the checksum is wrong, exposed that the current method always looked for the boot config, and if this size and checksum matched, it would parse it (as if either is wrong a printk has been added to show this). It's better to only check this if the boot config is asked to be looked for. 
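The gate itself is a whole-word match on the command line; condensed from the diff below, setup_boot_config() bails out early unless a standalone "bootconfig" parameter is present:

	p = strstr(cmdline, "bootconfig");
	if (!p || (p != cmdline && !isspace(*(p-1))) ||
	    (p[10] && !isspace(p[10])))
		return;		/* not given as its own word: skip the lookup */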
Link: https://lore.kernel.org/r/CAHk-=wjfjO+h6bQzrTf=YCZA53Y3EDyAs3Z4gEsT7icA3u_Psw@mail.gmail.com Acked-by: Masami Hiramatsu Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 2 ++ .../admin-guide/kernel-parameters.txt | 6 ++++ init/main.c | 28 ++++++++++++++----- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 4d617693c0c8..b342a6796392 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -123,6 +123,8 @@ To remove the config from the image, you can use -d option as below:: # tools/bootconfig/bootconfig -d /boot/initrd.img-X.Y.Z +Then add "bootconfig" on the normal kernel command line to tell the +kernel to look for the bootconfig at the end of the initrd file. Config File Limitation ====================== diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ade4e6ec23e0..b48c70ba9841 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -437,6 +437,12 @@ no delay (0). Format: integer + bootconfig [KNL] + Extended command line options can be added to an initrd + and this will cause the kernel to look for it. + + See Documentation/admin-guide/bootconfig.rst + bert_disable [ACPI] Disable BERT OS support on buggy BIOSes. diff --git a/init/main.c b/init/main.c index dd7da62d99a5..f174a59d3903 100644 --- a/init/main.c +++ b/init/main.c @@ -336,28 +336,39 @@ u32 boot_config_checksum(unsigned char *p, u32 size) return ret; } -static void __init setup_boot_config(void) +static void __init setup_boot_config(const char *cmdline) { u32 size, csum; char *data, *copy; + const char *p; u32 *hdr; - if (!initrd_end) + p = strstr(cmdline, "bootconfig"); + if (!p || (p != cmdline && !isspace(*(p-1))) || + (p[10] && !isspace(p[10]))) return; + if (!initrd_end) + goto not_found; + hdr = (u32 *)(initrd_end - 8); size = hdr[0]; csum = hdr[1]; - if (size >= XBC_DATA_MAX) + if (size >= XBC_DATA_MAX) { + pr_err("bootconfig size %d greater than max size %d\n", + size, XBC_DATA_MAX); return; + } data = ((void *)hdr) - size; if ((unsigned long)data < initrd_start) - return; + goto not_found; - if (boot_config_checksum((unsigned char *)data, size) != csum) + if (boot_config_checksum((unsigned char *)data, size) != csum) { + pr_err("bootconfig checksum failed\n"); return; + } copy = memblock_alloc(size + 1, SMP_CACHE_BYTES); if (!copy) { @@ -377,9 +388,12 @@ static void __init setup_boot_config(void) /* Also, "init." keys are init arguments */ extra_init_args = xbc_make_cmdline("init"); } + return; +not_found: + pr_err("'bootconfig' found on command line, but no bootconfig found\n"); } #else -#define setup_boot_config() do { } while (0) +#define setup_boot_config(cmdline) do { } while (0) #endif /* Change NUL term back to "=", to make "param" the whole string. 
*/ @@ -760,7 +774,7 @@ asmlinkage __visible void __init start_kernel(void) pr_notice("%s", linux_banner); early_security_init(); setup_arch(&command_line); - setup_boot_config(); + setup_boot_config(command_line); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); From 13426feaf46c48fcddb591e89d35120fcc90527f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Feb 2020 14:18:57 +0100 Subject: [PATCH 577/658] ASoC: wcd934x: Add missing COMMON_CLK dependency to SND_SOC_ALL_CODECS Just adding a dependency on COMMON_CLK to SND_SOC_WCD934X is not sufficient, as enabling SND_SOC_ALL_CODECS will still select it, breaking the build later: WARNING: unmet direct dependencies detected for SND_SOC_WCD934X Depends on [n]: SOUND [=m] && !UML && SND [=m] && SND_SOC [=m] && COMMON_CLK [=n] && MFD_WCD934X [=m] Selected by [m]: - SND_SOC_ALL_CODECS [=m] && SOUND [=m] && !UML && SND [=m] && SND_SOC [=m] && COMPILE_TEST [=y] && MFD_WCD934X [=m] ... ERROR: "of_clk_add_provider" [sound/soc/codecs/snd-soc-wcd934x.ko] undefined! ERROR: "of_clk_src_simple_get" [sound/soc/codecs/snd-soc-wcd934x.ko] undefined! ERROR: "clk_hw_register" [sound/soc/codecs/snd-soc-wcd934x.ko] undefined! ERROR: "__clk_get_name" [sound/soc/codecs/snd-soc-wcd934x.ko] undefined! Fix this by adding the missing dependency to SND_SOC_ALL_CODECS Fixes: 42b716359beca106 ("ASoC: wcd934x: Add missing COMMON_CLK dependency") Signed-off-by: Geert Uytterhoeven Tested-by: Stephen Rothwell Link: https://lore.kernel.org/r/20200204131857.7634-1-geert@linux-m68k.org Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 286514865960..7e90f5d83097 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -214,7 +214,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_UDA134X select SND_SOC_UDA1380 if I2C select SND_SOC_WCD9335 if SLIMBUS - select SND_SOC_WCD934X if MFD_WCD934X + select SND_SOC_WCD934X if MFD_WCD934X && COMMON_CLK select SND_SOC_WL1273 if MFD_WL1273_CORE select SND_SOC_WM0010 if SPI_MASTER select SND_SOC_WM1250_EV1 if I2C From b26a695a1d78cc415fe26d74d0463f5d887980de Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:04 -0600 Subject: [PATCH 578/658] kvm: lapic: Introduce APICv update helper function Re-factor code into a helper function for setting lapic parameters when activate/deactivate APICv, and export the function for subsequent usage. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 22 +++++++++++++++++----- arch/x86/kvm/lapic.h | 1 + 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cce1e6b204c8..eafc631d305c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2187,6 +2187,21 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) pr_warn_once("APIC base relocation is unsupported by KVM"); } +void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (vcpu->arch.apicv_active) { + /* irr_pending is always true when apicv is activated. 
*/ + apic->irr_pending = true; + apic->isr_count = 1; + } else { + apic->irr_pending = (apic_search_irr(apic) != -1); + apic->isr_count = count_vectors(apic->regs + APIC_ISR); + } +} +EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); + void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -2229,8 +2244,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0); kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = vcpu->arch.apicv_active; - apic->isr_count = vcpu->arch.apicv_active ? 1 : 0; + kvm_apic_update_apicv(vcpu); apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -2487,9 +2501,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); start_apic_timer(apic); - apic->irr_pending = true; - apic->isr_count = vcpu->arch.apicv_active ? - 1 : count_vectors(apic->regs + APIC_ISR); + kvm_apic_update_apicv(vcpu); apic->highest_isr_cache = -1; if (vcpu->arch.apicv_active) { kvm_x86_ops->apicv_post_state_restore(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index ec730ce7a344..ec6fbfe325cf 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -91,6 +91,7 @@ void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, struct dest_map *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); +void kvm_apic_update_apicv(struct kvm_vcpu *vcpu); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); From 4e19c36f2df8f84da22c7287de86729aaf3e352b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:05 -0600 Subject: [PATCH 579/658] kvm: x86: Introduce APICv inhibit reason bits There are several reasons in which a VM needs to deactivate APICv e.g. disable APICv via parameter during module loading, or when enable Hyper-V SynIC support. Additional inhibit reasons will be introduced later on when dynamic APICv is supported, Introduce KVM APICv inhibit reason bits along with a new variable, apicv_inhibit_reasons, to help keep track of APICv state for each VM, Initially, the APICV_INHIBIT_REASON_DISABLE bit is used to indicate the case where APICv is disabled during KVM module load. (e.g. insmod kvm_amd avic=0 or insmod kvm_intel enable_apicv=0). Signed-off-by: Suravee Suthikulpanit [Do not use get_enable_apicv; consider irqchip_split in svm.c. 
- Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/svm.c | 14 +++++++++++++- arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 20 +++++++++++++++++++- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 329d01c689b7..4d57e4b74aae 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -873,6 +873,8 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +#define APICV_INHIBIT_REASON_DISABLE 0 + struct kvm_arch { unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; @@ -904,6 +906,7 @@ struct kvm_arch { struct kvm_apic_map *apic_map; bool apic_access_page_done; + unsigned long apicv_inhibit_reasons; gpa_t wall_clock; @@ -1478,6 +1481,8 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); +bool kvm_apicv_activated(struct kvm *kvm); +void kvm_apicv_init(struct kvm *kvm, bool enable); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9dbb990c319a..ed39f72faeaf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2052,6 +2052,18 @@ free_avic: return err; } +static int svm_vm_init(struct kvm *kvm) +{ + if (avic) { + int ret = avic_vm_init(kvm); + if (ret) + return ret; + } + + kvm_apicv_init(kvm, avic && irqchip_split(kvm)); + return 0; +} + static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) { @@ -7274,7 +7286,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vm_alloc = svm_vm_alloc, .vm_free = svm_vm_free, - .vm_init = avic_vm_init, + .vm_init = svm_vm_init, .vm_destroy = svm_vm_destroy, .prepare_guest_switch = svm_prepare_guest_switch, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c475fa2aaae0..69bd10a563c0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6813,6 +6813,7 @@ static int vmx_vm_init(struct kvm *kvm) break; } } + kvm_apicv_init(kvm, enable_apicv); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2d3be7f3ad67..98209b8c18c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7469,6 +7469,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); } +bool kvm_apicv_activated(struct kvm *kvm) +{ + return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0); +} +EXPORT_SYMBOL_GPL(kvm_apicv_activated); + +void kvm_apicv_init(struct kvm *kvm, bool enable) +{ + if (enable) + clear_bit(APICV_INHIBIT_REASON_DISABLE, + &kvm->arch.apicv_inhibit_reasons); + else + set_bit(APICV_INHIBIT_REASON_DISABLE, + &kvm->arch.apicv_inhibit_reasons); +} +EXPORT_SYMBOL_GPL(kvm_apicv_init); + static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id) { struct kvm_vcpu *target = NULL; @@ -9219,10 +9236,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return r; if (irqchip_in_kernel(vcpu->kvm)) { - vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; + if (kvm_apicv_activated(vcpu->kvm)) + vcpu->arch.apicv_active = true; } else static_key_slow_inc(&kvm_no_apic_vcpu); From 7e3e67a98701cbcb4378b4f69b28a43351ca27c2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Jan 2020 16:54:37 +0100 Subject: [PATCH 580/658] KVM: x86: remove get_enable_apicv from 
kvm_x86_ops It is unused now. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 6 ------ 3 files changed, 13 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4d57e4b74aae..9945c7bebdf8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1121,7 +1121,6 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(struct kvm *kvm); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ed39f72faeaf..b0c343fef14d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5147,11 +5147,6 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) return; } -static bool svm_get_enable_apicv(struct kvm *kvm) -{ - return avic && irqchip_split(kvm); -} - static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { } @@ -7343,7 +7338,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_apic_mode = svm_set_virtual_apic_mode, - .get_enable_apicv = svm_get_enable_apicv, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 69bd10a563c0..3e18df4cfb34 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3719,11 +3719,6 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) } } -static bool vmx_get_enable_apicv(struct kvm *kvm) -{ - return enable_apicv; -} - static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7787,7 +7782,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .get_enable_apicv = vmx_get_enable_apicv, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, .apicv_post_state_restore = vmx_apicv_post_state_restore,
From 8df14af42f00a434c492c9964a8095bf59831a45 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:06 -0600 Subject: [PATCH 581/658] kvm: x86: Add support for dynamic APICv activation Certain runtime conditions require APICv to be temporarily deactivated. The current implementation only supports run-time deactivation of APICv when Hyper-V SynIC is enabled, which is not temporary. In addition, for AMD, when APICv is (de)activated at runtime, all vcpus in the VM have to operate in the same mode. Thus the requesting vcpu must notify the others.
So, introduce the following: * A new KVM_REQ_APICV_UPDATE request bit * Interfaces to request all vcpus to update APICv status * A new interface to update APICV-related parameters for each vcpu Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/x86.c | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9945c7bebdf8..0189687877a7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -78,6 +78,8 @@ #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_APICV_UPDATE \ + KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -1482,6 +1484,9 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); bool kvm_apicv_activated(struct kvm *kvm); void kvm_apicv_init(struct kvm *kvm, bool enable); +void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); +void kvm_request_apicv_update(struct kvm *kvm, bool activate, + unsigned long bit); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 98209b8c18c1..616491c134ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -26,6 +26,7 @@ #include "cpuid.h" #include "pmu.h" #include "hyperv.h" +#include "lapic.h" #include #include @@ -8013,6 +8014,40 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } +void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) +{ + if (!lapic_in_kernel(vcpu)) + return; + + vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm); + kvm_apic_update_apicv(vcpu); + kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); + +/* + * NOTE: Do not hold any lock prior to calling this. + * + * In particular, kvm_request_apicv_update() expects kvm->srcu not to be + * locked, because it calls __x86_set_memory_region() which does + * synchronize_srcu(&kvm->srcu). + */ +void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +{ + if (activate) { + if (!test_and_clear_bit(bit, &kvm->arch.apicv_inhibit_reasons) || + !kvm_apicv_activated(kvm)) + return; + } else { + if (test_and_set_bit(bit, &kvm->arch.apicv_inhibit_reasons) || + kvm_apicv_activated(kvm)) + return; + } + + kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); +} +EXPORT_SYMBOL_GPL(kvm_request_apicv_update); + static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { if (!kvm_apic_present(vcpu)) @@ -8203,6 +8238,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) */ if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) kvm_hv_process_stimers(vcpu); + if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) + kvm_vcpu_update_apicv(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { From 24bbf74c0c36bfbaa276c9921b55b844018b241e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:07 -0600 Subject: [PATCH 582/658] kvm: x86: Add APICv (de)activate request trace points Add trace points when sending request to (de)activate APICv. 
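[Editorial illustration, not part of any patch in this series.] To make the inhibit-reason bookkeeping introduced above (PATCH 579/581) easier to follow, here is a minimal stand-alone user-space sketch of what kvm_apicv_activated() and kvm_request_apicv_update() do with the apicv_inhibit_reasons bitmask. It deliberately omits the atomic test_and_set_bit()/test_and_clear_bit() short-circuiting and the KVM_REQ_APICV_UPDATE broadcast; the function names are illustrative only.

/* User-space model only; not kernel code. Build with: gcc -o apicv apicv.c */
#include <stdbool.h>
#include <stdio.h>

#define APICV_INHIBIT_REASON_DISABLE 0
#define APICV_INHIBIT_REASON_HYPERV  1

static unsigned long apicv_inhibit_reasons;

static bool apicv_activated(void)
{
	/* APICv stays active only while no inhibit reason bit is set. */
	return apicv_inhibit_reasons == 0;
}

static void request_apicv_update(bool activate, unsigned long bit)
{
	if (activate)
		apicv_inhibit_reasons &= ~(1UL << bit);
	else
		apicv_inhibit_reasons |= 1UL << bit;
	/* The real code then broadcasts KVM_REQ_APICV_UPDATE to all vcpus. */
}

int main(void)
{
	request_apicv_update(false, APICV_INHIBIT_REASON_HYPERV);
	printf("activated: %d\n", apicv_activated());	/* prints 0 */
	request_apicv_update(true, APICV_INHIBIT_REASON_HYPERV);
	printf("activated: %d\n", apicv_activated());	/* prints 1 */
	return 0;
}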
Suggested-by: Alexander Graf Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 19 +++++++++++++++++++ arch/x86/kvm/x86.c | 2 ++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7c741a0c5f80..f194dd058470 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1291,6 +1291,25 @@ TRACE_EVENT(kvm_hv_stimer_cleanup, __entry->vcpu_id, __entry->timer_index) ); +TRACE_EVENT(kvm_apicv_update_request, + TP_PROTO(bool activate, unsigned long bit), + TP_ARGS(activate, bit), + + TP_STRUCT__entry( + __field(bool, activate) + __field(unsigned long, bit) + ), + + TP_fast_assign( + __entry->activate = activate; + __entry->bit = bit; + ), + + TP_printk("%s bit=%lu", + __entry->activate ? "activate" : "deactivate", + __entry->bit) +); + /* * Tracepoint for AMD AVIC */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 616491c134ae..3ceb0bc7d3f2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8044,6 +8044,7 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) return; } + trace_kvm_apicv_update_request(activate, bit); kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); @@ -10503,3 +10504,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); From 8937d762396d22bdb59f02732a66db6b58e746b1 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:08 -0600 Subject: [PATCH 583/658] kvm: x86: svm: Add support to (de)activate posted interrupts Introduce interface for (de)activate posted interrupts, and implement SVM hooks to toggle AMD IOMMU guest virtual APIC mode. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b0c343fef14d..e6118d17de6e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5155,17 +5155,52 @@ static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) { } +static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate) +{ + int ret = 0; + unsigned long flags; + struct amd_svm_iommu_ir *ir; + struct vcpu_svm *svm = to_svm(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + return 0; + + /* + * Here, we go through the per-vcpu ir_list to update all existing + * interrupt remapping table entry targeting this vcpu. + */ + spin_lock_irqsave(&svm->ir_list_lock, flags); + + if (list_empty(&svm->ir_list)) + goto out; + + list_for_each_entry(ir, &svm->ir_list, node) { + if (activate) + ret = amd_iommu_activate_guest_mode(ir->data); + else + ret = amd_iommu_deactivate_guest_mode(ir->data); + if (ret) + break; + } +out: + spin_unlock_irqrestore(&svm->ir_list_lock, flags); + return ret; +} + /* Note: Currently only used by Hyper-V. 
*/ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; + bool activated = kvm_vcpu_apicv_active(vcpu); - if (kvm_vcpu_apicv_active(vcpu)) + if (activated) vmcb->control.int_ctl |= AVIC_ENABLE_MASK; else vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; mark_dirty(vmcb, VMCB_AVIC); + + svm_set_pi_irte_mode(vcpu, activated); } static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
From dcbcfa287e964931f7051ff00ed33dbf33d39abd Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:09 -0600 Subject: [PATCH 584/658] KVM: svm: avic: Add support for dynamic setup/teardown of virtual APIC backing page Re-factor avic_init_access_page() to avic_update_access_page() since activate/deactivate AVIC requires setting/unsetting the memory region used for virtual APIC backing page (APIC_ACCESS_PAGE_PRIVATE_MEMSLOT). Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e6118d17de6e..dcea6b663d5c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1729,23 +1729,22 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, * field of the VMCB. Therefore, we set up the * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. */ -static int avic_init_access_page(struct kvm_vcpu *vcpu) +static int avic_update_access_page(struct kvm *kvm, bool activate) { - struct kvm *kvm = vcpu->kvm; int ret = 0; mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page_done) + if (kvm->arch.apic_access_page_done == activate) goto out; ret = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, - PAGE_SIZE); + activate ? PAGE_SIZE : 0); if (ret) goto out; - kvm->arch.apic_access_page_done = true; + kvm->arch.apic_access_page_done = activate; out: mutex_unlock(&kvm->slots_lock); return ret; @@ -1758,7 +1757,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) int id = vcpu->vcpu_id; struct vcpu_svm *svm = to_svm(vcpu); - ret = avic_init_access_page(vcpu); + ret = avic_update_access_page(vcpu->kvm, true); if (ret) return ret;
From ef8efd7a15bb7147a4ffb09758a6bd25d744a14e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:10 -0600 Subject: [PATCH 585/658] kvm: x86: Introduce APICv x86 ops for checking APIC inhibit reasons Inhibit reason bits are used to determine if APICv deactivation is applicable for a particular hardware virtualization architecture.
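[Editorial illustration, not part of the patch.] The check_apicv_inhibit_reasons() callback added by this patch lets each vendor module declare which inhibit reasons it handles; kvm_request_apicv_update() simply ignores a reason the vendor does not list. A tiny user-space model of that mask check, with illustrative reason values, is sketched below.

/* User-space model only; mirrors the supported-reasons mask pattern. */
#include <stdbool.h>
#include <stdio.h>

#define BIT(n) (1UL << (n))
#define APICV_INHIBIT_REASON_DISABLE 0
#define APICV_INHIBIT_REASON_HYPERV  1

static bool check_apicv_inhibit_reasons(unsigned long bit)
{
	/* A vendor that only handles the module-load case. */
	unsigned long supported = BIT(APICV_INHIBIT_REASON_DISABLE);

	return supported & BIT(bit);
}

int main(void)
{
	printf("%d\n", check_apicv_inhibit_reasons(APICV_INHIBIT_REASON_DISABLE)); /* 1 */
	printf("%d\n", check_apicv_inhibit_reasons(APICV_INHIBIT_REASON_HYPERV));  /* 0: ignored */
	return 0;
}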
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 8 ++++++++ arch/x86/kvm/vmx/vmx.c | 8 ++++++++ arch/x86/kvm/x86.c | 4 ++++ 4 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0189687877a7..81c41bfb0a5f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1123,6 +1123,7 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + bool (*check_apicv_inhibit_reasons)(ulong bit); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index dcea6b663d5c..842c0630af35 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7298,6 +7298,13 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT)); } +static bool svm_check_apicv_inhibit_reasons(ulong bit) +{ + ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE); + + return supported & BIT(bit); +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7373,6 +7380,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_apic_mode = svm_set_virtual_apic_mode, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, + .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3e18df4cfb34..7ba8de3325be 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7710,6 +7710,13 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } +static bool vmx_check_apicv_inhibit_reasons(ulong bit) +{ + ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE); + + return supported & BIT(bit); +} + static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -7785,6 +7792,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, .apicv_post_state_restore = vmx_apicv_post_state_restore, + .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ceb0bc7d3f2..dbff8011f0f2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8034,6 +8034,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { + if (!kvm_x86_ops->check_apicv_inhibit_reasons || + !kvm_x86_ops->check_apicv_inhibit_reasons(bit)) + return; + if (activate) { if (!test_and_clear_bit(bit, &kvm->arch.apicv_inhibit_reasons) || !kvm_apicv_activated(kvm)) From 2de9d0ccd0fea32fc6a684f3f22496967ed608bc Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:11 -0600 Subject: [PATCH 586/658] kvm: x86: Introduce x86 
ops hook for pre-update APICv AMD SVM AVIC needs to update APIC backing page mapping before changing APICv mode. Introduce struct kvm_x86_ops.pre_update_apicv_exec_ctrl function hook to be called prior to the KVM APICv update request to each vcpu. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/x86.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 81c41bfb0a5f..19a7d0d3a5fa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1124,6 +1124,7 @@ struct kvm_x86_ops { void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); bool (*check_apicv_inhibit_reasons)(ulong bit); + void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 842c0630af35..d85e29bc6ff1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7305,6 +7305,11 @@ static bool svm_check_apicv_inhibit_reasons(ulong bit) return supported & BIT(bit); } +static void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate) +{ + avic_update_access_page(kvm, activate); +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7381,6 +7386,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .set_virtual_apic_mode = svm_set_virtual_apic_mode, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons, + .pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dbff8011f0f2..d2f15cbe2634 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8049,6 +8049,8 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) } trace_kvm_apicv_update_request(activate, bit); + if (kvm_x86_ops->pre_update_apicv_exec_ctrl) + kvm_x86_ops->pre_update_apicv_exec_ctrl(kvm, activate); kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
From 6c3e4422dd201c107948adc1b7615610d7381bcb Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:12 -0600 Subject: [PATCH 587/658] svm: Add support for dynamic APICv Add the necessary logic to support (de)activating AVIC at runtime.
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d85e29bc6ff1..3c211933a0a7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -387,6 +387,7 @@ static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); +static inline void avic_post_state_restore(struct kvm_vcpu *vcpu); static int nested_svm_exit_handled(struct vcpu_svm *svm); static int nested_svm_intercept(struct vcpu_svm *svm); @@ -1545,7 +1546,10 @@ static void avic_init_vmcb(struct vcpu_svm *svm) vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK; vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT; - vmcb->control.int_ctl |= AVIC_ENABLE_MASK; + if (kvm_apicv_activated(svm->vcpu.kvm)) + vmcb->control.int_ctl |= AVIC_ENABLE_MASK; + else + vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; } static void init_vmcb(struct vcpu_svm *svm) @@ -1752,21 +1756,24 @@ out: static int avic_init_backing_page(struct kvm_vcpu *vcpu) { - int ret; u64 *entry, new_entry; int id = vcpu->vcpu_id; struct vcpu_svm *svm = to_svm(vcpu); - ret = avic_update_access_page(vcpu->kvm, true); - if (ret) - return ret; - if (id >= AVIC_MAX_PHYSICAL_ID_COUNT) return -EINVAL; if (!svm->vcpu.arch.apic->regs) return -EINVAL; + if (kvm_apicv_activated(vcpu->kvm)) { + int ret; + + ret = avic_update_access_page(vcpu->kvm, true); + if (ret) + return ret; + } + svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs); /* Setting AVIC backing page address in the phy APIC ID table */ @@ -2234,7 +2241,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) /* We initialize this flag to true to make sure that the is_running * bit would be set the first time the vcpu is loaded. */ - svm->avic_is_running = true; + if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm)) + svm->avic_is_running = true; svm->nested.hsave = page_address(hsave_page); @@ -2359,6 +2367,8 @@ static void svm_vcpu_blocking(struct kvm_vcpu *vcpu) static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) { + if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) + kvm_vcpu_update_apicv(vcpu); avic_set_running(vcpu, true); } @@ -5186,17 +5196,25 @@ out: return ret; } -/* Note: Currently only used by Hyper-V. */ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; bool activated = kvm_vcpu_apicv_active(vcpu); - if (activated) + if (activated) { + /** + * During AVIC temporary deactivation, guest could update + * APIC ID, DFR and LDR registers, which would not be trapped + * by avic_unaccelerated_access_interception(). In this case, + * we need to check and update the AVIC logical APIC ID table + * accordingly before re-activating. 
+ */ + avic_post_state_restore(vcpu); vmcb->control.int_ctl |= AVIC_ENABLE_MASK; - else + } else { vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; + } mark_dirty(vmcb, VMCB_AVIC); svm_set_pi_irte_mode(vcpu, activated); From f4fdc0a2edf48f16f7b10cceaf4781fc56ab7fd9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:13 -0600 Subject: [PATCH 588/658] kvm: x86: hyperv: Use APICv update request interface Since disabling APICv has to be done for all vcpus on AMD-based system, adopt the newly introduced kvm_request_apicv_update() interface, and introduce a new APICV_INHIBIT_REASON_HYPERV. Also, remove the kvm_vcpu_deactivate_apicv() since no longer used. Cc: Roman Kagan Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/hyperv.c | 5 +++-- arch/x86/kvm/svm.c | 3 ++- arch/x86/kvm/vmx/vmx.c | 3 ++- arch/x86/kvm/x86.c | 13 ------------- 5 files changed, 8 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 19a7d0d3a5fa..90bfe8becc56 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -876,6 +876,7 @@ enum kvm_irqchip_mode { }; #define APICV_INHIBIT_REASON_DISABLE 0 +#define APICV_INHIBIT_REASON_HYPERV 1 struct kvm_arch { unsigned long n_used_mmu_pages; @@ -1483,7 +1484,6 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); bool kvm_apicv_activated(struct kvm *kvm); void kvm_apicv_init(struct kvm *kvm, bool enable); void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4df1c965bf1a..a86fda7a1d03 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -776,9 +776,10 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) /* * Hyper-V SynIC auto EOI SINT's are - * not compatible with APICV, so deactivate APICV + * not compatible with APICV, so request + * to deactivate APICV permanently. 
*/ - kvm_vcpu_deactivate_apicv(vcpu); + kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); synic->active = true; synic->dont_zero_synic_pages = dont_zero_synic_pages; return 0; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3c211933a0a7..3b87ccd320d1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7318,7 +7318,8 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) static bool svm_check_apicv_inhibit_reasons(ulong bit) { - ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE); + ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | + BIT(APICV_INHIBIT_REASON_HYPERV); return supported & BIT(bit); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7ba8de3325be..678edbd6e278 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7712,7 +7712,8 @@ static __exit void hardware_unsetup(void) static bool vmx_check_apicv_inhibit_reasons(ulong bit) { - ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE); + ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | + BIT(APICV_INHIBIT_REASON_HYPERV); return supported & BIT(bit); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d2f15cbe2634..52edf0bb46e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7457,19 +7457,6 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } -void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) -{ - if (!lapic_in_kernel(vcpu)) { - WARN_ON_ONCE(vcpu->arch.apicv_active); - return; - } - if (!vcpu->arch.apicv_active) - return; - - vcpu->arch.apicv_active = false; - kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); -} - bool kvm_apicv_activated(struct kvm *kvm) { return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0); From 9a0bf05430699dc94b7ced940f6270c7cf1d77ef Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:14 -0600 Subject: [PATCH 589/658] svm: Deactivate AVIC when launching guest with nested SVM support Since AVIC does not currently work w/ nested virtualization, deactivate AVIC for the guest if setting CPUID Fn80000001_ECX[SVM] (i.e. indicate support for SVM, which is needed for nested virtualization). Also, introduce a new APICV_INHIBIT_REASON_NESTED bit to be used for this reason. Suggested-by: Alexander Graf Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 90bfe8becc56..ce19dea5f2dd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -877,6 +877,7 @@ enum kvm_irqchip_mode { #define APICV_INHIBIT_REASON_DISABLE 0 #define APICV_INHIBIT_REASON_HYPERV 1 +#define APICV_INHIBIT_REASON_NESTED 2 struct kvm_arch { unsigned long n_used_mmu_pages; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3b87ccd320d1..af90f83d7123 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5988,6 +5988,14 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) return; guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); + + /* + * Currently, AVIC does not work with nested virtualization. + * So, we disable AVIC when cpuid for SVM is set in the L1 guest. 
+ */ + if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + kvm_request_apicv_update(vcpu->kvm, false, + APICV_INHIBIT_REASON_NESTED); } #define F feature_bit @@ -7319,7 +7327,8 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) static bool svm_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | - BIT(APICV_INHIBIT_REASON_HYPERV); + BIT(APICV_INHIBIT_REASON_HYPERV) | + BIT(APICV_INHIBIT_REASON_NESTED); return supported & BIT(bit); } From f3515dc3bef81e96bdb2ac93ef8fd20b1c2aaae5 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:15 -0600 Subject: [PATCH 590/658] svm: Temporarily deactivate AVIC during ExtINT handling AMD AVIC does not support ExtINT. Therefore, AVIC must be temporary deactivated and fall back to using legacy interrupt injection via vINTR and interrupt window. Also, introduce APICV_INHIBIT_REASON_IRQWIN to be used for this reason. Signed-off-by: Suravee Suthikulpanit [Rename svm_request_update_avic to svm_toggle_avic_for_extint. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 33 +++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ce19dea5f2dd..2bd7fd96d994 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -878,6 +878,7 @@ enum kvm_irqchip_mode { #define APICV_INHIBIT_REASON_DISABLE 0 #define APICV_INHIBIT_REASON_HYPERV 1 #define APICV_INHIBIT_REASON_NESTED 2 +#define APICV_INHIBIT_REASON_IRQWIN 3 struct kvm_arch { unsigned long n_used_mmu_pages; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index af90f83d7123..6d300c16d756 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -387,6 +387,7 @@ static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); +static void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate); static inline void avic_post_state_restore(struct kvm_vcpu *vcpu); static int nested_svm_exit_handled(struct vcpu_svm *svm); @@ -4461,6 +4462,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm) { kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); svm_clear_vintr(svm); + + /* + * For AVIC, the only reason to end up here is ExtINTs. + * In this case AVIC was temporarily disabled for + * requesting the IRQ window and we have to re-enable it. 
+ */ + svm_toggle_avic_for_irq_window(&svm->vcpu, true); + svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; mark_dirty(svm->vmcb, VMCB_INTR); ++svm->vcpu.stat.irq_window_exits; @@ -5164,6 +5173,17 @@ static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) { } +static void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate) +{ + if (!avic || !lapic_in_kernel(vcpu)) + return; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + kvm_request_apicv_update(vcpu->kvm, activate, + APICV_INHIBIT_REASON_IRQWIN); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); +} + static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate) { int ret = 0; @@ -5504,9 +5524,6 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (kvm_vcpu_apicv_active(vcpu)) - return; - /* * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes * 1, because that's a separate STGI/VMRUN intercept. The next time we @@ -5516,6 +5533,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) * window under the assumption that the hardware will set the GIF. */ if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { + /* + * IRQ window is not needed when AVIC is enabled, + * unless we have pending ExtINT since it cannot be injected + * via AVIC. In such case, we need to temporarily disable AVIC, + * and fallback to injecting IRQ via V_IRQ. + */ + svm_toggle_avic_for_irq_window(vcpu, false); svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } @@ -7328,7 +7352,8 @@ static bool svm_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_HYPERV) | - BIT(APICV_INHIBIT_REASON_NESTED); + BIT(APICV_INHIBIT_REASON_NESTED) | + BIT(APICV_INHIBIT_REASON_IRQWIN); return supported & BIT(bit); } From e2ed4078a6ef3ddf4063329298852e24c36d46c8 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:16 -0600 Subject: [PATCH 591/658] kvm: i8254: Deactivate APICv when using in-kernel PIT re-injection mode. AMD SVM AVIC accelerates EOI write and does not trap. This causes in-kernel PIT re-injection mode to fail since it relies on irq-ack notifier mechanism. So, APICv is activated only when in-kernel PIT is in discard mode e.g. w/ qemu option: -global kvm-pit.lost_tick_policy=discard Also, introduce APICV_INHIBIT_REASON_PIT_REINJ bit to be used for this reason. Suggested-by: Paolo Bonzini Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/i8254.c | 12 ++++++++++++ arch/x86/kvm/svm.c | 11 +++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2bd7fd96d994..4dffbc10d3f8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -879,6 +879,7 @@ enum kvm_irqchip_mode { #define APICV_INHIBIT_REASON_HYPERV 1 #define APICV_INHIBIT_REASON_NESTED 2 #define APICV_INHIBIT_REASON_IRQWIN 3 +#define APICV_INHIBIT_REASON_PIT_REINJ 4 struct kvm_arch { unsigned long n_used_mmu_pages; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4a6dc54cc12b..b24c606ac04b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -295,12 +295,24 @@ void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject) if (atomic_read(&ps->reinject) == reinject) return; + /* + * AMD SVM AVIC accelerates EOI write and does not trap. 
+ * This cause in-kernel PIT re-inject mode to fail + * since it checks ps->irq_ack before kvm_set_irq() + * and relies on the ack notifier to timely queue + * the pt->worker work iterm and reinject the missed tick. + * So, deactivate APICv when PIT is in reinject mode. + */ if (reinject) { + kvm_request_apicv_update(kvm, false, + APICV_INHIBIT_REASON_PIT_REINJ); /* The initial state is preserved while ps->reinject == 0. */ kvm_pit_reset_reinject(pit); kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier); kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); } else { + kvm_request_apicv_update(kvm, true, + APICV_INHIBIT_REASON_PIT_REINJ); kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier); kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6d300c16d756..0b05967aa455 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1739,7 +1739,13 @@ static int avic_update_access_page(struct kvm *kvm, bool activate) int ret = 0; mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page_done == activate) + /* + * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger + * APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT + * memory region. So, we need to ensure that kvm->mm == current->mm. + */ + if ((kvm->arch.apic_access_page_done == activate) || + (kvm->mm != current->mm)) goto out; ret = __x86_set_memory_region(kvm, @@ -7353,7 +7359,8 @@ static bool svm_check_apicv_inhibit_reasons(ulong bit) ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_NESTED) | - BIT(APICV_INHIBIT_REASON_IRQWIN); + BIT(APICV_INHIBIT_REASON_IRQWIN) | + BIT(APICV_INHIBIT_REASON_PIT_REINJ); return supported & BIT(bit); } From 1ec2405c7cbf3afa7598c6b7546c81aa0cac78dc Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:18 -0600 Subject: [PATCH 592/658] kvm: ioapic: Refactor kvm_ioapic_update_eoi() Refactor code for handling IOAPIC EOI for subsequent patch. There is no functional change. 
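[Editorial illustration, not part of any patch.] PATCH 591 above notes that AVIC can only stay active when the in-kernel PIT uses the discard lost-tick policy. A guest launched along the lines below keeps the PIT in discard mode and therefore leaves AVIC eligible for activation; the disk image name is a placeholder and the other options are just a typical minimal invocation.

qemu-system-x86_64 -enable-kvm -cpu host -m 2048 \
    -global kvm-pit.lost_tick_policy=discard \
    -drive file=guest.img,if=virtio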
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 110 +++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 26aa22cb9b29..453c79550917 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -154,10 +154,16 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) __rtc_irq_eoi_tracking_restore_one(vcpu); } -static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu, + int vector) { - if (test_and_clear_bit(vcpu->vcpu_id, - ioapic->rtc_status.dest_map.map)) { + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; + + /* RTC special handling */ + if (test_bit(vcpu->vcpu_id, dest_map->map) && + (vector == dest_map->vectors[vcpu->vcpu_id]) && + (test_and_clear_bit(vcpu->vcpu_id, + ioapic->rtc_status.dest_map.map))) { --ioapic->rtc_status.pending_eoi; rtc_status_pending_eoi_check_valid(ioapic); } @@ -454,72 +460,68 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work) } #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 - -static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, - struct kvm_ioapic *ioapic, int vector, int trigger_mode) +static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, + int trigger_mode, + int pin) { - struct dest_map *dest_map = &ioapic->rtc_status.dest_map; struct kvm_lapic *apic = vcpu->arch.apic; - int i; + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[pin]; - /* RTC special handling */ - if (test_bit(vcpu->vcpu_id, dest_map->map) && - vector == dest_map->vectors[vcpu->vcpu_id]) - rtc_irq_eoi(ioapic, vcpu); + /* + * We are dropping lock while calling ack notifiers because ack + * notifier callbacks for assigned devices call into IOAPIC + * recursively. Since remote_irr is cleared only after call + * to notifiers if the same vector will be delivered while lock + * is dropped it will be put into irr and will be delivered + * after ack notifier returns. + */ + spin_unlock(&ioapic->lock); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); + spin_lock(&ioapic->lock); - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + if (trigger_mode != IOAPIC_LEVEL_TRIG || + kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) + return; - if (ent->fields.vector != vector) - continue; - - /* - * We are dropping lock while calling ack notifiers because ack - * notifier callbacks for assigned devices call into IOAPIC - * recursively. Since remote_irr is cleared only after call - * to notifiers if the same vector will be delivered while lock - * is dropped it will be put into irr and will be delivered - * after ack notifier returns. 
- */ - spin_unlock(&ioapic->lock); - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); - spin_lock(&ioapic->lock); - - if (trigger_mode != IOAPIC_LEVEL_TRIG || - kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) - continue; - - ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); - ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << i))) { - ++ioapic->irq_eoi[i]; - if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { - /* - * Real hardware does not deliver the interrupt - * immediately during eoi broadcast, and this - * lets a buggy guest make slow progress - * even if it does not correctly handle a - * level-triggered interrupt. Emulate this - * behavior if we detect an interrupt storm. - */ - schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); - ioapic->irq_eoi[i] = 0; - trace_kvm_ioapic_delayed_eoi_inj(ent->bits); - } else { - ioapic_service(ioapic, i, false); - } + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << pin))) { + ++ioapic->irq_eoi[pin]; + if (ioapic->irq_eoi[pin] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { + /* + * Real hardware does not deliver the interrupt + * immediately during eoi broadcast, and this + * lets a buggy guest make slow progress + * even if it does not correctly handle a + * level-triggered interrupt. Emulate this + * behavior if we detect an interrupt storm. + */ + schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); + ioapic->irq_eoi[pin] = 0; + trace_kvm_ioapic_delayed_eoi_inj(ent->bits); } else { - ioapic->irq_eoi[i] = 0; + ioapic_service(ioapic, pin, false); } + } else { + ioapic->irq_eoi[pin] = 0; } } void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { + int i; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; spin_lock(&ioapic->lock); - __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); + rtc_irq_eoi(ioapic, vcpu, vector); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.vector != vector) + continue; + kvm_ioapic_update_eoi_one(vcpu, ioapic, trigger_mode, i); + } spin_unlock(&ioapic->lock); } From f458d039db7e8518041db4169d657407e3217008 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 14 Nov 2019 14:15:19 -0600 Subject: [PATCH 593/658] kvm: ioapic: Lazy update IOAPIC EOI In-kernel IOAPIC does not receive EOI with AMD SVM AVIC since the processor accelerate write to APIC EOI register and does not trap if the interrupt is edge-triggered. Workaround this by lazy check for pending APIC EOI at the time when setting new IOPIC irq, and update IOAPIC EOI if no pending APIC EOI. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 453c79550917..7668fed1ce65 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -49,6 +49,11 @@ static int ioapic_service(struct kvm_ioapic *vioapic, int irq, bool line_status); +static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, + int trigger_mode, + int pin); + static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, unsigned long addr, unsigned long length) @@ -177,6 +182,31 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) return false; } +static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq) +{ + int i; + struct kvm_vcpu *vcpu; + union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; + + kvm_for_each_vcpu(i, vcpu, ioapic->kvm) { + if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, + entry->fields.dest_id, + entry->fields.dest_mode) || + kvm_apic_pending_eoi(vcpu, entry->fields.vector)) + continue; + + /* + * If no longer has pending EOI in LAPICs, update + * EOI for this vetor. + */ + rtc_irq_eoi(ioapic, vcpu, entry->fields.vector); + kvm_ioapic_update_eoi_one(vcpu, ioapic, + entry->fields.trig_mode, + irq); + break; + } +} + static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, int irq_level, bool line_status) { @@ -194,6 +224,15 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, goto out; } + /* + * AMD SVM AVIC accelerate EOI write and do not trap, + * in-kernel IOAPIC will not be able to receive the EOI. + * In this case, we do lazy update of the pending EOI when + * trying to set IOAPIC irq. + */ + if (kvm_apicv_activated(ioapic->kvm)) + ioapic_lazy_update_eoi(ioapic, irq); + /* * Return 0 for coalesced interrupts; for edge-triggered interrupts, * this only happens if a previous edge has not been delivered due From e8ef2a19a051b755b0b9973ef1b3f81e895e2bce Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Jan 2020 17:02:36 +0100 Subject: [PATCH 594/658] KVM: SVM: allow AVIC without split irqchip SVM is now able to disable AVIC dynamically whenever the in-kernel PIT sets up an ack notifier, so we can enable it even if in-kernel IOAPIC/PIC/PIT are in use. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b05967aa455..bf0556588ad0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2073,7 +2073,7 @@ static int svm_vm_init(struct kvm *kvm) return ret; } - kvm_apicv_init(kvm, avic && irqchip_split(kvm)); + kvm_apicv_init(kvm, avic); return 0; } From 33aabd029ffbafe314dad4763dadbc23d71296eb Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 23 Jan 2020 10:08:20 +0800 Subject: [PATCH 595/658] KVM: nVMX: delete meaningless nested_vmx_run() declaration The function nested_vmx_run() declaration is below its implementation. So this is meaningless and should be removed. 
Signed-off-by: Miaohe Lin Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2db21d59eaf5..53d522faaa69 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4723,8 +4723,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) return nested_vmx_succeed(vcpu); } -static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); - /* Emulate the VMLAUNCH instruction */ static int handle_vmlaunch(struct kvm_vcpu *vcpu) { From 917f9475c0a8ab8958db7f22a5d495b9a1d51be6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Jan 2020 14:32:20 +0100 Subject: [PATCH 596/658] KVM: x86: reorganize pvclock_gtod_data members We will need a copy of tk->offs_boot in the next patch. Store it and cleanup the struct: instead of storing tk->tkr_xxx.base with the tk->offs_boot included, store the raw value in struct pvclock_clock and sum it in do_monotonic_raw and do_realtime. tk->tkr_xxx.xtime_nsec also moves to struct pvclock_clock. While at it, fix a (usually harmless) typo in do_monotonic_raw, which was using gtod->clock.shift instead of gtod->raw_clock.shift. Fixes: 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock") Cc: stable@vger.kernel.org Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 52edf0bb46e5..8faa721e4c38 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1610,6 +1610,8 @@ struct pvclock_clock { u64 mask; u32 mult; u32 shift; + u64 base_cycles; + u64 offset; }; struct pvclock_gtod_data { @@ -1618,11 +1620,8 @@ struct pvclock_gtod_data { struct pvclock_clock clock; /* extract of a clocksource struct */ struct pvclock_clock raw_clock; /* extract of a clocksource struct */ - u64 boot_ns_raw; - u64 boot_ns; - u64 nsec_base; + ktime_t offs_boot; u64 wall_time_sec; - u64 monotonic_raw_nsec; }; static struct pvclock_gtod_data pvclock_gtod_data; @@ -1630,10 +1629,6 @@ static struct pvclock_gtod_data pvclock_gtod_data; static void update_pvclock_gtod(struct timekeeper *tk) { struct pvclock_gtod_data *vdata = &pvclock_gtod_data; - u64 boot_ns, boot_ns_raw; - - boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); - boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot)); write_seqcount_begin(&vdata->seq); @@ -1643,20 +1638,20 @@ static void update_pvclock_gtod(struct timekeeper *tk) vdata->clock.mask = tk->tkr_mono.mask; vdata->clock.mult = tk->tkr_mono.mult; vdata->clock.shift = tk->tkr_mono.shift; + vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec; + vdata->clock.offset = tk->tkr_mono.base; vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode; vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last; vdata->raw_clock.mask = tk->tkr_raw.mask; vdata->raw_clock.mult = tk->tkr_raw.mult; vdata->raw_clock.shift = tk->tkr_raw.shift; - - vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->tkr_mono.xtime_nsec; + vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec; + vdata->raw_clock.offset = tk->tkr_raw.base; vdata->wall_time_sec = tk->xtime_sec; - vdata->boot_ns_raw = boot_ns_raw; - vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec; + vdata->offs_boot = tk->offs_boot; write_seqcount_end(&vdata->seq); } @@ -2126,10 +2121,10 @@ static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp) do { seq = 
read_seqcount_begin(>od->seq); - ns = gtod->monotonic_raw_nsec; + ns = gtod->raw_clock.base_cycles; ns += vgettsc(>od->raw_clock, tsc_timestamp, &mode); - ns >>= gtod->clock.shift; - ns += gtod->boot_ns_raw; + ns >>= gtod->raw_clock.shift; + ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot)); } while (unlikely(read_seqcount_retry(>od->seq, seq))); *t = ns; @@ -2146,7 +2141,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) do { seq = read_seqcount_begin(>od->seq); ts->tv_sec = gtod->wall_time_sec; - ns = gtod->nsec_base; + ns = gtod->clock.base_cycles; ns += vgettsc(>od->clock, tsc_timestamp, &mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); From 8171cd68806bd2fc28ef688e32fb2a3b3deb04e5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Jan 2020 14:36:09 +0100 Subject: [PATCH 597/658] KVM: x86: use raw clock values consistently Commit 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock") changed kvmclock to use tkr_raw instead of tkr_mono. However, the default kvmclock_offset for the VM was still based on the monotonic clock and, if the raw clock drifted enough from the monotonic clock, this could cause a negative system_time to be written to the guest's struct pvclock. RHEL5 does not like it and (if it boots fast enough to observe a negative time value) it hangs. There is another thing to be careful about: getboottime64 returns the host boot time with tkr_mono frequency, and subtracting the tkr_raw-based kvmclock value will cause the wallclock to be off if tkr_raw drifts from tkr_mono. To avoid this, compute the wallclock delta from the current time instead of being clever and using getboottime64. Fixes: 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock") Cc: stable@vger.kernel.org Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8faa721e4c38..6db92371ad21 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1655,6 +1655,18 @@ static void update_pvclock_gtod(struct timekeeper *tk) write_seqcount_end(&vdata->seq); } + +static s64 get_kvmclock_base_ns(void) +{ + /* Count up from boot time, but with the frequency of the raw clock. */ + return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot)); +} +#else +static s64 get_kvmclock_base_ns(void) +{ + /* Master clock not used, so we can just use CLOCK_BOOTTIME. */ + return ktime_get_boottime_ns(); +} #endif void kvm_set_pending_timer(struct kvm_vcpu *vcpu) @@ -1668,7 +1680,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) int version; int r; struct pvclock_wall_clock wc; - struct timespec64 boot; + u64 wall_nsec; if (!wall_clock) return; @@ -1688,17 +1700,12 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) /* * The guest calculates current wall clock time by adding * system time (updated by kvm_guest_time_update below) to the - * wall clock specified here. guest system time equals host - * system time for us, thus we must fill in host boot time here. + * wall clock specified here. We do the reverse here. 
*/ - getboottime64(&boot); + wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm); - if (kvm->arch.kvmclock_offset) { - struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset); - boot = timespec64_sub(boot, ts); - } - wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */ - wc.nsec = boot.tv_nsec; + wc.nsec = do_div(wall_nsec, 1000000000); + wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */ wc.version = version; kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); @@ -1946,7 +1953,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boottime_ns(); + ns = get_kvmclock_base_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2284,7 +2291,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boottime_ns() + ka->kvmclock_offset; + return get_kvmclock_base_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2300,7 +2307,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boottime_ns() + ka->kvmclock_offset; + ret = get_kvmclock_base_ns() + ka->kvmclock_offset; put_cpu(); @@ -2399,7 +2406,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boottime_ns(); + kernel_ns = get_kvmclock_base_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -2439,6 +2446,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; + WARN_ON(vcpu->hv_clock.system_time < 0); /* If the host uses TSC clocksource, then it is stable */ pvclock_flags = 0; @@ -9677,7 +9685,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); + kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; From 9b5e85320fcc3af20ce0397b2c6363b6ee5815b6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 24 Jan 2020 15:07:22 -0800 Subject: [PATCH 598/658] KVM: x86: Take a u64 when checking for a valid dr7 value Take a u64 instead of an unsigned long in kvm_dr7_valid() to fix a build warning on i386 due to right-shifting a 32-bit value by 32 when checking for bits being set in dr7[63:32]. Alternatively, the warning could be resolved by rewriting the check to use an i386-friendly method, but taking a u64 fixes another oddity on 32-bit KVM. Beause KVM implements natural width VMCS fields as u64s to avoid layout issues between 32-bit and 64-bit, a devious guest can stuff vmcs12->guest_dr7 with a 64-bit value even when both the guest and host are 32-bit kernels. KVM eventually drops vmcs12->guest_dr7[63:32] when propagating vmcs12->guest_dr7 to vmcs02, but ideally KVM would not rely on that behavior for correctness. 
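[Editorial illustration, not kernel code.] The reserved-bits check discussed above only makes sense on a 64-bit quantity: with a 32-bit unsigned long, "data >> 32" both triggers a build warning and fails to test anything useful, whereas with a u64 the upper half is rejected as intended. A stand-alone model of the check:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors kvm_dr7_valid(): bits [63:32] of DR7 are reserved. */
static bool dr7_valid(uint64_t data)
{
	return !(data >> 32);
}

int main(void)
{
	printf("%d\n", dr7_valid(0x0000000000000400ULL)); /* 1: DR7 power-on value */
	printf("%d\n", dr7_valid(0x0000000100000400ULL)); /* 0: reserved bit set */
	return 0;
}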
Cc: Jim Mattson Cc: Krish Sadhukhan Fixes: ecb697d10f70 ("KVM: nVMX: Check GUEST_DR7 on vmentry of nested guests") Reported-by: Randy Dunlap Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2d2ff855773b..3624665acee4 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -357,7 +357,7 @@ static inline bool kvm_pat_valid(u64 data) return (data | ((data & 0x0202020202020202ull) << 1)) == data; } -static inline bool kvm_dr7_valid(unsigned long data) +static inline bool kvm_dr7_valid(u64 data) { /* Bits [63:32] are reserved */ return !(data >> 32); From 7df003c85218b5f5b10a7f6418208f31e813f38f Mon Sep 17 00:00:00 2001 From: Zhuang Yanying Date: Sat, 12 Oct 2019 11:37:31 +0800 Subject: [PATCH 599/658] KVM: fix overflow of zero page refcount with ksm running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are testing Virtual Machine with KSM on v5.4-rc2 kernel, and found the zero_page refcount overflow. The cause of refcount overflow is increased in try_async_pf (get_user_page) without being decreased in mmu_set_spte() while handling ept violation. In kvm_release_pfn_clean(), only unreserved page will call put_page. However, zero page is reserved. So, as well as creating and destroy vm, the refcount of zero page will continue to increase until it overflows. step1: echo 10000 > /sys/kernel/pages_to_scan/pages_to_scan echo 1 > /sys/kernel/pages_to_scan/run echo 1 > /sys/kernel/pages_to_scan/use_zero_pages step2: just create several normal qemu kvm vms. And destroy it after 10s. Repeat this action all the time. After a long period of time, all domains hang because of the refcount of zero page overflow. Qemu print error log as follow: … error: kvm run failed Bad address EAX=00006cdc EBX=00000008 ECX=80202001 EDX=078bfbfd ESI=ffffffff EDI=00000000 EBP=00000008 ESP=00006cc4 EIP=000efd75 EFL=00010002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0010 00000000 ffffffff 00c09300 DPL=0 DS [-WA] CS =0008 00000000 ffffffff 00c09b00 DPL=0 CS32 [-RA] SS =0010 00000000 ffffffff 00c09300 DPL=0 DS [-WA] DS =0010 00000000 ffffffff 00c09300 DPL=0 DS [-WA] FS =0010 00000000 ffffffff 00c09300 DPL=0 DS [-WA] GS =0010 00000000 ffffffff 00c09300 DPL=0 DS [-WA] LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS32-busy GDT= 000f7070 00000037 IDT= 000f70ae 00000000 CR0=00000011 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Code=00 01 00 00 00 e9 e8 00 00 00 c7 05 4c 55 0f 00 01 00 00 00 <8b> 35 00 00 01 00 8b 3d 04 00 01 00 b8 d8 d3 00 00 c1 e0 08 0c ea a3 00 00 01 00 c7 05 04 … Meanwhile, a kernel warning is departed. 
[40914.836375] WARNING: CPU: 3 PID: 82067 at ./include/linux/mm.h:987 try_get_page+0x1f/0x30 [40914.836412] CPU: 3 PID: 82067 Comm: CPU 0/KVM Kdump: loaded Tainted: G OE 5.2.0-rc2 #5 [40914.836415] RIP: 0010:try_get_page+0x1f/0x30 [40914.836417] Code: 40 00 c3 0f 1f 84 00 00 00 00 00 48 8b 47 08 a8 01 75 11 8b 47 34 85 c0 7e 10 f0 ff 47 34 b8 01 00 00 00 c3 48 8d 78 ff eb e9 <0f> 0b 31 c0 c3 66 90 66 2e 0f 1f 84 00 0 0 00 00 00 48 8b 47 08 a8 [40914.836418] RSP: 0018:ffffb4144e523988 EFLAGS: 00010286 [40914.836419] RAX: 0000000080000000 RBX: 0000000000000326 RCX: 0000000000000000 [40914.836420] RDX: 0000000000000000 RSI: 00004ffdeba10000 RDI: ffffdf07093f6440 [40914.836421] RBP: ffffdf07093f6440 R08: 800000424fd91225 R09: 0000000000000000 [40914.836421] R10: ffff9eb41bfeebb8 R11: 0000000000000000 R12: ffffdf06bbd1e8a8 [40914.836422] R13: 0000000000000080 R14: 800000424fd91225 R15: ffffdf07093f6440 [40914.836423] FS: 00007fb60ffff700(0000) GS:ffff9eb4802c0000(0000) knlGS:0000000000000000 [40914.836425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [40914.836426] CR2: 0000000000000000 CR3: 0000002f220e6002 CR4: 00000000003626e0 [40914.836427] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [40914.836427] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [40914.836428] Call Trace: [40914.836433] follow_page_pte+0x302/0x47b [40914.836437] __get_user_pages+0xf1/0x7d0 [40914.836441] ? irq_work_queue+0x9/0x70 [40914.836443] get_user_pages_unlocked+0x13f/0x1e0 [40914.836469] __gfn_to_pfn_memslot+0x10e/0x400 [kvm] [40914.836486] try_async_pf+0x87/0x240 [kvm] [40914.836503] tdp_page_fault+0x139/0x270 [kvm] [40914.836523] kvm_mmu_page_fault+0x76/0x5e0 [kvm] [40914.836588] vcpu_enter_guest+0xb45/0x1570 [kvm] [40914.836632] kvm_arch_vcpu_ioctl_run+0x35d/0x580 [kvm] [40914.836645] kvm_vcpu_ioctl+0x26e/0x5d0 [kvm] [40914.836650] do_vfs_ioctl+0xa9/0x620 [40914.836653] ksys_ioctl+0x60/0x90 [40914.836654] __x64_sys_ioctl+0x16/0x20 [40914.836658] do_syscall_64+0x5b/0x180 [40914.836664] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [40914.836666] RIP: 0033:0x7fb61cb6bfc7 Signed-off-by: LinFeng Signed-off-by: Zhuang Yanying Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7e63a3236364..67ae2d5c37b2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -186,6 +186,7 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn) */ if (pfn_valid(pfn)) return PageReserved(pfn_to_page(pfn)) && + !is_zero_pfn(pfn) && !kvm_is_zone_device_pfn(pfn); return true; From 64b38bd1906bb62a040b4e91815e56005db4784d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Jan 2020 12:56:55 -0300 Subject: [PATCH 600/658] x86/kvm: do not setup pv tlb flush when not paravirtualized kvm_setup_pv_tlb_flush will waste memory and print a misguiding message when KVM paravirtualization is not available. Intel SDM says that the when cpuid is used with EAX higher than the maximum supported value for basic of extended function, the data for the highest supported basic function will be returned. So, in some systems, kvm_arch_para_features will return bogus data, causing kvm_setup_pv_tlb_flush to detect support for pv tlb flush. Testing for kvm_para_available will work as it checks for the hypervisor signature. Besides, when the "nopv" command line parameter is used, it should not continue as well, as kvm_guest_init will no be called in that case. 
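As a hedged illustration of why the signature test is the reliable one (a hypothetical x86-only userspace sketch, not the kernel code; kvm_para_available() does the equivalent check from within the guest): CPUID leaf 0x40000000 yields the "KVMKVMKVM" signature only when KVM is really there, whereas feature leaves above the CPU's maximum basic leaf may echo unrelated data, as described above.

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13] = "";

	/* Hypervisor signature leaf; returns garbage on bare metal, which is fine. */
	__cpuid(0x40000000, eax, ebx, ecx, edx);
	memcpy(sig, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);

	printf("hypervisor signature \"%.12s\" -> %s\n", sig,
	       strcmp(sig, "KVMKVMKVM") ? "not KVM, skip PV setup" : "KVM");
	return 0;
}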
Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 81045aabb6f4..d817f255aed8 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -736,6 +736,9 @@ static __init int kvm_setup_pv_tlb_flush(void) { int cpu; + if (!kvm_para_available() || nopv) + return 0; + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { From 09df6307125cec07ef9168f1db2ffdbbcb304b1a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 3 Feb 2020 10:41:59 -0800 Subject: [PATCH 601/658] KVM: MIPS: Fix a build error due to referencing not-yet-defined function Hoist kvm_mips_comparecount_wakeup() above its only user, kvm_arch_vcpu_create() to fix a compilation error due to referencing an undefined function. Fixes: d11dfed5d700 ("KVM: MIPS: Move all vcpu init code into kvm_arch_vcpu_create()") Reported-by: kbuild test robot Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2606f3f02b54..92509041b954 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -280,6 +280,27 @@ static inline void dump_handler(const char *symbol, void *start, void *end) pr_debug("\tEND(%s)\n", symbol); } +static void kvm_mips_comparecount_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvm_mips_callbacks->queue_timer_int(vcpu); + + vcpu->arch.wait = 0; + if (swq_has_sleeper(&vcpu->wq)) + swake_up_one(&vcpu->wq); +} + +/* low level hrtimer wake routine */ +static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); + kvm_mips_comparecount_func((unsigned long) vcpu); + return kvm_mips_count_timeout(vcpu); +} + int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { return 0; @@ -1209,27 +1230,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -static void kvm_mips_comparecount_func(unsigned long data) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - - kvm_mips_callbacks->queue_timer_int(vcpu); - - vcpu->arch.wait = 0; - if (swq_has_sleeper(&vcpu->wq)) - swake_up_one(&vcpu->wq); -} - -/* low level hrtimer wake routine */ -static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); - kvm_mips_comparecount_func((unsigned long) vcpu); - return kvm_mips_count_timeout(vcpu); -} - int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { From 879a37632b403eb8c0fe00e14f907759100c8071 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 3 Feb 2020 10:42:00 -0800 Subject: [PATCH 602/658] KVM: MIPS: Fold comparecount_func() into comparecount_wakeup() Fold kvm_mips_comparecount_func() into kvm_mips_comparecount_wakeup() to eliminate the nondescript function name as well as its unnecessary cast of a vcpu to "unsigned long" and back to a vcpu. 
Presumably func() was used as a callback at some point during pre-upstream development, as wakeup() is the only user of func() and has been the only user since both were introduced by commit 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM"). Cc: Davidlohr Bueso Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 92509041b954..71244bf87c3a 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -280,24 +280,19 @@ static inline void dump_handler(const char *symbol, void *start, void *end) pr_debug("\tEND(%s)\n", symbol); } -static void kvm_mips_comparecount_func(unsigned long data) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - - kvm_mips_callbacks->queue_timer_int(vcpu); - - vcpu->arch.wait = 0; - if (swq_has_sleeper(&vcpu->wq)) - swake_up_one(&vcpu->wq); -} - /* low level hrtimer wake routine */ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); - kvm_mips_comparecount_func((unsigned long) vcpu); + + kvm_mips_callbacks->queue_timer_int(vcpu); + + vcpu->arch.wait = 0; + if (swq_has_sleeper(&vcpu->wq)) + swake_up_one(&vcpu->wq); + return kvm_mips_count_timeout(vcpu); } From ea79a750927e1835fa869c9136bfd6da28e605e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 Feb 2020 07:32:59 -0800 Subject: [PATCH 603/658] KVM: nVMX: Remove stale comment from nested_vmx_load_cr3() The blurb pertaining to the return value of nested_vmx_load_cr3() no longer matches reality, remove it entirely as the behavior it is attempting to document is quite obvious when reading the actual code. Signed-off-by: Sean Christopherson Reviewed-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 53d522faaa69..0118637fb970 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1074,10 +1074,10 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) } /* - * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are - * emulating VM entry into a guest with EPT enabled. - * Returns 0 on success, 1 on failure. Invalid state exit qualification code - * is assigned to entry_failure_code on failure. + * Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are + * emulating VM-Entry into a guest with EPT enabled. On failure, the expected + * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to + * @entry_failure_code. */ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, u32 *entry_failure_code) From 0a2b64c50db00196c85ec9e8e4c3d7506cd09db9 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 3 Feb 2020 15:09:09 -0800 Subject: [PATCH 604/658] kvm: mmu: Replace unsigned with unsigned int for PTE access There are several functions which pass an access permission mask for SPTEs as an unsigned. This works, but checkpatch complains about it. Switch the occurrences of unsigned to unsigned int to satisfy checkpatch. No functional change expected. Tested by running kvm-unit-tests on an Intel Haswell machine. This commit introduced no new failures.
Signed-off-by: Ben Gardon Reviewed-by: Oliver Upton Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index adc84f0f16ba..7c544e17c5b3 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -452,7 +452,7 @@ static u64 get_mmio_spte_generation(u64 spte) } static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, - unsigned access) + unsigned int access) { u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; u64 mask = generation_mmio_spte_mask(gen); @@ -484,7 +484,7 @@ static unsigned get_mmio_spte_access(u64 spte) } static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - kvm_pfn_t pfn, unsigned access) + kvm_pfn_t pfn, unsigned int access) { if (unlikely(is_noslot_pfn(pfn))) { mark_mmio_spte(vcpu, sptep, gfn, access); @@ -2475,7 +2475,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int direct, - unsigned access) + unsigned int access) { union kvm_mmu_page_role role; unsigned quadrant; @@ -2990,7 +2990,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int level, + unsigned int pte_access, int level, gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { @@ -3081,9 +3081,10 @@ set_pte: return ret; } -static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, - int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn, - bool speculative, bool host_writable) +static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, + unsigned int pte_access, int write_fault, int level, + gfn_t gfn, kvm_pfn_t pfn, bool speculative, + bool host_writable) { int was_rmapped = 0; int rmap_count; @@ -3165,7 +3166,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, { struct page *pages[PTE_PREFETCH_NUM]; struct kvm_memory_slot *slot; - unsigned access = sp->role.access; + unsigned int access = sp->role.access; int i, ret; gfn_t gfn; @@ -3400,7 +3401,8 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) } static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, - kvm_pfn_t pfn, unsigned access, int *ret_val) + kvm_pfn_t pfn, unsigned int access, + int *ret_val) { /* The pfn is invalid, report the error! */ if (unlikely(is_error_pfn(pfn))) { @@ -4005,7 +4007,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) if (is_mmio_spte(spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); - unsigned access = get_mmio_spte_access(spte); + unsigned int access = get_mmio_spte_access(spte); if (!check_mmio_spte(vcpu, spte)) return RET_PF_INVALID; @@ -4349,7 +4351,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, } static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - unsigned access, int *nr_present) + unsigned int access, int *nr_present) { if (unlikely(is_mmio_spte(*sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { From 8f79b064959b1c858cddad1cecbf0511adca8209 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 3 Feb 2020 15:09:10 -0800 Subject: [PATCH 605/658] kvm: mmu: Separate generating and setting mmio ptes Separate the functions for generating MMIO page table entries from the function that inserts them into the paging structure. 
This refactoring will facilitate changes to the MMU sychronization model to use atomic compare / exchanges (which are not guaranteed to succeed) instead of a monolithic MMU lock. No functional change expected. Tested by running kvm-unit-tests on an Intel Haswell machine. This commit introduced no new failures. Signed-off-by: Ben Gardon Reviewed-by: Oliver Upton Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7c544e17c5b3..7011a4e54866 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -451,9 +451,9 @@ static u64 get_mmio_spte_generation(u64 spte) return gen; } -static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, - unsigned int access) +static u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) { + u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; u64 mask = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; @@ -464,6 +464,17 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, mask |= (gpa & shadow_nonpresent_or_rsvd_mask) << shadow_nonpresent_or_rsvd_mask_len; + return mask; +} + +static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, + unsigned int access) +{ + u64 mask = make_mmio_spte(vcpu, gfn, access); + unsigned int gen = get_mmio_spte_generation(mask); + + access = mask & ACC_ALL; + trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); } From 31de3d2500e49e9f44fdda1830a37f4d9735bcdd Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 5 Feb 2020 13:30:33 +0100 Subject: [PATCH 606/658] x86/kvm/hyper-v: move VMX controls sanitization out of nested_enable_evmcs() With fine grained VMX feature enablement QEMU>=4.2 tries to do KVM_SET_MSRS with default (matching CPU model) values and in case eVMCS is also enabled, fails. It would be possible to drop VMX feature filtering completely and make this a guest's responsibility: if it decides to use eVMCS it should know which fields are available and which are not. Hyper-V mostly complies to this, however, there are some problematic controls: SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL which Hyper-V enables. As there are no corresponding fields in eVMCS, we can't handle this properly in KVM. This is a Hyper-V issue. Move VMX controls sanitization from nested_enable_evmcs() to vmx_get_msr(), and do the bare minimum (only clear controls which are known to cause issues). This allows userspace to keep setting controls it wants and at the same time hides them from the guest. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 32 ++++++++++++++++++++++++++------ arch/x86/kvm/vmx/evmcs.h | 1 + arch/x86/kvm/vmx/vmx.c | 16 ++++++++++++++-- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 89c3e0caf39f..ba886fb7bc39 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -346,6 +346,32 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) return 0; } +void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) +{ + u32 ctl_low = (u32)*pdata; + u32 ctl_high = (u32)(*pdata >> 32); + + /* + * Hyper-V 2016 and 2019 try using these features even when eVMCS + * is enabled but there are no corresponding fields. 
+ */ + switch (msr_index) { + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_TRUE_EXIT_CTLS: + ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + break; + case MSR_IA32_VMX_ENTRY_CTLS: + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + break; + } + + *pdata = ctl_low | ((u64)ctl_high << 32); +} + int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { @@ -356,11 +382,5 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, if (vmcs_version) *vmcs_version = nested_get_evmcs_version(vcpu); - vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; - vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; - vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; - vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; - vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC; - return 0; } diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 07ebf6882a45..b88d9807a796 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -201,5 +201,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa); uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); +void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata); #endif /* __KVM_X86_VMX_EVMCS_H */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 678edbd6e278..ba334acaa37e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1853,8 +1853,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) return 1; - return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, - &msr_info->data); + if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, + &msr_info->data)) + return 1; + /* + * Enlightened VMCS v1 doesn't have certain fields, but buggy + * Hyper-V versions are still trying to use corresponding + * features when they are exposed. Filter out the essential + * minimum. + */ + if (!msr_info->host_initiated && + vmx->nested.enlightened_vmcs_enabled) + nested_evmcs_filter_control_msr(msr_info->index, + &msr_info->data); + break; case MSR_IA32_RTIT_CTL: if (pt_mode != PT_MODE_HOST_GUEST) return 1; From a83502314ce303c6341b249c41121759c7477ba1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 5 Feb 2020 13:30:34 +0100 Subject: [PATCH 607/658] x86/kvm/hyper-v: don't allow to turn on unsupported VMX controls for nested guests Sane L1 hypervisors are not supposed to turn any of the unsupported VMX controls on for its guests and nested_vmx_check_controls() checks for that. This is, however, not the case for the controls which are supported on the host but are missing in enlightened VMCS and when eVMCS is in use. It would certainly be possible to add these missing checks to nested_check_vm_execution_controls()/_vm_exit_controls()/.. but it seems preferable to keep eVMCS-specific stuff in eVMCS and reduce the impact on non-eVMCS guests by doing less unrelated checks. Create a separate nested_evmcs_check_controls() for this purpose. 
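As a side note for readers unfamiliar with the VMX capability MSR layout that both this check and the previous patch's filtering rely on, a minimal standalone sketch (the MSR value is made up; only the low/high split and the bit position of VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL mirror the architecture, and this is not the KVM code): the low 32 bits advertise the allowed-0 settings and the high 32 bits the allowed-1 settings, so hiding a control from the guest means clearing its bit in the high half only.

#include <stdint.h>
#include <stdio.h>

#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL	(1u << 12)	/* VM-exit controls bit 12 */

static uint64_t filter_exit_ctls(uint64_t msr)
{
	uint32_t ctl_low = (uint32_t)msr;		/* allowed-0 settings */
	uint32_t ctl_high = (uint32_t)(msr >> 32);	/* allowed-1 settings */

	/* Tell the guest it may not set this control. */
	ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
	return ctl_low | ((uint64_t)ctl_high << 32);
}

int main(void)
{
	uint64_t msr = 0x01dfff0000036dffULL;	/* made-up capability value */

	printf("0x%016llx -> 0x%016llx\n", (unsigned long long)msr,
	       (unsigned long long)filter_exit_ctls(msr));
	return 0;
}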
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 53 +++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx/evmcs.h | 2 ++ arch/x86/kvm/vmx/nested.c | 3 +++ 3 files changed, 58 insertions(+) diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index ba886fb7bc39..303813423c3e 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -7,6 +7,7 @@ #include "evmcs.h" #include "vmcs.h" #include "vmx.h" +#include "trace.h" DEFINE_STATIC_KEY_FALSE(enable_evmcs); @@ -372,6 +373,58 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) *pdata = ctl_low | ((u64)ctl_high << 32); } +int nested_evmcs_check_controls(struct vmcs12 *vmcs12) +{ + int ret = 0; + u32 unsupp_ctl; + + unsupp_ctl = vmcs12->pin_based_vm_exec_control & + EVMCS1_UNSUPPORTED_PINCTRL; + if (unsupp_ctl) { + trace_kvm_nested_vmenter_failed( + "eVMCS: unsupported pin-based VM-execution controls", + unsupp_ctl); + ret = -EINVAL; + } + + unsupp_ctl = vmcs12->secondary_vm_exec_control & + EVMCS1_UNSUPPORTED_2NDEXEC; + if (unsupp_ctl) { + trace_kvm_nested_vmenter_failed( + "eVMCS: unsupported secondary VM-execution controls", + unsupp_ctl); + ret = -EINVAL; + } + + unsupp_ctl = vmcs12->vm_exit_controls & + EVMCS1_UNSUPPORTED_VMEXIT_CTRL; + if (unsupp_ctl) { + trace_kvm_nested_vmenter_failed( + "eVMCS: unsupported VM-exit controls", + unsupp_ctl); + ret = -EINVAL; + } + + unsupp_ctl = vmcs12->vm_entry_controls & + EVMCS1_UNSUPPORTED_VMENTRY_CTRL; + if (unsupp_ctl) { + trace_kvm_nested_vmenter_failed( + "eVMCS: unsupported VM-entry controls", + unsupp_ctl); + ret = -EINVAL; + } + + unsupp_ctl = vmcs12->vm_function_control & EVMCS1_UNSUPPORTED_VMFUNC; + if (unsupp_ctl) { + trace_kvm_nested_vmenter_failed( + "eVMCS: unsupported VM-function controls", + unsupp_ctl); + ret = -EINVAL; + } + + return ret; +} + int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index b88d9807a796..6de47f2569c9 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -10,6 +10,7 @@ #include "capabilities.h" #include "vmcs.h" +#include "vmcs12.h" struct vmcs_config; @@ -202,5 +203,6 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata); +int nested_evmcs_check_controls(struct vmcs12 *vmcs12); #endif /* __KVM_X86_VMX_EVMCS_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0118637fb970..657c2eda357c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2757,6 +2757,9 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, nested_check_vm_entry_controls(vcpu, vmcs12)) return -EINVAL; + if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled) + return nested_evmcs_check_controls(vmcs12); + return 0; } From 4400cf546b4bb62d49198f6642add01bf6e9b34d Mon Sep 17 00:00:00 2001 From: Eric Hankland Date: Mon, 27 Jan 2020 13:22:56 -0800 Subject: [PATCH 608/658] KVM: x86: Fix perfctr WRMSR for running counters Correct the logic in intel_pmu_set_msr() for fixed and general purpose counters. This was recently changed to set pmc->counter without taking in to account the value of pmc_read_counter() which will be incorrect if the counter is currently running and non-zero; this changes back to the old logic which accounted for the value of currently running counters. 
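As a hedged illustration of why the delta form matters, a toy model rather than the KVM code: the "running_delta" field below is invented to stand in for whatever the hardware counter has accumulated since it was last programmed, which is exactly the part a plain assignment would throw away.

#include <stdint.h>
#include <stdio.h>

struct toy_pmc {
	uint64_t counter;	/* programmed base value */
	uint64_t running_delta;	/* ticks accrued since programming */
};

static uint64_t toy_read(const struct toy_pmc *p)
{
	return p->counter + p->running_delta;
}

static void toy_wrmsr(struct toy_pmc *p, uint64_t data, int host_initiated)
{
	/* Guest writes to a 32-bit perfctr are sign-extended, as on hardware. */
	if (!host_initiated)
		data = (uint64_t)(int64_t)(int32_t)data;
	/*
	 * Move the base so the guest reads back exactly "data" right now,
	 * without discarding the in-flight delta.
	 */
	p->counter += data - toy_read(p);
}

int main(void)
{
	struct toy_pmc p = { .counter = 100, .running_delta = 7 };

	toy_wrmsr(&p, 50, 0);
	printf("%llu\n", (unsigned long long)toy_read(&p));	/* prints 50 */
	return 0;
}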
Signed-off-by: Eric Hankland Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 34a3a17bb6d7..fd21cdb10b79 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -260,13 +260,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { - if (msr_info->host_initiated) - pmc->counter = data; - else - pmc->counter = (s32)data; + if (!msr_info->host_initiated) + data = (s64)(s32)data; + pmc->counter += data - pmc_read_counter(pmc); return 0; } else if ((pmc = get_fixed_pmc(pmu, msr))) { - pmc->counter = data; + pmc->counter += data - pmc_read_counter(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) From df7e8818926eb4712b67421442acf7d568fe2645 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Feb 2020 16:10:52 +0100 Subject: [PATCH 609/658] KVM: SVM: relax conditions for allowing MSR_IA32_SPEC_CTRL accesses Userspace that does not know about the AMD_IBRS bit might still allow the guest to protect itself with MSR_IA32_SPEC_CTRL using the Intel SPEC_CTRL bit. However, svm.c disallows this and will cause a #GP in the guest when writing to the MSR. Fix this by loosening the test and allowing the Intel CPUID bit, and in fact allow the AMD_STIBP bit as well since it allows writing to MSR_IA32_SPEC_CTRL too. Reported-by: Zhiyi Guo Analyzed-by: Dr. David Alan Gilbert Analyzed-by: Laszlo Ersek Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bf0556588ad0..a3e32d61d60c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4225,6 +4225,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; @@ -4310,6 +4312,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; From bcfcff640c4d736933c5990d5a801d6a0c22c28b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Feb 2020 16:20:23 +0100 Subject: [PATCH 610/658] x86: vmxfeatures: rename features for consistency with KVM and manual Three of the feature bits in vmxfeatures.h have names that are different from the Intel SDM. The names have been adjusted recently in KVM but they were using the old name in the tip tree's x86/cpu branch. Adjust for consistency. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 6 +++--- arch/x86/include/asm/vmxfeatures.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d380b3b7ddd9..2a85287b3685 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -22,8 +22,8 @@ /* * Definitions of Primary Processor-Based VM-Execution Controls. 
*/ -#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING) -#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(TSC_OFFSETTING) +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING) #define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) #define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) #define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) @@ -34,7 +34,7 @@ #define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) #define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) #define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) -#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING) #define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) #define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) #define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index 0d04d8bf15a5..a50e4a0de315 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -34,8 +34,8 @@ #define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */ /* Primary Processor-Based VM-Execution Controls, word 1 */ -#define VMX_FEATURE_VIRTUAL_INTR_PENDING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ -#define VMX_FEATURE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ #define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */ #define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */ #define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */ @@ -46,7 +46,7 @@ #define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ #define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ #define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ -#define VMX_FEATURE_VIRTUAL_NMI_PENDING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ #define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */ #define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/ #define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */ From d76c7fbc01b29257359ed8b0d16d662e725b7bf9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 28 Jan 2020 15:53:44 -0800 Subject: [PATCH 611/658] KVM: x86: Mark CR4.UMIP as reserved based on associated CPUID bit Re-add code to mark CR4.UMIP as reserved if UMIP is not supported by the host. The UMIP handling was unintentionally dropped during a recent refactoring. Not flagging CR4.UMIP allows the guest to set its CR4.UMIP regardless of host support or userspace desires. On CPUs with UMIP support, including emulated UMIP, this allows the guest to enable UMIP against the wishes of the userspace VMM. On CPUs without any form of UMIP, this results in a failed VM-Enter due to invalid guest state. 
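A hedged standalone sketch of the reserved-bit scheme involved (bit positions are the architectural CR4 values; the feature flags are plain ints here instead of KVM's guest CPUID lookups, and the rejection is just a message rather than a #GP):

#include <stdint.h>
#include <stdio.h>

#define X86_CR4_UMIP	(1ULL << 11)
#define X86_CR4_LA57	(1ULL << 12)

/*
 * A CR4 bit is reserved for the guest unless the matching CPUID feature
 * is available, so a guest write that sets a reserved bit must fail.
 */
static uint64_t cr4_reserved_bits(int has_umip, int has_la57)
{
	uint64_t reserved = 0;

	if (!has_umip)
		reserved |= X86_CR4_UMIP;
	if (!has_la57)
		reserved |= X86_CR4_LA57;
	return reserved;
}

int main(void)
{
	uint64_t guest_cr4 = X86_CR4_UMIP;

	if (guest_cr4 & cr4_reserved_bits(0 /* no UMIP */, 1 /* LA57 */))
		puts("CR4 write rejected: reserved bit set");
	return 0;
}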
Fixes: 345599f9a2928 ("KVM: x86: Add macro to ensure reserved cr4 bits checks stay in sync") Signed-off-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6db92371ad21..fbabb2f06273 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -898,6 +898,8 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); __reserved_bits |= X86_CR4_PKE; \ if (!__cpu_has(__c, X86_FEATURE_LA57)) \ __reserved_bits |= X86_CR4_LA57; \ + if (!__cpu_has(__c, X86_FEATURE_UMIP)) \ + __reserved_bits |= X86_CR4_UMIP; \ __reserved_bits; \ }) From a8be1ad01b795bd2a13297ddbaecdb956ab0efd0 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 5 Feb 2020 23:33:53 +0800 Subject: [PATCH 612/658] KVM: vmx: delete meaningless vmx_decache_cr0_guest_bits() declaration The function vmx_decache_cr0_guest_bits() is only called below its implementation. So this is meaningless and should be removed. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ba334acaa37e..9a6664886f2e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1428,8 +1428,6 @@ static bool emulation_required(struct kvm_vcpu *vcpu) return emulate_invalid_guest_state && !guest_state_valid(vcpu); } -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); - unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); From 4bfdebd6202da4cbf723c53f475999b7537d4e48 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 31 Jan 2020 21:52:33 +0100 Subject: [PATCH 613/658] docs/locking: Fix outdated section names Commit 2e4f5382d12a ("locking/doc: Rename LOCK/UNLOCK to ACQUIRE/RELEASE") has not appied to 'spinlock.rst'. This commit updates the doc for the change. Signed-off-by: SeongJae Park Link: https://lore.kernel.org/r/20200131205237.29535-2-sj38.park@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/locking/spinlocks.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/locking/spinlocks.rst b/Documentation/locking/spinlocks.rst index 66e3792f8a36..bec96f7a9f2d 100644 --- a/Documentation/locking/spinlocks.rst +++ b/Documentation/locking/spinlocks.rst @@ -25,9 +25,9 @@ worry about UP vs SMP issues: the spinlocks work correctly under both. Documentation/memory-barriers.txt - (5) LOCK operations. + (5) ACQUIRE operations. - (6) UNLOCK operations. + (6) RELEASE operations. The above is usually pretty simple (you usually need and want only one spinlock for most things - using more than one spinlock can make things a From 5549c20232659a43797078d035736f1e01137c46 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 31 Jan 2020 21:52:35 +0100 Subject: [PATCH 614/658] Documentation/ko_KR/howto: Update broken web addresses Commit 0ea6e6112219 ("Documentation: update broken web addresses.") removed a link to 'http://patchwork.ozlabs.org' in howto, but the change has not applied to the Korean translation. This commit simply applies the change to the Korean translation. The link is restored now, though. 
Signed-off-by: SeongJae Park Link: https://lore.kernel.org/r/20200131205237.29535-4-sj38.park@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ko_KR/howto.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index ae3ad897d2ae..58d908c96ac8 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -318,8 +318,8 @@ Andrew Morton의 글이 있다. 리뷰 프로세스는 patchwork라는 도구를 통해 추적된다. patchwork은 등록된 패치와 패치에 대한 코멘트, 패치의 버전을 볼 수 있는 웹 인터페이스를 제공하고, 메인테이너는 패치를 리뷰 중, 리뷰 통과, 또는 반려됨으로 표시할 수 있다. -대부분의 이러한 patchwork 사이트는 https://patchwork.kernel.org/ 또는 -http://patchwork.ozlabs.org/ 에 나열되어 있다. +대부분의 이러한 patchwork 사이트는 https://patchwork.kernel.org/ 에 나열되어 +있다. 통합 테스트를 위한 linux-next 커널 트리 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 95c472ffca38e4aaf6000722194612d188ce5ac7 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 31 Jan 2020 21:52:36 +0100 Subject: [PATCH 615/658] Documentation/ko_KR/howto: Update a broken link Signed-off-by: SeongJae Park Link: https://lore.kernel.org/r/20200131205237.29535-5-sj38.park@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ko_KR/howto.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index 58d908c96ac8..71d4823e41e1 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -328,7 +328,7 @@ Andrew Morton의 글이 있다. 거쳐야 한다. 이런 목적으로, 모든 서브시스템 트리의 변경사항을 거의 매일 받아가는 특수한 테스트 저장소가 존재한다: - https://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git + https://git.kernel.org/?p=linux/kernel/git/next/linux-next.git 이런 식으로, linux-next 커널을 통해 다음 머지 기간에 메인라인 커널에 어떤 변경이 가해질 것인지 간략히 알 수 있다. 모험심 강한 테스터라면 linux-next From 36a375c6dfad502217898c4c2f0cf05810c30ceb Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 3 Feb 2020 17:10:45 +0800 Subject: [PATCH 616/658] mailmap: add entry for Tiezhu Yang Add an entry to connect all my email addresses. Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1580721045-4988-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Jonathan Corbet --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 00581c1f0983..d1f9eed788ad 100644 --- a/.mailmap +++ b/.mailmap @@ -250,6 +250,7 @@ Sumit Semwal Tejun Heo Thomas Graf Thomas Pedersen +Tiezhu Yang Todor Tomov Tony Luck TripleX Chung From ff1e81a7e2239f61d0e9173f113a7de4dcaab661 Mon Sep 17 00:00:00 2001 From: Sameer Rahmani Date: Mon, 3 Feb 2020 20:15:43 +0000 Subject: [PATCH 617/658] Documentation: build warnings related to missing blank lines after explicit markups has been fixed Fix for several documentation build warnings related to missing blank lines after explicit mark up. Exact warning message: WARNING: Explicit markup ends without a blank line; unexpected unindent. 
Signed-off-by: Sameer Rahmani Link: https://lore.kernel.org/r/20200203201543.24834-1-lxsameer@gnu.org Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/contributing.rst | 1 + Documentation/doc-guide/maintainer-profile.rst | 1 + Documentation/trace/kprobetrace.rst | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/doc-guide/contributing.rst b/Documentation/doc-guide/contributing.rst index 10956583d22e..67ee3691f91f 100644 --- a/Documentation/doc-guide/contributing.rst +++ b/Documentation/doc-guide/contributing.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 + How to help improve kernel documentation ======================================== diff --git a/Documentation/doc-guide/maintainer-profile.rst b/Documentation/doc-guide/maintainer-profile.rst index aee2f508cc89..5afc0ddba40a 100644 --- a/Documentation/doc-guide/maintainer-profile.rst +++ b/Documentation/doc-guide/maintainer-profile.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 + Documentation subsystem maintainer entry profile ================================================ diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 55993055902c..cc4c5fc313df 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -97,6 +97,7 @@ which shows given pointer in "symbol+offset" style. For $comm, the default type is "string"; any other type is invalid. .. _user_mem_access: + User Memory Access ------------------ Kprobe events supports user-space memory access. For that purpose, you can use @@ -252,4 +253,3 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace. Each line shows when the kernel hits an event, and <- SYMBOL means kernel returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel returns from do_sys_open to sys_open+0x1b). - From 599e6f8d3d23ec79d31891cda11af6e79d591ead Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Jan 2020 15:15:15 -0800 Subject: [PATCH 618/658] Documentation: changes.rst: update several outdated project URLs Update projects URLs in the changes.rst file. Signed-off-by: Randy Dunlap Reviewed-by: Darrick J. 
Wong Acked-by: Theodore Ts'o Link: https://lore.kernel.org/r/a9c3c509-8f30-fcc4-d9e0-b53aeaa89e4f@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/process/changes.rst | 14 +++++++++----- .../translations/it_IT/process/changes.rst | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 2284f2221f02..e47863575917 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -383,7 +383,8 @@ Mkinitrd E2fsprogs --------- -- +- +- JFSutils -------- @@ -393,12 +394,13 @@ JFSutils Reiserfsprogs ------------- -- +- Xfsprogs -------- -- +- +- Pcmciautils ----------- @@ -437,7 +439,9 @@ Networking PPP --- -- +- +- +- NFS-utils --------- @@ -447,7 +451,7 @@ NFS-utils Iptables -------- -- +- Ip-route2 --------- diff --git a/Documentation/translations/it_IT/process/changes.rst b/Documentation/translations/it_IT/process/changes.rst index 94a6499742ac..37da4447a40d 100644 --- a/Documentation/translations/it_IT/process/changes.rst +++ b/Documentation/translations/it_IT/process/changes.rst @@ -390,7 +390,8 @@ Mkinitrd E2fsprogs --------- -- +- +- JFSutils -------- @@ -400,12 +401,13 @@ JFSutils Reiserfsprogs ------------- -- +- Xfsprogs -------- -- +- +- Pcmciautils ----------- @@ -444,7 +446,9 @@ Rete PPP --- -- +- +- +- NFS-utils @@ -455,7 +459,7 @@ NFS-utils Iptables -------- -- +- Ip-route2 --------- From d1c9038ab5c1c96c0fd9d13ec56f2d650fe4c59f Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 24 Jan 2020 19:33:16 +0100 Subject: [PATCH 619/658] Allow git builds of Sphinx When using a non-release version of Sphinx, from a local build (with improvements for kernel doc handling, why not), sphinx-build --version reports versions of the form sphinx-build 3.0.0+/4703d9119972 i.e. base version, a plus symbol, slash, and the start of the git hash of whatever repository the command is run in (no, not the hash that was used to build Sphinx!). This patch fixes the installation check in sphinx-pre-install to recognise such version output. Signed-off-by: Stephen Kitt Link: https://lore.kernel.org/r/20200124183316.1719218-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 470ccfe678aa..a8f0c002a340 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -272,7 +272,7 @@ sub check_sphinx() open IN, "$sphinx --version 2>&1 |" or die "$sphinx returned an error"; while () { - if (m/^\s*sphinx-build\s+([\d\.]+)$/) { + if (m/^\s*sphinx-build\s+([\d\.]+)(\+\/[\da-f]+)?$/) { $cur_version=$1; last; } From 2e34673be0bd6bb0c6c496a861cbc3f7431e7ce3 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Wed, 29 Jan 2020 11:14:00 -0800 Subject: [PATCH 620/658] PCI/ATS: Use PF PASID for VFs Per PCIe r5.0, sec 9.3.7.14, if a PF implements the PASID Capability, the PF PASID configuration is shared by its VFs, and VFs must not implement their own PASID Capability. But commit 751035b8dc06 ("PCI/ATS: Cache PASID Capability offset") changed pci_max_pasids() and pci_pasid_features() to use the PASID Capability of the VF device instead of the associated PF device. This leads to IOMMU bind failures when pci_max_pasids() and pci_pasid_features() are called for VFs. In pci_max_pasids() and pci_pasid_features(), always use the PF PASID Capability. 
Fixes: 751035b8dc06 ("PCI/ATS: Cache PASID Capability offset") Link: https://lore.kernel.org/r/fe891f9755cb18349389609e7fed9940fc5b081a.1580325170.git.sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v5.5+ --- drivers/pci/ats.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 982b46f0a54d..b6f064c885c3 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -424,11 +424,12 @@ void pci_restore_pasid_state(struct pci_dev *pdev) int pci_pasid_features(struct pci_dev *pdev) { u16 supported; - int pasid = pdev->pasid_cap; + int pasid; if (pdev->is_virtfn) pdev = pci_physfn(pdev); + pasid = pdev->pasid_cap; if (!pasid) return -EINVAL; @@ -451,11 +452,12 @@ int pci_pasid_features(struct pci_dev *pdev) int pci_max_pasids(struct pci_dev *pdev) { u16 supported; - int pasid = pdev->pasid_cap; + int pasid; if (pdev->is_virtfn) pdev = pci_physfn(pdev); + pasid = pdev->pasid_cap; if (!pasid) return -EINVAL; From 24a9729f831462b1d9d61dc85ecc91c59037243f Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sat, 1 Feb 2020 12:57:04 +0530 Subject: [PATCH 621/658] tracing: Annotate ftrace_graph_hash pointer with __rcu Fix following instances of sparse error kernel/trace/ftrace.c:5664:29: error: incompatible types in comparison kernel/trace/ftrace.c:5785:21: error: incompatible types in comparison kernel/trace/ftrace.c:5864:36: error: incompatible types in comparison kernel/trace/ftrace.c:5866:25: error: incompatible types in comparison Use rcu_dereference_protected to access the __rcu annotated pointer. Link: http://lkml.kernel.org/r/20200201072703.17330-1-frextrite@gmail.com Reviewed-by: Joel Fernandes (Google) Signed-off-by: Amol Grover Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0e9612c30995..01d2ecd66161 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5591,7 +5591,7 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f5480a2aa334..18ceab59a5ba 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -964,22 +964,25 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions From fd0e6852c407dd9aefc594f54ddcc21d84803d3b Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Wed, 5 Feb 2020 11:27:02 +0530 Subject: [PATCH 622/658] tracing: Annotate 
ftrace_graph_notrace_hash pointer with __rcu Fix following instances of sparse error kernel/trace/ftrace.c:5667:29: error: incompatible types in comparison kernel/trace/ftrace.c:5813:21: error: incompatible types in comparison kernel/trace/ftrace.c:5868:36: error: incompatible types in comparison kernel/trace/ftrace.c:5870:25: error: incompatible types in comparison Use rcu_dereference_protected to dereference the newly annotated pointer. Link: http://lkml.kernel.org/r/20200205055701.30195-1-frextrite@gmail.com Signed-off-by: Amol Grover Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.h | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 01d2ecd66161..481ede3eac13 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5592,7 +5592,7 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 18ceab59a5ba..022def96d307 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -965,7 +965,7 @@ extern void __trace_graph_return(struct trace_array *tr, #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { @@ -1018,10 +1018,14 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); From 16052dd5bdfa16dbe18d8c1d4cde2ddab9d23177 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Feb 2020 02:17:57 -0500 Subject: [PATCH 623/658] ftrace: Add comment to why rcu_dereference_sched() is open coded Because the function graph tracer can execute in sections where RCU is not "watching", the rcu_dereference_sched() for the hash needs to be open coded. This is fine because the RCU "flavor" of the ftrace hash is protected by its own RCU handling (it does its own little synchronization on every CPU and does not rely on RCU sched). Acked-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 022def96d307..8c52f5de9384 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -975,6 +975,11 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) preempt_disable_notrace(); + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching".
+ */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); if (ftrace_hash_empty(hash)) { @@ -1022,6 +1027,11 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) preempt_disable_notrace(); + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); From 87fbfffcc89b92a4281b0aa53bd06af714087889 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Mon, 3 Feb 2020 09:15:00 -0800 Subject: [PATCH 624/658] broken ping to ipv6 linklocal addresses on debian buster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I am seeing ping failures to IPv6 linklocal addresses with Debian buster. Easiest example to reproduce is: $ ping -c1 -w1 ff02::1%eth1 connect: Invalid argument $ ping -c1 -w1 ff02::1%eth1 PING ff02::01%eth1(ff02::1%eth1) 56 data bytes 64 bytes from fe80::e0:f9ff:fe0c:37%eth1: icmp_seq=1 ttl=64 time=0.059 ms git bisect traced the failure to commit b9ef5513c99b ("smack: Check address length before reading address family") Arguably ping is being stupid since the buster version is not setting the address family properly (ping on stretch for example does): $ strace -e connect ping6 -c1 -w1 ff02::1%eth1 connect(5, {sa_family=AF_UNSPEC, sa_data="\4\1\0\0\0\0\377\2\0\0\0\0\0\0\0\0\0\0\0\0\0\1\3\0\0\0"}, 28) = -1 EINVAL (Invalid argument) but the command works fine on kernels prior to this commit, so this is breakage which goes against the Linux paradigm of "don't break userspace" Cc: stable@vger.kernel.org Reported-by: David Ahern Suggested-by: Tetsuo Handa Signed-off-by: Casey Schaufler  security/smack/smack_lsm.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) --- security/smack/smack_lsm.c | 41 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index ecea41ce919b..8bc7b04769a8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2831,42 +2831,39 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, int addrlen) { int rc = 0; -#if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; -#endif -#ifdef SMACK_IPV6_SECMARK_LABELING - struct smack_known *rsp; - struct socket_smack *ssp; -#endif if (sock->sk == NULL) return 0; - + if (sock->sk->sk_family != PF_INET && + (!IS_ENABLED(CONFIG_IPV6) || sock->sk->sk_family != PF_INET6)) + return 0; + if (addrlen < offsetofend(struct sockaddr, sa_family)) + return 0; + if (IS_ENABLED(CONFIG_IPV6) && sap->sa_family == AF_INET6) { + struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; #ifdef SMACK_IPV6_SECMARK_LABELING - ssp = sock->sk->sk_security; + struct smack_known *rsp; #endif - switch (sock->sk->sk_family) { - case PF_INET: - if (addrlen < sizeof(struct sockaddr_in) || - sap->sa_family != AF_INET) - return -EINVAL; - rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); - break; - case PF_INET6: - if (addrlen < SIN6_LEN_RFC2133 || sap->sa_family != AF_INET6) - return -EINVAL; + if (addrlen < SIN6_LEN_RFC2133) + return 0; #ifdef SMACK_IPV6_SECMARK_LABELING rsp = smack_ipv6host_label(sip); - if (rsp != NULL) + if (rsp != NULL) { + struct socket_smack *ssp = sock->sk->sk_security; + rc = smk_ipv6_check(ssp->smk_out, rsp, sip, - SMK_CONNECTING); + SMK_CONNECTING); + } #endif #ifdef 
SMACK_IPV6_PORT_LABELING rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); #endif - break; + return rc; } + if (sap->sa_family != AF_INET || addrlen < sizeof(struct sockaddr_in)) + return 0; + rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); return rc; } From 54a16ff6f2e50775145b210bcd94d62c3c2af117 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Feb 2020 09:20:32 -0500 Subject: [PATCH 625/658] ftrace: Protect ftrace_graph_hash with ftrace_sync As function_graph tracer can run when RCU is not "watching", it can not be protected by synchronize_rcu() it requires running a task on each CPU before it can be freed. Calling schedule_on_each_cpu(ftrace_sync) needs to be used. Link: https://lore.kernel.org/r/20200205131110.GT2935@paulmck-ThinkPad-P72 Cc: stable@vger.kernel.org Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables") Reported-by: "Paul E. McKenney" Reviewed-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 11 +++++++++-- kernel/trace/trace.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 481ede3eac13..3f7ee102868a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5867,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_rcu(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8c52f5de9384..3c75d29bd861 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -979,6 +979,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); @@ -1031,6 +1032,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); From e241d14a82910ffa9060d81864760c93b7256195 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Feb 2020 22:49:54 +0900 Subject: [PATCH 626/658] bootconfig: Use bootconfig instead of boot config Use "bootconfig" (1 word) instead of "boot config" (2 words) in the boot message. 
Link: http://lkml.kernel.org/r/158091059459.27924.14414336187441539879.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index f174a59d3903..2de2f9f7aab9 100644 --- a/init/main.c +++ b/init/main.c @@ -372,7 +372,7 @@ static void __init setup_boot_config(const char *cmdline) copy = memblock_alloc(size + 1, SMP_CACHE_BYTES); if (!copy) { - pr_err("Failed to allocate memory for boot config\n"); + pr_err("Failed to allocate memory for bootconfig\n"); return; } @@ -380,9 +380,9 @@ static void __init setup_boot_config(const char *cmdline) copy[size] = '\0'; if (xbc_init(copy) < 0) - pr_err("Failed to parse boot config\n"); + pr_err("Failed to parse bootconfig\n"); else { - pr_info("Load boot config: %d bytes\n", size); + pr_info("Load bootconfig: %d bytes\n", size); /* keys starting with "kernel." are passed via cmdline */ extra_command_line = xbc_make_cmdline("kernel"); /* Also, "init." keys are init arguments */ From 597c0e3b4540972580ab36bcc8aa34394bd88cfb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Feb 2020 22:50:04 +0900 Subject: [PATCH 627/658] bootconfig: Add more parse error messages Add more error messages for following cases. - Exceeding max number of nodes - Config tree data is empty (e.g. comment only) - Config data is empty or exceeding max size - bootconfig is already initialized Link: http://lkml.kernel.org/r/158091060401.27924.9024818742827122764.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- lib/bootconfig.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 055014e233a5..a98ae136529c 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -373,7 +373,8 @@ static struct xbc_node * __init xbc_add_sibling(char *data, u32 flag) sib->next = xbc_node_index(node); } } - } + } else + xbc_parse_error("Too many nodes", data); return node; } @@ -657,8 +658,10 @@ static int __init xbc_verify_tree(void) struct xbc_node *n, *m; /* Empty tree */ - if (xbc_node_num == 0) + if (xbc_node_num == 0) { + xbc_parse_error("Empty config", xbc_data); return -ENOENT; + } for (i = 0; i < xbc_node_num; i++) { if (xbc_nodes[i].next > xbc_node_num) { @@ -732,12 +735,17 @@ int __init xbc_init(char *buf) char *p, *q; int ret, c; - if (xbc_data) + if (xbc_data) { + pr_err("Error: bootconfig is already initialized.\n"); return -EBUSY; + } ret = strlen(buf); - if (ret > XBC_DATA_MAX - 1 || ret == 0) + if (ret > XBC_DATA_MAX - 1 || ret == 0) { + pr_err("Error: Config data is %s.\n", + ret ? "too big" : "empty"); return -ERANGE; + } xbc_data = buf; xbc_data_size = ret + 1; From 0f0d0a77ccbdc6bfe51c0d184f92d99ebcc2e809 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Feb 2020 22:50:13 +0900 Subject: [PATCH 628/658] tools/bootconfig: Show the number of bootconfig nodes Show the number of bootconfig nodes when applying new bootconfig to initrd. Since there are limitations of bootconfig not only in its filesize, but also the number of nodes, the number should be shown when applying so that user can get the feeling of scale of current bootconfig. 
Link: http://lkml.kernel.org/r/158091061337.27924.10886706631693823982.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- lib/bootconfig.c | 5 ++++- tools/bootconfig/main.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/bootconfig.c b/lib/bootconfig.c index a98ae136529c..afb2e767e6fe 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -728,7 +728,8 @@ void __init xbc_destroy_all(void) * * This parses the boot config text in @buf. @buf must be a * null terminated string and smaller than XBC_DATA_MAX. - * Return 0 if succeeded, or -errno if there is any error. + * Return the number of stored nodes (>0) if succeeded, or -errno + * if there is any error. */ int __init xbc_init(char *buf) { @@ -788,6 +789,8 @@ int __init xbc_init(char *buf) if (ret < 0) xbc_destroy_all(); + else + ret = xbc_node_num; return ret; } diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 91c9a5c0c499..47f488458328 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -268,6 +268,7 @@ int apply_xbc(const char *path, const char *xbc_path) return ret; } printf("Apply %s to %s\n", xbc_path, path); + printf("\tNumber of nodes: %d\n", ret); printf("\tSize: %u bytes\n", (unsigned int)size); printf("\tChecksum: %d\n", (unsigned int)csum); From a00574036c261421721fa770ccd21a1012e1fbbd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Feb 2020 22:50:23 +0900 Subject: [PATCH 629/658] bootconfig: Show the number of nodes on boot message Show the number of bootconfig nodes on boot message. Link: http://lkml.kernel.org/r/158091062297.27924.9051634676068550285.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 2de2f9f7aab9..491f1cdb3105 100644 --- a/init/main.c +++ b/init/main.c @@ -342,6 +342,7 @@ static void __init setup_boot_config(const char *cmdline) char *data, *copy; const char *p; u32 *hdr; + int ret; p = strstr(cmdline, "bootconfig"); if (!p || (p != cmdline && !isspace(*(p-1))) || @@ -379,10 +380,11 @@ static void __init setup_boot_config(const char *cmdline) memcpy(copy, data, size); copy[size] = '\0'; - if (xbc_init(copy) < 0) + ret = xbc_init(copy); + if (ret < 0) pr_err("Failed to parse bootconfig\n"); else { - pr_info("Load bootconfig: %d bytes\n", size); + pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret); /* keys starting with "kernel." are passed via cmdline */ extra_command_line = xbc_make_cmdline("kernel"); /* Also, "init." 
keys are init arguments */ From 5df867145f8adad9e5cdf9d67db1fbc0f71351e9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2020 20:46:49 +0100 Subject: [PATCH 630/658] of: clk: Make self-contained MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depending on include order: include/linux/of_clk.h:11:45: warning: ‘struct device_node’ declared inside parameter list will not be visible outside of this definition or declaration unsigned int of_clk_get_parent_count(struct device_node *np); ^~~~~~~~~~~ include/linux/of_clk.h:12:43: warning: ‘struct device_node’ declared inside parameter list will not be visible outside of this definition or declaration const char *of_clk_get_parent_name(struct device_node *np, int index); ^~~~~~~~~~~ include/linux/of_clk.h:13:31: warning: ‘struct of_device_id’ declared inside parameter list will not be visible outside of this definition or declaration void of_clk_init(const struct of_device_id *matches); ^~~~~~~~~~~~ Fix this by adding forward declarations for struct device_node and struct of_device_id. Signed-off-by: Geert Uytterhoeven Link: https://lkml.kernel.org/r/20200205194649.31309-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- include/linux/of_clk.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h index b27da9f164cb..c86fcad23fc2 100644 --- a/include/linux/of_clk.h +++ b/include/linux/of_clk.h @@ -6,6 +6,9 @@ #ifndef __LINUX_OF_CLK_H #define __LINUX_OF_CLK_H +struct device_node; +struct of_device_id; + #if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF) unsigned int of_clk_get_parent_count(struct device_node *np); From f2adbae0cb20c8eaf06914b2187043ea944b0aff Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 5 Feb 2020 15:40:01 +0800 Subject: [PATCH 631/658] ALSA: hda/realtek - Fixed one of HP ALC671 platform Headset Mic supported HP want to keep BIOS verb table for release platform. So, it need to add 0x19 pin for quirk. Fixes: 5af29028fd6d ("ALSA: hda/realtek - Add Headset Mic supported for HP cPC") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/74636ccb700a4cbda24c58a99dc430ce@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3b38a13abb7a..4770fb3f51fb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9111,6 +9111,7 @@ static const struct snd_hda_pin_quirk alc662_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0671, 0x103c, "HP cPC", ALC671_FIXUP_HP_HEADSET_MIC2, {0x14, 0x01014010}, {0x17, 0x90170150}, + {0x19, 0x02a11060}, {0x1b, 0x01813030}, {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0671, 0x103c, "HP cPC", ALC671_FIXUP_HP_HEADSET_MIC2, From c64eb62cfce242a57a7276ca8280ae0baab29d05 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 22 Dec 2019 19:08:39 +0000 Subject: [PATCH 632/658] virtio-mmio: convert to devm_platform_ioremap_resource Use devm_platform_ioremap_resource() to simplify code, which contains platform_get_resource, devm_request_mem_region and devm_ioremap. Signed-off-by: Yangtao Li Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mmio.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index e09edb5c5e06..97d5725fd9a2 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -531,18 +531,9 @@ static void virtio_mmio_release_dev(struct device *_d) static int virtio_mmio_probe(struct platform_device *pdev) { struct virtio_mmio_device *vm_dev; - struct resource *mem; unsigned long magic; int rc; - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!mem) - return -EINVAL; - - if (!devm_request_mem_region(&pdev->dev, mem->start, - resource_size(mem), pdev->name)) - return -EBUSY; - vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); if (!vm_dev) return -ENOMEM; @@ -554,9 +545,9 @@ static int virtio_mmio_probe(struct platform_device *pdev) INIT_LIST_HEAD(&vm_dev->virtqueues); spin_lock_init(&vm_dev->lock); - vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); - if (vm_dev->base == NULL) - return -EFAULT; + vm_dev->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(vm_dev->base)) + return PTR_ERR(vm_dev->base); /* Check magic value */ magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE); From 5790b53390e18fdd21e70776e46d058c05eda2f2 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Fri, 3 Jan 2020 10:40:43 -0800 Subject: [PATCH 633/658] virtio-balloon: initialize all vq callbacks Ensure that elements of the callbacks array that correspond to unavailable features are set to NULL; previously, they would be left uninitialized. Since the corresponding names array elements were explicitly set to NULL, the uninitialized callback pointers would not actually be dereferenced; however, the uninitialized callbacks elements would still be read in vp_find_vqs_msix() and used to calculate the number of MSI-X vectors required. Cc: stable@vger.kernel.org Fixes: 86a559787e6f ("virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT") Reviewed-by: Cornelia Huck Signed-off-by: Daniel Verkamp Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 93f995f6cf36..8e400ece9273 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -475,7 +475,9 @@ static int init_vqs(struct virtio_balloon *vb) names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; + callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL; names[VIRTIO_BALLOON_VQ_STATS] = NULL; + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { From 303090b513fd1ee45aa1536b71a3838dc054bc05 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Fri, 3 Jan 2020 10:40:45 -0800 Subject: [PATCH 634/658] virtio-pci: check name when counting MSI-X vectors VQs without a name specified are not valid; they are skipped in the later loop that assigns MSI-X vectors to queues, but the per_vq_vectors loop above that counts the required number of vectors previously still counted any queue with a non-NULL callback as needing a vector. Add a check to the per_vq_vectors loop so that vectors with no name are not counted to make the two loops consistent. This prevents over-counting unnecessary vectors (e.g. for features which were not negotiated with the device). 
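The invariant being restored is that the loop counting required vectors and the later loop assigning them agree on which queues are real; a condensed sketch of that pairing in the per-VQ-vectors case (illustrative, not the driver code verbatim):

	/* Count: one vector for config changes, plus one per queue that
	 * has both a name and a callback. */
	nvectors = 1;
	for (i = 0; i < nvqs; ++i)
		if (names[i] && callbacks[i])
			++nvectors;

	/* Assign: nameless queues are skipped here as well, so the two
	 * loops can no longer disagree on how many vectors are needed. */
	for (i = 0; i < nvqs; ++i) {
		if (!names[i])
			continue;
		/* ... set up vqs[i] and, if callbacks[i], a dedicated vector ... */
	}
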
Cc: stable@vger.kernel.org Fixes: 86a559787e6f ("virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT") Reviewed-by: Cornelia Huck Signed-off-by: Daniel Verkamp Signed-off-by: Michael S. Tsirkin Reviewed-by: Wang, Wei W --- drivers/virtio/virtio_pci_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index f2862f66c2ac..222d630c41fc 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -294,7 +294,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) - if (callbacks[i]) + if (names[i] && callbacks[i]) ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ From 782e067dba52de989a445f1bfa4bb770abb34480 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Dec 2019 17:37:19 +0100 Subject: [PATCH 635/658] virtio-blk: remove VIRTIO_BLK_F_SCSI support Since the need for a special flag to support SCSI passthrough on a block device was added in May 2017 the SCSI passthrough support in virtio-blk has been disabled. It has always been a bad idea (just ask the original author..) and we have virtio-scsi for proper passthrough. The feature also never made it into the virtio 1.0 or later specifications. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin Reviewed-by: Hannes Reinecke Reviewed-by: Stefan Hajnoczi --- arch/powerpc/configs/guest.config | 1 - drivers/block/Kconfig | 10 --- drivers/block/virtio_blk.c | 115 +----------------------------- 3 files changed, 1 insertion(+), 125 deletions(-) diff --git a/arch/powerpc/configs/guest.config b/arch/powerpc/configs/guest.config index 8b8cd18ecd7c..209f58515d88 100644 --- a/arch/powerpc/configs/guest.config +++ b/arch/powerpc/configs/guest.config @@ -1,5 +1,4 @@ CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_BLK_SCSI=y CONFIG_SCSI_VIRTIO=y CONFIG_VIRTIO_NET=y CONFIG_NET_FAILOVER=y diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 1bb8ec575352..025b1b77b11a 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -432,16 +432,6 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. -config VIRTIO_BLK_SCSI - bool "SCSI passthrough request for the Virtio block driver" - depends on VIRTIO_BLK - select BLK_SCSI_REQUEST - ---help--- - Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on - virtio-blk devices. This is only supported for the legacy - virtio protocol and not enabled by default by any hypervisor. - You probably want to use virtio-scsi instead. - config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7ffd719d89de..54158766334b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -56,11 +55,6 @@ struct virtio_blk { }; struct virtblk_req { -#ifdef CONFIG_VIRTIO_BLK_SCSI - struct scsi_request sreq; /* for SCSI passthrough, must be first */ - u8 sense[SCSI_SENSE_BUFFERSIZE]; - struct virtio_scsi_inhdr in_hdr; -#endif struct virtio_blk_outhdr out_hdr; u8 status; struct scatterlist sg[]; @@ -78,80 +72,6 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr) } } -/* - * If this is a packet command we need a couple of additional headers. 
Behind - * the normal outhdr we put a segment with the scsi command block, and before - * the normal inhdr we put the sense data and the inhdr with additional status - * information. - */ -#ifdef CONFIG_VIRTIO_BLK_SCSI -static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr, - struct scatterlist *data_sg, bool have_data) -{ - struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6]; - unsigned int num_out = 0, num_in = 0; - - sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); - sgs[num_out++] = &hdr; - sg_init_one(&cmd, vbr->sreq.cmd, vbr->sreq.cmd_len); - sgs[num_out++] = &cmd; - - if (have_data) { - if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) - sgs[num_out++] = data_sg; - else - sgs[num_out + num_in++] = data_sg; - } - - sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); - sgs[num_out + num_in++] = &sense; - sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); - sgs[num_out + num_in++] = &inhdr; - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); - sgs[num_out + num_in++] = &status; - - return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); -} - -static inline void virtblk_scsi_request_done(struct request *req) -{ - struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - struct virtio_blk *vblk = req->q->queuedata; - struct scsi_request *sreq = &vbr->sreq; - - sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); - sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); - sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); -} - -static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long data) -{ - struct gendisk *disk = bdev->bd_disk; - struct virtio_blk *vblk = disk->private_data; - - /* - * Only allow the generic SCSI ioctls if the host can support it. 
- */ - if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) - return -ENOTTY; - - return scsi_cmd_blk_ioctl(bdev, mode, cmd, - (void __user *)data); -} -#else -static inline int virtblk_add_req_scsi(struct virtqueue *vq, - struct virtblk_req *vbr, struct scatterlist *data_sg, - bool have_data) -{ - return -EIO; -} -static inline void virtblk_scsi_request_done(struct request *req) -{ -} -#define virtblk_ioctl NULL -#endif /* CONFIG_VIRTIO_BLK_SCSI */ - static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *data_sg, bool have_data) { @@ -216,13 +136,6 @@ static inline void virtblk_request_done(struct request *req) req->special_vec.bv_offset); } - switch (req_op(req)) { - case REQ_OP_SCSI_IN: - case REQ_OP_SCSI_OUT: - virtblk_scsi_request_done(req); - break; - } - blk_mq_end_request(req, virtblk_result(vbr)); } @@ -299,10 +212,6 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, type = VIRTIO_BLK_T_WRITE_ZEROES; unmap = !(req->cmd_flags & REQ_NOUNMAP); break; - case REQ_OP_SCSI_IN: - case REQ_OP_SCSI_OUT: - type = VIRTIO_BLK_T_SCSI_CMD; - break; case REQ_OP_DRV_IN: type = VIRTIO_BLK_T_GET_ID; break; @@ -333,10 +242,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - if (blk_rq_is_scsi(req)) - err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num); - else - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); blk_mq_stop_hw_queue(hctx); @@ -404,7 +310,6 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) } static const struct block_device_operations virtblk_fops = { - .ioctl = virtblk_ioctl, .owner = THIS_MODULE, .getgeo = virtblk_getgeo, }; @@ -683,9 +588,6 @@ static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, struct virtio_blk *vblk = set->driver_data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); -#ifdef CONFIG_VIRTIO_BLK_SCSI - vbr->sreq.sense = vbr->sense; -#endif sg_init_table(vbr->sg, vblk->sg_elems); return 0; } @@ -698,23 +600,11 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) vblk->vdev, 0); } -#ifdef CONFIG_VIRTIO_BLK_SCSI -static void virtblk_initialize_rq(struct request *req) -{ - struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - - scsi_req_init(&vbr->sreq); -} -#endif - static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .commit_rqs = virtio_commit_rqs, .complete = virtblk_request_done, .init_request = virtblk_init_request, -#ifdef CONFIG_VIRTIO_BLK_SCSI - .initialize_rq_fn = virtblk_initialize_rq, -#endif .map_queues = virtblk_map_queues, }; @@ -991,9 +881,6 @@ static const struct virtio_device_id id_table[] = { static unsigned int features_legacy[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, -#ifdef CONFIG_VIRTIO_BLK_SCSI - VIRTIO_BLK_F_SCSI, -#endif VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, } From 6e9826e77249355c09db6ba41cd3f84e89f4b614 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 6 Feb 2020 02:40:58 -0500 Subject: [PATCH 636/658] virtio_balloon: prevent pfn array overflow Make sure, at build time, that pfn array is big enough to hold a single page. 
It happens to be true since the PAGE_SHIFT value at the moment is 20, which is 1M - exactly 256 4K balloon pages. Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8e400ece9273..2457c54b6185 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -158,6 +158,8 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; + BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); + /* * Set balloon pfns pointing at this page. * Note that the first pfn points at start of the page. From 6c22dc61c76b7e7d355f1697ba0ecf26d1334ba6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 5 Feb 2020 17:34:00 +0100 Subject: [PATCH 637/658] virtio-balloon: Fix memory leak when unloading while hinting is in progress When unloading the driver while hinting is in progress, we will not release the free page blocks back to MM, resulting in a memory leak. Fixes: 86a559787e6f ("virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT") Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Wei Wang Cc: Liang Li Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200205163402.42627-2-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 2457c54b6185..6d063eeaa08a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -970,6 +970,10 @@ static void remove_common(struct virtio_balloon *vb) leak_balloon(vb, vb->num_pages); update_balloon_size(vb); + /* There might be free pages that are being reported: release them. */ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + return_free_pages_to_mm(vb, ULONG_MAX); + /* Now we reset the device so we can clean up the queues. */ vb->vdev->config->reset(vb->vdev); From 1ad6f58ea9364b0a5d8ae06249653ac9304a8578 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 5 Feb 2020 17:34:01 +0100 Subject: [PATCH 638/658] virtio_balloon: Fix memory leaks on errors in virtballoon_probe() We forget to put the inode and unmount the kernfs used for compaction. Fixes: 71994620bb25 ("virtio_balloon: replace oom notifier with shrinker") Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Wei Wang Cc: Liang Li Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200205163402.42627-3-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_balloon.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 6d063eeaa08a..7bfe365d9372 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -903,8 +903,7 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); - kern_unmount(balloon_mnt); - goto out_del_vqs; + goto out_kern_unmount; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif @@ -915,13 +914,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_del_vqs; + goto out_iput; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_del_vqs; + goto out_iput; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -955,6 +954,12 @@ static int virtballoon_probe(struct virtio_device *vdev) out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); +out_iput: +#ifdef CONFIG_BALLOON_COMPACTION + iput(vb->vb_dev_info.inode); +out_kern_unmount: + kern_unmount(balloon_mnt); +#endif out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: From 6d011d5057ff88ee556c000ac6fe0be23bdfcd72 Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Thu, 6 Feb 2020 15:40:53 +0530 Subject: [PATCH 639/658] ALSA: hda: Clear RIRB status before reading WP RIRB interrupt status getting cleared after the write pointer is read causes a race condition, where last response(s) into RIRB may remain unserviced by IRQ, eventually causing azx_rirb_get_response to fall back to polling mode. Clearing the RIRB interrupt status ahead of write pointer access ensures that this condition is avoided. Signed-off-by: Mohan Kumar Signed-off-by: Viswanath L Link: https://lore.kernel.org/r/1580983853-351-1-git-send-email-viswanathl@nvidia.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_controller.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 9757667cdd58..2609e391ce54 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -1110,16 +1110,23 @@ irqreturn_t azx_interrupt(int irq, void *dev_id) if (snd_hdac_bus_handle_stream_irq(bus, status, stream_update)) active = true; - /* clear rirb int */ status = azx_readb(chip, RIRBSTS); if (status & RIRB_INT_MASK) { + /* + * Clearing the interrupt status here ensures that no + * interrupt gets masked after the RIRB wp is read in + * snd_hdac_bus_update_rirb. This avoids a possible + * race condition where codec response in RIRB may + * remain unserviced by IRQ, eventually falling back + * to polling mode in azx_rirb_get_response. 
+ */ + azx_writeb(chip, RIRBSTS, RIRB_INT_MASK); active = true; if (status & RIRB_INT_RESPONSE) { if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND) udelay(80); snd_hdac_bus_update_rirb(bus); } - azx_writeb(chip, RIRBSTS, RIRB_INT_MASK); } } while (active && ++repeat < 10); From fcf2736c82ca1908e3a0e74730c404baebd8ccdf Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 6 Feb 2020 11:40:09 +0000 Subject: [PATCH 640/658] Revert "kdb: Get rid of confusing diag msg from "rd" if current task has no regs" This reverts commit bbfceba15f8d1260c328a254efc2b3f2deae4904. When DBG_MAX_REG_NUM is zero then a number of symbols are conditionally defined. It is therefore not possible to check it using C expressions. Reported-by: Anatoly Pugachev Acked-by: Doug Anderson Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_main.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index b22292b649c4..ba12e9f4661e 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -543,8 +543,9 @@ int kdbgetaddrarg(int argc, const char **argv, int *nextarg, if (diag) return diag; } else if (symname[0] == '%') { - if (kdb_check_regs()) - return 0; + diag = kdb_check_regs(); + if (diag) + return diag; /* Implement register values with % at a later time as it is * arch optional. */ @@ -1835,7 +1836,8 @@ static int kdb_go(int argc, const char **argv) */ static int kdb_rd(int argc, const char **argv) { - int len = 0; + int len = kdb_check_regs(); +#if DBG_MAX_REG_NUM > 0 int i; char *rname; int rsize; @@ -1844,14 +1846,8 @@ static int kdb_rd(int argc, const char **argv) u16 reg16; u8 reg8; - if (kdb_check_regs()) - return 0; - - /* Fallback to Linux showregs() if we don't have DBG_MAX_REG_NUM */ - if (DBG_MAX_REG_NUM <= 0) { - kdb_dumpregs(kdb_current_regs); - return 0; - } + if (len) + return len; for (i = 0; i < DBG_MAX_REG_NUM; i++) { rsize = dbg_reg_def[i].size * 2; @@ -1893,7 +1889,12 @@ static int kdb_rd(int argc, const char **argv) } } kdb_printf("\n"); +#else + if (len) + return len; + kdb_dumpregs(kdb_current_regs); +#endif return 0; } @@ -1927,8 +1928,9 @@ static int kdb_rm(int argc, const char **argv) if (diag) return diag; - if (kdb_check_regs()) - return 0; + diag = kdb_check_regs(); + if (diag) + return diag; diag = KDB_BADREG; for (i = 0; i < DBG_MAX_REG_NUM; i++) { From f658adeea45e430a24c7a157c3d5448925ac2038 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 6 Feb 2020 16:39:28 +0100 Subject: [PATCH 641/658] fix up iter on short count in fuse_direct_io() fuse_direct_io() can end up advancing the iterator by more than the amount of data read or written. This case is handled by the generic code if going through ->direct_IO(), but not in the FOPEN_DIRECT_IO case. Fix by reverting the extra bytes from the iterator in case of error or a short count. To test: install lxcfs, then the following testcase int fd = open("/var/lib/lxcfs/proc/uptime", O_RDONLY); sendfile(1, fd, NULL, 16777216); sendfile(1, fd, NULL, 16777216); will spew WARN_ON() in iov_iter_pipe(). 
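For convenience, the inline testcase above expands into a standalone reproducer along these lines (assumes lxcfs is mounted under /var/lib/lxcfs; error handling kept minimal):

	#include <fcntl.h>
	#include <sys/sendfile.h>
	#include <unistd.h>

	int main(void)
	{
		/* lxcfs exports this path via FUSE; reading it exercises the
		 * FOPEN_DIRECT_IO case described above. */
		int fd = open("/var/lib/lxcfs/proc/uptime", O_RDONLY);

		if (fd < 0)
			return 1;

		/* This pair of oversized sendfile() calls spews the WARN_ON()
		 * in iov_iter_pipe() on unpatched kernels. */
		sendfile(1, fd, NULL, 16777216);
		sendfile(1, fd, NULL, 16777216);
		close(fd);
		return 0;
	}
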
Reported-by: Peter Geis Reported-by: Al Viro Fixes: 3c3db095b68c ("fuse: use iov_iter based generic splice helpers") Cc: # v5.1 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ce715380143c..695369f46f92 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1465,6 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, } ia = NULL; if (nres < 0) { + iov_iter_revert(iter, nbytes); err = nres; break; } @@ -1473,8 +1474,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, count -= nres; res += nres; pos += nres; - if (nres != nbytes) + if (nres != nbytes) { + iov_iter_revert(iter, nbytes - nres); break; + } if (count) { max_pages = iov_iter_npages(iter, fc->max_pages); ia = fuse_io_alloc(io, max_pages); From 2f1398291bf35fe027914ae7a9610d8e601fbfde Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 6 Feb 2020 16:39:28 +0100 Subject: [PATCH 642/658] fuse: don't overflow LLONG_MAX with end offset Handle the special case of fuse_readpages() wanting to read the last page of a hugest file possible and overflowing the end offset in the process. This is basically to unbreak xfstests:generic/525 and prevent filesystems from doing bad things with an overflowing offset. Reported-by: Xiao Yang Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 695369f46f92..3dd37a998ea9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -803,6 +803,10 @@ static int fuse_do_readpage(struct file *file, struct page *page) attr_ver = fuse_get_attr_version(fc); + /* Don't overflow end offset */ + if (pos + (desc.length - 1) == LLONG_MAX) + desc.length--; + fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); res = fuse_simple_request(fc, &ia.ap.args); if (res < 0) @@ -888,6 +892,14 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) ap->args.out_pages = true; ap->args.page_zeroing = true; ap->args.page_replace = true; + + /* Don't overflow end offset */ + if (pos + (count - 1) == LLONG_MAX) { + count--; + ap->descs[ap->num_pages - 1].length--; + } + WARN_ON((loff_t) (pos + count) < 0); + fuse_read_args_fill(ia, file, pos, count, FUSE_READ); ia->read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { From 519525fa47b5a8155f0b203e49a3a6a2319f75ae Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 5 Feb 2020 08:15:46 -0500 Subject: [PATCH 643/658] fuse: Support RENAME_WHITEOUT flag Allow fuse to pass RENAME_WHITEOUT to fuse server. Overlayfs on top of virtiofs uses RENAME_WHITEOUT. Without this patch renaming a directory in overlayfs (dir is on lower) fails with -EINVAL. With this patch it works. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ee190119f45c..de1e2fde60bd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -818,7 +818,7 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; if (flags) { From 8ab13bca428bb98e98d1933eaeacc90e80cf6192 Mon Sep 17 00:00:00 2001 From: "Daniel W. S. 
Almeida" Date: Wed, 29 Jan 2020 02:06:21 -0300 Subject: [PATCH 644/658] Documentation: filesystems: convert fuse to RST Converts fuse.txt to reStructuredText format, improving the presentation without changing much of the underlying content. Signed-off-by: Daniel W. S. Almeida Signed-off-by: Miklos Szeredi --- .../filesystems/{fuse.txt => fuse.rst} | 163 ++++++++---------- Documentation/filesystems/index.rst | 1 + MAINTAINERS | 2 +- 3 files changed, 75 insertions(+), 91 deletions(-) rename Documentation/filesystems/{fuse.txt => fuse.rst} (80%) diff --git a/Documentation/filesystems/fuse.txt b/Documentation/filesystems/fuse.rst similarity index 80% rename from Documentation/filesystems/fuse.txt rename to Documentation/filesystems/fuse.rst index 13af4a49e7db..8e455065ce9e 100644 --- a/Documentation/filesystems/fuse.txt +++ b/Documentation/filesystems/fuse.rst @@ -1,41 +1,40 @@ +.. SPDX-License-Identifier: GPL-2.0 +============== +FUSE +============== + Definitions -~~~~~~~~~~~ +=========== Userspace filesystem: - A filesystem in which data and metadata are provided by an ordinary userspace process. The filesystem can be accessed normally through the kernel interface. Filesystem daemon: - The process(es) providing the data and metadata of the filesystem. Non-privileged mount (or user mount): - A userspace filesystem mounted by a non-privileged (non-root) user. The filesystem daemon is running with the privileges of the mounting user. NOTE: this is not the same as mounts allowed with the "user" option in /etc/fstab, which is not discussed here. Filesystem connection: - A connection between the filesystem daemon and the kernel. The connection exists until either the daemon dies, or the filesystem is umounted. Note that detaching (or lazy umounting) the filesystem - does _not_ break the connection, in this case it will exist until + does *not* break the connection, in this case it will exist until the last reference to the filesystem is released. Mount owner: - The user who does the mounting. User: - The user who is performing filesystem operations. What is FUSE? -~~~~~~~~~~~~~ +============= FUSE is a userspace filesystem framework. It consists of a kernel module (fuse.ko), a userspace library (libfuse.*) and a mount utility @@ -46,50 +45,41 @@ non-privileged mounts. This opens up new possibilities for the use of filesystems. A good example is sshfs: a secure network filesystem using the sftp protocol. -The userspace library and utilities are available from the FUSE -homepage: - - http://fuse.sourceforge.net/ +The userspace library and utilities are available from the +`FUSE homepage: `_ Filesystem type -~~~~~~~~~~~~~~~ +=============== The filesystem type given to mount(2) can be one of the following: -'fuse' + fuse + This is the usual way to mount a FUSE filesystem. The first + argument of the mount system call may contain an arbitrary string, + which is not interpreted by the kernel. - This is the usual way to mount a FUSE filesystem. The first - argument of the mount system call may contain an arbitrary string, - which is not interpreted by the kernel. - -'fuseblk' - - The filesystem is block device based. The first argument of the - mount system call is interpreted as the name of the device. + fuseblk + The filesystem is block device based. The first argument of the + mount system call is interpreted as the name of the device. Mount options -~~~~~~~~~~~~~ - -'fd=N' +============= +fd=N The file descriptor to use for communication between the userspace filesystem and the kernel. 
The file descriptor must have been obtained by opening the FUSE device ('/dev/fuse'). -'rootmode=M' - +rootmode=M The file mode of the filesystem's root in octal representation. -'user_id=N' - +user_id=N The numeric user id of the mount owner. -'group_id=N' - +group_id=N The numeric group id of the mount owner. -'default_permissions' - +default_permissions By default FUSE doesn't check file access permissions, the filesystem is free to implement its access policy or leave it to the underlying file access mechanism (e.g. in case of network @@ -97,28 +87,25 @@ Mount options access based on file mode. It is usually useful together with the 'allow_other' mount option. -'allow_other' - +allow_other This option overrides the security measure restricting file access to the user mounting the filesystem. This option is by default only allowed to root, but this restriction can be removed with a (userspace) configuration option. -'max_read=N' - +max_read=N With this option the maximum size of read operations can be set. The default is infinite. Note that the size of read requests is limited anyway to 32 pages (which is 128kbyte on i386). -'blksize=N' - +blksize=N Set the block size for the filesystem. The default is 512. This option is only valid for 'fuseblk' type mounts. Control filesystem -~~~~~~~~~~~~~~~~~~ +================== -There's a control filesystem for FUSE, which can be mounted by: +There's a control filesystem for FUSE, which can be mounted by:: mount -t fusectl none /sys/fs/fuse/connections @@ -130,53 +117,51 @@ named by a unique number. For each connection the following files exist within this directory: - 'waiting' + waiting + The number of requests which are waiting to be transferred to + userspace or being processed by the filesystem daemon. If there is + no filesystem activity and 'waiting' is non-zero, then the + filesystem is hung or deadlocked. - The number of requests which are waiting to be transferred to - userspace or being processed by the filesystem daemon. If there is - no filesystem activity and 'waiting' is non-zero, then the - filesystem is hung or deadlocked. - - 'abort' - - Writing anything into this file will abort the filesystem - connection. This means that all waiting requests will be aborted an - error returned for all aborted and new requests. + abort + Writing anything into this file will abort the filesystem + connection. This means that all waiting requests will be aborted an + error returned for all aborted and new requests. Only the owner of the mount may read or write these files. Interrupting filesystem operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +################################## If a process issuing a FUSE filesystem request is interrupted, the following will happen: - 1) If the request is not yet sent to userspace AND the signal is + - If the request is not yet sent to userspace AND the signal is fatal (SIGKILL or unhandled fatal signal), then the request is dequeued and returns immediately. - 2) If the request is not yet sent to userspace AND the signal is not - fatal, then an 'interrupted' flag is set for the request. When + - If the request is not yet sent to userspace AND the signal is not + fatal, then an interrupted flag is set for the request. When the request has been successfully transferred to userspace and this flag is set, an INTERRUPT request is queued. - 3) If the request is already sent to userspace, then an INTERRUPT + - If the request is already sent to userspace, then an INTERRUPT request is queued. 
INTERRUPT requests take precedence over other requests, so the userspace filesystem will receive queued INTERRUPTs before any others. The userspace filesystem may ignore the INTERRUPT requests entirely, -or may honor them by sending a reply to the _original_ request, with +or may honor them by sending a reply to the *original* request, with the error set to EINTR. It is also possible that there's a race between processing the original request and its INTERRUPT request. There are two possibilities: - 1) The INTERRUPT request is processed before the original request is + 1. The INTERRUPT request is processed before the original request is processed - 2) The INTERRUPT request is processed after the original request has + 2. The INTERRUPT request is processed after the original request has been answered If the filesystem cannot find the original request, it should wait for @@ -186,7 +171,7 @@ should reply to the INTERRUPT request with an EAGAIN error. In case reply will be ignored. Aborting a filesystem connection -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +================================ It is possible to get into certain situations where the filesystem is not responding. Reasons for this may be: @@ -216,7 +201,7 @@ the filesystem. There are several ways to do this: powerful method, always works. How do non-privileged mounts work? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +================================== Since the mount() system call is a privileged operation, a helper program (fusermount) is needed, which is installed setuid root. @@ -235,15 +220,13 @@ system. Obvious requirements arising from this are: other users' or the super user's processes How are requirements fulfilled? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=============================== A) The mount owner could gain elevated privileges by either: - 1) creating a filesystem containing a device file, then opening - this device + 1. creating a filesystem containing a device file, then opening this device - 2) creating a filesystem containing a suid or sgid application, - then executing this application + 2. creating a filesystem containing a suid or sgid application, then executing this application The solution is not to allow opening device files and ignore setuid and setgid bits when executing programs. To ensure this @@ -275,16 +258,16 @@ How are requirements fulfilled? of other users' processes. i) It can slow down or indefinitely delay the execution of a - filesystem operation creating a DoS against the user or the - whole system. For example a suid application locking a - system file, and then accessing a file on the mount owner's - filesystem could be stopped, and thus causing the system - file to be locked forever. + filesystem operation creating a DoS against the user or the + whole system. For example a suid application locking a + system file, and then accessing a file on the mount owner's + filesystem could be stopped, and thus causing the system + file to be locked forever. ii) It can present files or directories of unlimited length, or - directory structures of unlimited depth, possibly causing a - system process to eat up diskspace, memory or other - resources, again causing DoS. + directory structures of unlimited depth, possibly causing a + system process to eat up diskspace, memory or other + resources, again causing *DoS*. The solution to this as well as B) is not to allow processes to access the filesystem, which could otherwise not be @@ -294,28 +277,27 @@ How are requirements fulfilled? 
ptrace can be used to check if a process is allowed to access the filesystem or not. - Note that the ptrace check is not strictly necessary to + Note that the *ptrace* check is not strictly necessary to prevent B/2/i, it is enough to check if mount owner has enough privilege to send signal to the process accessing the - filesystem, since SIGSTOP can be used to get a similar effect. + filesystem, since *SIGSTOP* can be used to get a similar effect. I think these limitations are unacceptable? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=========================================== If a sysadmin trusts the users enough, or can ensure through other measures, that system processes will never enter non-privileged -mounts, it can relax the last limitation with a "user_allow_other" +mounts, it can relax the last limitation with a 'user_allow_other' config option. If this config option is set, the mounting user can -add the "allow_other" mount option which disables the check for other +add the 'allow_other' mount option which disables the check for other users' processes. Kernel - userspace interface -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +============================ The following diagram shows how a filesystem operation (in this -example unlink) is performed in FUSE. +example unlink) is performed in FUSE. :: -NOTE: everything in this description is greatly simplified | "rm /mnt/fuse/file" | FUSE filesystem daemon | | @@ -357,12 +339,13 @@ NOTE: everything in this description is greatly simplified | From cabdb4fa2f666fad21b21b04c84709204f60af21 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 14 Jan 2020 20:39:45 +0800 Subject: [PATCH 645/658] fuse: use true,false for bool variable Fixes coccicheck warning: fs/fuse/readdir.c:335:1-19: WARNING: Assignment of 0/1 to bool variable fs/fuse/file.c:1398:2-19: WARNING: Assignment of 0/1 to bool variable fs/fuse/file.c:1400:2-20: WARNING: Assignment of 0/1 to bool variable fs/fuse/cuse.c:454:1-20: WARNING: Assignment of 0/1 to bool variable fs/fuse/cuse.c:455:1-19: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:497:2-17: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:504:2-23: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:511:2-22: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:518:2-23: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:522:2-26: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:526:2-18: WARNING: Assignment of 0/1 to bool variable fs/fuse/inode.c:1000:1-20: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 4 ++-- fs/fuse/file.c | 4 ++-- fs/fuse/inode.c | 14 +++++++------- fs/fuse/readdir.c | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 00015d851382..030f094910c3 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -451,8 +451,8 @@ static int cuse_send_init(struct cuse_conn *cc) ap->args.out_args[0].size = sizeof(ia->out); ap->args.out_args[0].value = &ia->out; ap->args.out_args[1].size = CUSE_INIT_INFO_MAX; - ap->args.out_argvar = 1; - ap->args.out_pages = 1; + ap->args.out_argvar = true; + ap->args.out_pages = true; ap->num_pages = 1; ap->pages = &ia->page; ap->descs = &ia->desc; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3dd37a998ea9..9d67b830fb7a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1409,9 +1409,9 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, } if (write) - 
ap->args.in_pages = 1; + ap->args.in_pages = true; else - ap->args.out_pages = 1; + ap->args.out_pages = true; *nbytesp = nbytes; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 16aec32f7f3d..77fef29ebe4f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -494,36 +494,36 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) case OPT_FD: ctx->fd = result.uint_32; - ctx->fd_present = 1; + ctx->fd_present = true; break; case OPT_ROOTMODE: if (!fuse_valid_type(result.uint_32)) return invalf(fc, "fuse: Invalid rootmode"); ctx->rootmode = result.uint_32; - ctx->rootmode_present = 1; + ctx->rootmode_present = true; break; case OPT_USER_ID: ctx->user_id = make_kuid(fc->user_ns, result.uint_32); if (!uid_valid(ctx->user_id)) return invalf(fc, "fuse: Invalid user_id"); - ctx->user_id_present = 1; + ctx->user_id_present = true; break; case OPT_GROUP_ID: ctx->group_id = make_kgid(fc->user_ns, result.uint_32); if (!gid_valid(ctx->group_id)) return invalf(fc, "fuse: Invalid group_id"); - ctx->group_id_present = 1; + ctx->group_id_present = true; break; case OPT_DEFAULT_PERMISSIONS: - ctx->default_permissions = 1; + ctx->default_permissions = true; break; case OPT_ALLOW_OTHER: - ctx->allow_other = 1; + ctx->allow_other = true; break; case OPT_MAX_READ: @@ -997,7 +997,7 @@ void fuse_send_init(struct fuse_conn *fc) /* Variable length argument used for backward compatibility with interface version < 7.5. Rest of init_out is zeroed by do_get_request(), so a short reply is not a problem */ - ia->args.out_argvar = 1; + ia->args.out_argvar = true; ia->args.out_args[0].size = sizeof(ia->out); ia->args.out_args[0].value = &ia->out; ia->args.force = true; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 6a40f75a0d25..90e3f01bd796 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -332,7 +332,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) return -ENOMEM; plus = fuse_use_readdirplus(inode, ctx); - ap->args.out_pages = 1; + ap->args.out_pages = true; ap->num_pages = 1; ap->pages = &page; ap->descs = &desc; From a9ceb060b3cf37987b6162223575eaf4f4e0fc36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:00:21 -0500 Subject: [PATCH 646/658] nfsd: Fix a perf warning perf does not know how to deal with a __builtin_bswap32() call, and complains. All other functions just store the xid etc in host endian form, so let's do that in the tracepoint for nfsd_file_acquire too. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/trace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index ffc78a0e28b2..b073bdc2e6e8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -228,7 +228,7 @@ TRACE_EVENT(nfsd_file_acquire, TP_ARGS(rqstp, hash, inode, may_flags, nf, status), TP_STRUCT__entry( - __field(__be32, xid) + __field(u32, xid) __field(unsigned int, hash) __field(void *, inode) __field(unsigned int, may_flags) @@ -236,11 +236,11 @@ TRACE_EVENT(nfsd_file_acquire, __field(unsigned long, nf_flags) __field(unsigned char, nf_may) __field(struct file *, nf_file) - __field(__be32, status) + __field(u32, status) ), TP_fast_assign( - __entry->xid = rqstp->rq_xid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; @@ -248,15 +248,15 @@ TRACE_EVENT(nfsd_file_acquire, __entry->nf_flags = nf ? nf->nf_flags : 0; __entry->nf_may = nf ? nf->nf_may : 0; __entry->nf_file = nf ? 
nf->nf_file : NULL; - __entry->status = status; + __entry->status = be32_to_cpu(status); ), TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", - be32_to_cpu(__entry->xid), __entry->hash, __entry->inode, + __entry->xid, __entry->hash, __entry->inode, show_nf_may(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nf_may(__entry->nf_may), __entry->nf_file, - be32_to_cpu(__entry->status)) + __entry->status) ); DECLARE_EVENT_CLASS(nfsd_file_search_class, From c19285596de699e4602f9c89785e6b8c29422286 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:00:22 -0500 Subject: [PATCH 647/658] nfsd: Define the file access mode enum for tracing Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/trace.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index b073bdc2e6e8..17ecef404e5b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -166,6 +166,12 @@ DEFINE_STATEID_EVENT(layout_recall_done); DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_release); +TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); +TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); +TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); + #define show_nf_flags(val) \ __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ From 689827cd5bfe89e4900db7e1c0c713083a76d04c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:02:44 -0500 Subject: [PATCH 648/658] nfsd: convert file cache to use over/underflow safe refcount Use the 'refcount_t' type instead of 'atomic_t' for improved refcounting safety. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/filecache.c | 23 +++++++++++------------ fs/nfsd/filecache.h | 4 ++-- fs/nfsd/trace.h | 4 ++-- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 2fadf080ac42..23c1fa5da1e9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -100,7 +100,7 @@ nfsd_file_mark_free(struct fsnotify_mark *mark) static struct nfsd_file_mark * nfsd_file_mark_get(struct nfsd_file_mark *nfm) { - if (!atomic_inc_not_zero(&nfm->nfm_ref)) + if (!refcount_inc_not_zero(&nfm->nfm_ref)) return NULL; return nfm; } @@ -108,8 +108,7 @@ nfsd_file_mark_get(struct nfsd_file_mark *nfm) static void nfsd_file_mark_put(struct nfsd_file_mark *nfm) { - if (atomic_dec_and_test(&nfm->nfm_ref)) { - + if (refcount_dec_and_test(&nfm->nfm_ref)) { fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); fsnotify_put_mark(&nfm->nfm_mark); } @@ -148,7 +147,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) return NULL; fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; - atomic_set(&new->nfm_ref, 1); + refcount_set(&new->nfm_ref, 1); err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); @@ -186,7 +185,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, nf->nf_flags = 0; nf->nf_inode = inode; nf->nf_hashval = hashval; - atomic_set(&nf->nf_ref, 1); + refcount_set(&nf->nf_ref, 1); nf->nf_may = may & NFSD_FILE_MAY_MASK; if (may & NFSD_MAY_NOT_BREAK_LEASE) { if (may & NFSD_MAY_WRITE) @@ -280,7 +279,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp if (!nfsd_file_unhash(nf)) return false; /* keep final reference for nfsd_file_lru_dispose */ - if (atomic_add_unless(&nf->nf_ref, -1, 1)) + if (refcount_dec_not_one(&nf->nf_ref)) return true; list_add(&nf->nf_lru, dispose); @@ -292,7 +291,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) { trace_nfsd_file_put(nf); - if (atomic_dec_and_test(&nf->nf_ref)) { + if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); nfsd_file_free(nf); } @@ -304,7 +303,7 @@ nfsd_file_put(struct nfsd_file *nf) bool is_hashed; set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (atomic_read(&nf->nf_ref) > 2 || !nf->nf_file) { + if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { nfsd_file_put_noref(nf); return; } @@ -321,7 +320,7 @@ nfsd_file_put(struct nfsd_file *nf) struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(atomic_inc_not_zero(&nf->nf_ref))) + if (likely(refcount_inc_not_zero(&nf->nf_ref))) return nf; return NULL; } @@ -347,7 +346,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); - if (!atomic_dec_and_test(&nf->nf_ref)) + if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) flush = true; @@ -430,7 +429,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. 
*/ - if (atomic_read(&nf->nf_ref) > 1) + if (refcount_read(&nf->nf_ref) > 1) goto out_skip; /* @@ -1019,7 +1018,7 @@ out: open_file: nf = new; /* Take reference for the hashtable */ - atomic_inc(&nf->nf_ref); + refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); list_lru_add(&nfsd_file_lru, &nf->nf_lru); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 986c325a54bd..7872df5a0fe3 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -19,7 +19,7 @@ */ struct nfsd_file_mark { struct fsnotify_mark nfm_mark; - atomic_t nfm_ref; + refcount_t nfm_ref; }; /* @@ -43,7 +43,7 @@ struct nfsd_file { unsigned long nf_flags; struct inode *nf_inode; unsigned int nf_hashval; - atomic_t nf_ref; + refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; struct rw_semaphore nf_rwsem; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 17ecef404e5b..06dd0d337049 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -201,7 +201,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class, TP_fast_assign( __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; - __entry->nf_ref = atomic_read(&nf->nf_ref); + __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; @@ -250,7 +250,7 @@ TRACE_EVENT(nfsd_file_acquire, __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; - __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0; + __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; __entry->nf_flags = nf ? nf->nf_flags : 0; __entry->nf_may = nf ? nf->nf_may : 0; __entry->nf_file = nf ? nf->nf_file : NULL; From 91fd3c3edcd7f223c5340a8324f67b03a85a73aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 13 Jan 2020 16:23:07 +0300 Subject: [PATCH 649/658] nfsd4: fix double free in nfsd4_do_async_copy() This frees "copy->nf_src" before and again after the goto. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dan Carpenter Reviewed-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e4ddaf87493a..0e75f7fb5fec 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1469,7 +1469,6 @@ static int nfsd4_do_async_copy(void *data) copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, ©->stateid); if (IS_ERR(copy->nf_src->nf_file)) { - kfree(copy->nf_src); copy->nfserr = nfserr_offload_denied; nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; From 7582026f6f3588ecebd281965c8a71aff6fb6158 Mon Sep 17 00:00:00 2001 From: Abhi Das Date: Tue, 4 Feb 2020 14:14:56 -0600 Subject: [PATCH 650/658] gfs2: fix gfs2_find_jhead that returns uninitialized jhead with seq 0 When the first log header in a journal happens to have a sequence number of 0, a bug in gfs2_find_jhead() causes it to prematurely exit, and return an uninitialized jhead with seq 0. This can cause failures in the caller. For instance, a mount fails in one test case. The correct behavior is for it to continue searching through the journal to find the correct journal head with the highest sequence number. 
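To spell out the failure mode: the search keeps the highest log-header sequence seen so far and stops at the first header whose sequence does not advance, so a journal whose first header carries sequence 0 used to hit the stop branch before the tracked head had ever been written. A reduced sketch of the comparison, with the surrounding page and buffer handling stripped out:

	/* head->lh_sequence starts out at 0; lh is the header just decoded. */
	if (lh.lh_sequence >= head->lh_sequence)	/* was '>': an initial seq of 0 never matched */
		*head = lh;				/* remember the newest header seen so far */
	else
		ret = true;				/* sequence went backwards: head found, stop */
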
Fixes: f4686c26ecc3 ("gfs2: read journal in large chunks") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Abhi Das Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index d9431724b788..c090d5ad3f22 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -422,7 +422,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { - if (lh.lh_sequence > head->lh_sequence) + if (lh.lh_sequence >= head->lh_sequence) *head = lh; else { ret = true; From 4c0e8dda608a51855225c611b5c6b442f95fbc56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Jan 2020 16:38:29 +0100 Subject: [PATCH 651/658] gfs2: move setting current->backing_dev_info Set current->backing_dev_info just around the buffered write calls to prepare for the next fix. Fixes: 967bcc91b044 ("gfs2: iomap direct I/O support") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Christoph Hellwig Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9d58295ccf7a..21d032c4b077 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -867,18 +867,15 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) - goto out; - - /* We can write back this queue in page reclaim */ - current->backing_dev_info = inode_to_bdi(inode); + goto out_unlock; ret = file_remove_privs(file); if (ret) - goto out2; + goto out_unlock; ret = file_update_time(file); if (ret) - goto out2; + goto out_unlock; if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; @@ -887,11 +884,13 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) written = gfs2_file_direct_write(iocb, from); if (written < 0 || !iov_iter_count(from)) - goto out2; + goto out_unlock; + current->backing_dev_info = inode_to_bdi(inode); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; if (unlikely(ret < 0)) - goto out2; + goto out_unlock; buffered = ret; /* @@ -915,14 +914,14 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) */ } } else { + current->backing_dev_info = inode_to_bdi(inode); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; if (likely(ret > 0)) iocb->ki_pos += ret; } -out2: - current->backing_dev_info = NULL; -out: +out_unlock: inode_unlock(inode); if (likely(ret > 0)) { /* Handle various SYNC-type writes */ From 2d9384ff91770a71bd1ff24c25952ef1187a0e9c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 4 Feb 2020 14:59:24 +0100 Subject: [PATCH 652/658] drm/tegra: Relax IOMMU usage criteria on old Tegra Older Tegra devices only allow addressing 32 bits of memory, so whether or not the host1x is attached to an IOMMU doesn't matter. host1x IOMMU attachment is only needed on devices that can address memory beyond the 32-bit boundary and where the host1x doesn't support the wide GATHER opcode that allows it to access buffers at higher addresses. 
Cc: # v5.5 Signed-off-by: Thierry Reding Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/drm.c | 49 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index aa9e49f04988..bd268028fb3d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1037,23 +1037,9 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, free_pages((unsigned long)virt, get_order(size)); } -static int host1x_drm_probe(struct host1x_device *dev) +static bool host1x_drm_wants_iommu(struct host1x_device *dev) { - struct drm_driver *driver = &tegra_drm_driver; struct iommu_domain *domain; - struct tegra_drm *tegra; - struct drm_device *drm; - int err; - - drm = drm_dev_alloc(driver, &dev->dev); - if (IS_ERR(drm)) - return PTR_ERR(drm); - - tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); - if (!tegra) { - err = -ENOMEM; - goto put; - } /* * If the Tegra DRM clients are backed by an IOMMU, push buffers are @@ -1082,9 +1068,38 @@ static int host1x_drm_probe(struct host1x_device *dev) * up the device tree appropriately. This is considered an problem * of integration, so care must be taken for the DT to be consistent. */ - domain = iommu_get_domain_for_dev(drm->dev->parent); + domain = iommu_get_domain_for_dev(dev->dev.parent); - if (domain && iommu_present(&platform_bus_type)) { + /* + * Tegra20 and Tegra30 don't support addressing memory beyond the + * 32-bit boundary, so the regular GATHER opcodes will always be + * sufficient and whether or not the host1x is attached to an IOMMU + * doesn't matter. + */ + if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32)) + return true; + + return domain != NULL; +} + +static int host1x_drm_probe(struct host1x_device *dev) +{ + struct drm_driver *driver = &tegra_drm_driver; + struct tegra_drm *tegra; + struct drm_device *drm; + int err; + + drm = drm_dev_alloc(driver, &dev->dev); + if (IS_ERR(drm)) + return PTR_ERR(drm); + + tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); + if (!tegra) { + err = -ENOMEM; + goto put; + } + + if (host1x_drm_wants_iommu(dev) && iommu_present(&platform_bus_type)) { tegra->domain = iommu_domain_alloc(&platform_bus_type); if (!tegra->domain) { err = -ENOMEM; From 273da5a046965ccf0ec79eb63f2d5173467e20fa Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 4 Feb 2020 14:59:25 +0100 Subject: [PATCH 653/658] drm/tegra: Reuse IOVA mapping where possible This partially reverts the DMA API support that was recently merged because it was causing performance regressions on older Tegra devices. Unfortunately, the cache maintenance performed by dma_map_sg() and dma_unmap_sg() causes performance to drop by a factor of 10. The right solution for this would be to cache mappings for buffers per consumer device, but that's a bit involved. Instead, we simply revert to the old behaviour of sharing IOVA mappings when we know that devices can do so (i.e. they share the same IOMMU domain). 
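The mechanism the call sites below rely on is the optional physical-address argument to host1x_bo_pin(): passing a non-NULL pointer asks for the already-known IOVA/physical address and skips a fresh mapping. A sketch of that caller-side pattern (dev, client, bo and shares_host1x_domain() are placeholders standing in for the display and job-pinning call sites, not real helpers):

        dma_addr_t phys_addr, *phys;
        struct sg_table *sgt;

        if (!iommu_get_domain_for_dev(dev) || shares_host1x_domain(client))
                phys = &phys_addr;      /* reuse the existing IOVA/physical address */
        else
                phys = NULL;            /* ask for an sg_table to dma_map_sg() */

        sgt = host1x_bo_pin(dev, bo, phys);
        /* only a non-NULL sgt needs dma_map_sg()/dma_unmap_sg() afterwards */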
Cc: # v5.5 Reported-by: Dmitry Osipenko Signed-off-by: Thierry Reding Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/gem.c | 10 +++++++- drivers/gpu/drm/tegra/plane.c | 44 ++++++++++++++++++++--------------- drivers/gpu/host1x/job.c | 32 ++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index bc15b430156d..c46b4d4190ac 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -60,8 +60,16 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, /* * If we've manually mapped the buffer object through the IOMMU, make * sure to return the IOVA address of our mapping. + * + * Similarly, for buffers that have been allocated by the DMA API the + * physical address can be used for devices that are not attached to + * an IOMMU. For these devices, callers must pass a valid pointer via + * the @phys argument. + * + * Imported buffers were also already mapped at import time, so the + * existing mapping can be reused. */ - if (phys && obj->mm) { + if (phys) { *phys = obj->iova; return NULL; } diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index cadcdd9ea427..9ccfb56e9b01 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -3,6 +3,8 @@ * Copyright (C) 2017 NVIDIA CORPORATION. All rights reserved. */ +#include + #include #include #include @@ -107,21 +109,27 @@ const struct drm_plane_funcs tegra_plane_funcs = { static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev); unsigned int i; int err; for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); + dma_addr_t phys_addr, *phys; + struct sg_table *sgt; - if (!dc->client.group) { - struct sg_table *sgt; + if (!domain || dc->client.group) + phys = &phys_addr; + else + phys = NULL; - sgt = host1x_bo_pin(dc->dev, &bo->base, NULL); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } + sgt = host1x_bo_pin(dc->dev, &bo->base, phys); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } + if (sgt) { err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (err == 0) { @@ -143,7 +151,7 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) state->iova[i] = sg_dma_address(sgt->sgl); state->sgt[i] = sgt; } else { - state->iova[i] = bo->iova; + state->iova[i] = phys_addr; } } @@ -156,9 +164,11 @@ unpin: struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); struct sg_table *sgt = state->sgt[i]; - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); + if (sgt) + dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + host1x_bo_unpin(dc->dev, &bo->base, sgt); state->iova[i] = DMA_MAPPING_ERROR; state->sgt[i] = NULL; } @@ -172,17 +182,13 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state) for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); + struct sg_table *sgt = state->sgt[i]; - if (!dc->client.group) { - struct sg_table *sgt = state->sgt[i]; - - if (sgt) { - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, - DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); - } - } + if (sgt) + dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + host1x_bo_unpin(dc->dev, 
&bo->base, sgt); state->iova[i] = DMA_MAPPING_ERROR; state->sgt[i] = NULL; } diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 25ca54de8fc5..0d53c08e9972 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -101,9 +102,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { struct host1x_client *client = job->client; struct device *dev = client->dev; + struct iommu_domain *domain; unsigned int i; int err; + domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { @@ -117,7 +120,19 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - if (client->group) + /* + * If the client device is not attached to an IOMMU, the + * physical address of the buffer object can be used. + * + * Similarly, when an IOMMU domain is shared between all + * host1x clients, the IOVA is already available, so no + * need to map the buffer object again. + * + * XXX Note that this isn't always safe to do because it + * relies on an assumption that no cache maintenance is + * needed on the buffer objects. + */ + if (!domain || client->group) phys = &phys_addr; else phys = NULL; @@ -176,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; + dma_addr_t *phys; unsigned int j; g->bo = host1x_bo_get(g->bo); @@ -184,7 +200,17 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - sgt = host1x_bo_pin(host->dev, g->bo, NULL); + /** + * If the host1x is not attached to an IOMMU, there is no need + * to map the buffer object for the host1x, since the physical + * address can simply be used. + */ + if (!iommu_get_domain_for_dev(host->dev)) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(host->dev, g->bo, phys); if (IS_ERR(sgt)) { err = PTR_ERR(sgt); goto unpin; @@ -214,7 +240,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->unpins[job->num_unpins].size = gather_size; phys_addr = iova_dma_addr(&host->iova, alloc); - } else { + } else if (sgt) { err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (!err) { From 98ae41adb252866158dd4e998551dfa85e612bed Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 4 Feb 2020 14:59:26 +0100 Subject: [PATCH 654/658] gpu: host1x: Set DMA direction only for DMA-mapped buffer objects The DMA direction is only used by the DMA API, so there is no use in setting it when a buffer object isn't mapped with the DMA API. 
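In other words, the unpin bookkeeping should only carry a DMA direction for entries that were actually mapped with dma_map_sg(), so the unpin path knows whether a matching dma_unmap_sg() is required. Simplified (placeholder structure, not the exact pin_job() control flow):

        if (sgt) {
                /* ...dma_map_sg(host->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE)... */
                job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
                job->unpins[job->num_unpins].dev = host->dev;
        }
        job->unpins[job->num_unpins].bo = g->bo;
        job->unpins[job->num_unpins].sgt = sgt;
        job->num_unpins++;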
Signed-off-by: Thierry Reding Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko --- drivers/gpu/host1x/job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 0d53c08e9972..6c689b5cc32c 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -248,6 +248,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } + job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; job->unpins[job->num_unpins].dev = host->dev; phys_addr = sg_dma_address(sgt->sgl); } @@ -255,7 +256,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->addr_phys[job->num_unpins] = phys_addr; job->gather_addr_phys[i] = phys_addr; - job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; From 6e5e41e2dc4e4413296d5a4af54ac92d7cd52317 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 14 Jan 2020 17:12:18 +0100 Subject: [PATCH 655/658] gfs2: fix O_SYNC write handling In gfs2_file_write_iter, for direct writes, the error checking in the buffered write fallback case is incomplete. This can cause inode write errors to go undetected. Fix and clean up gfs2_file_write_iter along the way. Based on a proposed fix by Christoph Hellwig . Fixes: 967bcc91b044 ("gfs2: iomap direct I/O support") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 51 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 21d032c4b077..cb26be6f4351 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -847,7 +847,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); - ssize_t written = 0, ret; + ssize_t ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -879,55 +879,46 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; - loff_t pos, endbyte; - ssize_t buffered; + ssize_t buffered, ret2; - written = gfs2_file_direct_write(iocb, from); - if (written < 0 || !iov_iter_count(from)) + ret = gfs2_file_direct_write(iocb, from); + if (ret < 0 || !iov_iter_count(from)) goto out_unlock; + iocb->ki_flags |= IOCB_DSYNC; current->backing_dev_info = inode_to_bdi(inode); - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); current->backing_dev_info = NULL; - if (unlikely(ret < 0)) + if (unlikely(buffered <= 0)) goto out_unlock; - buffered = ret; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT - * semantics. + * semantics. If the writeback or invalidate fails, only report + * the direct I/O range as we don't know if the buffered pages + * made it to disk. 
*/ - pos = iocb->ki_pos; - endbyte = pos + buffered - 1; - ret = filemap_write_and_wait_range(mapping, pos, endbyte); - if (!ret) { - iocb->ki_pos += buffered; - written += buffered; - invalidate_mapping_pages(mapping, - pos >> PAGE_SHIFT, - endbyte >> PAGE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } + iocb->ki_pos += buffered; + ret2 = generic_write_sync(iocb, buffered); + invalidate_mapping_pages(mapping, + (iocb->ki_pos - buffered) >> PAGE_SHIFT, + (iocb->ki_pos - 1) >> PAGE_SHIFT); + if (!ret || ret2 > 0) + ret += ret2; } else { current->backing_dev_info = inode_to_bdi(inode); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); current->backing_dev_info = NULL; - if (likely(ret > 0)) + if (likely(ret > 0)) { iocb->ki_pos += ret; + ret = generic_write_sync(iocb, ret); + } } out_unlock: inode_unlock(inode); - if (likely(ret > 0)) { - /* Handle various SYNC-type writes */ - ret = generic_write_sync(iocb, ret); - } - return written ? written : ret; + return ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, From 50d0def966a5f1237ba647e827a945e8ece4c10b Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Mon, 3 Feb 2020 09:43:57 +0800 Subject: [PATCH 656/658] nfsd: make nfsd_filecache_wq variable static Fix sparse warning: fs/nfsd/filecache.c:55:25: warning: symbol 'nfsd_filecache_wq' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Chen Zhou Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 23c1fa5da1e9..22e77ede9f14 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -52,7 +52,7 @@ struct nfsd_fcache_disposal { struct rcu_head rcu; }; -struct workqueue_struct *nfsd_filecache_wq __read_mostly; +static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; From 3d96208c30f84d6edf9ab4fac813306ac0d20c10 Mon Sep 17 00:00:00 2001 From: Roberto Bergantinos Corpas Date: Tue, 4 Feb 2020 11:32:56 +0100 Subject: [PATCH 657/658] sunrpc: expiry_time should be seconds not timeval When upcalling gssproxy, cache_head.expiry_time is set as a timeval, not seconds since boot. As such, RPC cache expiry logic will not clean expired objects created under auth.rpcsec.context cache. This has proven to cause kernel memory leaks on field. Using 64 bit variants of getboottime/timespec Expiration times have worked this way since 2010's c5b29f885afe "sunrpc: use seconds since boot in expiry cache". The gssproxy code introduced in 2012 added gss_proxy_save_rsc and introduced the bug. That's a while for this to lurk, but it required a bit of an extreme case to make it obvious. Signed-off-by: Roberto Bergantinos Corpas Cc: stable@vger.kernel.org Fixes: 030d794bf498 "SUNRPC: Use gssproxy upcall for server..." Tested-By: Frank Sorenson Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c62d1f10978b..cff77f096647 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1248,6 +1248,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, dprintk("RPC: No creds found!\n"); goto out; } else { + struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; @@ -1268,6 +1269,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, &expiry, GFP_KERNEL); if (status) goto out; + + getboottime64(&boot); + expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry; From 0ddad21d3e99c743a3aa473121dc5561679e26bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Dec 2019 09:48:27 -0800 Subject: [PATCH 658/658] pipe: use exclusive waits when reading or writing This makes the pipe code use separate wait-queues and exclusive waiting for readers and writers, avoiding a nasty thundering herd problem when there are lots of readers waiting for data on a pipe (or, less commonly, lots of writers waiting for a pipe to have space). While this isn't a common occurrence in the traditional "use a pipe as a data transport" case, where you typically only have a single reader and a single writer process, there is one common special case: using a pipe as a source of "locking tokens" rather than for data communication. In particular, the GNU make jobserver code ends up using a pipe as a way to limit parallelism, where each job consumes a token by reading a byte from the jobserver pipe, and releases the token by writing a byte back to the pipe. This pattern is fairly traditional on Unix, and works very well, but will waste a lot of time waking up a lot of processes when only a single reader needs to be woken up when a writer releases a new token. A simplified test-case of just this pipe interaction is to create 64 processes, and then pass a single token around between them (this test-case also intentionally passes another token that gets ignored to test the "wake up next" logic too, in case anybody wonders about it): #include int main(int argc, char **argv) { int fd[2], counters[2]; pipe(fd); counters[0] = 0; counters[1] = -1; write(fd[1], counters, sizeof(counters)); /* 64 processes */ fork(); fork(); fork(); fork(); fork(); fork(); do { int i; read(fd[0], &i, sizeof(i)); if (i < 0) continue; counters[0] = i+1; write(fd[1], counters, (1+(i & 1)) *sizeof(int)); } while (counters[0] < 1000000); return 0; } and in a perfect world, passing that token around should only cause one context switch per transfer, when the writer of a token causes a directed wakeup of just a single reader. But with the "writer wakes all readers" model we traditionally had, on my test box the above case causes more than an order of magnitude more scheduling: instead of the expected ~1M context switches, "perf stat" shows 231,852.37 msec task-clock # 15.857 CPUs utilized 11,250,961 context-switches # 0.049 M/sec 616,304 cpu-migrations # 0.003 M/sec 1,648 page-faults # 0.007 K/sec 1,097,903,998,514 cycles # 4.735 GHz 120,781,778,352 instructions # 0.11 insn per cycle 27,997,056,043 branches # 120.754 M/sec 283,581,233 branch-misses # 1.01% of all branches 14.621273891 seconds time elapsed 0.018243000 seconds user 3.611468000 seconds sys before this commit. 
After this commit, I get 5,229.55 msec task-clock # 3.072 CPUs utilized 1,212,233 context-switches # 0.232 M/sec 103,951 cpu-migrations # 0.020 M/sec 1,328 page-faults # 0.254 K/sec 21,307,456,166 cycles # 4.074 GHz 12,947,819,999 instructions # 0.61 insn per cycle 2,881,985,678 branches # 551.096 M/sec 64,267,015 branch-misses # 2.23% of all branches 1.702148350 seconds time elapsed 0.004868000 seconds user 0.110786000 seconds sys instead. Much better. [ Note! This kernel improvement seems to be very good at triggering a race condition in the make jobserver (in GNU make 4.2.1) for me. It's a long known bug that was fixed back in June 2017 by GNU make commit b552b0525198 ("[SV 51159] Use a non-blocking read with pselect to avoid hangs."). But there wasn't a new release of GNU make until 4.3 on Jan 19 2020, so a number of distributions may still have the buggy version. Some have backported the fix to their 4.2.1 release, though, and even without the fix it's quite timing-dependent whether the bug actually is hit. ] Josh Triplett says: "I've been hammering on your pipe fix patch (switching to exclusive wait queues) for a month or so, on several different systems, and I've run into no issues with it. The patch *substantially* improves parallel build times on large (~100 CPU) systems, both with parallel make and with other things that use make's pipe-based jobserver. All current distributions (including stable and long-term stable distributions) have versions of GNU make that no longer have the jobserver bug" Tested-by: Josh Triplett Signed-off-by: Linus Torvalds --- fs/coredump.c | 4 +-- fs/pipe.c | 67 +++++++++++++++++++++++++-------------- fs/splice.c | 8 ++--- include/linux/pipe_fs_i.h | 2 +- 4 files changed, 51 insertions(+), 30 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index b1ea7dfbd149..f8296a82d01d 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -517,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file) pipe_lock(pipe); pipe->readers++; pipe->writers--; - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); pipe_unlock(pipe); @@ -525,7 +525,7 @@ static void wait_for_dump_helpers(struct file *file) * We actually want wait_event_freezable() but then we need * to clear TIF_SIGPENDING and improve dump_interrupted(). 
*/ - wait_event_interruptible(pipe->wait, pipe->readers == 1); + wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); pipe_lock(pipe); pipe->readers--; diff --git a/fs/pipe.c b/fs/pipe.c index 57502c3c0fba..5a34d6c22d4c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -108,16 +108,19 @@ void pipe_double_lock(struct pipe_inode_info *pipe1, /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { - DEFINE_WAIT(wait); + DEFINE_WAIT(rdwait); + DEFINE_WAIT(wrwait); /* * Pipes are system-local resources, so sleeping on them * is considered a noninteractive wait: */ - prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); + prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE); pipe_unlock(pipe); schedule(); - finish_wait(&pipe->wait, &wait); + finish_wait(&pipe->rd_wait, &rdwait); + finish_wait(&pipe->wr_wait, &wrwait); pipe_lock(pipe); } @@ -286,7 +289,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) size_t total_len = iov_iter_count(to); struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; - bool was_full; + bool was_full, wake_next_reader = false; ssize_t ret; /* Null read succeeds. */ @@ -344,10 +347,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (!buf->len) { pipe_buf_release(pipe, buf); - spin_lock_irq(&pipe->wait.lock); + spin_lock_irq(&pipe->rd_wait.lock); tail++; pipe->tail = tail; - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); } total_len -= chars; if (!total_len) @@ -384,7 +387,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * no data. */ if (unlikely(was_full)) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -394,18 +397,23 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * since we've done any required wakeups and there's no need * to mark anything accessed. And we've dropped the lock. */ - if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0) + if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) return -ERESTARTSYS; __pipe_lock(pipe); was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); + wake_next_reader = true; } + if (pipe_empty(pipe->head, pipe->tail)) + wake_next_reader = false; __pipe_unlock(pipe); if (was_full) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } + if (wake_next_reader) + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); if (ret > 0) file_accessed(filp); return ret; @@ -437,6 +445,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) size_t total_len = iov_iter_count(from); ssize_t chars; bool was_empty = false; + bool wake_next_writer = false; /* Null write succeeds. */ if (unlikely(total_len == 0)) @@ -515,16 +524,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * it, either the reader will consume it or it'll still * be there for the next write. 
*/ - spin_lock_irq(&pipe->wait.lock); + spin_lock_irq(&pipe->rd_wait.lock); head = pipe->head; if (pipe_full(head, pipe->tail, pipe->max_usage)) { - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); continue; } pipe->head = head + 1; - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); /* Insert it into the buffer array */ buf = &pipe->bufs[head & mask]; @@ -576,14 +585,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) */ __pipe_unlock(pipe); if (was_empty) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - wait_event_interruptible(pipe->wait, pipe_writable(pipe)); + wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); __pipe_lock(pipe); was_empty = pipe_empty(pipe->head, pipe->tail); + wake_next_writer = true; } out: + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + wake_next_writer = false; __pipe_unlock(pipe); /* @@ -596,9 +608,11 @@ out: * wake up pending jobs */ if (was_empty) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } + if (wake_next_writer) + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { int err = file_update_time(filp); if (err) @@ -642,12 +656,15 @@ pipe_poll(struct file *filp, poll_table *wait) unsigned int head, tail; /* - * Reading only -- no need for acquiring the semaphore. + * Reading pipe state only -- no need for acquiring the semaphore. * * But because this is racy, the code has to add the * entry to the poll table _first_ .. */ - poll_wait(filp, &pipe->wait, wait); + if (filp->f_mode & FMODE_READ) + poll_wait(filp, &pipe->rd_wait, wait); + if (filp->f_mode & FMODE_WRITE) + poll_wait(filp, &pipe->wr_wait, wait); /* * .. and only then can you do the racy tests. 
That way, @@ -706,7 +723,8 @@ pipe_release(struct inode *inode, struct file *file) pipe->writers--; if (pipe->readers || pipe->writers) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -789,7 +807,8 @@ struct pipe_inode_info *alloc_pipe_info(void) GFP_KERNEL_ACCOUNT); if (pipe->bufs) { - init_waitqueue_head(&pipe->wait); + init_waitqueue_head(&pipe->rd_wait); + init_waitqueue_head(&pipe->wr_wait); pipe->r_counter = pipe->w_counter = 1; pipe->max_usage = pipe_bufs; pipe->ring_size = pipe_bufs; @@ -1007,7 +1026,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) static void wake_up_partner(struct pipe_inode_info *pipe) { - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->rd_wait); + wake_up_interruptible(&pipe->wr_wait); } static int fifo_open(struct inode *inode, struct file *filp) @@ -1118,13 +1138,13 @@ static int fifo_open(struct inode *inode, struct file *filp) err_rd: if (!--pipe->readers) - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->wr_wait); ret = -ERESTARTSYS; goto err; err_wr: if (!--pipe->writers) - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->rd_wait); ret = -ERESTARTSYS; goto err; @@ -1251,7 +1271,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; - wake_up_interruptible_all(&pipe->wait); + wake_up_interruptible_all(&pipe->rd_wait); + wake_up_interruptible_all(&pipe->wr_wait); return pipe->max_usage * PAGE_SIZE; out_revert_acct: diff --git a/fs/splice.c b/fs/splice.c index 3009652a41c8..d671936d0aad 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -165,8 +165,8 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { static void wakeup_pipe_readers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->rd_wait)) + wake_up_interruptible(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -462,8 +462,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->wr_wait)) + wake_up_interruptible(&pipe->wr_wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index dbcfa6892384..d5765039652a 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -47,7 +47,7 @@ struct pipe_buffer { **/ struct pipe_inode_info { struct mutex mutex; - wait_queue_head_t wait; + wait_queue_head_t rd_wait, wr_wait; unsigned int head; unsigned int tail; unsigned int max_usage;
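For reference, the wait/wake pairing that the split queues enable boils down to the following pattern (a simplified sketch of the waitqueue API usage, with tokens_available() standing in for the pipe_readable()/pipe_writable() predicates):

        static DECLARE_WAIT_QUEUE_HEAD(rd_wait);

        /* reader: sleep as an *exclusive* waiter until the predicate holds */
        if (wait_event_interruptible_exclusive(rd_wait, tokens_available()) < 0)
                return -ERESTARTSYS;

        /* writer: wake at most one exclusive reader after producing data,
         * instead of waking every sleeper on a single shared queue */
        wake_up_interruptible_sync_poll(&rd_wait, EPOLLIN | EPOLLRDNORM);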