diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 858b3339f381..662660852397 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -76,3 +76,20 @@ config EROFS_FS_ZIP If you don't want to enable compression feature, say N. +config EROFS_FS_PCPU_KTHREAD + bool "EROFS per-cpu decompression kthread workers" + depends on EROFS_FS_ZIP + help + Saying Y here enables per-CPU kthread workers pool to carry out + async decompression for low latencies on some architectures. + + If unsure, say N. + +config EROFS_FS_PCPU_KTHREAD_HIPRI + bool "EROFS high priority per-CPU kthread workers" + depends on EROFS_FS_ZIP && EROFS_FS_PCPU_KTHREAD + help + This permits EROFS to configure per-CPU kthread workers to run + at higher priority. + + If unsure, say N. diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 7596db389201..9a7318251445 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -7,7 +7,7 @@ #include "zdata.h" #include "compress.h" #include - +#include #include /* @@ -125,24 +125,128 @@ typedef tagptr1_t compressed_page_t; static struct workqueue_struct *z_erofs_workqueue __read_mostly; -void z_erofs_exit_zip_subsystem(void) +#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD +static struct kthread_worker __rcu **z_erofs_pcpu_workers; + +static void erofs_destroy_percpu_workers(void) { - destroy_workqueue(z_erofs_workqueue); - z_erofs_destroy_pcluster_pool(); + struct kthread_worker *worker; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + worker = rcu_dereference_protected( + z_erofs_pcpu_workers[cpu], 1); + rcu_assign_pointer(z_erofs_pcpu_workers[cpu], NULL); + if (worker) + kthread_destroy_worker(worker); + } + kfree(z_erofs_pcpu_workers); } -static inline int z_erofs_init_workqueue(void) +static struct kthread_worker *erofs_init_percpu_worker(int cpu) { - const unsigned int onlinecpus = num_possible_cpus(); + struct kthread_worker *worker = + kthread_create_worker_on_cpu(cpu, 0, "erofs_worker/%u", cpu); - /* - * no need to spawn too many threads, limiting threads could minimum - * scheduling overhead, perhaps per-CPU threads should be better? - */ - z_erofs_workqueue = alloc_workqueue("erofs_unzipd", - WQ_UNBOUND | WQ_HIGHPRI, - onlinecpus + onlinecpus / 4); - return z_erofs_workqueue ? 0 : -ENOMEM; + if (IS_ERR(worker)) + return worker; + if (IS_ENABLED(CONFIG_EROFS_FS_PCPU_KTHREAD_HIPRI)) + sched_set_fifo_low(worker->task); + else + sched_set_normal(worker->task, 0); + return worker; +} + +static int erofs_init_percpu_workers(void) +{ + struct kthread_worker *worker; + unsigned int cpu; + + z_erofs_pcpu_workers = kcalloc(num_possible_cpus(), + sizeof(struct kthread_worker *), GFP_ATOMIC); + if (!z_erofs_pcpu_workers) + return -ENOMEM; + + for_each_online_cpu(cpu) { /* could miss cpu{off,on}line? */ + worker = erofs_init_percpu_worker(cpu); + if (!IS_ERR(worker)) + rcu_assign_pointer(z_erofs_pcpu_workers[cpu], worker); + } + return 0; +} +#else +static inline void erofs_destroy_percpu_workers(void) {} +static inline int erofs_init_percpu_workers(void) { return 0; } +#endif + +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_EROFS_FS_PCPU_KTHREAD) +static DEFINE_SPINLOCK(z_erofs_pcpu_worker_lock); +static enum cpuhp_state erofs_cpuhp_state; + +static int erofs_cpu_online(unsigned int cpu) +{ + struct kthread_worker *worker, *old; + + worker = erofs_init_percpu_worker(cpu); + if (IS_ERR(worker)) + return PTR_ERR(worker); + + spin_lock(&z_erofs_pcpu_worker_lock); + old = rcu_dereference_protected(z_erofs_pcpu_workers[cpu], + lockdep_is_held(&z_erofs_pcpu_worker_lock)); + if (!old) + rcu_assign_pointer(z_erofs_pcpu_workers[cpu], worker); + spin_unlock(&z_erofs_pcpu_worker_lock); + if (old) + kthread_destroy_worker(worker); + return 0; +} + +static int erofs_cpu_offline(unsigned int cpu) +{ + struct kthread_worker *worker; + + spin_lock(&z_erofs_pcpu_worker_lock); + worker = rcu_dereference_protected(z_erofs_pcpu_workers[cpu], + lockdep_is_held(&z_erofs_pcpu_worker_lock)); + rcu_assign_pointer(z_erofs_pcpu_workers[cpu], NULL); + spin_unlock(&z_erofs_pcpu_worker_lock); + + synchronize_rcu(); + if (worker) + kthread_destroy_worker(worker); + return 0; +} + +static int erofs_cpu_hotplug_init(void) +{ + int state; + + state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "fs/erofs:online", erofs_cpu_online, erofs_cpu_offline); + if (state < 0) + return state; + + erofs_cpuhp_state = state; + return 0; +} + +static void erofs_cpu_hotplug_destroy(void) +{ + if (erofs_cpuhp_state) + cpuhp_remove_state_nocalls(erofs_cpuhp_state); +} +#else /* !CONFIG_HOTPLUG_CPU || !CONFIG_EROFS_FS_PCPU_KTHREAD */ +static inline int erofs_cpu_hotplug_init(void) { return 0; } +static inline void erofs_cpu_hotplug_destroy(void) {} +#endif + +void z_erofs_exit_zip_subsystem(void) +{ + erofs_cpu_hotplug_destroy(); + erofs_destroy_percpu_workers(); + destroy_workqueue(z_erofs_workqueue); + z_erofs_destroy_pcluster_pool(); } int __init z_erofs_init_zip_subsystem(void) @@ -150,10 +254,29 @@ int __init z_erofs_init_zip_subsystem(void) int err = z_erofs_create_pcluster_pool(); if (err) - return err; - err = z_erofs_init_workqueue(); + goto out_error_pcluster_pool; + + z_erofs_workqueue = alloc_workqueue("erofs_worker", + WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus()); + if (!z_erofs_workqueue) + goto out_error_workqueue_init; + + err = erofs_init_percpu_workers(); if (err) - z_erofs_destroy_pcluster_pool(); + goto out_error_pcpu_worker; + + err = erofs_cpu_hotplug_init(); + if (err < 0) + goto out_error_cpuhp_init; + return err; + +out_error_cpuhp_init: + erofs_destroy_percpu_workers(); +out_error_pcpu_worker: + destroy_workqueue(z_erofs_workqueue); +out_error_workqueue_init: + z_erofs_destroy_pcluster_pool(); +out_error_pcluster_pool: return err; } @@ -782,6 +905,12 @@ err_out: } static void z_erofs_decompressqueue_work(struct work_struct *work); +#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD +static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work) +{ + z_erofs_decompressqueue_work((struct work_struct *)work); +} +#endif static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, bool sync, int bios) { @@ -799,7 +928,22 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, return; /* Use workqueue and sync decompression for atomic contexts only */ if (in_atomic() || irqs_disabled()) { +#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD + struct kthread_worker *worker; + + rcu_read_lock(); + worker = rcu_dereference( + z_erofs_pcpu_workers[raw_smp_processor_id()]); + if (!worker) { + INIT_WORK(&io->u.work, z_erofs_decompressqueue_work); + queue_work(z_erofs_workqueue, &io->u.work); + } else { + kthread_queue_work(worker, &io->u.kthread_work); + } + rcu_read_unlock(); +#else queue_work(z_erofs_workqueue, &io->u.work); +#endif sbi->ctx.readahead_sync_decompress = true; return; } @@ -1207,7 +1351,12 @@ jobqueue_init(struct super_block *sb, *fg = true; goto fg_out; } +#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD + kthread_init_work(&q->u.kthread_work, + z_erofs_decompressqueue_kthread_work); +#else INIT_WORK(&q->u.work, z_erofs_decompressqueue_work); +#endif } else { fg_out: q = fgq; @@ -1348,7 +1497,7 @@ submit_bio_retry: /* * although background is preferred, no one is pending for submission. - * don't issue workqueue for decompression but drop it directly instead. + * don't issue decompression but drop it directly instead. */ if (!*force_fg && !nr_bios) { kvfree(q[JQ_SUBMIT]); diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index 60bf396164ec..f2118ce60e46 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -7,6 +7,7 @@ #ifndef __EROFS_FS_ZDATA_H #define __EROFS_FS_ZDATA_H +#include #include "internal.h" #include "zpvec.h" @@ -92,6 +93,7 @@ struct z_erofs_decompressqueue { union { struct completion done; struct work_struct work; + struct kthread_work kthread_work; } u; };