diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 00704efe6398..f56132b0ae64 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -73,6 +73,8 @@ config DEVFREQ_GOV_PASSIVE device. This governor does not change the frequency by itself through sysfs entries. The passive governor recommends that devfreq device uses the OPP table to get the frequency/voltage. + Alternatively the governor can also be chosen to scale based on + the online CPUs current frequency. comment "DEVFREQ Drivers" diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index b094132bd20b..9cc57b083839 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -8,11 +8,103 @@ */ #include +#include +#include +#include #include #include +#include #include "governor.h" -static int devfreq_passive_get_target_freq(struct devfreq *devfreq, +struct devfreq_cpu_state { + unsigned int curr_freq; + unsigned int min_freq; + unsigned int max_freq; + unsigned int first_cpu; + struct device *cpu_dev; + struct opp_table *opp_table; +}; + +static unsigned long xlate_cpufreq_to_devfreq(struct devfreq_passive_data *data, + unsigned int cpu) +{ + unsigned int cpu_min_freq, cpu_max_freq, cpu_curr_freq_khz, cpu_percent; + unsigned long dev_min_freq, dev_max_freq, dev_max_state; + + struct devfreq_cpu_state *cpu_state = data->cpu_state[cpu]; + struct devfreq *devfreq = (struct devfreq *)data->this; + unsigned long *dev_freq_table = devfreq->profile->freq_table; + struct dev_pm_opp *opp = NULL, *p_opp = NULL; + unsigned long cpu_curr_freq, freq; + + if (!cpu_state || cpu_state->first_cpu != cpu || + !cpu_state->opp_table || !devfreq->opp_table) + return 0; + + cpu_curr_freq = cpu_state->curr_freq * 1000; + p_opp = devfreq_recommended_opp(cpu_state->cpu_dev, &cpu_curr_freq, 0); + if (IS_ERR(p_opp)) + return 0; + + opp = dev_pm_opp_xlate_required_opp(cpu_state->opp_table, + devfreq->opp_table, p_opp); + dev_pm_opp_put(p_opp); + + if (!IS_ERR(opp)) { + freq = dev_pm_opp_get_freq(opp); + dev_pm_opp_put(opp); + goto out; + } + + /* Use Interpolation if required opps is not available */ + cpu_min_freq = cpu_state->min_freq; + cpu_max_freq = cpu_state->max_freq; + cpu_curr_freq_khz = cpu_state->curr_freq; + + if (dev_freq_table) { + /* Get minimum frequency according to sorting order */ + dev_max_state = dev_freq_table[devfreq->profile->max_state - 1]; + if (dev_freq_table[0] < dev_max_state) { + dev_min_freq = dev_freq_table[0]; + dev_max_freq = dev_max_state; + } else { + dev_min_freq = dev_max_state; + dev_max_freq = dev_freq_table[0]; + } + } else { + dev_min_freq = dev_pm_qos_read_value(devfreq->dev.parent, + DEV_PM_QOS_MIN_FREQUENCY); + dev_max_freq = dev_pm_qos_read_value(devfreq->dev.parent, + DEV_PM_QOS_MAX_FREQUENCY); + + if (dev_max_freq <= dev_min_freq) + return 0; + } + cpu_percent = ((cpu_curr_freq_khz - cpu_min_freq) * 100) / cpu_max_freq - cpu_min_freq; + freq = dev_min_freq + mult_frac(dev_max_freq - dev_min_freq, cpu_percent, 100); + +out: + return freq; +} + +static int get_target_freq_with_cpufreq(struct devfreq *devfreq, + unsigned long *freq) +{ + struct devfreq_passive_data *p_data = + (struct devfreq_passive_data *)devfreq->data; + unsigned int cpu; + unsigned long target_freq = 0; + + for_each_online_cpu(cpu) + target_freq = max(target_freq, + xlate_cpufreq_to_devfreq(p_data, cpu)); + + *freq = target_freq; + + return 0; +} + +static int get_target_freq_with_devfreq(struct devfreq *devfreq, unsigned long *freq) { struct devfreq_passive_data *p_data @@ -23,14 +115,6 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq, int i, count; /* - * If the devfreq device with passive governor has the specific method - * to determine the next frequency, should use the get_target_freq() - * of struct devfreq_passive_data. - */ - if (p_data->get_target_freq) - return p_data->get_target_freq(devfreq, freq); - - /* * If the parent and passive devfreq device uses the OPP table, * get the next frequency by using the OPP table. */ @@ -98,6 +182,37 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq, return 0; } +static int devfreq_passive_get_target_freq(struct devfreq *devfreq, + unsigned long *freq) +{ + struct devfreq_passive_data *p_data = + (struct devfreq_passive_data *)devfreq->data; + int ret; + + /* + * If the devfreq device with passive governor has the specific method + * to determine the next frequency, should use the get_target_freq() + * of struct devfreq_passive_data. + */ + if (p_data->get_target_freq) + return p_data->get_target_freq(devfreq, freq); + + switch (p_data->parent_type) { + case DEVFREQ_PARENT_DEV: + ret = get_target_freq_with_devfreq(devfreq, freq); + break; + case CPUFREQ_PARENT_DEV: + ret = get_target_freq_with_cpufreq(devfreq, freq); + break; + default: + ret = -EINVAL; + dev_err(&devfreq->dev, "Invalid parent type\n"); + break; + } + + return ret; +} + static int devfreq_passive_notifier_call(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -130,16 +245,200 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb, return NOTIFY_DONE; } +static int cpufreq_passive_notifier_call(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct devfreq_passive_data *data = + container_of(nb, struct devfreq_passive_data, nb); + struct devfreq *devfreq = (struct devfreq *)data->this; + struct devfreq_cpu_state *cpu_state; + struct cpufreq_freqs *cpu_freq = ptr; + unsigned int curr_freq; + int ret; + + if (event != CPUFREQ_POSTCHANGE || !cpu_freq || + !data->cpu_state[cpu_freq->policy->cpu]) + return 0; + + cpu_state = data->cpu_state[cpu_freq->policy->cpu]; + if (cpu_state->curr_freq == cpu_freq->new) + return 0; + + /* Backup current freq and pre-update cpu state freq*/ + curr_freq = cpu_state->curr_freq; + cpu_state->curr_freq = cpu_freq->new; + + mutex_lock(&devfreq->lock); + ret = update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + if (ret) { + cpu_state->curr_freq = curr_freq; + dev_err(&devfreq->dev, "Couldn't update the frequency.\n"); + return ret; + } + + return 0; +} + +static int cpufreq_passive_register(struct devfreq_passive_data **p_data) +{ + struct devfreq_passive_data *data = *p_data; + struct devfreq *devfreq = (struct devfreq *)data->this; + struct device *dev = devfreq->dev.parent; + struct opp_table *opp_table = NULL; + struct devfreq_cpu_state *cpu_state; + struct cpufreq_policy *policy; + struct device *cpu_dev; + unsigned int cpu; + int ret; + + cpus_read_lock(); + + data->nb.notifier_call = cpufreq_passive_notifier_call; + ret = cpufreq_register_notifier(&data->nb, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret) { + dev_err(dev, "Couldn't register cpufreq notifier.\n"); + data->nb.notifier_call = NULL; + goto out; + } + + /* Populate devfreq_cpu_state */ + for_each_online_cpu(cpu) { + if (data->cpu_state[cpu]) + continue; + + policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -EINVAL; + goto out; + } else if (PTR_ERR(policy) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out; + } else if (IS_ERR(policy)) { + ret = PTR_ERR(policy); + dev_err(dev, "Couldn't get the cpufreq_poliy.\n"); + goto out; + } + + cpu_state = kzalloc(sizeof(*cpu_state), GFP_KERNEL); + if (!cpu_state) { + ret = -ENOMEM; + goto out; + } + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + dev_err(dev, "Couldn't get cpu device.\n"); + ret = -ENODEV; + goto out; + } + + opp_table = dev_pm_opp_get_opp_table(cpu_dev); + if (IS_ERR(devfreq->opp_table)) { + ret = PTR_ERR(opp_table); + goto out; + } + + cpu_state->cpu_dev = cpu_dev; + cpu_state->opp_table = opp_table; + cpu_state->first_cpu = cpumask_first(policy->related_cpus); + cpu_state->curr_freq = policy->cur; + cpu_state->min_freq = policy->cpuinfo.min_freq; + cpu_state->max_freq = policy->cpuinfo.max_freq; + data->cpu_state[cpu] = cpu_state; + + cpufreq_cpu_put(policy); + } + +out: + cpus_read_unlock(); + if (ret) + return ret; + + /* Update devfreq */ + mutex_lock(&devfreq->lock); + ret = update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + if (ret) + dev_err(dev, "Couldn't update the frequency.\n"); + + return ret; +} + +static int cpufreq_passive_unregister(struct devfreq_passive_data **p_data) +{ + struct devfreq_passive_data *data = *p_data; + struct devfreq_cpu_state *cpu_state; + int cpu; + + if (data->nb.notifier_call) + cpufreq_unregister_notifier(&data->nb, + CPUFREQ_TRANSITION_NOTIFIER); + + for_each_possible_cpu(cpu) { + cpu_state = data->cpu_state[cpu]; + if (cpu_state) { + if (cpu_state->opp_table) + dev_pm_opp_put_opp_table(cpu_state->opp_table); + kfree(cpu_state); + cpu_state = NULL; + } + } + + return 0; +} + +int register_parent_dev_notifier(struct devfreq_passive_data **p_data) +{ + struct notifier_block *nb = &(*p_data)->nb; + int ret = 0; + + switch ((*p_data)->parent_type) { + case DEVFREQ_PARENT_DEV: + nb->notifier_call = devfreq_passive_notifier_call; + ret = devfreq_register_notifier((struct devfreq *)(*p_data)->parent, nb, + DEVFREQ_TRANSITION_NOTIFIER); + break; + case CPUFREQ_PARENT_DEV: + ret = cpufreq_passive_register(p_data); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +int unregister_parent_dev_notifier(struct devfreq_passive_data **p_data) +{ + int ret = 0; + + switch ((*p_data)->parent_type) { + case DEVFREQ_PARENT_DEV: + WARN_ON(devfreq_unregister_notifier((struct devfreq *)(*p_data)->parent, + &(*p_data)->nb, + DEVFREQ_TRANSITION_NOTIFIER)); + break; + case CPUFREQ_PARENT_DEV: + cpufreq_passive_unregister(p_data); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + static int devfreq_passive_event_handler(struct devfreq *devfreq, unsigned int event, void *data) { struct devfreq_passive_data *p_data = (struct devfreq_passive_data *)devfreq->data; struct devfreq *parent = (struct devfreq *)p_data->parent; - struct notifier_block *nb = &p_data->nb; int ret = 0; - if (!parent) + if (p_data->parent_type == DEVFREQ_PARENT_DEV && !parent) return -EPROBE_DEFER; switch (event) { @@ -147,13 +446,11 @@ static int devfreq_passive_event_handler(struct devfreq *devfreq, if (!p_data->this) p_data->this = devfreq; - nb->notifier_call = devfreq_passive_notifier_call; - ret = devfreq_register_notifier(parent, nb, - DEVFREQ_TRANSITION_NOTIFIER); + ret = register_parent_dev_notifier(&p_data); break; + case DEVFREQ_GOV_STOP: - WARN_ON(devfreq_unregister_notifier(parent, nb, - DEVFREQ_TRANSITION_NOTIFIER)); + ret = unregister_parent_dev_notifier(&p_data); break; default: break; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 26ea0850be9b..e0093b7c805c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -280,6 +280,25 @@ struct devfreq_simple_ondemand_data { #if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) /** + * struct devfreq_cpu_state - holds the per-cpu state + * @freq: the current frequency of the cpu. + * @min_freq: the min frequency of the cpu. + * @max_freq: the max frequency of the cpu. + * @first_cpu: the cpumask of the first cpu of a policy. + * @dev: reference to cpu device. + * @opp_table: reference to cpu opp table. + * + * This structure stores the required cpu_state of a cpu. + * This is auto-populated by the governor. + */ +struct devfreq_cpu_state; + +enum devfreq_parent_dev_type { + DEVFREQ_PARENT_DEV, + CPUFREQ_PARENT_DEV, +}; + +/** * struct devfreq_passive_data - ``void *data`` fed to struct devfreq * and devfreq_add_device * @parent: the devfreq instance of parent device. @@ -290,13 +309,15 @@ struct devfreq_simple_ondemand_data { * using governors except for passive governor. * If the devfreq device has the specific method to decide * the next frequency, should use this callback. + * @parent_type: parent type of the device * @this: the devfreq instance of own device. * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER list + * @cpu_state: the state min/max/current frequency of all online cpu's * * The devfreq_passive_data have to set the devfreq instance of parent * device with governors except for the passive governor. But, don't need to - * initialize the 'this' and 'nb' field because the devfreq core will handle - * them. + * initialize the 'this', 'nb' and 'cpu_state' field because the devfreq core + * will handle them. */ struct devfreq_passive_data { /* Should set the devfreq instance of parent device */ @@ -305,9 +326,13 @@ struct devfreq_passive_data { /* Optional callback to decide the next frequency of passvice device */ int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + /* Should set the type of parent device */ + enum devfreq_parent_dev_type parent_type; + /* For passive governor's internal use. Don't need to set them */ struct devfreq *this; struct notifier_block nb; + struct devfreq_cpu_state *cpu_state[NR_CPUS]; }; #endif