Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue changes from Tejun Heo:
 "Surprisingly, Lai and I didn't break too many things implementing
  custom pools and stuff last time around and there aren't any follow-up
  changes necessary at this point.

  The only change in this pull request is Viresh's patches to make some
  per-cpu workqueues to behave as unbound workqueues dependent on a boot
  param whose default can be configured via a config option.  This leads
  to higher processing overhead / lower bandwidth as more work items are
  bounced across CPUs; however, it can lead to noticeable powersave in
  certain configurations - ~10% w/ idlish constant workload on a
  big.LITTLE configuration according to Viresh.

  This is because per-cpu workqueues interfere with how the scheduler
  perceives whether or not each CPU is idle by forcing pinned tasks on
  them, which makes the scheduler's power-aware scheduling decisions
  less effective.

  Its effectiveness is likely less pronounced on homogenous
  configurations and this type of optimization can probably be made
  automatic; however, the changes are pretty minimal and the affected
  workqueues are clearly marked, so it's an easy gain for some
  configurations for the time being with pretty unintrusive changes."

* 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  fbcon: queue work on power efficient wq
  block: queue work on power efficient wq
  PHYLIB: queue work on system_power_efficient_wq
  workqueue: Add system wide power_efficient workqueues
  workqueues: Introduce new flag WQ_POWER_EFFICIENT for power oriented workqueues
This commit is contained in:
Linus Torvalds 2013-07-02 19:53:30 -07:00
commit f317ff9eed
9 changed files with 113 additions and 12 deletions

View file

@ -3341,6 +3341,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that this also can be controlled per-workqueue for
workqueues visible under /sys/bus/workqueue/.
workqueue.power_efficient
Per-cpu workqueues are generally preferred because
they show better performance thanks to cache
locality; unfortunately, per-cpu workqueues tend to
be more power hungry than unbound workqueues.
Enabling this makes the per-cpu workqueues which
were observed to contribute significantly to power
consumption unbound, leading to measurably lower
power usage at the cost of small performance
overhead.
The default value of this parameter is determined by
the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.

View file

@ -3180,7 +3180,8 @@ int __init blk_dev_init(void)
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
WQ_MEM_RECLAIM | WQ_HIGHPRI |
WQ_POWER_EFFICIENT, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");

View file

@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc)
if (atomic_long_dec_and_test(&ioc->refcount)) {
spin_lock_irqsave(&ioc->lock, flags);
if (!hlist_empty(&ioc->icq_list))
schedule_work(&ioc->release_work);
queue_work(system_power_efficient_wq,
&ioc->release_work);
else
free_ioc = true;
spin_unlock_irqrestore(&ioc->lock, flags);

View file

@ -1489,9 +1489,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, 0);
else if (intv)
queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
@ -1534,7 +1536,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_lock_irq(&ev->lock);
ev->clearing |= mask;
if (!ev->block)
mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
mod_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, 0);
spin_unlock_irq(&ev->lock);
}
@ -1627,7 +1630,8 @@ static void disk_check_events(struct disk_events *ev,
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, intv);
spin_unlock_irq(&ev->lock);

View file

@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev,
{
phydev->adjust_state = handler;
schedule_delayed_work(&phydev->state_queue, HZ);
queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
}
/**
@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
disable_irq_nosync(irq);
atomic_inc(&phydev->irq_disable);
schedule_work(&phydev->phy_queue);
queue_work(system_power_efficient_wq, &phydev->phy_queue);
return IRQ_HANDLED;
}
@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work)
/* reschedule state queue work to run as soon as possible */
cancel_delayed_work_sync(&phydev->state_queue);
schedule_delayed_work(&phydev->state_queue, 0);
queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
return;
@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work)
if (err < 0)
phy_error(phydev);
schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
PHY_STATE_TIME * HZ);
}
static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,

View file

@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
struct fb_info *info = (struct fb_info *) dev_addr;
struct fbcon_ops *ops = info->fbcon_par;
schedule_work(&info->queue);
queue_work(system_power_efficient_wq, &info->queue);
mod_timer(&ops->cursor_timer, jiffies + HZ/5);
}

View file

@ -303,6 +303,33 @@ enum {
WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
/*
* Per-cpu workqueues are generally preferred because they tend to
* show better performance thanks to cache locality. Per-cpu
* workqueues exclude the scheduler from choosing the CPU to
* execute the worker threads, which has an unfortunate side effect
* of increasing power consumption.
*
* The scheduler considers a CPU idle if it doesn't have any task
* to execute and tries to keep idle cores idle to conserve power;
* however, for example, a per-cpu work item scheduled from an
* interrupt handler on an idle CPU will force the scheduler to
* excute the work item on that CPU breaking the idleness, which in
* turn may lead to more scheduling choices which are sub-optimal
* in terms of power consumption.
*
* Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
* but become unbound if workqueue.power_efficient kernel param is
* specified. Per-cpu workqueues which are identified to
* contribute significantly to power-consumption are identified and
* marked with this flag and enabling the power_efficient mode
* leads to noticeable power saving at the cost of small
* performance disadvantage.
*
* http://thread.gmane.org/gmane.linux.kernel/1480396
*/
WQ_POWER_EFFICIENT = 1 << 7,
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
@ -333,11 +360,19 @@ enum {
*
* system_freezable_wq is equivalent to system_wq except that it's
* freezable.
*
* *_power_efficient_wq are inclined towards saving power and converted
* into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
* they are same as their non-power-efficient counterparts - e.g.
* system_power_efficient_wq is identical to system_wq if
* 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;
static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
{

View file

@ -262,6 +262,26 @@ config PM_GENERIC_DOMAINS
bool
depends on PM
config WQ_POWER_EFFICIENT_DEFAULT
bool "Enable workqueue power-efficient mode by default"
depends on PM
default n
help
Per-cpu workqueues are generally preferred because they show
better performance thanks to cache locality; unfortunately,
per-cpu workqueues tend to be more power hungry than unbound
workqueues.
Enabling workqueue.power_efficient kernel parameter makes the
per-cpu workqueues which were observed to contribute
significantly to power consumption unbound, leading to measurably
lower power usage at the cost of small performance overhead.
This config option determines whether workqueue.power_efficient
is enabled by default.
If in doubt, say N.
config PM_GENERIC_DOMAINS_SLEEP
def_bool y
depends on PM_SLEEP && PM_GENERIC_DOMAINS

View file

@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
/* see the comment above the definition of WQ_POWER_EFFICIENT */
#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
static bool wq_power_efficient = true;
#else
static bool wq_power_efficient;
#endif
module_param_named(power_efficient, wq_power_efficient, bool, 0444);
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@ -305,6 +314,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
@ -4086,6 +4099,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
/* see the comment above the definition of WQ_POWER_EFFICIENT */
if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
flags |= WQ_UNBOUND;
/* allocate wq and format name */
if (flags & WQ_UNBOUND)
tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@ -4985,8 +5002,15 @@ static int __init init_workqueues(void)
WQ_UNBOUND_MAX_ACTIVE);
system_freezable_wq = alloc_workqueue("events_freezable",
WQ_FREEZABLE, 0);
system_power_efficient_wq = alloc_workqueue("events_power_efficient",
WQ_POWER_EFFICIENT, 0);
system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
WQ_FREEZABLE | WQ_POWER_EFFICIENT,
0);
BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
!system_unbound_wq || !system_freezable_wq);
!system_unbound_wq || !system_freezable_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq);
return 0;
}
early_initcall(init_workqueues);