/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *      Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *      Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this sem, as the top level hotplug notifier handler takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)                                    \
static int lock_policy_rwsem_##mode                                     \
(int cpu)                                                               \
{                                                                       \
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
                up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
                return -1;                                              \
        }                                                               \
                                                                        \
        return 0;                                                       \
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);
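
/*
 * The two instantiations above expand to lock_policy_rwsem_read(int cpu)
 * and lock_policy_rwsem_write(int cpu): each resolves the CPU to its
 * policy_cpu, takes that CPU's cpu_policy_rwsem in the given mode, and
 * backs out with -1 if the CPU went offline in the meantime.
 */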

static void unlock_policy_rwsem_read(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * Each list is serialized by its own notifier head (blocking
 * and SRCU, respectively).
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
        struct cpufreq_policy *data;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                goto err_out;

        /* get the cpufreq driver */
        spin_lock_irqsave(&cpufreq_driver_lock, flags);

        if (!cpufreq_driver)
                goto err_out_unlock;

        if (!try_module_get(cpufreq_driver->owner))
                goto err_out_unlock;


        /* get the CPU */
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data)
                goto err_out_put_module;

        if (!kobject_get(&data->kobj))
                goto err_out_put_module;

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return data;

err_out_put_module:
        module_put(cpufreq_driver->owner);
err_out_unlock:
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
        return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
        kobject_put(&data->kobj);
        module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
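
/*
 * Illustrative only (not part of the original file): a typical caller
 * pairs cpufreq_cpu_get() with cpufreq_cpu_put(), e.g.:
 *
 *      struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *      if (policy) {
 *              use(policy);            // hypothetical helper
 *              cpufreq_cpu_put(policy);
 *      }
 *
 * The get/put pair holds both a kobject reference on the policy and a
 * module reference on the cpufreq driver.
 */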


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                pr_debug("saving %lu as reference value for loops_per_jiffy; "
                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
            (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                pr_debug("scaling loops_per_jiffy to %lu "
                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        return;
}
#endif


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
        struct cpufreq_policy *policy;

        BUG_ON(irqs_disabled());

        freqs->flags = cpufreq_driver->flags;
        pr_debug("notification %u of frequency transition to %u kHz\n",
                state, freqs->new);

        policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
        switch (state) {

        case CPUFREQ_PRECHANGE:
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
                 */
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
                                pr_debug("Warning: CPU frequency is"
                                        " %u, cpufreq assumed %u kHz.\n",
                                        freqs->old, policy->cur);
                                freqs->old = policy->cur;
                        }
                }
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;

        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
                trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
                break;
        }
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
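
/*
 * Illustrative only (not in the original file): a scaling driver's
 * ->target() callback typically brackets the actual hardware change
 * with the two notifications, e.g.:
 *
 *      struct cpufreq_freqs freqs = {
 *              .cpu = policy->cpu,
 *              .old = policy->cur,
 *              .new = target_freq,     // chosen from the driver's table
 *      };
 *
 *      cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *      write_hw_frequency(target_freq);        // hypothetical hw access
 *      cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */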



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
        struct cpufreq_governor *t;

        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
                if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;

        return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strnicmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else if (cpufreq_driver->target) {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = __find_governor(str_governor);

                if (t == NULL) {
                        int ret;

                        mutex_unlock(&cpufreq_governor_mutex);
                        ret = request_module("cpufreq_%s", str_governor);
                        mutex_lock(&cpufreq_governor_mutex);

                        if (ret == 0)
                                t = __find_governor(str_governor);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}

/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);

static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)                    \
static ssize_t store_##file_name                                        \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        unsigned int ret = -EINVAL;                                     \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
        ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
                                                                        \
        return ret ? ret : count;                                       \
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
{
        unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
                return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
                                policy->governor->name);
        return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int ret = -EINVAL;
        char    str_governor[16];
        struct cpufreq_policy new_policy;

        ret = cpufreq_get_policy(&new_policy, policy->cpu);
        if (ret)
                return ret;

        ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;

        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
                                                &new_policy.governor))
                return -EINVAL;

        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
        ret = __cpufreq_set_policy(policy, &new_policy);

        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret)
                return ret;
        else
                return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!cpufreq_driver->target) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        if (cpumask_empty(policy->related_cpus))
                return show_cpus(policy->cpus, buf);
        return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int freq = 0;
        unsigned int ret;

        if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;

        ret = sscanf(buf, "%u", &freq);
        if (ret != 1)
                return -EINVAL;

        policy->governor->store_setspeed(policy, freq);

        return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
        unsigned int limit;
        int ret;
        if (cpufreq_driver->bios_limit) {
                ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
                if (!ret)
                        return sprintf(buf, "%u\n", limit);
        }
        return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        &scaling_setspeed.attr,
        NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_read(policy->cpu) < 0)
                goto fail;

        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;

        unlock_policy_rwsem_read(policy->cpu);
fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_write(policy->cpu) < 0)
                goto fail;

        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;

        unlock_policy_rwsem_write(policy->cpu);
fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        pr_debug("last reference is dropped\n");
        complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};
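
/*
 * Illustrative note (not in the original file): together with the
 * attributes above, this ktype backs the per-policy sysfs directory, e.g.:
 *
 *      /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *      /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
 *      /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 *
 * Reads and writes funnel through show()/store() above, which take a
 * policy reference and the policy rwsem before calling the attribute's
 * show/store method.
 */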

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
                                  struct cpufreq_policy *policy,
                                  struct device *dev)
{
        int ret = 0;
#ifdef CONFIG_SMP
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        struct cpufreq_governor *gov;

        gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
        if (gov) {
                policy->governor = gov;
                pr_debug("Restoring governor %s for cpu %d\n",
                       policy->governor->name, cpu);
        }
#endif

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;

                if (cpu == j)
                        continue;

                /* Check for existing affected CPUs.
                 * They may not be aware of it due to CPU Hotplug.
                 * cpufreq_cpu_put is called when the device is removed
                 * in __cpufreq_remove_dev()
                 */
                managed_policy = cpufreq_cpu_get(j);
                if (unlikely(managed_policy)) {

                        /* Set proper policy_cpu */
                        unlock_policy_rwsem_write(cpu);
                        per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

                        if (lock_policy_rwsem_write(cpu) < 0) {
                                /* Should not go through policy unlock path */
                                if (cpufreq_driver->exit)
                                        cpufreq_driver->exit(policy);
                                cpufreq_cpu_put(managed_policy);
                                return -EBUSY;
                        }

                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        cpumask_copy(managed_policy->cpus, policy->cpus);
                        per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
                        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

                        pr_debug("CPU already managed, adding link\n");
                        ret = sysfs_create_link(&dev->kobj,
                                                &managed_policy->kobj,
                                                "cpufreq");
                        if (ret)
                                cpufreq_cpu_put(managed_policy);
                        /*
                         * Success. We only needed to be added to the mask.
                         * Call driver->exit() because only the cpu parent of
                         * the kobj needed to call init().
                         */
                        if (cpufreq_driver->exit)
                                cpufreq_driver->exit(policy);

                        if (!ret)
                                return 1;
                        else
                                return ret;
                }
        }
#endif
        return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
                                   struct cpufreq_policy *policy)
{
        unsigned int j;
        int ret = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;
                struct device *cpu_dev;

                if (j == cpu)
                        continue;
                if (!cpu_online(j))
                        continue;

                pr_debug("CPU %u already managed, adding link\n", j);
                managed_policy = cpufreq_cpu_get(cpu);
                cpu_dev = get_cpu_device(j);
                ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret) {
                        cpufreq_cpu_put(managed_policy);
                        return ret;
                }
        }
        return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
                                     struct cpufreq_policy *policy,
                                     struct device *dev)
{
        struct cpufreq_policy new_policy;
        struct freq_attr **drv_attr;
        unsigned long flags;
        int ret = 0;
        unsigned int j;

        /* prepare interface data */
        ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
                                   &dev->kobj, "cpufreq");
        if (ret)
                return ret;

        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while ((drv_attr) && (*drv_attr)) {
                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
                if (ret)
                        goto err_out_kobj_put;
                drv_attr++;
        }
        if (cpufreq_driver->get) {
                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->target) {
                ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->bios_limit) {
                ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
                if (ret)
                        goto err_out_kobj_put;
        }

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus) {
                if (!cpu_online(j))
                        continue;
                per_cpu(cpufreq_cpu_data, j) = policy;
                per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
        }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = cpufreq_add_dev_symlink(cpu, policy);
        if (ret)
                goto err_out_kobj_put;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        /* assure that the starting sequence is run in __cpufreq_set_policy */
        policy->governor = NULL;

        /* set default policy */
        ret = __cpufreq_set_policy(policy, &new_policy);
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret) {
                pr_debug("setting policy failed\n");
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);
        }
        return ret;

err_out_kobj_put:
        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);
        return ret;
}

/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int ret = 0, found = 0;
        struct cpufreq_policy *policy;
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        int sibling;
#endif

        if (cpu_is_offline(cpu))
                return 0;

        pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
        /* check whether a different CPU already registered this
         * CPU because it is in the same boat. */
        policy = cpufreq_cpu_get(cpu);
        if (unlikely(policy)) {
                cpufreq_cpu_put(policy);
                return 0;
        }
#endif

        if (!try_module_get(cpufreq_driver->owner)) {
                ret = -EINVAL;
                goto module_out;
        }

        ret = -ENOMEM;
        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
        if (!policy)
                goto nomem_out;

        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
                goto err_free_policy;

        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
                goto err_free_cpumask;

        policy->cpu = cpu;
        cpumask_copy(policy->cpus, cpumask_of(cpu));

        /* Initially set CPU itself as the policy_cpu */
        per_cpu(cpufreq_policy_cpu, cpu) = cpu;
        ret = (lock_policy_rwsem_write(cpu) < 0);
        WARN_ON(ret);

        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);

        /* Set governor before ->init, so that the driver can check it */
#ifdef CONFIG_HOTPLUG_CPU
        for_each_online_cpu(sibling) {
                struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
                if (cp && cp->governor &&
                    (cpumask_test_cpu(cpu, cp->related_cpus))) {
                        policy->governor = cp->governor;
                        found = 1;
                        break;
                }
        }
#endif
        if (!found)
                policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq driver must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                pr_debug("initialization failed\n");
                goto err_unlock_policy;
        }
        policy->user_policy.min = policy->min;
        policy->user_policy.max = policy->max;

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                     CPUFREQ_START, policy);

        ret = cpufreq_add_dev_policy(cpu, policy, dev);
        if (ret) {
                if (ret > 0)
                        /* This is a managed cpu, symlink created,
                           exit with 0 */
                        ret = 0;
                goto err_unlock_policy;
        }

        ret = cpufreq_add_dev_interface(cpu, policy, dev);
        if (ret)
                goto err_out_unregister;

        unlock_policy_rwsem_write(cpu);

        kobject_uevent(&policy->kobj, KOBJ_ADD);
        module_put(cpufreq_driver->owner);
        pr_debug("initialization complete\n");

        return 0;


err_out_unregister:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
        unlock_policy_rwsem_write(cpu);
        free_cpumask_var(policy->related_cpus);
err_free_cpumask:
        free_cpumask_var(policy->cpus);
err_free_policy:
        kfree(policy);
nomem_out:
        module_put(cpufreq_driver->owner);
module_out:
        return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        unsigned long flags;
        struct cpufreq_policy *data;
        struct kobject *kobj;
        struct completion *cmp;
#ifdef CONFIG_SMP
        struct device *cpu_dev;
        unsigned int j;
#endif

        pr_debug("unregistering CPU %u\n", cpu);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
        /* if this isn't the CPU which is the parent of the kobj, we
         * only need to unlink, put and exit
         */
        if (unlikely(cpu != data->cpu)) {
                pr_debug("removing link\n");
                cpumask_clear_cpu(cpu, data->cpus);
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                kobj = &dev->kobj;
                cpufreq_cpu_put(data);
                unlock_policy_rwsem_write(cpu);
                sysfs_remove_link(kobj, "cpufreq");
                return 0;
        }
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
        strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
                        CPUFREQ_NAME_LEN);
#endif

        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
         * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
         * the sysfs links afterwards.
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        per_cpu(cpufreq_cpu_data, j) = NULL;
                }
        }

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
                        strncpy(per_cpu(cpufreq_cpu_governor, j),
                                data->governor->name, CPUFREQ_NAME_LEN);
#endif
                        cpu_dev = get_cpu_device(j);
                        kobj = &cpu_dev->kobj;
                        unlock_policy_rwsem_write(cpu);
                        sysfs_remove_link(kobj, "cpufreq");
                        lock_policy_rwsem_write(cpu);
                        cpufreq_cpu_put(data);
                }
        }
#else
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        kobj = &data->kobj;
        cmp = &data->kobj_unregister;
        unlock_policy_rwsem_write(cpu);
        kobject_put(kobj);

        /* we need to make sure that the underlying kobj is actually
         * not referenced anymore by anybody before we proceed with
         * unloading.
         */
        pr_debug("waiting for dropping of refcount\n");
        wait_for_completion(cmp);
        pr_debug("wait complete\n");

        lock_policy_rwsem_write(cpu);
        if (cpufreq_driver->exit)
                cpufreq_driver->exit(data);
        unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
        /* when the CPU which is the parent of the kobj is hotplugged
         * offline, check for siblings, and create cpufreq sysfs interface
         * and symlinks
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                /* first sibling now owns the new sysfs dir */
                cpumask_clear_cpu(cpu, data->cpus);
                cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);

                /* finally remove our own symlink */
                lock_policy_rwsem_write(cpu);
                __cpufreq_remove_dev(dev, sif);
        }
#endif

        free_cpumask_var(data->related_cpus);
        free_cpumask_var(data->cpus);
        kfree(data);

        return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int retval;

        if (cpu_is_offline(cpu))
                return 0;

        if (unlikely(lock_policy_rwsem_write(cpu)))
                BUG();

        retval = __cpufreq_remove_dev(dev, sif);
        return retval;
}


static void handle_update(struct work_struct *work)
{
        struct cpufreq_policy *policy =
                container_of(work, struct cpufreq_policy, update);
        unsigned int cpu = policy->cpu;
        pr_debug("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
}

/**
 *      cpufreq_out_of_sync - the actual and saved CPU frequencies differ;
 *      we're in deep trouble.
 *      @cpu: cpu number
 *      @old_freq: CPU frequency the kernel thinks the CPU runs at
 *      @new_freq: CPU frequency the CPU actually runs at
 *
 *      We adjust to the current frequency first, and need to clean up later.
 *      So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
                                unsigned int new_freq)
{
        struct cpufreq_freqs freqs;

        pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
               "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

        freqs.cpu = cpu;
        freqs.old = old_freq;
        freqs.new = new_freq;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * The return value is the same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->cur;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->max;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
        unsigned int ret_freq = 0;

        if (!cpufreq_driver->get)
                return ret_freq;

        ret_freq = cpufreq_driver->get(cpu);

        if (ret_freq && policy->cur &&
                !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                /* verify no discrepancy between actual and
                                        saved value exists */
                if (unlikely(ret_freq != policy->cur)) {
                        cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
                        schedule_work(&policy->update);
                }
        }

        return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current frequency of the CPU.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
        unsigned int ret_freq = 0;
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

        if (!policy)
                goto out;

        if (unlikely(lock_policy_rwsem_read(cpu)))
                goto out_policy;

        ret_freq = __cpufreq_get(cpu);

        unlock_policy_rwsem_read(cpu);

out_policy:
        cpufreq_cpu_put(policy);
out:
        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
        .name           = "cpufreq",
        .subsys         = &cpu_subsys,
        .add_dev        = cpufreq_add_dev,
        .remove_dev     = cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("suspending cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return 0;

        if (cpufreq_driver->suspend) {
                ret = cpufreq_driver->suspend(cpu_policy);
                if (ret)
                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
                                        "step on CPU %u\n", cpu_policy->cpu);
        }

        cpufreq_cpu_put(cpu_policy);
        return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *      2.) schedule a call to cpufreq_update_policy() ASAP as interrupts are
 *          restored. It will verify that the current freq is in sync with
 *          what we believe it to be. This is a bit later than when it
 *          should be, but nonetheless it's better than calling
 *          cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("resuming cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return;

        if (cpufreq_driver->resume) {
                ret = cpufreq_driver->resume(cpu_policy);
                if (ret) {
                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
                                        "step on CPU %u\n", cpu_policy->cpu);
                        goto fail;
                }
        }

        schedule_work(&cpu_policy->update);

fail:
        cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
        .suspend        = cpufreq_bp_suspend,
        .resume         = cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *      cpufreq_register_notifier - register a driver with cpufreq
 *      @nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Add a driver to one of two lists: either a list of drivers that
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        WARN_ON(!init_cpufreq_transition_notifier_list_called);

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_register(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_register(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
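
/*
 * Illustrative only (not in the original file): a client that wants to
 * react to frequency transitions registers a notifier block, e.g.:
 *
 *      static int my_trans_cb(struct notifier_block *nb,
 *                             unsigned long state, void *data)
 *      {
 *              struct cpufreq_freqs *freqs = data;
 *
 *              if (state == CPUFREQ_POSTCHANGE)
 *                      pr_debug("cpu%u now at %u kHz\n",
 *                               freqs->cpu, freqs->new);
 *              return NOTIFY_OK;
 *      }
 *
 *      static struct notifier_block my_nb = { .notifier_call = my_trans_cb };
 *
 *      cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */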


/**
 *      cpufreq_unregister_notifier - unregister a driver with cpufreq
 *      @nb: notifier block to be unregistered
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Remove a driver from the CPU frequency notifier list.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_unregister(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_unregister(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
                            unsigned int target_freq,
                            unsigned int relation)
{
        int retval = -EINVAL;

        pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
                target_freq, relation);
        if (cpu_online(policy->cpu) && cpufreq_driver->target)
                retval = cpufreq_driver->target(policy, target_freq, relation);

        return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
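
/*
 * Note: __cpufreq_driver_target() itself takes no locks; the
 * cpufreq_driver_target() wrapper below is the variant that acquires the
 * policy rwsem in write mode around the call.
 */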

int cpufreq_driver_target(struct cpufreq_policy *policy,
                          unsigned int target_freq,
                          unsigned int relation)
{
        int ret = -EINVAL;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
                goto fail;

        ret = __cpufreq_driver_target(policy, target_freq, relation);

        unlock_policy_rwsem_write(policy->cpu);

fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
        int ret = 0;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;

        if (cpu_online(cpu) && cpufreq_driver->getavg)
                ret = cpufreq_driver->getavg(policy, cpu);

        cpufreq_cpu_put(policy);
        return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * Pass "event" (CPUFREQ_GOV_START, CPUFREQ_GOV_STOP or CPUFREQ_GOV_LIMITS)
 * on to the governor, falling back to a latency-tolerant governor if needed.
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
                                        unsigned int event)
{
        int ret;

        /* A fallback governor only needs to be defined when the default
           governor is known to have latency restrictions, like e.g.
           conservative or ondemand. Kconfig already ensures that this
           is the case.
        */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
        struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
        struct cpufreq_governor *gov = NULL;
#endif

        if (policy->governor->max_transition_latency &&
            policy->cpuinfo.transition_latency >
            policy->governor->max_transition_latency) {
                if (!gov)
                        return -EINVAL;
                else {
                        printk(KERN_WARNING "%s governor failed, too long"
                               " transition latency of HW, fallback"
                               " to %s governor\n",
                               policy->governor->name,
                               gov->name);
                        policy->governor = gov;
                }
        }

        if (!try_module_get(policy->governor->owner))
                return -EINVAL;

        pr_debug("__cpufreq_governor for CPU %u, event %u\n",
                                                policy->cpu, event);
        ret = policy->governor->governor(policy, event);

1535         /* we keep one module reference alive for each CPU
1536            governed by this governor */
1537         if ((event != CPUFREQ_GOV_START) || ret)
1538                 module_put(policy->governor->owner);
1539         if ((event == CPUFREQ_GOV_STOP) && !ret)
1540                 module_put(policy->governor->owner);
1541
1542         return ret;
1543 }
1544
1545
1546 int cpufreq_register_governor(struct cpufreq_governor *governor)
1547 {
1548         int err;
1549
1550         if (!governor)
1551                 return -EINVAL;
1552
1553         mutex_lock(&cpufreq_governor_mutex);
1554
1555         err = -EBUSY;
1556         if (__find_governor(governor->name) == NULL) {
1557                 err = 0;
1558                 list_add(&governor->governor_list, &cpufreq_governor_list);
1559         }
1560
1561         mutex_unlock(&cpufreq_governor_mutex);
1562         return err;
1563 }
1564 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
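
/*
 * Example (illustrative only): a minimal governor module would define a
 * struct cpufreq_governor and register it as below. All "example" names are
 * hypothetical; __cpufreq_governor() above invokes the ->governor callback
 * with the policy rwsem already held.
 */
#if 0
static int example_governor(struct cpufreq_policy *policy, unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_START:
	case CPUFREQ_GOV_LIMITS:
		/* pin the CPU to its current minimum frequency */
		__cpufreq_driver_target(policy, policy->min,
					CPUFREQ_RELATION_L);
		break;
	case CPUFREQ_GOV_STOP:
		/* nothing to tear down for this trivial governor */
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_example = {
	.name		= "example",
	.governor	= example_governor,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_example_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_example);
}

static void __exit cpufreq_gov_example_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_example);
}

module_init(cpufreq_gov_example_init);
module_exit(cpufreq_gov_example_exit);
#endif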
1565
1566
1567 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1568 {
1569 #ifdef CONFIG_HOTPLUG_CPU
1570         int cpu;
1571 #endif
1572
1573         if (!governor)
1574                 return;
1575
1576 #ifdef CONFIG_HOTPLUG_CPU
1577         for_each_present_cpu(cpu) {
1578                 if (cpu_online(cpu))
1579                         continue;
1580                 if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1581                         per_cpu(cpufreq_cpu_governor, cpu)[0] = '\0';
1582         }
1583 #endif
1584
1585         mutex_lock(&cpufreq_governor_mutex);
1586         list_del(&governor->governor_list);
1587         mutex_unlock(&cpufreq_governor_mutex);
1588         return;
1589 }
1590 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1591
1592
1593
1594 /*********************************************************************
1595  *                          POLICY INTERFACE                         *
1596  *********************************************************************/
1597
1598 /**
1599  * cpufreq_get_policy - get the current cpufreq_policy
1600  * @policy: struct cpufreq_policy into which the current policy is written
1601  * @cpu: CPU whose current policy is requested
1602  *
1603  * Reads the current cpufreq policy.
1604  */
1605 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1606 {
1607         struct cpufreq_policy *cpu_policy;
1608         if (!policy)
1609                 return -EINVAL;
1610
1611         cpu_policy = cpufreq_cpu_get(cpu);
1612         if (!cpu_policy)
1613                 return -EINVAL;
1614
1615         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1616
1617         cpufreq_cpu_put(cpu_policy);
1618         return 0;
1619 }
1620 EXPORT_SYMBOL(cpufreq_get_policy);
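
/*
 * Example (illustrative only): a caller takes a private snapshot of a CPU's
 * policy; the copy is safe to inspect after the call returns because the
 * reference on the live policy has already been dropped.
 */
#if 0
static void example_show_limits(unsigned int cpu)
{
	struct cpufreq_policy policy;

	if (cpufreq_get_policy(&policy, cpu))
		return;
	pr_info("cpu%u: %u - %u kHz\n", cpu, policy.min, policy.max);
}
#endif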
1621
1622
1623 /*
1624  * data   : current (live) policy.
1625  * policy : new policy to be applied to it.
1626  */
1627 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1628                                 struct cpufreq_policy *policy)
1629 {
1630         int ret = 0;
1631
1632         pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1633                 policy->min, policy->max);
1634
1635         memcpy(&policy->cpuinfo, &data->cpuinfo,
1636                                 sizeof(struct cpufreq_cpuinfo));
1637
1638         if (policy->min > data->max || policy->max < data->min) {
1639                 ret = -EINVAL;
1640                 goto error_out;
1641         }
1642
1643         /* verify the cpu speed can be set within this limit */
1644         ret = cpufreq_driver->verify(policy);
1645         if (ret)
1646                 goto error_out;
1647
1648         /* adjust if necessary - all reasons */
1649         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1650                         CPUFREQ_ADJUST, policy);
1651
1652         /* adjust if necessary - hardware incompatibility */
1653         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1654                         CPUFREQ_INCOMPATIBLE, policy);
1655
1656         /* verify the cpu speed can be set within this limit, which
1657            might differ from the first one after notifier adjustments */
1658         ret = cpufreq_driver->verify(policy);
1659         if (ret)
1660                 goto error_out;
1661
1662         /* notification of the new policy */
1663         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1664                         CPUFREQ_NOTIFY, policy);
1665
1666         data->min = policy->min;
1667         data->max = policy->max;
1668
1669         pr_debug("new min and max freqs are %u - %u kHz\n",
1670                                         data->min, data->max);
1671
1672         if (cpufreq_driver->setpolicy) {
1673                 data->policy = policy->policy;
1674                 pr_debug("setting range\n");
1675                 ret = cpufreq_driver->setpolicy(policy);
1676         } else {
1677                 if (policy->governor != data->governor) {
1678                         /* save old, working values */
1679                         struct cpufreq_governor *old_gov = data->governor;
1680
1681                         pr_debug("governor switch\n");
1682
1683                         /* end old governor */
1684                         if (data->governor)
1685                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1686
1687                         /* start new governor */
1688                         data->governor = policy->governor;
1689                         if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1690                                 /* new governor failed, so re-start old one */
1691                                 pr_debug("starting governor %s failed\n",
1692                                                         data->governor->name);
1693                                 if (old_gov) {
1694                                         data->governor = old_gov;
1695                                         __cpufreq_governor(data,
1696                                                            CPUFREQ_GOV_START);
1697                                 }
1698                                 ret = -EINVAL;
1699                                 goto error_out;
1700                         }
1701                         /* might be a policy change, too, so fall through */
1702                 }
1703                 pr_debug("governor: change or update limits\n");
1704                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1705         }
1706
1707 error_out:
1708         return ret;
1709 }
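
/*
 * Example (illustrative only): how an external subsystem (e.g. thermal code)
 * could hook the CPUFREQ_ADJUST step above to clamp policy->max. The 800000
 * kHz limit is hypothetical; cpufreq_verify_within_limits() only ever
 * narrows the [min, max] range, never widens it.
 */
#if 0
static int example_policy_notify(struct notifier_block *nb,
				 unsigned long event, void *data)
{
	struct cpufreq_policy *policy = data;

	if (event == CPUFREQ_ADJUST)
		cpufreq_verify_within_limits(policy, 0, 800000);
	return NOTIFY_OK;
}

static struct notifier_block example_policy_nb = {
	.notifier_call = example_policy_notify,
};

/* registered once with:
 *	cpufreq_register_notifier(&example_policy_nb,
 *				  CPUFREQ_POLICY_NOTIFIER);
 */
#endif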
1710
1711 /**
1712  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1713  *      @cpu: CPU which shall be re-evaluated
1714  *
1715  *      Useful for policy notifiers which have different necessities
1716  *      at different times.
1717  */
1718 int cpufreq_update_policy(unsigned int cpu)
1719 {
1720         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1721         struct cpufreq_policy policy;
1722         int ret;
1723
1724         if (!data) {
1725                 ret = -ENODEV;
1726                 goto no_policy;
1727         }
1728
1729         if (unlikely(lock_policy_rwsem_write(cpu))) {
1730                 ret = -EINVAL;
1731                 goto fail;
1732         }
1733
1734         pr_debug("updating policy for CPU %u\n", cpu);
1735         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1736         policy.min = data->user_policy.min;
1737         policy.max = data->user_policy.max;
1738         policy.policy = data->user_policy.policy;
1739         policy.governor = data->user_policy.governor;
1740
1741         /* BIOS might change freq behind our back, so ask the driver for
1742            the current freq and notify governors about the change */
1743         if (cpufreq_driver->get) {
1744                 policy.cur = cpufreq_driver->get(cpu);
1745                 if (!data->cur) {
1746                         pr_debug("Driver did not initialize current freq");
1747                         data->cur = policy.cur;
1748                 } else {
1749                         if (data->cur != policy.cur)
1750                                 cpufreq_out_of_sync(cpu, data->cur,
1751                                                                 policy.cur);
1752                 }
1753         }
1754
1755         ret = __cpufreq_set_policy(data, &policy);
1756
1757         unlock_policy_rwsem_write(cpu);
1758
1759 fail:
1760         cpufreq_cpu_put(data);
1761 no_policy:
1762         return ret;
1763 }
1764 EXPORT_SYMBOL(cpufreq_update_policy);
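
/*
 * Example (illustrative only): after the inputs consulted by a policy
 * notifier change (say, a thermal limit), the owner re-triggers policy
 * evaluation on each CPU. Hotplug locking is elided in this sketch.
 */
#if 0
static void example_reevaluate_all_policies(void)
{
	unsigned int cpu;

	for_each_online_cpu(cpu)
		cpufreq_update_policy(cpu);
}
#endif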
1765
1766 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1767                                         unsigned long action, void *hcpu)
1768 {
1769         unsigned int cpu = (unsigned long)hcpu;
1770         struct device *dev;
1771
1772         dev = get_cpu_device(cpu);
1773         if (dev) {
1774                 switch (action) {
1775                 case CPU_ONLINE:
1776                 case CPU_ONLINE_FROZEN:
1777                         cpufreq_add_dev(dev, NULL);
1778                         break;
1779                 case CPU_DOWN_PREPARE:
1780                 case CPU_DOWN_PREPARE_FROZEN:
1781                         if (unlikely(lock_policy_rwsem_write(cpu)))
1782                                 BUG();
1783
1784                         __cpufreq_remove_dev(dev, NULL);
1785                         break;
1786                 case CPU_DOWN_FAILED:
1787                 case CPU_DOWN_FAILED_FROZEN:
1788                         cpufreq_add_dev(dev, NULL);
1789                         break;
1790                 }
1791         }
1792         return NOTIFY_OK;
1793 }
1794
1795 static struct notifier_block __refdata cpufreq_cpu_notifier = {
1796         .notifier_call = cpufreq_cpu_callback,
1797 };
1798
1799 /*********************************************************************
1800  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1801  *********************************************************************/
1802
1803 /**
1804  * cpufreq_register_driver - register a CPU Frequency driver
1805  * @driver_data: A struct cpufreq_driver containing the values
1806  * submitted by the CPU Frequency driver.
1807  *
1808  *   Registers a CPU Frequency driver to this core code. This code
1809  * returns zero on success, -EBUSY when another driver got here first
1810  * (and isn't unregistered in the meantime).
1811  *
1812  */
1813 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1814 {
1815         unsigned long flags;
1816         int ret;
1817
1818         if (!driver_data || !driver_data->verify || !driver_data->init ||
1819             ((!driver_data->setpolicy) && (!driver_data->target)))
1820                 return -EINVAL;
1821
1822         pr_debug("trying to register driver %s\n", driver_data->name);
1823
1824         if (driver_data->setpolicy)
1825                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1826
1827         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1828         if (cpufreq_driver) {
1829                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1830                 return -EBUSY;
1831         }
1832         cpufreq_driver = driver_data;
1833         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1834
1835         ret = subsys_interface_register(&cpufreq_interface);
1836         if (ret)
1837                 goto err_null_driver;
1838
1839         if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1840                 int i;
1841                 ret = -ENODEV;
1842
1843                 /* check for at least one working CPU */
1844                 for (i = 0; i < nr_cpu_ids; i++)
1845                         if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1846                                 ret = 0;
1847                                 break;
1848                         }
1849
1850                 /* if all ->init() calls failed, unregister */
1851                 if (ret) {
1852                         pr_debug("no CPU initialized for driver %s\n",
1853                                                         driver_data->name);
1854                         goto err_if_unreg;
1855                 }
1856         }
1857
1858         register_hotcpu_notifier(&cpufreq_cpu_notifier);
1859         pr_debug("driver %s up and running\n", driver_data->name);
1860
1861         return 0;
1862 err_if_unreg:
1863         subsys_interface_unregister(&cpufreq_interface);
1864 err_null_driver:
1865         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1866         cpufreq_driver = NULL;
1867         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1868         return ret;
1869 }
1870 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1871
1872
1873 /**
1874  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1875  *
1876  *    Unregister the current CPUFreq driver. Only call this if you have
1877  * the right to do so, i.e. if you successfully registered the driver before.
1878  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1879  * currently not initialised.
1880  */
1881 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1882 {
1883         unsigned long flags;
1884
1885         if (!cpufreq_driver || (driver != cpufreq_driver))
1886                 return -EINVAL;
1887
1888         pr_debug("unregistering driver %s\n", driver->name);
1889
1890         subsys_interface_unregister(&cpufreq_interface);
1891         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1892
1893         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1894         cpufreq_driver = NULL;
1895         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1896
1897         return 0;
1898 }
1899 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
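
/*
 * Example (illustrative only): the shape of a minimal frequency-table based
 * driver using the registration interface above. All "example" names, the
 * table contents and the latency are hypothetical; a real driver fills in
 * the hardware programming and sets policy->cur in ->init().
 */
#if 0
static struct cpufreq_frequency_table example_freqs[] = {
	{ .index = 0, .frequency = 300000 },		/* kHz */
	{ .index = 1, .frequency = 600000 },
	{ .index = 2, .frequency = CPUFREQ_TABLE_END },
};

static int example_verify(struct cpufreq_policy *policy)
{
	return cpufreq_frequency_table_verify(policy, example_freqs);
}

static int example_target(struct cpufreq_policy *policy,
			  unsigned int target_freq, unsigned int relation)
{
	unsigned int idx;

	if (cpufreq_frequency_table_target(policy, example_freqs,
					   target_freq, relation, &idx))
		return -EINVAL;
	/* program the hardware to example_freqs[idx].frequency here */
	return 0;
}

static int example_cpu_init(struct cpufreq_policy *policy)
{
	policy->cpuinfo.transition_latency = 100000;	/* ns */
	return cpufreq_frequency_table_cpuinfo(policy, example_freqs);
}

static struct cpufreq_driver example_driver = {
	.name	= "example",
	.init	= example_cpu_init,
	.verify	= example_verify,
	.target	= example_target,
};

/* cpufreq_register_driver(&example_driver) from module init,
 * cpufreq_unregister_driver(&example_driver) from module exit.
 */
#endif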
1900
1901 static int __init cpufreq_core_init(void)
1902 {
1903         int cpu;
1904
1905         for_each_possible_cpu(cpu) {
1906                 per_cpu(cpufreq_policy_cpu, cpu) = -1;
1907                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1908         }
1909
1910         cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
1911         BUG_ON(!cpufreq_global_kobject);
1912         register_syscore_ops(&cpufreq_syscore_ops);
1913
1914         return 0;
1915 }
1916 core_initcall(cpufreq_core_init);